Commit d7c90f38 authored by BODERE's avatar BODERE
Browse files

refactor: python good practises

parent bc163ced
*.py -crlf
*.sh -crlf
\ No newline at end of file
# ide
/.idea
# build
/**/build
/**/__pycache__
# distribution
/dist
# venv
/venv
# tests
/**/.pytest_cache
/.coverage
/coverage.xml
/pylint.txt
# ci
/public/
/docs/site/
# image-annotations
***
The image-annotations project gathers annotations of marine images produced by commonly used annotation software. It allows users to create a structured dataframe of annotations with metadata and provides a toolbox to analyse them, including visualization and verification tools, statistical analysis, and integration with machine learning algorithms.
## Table of Contents
***
1. [General Info](#general-info)
2. [Technologies](#technologies)
3. [Installation](#installation)
4. [Collaboration](#collaboration)
5. [FAQs](#faqs)
### General Info
***
The ongoing project provides the following functions:
* creation of an annotation dataframe
* visualization of annotations
* visualization of annotated images
* a map of annotations
* a catalogue of species from an annotation project
* a directory of annotated images
* a pdf report of an annotation project
![Image text](C:/Users/Administrateur/Documents/CODES/images-annotation/screenshot_git.PNG)
### Technologies
***
A list of technologies used within the project:
* [python](https://www.python.org/downloads/): Version 3.8.5
### Installation
***
$ git clone https://gitlab.ifremer.fr/image/image-annotations
$ cd ../path/to/the/file
$ npm install
$ npm start
### Collaboration
***
> If your annotation project comes from software other than those supported by the images-annotation toolbox, you can build the same dataframe by taking inspiration from the source code of create_dataframe_dss.py. Feel free to share your code to widen the range of annotation software supported by the image-annotations toolbox.
> Many machine learning algorithms can be applied to annotation predictions. Do not hesitate to share your results.
### FAQs
***
A list of frequently asked questions
1. How to create a dataframe from my project?
If your project comes from one of the following software packages (DSS, Biigle), choose the appropriate function. If not, send a mail or see the collaboration section.
2. Can I use the images-annotation toolbox from hand-made annotations?
Yes
# -*- coding: utf-8 -*-
from pathlib import Path
import csv
import numpy as np
from typing import Union
def create_dataframe_dss(
        path_csv: Path,
        path_out: str,
        null_str: str = 'NULL',
        delimiter: str = ';',
        path_rejection: Union[str, int] = -1) -> None:
    """Build a simplified annotation dataframe from a DSS project export.

    Reads a DSS ``.csv`` export and writes a ';'-separated csv with one
    row per annotation: label, userlevel, img_filename and the bounding
    rectangle (pos1x_rect, pos1y_rect, pos2x_rect, pos2y_rect).

    Args:
        path_csv: path of the DSS ``.csv`` export to read.
        path_out: path of the simplified csv to write.
        null_str: string the export uses for missing values.
        delimiter: field delimiter of the input csv.
        path_rejection: if not -1, path of a text file that will list the
            1-based input row numbers rejected for lack of position data.
    """
    header = ['label', 'userlevel', 'img_filename',
              'pos1x_rect', 'pos1y_rect', 'pos2x_rect', 'pos2y_rect']
    non_annot = []  # 1-based row numbers that carried no usable position
    # Both files are managed by `with` so they are closed even on error
    # (the output handle was previously leaked if an exception occurred).
    with open(path_csv, 'r') as csv_file, \
            open(path_out, 'w', newline='') as csv_out:
        csv_reader = csv.DictReader(csv_file, delimiter=delimiter)
        csv_writer = csv.writer(csv_out, delimiter=';')
        csv_writer.writerow(header)
        row_num = 0
        for row in csv_reader:
            row_num += 1
            k1 = row['polygon_values']
            k2 = row['pos2x']
            k3 = row['pos1x']
            if k1 != null_str:
                # Annotation drawn as a polygon: keep its bounding box.
                poly = k1
                nb_points = poly.count('x')  # number of polygon vertices
                px = []
                py = []
                for _ in range(nb_points):
                    # NOTE(review): these fixed offsets assume DSS's exact
                    # polygon text format — confirm against a real export.
                    px.append(int(poly[poly.find('x') + 4:poly.find('y') - 3]))
                    py.append(int(poly[poly.find('y') + 4:poly.find('}')]))
                    poly = poly[poly.find('}') + 1:poly.find(']') + 1]
                posx = [min(px), max(px)]
                posy = [min(py), max(py)]
            elif k2 != null_str:
                # Annotation drawn as a rectangle: two opposite corners.
                posx = [int(float(row['pos1x'])), int(float(row['pos2x']))]
                posy = [int(float(row['pos1y'])), int(float(row['pos2y']))]
            elif k3 != null_str:
                # Annotation drawn as a single point: expand it to an
                # 11-pixel-wide box, clamped at the image origin.
                pos1x = int(float(row['pos1x']))
                pos1y = int(float(row['pos1y']))
                posx = [max(pos1x - 5, 0), pos1x + 5]
                posy = [max(pos1y - 5, 0), pos1y + 5]
            else:
                # No position information at all: reject this row.
                non_annot.append(row_num)
                continue
            # Fields of interest for the simplified csv.
            csv_writer.writerow([row['name_fr'], row['userlevel'], row['name'],
                                 min(posx), min(posy), max(posx), max(posy)])
    if path_rejection != -1:
        np.savetxt(path_rejection, non_annot, fmt='%s')
# Demo usage: build the simplified annotation dataframe from a DSS export.
# Guarded so importing this module no longer runs the conversion as a
# side effect.
# NOTE(review): hard-coded local Windows paths — adapt before running.
if __name__ == '__main__':
    path_csv = 'C:/Users/Administrateur/Documents/CODES/json_annot/buccins_test.csv'
    path_out = 'C:/Users/Administrateur/Documents/MISSIONS/buccins_t/data.csv'
    create_dataframe_dss(path_csv, path_out, null_str = '', delimiter = ',')
\ No newline at end of file
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
def unique_data(path_data, s_err, path_stat = -1):
    """Mark duplicate annotations in the dataframe stored at path_data.

    The csv is read and re-written in place (';'-separated) with two
    added columns:
      'unique'     -- 1 for the representative annotation of each group,
      'occurences' -- index of the representative each row belongs to.
    Two annotations are grouped when they share the same image filename
    and label and the summed distance between their rectangle corners is
    below s_err.  If path_stat != -1, per-unique user-level counts are
    also written as text to that path.
    """
    # returns the dataframe of annotations incremented with columns: unique and occurences
    df = pd.read_csv(path_data, sep = ';')
    [nb_annot, nb_col] = df.shape
    df = df.assign(unique = -1)
    df = df.assign(occurences = -1)
    levels = df['userlevel']
    levels = levels.astype(int)
    level_max = max(levels) # user levels to store
    columns = ['pos1x_rect','pos1y_rect','pos2x_rect','pos2y_rect']
    pos = df[columns]
    pos = pos.to_numpy()
    list_temp = list(range(0,nb_annot))
    stat_unique = list(['stats']) # stats of each unique annotation
    list_err = list()
    while np.shape(list_temp)[0] > 0:
        i = list_temp[0] # take the first element of the list and compare it to the others
        df.at[i, 'unique'] = 1 # unique annotation
        df.at[i, 'occurences'] = i # a unique is its own occurrence
        print('annotation unique ' + str(i))
        # remove this annotation from the list of those still to examine
        list_temp.remove(i) # careful: list = list.remove(x) does not work, remove returns nothing (like append)
        # fetch its level and the image name
        level_i = df['userlevel'][i]
        filename_i = df['img_filename'][i]
        label_i = df['label'][i]
        # count this level in the stats of unique annotation i
        annot_stat = np.zeros(level_max+1) # careful: indices are shifted with respect to the level value
        annot_stat[level_i-1] = annot_stat[level_i-1] + 1
        # find and remove the occurrences of unique annotation i
        occ_i = list()
        err_i = list()
        occ_i.append(i)
        err_i.append(0)
        for ind_j in range(0,np.shape(list_temp)[0]):
            j = list_temp[ind_j]
            # test whether it refers to the same reference image
            filename_j = df['img_filename'][j]
            label_j = df['label'][j]
            if filename_i == filename_j and label_i == label_j: # same image: compute the distance between the rectangles
                err1 = np.sqrt((pos[i,0]-pos[j,0])**2 + (pos[i,1]-pos[j,1])**2) # err1: error on the first corner of the rectangle
                err2 = np.sqrt((pos[i,2]-pos[j,2])**2 + (pos[i,3]-pos[j,3])**2) # err2: error on the second corner of the rectangle
                err = err1 + err2
                list_err.append(err)
                # if the current annotation matches unique i (err < s_err) it is grouped with it
                if err < s_err:
                    # list the occurrences of unique annotation i
                    occ_i.append(j)
                    err_i.append(err)
                    # fill the stats of unique annotation i
                    level_j = levels[j] # level of the current annotation j
                    annot_stat[level_j-1] = annot_stat[level_j-1] + 1
        stat_unique.append([i, annot_stat])
        # remove each occurrence and attach it to its unique
        for o in range(1,np.shape(occ_i)[0]): # start at 1 because i was already removed from list_temp
            list_temp.remove(occ_i[o])
            df.at[occ_i[o], 'occurences'] = i # attach occurrence o to its unique i
    df.to_csv(path_data, sep=';')
    if path_stat != -1:
        np.savetxt(path_stat, stat_unique, fmt='%s')
# -*- coding: utf-8 -*-
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def _rect_of(df, row):
    # Bounding-rectangle corners (x1, y1, x2, y2) of one annotation row.
    return (df['pos1x_rect'][row], df['pos1y_rect'][row],
            df['pos2x_rect'][row], df['pos2y_rect'][row])


def _draw_scale_bar(img):
    # Draw a green scale bar of roughly one tenth of the image height,
    # with its pixel length printed above it.
    x_img = img.shape[0]
    n_0 = len(str(x_img))
    n_line = round(x_img / 10 ** (n_0 - 2)) * 10
    cv2.line(img, (0, x_img), (n_line, x_img), (0, 255, 0), 50)
    cv2.putText(img, 'd = ' + str(n_line), (0, x_img - 50),
                cv2.FONT_HERSHEY_PLAIN, 6, (0, 255, 0), 8)


def visualize_annot(lines2see, path_data, path_img):
    """Display annotation rectangles drawn on their reference image.

    Args:
        lines2see: one dataframe row index (int) or a sequence of row
            indices that must all refer to the same image.
        path_data: path of the ';'-separated annotation csv.
        path_img: directory holding the annotated images.
    """
    df = pd.read_csv(path_data, sep=';')
    if type(lines2see) == int:
        print('1 annotation')
        rows = [lines2see]
    else:
        print('several annotations')
        rows = list(lines2see)
    img_name = path_img + df['img_filename'][rows[0]]
    # BUG FIX: the multi-annotation branch used plt.imread while every
    # other code path (including the single-annotation branch) uses cv2;
    # plt.imread yields RGB (possibly float) data unsuited to cv2 drawing.
    img = cv2.imread(img_name)
    if img is None:
        print('no reference image: ' + img_name)
        return
    print('reference image: ' + img_name)
    thb_img = img
    for r in rows:
        x1, y1, x2, y2 = _rect_of(df, r)
        thb_img = cv2.rectangle(thb_img, (x1, y1), (x2, y2), (255, 0, 0), 5)
    _draw_scale_bar(thb_img)
    # 'nearest' interpolation so rectangle edges are not blended
    plt.imshow(thb_img, interpolation='nearest')
# Demo usage: show two annotations of the same image (rows 4 and 8).
# Guarded so importing this module no longer runs the demo as a side effect.
# NOTE(review): hard-coded local Windows paths — adapt before running.
if __name__ == '__main__':
    lines2see = [4, 8]
    path_data = 'C:/Users/Administrateur/Documents/MISSIONS/buccins_t/data.csv'
    path_img = 'C:/Users/Administrateur/Desktop/MISSION1_IMG/'
    visualize_annot(lines2see, path_data, path_img)
# -*- coding: utf-8 -*-
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def visualize_annot(lines2see, path_data, path_img):
    """Show one image with its annotation rectangle(s) overlaid.

    Args:
        lines2see: a single dataframe row index (int) or a sequence of
            row indices that must all refer to the same image.
        path_data: path of the ';'-separated annotation csv.
        path_img: directory holding the annotated images.
    """
    df = pd.read_csv(path_data, sep=';')

    is_single = type(lines2see) == int
    print('1 annotation' if is_single else 'several annotations')
    rows = [lines2see] if is_single else [lines2see[k] for k in range(np.shape(lines2see)[0])]

    img_path = path_img + df['img_filename'][rows[0]]
    img = cv2.imread(img_path)
    if img is None:
        print('no reference image: ' + img_path)
        return
    if not is_single:
        print('reference image: ' + img_path)

    height = img.shape[0]
    canvas = img
    # first annotation rectangle
    x1 = df['pos1x_rect'][rows[0]]
    y1 = df['pos1y_rect'][rows[0]]
    x2 = df['pos2x_rect'][rows[0]]
    y2 = df['pos2y_rect'][rows[0]]
    canvas = cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 0, 0), 5)
    # scale bar: about one tenth of the image height, rounded
    digits = len(str(height))
    bar = round(height / 10 ** (digits - 2)) * 10
    cv2.line(canvas, (0, height), (bar, height), (0, 255, 0), 50)
    cv2.putText(canvas, 'd = ' + str(bar), (0, height - 50),
                cv2.FONT_HERSHEY_PLAIN, 6, (0, 255, 0), 8)
    # remaining rectangles (multi-annotation case only)
    for r in rows[1:]:
        x1 = df['pos1x_rect'][r]
        y1 = df['pos1y_rect'][r]
        x2 = df['pos2x_rect'][r]
        y2 = df['pos2y_rect'][r]
        canvas = cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 0, 0), 5)
    # 'nearest' interpolation so rectangle edges are not blended
    plt.imshow(canvas, interpolation='nearest')
# Demo usage: show a single annotation (dataframe row 3).
# Guarded so importing this module no longer runs the demo as a side effect.
# NOTE(review): hard-coded local Windows paths — adapt before running.
if __name__ == '__main__':
    lines2see = 3
    path_data = 'C:/Users/Administrateur/Documents/MISSIONS/buccins_t/data.csv'
    path_img = 'C:/Users/Administrateur/Desktop/MISSION1_IMG/'
    visualize_annot(lines2see, path_data, path_img)
# -*- coding: utf-8 -*-
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
def visualize_annoted_images(path_data, path_img, rep_out, path_colormap = 0, unique = True):
    """Write a copy of every annotated image with its rectangles drawn.

    One output image is written per distinct img_filename in the
    dataframe; each rectangle is coloured by the annotation's label.

    Args:
        path_data: path of the ';'-separated annotation csv.
        path_img: directory holding the source images.
        rep_out: output directory (created if missing).
        path_colormap: if not 0, path where a label/colour legend image
            is written.
        unique: if True, only rows with unique == 1 are drawn.
    """
    # create the output directory if it does not exist yet
    if not os.path.exists(rep_out):
        os.makedirs(rep_out)
    df = pd.read_csv(path_data, sep=';')
    # BUG FIX: `unique` was unconditionally reassigned to True here,
    # silently ignoring the caller's argument; the parameter is honoured now.
    if unique:
        # keep only the representative (unique) annotations
        df = df[df['unique'] == 1]
    # one colour per distinct label, rendered into a legend image
    labels_unique = df['label'].unique()
    nb_labels_unique = np.shape(labels_unique)[0]
    img_colours = np.zeros([1000, 1000, 3], dtype=np.uint8)
    colors_pal = sns.color_palette("bright", nb_labels_unique)
    colors = np.zeros([nb_labels_unique, 3], dtype=np.uint8)
    for i in range(nb_labels_unique):
        for c in range(3):
            # seaborn palettes are floats in [0, 1]; scale to 8-bit
            colors[i][c] = round(colors_pal[i][c] * 255)
        c0, c1, c2 = (int(v) for v in colors[i])
        cv2.putText(img_colours, labels_unique[i], (0, 50 * i + 50),
                    cv2.FONT_HERSHEY_PLAIN, 3, (c0, c1, c2), 6)
    if path_colormap != 0:
        cv2.imwrite(path_colormap, img_colours)
    # draw the annotations of each distinct image
    imgs = df['img_filename'].unique()
    nb_imgs = np.shape(imgs)[0]
    for i in range(nb_imgs):
        print('image ' + str(i + 1) + '/' + str(nb_imgs))
        # NOTE(review): plt.imread yields RGB while cv2.imwrite expects
        # BGR — channels may be swapped in the output; confirm intent.
        new_img = plt.imread(path_img + imgs[i])
        # rows of the dataframe that annotate the i-th image
        annot_i = df[df['img_filename'] == imgs[i]].index.tolist()
        for j in range(np.shape(annot_i)[0]):
            # colour chosen from the annotation's label
            label_unique = df['label'][annot_i[j]]
            ind_lab = np.where(labels_unique == label_unique)[0][0]
            c0, c1, c2 = (int(v) for v in colors[ind_lab])
            # rectangle outline
            p1x = df['pos1x_rect'][annot_i[j]]
            p1y = df['pos1y_rect'][annot_i[j]]
            p2x = df['pos2x_rect'][annot_i[j]]
            p2y = df['pos2y_rect'][annot_i[j]]
            new_img = cv2.rectangle(new_img, (p1x, p1y), (p2x, p2y), (c0, c1, c2), 5)
        cv2.imwrite(rep_out + imgs[i], new_img)
# Demo usage: render all unique annotations onto their images.
# Guarded so importing this module no longer runs the demo as a side effect.
# NOTE(review): hard-coded local Windows paths — adapt before running.
if __name__ == '__main__':
    path_data = 'C:/Users/Administrateur/Documents/MISSIONS/buccins_t/data.csv'
    path_img = 'C:/Users/Administrateur/Desktop/MISSION1_IMG/'
    rep_out = 'C:/Users/Administrateur/Documents/MISSIONS/buccins_t/unique_annoted_images/'
    path_colormap = 'C:/Users/Administrateur/Documents/MISSIONS/buccins_t/colormap.jpg'
    visualize_annoted_images(path_data, path_img, rep_out, path_colormap = path_colormap, unique = True)
This diff is collapsed.
[build-system]
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "image-annotations"
version = "0.0.0"
description = "Create image annotations dataframe and an associated toolbox for visualization, analysis and machine learning."
authors = [
"Philippe Anne-Charlotte <anne-charlotte.philippe@ifremer.fr>"
]
license = "MIT"
readme = "README.md"
homepage = "https://gitlab.ifremer.fr/image/image-annotations/"
repository = "https://gitlab.ifremer.fr/image/image-annotations/"
documentation = "https://gitlab.ifremer.fr/image/image-annotations/"
exclude = [
# exclude tests directory from wheel (only on sdist)
{ path= "tests", format = "wheel" }
]
[tool.poetry-dynamic-versioning]
enable = true
vcs = "git"
style = "semver"
pattern = "^v?(?P<base>\\d+\\.\\d+\\.\\d+)(-?((?P<stage>[a-zA-Z]+)\\.?(?P<revision>\\d+)?))?$"
# Requirements
[tool.poetry.dependencies]
python = "^3.8"
python-csv = "0.0.13"
opencv-python = "^4.5"
matplotlib = "^3.3.2"
numpy = "^1.19.2"
pandas = "^1.1.3"
seaborn = "^0.11.0"
scipy = "^1.5.2"
[tool.poetry.dev-dependencies]
# static analysis
flake8 = "^3.8"
flake8-docstrings = "^1.5"
pylint = "^2.6"
isort = "^5.8"
pytest = "^6.2"
black = { version = "^18.3-alpha.0", python = "^3.8.5" }
# -*- coding: utf-8 -*-
"""
script pour avoir les versions des packages utilisés