From b636ac08b8dfb7576ae45152b719fbd16c208960 Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Sun, 18 Jun 2023 22:02:33 +1000
Subject: [PATCH] update files to use new pylint settings, add types, and use
 docstrings in Google format with partial OpenAPI spec

---
 ai.py   | 159 ++++++++++++++++++++++++++++++++++---------------
 dups.py | 180 +++++++++++++++++++++++++++++++++-----------------------
 face.py | 107 ++++++++++++++++++++++-----------
 path.py |  90 ++++++++++++++++++----------
 4 files changed, 348 insertions(+), 188 deletions(-)

diff --git a/ai.py b/ai.py
index 4e727f8..700acc4 100644
--- a/ai.py
+++ b/ai.py
@@ -1,90 +1,140 @@
-from wtforms import SubmitField, StringField, HiddenField, validators, Form
-from flask_wtf import FlaskForm
+""" file containing all functions to handle routes relating to AI functionality """
+
+# pylint: disable=singleton-comparison
+
 from flask import request, render_template, redirect, make_response
-from main import db, app, ma
-from sqlalchemy import Sequence
-from sqlalchemy.exc import SQLAlchemyError
-from path import Path, PathType
-from files import Entry, Dir, File, PathDirLink
-from person import Refimg, Person, PersonRefimgLink
-from flask_login import login_required, current_user
+from flask_login import login_required
 from PIL import Image
 import io
 import base64
-
-from job import Job, JobExtra, Joblog, NewJob
+from main import db, app
+from path import PathType
+from files import Entry, File
+from job import JobExtra, NewJob
 from face import Face, FaceFileLink, FaceRefimgLink, FaceNoMatchOverride, FaceForceMatchOverride
-# pylint: disable=no-member
-
-################################################################################
-# /ai_stats -> placholder for some sort of stats
 ################################################################################
 @app.route("/ai_stats", methods=["GET"])
 @login_required
 def ai_stats():
-    stats = db.session.execute( "select p.tag, count(f.id) from person p, face f, face_file_link ffl, face_refimg_link frl, person_refimg_link prl where p.id = prl.person_id and prl.refimg_id = frl.refimg_id and frl.face_id = ffl.face_id and ffl.face_id = f.id group by p.tag order by 2 desc" )
-    cnt_res = db.session.execute( "select count(1) from ( select p.tag from person p, face f, face_file_link ffl, face_refimg_link frl, person_refimg_link prl where p.id = prl.person_id and prl.refimg_id = frl.refimg_id and frl.face_id = ffl.face_id and ffl.face_id = f.id group by p.tag ) as foo" )
+    """ route to handle URL: /ai_stats
+    ---
+    responses:
+      200:
+        description: renders ai_stats.html to display counts of how many matches for each person we have
+    """
+
+    stats = db.session.execute(
+        "select p.tag, count(f.id) "
+        "from person p, face f, face_file_link ffl, face_refimg_link frl, person_refimg_link prl "
+        "where p.id = prl.person_id and prl.refimg_id = frl.refimg_id and frl.face_id = ffl.face_id "
+        "  and ffl.face_id = f.id group by p.tag order by 2 desc" )
+    cnt_res = db.session.execute(
+        "select count(1) from "
+        "  ( select p.tag from person p, face f, face_file_link ffl, face_refimg_link frl, person_refimg_link prl "
+        "    where p.id = prl.person_id and prl.refimg_id = frl.refimg_id and frl.face_id = ffl.face_id "
+        "    and ffl.face_id = f.id group by p.tag ) as foo" )
     num_stats=cnt_res.first()[0]

     fstats={}
-    fstats['files_with_a_face'] = db.session.execute( "select count(distinct file_eid) as count from face_file_link" ).first()[0]
-    fstats['files_with_a_match'] = db.session.execute( "select count(distinct ffl.file_eid) as count from face_file_link ffl, face_refimg_link frl where frl.face_id = ffl.face_id" ).first()[0]
-    fstats['files_with_missing_matches'] = db.session.execute( "select count(distinct ffl.file_eid) from face f left join face_refimg_link frl on f.id = frl.face_id join face_file_link ffl on f.id = ffl.face_id where frl.refimg_id is null" ).first()[0]
+    fstats["files_with_a_face"] = db.session.execute( "select count(distinct file_eid) as count from face_file_link" ).first()[0]
+    sql="select count(distinct ffl.file_eid) as count from face_file_link ffl, face_refimg_link frl where frl.face_id = ffl.face_id"
+    fstats["files_with_a_match"] = db.session.execute( sql ).first()[0]
+    sql=("select count(distinct ffl.file_eid) from face f left join "
+         " face_refimg_link frl on f.id = frl.face_id join face_file_link ffl on f.id = ffl.face_id where frl.refimg_id is null" )
+    fstats["files_with_missing_matches"] = db.session.execute( sql ).first()[0]
     # files_with_no_matches?
-    fstats['all_faces'] = db.session.execute( "select count(distinct face_id) as count from face_file_link" ).first()[0]
-    fstats['all_matched_faces'] = db.session.execute( "select count(distinct face_id) as count from face_refimg_link" ).first()[0]
-    fstats['all_unmatched_faces'] = db.session.execute( "select count(f.id) from face f left join face_refimg_link frl on f.id = frl.face_id where frl.refimg_id is null" ).first()[0]
+    fstats["all_faces"] = db.session.execute( "select count(distinct face_id) as count from face_file_link" ).first()[0]
+    fstats["all_matched_faces"] = db.session.execute( "select count(distinct face_id) as count from face_refimg_link" ).first()[0]
+    sql="select count(f.id) from face f left join face_refimg_link frl on f.id = frl.face_id where frl.refimg_id is null"
+    fstats["all_unmatched_faces"] = db.session.execute( sql ).first()[0]

-    return render_template("ai_stats.html", page_title='AI Statistics', stats=stats, num_stats=num_stats, fstats=fstats )
+    return render_template("ai_stats.html", page_title="AI Statistics", stats=stats, num_stats=num_stats, fstats=fstats )
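Note for reviewers of the queries above: db.session.execute() is handed plain SQL strings here, which only works on older SQLAlchemy; on SQLAlchemy 1.4+ raw strings are deprecated (and removed in 2.0) and need to be wrapped in text(). A minimal sketch of the files_with_a_match count in that style, assuming db is the Flask-SQLAlchemy handle imported from main:

    # Sketch only: same count query, wrapped in sqlalchemy.text() as required on 1.4+.
    from sqlalchemy import text

    sql = text(
        "select count(distinct ffl.file_eid) as count "
        "from face_file_link ffl, face_refimg_link frl "
        "where frl.face_id = ffl.face_id"
    )
    files_with_a_match = db.session.execute(sql).first()[0]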
-################################################################################
-# /run_ai_on -> creates a job, with extras containing entry ids (eid-0, eid-1,
-# etc.) and person=all|dad, etc. Room to consider threshold, algo, etc.
 ################################################################################
 @app.route("/run_ai_on", methods=["POST"])
 @login_required
 def run_ai_on():
+    """ route to handle URL: /run_ai_on
+
+    this route creates a job for the job manager to scan for face(s) with AI on the
+    files/dirs passed in as form variables named eid-X, where X=0, 1, 2, etc. and
+    each eid-X contains an eid from the database for the dir/file entry
+
+    jobextras are created containing the entry ids (eid-0, eid-1, etc.)
+    and person=all|dad, etc. Room to consider threshold, algo, etc.
+    ---
+    responses:
+      302:
+        description: redirects to /jobs page showing all jobs (including this new one)
+    """
     jex=[]
     for el in request.form:
-        jex.append( JobExtra( name=f"{el}", value=request.form[el] ) )
-    job=NewJob( "run_ai_on", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in selected file(s)" )
+        jex.append( JobExtra( name=el, value=request.form[el] ) )
+    NewJob( "run_ai_on", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in selected file(s)" )
     return redirect("/jobs")

+################################################################################
 @app.route("/run_ai_on_import", methods=["GET"])
 @login_required
 def run_ai_on_import():
+    """ route to handle URL: /run_ai_on_import
+
+    this route creates a job for the job manager to scan for all faces with AI on
+    all the files in the import dir
+    ---
+    responses:
+      302:
+        description: redirects to /jobs page showing all jobs (including this new one)
+    """
     jex=[]
-    ptype=PathType.query.filter(PathType.name=='Import').first()
-    jex.append( JobExtra( name=f"person", value="all" ) )
-    jex.append( JobExtra( name=f"path_type", value=ptype.id ) )
-    job=NewJob( "run_ai_on_path", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in import path(s)")
+    ptype=PathType.query.filter(PathType.name=="Import").first()
+    jex.append( JobExtra( name="person", value="all" ) )
+    jex.append( JobExtra( name="path_type", value=ptype.id ) )
+    NewJob( "run_ai_on_path", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in import path(s)")
     return redirect("/jobs")

+################################################################################
 @app.route("/run_ai_on_storage", methods=["GET"])
 @login_required
 def run_ai_on_storage():
+    """ route to handle URL: /run_ai_on_storage
+
+    this route creates a job for the job manager to scan for all faces with AI on
+    all the files in the storage dir
+    ---
+    responses:
+      302:
+        description: redirects to /jobs page showing all jobs (including this new one)
+    """
     jex=[]
-    ptype=PathType.query.filter(PathType.name=='Storage').first()
-    jex.append( JobExtra( name=f"person", value="all" ) )
-    jex.append( JobExtra( name=f"path_type", value=ptype.id ) )
-    job=NewJob( "run_ai_on_path", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in storage path(s)")
+    ptype=PathType.query.filter(PathType.name=="Storage").first()
+    jex.append( JobExtra( name="person", value="all" ) )
+    jex.append( JobExtra( name="path_type", value=ptype.id ) )
+    NewJob( "run_ai_on_path", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in storage path(s)")
     return redirect("/jobs")

+################################################################################
 @app.route("/unmatched_faces", methods=["GET"])
 @login_required
 def unmatched_faces():
+    """ route to handle URL: /unmatched_faces
+    ---
+    responses:
+      200:
+        description: renders faces.html to show up to 10 faces that AI has found that have no matching person
+    """
     # get overrides and exclude them as they have been processed already
     fnmo_ids = [id[0] for id in FaceNoMatchOverride.query.with_entities(FaceNoMatchOverride.face_id).all()]
     fmo_ids = [id[0] for id in FaceForceMatchOverride.query.with_entities(FaceForceMatchOverride.face_id).all()]
-    faces=Face.query.join(FaceFileLink).join(FaceRefimgLink, isouter=True).filter(FaceRefimgLink.refimg_id==None).filter(Face.id.not_in(fnmo_ids+fmo_ids)).order_by(Face.h.desc()).limit(10).all()
-    imgs={}
+    faces=Face.query.join(FaceFileLink).join(FaceRefimgLink, isouter=True).filter(FaceRefimgLink.refimg_id==None) \
+                    .filter(Face.id.not_in(fnmo_ids+fmo_ids)).order_by(Face.h.desc()).limit(10).all()
     for face in faces:
-        f = Entry.query.join(File).join(FaceFileLink).filter(FaceFileLink.face_id==face.id).first()
+        f = Entry.query.join(File).join(FaceFileLink).filter(FaceFileLink.face_id==face.id).first()
         face.file_eid=f.id
         face.url=f.FullPathOnFS()
         face.filename=f.name
@@ -96,7 +146,7 @@ def unmatched_faces():
         im = Image.open(f.FullPathOnFS())
         region = im.crop((x, y, x2, y2))
         img_bytearray = io.BytesIO()
-        region.save(img_bytearray, format='JPEG')
+        region.save(img_bytearray, format="JPEG")
         img_bytearray = img_bytearray.getvalue()
         face.img = base64.b64encode(img_bytearray)
         face.img = str(face.img)[2:-1]
@@ -104,14 +154,29 @@ def unmatched_faces():
     return render_template("faces.html", faces=faces)

-# this is called in Ajax, when we manually override a face that is currently unmatched load
-# the original full image, find the current face's coords, grab pixels 10% larger and return
-# it so we can show it in the dbox, and be able to pass it around for refimg creation (if needed)
-@app.route("/get_face_from_image/<int:face_id>", methods=["POST"])
+################################################################################
+@app.route("/get_face_from_image/<int:face_id>", methods=["POST"])
 @login_required
 def get_face_from_image(face_id):
+    """ route to handle URL: /get_face_from_image/<int:face_id>
+
+    this is called via Ajax when we manually override a face that is currently unmatched: load
+    the original full image, find the current face's coords, grab pixels 10% larger, and return
+    it so we can show it in the dbox and pass it around for refimg creation (if needed)
+    ---
+    responses:
+      200:
+        description: Base64-encoded image of face AI found returned successfully
+        content:
+          text/plain:
+            schema:
+              type: string
+              format: binary
+              description: Base64-encoded image data
+    """
+
     face=Face.query.get(face_id)
-    f = Entry.query.join(File).join(FaceFileLink).filter(FaceFileLink.face_id==face_id).first()
+    f = Entry.query.join(File).join(FaceFileLink).filter(FaceFileLink.face_id==face_id).first()
     x=face.face_left*0.95
     y=face.face_top*0.95
     x2=face.face_right*1.05
@@ -120,7 +185,7 @@ def get_face_from_image(face_id):
     im = Image.open(f.FullPathOnFS())
     region = im.crop((x, y, x2, y2))
     img_bytearray = io.BytesIO()
-    region.save(img_bytearray, format='JPEG')
+    region.save(img_bytearray, format="JPEG")
     img_bytearray = img_bytearray.getvalue()
     face_img = base64.b64encode(img_bytearray)
     face_img = str(face_img)[2:-1]
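The eid-X form convention documented in run_ai_on() can be exercised with a short client; a hypothetical sketch, where the host, port, entry ids, and session/login handling are illustrative and not taken from the repo:

    # Hypothetical client sketch: POST the eid-X form variables that /run_ai_on reads.
    import requests

    form = {
        "eid-0": "101",   # entry id of a file to scan (illustrative)
        "eid-1": "102",   # entry id of a dir to scan (illustrative)
        "person": "all",  # match against all people, or a single tag e.g. "dad"
    }
    # assumes an already-authenticated session; login_required otherwise redirects
    resp = requests.post("http://localhost:5000/run_ai_on", data=form, allow_redirects=False)
    print(resp.status_code)  # expect 302, redirecting to /jobs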
diff --git a/dups.py b/dups.py
index 3106933..bbf7f21 100644
--- a/dups.py
+++ b/dups.py
@@ -1,47 +1,36 @@
-from wtforms import SubmitField, StringField, HiddenField, validators, Form
-from flask_wtf import FlaskForm
-from flask import request, render_template, send_from_directory
-from main import db, app, ma
-from sqlalchemy import Sequence
-from sqlalchemy.exc import SQLAlchemyError
-import os
-import glob
-from PIL import Image
-from pymediainfo import MediaInfo
-import hashlib
-import exifread
-import base64
-import numpy
-import cv2
-import time
+""" functions provided to process duplicate photo data from DB into usable data structures """
 import re

 ################################################################################
 # Local Class imports
 ################################################################################
-from settings import Settings
-from shared import SymlinkName, PA
+from shared import PA
 from path import PathType

-################################################################################
-# DupRow class is a simple 'struct' to keep data per duplicate file / just to
-# avoid using python list/dicts intermixed, and be able to consistently use
-# dot-notation of fields
+################################################################################
 class DupRow(PA):
-    def __init__(self, hash, file, dir, did, fid):
+    """ DupRow class is a simple 'struct' to keep data per duplicate file
+
+    Created just to avoid using python list/dicts intermixed, and be able to consistently use
+    dot-notation of fields
+    """
+
+    def __init__(self, _hash, file, _dir, did, fid):
         ### DupRow Attributes -- note, simple class, no methods ###
-        self.h=hash
+        self.h=_hash
         self.f=file
-        self.d=dir
+        self.d=_dir
         self.did=did
         self.id=fid
         return

-################################################################################
-# DupPathRow class is a simple 'struct' to keep data per files in duplicate paths
-# just to avoid using python list/dicts intermixed, and be able to consistently use
-# dot-notation of fields
+################################################################################
 class DupPathRow(PA):
+    """ DupPathRow class is a simple 'struct' to keep data per file in duplicate paths
+
+    Created just to avoid using python list/dicts intermixed, and be able to consistently use
+    dot-notation of fields
+    """
     def __init__(self, count, d1, d2, did1, did2, hashes ):
         self.count=count
         self.d1=d1
@@ -51,33 +40,37 @@ class DupPathRow(PA):
         self.hashes=hashes
         return

-################################################################################
-# Duplicates class is used with one instance/object to process all the
-# 'duplicate' data from the Database, and parse it into more usable data
-# structures. This is needed also, as the database content shows duplicates
-# more than once, e.g.
-# file1 and file2 are a duplicate, then later file2 and file 1 are 'another' duplicate
-# The class passes over the data in 2 passes. The first pass in AddDup() finds
-# any files in the import and storage path and marks the storage ones to keep,
-# the import ones to delete. Anything else is either a set of files duplicated
-# inside the import path or set of files duplicated in the storage path
-# The first pass, simply concatenates these into a data structure
-# (im_same_dups) that contains all the duplicates with a key of the md5 hash
-#
-# The second pass (), processes these duplicates to see if there are any in the
-# storage path that follow the pattern 'YYYY/YYYYMMDD' -> if so mark these to
-# keep and the rest to be deleted.
-#
-# After the 2 passes, we have data structures that allow the web to break up
-# the duplicates into batches to process:
-# 1) auto delete any in the import path that are also in the storage path
-#    - careful here, if we have any in the import path and 2+ in the storage path, leave it for manual intervention
-# 2) auto delete any in the storage path that are in a set where 1 of them match the 'YYYY/YYYYMMDD' format, the rest are deleted
-# 3) a set of directories where there are only 2 duplicate files (with the same file name), just in a different dir - allow user to choose the dir to keep
-# 4) a set of individual files where I want the user to make a decision (3 or more copies, those with different filenames, or in the same dir) - allow user to choose file to keep
+################################################################################
 class Duplicates(PA):
+    """ Duplicates class that has methods to process DB duplicate photo data
+
+    The Duplicates class is used with one instance/object to process all the
+    'duplicate' data from the Database, and parse it into more usable data
+    structures. This is also needed because the database content shows duplicates
+    more than once, e.g.
+    file1 and file2 are a duplicate, then later file2 and file1 are 'another' duplicate.
+    The class passes over the data in 2 passes. The first pass, in AddDup(), finds
+    any files in both the import and storage paths and marks the storage ones to keep,
+    the import ones to delete. Anything else is either a set of files duplicated
+    inside the import path or a set of files duplicated in the storage path.
+    The first pass simply concatenates these into a data structure
+    (im_same_dups) that contains all the duplicates with a key of the md5 hash
+
+    The second pass processes these duplicates to see if there are any in the
+    storage path that follow the pattern 'YYYY/YYYYMMDD' -> if so mark these to
+    keep and the rest to be deleted.
+
+    After the 2 passes, we have data structures that allow the web to break up
+    the duplicates into batches to process:
+    1) auto delete any in the import path that are also in the storage path
+       - careful here, if we have any in the import path and 2+ in the storage path, leave it for manual intervention
+    2) auto delete any in the storage path that are in a set where 1 of them matches the 'YYYY/YYYYMMDD' format; the rest are deleted
+    3) a set of directories where there are only 2 duplicate files (with the same file name), just in a different dir - allow user to choose the dir to keep
+    4) a set of individual files where I want the user to make a decision (3 or more copies, those with different filenames, or in the same dir) - allow user to choose file to keep
+    """
+
     def __init__(self):
-        ### Duplicates Attributes ###
+        """ initialises all the Duplicates Attributes """
         self.ip_to_sp_dups_keep={}
         self.ip_to_sp_dups_del={}
         self.dups_to_process={}
@@ -90,33 +83,59 @@ class Duplicates(PA):
         self.uniq_dups=0
         self.total_dups=0

-        self.import_ptype_id = PathType.query.filter(PathType.name=='Import').first().id
-        self.storage_ptype_id = PathType.query.filter(PathType.name=='Storage').first().id
+        self.import_ptype_id = PathType.query.filter(PathType.name=="Import").first().id
+        self.storage_ptype_id = PathType.query.filter(PathType.name=="Storage").first().id

-    # is this file in the import path?
     def InImportPath( self, path_type ):
+        """ Is the path being checked an import path
+
+        Args:
+            path_type (int): db key for the path_type of the path being checked
+        Returns:
+            bool: True if this path is an import path
+        """
         if path_type == self.import_ptype_id:
             return True
         return False

-    # is this file in the storage path?
     def InStoragePath( self, path_type ):
+        """ Is the path being checked a storage path
+
+        Args:
+            path_type (int): db key for the path_type of the path being checked
+        Returns:
+            bool: True if this path is a storage path
+        """
         if path_type == self.storage_ptype_id:
             return True
         return False

-    # this stores this object into the keep from same path list (sometimes there can be more than 1 SP, e.g SP to SP to IP)
-    # for now, by not dealing with the extra SP, we will just delete the IP, and force a check_dups after deleting, it will then
-    # pick up and process the SP to SP - if still needed -- if there is only SP1 to SP2, then the per_path_dup will pick it up and
-    # I believe this will all work, but doesn't hurt to do an extra check_dups again
     def KeepInIPSPDups( self, obj ):
+        """ stores this file into the "keep from same path" list
+
+        sometimes there can be more than 1 SP, e.g. SP to SP to IP;
+        for now, by not dealing with the extra SP, we will just delete the IP, and force a check_dups after deleting, it will then
+        pick up and process the SP to SP - if still needed -- if there is only SP1 to SP2, then the per_path_dup will pick it up and
+        I believe this will all work, but doesn't hurt to do an extra check_dups again
+
+        Args:
+            obj (DupRow): file that will be stored into the "keep from same path" list
+        Returns:
+            None
+        """
         if obj.h not in self.ip_to_sp_dups_keep:
             self.ip_to_sp_dups_keep[obj.h]= obj
         return

-    # this stores this object into the Delete from same path list (if it is not
-    # already there)
     def DelInIPSPDups( self, obj ):
+        """ stores this object into the "Delete from same path" list (if it is not already there)
+
+        Args:
+            obj (DupRow): file that will be stored into the "Delete from same path" list
+        Returns:
+            None
+        """
+
         if obj.h not in self.ip_to_sp_dups_del:
             self.ip_to_sp_dups_del[obj.h]=[]
             self.ip_to_sp_dups_del[obj.h].append( obj )
@@ -127,10 +146,21 @@ class Duplicates(PA):
             self.ip_to_sp_dups_del[obj.h].append( obj )
         return

-    # this function takes a duplicate file (in the import path and the storage path)
-    # and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
-    # and then puts the import path file in the delete list (self.ip_to_sp_dups_keep) via self.DelInIPSPDups()
     def DupInImportAndStoragePath( self, row, dr1, dr2 ):
+        """ handles a duplicate file in import and storage paths, and stores it into the keep/delete lists
+
+        this function takes a duplicate file (in the import path and the storage path)
+        and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
+        and then puts the import path file in the delete list (self.ip_to_sp_dups_del) via self.DelInIPSPDups()
+
+        Args:
+            row (ORM row): row from the database with a dup pair in dir1 & dir2
+            dr1 (DupRow): dup data for file 1 of a duplicate
+            dr2 (DupRow): dup data for file 2 of a duplicate
+
+        Returns:
+            bool: True if file is in both import and storage path, False otherwise
+        """
         if self.InStoragePath(row.path_type1) and self.InImportPath(row.path_type2):
             self.KeepInIPSPDups( dr1 )
             self.DelInIPSPDups( dr2 )
@@ -180,7 +210,7 @@

     # AddDupPath: takes a row from the database effectively with a dup pair in dir1 & dir2
     # we process these into appropriate data structures on this second pass
-    # working through if a dir is in th estorage path and is
+    # working through if a dir is in the storage path and is
     def AddDupPath(self, hash):
         # this gets complex, if this hash is also in a shared imp / sp - then dont deal with it now, let the imp files be deleted and
         # the repeat check_dups validation step catch it as a cleander (potential) for still more duplicates just in sp
         # FIXME: what if both do? what if one is in SP and the other not, etc...
         if new:
             self.per_path_dups.append( dpr )
-            if re.search( r'\d{4}/\d{8}', dpr.d1):
+            if re.search( r"\d{4}/\d{8}", dpr.d1):
                 self.preferred_path[dpr.did1]=1
-            if re.search( r'\d{4}/\d{8}', dpr.d2):
+            if re.search( r"\d{4}/\d{8}", dpr.d2):
                 self.preferred_path[dpr.did2]=1

         return True

@@ -216,7 +246,7 @@
         if (len(self.dups_to_process[hash]) > 2) or (self.dups_to_process[hash][0].d == self.dups_to_process[hash][1].d) or (self.dups_to_process[hash][0].f != self.dups_to_process[hash][1].f):
             self.per_file_dups.append(self.dups_to_process[hash])
             for el in self.dups_to_process[hash]:
-                if re.search( r'\d{4}/\d{8}', el.d):
+                if re.search( r"\d{4}/\d{8}", el.d):
                     self.preferred_file[hash] = el.id
         else:
             # will force ask per path
@@ -232,9 +262,9 @@
         if len(self.ip_to_sp_dups_keep) > 0:
             print( "############ Files that are in both Import and Storage Paths ###########")
             for h in self.ip_to_sp_dups_keep:
-                print( f"hash={h} keep 1 of {len(self.ip_to_sp_dups_del[h])+1}, keep: {self.ip_to_sp_dups_keep[h]} | ", end='' )
+                print( f"hash={h} keep 1 of {len(self.ip_to_sp_dups_del[h])+1}, keep: {self.ip_to_sp_dups_keep[h]} | ", end="" )
                 for d in self.ip_to_sp_dups_del[h]:
-                    print( f"Del: {d}", end='' )
+                    print( f"Del: {d}", end="" )
                 print( "" )
             print( f"{len(self.ip_to_sp_dups_keep)} sets of duplicate files to delete at least 1, anything with 2 or more dups is printed above explicitly" )

@@ -247,9 +277,9 @@
         if len(self.preferred_file) > 0:
             print( " We have preferred (regexp matched) ###########")
             for h in self.preferred_file:
-                print( f"hash={h}, keep this one: {self.preferred_file[h]} from ", end='' )
+                print( f"hash={h}, keep this one: {self.preferred_file[h]} from ", end="" )
                 for d in self.dups_to_process[h]:
-                    print( f"{d.id}, ", end='' )
+                    print( f"{d.id}, ", end="" )
                 print ("")
             print( f"which is a total of {len(self.preferred_file)} duplicate files we will keep as they match the regexp" )
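The 'YYYY/YYYYMMDD' preference applied in AddDupPath() and in the per-file second pass boils down to a single regexp; a small self-contained illustration of which dirs it marks as preferred (example paths are illustrative):

    # Illustration of the preferred-path test used above: a dir is "preferred"
    # when it contains a YYYY/YYYYMMDD component anywhere in its path.
    import re

    for d in ("static/Storage/2019/20190704", "static/Storage/misc/holiday", "2021/20210101/raw"):
        print(d, "->", bool(re.search(r"\d{4}/\d{8}", d)))
    # static/Storage/2019/20190704 -> True
    # static/Storage/misc/holiday -> False
    # 2021/20210101/raw -> True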
diff --git a/face.py b/face.py
index 3caec79..c4a40fc 100644
--- a/face.py
+++ b/face.py
@@ -1,20 +1,26 @@
-from main import db, app, ma
-from sqlalchemy import Sequence
-from sqlalchemy.exc import SQLAlchemyError
+""" file containing all classes to handle Face (and associated tables) from the database """
+from main import db
 from shared import PA
-# pylint: disable=no-member
-
-################################################################################
-# Class describing Face in the database and DB via sqlalchemy
-# - face contains the binary version of numpy array so we dont need to recalc it
-# - refimg_lnk and facefile_lnk are viewOnly / just for convenience in viewer
-# - refimg is a real link to the refimg used for this face (its is only used in
-#   viewer, and is either set when there is a matched face, or None if no match
 ################################################################################
 class Face(PA,db.Model):
+    """Class describing a Face in the database
+
+    Attributes:
+        id (int): database id of row in Face table / primary key
+        face (bytes): the binary version of the numpy array so we don't need to recalc it
+        face_top (int): top-most pixel of face
+        face_right (int): right-most pixel of face
+        face_bottom (int): bottom-most pixel of face
+        face_left (int): left-most pixel of face
+        w (int): width of face in pixels
+        h (int): height of face in pixels
+        refimg_lnk (FaceRefimgLink): face_refimg_link data - viewonly / just for convenience in viewer
+        facefile_lnk (FaceFileLink): face_file_link data - viewonly / just for convenience in viewer
+        refimg (Refimg): link to the refimg used for this face (used in viewer). Set when there is a matched face, or None if no match
+    """
     __tablename__ = "face"
-    id = db.Column(db.Integer, db.Sequence('face_id_seq'), primary_key=True )
+    id = db.Column(db.Integer, db.Sequence("face_id_seq"), primary_key=True )
     face = db.Column( db.LargeBinary )
     face_top = db.Column( db.Integer )
     face_right = db.Column( db.Integer )
@@ -27,62 +33,93 @@ class Face(PA,db.Model):
     refimg =db.relationship("Refimg", secondary="face_refimg_link", uselist=False)

-################################################################################
-# Class describing FaceFileLink in the database and DB via sqlalchemy
-# each face comes from a file and used a model to find the face
-# this is not perfect, each face in the same file is always foudn with the same
-# model - so really should have ModelFileLink or something, in the long run
-# this might even be better as ScanDetailsFileLink and ScanDetails
 ################################################################################
 class FaceFileLink(PA, db.Model):
+    """Class describing a Face_File_Link in the database
+
+    NOTE: this data model is not perfect, each face in the same file is always found
+    with the same model - so it really should have ModelFileLink or something; in the long run
+    this might even be better as ScanDetailsFileLink and ScanDetails
+
+    Attributes:
+        face_id (int): face id of row in Face table / foreign key - part primary key
+        file_eid (int): entry id of a row in File table / foreign key - part primary key
+        model_used (int): id of a row in AI_model table used to find the face / foreign key - part primary key
+    """
     __tablename__ = "face_file_link"
     face_id = db.Column(db.Integer, db.ForeignKey("face.id"), primary_key=True )
     file_eid = db.Column(db.Integer, db.ForeignKey("file.eid"), primary_key=True )
     model_used = db.Column(db.Integer, db.ForeignKey("ai_model.id"), primary_key=True )

-################################################################################
-# Class describing FaceRefimgLink in the database and DB via sqlalchemy
-# connects / implies a face has matched a refimg and we keep the distance too
-# distance is mainly for debugging for now and shown in viewer
 ################################################################################
 class FaceRefimgLink(PA, db.Model):
+    """Class describing a Face_Refimg_Link in the database
+
+    connects / implies a face has matched a refimg and we keep the distance too;
+    distance is mainly for debugging for now and shown in viewer
+
+    Attributes:
+        face_id (int): face id of row in Face table / foreign key - part primary key
+        refimg_id (int): refimg id of row in Refimg table / foreign key - part primary key
+        face_distance (int): distance value (how similar matched Face was)
+    """
+
     __tablename__ = "face_refimg_link"
     face_id = db.Column(db.Integer, db.ForeignKey("face.id"), primary_key=True )
     refimg_id = db.Column(db.Integer, db.ForeignKey("refimg.id"), primary_key=True )
     face_distance = db.Column(db.Integer)

-################################################################################
-# Class describing FaceOverrideType in the database and DB via sqlalchemy
-# when a face has an override, it will be a simple list of different types
-# eg (forced match, no match, not a face, etc)
 ################################################################################
 class FaceOverrideType(PA, db.Model):
+    """Class describing a Face_Override_Type in the database
+
+    when a face has an override, it will be a simple list of different types
+    eg (forced match, no match, not a face, etc)
+
+    Attributes:
+        id (int): database id of row in FaceOverrideType table / primary key
+        name (str): name of the type of face override
+    """
     __tablename__ = "face_override_type"
-    id = db.Column(db.Integer, db.Sequence('face_override_type_id_seq'), primary_key=True )
+    id = db.Column(db.Integer, db.Sequence("face_override_type_id_seq"), primary_key=True )
     name = db.Column( db.String )

-################################################################################
-# Class describing FaceNoMatchOverride in the database and DB via sqlalchemy
-# used when a face does not match for some reason (type and face id connected)
 ################################################################################
 class FaceNoMatchOverride(PA, db.Model):
+    """Class describing a Face_No_Match_Override in the database
+
+    used when a face does not match for some reason (type and face id connected)
+
+    Attributes:
+        id (int): database id of row in Face_No_Match_Override table / part primary key
+        face_id (int): face id of row in Face table / foreign key - part primary key
+        type_id (int): id of row in Face_Override_Type table / foreign key
+        type (FaceOverrideType): convenience field for face override type for this face
+    """
     __tablename__ = "face_no_match_override"
-    id = db.Column(db.Integer, db.Sequence('face_override_id_seq'), primary_key=True )
+    id = db.Column(db.Integer, db.Sequence("face_override_id_seq"), primary_key=True )
     face_id = db.Column(db.Integer, db.ForeignKey("face.id"), primary_key=True )
     type_id = db.Column(db.Integer, db.ForeignKey("face_override_type.id"))
     type = db.relationship("FaceOverrideType")

-################################################################################
-# Class describing FaceForceMatchOverride in the database and DB via sqlalchemy
-# used when a face is forced to match for some reason (who and face id connected)
 ################################################################################
 class FaceForceMatchOverride(PA, db.Model):
+    """Class describing a Face_Force_Match_Override in the database
+
+    used when a face is forced to match for some reason (who and face id connected)
+
+    Attributes:
+        id (int): database id of row in Face_Force_Match_Override table / part primary key
+        face_id (int): face id of row in Face table / foreign key - part primary key
+        person_id (int): person id of row in Person table / foreign key - part primary key
+        person (Person): convenience field for Person with forced match
+    """
     __tablename__ = "face_force_match_override"
-    id = db.Column(db.Integer, db.Sequence('face_override_id_seq'), primary_key=True )
+    id = db.Column(db.Integer, db.Sequence("face_override_id_seq"), primary_key=True )
     face_id = db.Column(db.Integer, db.ForeignKey("face.id"), primary_key=True )
     person_id = db.Column(db.Integer, db.ForeignKey("person.id"), primary_key=True )
     person = db.relationship("Person")
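As a usage sketch of the Face relationships declared above (illustrative only: assumes an application context and an existing row; the id is hypothetical):

    # Illustrative query sketch; id 1 is a hypothetical example value.
    face = Face.query.get(1)
    print("box:", face.face_left, face.face_top, face.face_right, face.face_bottom)
    if face.refimg is not None:  # refimg is set only when the face matched a refimg
        print("matched refimg:", face.refimg.id)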
diff --git a/path.py b/path.py
index aee377b..192088d 100644
--- a/path.py
+++ b/path.py
@@ -1,61 +1,89 @@
-from shared import PA, ICON
+""" file containing all classes/functions to handle Path from the database """
+
 from flask import url_for
-from flask_wtf import FlaskForm
-from main import db, app, ma
-from sqlalchemy import Sequence
-from sqlalchemy.exc import SQLAlchemyError
+from shared import PA, ICON
+from main import db
-# pylint: disable=no-member
-
-################################################################################
-# Class describing File in the database, and via sqlalchemy, connected to the DB as well
-# This has to match one-for-one the DB table
 ################################################################################
 class PathType(db.Model):
+    """Class describing the type of Paths in the database
+
+    Attributes:
+        id (int): database id of row in PathType table / primary key
+        name (str): name of path type (e.g. import, storage, bin)
+    """
+
     __tablename__ = "path_type"
-    id = db.Column(db.Integer, db.Sequence('path_type_id_seq'), primary_key=True )
-    name = db.Column(db.String, unique=True, nullable=False )
+    id:int = db.Column(db.Integer, db.Sequence("path_type_id_seq"), primary_key=True )
+    name:str = db.Column(db.String, unique=True, nullable=False )

     def __repr__(self):
-        return "<PathType: id={}, name={}>".format(self.id, self.name )
+        return f"<PathType: id={self.id}, name={self.name}>"

-################################################################################
-# Class describing Path & in the database via sqlalchemy
 ################################################################################
 class Path(db.Model):
+    """Class describing a Path in the database
+
+    Attributes:
+        id (int): database id of row in Path table / primary key
+        type_id (int): id of row in PathType table / foreign key
+        type (PathType): sqlalchemy relationship of PathType using type_id
+        path_prefix (str): the actual dir on the filesystem that defines this Path
+        num_files (int): number of files in this Path
+    """
     __tablename__ = "path"
-    id = db.Column(db.Integer, db.Sequence('path_id_seq'), primary_key=True )
-    type_id = db.Column(db.Integer, db.ForeignKey("path_type.id"))
-    type = db.relationship("PathType")
-    path_prefix = db.Column(db.String, unique=True, nullable=False )
-    num_files = db.Column(db.Integer)
+    id:int = db.Column(db.Integer, db.Sequence("path_id_seq"), primary_key=True )
+    type_id:int = db.Column(db.Integer, db.ForeignKey("path_type.id"))
+    type:PathType = db.relationship("PathType")
+    path_prefix:str = db.Column(db.String, unique=True, nullable=False )
+    num_files:int = db.Column(db.Integer)

     def __repr__(self):
         return f"<Path: id={self.id}, path_prefix={self.path_prefix}>"

 ################################################################################
-# Class describing PathDeatil (quick connvenence class for MovePathDetails())
+# Class describing PathDetail (quick convenience class for MovePathDetails())
 ################################################################################
 class PathDetail(PA):
-    def __init__(self,type,path):
-        self.type=type
-        self.path=path
-        self.icon_url=url_for('internal', filename='icons.svg') + '#' + ICON[self.type]
-        return
+    """Class describing details of a Path [internal class used in MovePathDetails()]"""
+
+    def __init__(self,ptype,path):
+        """Initialisation function for PathDetail class
+
+        Args:
+            ptype (str): type of the path (e.g. "Storage" or "Import")
+            path (str): the path with its path_prefix stripped
+        """
+
+        self.type:str=ptype
+        self.path:str=path
+        # construct icon_url based on type of storage path (icons.svg contains icons for each)
+        self.icon_url:str=url_for("internal", filename="icons.svg") + "#" + ICON[self.type]

 ################################################################################
-# helper function to find oath details for move destinations - used in html
+# helper function to find path details for move destinations - used in html
 # for move DBox to show potential storage paths to move files into
 ################################################################################
 def MovePathDetails():
+    """helper function to find path details for move destinations
+
+    used in html/javascript for move Dialog Box to show potential storage paths to move files into
+
+    Args:
+        None
+
+    Returns:
+        List[PathDetail]: a list of Path Details for where files can be moved
+    """
     ret=[]
-    sps=Path.query.join(PathType).filter(PathType.name=='Storage').all()
+    sps=Path.query.join(PathType).filter(PathType.name=="Storage").all()
     for p in sps:
-        obj = PathDetail( type='Storage', path=p.path_prefix.replace('static/Storage/','') )
+        obj = PathDetail( ptype="Storage", path=p.path_prefix.replace("static/Storage/","") )
         ret.append( obj )
-    ips=Path.query.join(PathType).filter(PathType.name=='Import').all()
+    ips=Path.query.join(PathType).filter(PathType.name=="Import").all()
     for p in ips:
-        obj = PathDetail( type='Import', path=p.path_prefix.replace('static/Import/','') )
+        obj = PathDetail( ptype="Import", path=p.path_prefix.replace("static/Import/","") )
         ret.append( obj )
     return ret
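Finally, a sketch of how MovePathDetails() might be consumed by a route; the endpoint and template names below are hypothetical, only MovePathDetails() and the PathDetail fields (type, path, icon_url) come from this file:

    # Hypothetical endpoint sketch; route and template names are illustrative.
    from flask import render_template
    from main import app

    @app.route("/move_targets", methods=["GET"])
    def move_targets():
        details = MovePathDetails()  # list of PathDetail(type, path, icon_url)
        return render_template("move_dbox.html", paths=details)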