From b636ac08b8dfb7576ae45152b719fbd16c208960 Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Sun, 18 Jun 2023 22:02:33 +1000
Subject: [PATCH] update files to use new pylint settings, add types, and use
 docstrings in Google format with partial OpenAPI spec

---
 ai.py   | 159 ++++++++++++++++++++++++++++++++++---------------
 dups.py | 180 +++++++++++++++++++++++++++++++++-----------------------
 face.py | 107 ++++++++++++++++++++++-----------
 path.py |  90 ++++++++++++++++++----------
 4 files changed, 348 insertions(+), 188 deletions(-)

diff --git a/ai.py b/ai.py
index 4e727f8..700acc4 100644
--- a/ai.py
+++ b/ai.py
@@ -1,90 +1,140 @@
-from wtforms import SubmitField, StringField, HiddenField, validators, Form
-from flask_wtf import FlaskForm
+""" file containing all functions to handle routes relating to AI functionality """
+
+# pylint: disable=singleton-comparison
+
 from flask import request, render_template, redirect, make_response
-from main import db, app, ma
-from sqlalchemy import Sequence
-from sqlalchemy.exc import SQLAlchemyError
-from path import Path, PathType
-from files import Entry, Dir, File, PathDirLink
-from person import Refimg, Person, PersonRefimgLink
-from flask_login import login_required, current_user
+from flask_login import login_required
 from PIL import Image
 import io
 import base64
-
-from job import Job, JobExtra, Joblog, NewJob
+from main import db, app
+from path import PathType
+from files import Entry, File
+from job import JobExtra, NewJob
 from face import Face, FaceFileLink, FaceRefimgLink, FaceNoMatchOverride, FaceForceMatchOverride
-# pylint: disable=no-member
-
-################################################################################
-# /ai_stats -> placholder for some sort of stats
 ################################################################################
 @app.route("/ai_stats", methods=["GET"])
 @login_required
 def ai_stats():
-    stats = db.session.execute( "select p.tag, count(f.id) from person p, face f, face_file_link ffl, face_refimg_link frl, person_refimg_link prl where p.id = prl.person_id and prl.refimg_id = frl.refimg_id and frl.face_id = ffl.face_id and ffl.face_id = f.id group by p.tag order by 2 desc" )
-    cnt_res = db.session.execute( "select count(1) from ( select p.tag from person p, face f, face_file_link ffl, face_refimg_link frl, person_refimg_link prl where p.id = prl.person_id and prl.refimg_id = frl.refimg_id and frl.face_id = ffl.face_id and ffl.face_id = f.id group by p.tag ) as foo" )
+    """ route to handle URL: /ai_stats
+    ---
+    responses:
+      200:
+        description: renders ai_stats.html to display counts of how many matches for each person we have
+    """
+
+    stats = db.session.execute(
+        "select p.tag, count(f.id) "
+        "from person p, face f, face_file_link ffl, face_refimg_link frl, person_refimg_link prl "
+        "where p.id = prl.person_id and prl.refimg_id = frl.refimg_id and frl.face_id = ffl.face_id "
+        "  and ffl.face_id = f.id group by p.tag order by 2 desc" )
+    cnt_res = db.session.execute(
+        "select count(1) from "
+        "  ( select p.tag from person p, face f, face_file_link ffl, face_refimg_link frl, person_refimg_link prl "
+        "    where p.id = prl.person_id and prl.refimg_id = frl.refimg_id and frl.face_id = ffl.face_id "
+        "    and ffl.face_id = f.id group by p.tag ) as foo" )
     num_stats=cnt_res.first()[0]

     fstats={}
-    fstats['files_with_a_face'] = db.session.execute( "select count(distinct file_eid) as count from face_file_link" ).first()[0]
-    fstats['files_with_a_match'] = db.session.execute( "select count(distinct ffl.file_eid) as count from face_file_link ffl, face_refimg_link frl where frl.face_id = ffl.face_id" ).first()[0]
-    fstats['files_with_missing_matches'] = db.session.execute( "select count(distinct ffl.file_eid) from face f left join face_refimg_link frl on f.id = frl.face_id join face_file_link ffl on f.id = ffl.face_id where frl.refimg_id is null" ).first()[0]
+    fstats["files_with_a_face"] = db.session.execute( "select count(distinct file_eid) as count from face_file_link" ).first()[0]
+    sql="select count(distinct ffl.file_eid) as count from face_file_link ffl, face_refimg_link frl where frl.face_id = ffl.face_id"
+    fstats["files_with_a_match"] = db.session.execute( sql ).first()[0]
+    sql=("select count(distinct ffl.file_eid) from face f left join "
+         " face_refimg_link frl on f.id = frl.face_id join face_file_link ffl on f.id = ffl.face_id where frl.refimg_id is null" )
+    fstats["files_with_missing_matches"] = db.session.execute( sql ).first()[0]
     # files_with_no_matches?
-    fstats['all_faces'] = db.session.execute( "select count(distinct face_id) as count from face_file_link" ).first()[0]
-    fstats['all_matched_faces'] = db.session.execute( "select count(distinct face_id) as count from face_refimg_link" ).first()[0]
-    fstats['all_unmatched_faces'] = db.session.execute( "select count(f.id) from face f left join face_refimg_link frl on f.id = frl.face_id where frl.refimg_id is null" ).first()[0]
+    fstats["all_faces"] = db.session.execute( "select count(distinct face_id) as count from face_file_link" ).first()[0]
+    fstats["all_matched_faces"] = db.session.execute( "select count(distinct face_id) as count from face_refimg_link" ).first()[0]
+    sql="select count(f.id) from face f left join face_refimg_link frl on f.id = frl.face_id where frl.refimg_id is null"
+    fstats["all_unmatched_faces"] = db.session.execute( sql ).first()[0]

-    return render_template("ai_stats.html", page_title='AI Statistics', stats=stats, num_stats=num_stats, fstats=fstats )
+    return render_template("ai_stats.html", page_title="AI Statistics", stats=stats, num_stats=num_stats, fstats=fstats )
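Note for reviewers of the queries above: db.session.execute() is handed plain SQL strings here, which only works on older SQLAlchemy; on SQLAlchemy 1.4+ raw strings are deprecated (and removed in 2.0) and need to be wrapped in text(). A minimal sketch of the files_with_a_match count in that style, assuming db is the Flask-SQLAlchemy handle imported from main:

    # Sketch only: same count query, wrapped in sqlalchemy.text() as required on 1.4+.
    from sqlalchemy import text

    sql = text(
        "select count(distinct ffl.file_eid) as count "
        "from face_file_link ffl, face_refimg_link frl "
        "where frl.face_id = ffl.face_id"
    )
    files_with_a_match = db.session.execute(sql).first()[0]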
-################################################################################
-# /run_ai_on -> creates a job, with extras containing entry ids (eid-0, eid-1,
-# etc.) and person=all|dad, etc. Room to consider threshold, algo, etc.
 ################################################################################
 @app.route("/run_ai_on", methods=["POST"])
 @login_required
 def run_ai_on():
+    """ route to handle URL: /run_ai_on
+
+    this route creates a job for the job manager to scan for face(s) with AI on the
+    files/dirs passed in as form variables named eid-X, where X=0, 1, 2, etc. and
+    each eid-X contains an eid from the database for the dir/file entry
+
+    jobextras are created containing the entry ids (eid-0, eid-1, etc.)
+    and person=all|dad, etc. Room to consider threshold, algo, etc.
+    ---
+    responses:
+      302:
+        description: redirects to /jobs page showing all jobs (including this new one)
+    """
     jex=[]
     for el in request.form:
-        jex.append( JobExtra( name=f"{el}", value=request.form[el] ) )
-    job=NewJob( "run_ai_on", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in selected file(s)" )
+        jex.append( JobExtra( name=el, value=request.form[el] ) )
+    NewJob( "run_ai_on", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in selected file(s)" )
     return redirect("/jobs")

+################################################################################
 @app.route("/run_ai_on_import", methods=["GET"])
 @login_required
 def run_ai_on_import():
+    """ route to handle URL: /run_ai_on_import
+
+    this route creates a job for the job manager to scan for all faces with AI on
+    all the files in the import dir
+    ---
+    responses:
+      302:
+        description: redirects to /jobs page showing all jobs (including this new one)
+    """
     jex=[]
-    ptype=PathType.query.filter(PathType.name=='Import').first()
-    jex.append( JobExtra( name=f"person", value="all" ) )
-    jex.append( JobExtra( name=f"path_type", value=ptype.id ) )
-    job=NewJob( "run_ai_on_path", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in import path(s)")
+    ptype=PathType.query.filter(PathType.name=="Import").first()
+    jex.append( JobExtra( name="person", value="all" ) )
+    jex.append( JobExtra( name="path_type", value=ptype.id ) )
+    NewJob( "run_ai_on_path", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in import path(s)")
     return redirect("/jobs")

+################################################################################
 @app.route("/run_ai_on_storage", methods=["GET"])
 @login_required
 def run_ai_on_storage():
+    """ route to handle URL: /run_ai_on_storage
+
+    this route creates a job for the job manager to scan for all faces with AI on
+    all the files in the storage dir
+    ---
+    responses:
+      302:
+        description: redirects to /jobs page showing all jobs (including this new one)
+    """
     jex=[]
-    ptype=PathType.query.filter(PathType.name=='Storage').first()
-    jex.append( JobExtra( name=f"person", value="all" ) )
-    jex.append( JobExtra( name=f"path_type", value=ptype.id ) )
-    job=NewJob( "run_ai_on_path", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in storage path(s)")
+    ptype=PathType.query.filter(PathType.name=="Storage").first()
+    jex.append( JobExtra( name="person", value="all" ) )
+    jex.append( JobExtra( name="path_type", value=ptype.id ) )
+    NewJob( "run_ai_on_path", num_files=0, wait_for=None, jex=jex, desc="Look for face(s) in storage path(s)")
     return redirect("/jobs")

+################################################################################
 @app.route("/unmatched_faces", methods=["GET"])
 @login_required
 def unmatched_faces():
+    """ route to handle URL: /unmatched_faces
+    ---
+    responses:
+      200:
+        description: renders faces.html to show up to 10 faces that AI has found that have no matching person
+    """
     # get overrides and exclude them as they have been processed already
     fnmo_ids = [id[0] for id in FaceNoMatchOverride.query.with_entities(FaceNoMatchOverride.face_id).all()]
     fmo_ids = [id[0] for id in FaceForceMatchOverride.query.with_entities(FaceForceMatchOverride.face_id).all()]
-    faces=Face.query.join(FaceFileLink).join(FaceRefimgLink, isouter=True).filter(FaceRefimgLink.refimg_id==None).filter(Face.id.not_in(fnmo_ids+fmo_ids)).order_by(Face.h.desc()).limit(10).all()
-    imgs={}
+    faces=Face.query.join(FaceFileLink).join(FaceRefimgLink, isouter=True).filter(FaceRefimgLink.refimg_id==None) \
+                    .filter(Face.id.not_in(fnmo_ids+fmo_ids)).order_by(Face.h.desc()).limit(10).all()
     for face in faces:
-        f = Entry.query.join(File).join(FaceFileLink).filter(FaceFileLink.face_id==face.id).first()
+        f = Entry.query.join(File).join(FaceFileLink).filter(FaceFileLink.face_id==face.id).first()
         face.file_eid=f.id
         face.url=f.FullPathOnFS()
         face.filename=f.name
@@ -96,7 +146,7 @@ def unmatched_faces():
         im = Image.open(f.FullPathOnFS())
         region = im.crop((x, y, x2, y2))
         img_bytearray = io.BytesIO()
-        region.save(img_bytearray, format='JPEG')
+        region.save(img_bytearray, format="JPEG")
         img_bytearray = img_bytearray.getvalue()
         face.img = base64.b64encode(img_bytearray)
         face.img = str(face.img)[2:-1]
@@ -104,14 +154,29 @@ def unmatched_faces():
     return render_template("faces.html", faces=faces)

-# this is called in Ajax, when we manually override a face that is currently unmatched load
-# the original full image, find the current face's coords, grab pixels 10% larger and return
-# it so we can show it in the dbox, and be able to pass it around for refimg creation (if needed)
-@app.route("/get_face_from_image/<int:face_id>", methods=["POST"])
+################################################################################
+@app.route("/get_face_from_image/<int:face_id>", methods=["POST"])
 @login_required
 def get_face_from_image(face_id):
+    """ route to handle URL: /get_face_from_image/<int:face_id>
+
+    this is called via Ajax when we manually override a face that is currently unmatched: load
+    the original full image, find the current face's coords, grab pixels 10% larger, and return
+    it so we can show it in the dbox and pass it around for refimg creation (if needed)
+    ---
+    responses:
+      200:
+        description: Base64-encoded image of face AI found returned successfully
+        content:
+          text/plain:
+            schema:
+              type: string
+              format: binary
+              description: Base64-encoded image data
+    """
+
     face=Face.query.get(face_id)
-    f = Entry.query.join(File).join(FaceFileLink).filter(FaceFileLink.face_id==face_id).first()
+    f = Entry.query.join(File).join(FaceFileLink).filter(FaceFileLink.face_id==face_id).first()
     x=face.face_left*0.95
     y=face.face_top*0.95
     x2=face.face_right*1.05
@@ -120,7 +185,7 @@ def get_face_from_image(face_id):
     im = Image.open(f.FullPathOnFS())
     region = im.crop((x, y, x2, y2))
     img_bytearray = io.BytesIO()
-    region.save(img_bytearray, format='JPEG')
+    region.save(img_bytearray, format="JPEG")
     img_bytearray = img_bytearray.getvalue()
     face_img = base64.b64encode(img_bytearray)
     face_img = str(face_img)[2:-1]
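The eid-X form convention documented in run_ai_on() can be exercised with a short client; a hypothetical sketch, where the host, port, entry ids, and session/login handling are illustrative and not taken from the repo:

    # Hypothetical client sketch: POST the eid-X form variables that /run_ai_on reads.
    import requests

    form = {
        "eid-0": "101",   # entry id of a file to scan (illustrative)
        "eid-1": "102",   # entry id of a dir to scan (illustrative)
        "person": "all",  # match against all people, or a single tag e.g. "dad"
    }
    # assumes an already-authenticated session; login_required otherwise redirects
    resp = requests.post("http://localhost:5000/run_ai_on", data=form, allow_redirects=False)
    print(resp.status_code)  # expect 302, redirecting to /jobs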
diff --git a/dups.py b/dups.py
index 3106933..bbf7f21 100644
--- a/dups.py
+++ b/dups.py
@@ -1,47 +1,36 @@
-from wtforms import SubmitField, StringField, HiddenField, validators, Form
-from flask_wtf import FlaskForm
-from flask import request, render_template, send_from_directory
-from main import db, app, ma
-from sqlalchemy import Sequence
-from sqlalchemy.exc import SQLAlchemyError
-import os
-import glob
-from PIL import Image
-from pymediainfo import MediaInfo
-import hashlib
-import exifread
-import base64
-import numpy
-import cv2
-import time
+""" functions provided to process duplicate photo data from DB into usable data structures """
 import re

 ################################################################################
 # Local Class imports
 ################################################################################
-from settings import Settings
-from shared import SymlinkName, PA
+from shared import PA
 from path import PathType

-################################################################################
-# DupRow class is a simple 'struct' to keep data per duplicate file / just to
-# avoid using python list/dicts intermixed, and be able to consistently use
-# dot-notation of fields
+################################################################################
 class DupRow(PA):
-    def __init__(self, hash, file, dir, did, fid):
+    """ DupRow class is a simple 'struct' to keep data per duplicate file
+
+    Created just to avoid using python list/dicts intermixed, and be able to consistently use
+    dot-notation of fields
+    """
+
+    def __init__(self, _hash, file, _dir, did, fid):
         ### DupRow Attributes -- note, simple class, no methods ###
-        self.h=hash
+        self.h=_hash
         self.f=file
-        self.d=dir
+        self.d=_dir
         self.did=did
         self.id=fid
         return

-################################################################################
-# DupPathRow class is a simple 'struct' to keep data per files in duplicate paths
-# just to avoid using python list/dicts intermixed, and be able to consistently use
-# dot-notation of fields
+################################################################################
 class DupPathRow(PA):
+    """ DupPathRow class is a simple 'struct' to keep data per file in duplicate paths
+
+    Created just to avoid using python list/dicts intermixed, and be able to consistently use
+    dot-notation of fields
+    """
     def __init__(self, count, d1, d2, did1, did2, hashes ):
         self.count=count
         self.d1=d1
@@ -51,33 +40,37 @@ class DupPathRow(PA):
         self.hashes=hashes
         return

-################################################################################
-# Duplicates class is used with one instance/object to process all the
-# 'duplicate' data from the Database, and parse it into more usable data
-# structures. This is needed also, as the database content shows duplicates
-# more than once, e.g.
-# file1 and file2 are a duplicate, then later file2 and file 1 are 'another' duplicate
-# The class passes over the data in 2 passes. The first pass in AddDup() finds
-# any files in the import and storage path and marks the storage ones to keep,
-# the import ones to delete. Anything else is either a set of files duplicated
-# inside the import path or set of files duplicated in the storage path
-# The first pass, simply concatenates these into a data structure
-# (im_same_dups) that contains all the duplicates with a key of the md5 hash
-#
-# The second pass (), processes these duplicates to see if there are any in the
-# storage path that follow the pattern 'YYYY/YYYYMMDD' -> if so mark these to
-# keep and the rest to be deleted.
-#
-# After the 2 passes, we have data structures that allow the web to break up
-# the duplicates into batches to process:
-# 1) auto delete any in the import path that are also in the storage path
-#    - careful here, if we have any in the import path and 2+ in the storage path, leave it for manual intervention
-# 2) auto delete any in the storage path that are in a set where 1 of them match the 'YYYY/YYYYMMDD' format, the rest are deleted
-# 3) a set of directories where there are only 2 duplicate files (with the same file name), just in a different dir - allow user to choose the dir to keep
-# 4) a set of individual files where I want the user to make a decision (3 or more copies, those with different filenames, or in the same dir) - allow user to choose file to keep
+################################################################################
 class Duplicates(PA):
+    """ Duplicates class that has methods to process DB duplicate photo data
+
+    The Duplicates class is used with one instance/object to process all the
+    'duplicate' data from the Database, and parse it into more usable data
+    structures. This is also needed because the database content shows duplicates
+    more than once, e.g.
+    file1 and file2 are a duplicate, then later file2 and file1 are 'another' duplicate.
+    The class passes over the data in 2 passes. The first pass, in AddDup(), finds
+    any files in both the import and storage paths and marks the storage ones to keep,
+    the import ones to delete. Anything else is either a set of files duplicated
+    inside the import path or a set of files duplicated in the storage path.
+    The first pass simply concatenates these into a data structure
+    (im_same_dups) that contains all the duplicates with a key of the md5 hash
+
+    The second pass processes these duplicates to see if there are any in the
+    storage path that follow the pattern 'YYYY/YYYYMMDD' -> if so mark these to
+    keep and the rest to be deleted.
+
+    After the 2 passes, we have data structures that allow the web to break up
+    the duplicates into batches to process:
+    1) auto delete any in the import path that are also in the storage path
+       - careful here, if we have any in the import path and 2+ in the storage path, leave it for manual intervention
+    2) auto delete any in the storage path that are in a set where 1 of them matches the 'YYYY/YYYYMMDD' format; the rest are deleted
+    3) a set of directories where there are only 2 duplicate files (with the same file name), just in a different dir - allow user to choose the dir to keep
+    4) a set of individual files where I want the user to make a decision (3 or more copies, those with different filenames, or in the same dir) - allow user to choose file to keep
+    """
+
     def __init__(self):
-        ### Duplicates Attributes ###
+        """ initialises all the Duplicates Attributes """
         self.ip_to_sp_dups_keep={}
         self.ip_to_sp_dups_del={}
         self.dups_to_process={}
@@ -90,33 +83,59 @@ class Duplicates(PA):
         self.uniq_dups=0
         self.total_dups=0

-        self.import_ptype_id = PathType.query.filter(PathType.name=='Import').first().id
-        self.storage_ptype_id = PathType.query.filter(PathType.name=='Storage').first().id
+        self.import_ptype_id = PathType.query.filter(PathType.name=="Import").first().id
+        self.storage_ptype_id = PathType.query.filter(PathType.name=="Storage").first().id

-    # is this file in the import path?
     def InImportPath( self, path_type ):
+        """ Is the path being checked an import path
+
+        Args:
+            path_type (int): db key for the path_type of the path being checked
+        Returns:
+            bool: True if this path is an import path
+        """
         if path_type == self.import_ptype_id:
             return True
         return False

-    # is this file in the storage path?
     def InStoragePath( self, path_type ):
+        """ Is the path being checked a storage path
+
+        Args:
+            path_type (int): db key for the path_type of the path being checked
+        Returns:
+            bool: True if this path is a storage path
+        """
         if path_type == self.storage_ptype_id:
             return True
         return False

-    # this stores this object into the keep from same path list (sometimes there can be more than 1 SP, e.g SP to SP to IP)
-    # for now, by not dealing with the extra SP, we will just delete the IP, and force a check_dups after deleting, it will then
-    # pick up and process the SP to SP - if still needed -- if there is only SP1 to SP2, then the per_path_dup will pick it up and
-    # I believe this will all work, but doesn't hurt to do an extra check_dups again
     def KeepInIPSPDups( self, obj ):
+        """ stores this file into the "keep from same path" list
+
+        sometimes there can be more than 1 SP, e.g. SP to SP to IP;
+        for now, by not dealing with the extra SP, we will just delete the IP, and force a check_dups after deleting, it will then
+        pick up and process the SP to SP - if still needed -- if there is only SP1 to SP2, then the per_path_dup will pick it up and
+        I believe this will all work, but doesn't hurt to do an extra check_dups again
+
+        Args:
+            obj (DupRow): file that will be stored into the "keep from same path" list
+        Returns:
+            None
+        """
         if obj.h not in self.ip_to_sp_dups_keep:
             self.ip_to_sp_dups_keep[obj.h]= obj
         return

-    # this stores this object into the Delete from same path list (if it is not
-    # already there)
     def DelInIPSPDups( self, obj ):
+        """ stores this object into the "Delete from same path" list (if it is not already there)
+
+        Args:
+            obj (DupRow): file that will be stored into the "Delete from same path" list
+        Returns:
+            None
+        """
+
         if obj.h not in self.ip_to_sp_dups_del:
             self.ip_to_sp_dups_del[obj.h]=[]
             self.ip_to_sp_dups_del[obj.h].append( obj )
@@ -127,10 +146,21 @@ class Duplicates(PA):
             self.ip_to_sp_dups_del[obj.h].append( obj )
         return

-    # this function takes a duplicate file (in the import path and the storage path)
-    # and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
-    # and then puts the import path file in the delete list (self.ip_to_sp_dups_keep) via self.DelInIPSPDups()
     def DupInImportAndStoragePath( self, row, dr1, dr2 ):
+        """ handles a duplicate file in import and storage paths, and stores it into the keep/delete lists
+
+        this function takes a duplicate file (in the import path and the storage path)
+        and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
+        and then puts the import path file in the delete list (self.ip_to_sp_dups_del) via self.DelInIPSPDups()
+
+        Args:
+            row (ORM row): row from the database with a dup pair in dir1 & dir2
+            dr1 (DupRow): dup data for file 1 of a duplicate
+            dr2 (DupRow): dup data for file 2 of a duplicate
+
+        Returns:
+            bool: True if file is in both import and storage path, False otherwise
+        """
         if self.InStoragePath(row.path_type1) and self.InImportPath(row.path_type2):
             self.KeepInIPSPDups( dr1 )
             self.DelInIPSPDups( dr2 )
@@ -180,7 +210,7 @@

     # AddDupPath: takes a row from the database effectively with a dup pair in dir1 & dir2
     # we process these into appropriate data structures on this second pass
-    # working through if a dir is in th estorage path and is
+    # working through if a dir is in the storage path and is
     def AddDupPath(self, hash):
         # this gets complex, if this hash is also in a shared imp / sp - then dont deal with it now, let the imp files be deleted and
         # the repeat check_dups validation step catch it as a cleander (potential) for still more duplicates just in sp
         # FIXME: what if both do? what if one is in SP and the other not, etc...
         if new:
             self.per_path_dups.append( dpr )
-            if re.search( r'\d{4}/\d{8}', dpr.d1):
+            if re.search( r"\d{4}/\d{8}", dpr.d1):
                 self.preferred_path[dpr.did1]=1
-            if re.search( r'\d{4}/\d{8}', dpr.d2):
+            if re.search( r"\d{4}/\d{8}", dpr.d2):
                 self.preferred_path[dpr.did2]=1

         return True

@@ -216,7 +246,7 @@
         if (len(self.dups_to_process[hash]) > 2) or (self.dups_to_process[hash][0].d == self.dups_to_process[hash][1].d) or (self.dups_to_process[hash][0].f != self.dups_to_process[hash][1].f):
             self.per_file_dups.append(self.dups_to_process[hash])
             for el in self.dups_to_process[hash]:
-                if re.search( r'\d{4}/\d{8}', el.d):
+                if re.search( r"\d{4}/\d{8}", el.d):
                     self.preferred_file[hash] = el.id
         else:
             # will force ask per path
@@ -232,9 +262,9 @@
         if len(self.ip_to_sp_dups_keep) > 0:
             print( "############ Files that are in both Import and Storage Paths ###########")
             for h in self.ip_to_sp_dups_keep:
-                print( f"hash={h} keep 1 of {len(self.ip_to_sp_dups_del[h])+1}, keep: {self.ip_to_sp_dups_keep[h]} | ", end='' )
+                print( f"hash={h} keep 1 of {len(self.ip_to_sp_dups_del[h])+1}, keep: {self.ip_to_sp_dups_keep[h]} | ", end="" )
                 for d in self.ip_to_sp_dups_del[h]:
-                    print( f"Del: {d}", end='' )
+                    print( f"Del: {d}", end="" )
                 print( "" )
             print( f"{len(self.ip_to_sp_dups_keep)} sets of duplicate files to delete at least 1, anything with 2 or more dups is printed above explicitly" )

@@ -247,9 +277,9 @@
         if len(self.preferred_file) > 0:
             print( " We have preferred (regexp matched) ###########")
             for h in self.preferred_file:
-                print( f"hash={h}, keep this one: {self.preferred_file[h]} from ", end='' )
+                print( f"hash={h}, keep this one: {self.preferred_file[h]} from ", end="" )
                 for d in self.dups_to_process[h]:
-                    print( f"{d.id}, ", end='' )
+                    print( f"{d.id}, ", end="" )
                 print ("")
             print( f"which is a total of {len(self.preferred_file)} duplicate files we will keep as they match the regexp" )
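The 'YYYY/YYYYMMDD' preference applied in AddDupPath() and in the per-file second pass boils down to a single regexp; a small self-contained illustration of which dirs it marks as preferred (example paths are illustrative):

    # Illustration of the preferred-path test used above: a dir is "preferred"
    # when it contains a YYYY/YYYYMMDD component anywhere in its path.
    import re

    for d in ("static/Storage/2019/20190704", "static/Storage/misc/holiday", "2021/20210101/raw"):
        print(d, "->", bool(re.search(r"\d{4}/\d{8}", d)))
    # static/Storage/2019/20190704 -> True
    # static/Storage/misc/holiday -> False
    # 2021/20210101/raw -> True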
diff --git a/face.py b/face.py
index 3caec79..c4a40fc 100644
--- a/face.py
+++ b/face.py
@@ -1,20 +1,26 @@
-from main import db, app, ma
-from sqlalchemy import Sequence
-from sqlalchemy.exc import SQLAlchemyError
+""" file containing all classes to handle Face (and associated tables) from the database """
+from main import db
 from shared import PA
-# pylint: disable=no-member
-
-################################################################################
-# Class describing Face in the database and DB via sqlalchemy
-# - face contains the binary version of numpy array so we dont need to recalc it
-# - refimg_lnk and facefile_lnk are viewOnly / just for convenience in viewer
-# - refimg is a real link to the refimg used for this face (its is only used in
-#   viewer, and is either set when there is a matched face, or None if no match
 ################################################################################
 class Face(PA,db.Model):
+    """Class describing a Face in the database
+
+    Attributes:
+        id (int): database id of row in Face table / primary key
+        face (bytes): the binary version of the numpy array so we don't need to recalc it
+        face_top (int): top-most pixel of face
+        face_right (int): right-most pixel of face
+        face_bottom (int): bottom-most pixel of face
+        face_left (int): left-most pixel of face
+        w (int): width of face in pixels
+        h (int): height of face in pixels
+        refimg_lnk (FaceRefimgLink): face_refimg_link data - viewonly / just for convenience in viewer
+        facefile_lnk (FaceFileLink): face_file_link data - viewonly / just for convenience in viewer
+        refimg (Refimg): link to the refimg used for this face (used in viewer). Set when there is a matched face, or None if no match
+    """
     __tablename__ = "face"
-    id = db.Column(db.Integer, db.Sequence('face_id_seq'), primary_key=True )
+    id = db.Column(db.Integer, db.Sequence("face_id_seq"), primary_key=True )
     face = db.Column( db.LargeBinary )
     face_top = db.Column( db.Integer )
     face_right = db.Column( db.Integer )
@@ -27,62 +33,93 @@ class Face(PA,db.Model):
     refimg =db.relationship("Refimg", secondary="face_refimg_link", uselist=False)

-################################################################################
-# Class describing FaceFileLink in the database and DB via sqlalchemy
-# each face comes from a file and used a model to find the face
-# this is not perfect, each face in the same file is always foudn with the same
-# model - so really should have ModelFileLink or something, in the long run
-# this might even be better as ScanDetailsFileLink and ScanDetails
 ################################################################################
 class FaceFileLink(PA, db.Model):
+    """Class describing a Face_File_Link in the database
+
+    NOTE: this data model is not perfect, each face in the same file is always found
+    with the same model - so it really should have ModelFileLink or something; in the long run
+    this might even be better as ScanDetailsFileLink and ScanDetails
+
+    Attributes:
+        face_id (int): face id of row in Face table / foreign key - part primary key
+        file_eid (int): entry id of a row in File table / foreign key - part primary key
+        model_used (int): id of a row in AI_model table used to find the face / foreign key - part primary key
+    """
     __tablename__ = "face_file_link"
     face_id = db.Column(db.Integer, db.ForeignKey("face.id"), primary_key=True )
     file_eid = db.Column(db.Integer, db.ForeignKey("file.eid"), primary_key=True )
     model_used = db.Column(db.Integer, db.ForeignKey("ai_model.id"), primary_key=True )

-################################################################################
-# Class describing FaceRefimgLink in the database and DB via sqlalchemy
-# connects / implies a face has matched a refimg and we keep the distance too
-# distance is mainly for debugging for now and shown in viewer
 ################################################################################
 class FaceRefimgLink(PA, db.Model):
+    """Class describing a Face_Refimg_Link in the database
+
+    connects / implies a face has matched a refimg and we keep the distance too;
+    distance is mainly for debugging for now and shown in viewer
+
+    Attributes:
+        face_id (int): face id of row in Face table / foreign key - part primary key
+        refimg_id (int): refimg id of row in Refimg table / foreign key - part primary key
+        face_distance (int): distance value (how similar matched Face was)
+    """
+
     __tablename__ = "face_refimg_link"
     face_id = db.Column(db.Integer, db.ForeignKey("face.id"), primary_key=True )
     refimg_id = db.Column(db.Integer, db.ForeignKey("refimg.id"), primary_key=True )
     face_distance = db.Column(db.Integer)

-################################################################################
-# Class describing FaceOverrideType in the database and DB via sqlalchemy
-# when a face has an override, it will be a simple list of different types
-# eg (forced match, no match, not a face, etc)
 ################################################################################
 class FaceOverrideType(PA, db.Model):
+    """Class describing a Face_Override_Type in the database
+
+    when a face has an override, it will be a simple list of different types
+    eg (forced match, no match, not a face, etc)
+
+    Attributes:
+        id (int): database id of row in FaceOverrideType table / primary key
+        name (str): name of the type of face override
+    """
     __tablename__ = "face_override_type"
-    id = db.Column(db.Integer, db.Sequence('face_override_type_id_seq'), primary_key=True )
+    id = db.Column(db.Integer, db.Sequence("face_override_type_id_seq"), primary_key=True )
     name = db.Column( db.String )

-################################################################################
-# Class describing FaceNoMatchOverride in the database and DB via sqlalchemy
-# used when a face does not match for some reason (type and face id connected)
 ################################################################################
 class FaceNoMatchOverride(PA, db.Model):
+    """Class describing a Face_No_Match_Override in the database
+
+    used when a face does not match for some reason (type and face id connected)
+
+    Attributes:
+        id (int): database id of row in Face_No_Match_Override table / part primary key
+        face_id (int): face id of row in Face table / foreign key - part primary key
+        type_id (int): id of row in Face_Override_Type table / foreign key
+        type (FaceOverrideType): convenience field for face override type for this face
+    """
     __tablename__ = "face_no_match_override"
-    id = db.Column(db.Integer, db.Sequence('face_override_id_seq'), primary_key=True )
+    id = db.Column(db.Integer, db.Sequence("face_override_id_seq"), primary_key=True )
     face_id = db.Column(db.Integer, db.ForeignKey("face.id"), primary_key=True )
     type_id = db.Column(db.Integer, db.ForeignKey("face_override_type.id"))
     type = db.relationship("FaceOverrideType")

-################################################################################
-# Class describing FaceForceMatchOverride in the database and DB via sqlalchemy
-# used when a face is forced to match for some reason (who and face id connected)
 ################################################################################
 class FaceForceMatchOverride(PA, db.Model):
+    """Class describing a Face_Force_Match_Override in the database
+
+    used when a face is forced to match for some reason (who and face id connected)
+
+    Attributes:
+        id (int): database id of row in Face_Force_Match_Override table / part primary key
+        face_id (int): face id of row in Face table / foreign key - part primary key
+        person_id (int): person id of row in Person table / foreign key - part primary key
+        person (Person): convenience field for Person with forced match
+    """
     __tablename__ = "face_force_match_override"
-    id = db.Column(db.Integer, db.Sequence('face_override_id_seq'), primary_key=True )
+    id = db.Column(db.Integer, db.Sequence("face_override_id_seq"), primary_key=True )
     face_id = db.Column(db.Integer, db.ForeignKey("face.id"), primary_key=True )
     person_id = db.Column(db.Integer, db.ForeignKey("person.id"), primary_key=True )
     person = db.relationship("Person")
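As a usage sketch of the Face relationships declared above (illustrative only: assumes an application context and an existing row; the id is hypothetical):

    # Illustrative query sketch; id 1 is a hypothetical example value.
    face = Face.query.get(1)
    print("box:", face.face_left, face.face_top, face.face_right, face.face_bottom)
    if face.refimg is not None:  # refimg is set only when the face matched a refimg
        print("matched refimg:", face.refimg.id)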
diff --git a/path.py b/path.py
index aee377b..192088d 100644
--- a/path.py
+++ b/path.py
@@ -1,61 +1,89 @@
-from shared import PA, ICON
+""" file containing all classes/functions to handle Path from the database """
+
 from flask import url_for
-from flask_wtf import FlaskForm
-from main import db, app, ma
-from sqlalchemy import Sequence
-from sqlalchemy.exc import SQLAlchemyError
+from shared import PA, ICON
+from main import db
-# pylint: disable=no-member
-
-################################################################################
-# Class describing File in the database, and via sqlalchemy, connected to the DB as well
-# This has to match one-for-one the DB table
 ################################################################################
 class PathType(db.Model):
+    """Class describing the type of Paths in the database
+
+    Attributes:
+        id (int): database id of row in PathType table / primary key
+        name (str): name of path type (e.g. import, storage, bin)
+    """
+
     __tablename__ = "path_type"
-    id = db.Column(db.Integer, db.Sequence('path_type_id_seq'), primary_key=True )
-    name = db.Column(db.String, unique=True, nullable=False )
+    id:int = db.Column(db.Integer, db.Sequence("path_type_id_seq"), primary_key=True )
+    name:str = db.Column(db.String, unique=True, nullable=False )

     def __repr__(self):
-        return "<PathType: id={}, name={}>".format(self.id, self.name )
+        return f"<PathType: id={self.id}, name={self.name}>"

-################################################################################
-# Class describing Path & in the database via sqlalchemy
 ################################################################################
 class Path(db.Model):
+    """Class describing a Path in the database
+
+    Attributes:
+        id (int): database id of row in Path table / primary key
+        type_id (int): id of row in PathType table / foreign key
+        type (PathType): sqlalchemy relationship of PathType using type_id
+        path_prefix (str): the actual dir on the filesystem that defines this Path
+        num_files (int): number of files in this Path
+    """
     __tablename__ = "path"
-    id = db.Column(db.Integer, db.Sequence('path_id_seq'), primary_key=True )
-    type_id = db.Column(db.Integer, db.ForeignKey("path_type.id"))
-    type = db.relationship("PathType")
-    path_prefix = db.Column(db.String, unique=True, nullable=False )
-    num_files = db.Column(db.Integer)
+    id:int = db.Column(db.Integer, db.Sequence("path_id_seq"), primary_key=True )
+    type_id:int = db.Column(db.Integer, db.ForeignKey("path_type.id"))
+    type:PathType = db.relationship("PathType")
+    path_prefix:str = db.Column(db.String, unique=True, nullable=False )
+    num_files:int = db.Column(db.Integer)

     def __repr__(self):
         return f"<Path: id={self.id}, path_prefix={self.path_prefix}>"

 ################################################################################
-# Class describing PathDeatil (quick connvenence class for MovePathDetails())
+# Class describing PathDetail (quick convenience class for MovePathDetails())
 ################################################################################
 class PathDetail(PA):
-    def __init__(self,type,path):
-        self.type=type
-        self.path=path
-        self.icon_url=url_for('internal', filename='icons.svg') + '#' + ICON[self.type]
-        return
+    """Class describing details of a Path [internal class used in MovePathDetails()]"""
+
+    def __init__(self,ptype,path):
+        """Initialisation function for PathDetail class
+
+        Args:
+            ptype (str): type of the path (e.g. "Storage" or "Import")
+            path (str): the path with its path_prefix stripped
+        """
+
+        self.type:str=ptype
+        self.path:str=path
+        # construct icon_url based on type of storage path (icons.svg contains icons for each)
+        self.icon_url:str=url_for("internal", filename="icons.svg") + "#" + ICON[self.type]

 ################################################################################
-# helper function to find oath details for move destinations - used in html
+# helper function to find path details for move destinations - used in html
 # for move DBox to show potential storage paths to move files into
 ################################################################################
 def MovePathDetails():
+    """helper function to find path details for move destinations
+
+    used in html/javascript for move Dialog Box to show potential storage paths to move files into
+
+    Args:
+        None
+
+    Returns:
+        List[PathDetail]: a list of Path Details for where files can be moved
+    """
     ret=[]
-    sps=Path.query.join(PathType).filter(PathType.name=='Storage').all()
+    sps=Path.query.join(PathType).filter(PathType.name=="Storage").all()
     for p in sps:
-        obj = PathDetail( type='Storage', path=p.path_prefix.replace('static/Storage/','') )
+        obj = PathDetail( ptype="Storage", path=p.path_prefix.replace("static/Storage/","") )
         ret.append( obj )
-    ips=Path.query.join(PathType).filter(PathType.name=='Import').all()
+    ips=Path.query.join(PathType).filter(PathType.name=="Import").all()
     for p in ips:
-        obj = PathDetail( type='Import', path=p.path_prefix.replace('static/Import/','') )
+        obj = PathDetail( ptype="Import", path=p.path_prefix.replace("static/Import/","") )
         ret.append( obj )
     return ret
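Finally, a sketch of how MovePathDetails() might be consumed by a route; the endpoint and template names below are hypothetical, only MovePathDetails() and the PathDetail fields (type, path, icon_url) come from this file:

    # Hypothetical endpoint sketch; route and template names are illustrative.
    from flask import render_template
    from main import app

    @app.route("/move_targets", methods=["GET"])
    def move_targets():
        details = MovePathDetails()  # list of PathDetail(type, path, icon_url)
        return render_template("move_dbox.html", paths=details)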