diff --git a/TODO b/TODO
index 32a17fb..116e2c7 100644
--- a/TODO
+++ b/TODO
@@ -1,33 +1,24 @@
## GENERAL
- * Face matching:
- - upgrade to face distance per face per file
- - so we dont get 2 x same face in one file, and if it could match say Cam and Mich for 1 face, take the higher match, not the first one to be over the threshold
- - allow for threshold/settings to be tweaked from the GUI?
- ---> at least settings for default value (back to 0.6 / 0.5?)
- ---> with override table to do per file / per face?
- - face locations:
- START FORM SCRATCH so all images have face_locn data
- - algo:
- for each face (even known) in image
- foreach refimg
- get face_distance
- sort by face_distance
- for each face
- connect lowest score with that face (for this file)
- this means this face is no longer 'free' for a match
- if (sorted) face distance > 0.55 stop as no further 'matches'
- - use cnn model (check ftst.py) for ref images, and potentially as a setting to check images without a face?
- - or always?
- -- would CUDA be useful here? (which is faster say an old 730 or the AMD cpu?)
+ * allow for and implement: default_refimg_model and default_scan_model
+ - cnn for refimgs should be default, for scan use hog
+ - research upsample val...
-
+ * viewer needs to allow toggle to scan_model (and prob. right-click on file... AI (with CNN) AI (with hog)
+ - I think go to html5 toggles for: face, distance (only shows if you toggle face on), drop-down for model (allow change to cnn and reprocess)
+ - show matching face distance in viewer
+ - might be best for AI_Model to have friendly name (slow, slower) :)
+
+ * remove dirs after the duplicate cleanup removes all its content
+
+ * Face matching:
+ - upgrade to face distance per face per file [DONE]
+ - face locations:
+ START FROM SCRATCH for prod so all images have face_locn data
* viewer:
can we make it preload next/prev images, and only reload the image div when we jump? to make arrow-based nav much faster
- * remove dirs after the duplicate cleanup removes all its content
-
* could look to remove the hand fixing of json.loads of array data --> seems you can make your own datatype in the ORM, and it can do the conversion every time you use it
- https://stackoverflow.com/questions/28143557/sqlalchemy-convert-column-value-back-and-forth-between-internal-and-database-fo
@@ -52,22 +43,24 @@
need a manual button to restart a job in the GUI,
(based on file-level optims, just run the job as new and it will optim over already done parts and continue)
- Future:
- Admin
- -> reset face_flag
-
- AI
- -> rescan
for --> can do this on new image only
- (optimised to only scan has_unidentified_face)... BUT if you change
- thresholds on AI, or we get a new/better one some day, then it can
- all images with faces, or if we 'reset face_flag' rescan all images
-
Admin
-> delete old jobs / auto delete jobs older than ???
-> do I want to have admin roles/users?
+### AI
+ * faces per file (need a threshold for too many? OR
+ * consider size of bbox of face / 'high-quality' faces -- if face is too small in image, don't match it
+ * if we have a high-qual face, we could show this on a page and have UI to create ref img / person for it
+
### UI
??? ipads can't do selections and contextMenus, do I want to re-factor to cater for this?
+ - partial fix, double-click / tap allows viewing (most useful context-menu feature)
+
+ For AI / rescan:
+ way to override per file:
+ the model used
+ the threshold used?
+ maybe on the per file you could select an unknown face and add it as a ref img to a existing person, or make a new person and attach?
file details is sort of crap - only works on import path
- probably better to have a different 'view', e.g. folders/flat/detailed
diff --git a/pa_job_manager.py b/pa_job_manager.py
index be348ee..211bdac 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -190,9 +190,11 @@ class Settings(Base):
import_path = Column(String)
storage_path = Column(String)
recycle_bin_path = Column(String)
+ default_model = Column(Integer,ForeignKey('ai_model.id'), unique=True, nullable=False)
+ default_threshold = Column(Integer)
def __repr__(self):
- return f""
+ return f""
class PersonRefimgLink(Base):
__tablename__ = "person_refimg_link"
@@ -459,8 +461,6 @@ def RunJob(job):
JobMoveFiles(job)
elif job.name == "restore_files":
JobRestoreFiles(job)
- elif job.name == "processai":
- JobProcessAI(job)
elif job.name == "run_ai_on":
JobRunAIOn(job)
elif job.name == "rotate_image":
@@ -959,26 +959,11 @@ def RunFuncOnFilesInPath( job, path, file_func, count_dirs ):
return
-def JobProcessAI(job):
- path=[jex.value for jex in job.extra if jex.name == "path"][0]
- path_prefix=[jex.value for jex in job.extra if jex.name == "path_prefix"][0]
- path = SymlinkName(path_prefix, path, '/')
- p = session.query(Path).filter(Path.path_prefix==path).first()
- job.num_files=p.num_files
-
- RunFuncOnFilesInPath( job, path, ProcessAI, True )
-
- FinishJob(job, "Finished Processesing AI")
- return
-
def WrapperForScanFileForPerson(job, entry):
- which_person=[jex.value for jex in job.extra if jex.name == "person"][0]
-
if entry.type.name == 'Image':
if DEBUG:
AddLogForJob( job, f'INFO: processing File: {entry.name}' )
- for pid in job.ppl:
- ScanFileForPerson( job, entry, pid, force=False)
+ ScanFileForPerson( job, entry, force=False)
# processed this file, add 1 to count
job.current_file_num+=1
return
@@ -992,9 +977,9 @@ def JobRunAIOn(job):
AddLogForJob(job, f"INFO: Starting looking For faces in files job...")
which_person=[jex.value for jex in job.extra if jex.name == "person"][0]
if which_person == "all":
- ppl=session.query(Person).all()
+ job.refimgs = session.query(Refimg).all()
else:
- ppl=session.query(Person).filter(Person.tag==which_person).all()
+ job.refimgs=session.query(Refimg).join(PersonRefimgLink).join(Person).filter(Person.tag==which_person).all()
# start by working out how many images in this selection we will need face match on
job.num_files = 0
@@ -1011,13 +996,8 @@ def JobRunAIOn(job):
job.current_file_num = 0
session.commit()
- ppl_lst=[]
- for person in ppl:
- ppl_lst.append(person.id)
-
- job.ppl = ppl_lst
-
for jex in job.extra:
+ print( jex )
if 'eid-' in jex.name:
entry=session.query(Entry).get(jex.value)
if entry.type.name == 'Directory':
@@ -1027,8 +1007,7 @@ def JobRunAIOn(job):
which_file=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
if DEBUG:
AddLogForJob( job, f'INFO: processing File: {entry.name}' )
- for person in ppl:
- ScanFileForPerson( job, which_file, person.id, force=False)
+ ScanFileForPerson( job, which_file, force=False)
# processed this file, add 1 to count
job.current_file_num+=1
else:
@@ -1081,46 +1060,6 @@ def GenHashAndThumb(job, e):
e.file_details.last_hash_date = time.time()
return
-def ProcessAI(job, e):
- if e.type.name != 'Image':
- job.current_file_num+=1
- return
-
- file = e.FullPathOnFS()
- stat = os.stat(file)
- # find if file is newer than when we found faces before (fyi: first time faces_created_on == 0)
- if stat.st_ctime > e.file_details.faces_created_on:
- session.add(e)
- im_orig = Image.open(file)
- im = ImageOps.exif_transpose(im_orig)
-
- faces = generateUnknownEncodings(im)
- e.file_details.faces_created_on=time.time()
- if faces:
- flat_faces = numpy.array(faces)
- e.file_details.faces = flat_faces.tobytes()
- else:
- e.file_details.faces = None
- job.current_file_num+=1
- return
- else:
- if not e.file_details.faces:
- print("OPTIM: This image has no faces, skip it")
- job.current_file_num+=1
- return
- recover=numpy.frombuffer(e.file_details.faces,dtype=numpy.float64)
- real_recover=numpy.reshape(recover,(-1,128))
- l=[]
- for el in real_recover:
- l.append(numpy.array(el))
- faces = l
- people = session.query(Person).all()
- for unknown_encoding in faces:
- for person in people:
- lookForPersonInImage(job, person, unknown_encoding, e)
- ProcessFileForJob(job, f"Finished processing {e.name}", e.name )
- return
-
def lookForPersonInImage(job, person, unknown_encoding, e):
FinishJob( job, "THIS CODE HAS BEEN REMOVED, need to use new Face* tables, and rethink", "Failed" )
return
@@ -1428,8 +1367,10 @@ def DelFacesForFile( eid ):
session.commit()
return
-def MatchRefimgToFace( refimg_id, face_id ):
- rfl = FaceRefimgLink( refimg_id = refimg_id, face_id = face_id )
+def MatchRefimgToFace( refimg_id, face_id, model, face_dist ):
+ # remove any match to this face from previous attempts, and 'replace' with new one
+ session.query(FaceRefimgLink).filter(FaceRefimgLink.face_id==face_id).delete()
+ rfl = FaceRefimgLink( refimg_id = refimg_id, face_id = face_id, model_used=model, face_distance=face_dist )
session.add(rfl)
session.commit()
return
@@ -1438,7 +1379,18 @@ def UnmatchedFacesForFile( eid ):
rows = session.execute( f"select f.* from face f left join face_refimg_link frl on f.id = frl.face_id join face_file_link ffl on f.id = ffl.face_id where ffl.file_eid = {eid} and frl.refimg_id is null" )
return rows
-def ScanFileForPerson( job, e, person_id, force=False ):
+def BestFaceMatch(dist, fid, threshold):
+ # 1 is not a match (0 is perfect match)
+ lowest=1.0
+ which=None
+ for who in dist:
+ if who in dist and fid in dist[who] and dist[who][fid][0] < lowest and dist[who][fid][0] <= threshold:
+ lowest=dist[who][fid][0]
+ which=who
+ print( f"bfm: return {which}, {lowest} for {fid}" )
+ return which, lowest
+
+def ScanFileForPerson( job, e, force=False ):
file_h = session.query(File).get( e.id )
# if we are forcing this, delete any old faces (this will also delete linked tables), and reset faces_created_on to None
if force:
@@ -1446,12 +1398,12 @@ def ScanFileForPerson( job, e, person_id, force=False ):
DelFacesForFile( e.id )
file_h.faces_created_on = 0
- # optimise: dont rescan if we already have faces (we are just going to try
- # to match (maybe?) a refimg
+ # optimise: dont rescan if we already have faces
if file_h.faces_created_on == 0:
if DEBUG:
AddLogForJob( job, f"DEBUG: {e.name} is missing unknown faces, generating them" )
im = face_recognition.load_image_file(e.FullPathOnFS())
+ # TODO: use setting to use model
face_locations = face_recognition.face_locations(im)
unknown_encodings = face_recognition.face_encodings(im, known_face_locations=face_locations)
for locn, face in zip( face_locations, unknown_encodings ):
@@ -1459,22 +1411,35 @@ def ScanFileForPerson( job, e, person_id, force=False ):
file_h.faces_created_on = time.time()
session.commit()
- ## now look for person
- refimgs = session.query(Refimg).join(PersonRefimgLink).filter(PersonRefimgLink.person_id==person_id).all()
- uf = UnmatchedFacesForFile( e.id )
- if DEBUG and not uf:
- AddLogForJob( job, "DEBUG: {e.name} all faces already matched - finished" )
-
- for face in uf:
- for r in refimgs:
+ # get default_model from settings (test this)
+ settings = session.query(Settings).first()
+ model=settings.default_model
+ threshold = settings.default_threshold
+
+ faces = session.query(Face).join(FaceFileLink).filter(FaceFileLink.file_eid==e.id).all()
+ # if there are no faces for this file, then don't go any further
+ if not faces:
+ return
+
+ dist={}
+ name={}
+ for r in job.refimgs:
+ dist[r.id]={}
+ name[r.id]=r.fname
+ for face in faces:
+ for r in job.refimgs:
unknown_face_data = numpy.frombuffer(face.face, dtype=numpy.float64)
refimg_face_data = numpy.frombuffer(r.face, dtype=numpy.float64)
- match = compareAI(refimg_face_data, unknown_face_data)
- if match[0]:
- AddLogForJob(job, f'WE MATCHED: {r.fname} with file: {e.name} ')
- MatchRefimgToFace( r.id, face.id )
- # no need to keep looking for this face, we found it, go to next unknown face
- break
+ dist[r.id][face.id] = face_recognition.face_distance(unknown_face_data, [refimg_face_data])
+
+ # if you need to check face distances, uncomment this: print( f"dist={dist}" )
+ faces = session.execute( f"select f.* from face f join face_file_link ffl on f.id = ffl.face_id where ffl.file_eid = {e.id}" )
+ for face in faces:
+ who, fd = BestFaceMatch(dist, face.id, threshold )
+ if who != None:
+ MatchRefimgToFace( who, face.id, model, fd )
+ AddLogForJob(job, f'WE MATCHED: {name[who]} with file: {e.name} - face distance of {fd}')
+ del( dist[who] )
return
@@ -1482,7 +1447,7 @@ if __name__ == "__main__":
print("INFO: PA job manager starting - listening on {}:{}".format( PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT) )
InitialValidationChecks()
-
+
HandleJobs()
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind((PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT))
diff --git a/settings.py b/settings.py
index 5ece10b..3d71249 100644
--- a/settings.py
+++ b/settings.py
@@ -33,7 +33,7 @@ class Settings(db.Model):
default_threshold = db.Column(db.Integer)
def __repr__(self):
- return f""
+ return f""
################################################################################
# Helper class that inherits a .dump() method to turn class Settings into json / useful in jinja2
diff --git a/templates/viewer.html b/templates/viewer.html
index fdcfb8e..68f32e8 100644
--- a/templates/viewer.html
+++ b/templates/viewer.html
@@ -73,6 +73,7 @@
context.fillStyle = "green"
context.fillText(faces[i].who, x+w/2, y-2)
}
+ /* can use to show lower left coords of a face for debugging
else
{
context.font = "14px Arial"
@@ -81,6 +82,7 @@
context.fillText( 'x=' + faces[i].x + ', y=' + faces[i].y, x+w/2, y-2)
context.fillText( 'x=' + faces[i].x + ', y=' + faces[i].y, x+w/2, y-2)
}
+ */
context.stroke();
}
}