diff --git a/TODO b/TODO index 62ade08..d148c7c 100644 --- a/TODO +++ b/TODO @@ -1,17 +1,13 @@ ## GENERAL - * Face matching: - - upgrade to face distance per face per file [DONE] - - face locations: - START FORM SCRATCH for prod so all images have face_locn data - - need to reconsider whether current distance algorithm gives best match - can I do better? + * face locations -- START FROM SCRATCH for prod so all images have face_locn data * per file you could select an unknown face and add it as a ref img to an existing person, or make a new person and attach? * from menu, we could try to get smart/fancy... say find face with largest size, check it vs. other faces, if it matches more than say 10? we offer it up as a required ref img, then cut that face (with margin) out and use it is a new ref image / person - read that guys face matching / clustering / nearest neighbour examples, for a whole new AI capability + https://www.pyimagesearch.com/2018/07/09/face-clustering-with-python/ * fix up logging in general * comment your code - * js files * html files? ## DB * Dir can have date in the DB, so we can do Oldest/Newest dirs in Folder view diff --git a/pa_job_manager.py b/pa_job_manager.py index c629fe4..1cba183 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -1679,20 +1679,57 @@ def MatchRefimgToFace( refimg_id, face_id, face_dist ): return #################################################################################################################################### -# BestFaceMatch(): take in required threshold for anything to be a match, then go through the 'dist' array - it contains the face -# distance for each refimg / face. So this func loops through all of them, -# return the best match (with lowest distance) by checking each one that is a match based on the threshold +# Util function to remove the matching face number (which_f) from the dist array (which has dist[who][])... 
####################################################################################################################################
def RemoveFaceNumFromDist(dist, which_f ):
    # dist is modified in place (nothing useful is returned); any refimg that
    # has no entry for which_f is simply left as it was
    for who in dist:
        # pop() with a default is a no-op when this refimg never saw which_f
        dist[who].pop(which_f, None)
    return

####################################################################################################################################
# go through dist array and find the best single match (lowest face distance)
# that is also within threshold, returning the matching refimg (which_r),
# and face num (which_f), and dist (which_fd) - all three are None when
# nothing in dist is a match
#   dist      - face distances, read as dist[refimg][face][0]
#   threshold - largest face distance that still counts as a match
####################################################################################################################################
def FindBestFaceMatch( dist, threshold ):
    which_r=None
    which_f=None
    which_fd=None
    # 1 is not a match (0 is perfect match)
    lowest=1.0
    for who in dist:
        for fid in dist[who]:
            fd=dist[who][fid][0]
            if fd < lowest and fd <= threshold:
                # remember the new best distance, otherwise any later (worse)
                # candidate that is still under threshold would replace it
                lowest=fd
                which_r=who
                which_f=fid
                which_fd=fd
    return which_r, which_f, which_fd

####################################################################################################################################
# Okay, go through dist array and find the best single match via FindBestFaceMatch(),
# then record that match and remove that refimg/person and that face number
# from the dist array, and rinse/repeat until we have no more faces to match,
# or the 'best' match is > threshold, so no more matches...
####################################################################################################################################
def ProcessFaceMatches( job, dist, threshold, e, name ):
    #   job       - handed to AddLogForJob() for every match that is recorded
    #   dist      - dist[refimg][face] distances; matched refimgs and faces are deleted from it as we go
    #   threshold - passed through to FindBestFaceMatch()
    #   e         - only e.name is read here (for the log message)
    #   name      - indexed by refimg id to build the log message
    while True:
        print( f"ProcessFaceMatches() - finding best match left with dist={dist}" )
        which_r, which_f, which_fd = FindBestFaceMatch( dist, threshold )
        print( f"seems that best match is r={which_r}, f={which_f}, with fd={which_fd}" )
        if which_r is None:
            # FindBestFaceMatch() found nothing (more) to match, so we are done
            return
        print( "okay, which_r is real, so we have a match" )
        MatchRefimgToFace( which_r, which_f, which_fd )
        AddLogForJob(job, f'WE MATCHED: {name[which_r]} with file: {e.name} - face distance of {which_fd}')
        # remove this refimg completely, cant be 2 of this person matched
        print( "now remove this refimg from dist" )
        del dist[which_r]
        # remove this face id completely, this face cant be matched by someone else
        print( "now remove this face from dist (if it is connected with anyone else)" )
        print( f"dist now = {dist}" )
        RemoveFaceNumFromDist( dist, which_f )

####################################################################################################################################
# ScanFileForPerson(): for a file, check to see if a person is matched via face_recognition
# NOTE: can pass force into this, but no f/e to trip this yet
# if we do not have (any) faces for this file, go get them and their locations and store them in the DB associated with this file
# then for each face (known/matched already or not), create a new array 'dist[refimg][face]' and run face_recognition code
we could find a suboptimal solution: -# e.g face-1 (Cam 0.55, Mich 0.6) -# face-2 (Cam 0.45, Mich 0.54) -# Algo would pick Cam for face-1, delete Cam, then Mich for face-2. -# Should have chosen the other way -> might need to research best algo here +# to calculate the distance between the refimg and this face, for each refimg and each face, then call ProcessFaceMatches() to +# go through the best matches (1-by-1) until no more faces match and store matching faces in DB #################################################################################################################################### def ScanFileForPerson( job, e, force=False ): # get default_scan_model from settings (test this) @@ -1751,15 +1783,8 @@ def ScanFileForPerson( job, e, force=False ): refimg_face_data = numpy.frombuffer(r.face, dtype=numpy.float64) dist[r.id][face.id] = face_recognition.face_distance(unknown_face_data, [refimg_face_data]) - # if you need to check face distances, uncomment this: print( f"dist={dist}" ) - # TODO: I think this next line is not needed anymore (does the same as above?) - faces = session.execute( f"select f.* from face f join face_file_link ffl on f.id = ffl.face_id where ffl.file_eid = {e.id}" ) - for face in faces: - who, fd = BestFaceMatch(dist, face.id, threshold ) - if who != None: - MatchRefimgToFace( who, face.id, fd ) - AddLogForJob(job, f'WE MATCHED: {name[who]} with file: {e.name} - face distance of {fd}') - del( dist[who] ) + # record matches in DB... + ProcessFaceMatches( job, dist, threshold, e, name ) return