use better face distance sorting/matching algo

This commit is contained in:
2021-09-12 18:35:04 +10:00
parent f71e3d6144
commit 444b206c64
2 changed files with 55 additions and 34 deletions

View File

@@ -1679,20 +1679,57 @@ def MatchRefimgToFace( refimg_id, face_id, face_dist ):
return
####################################################################################################################################
# BestFaceMatch(): take in required threshold for anything to be a match, then go through the 'dist' array - it contains the face
# distance for each refimg / face. So this func loops through all of them,
# return the best match (with lowest distance) by checking each one that is a match based on the threshold
# Util function to remove the matching face number (which_f) from the dist array (which has dist[who][<face_num>])...
####################################################################################################################################
# NOTE(review): the +/- diff markers and the indentation were lost in this view; the lines below appear to
# interleave the removed BestFaceMatch() with the added RemoveFaceNumFromDist() - confirm against the commit itself
def BestFaceMatch(dist, fid, threshold):
# 1 is not a match (0 is perfect match)
lowest=1.0
which=None
# RemoveFaceNumFromDist(): for every key 'who' in dist, delete the which_f entry from dist[who] when it is present.
# dist is modified in place and nothing is returned
def RemoveFaceNumFromDist(dist, which_f ):
for who in dist:
if who in dist and fid in dist[who] and dist[who][fid][0] < lowest and dist[who][fid][0] <= threshold:
lowest=dist[who][fid][0]
which=who
print( f"bfm: return {which}, {lowest} for {fid}" )
return which, lowest
if which_f in dist[who]:
del( dist[who][which_f] )
# NOTE(review): a bare 'next' is a no-op expression in Python (it only names the builtin), it is not a loop 'continue'
next
return
####################################################################################################################################
# go through dist array and find the best single match (lowest face distance)
# returning the matching refimg (which_r), and face num (which_f), and dist (which_fd)
####################################################################################################################################
def FindBestFaceMatch( dist, threshold ):
    # dist is indexed as dist[refimg][face_num] and each entry is a sequence whose
    # first element ([0]) is the face distance for that refimg / face pair.
    # Returns (which_r, which_f, which_fd) for the pair with the lowest distance that
    # is <= threshold, or (None, None, None) when no pair qualifies.
    which_r=None
    which_f=None
    which_fd=None
    # 1 is not a match (0 is perfect match), so only distances strictly below 1.0 can win
    lowest=1.0
    for who, faces in dist.items():
        for fid, entry in faces.items():
            fd=entry[0]
            if fd < lowest and fd <= threshold:
                # remember the new best distance, otherwise every later pair under the
                # threshold would overwrite the result and we would return the last
                # match found rather than the best one
                lowest=fd
                which_r=who
                which_f=fid
                which_fd=fd
    return which_r, which_f, which_fd
####################################################################################################################################
# Okay, go through dist array and find the best single match via FindBestFaceMatch(),
# then record that match and remove that refimg/person and that face number
# from the dist array, and rinse/repeat until we have no more faces to match,
# or the 'best' match is > threshold, so no more matches...
####################################################################################################################################
def ProcessFaceMatches( job, dist, threshold, e, name ):
    # Repeatedly ask FindBestFaceMatch() for the best remaining refimg / face pair in dist,
    # record it via MatchRefimgToFace() and log it against job, then drop that refimg and
    # that face number from dist before looking again.
    #   job       - passed through to AddLogForJob()
    #   dist      - dist[refimg][face_num] structure; it is modified in place (entries are deleted)
    #   threshold - passed through to FindBestFaceMatch()
    #   e         - only e.name is read here, for the log message
    #   name      - indexed by the matched refimg (name[which_r]) for the log message
    # Returns nothing; stops as soon as FindBestFaceMatch() reports no match (which_r is None)
    while True:
        print( f"ProcessFaceMatches() - finding best match left with dist={dist}" )
        which_r, which_f, which_fd = FindBestFaceMatch( dist, threshold )
        print( f"seems that best match is r={which_r}, f={which_f}, with fd={which_fd}" )
        if which_r is None:
            # nothing left that matches, so we are done
            return
        print( f"okay, which_r is real, so we have a match" )
        MatchRefimgToFace( which_r, which_f, which_fd )
        AddLogForJob(job, f'WE MATCHED: {name[which_r]} with file: {e.name} - face distance of {which_fd}')
        # remove this refimg completely, can't be 2 of this person matched
        print( f"now remove this refimg from dist" )
        del( dist[which_r] )
        # remove this face id completely, this face can't be matched by someone else
        print( f"now remove this face from dist (if it is connected with anyone else)" )
        print( f"dist now = {dist}" )
        RemoveFaceNumFromDist( dist, which_f )
####################################################################################################################################
# ScanFileForPerson(): for a file, check to see if a person is matched via face_recognition
@@ -1700,13 +1737,8 @@ def BestFaceMatch(dist, fid, threshold):
# NOTE: can pass force into this, but no f/e to trip this yet
# if we do not have (any) faces for this file, go get them and their locations and store them in the DB associated with this file
# then for each face (known/matched already or not), create a new array 'dist[refimg][face]' and run face_recognition code
# to calculate the distance between the refimg and this face, for each refimg and each face
# then invoke BestFaceMatch above to find the best match, and then store that in the DB for each face for this file
# TODO: I *think* by going face-1, face-2, etc. we could find a suboptimal solution:
# e.g face-1 (Cam 0.55, Mich 0.6)
# face-2 (Cam 0.45, Mich 0.54)
# Algo would pick Cam for face-1, delete Cam, then Mich for face-2.
# Should have chosen the other way -> might need to research best algo here
# to calculate the distance between the refimg and this face, for each refimg and each face, then call ProcessFaceMatches() to
# go through the best matches (1-by-1) until no more faces match and store matching faces in DB
####################################################################################################################################
def ScanFileForPerson( job, e, force=False ):
# get default_scan_model from settings (test this)
@@ -1751,15 +1783,8 @@ def ScanFileForPerson( job, e, force=False ):
refimg_face_data = numpy.frombuffer(r.face, dtype=numpy.float64)
dist[r.id][face.id] = face_recognition.face_distance(unknown_face_data, [refimg_face_data])
# if you need to check face distances, uncomment this: print( f"dist={dist}" )
# TODO: I think this next line is not needed anymore (does the same as above?)
faces = session.execute( f"select f.* from face f join face_file_link ffl on f.id = ffl.face_id where ffl.file_eid = {e.id}" )
for face in faces:
who, fd = BestFaceMatch(dist, face.id, threshold )
if who != None:
MatchRefimgToFace( who, face.id, fd )
AddLogForJob(job, f'WE MATCHED: {name[who]} with file: {e.name} - face distance of {fd}')
del( dist[who] )
# record matches in DB...
ProcessFaceMatches( job, dist, threshold, e, name )
return