use better face distance sorting/matching algo
This commit is contained in:
8
TODO
8
TODO
@@ -1,17 +1,13 @@
|
|||||||
## GENERAL
|
## GENERAL
|
||||||
* Face matching:
|
* face locations -- START FROM SCRATCH for prod so all images have face_locn data
|
||||||
- upgrade to face distance per face per file [DONE]
|
|
||||||
- face locations:
|
|
||||||
START FROM SCRATCH for prod so all images have face_locn data
|
|
||||||
- need to reconsider whether current distance algorithm gives best match - can I do better?
|
|
||||||
|
|
||||||
* per file you could select an unknown face and add it as a ref img to an existing person, or make a new person and attach?
|
* per file you could select an unknown face and add it as a ref img to an existing person, or make a new person and attach?
|
||||||
* from menu, we could try to get smart/fancy... say find face with largest size, check it vs. other faces, if it matches more than say 10? we offer it up as a required ref img, then cut that face (with margin) out and use it as a new ref image / person
|
* from menu, we could try to get smart/fancy... say find face with largest size, check it vs. other faces, if it matches more than say 10? we offer it up as a required ref img, then cut that face (with margin) out and use it as a new ref image / person
|
||||||
- read that guys face matching / clustering / nearest neighbour examples, for a whole new AI capability
|
- read that guys face matching / clustering / nearest neighbour examples, for a whole new AI capability
|
||||||
|
https://www.pyimagesearch.com/2018/07/09/face-clustering-with-python/
|
||||||
|
|
||||||
* fix up logging in general
|
* fix up logging in general
|
||||||
* comment your code
|
* comment your code
|
||||||
* js files
|
|
||||||
* html files?
|
* html files?
|
||||||
## DB
|
## DB
|
||||||
* Dir can have date in the DB, so we can do Oldest/Newest dirs in Folder view
|
* Dir can have date in the DB, so we can do Oldest/Newest dirs in Folder view
|
||||||
|
|||||||
@@ -1679,20 +1679,57 @@ def MatchRefimgToFace( refimg_id, face_id, face_dist ):
|
|||||||
return
|
return
|
||||||
|
|
||||||
####################################################################################################################################
|
####################################################################################################################################
|
||||||
# BestFaceMatch(): take in required threshold for anything to be a match, then go through the 'dist' array - it contains the face
|
# Util function to remove the matching face number (which_f) from the dist array (which has dist[who][<face_num>])...
|
||||||
# distance for each refimg / face. So this func loops through all of them,
|
|
||||||
# return the best match (with lowest distance) by checking each one that is a match based on the threshold
|
|
||||||
####################################################################################################################################
|
####################################################################################################################################
|
||||||
def BestFaceMatch(dist, fid, threshold):
    """Return the refimg that best matches face `fid`, and its distance.

    dist is indexed as dist[<refimg>][<face>][0] to get the face distance.
    A refimg is a candidate only if its distance for `fid` is <= threshold
    and below 1.0.

    Returns (which, lowest): the key of the closest refimg and its distance,
    or (None, 1.0) when nothing qualifies.
    """
    # 1 is not a match (0 is perfect match)
    lowest=1.0
    which=None
    for who in dist:
        # this refimg has no distance recorded for this face
        if fid not in dist[who]:
            continue
        fd=dist[who][fid][0]
        if fd < lowest and fd <= threshold:
            lowest=fd
            which=who
    print( f"bfm: return {which}, {lowest} for {fid}" )
    return which, lowest


def RemoveFaceNumFromDist(dist, which_f ):
    """Remove face `which_f` from every refimg's entry in dist (in place).

    dist is shaped dist[who][<face_num>]; refimgs that have no entry for
    `which_f` are left untouched. Returns None.
    """
    for who in dist:
        if which_f in dist[who]:
            del dist[who][which_f]
|
|
||||||
|
####################################################################################################################################
|
||||||
|
# go through dist array and find the best single match (lowest face distance)
|
||||||
|
# returning the matching refimg (which_r), and face num (which_f), and dist (which_fd)
|
||||||
|
####################################################################################################################################
|
||||||
|
def FindBestFaceMatch( dist, threshold ):
    """Find the single best (lowest distance) refimg/face pair in dist.

    dist is indexed as dist[<refimg>][<face>][0] to get the face distance.
    A pair is a candidate only if its distance is <= threshold and below 1.0.

    Returns (which_r, which_f, which_fd): the refimg key, the face key and
    the distance of the best pair, or (None, None, None) if no pair qualifies.
    """
    which_r=None
    which_f=None
    which_fd=None
    # 1 is not a match (0 is perfect match)
    lowest=1.0
    for who in dist:
        for fid in dist[who]:
            fd=dist[who][fid][0]
            if fd < lowest and fd <= threshold:
                # remember the new best distance, otherwise a later but worse
                # candidate would overwrite a better one found earlier
                lowest=fd
                which_r=who
                which_f=fid
                which_fd=fd
    return which_r, which_f, which_fd
|
||||||
|
|
||||||
|
####################################################################################################################################
|
||||||
|
# Okay, go through dist array and find the best single match via FindBestFaceMatch(),
|
||||||
|
# then record that match and remove that refimg/person and that face number
|
||||||
|
# from the dist array, and rinse/repeat until we have no more faces to match,
|
||||||
|
# or the 'best' match is > threshold, so no more matches...
|
||||||
|
####################################################################################################################################
|
||||||
|
def ProcessFaceMatches( job, dist, threshold, e, name ):
    """Repeatedly record the best remaining refimg/face match for a file.

    Each pass asks FindBestFaceMatch() for the best pair left in dist. If one
    is returned it is stored via MatchRefimgToFace() and logged against `job`,
    then that refimg and that face are removed from dist so neither can be
    matched again. The loop ends when FindBestFaceMatch() returns no refimg.

    job       -- passed through to AddLogForJob()
    dist      -- dist[<refimg>][<face>] distances; MODIFIED IN PLACE
    threshold -- passed through to FindBestFaceMatch()
    e         -- file entry; only e.name is read (for the log message)
    name      -- indexed by refimg key to get the text used in the log message
    """
    while True:
        print( f"ProcessFaceMatches() - finding best match left with dist={dist}" )
        which_r, which_f, which_fd = FindBestFaceMatch( dist, threshold )
        print( f"seems that best match is r={which_r}, f={which_f}, with fd={which_fd}" )
        # no refimg returned means nothing left is a match, so we are done
        if which_r is None:
            return

        print( "okay, which_r is real, so we have a match" )
        MatchRefimgToFace( which_r, which_f, which_fd )
        AddLogForJob(job, f'WE MATCHED: {name[which_r]} with file: {e.name} - face distance of {which_fd}')
        # remove this refimg completely, cant be 2 of this person matched
        print( "now remove this refimg from dist" )
        del dist[which_r]
        # remove this face id completely, this face cant be matched by someone else
        print( "now remove this face from dist (if it is connected with anyone else)" )
        print( f"dist now = {dist}" )
        RemoveFaceNumFromDist( dist, which_f )
|
||||||
|
|
||||||
|
|
||||||
####################################################################################################################################
|
####################################################################################################################################
|
||||||
# ScanFileForPerson(): for a file, check to see if a person is matched via face_recognition
|
# ScanFileForPerson(): for a file, check to see if a person is matched via face_recognition
|
||||||
@@ -1700,13 +1737,8 @@ def BestFaceMatch(dist, fid, threshold):
|
|||||||
# NOTE: can pass force into this, but no f/e to trip this yet
|
# NOTE: can pass force into this, but no f/e to trip this yet
|
||||||
# if we do not have (any) faces for this file, go get them and their locations and store them in the DB associated with this file
|
# if we do not have (any) faces for this file, go get them and their locations and store them in the DB associated with this file
|
||||||
# then for each face (known/matched already or not), create a new array 'dist[refimg][face]' and run face_recognition code
|
# then for each face (known/matched already or not), create a new array 'dist[refimg][face]' and run face_recognition code
|
||||||
# to calculate the distance between the refimg and this face, for each refimg and each face
|
# to calculate the distance between the refimg and this face, for each refimg and each face, then call ProcessFaceMatches() to
|
||||||
# then invoke BestFaceMatch above to find the best match, and then store that in the DB for each face for this file
|
# go through the best matches (1-by-1) until no more faces match and store matching faces in DB
|
||||||
# TODO: I *think* by going face-1, face-2, etc. we could find a suboptimal solution:
|
|
||||||
# e.g face-1 (Cam 0.55, Mich 0.6)
|
|
||||||
# face-2 (Cam 0.45, Mich 0.54)
|
|
||||||
# Algo would pick Cam for face-1, delete Cam, then Mich for face-2.
|
|
||||||
# Should have chosen the other way -> might need to research best algo here
|
|
||||||
####################################################################################################################################
|
####################################################################################################################################
|
||||||
def ScanFileForPerson( job, e, force=False ):
|
def ScanFileForPerson( job, e, force=False ):
|
||||||
# get default_scan_model from settings (test this)
|
# get default_scan_model from settings (test this)
|
||||||
@@ -1751,15 +1783,8 @@ def ScanFileForPerson( job, e, force=False ):
|
|||||||
refimg_face_data = numpy.frombuffer(r.face, dtype=numpy.float64)
|
refimg_face_data = numpy.frombuffer(r.face, dtype=numpy.float64)
|
||||||
dist[r.id][face.id] = face_recognition.face_distance(unknown_face_data, [refimg_face_data])
|
dist[r.id][face.id] = face_recognition.face_distance(unknown_face_data, [refimg_face_data])
|
||||||
|
|
||||||
# if you need to check face distances, uncomment this: print( f"dist={dist}" )
|
# record matches in DB...
|
||||||
# TODO: I think this next line is not needed anymore (does the same as above?)
|
ProcessFaceMatches( job, dist, threshold, e, name )
|
||||||
faces = session.execute( f"select f.* from face f join face_file_link ffl on f.id = ffl.face_id where ffl.file_eid = {e.id}" )
|
|
||||||
for face in faces:
|
|
||||||
who, fd = BestFaceMatch(dist, face.id, threshold )
|
|
||||||
if who != None:
|
|
||||||
MatchRefimgToFace( who, face.id, fd )
|
|
||||||
AddLogForJob(job, f'WE MATCHED: {name[who]} with file: {e.name} - face distance of {fd}')
|
|
||||||
del( dist[who] )
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user