Change from UUID to md5sum of face_data so that disconnected metadata can be rematched in the future. This also fixes BUG-109, where a file in DEV was deleted, then copied back and deleted again, which caused duplicate metadata to be read on a subsequent pa_job_manager restart.

This commit is contained in:
2023-01-13 17:35:30 +11:00
parent 1ed4a0f25d
commit 2dac6125c4
2 changed files with 15 additions and 9 deletions

2
BUGs
View File

@@ -4,5 +4,3 @@ BUG-100: I managed to get 2 photos matching mich in the NOT_WORKING photo (proba
BUG-106: cant add trudy /pat? as refimgs via FaceDBox BUG-106: cant add trudy /pat? as refimgs via FaceDBox
- seems the cropped trudy face is not sufficient to find a face, how odd... - seems the cropped trudy face is not sufficient to find a face, how odd...
(it came from a face bbox, BUT, I have grown the face seln by 10%?) (it came from a face bbox, BUT, I have grown the face seln by 10%?)
BUG-109: added mich force override, removed it, then re-added it, then rebuilt the DB from scratch, and the metadata has a duplicate
- redo disco metadata with md5 (not UUID) of the face data

View File

@@ -46,7 +46,6 @@ import re
import re import re
import sys import sys
import ffmpeg import ffmpeg
import uuid
# global debug setting # global debug setting
@@ -970,6 +969,15 @@ def JobScanStorageDir(job):
FinishJob( job, "Completed (scan for new files)" ) FinishJob( job, "Completed (scan for new files)" )
return return
##############################################################################
# Util func to take face data and return hex representation of md5 checksum
# used for disconnected metadata (so its unique BUT also can't cause
# duplicates in rare scenarios)
##############################################################################
def md5face( face_data ):
hash_md5 = hashlib.md5()
hash_md5.update(face_data)
return hash_md5.hexdigest()
############################################################################## ##############################################################################
# DisconnectSingleNoMatchOverride( job, o ): takes a single NoMatch override # DisconnectSingleNoMatchOverride( job, o ): takes a single NoMatch override
@@ -990,7 +998,7 @@ def DisconnectSingleNoMatchOverride( job, o ):
# now deal with 'renaming' the metadata on FS # now deal with 'renaming' the metadata on FS
mpath=f'{SettingsMPath()}/no_match_overrides/' mpath=f'{SettingsMPath()}/no_match_overrides/'
fname=f'{mpath}{o.face_id}_{ot.name}' fname=f'{mpath}{o.face_id}_{ot.name}'
new_fname=f'{mpath}0_{ot.name}_{uuid.uuid4()}' new_fname=f'{mpath}0_{ot.name}_{md5face(f.face)}'
try: try:
if os.path.exists( fname ): if os.path.exists( fname ):
os.replace( fname, new_fname ) os.replace( fname, new_fname )
@@ -1025,7 +1033,7 @@ def DisconnectSingleForceMatchOverride( job, o ):
# now deal with 'renaming' the metadata on FS # now deal with 'renaming' the metadata on FS
path=f'{SettingsMPath()}/force_match_overrides/' path=f'{SettingsMPath()}/force_match_overrides/'
fname=f'{path}{o.face_id}_{p.tag}' fname=f'{path}{o.face_id}_{p.tag}'
new_fname=f'{path}0_{p.tag}_{uuid.uuid4()}' new_fname=f'{path}0_{p.tag}_{md5face(f.face)}'
try: try:
if os.path.exists( fname ): if os.path.exists( fname ):
os.replace( fname, new_fname ) os.replace( fname, new_fname )
@@ -2237,7 +2245,7 @@ def ReloadMetadata(job):
# process Metadata on FS for no_match_overrides (disco ones, will have 0 as face_id) # process Metadata on FS for no_match_overrides (disco ones, will have 0 as face_id)
fnames = glob.glob( f'{mpath}/no_match_overrides/*' ) fnames = glob.glob( f'{mpath}/no_match_overrides/*' )
for fname in fnames: for fname in fnames:
# type derived from fname (e.g. 0_Too Young_uuid*, 1_Too Young, 2_Ingore Face, etc.) # type derived from fname (e.g. 0_Too Young_md5*, 1_Too Young, 2_Ingore Face, etc.)
match=re.search( '(\d+)_([^_\.]+)', fname ) match=re.search( '(\d+)_([^_\.]+)', fname )
face_id=match.group(1) face_id=match.group(1)
type_name=match.group(2) type_name=match.group(2)
@@ -2254,14 +2262,14 @@ def ReloadMetadata(job):
session.add( DisconnectedNoMatchOverride( face=face_data, type_id=otype.id ) ) session.add( DisconnectedNoMatchOverride( face=face_data, type_id=otype.id ) )
if face_id: if face_id:
try: try:
os.replace( fname, f'{mpath}no_match_overrides/0_{otype.name}_{uuid.uuid4()}' ) os.replace( fname, f'{mpath}no_match_overrides/0_{otype.name}_{md5face(face_data)}' )
except Exception as ex: except Exception as ex:
print( f"ERROR: renaming no-match metadata on filesystem failed: {ex}" ) print( f"ERROR: renaming no-match metadata on filesystem failed: {ex}" )
# process Metadata on FS for force_match_overrides (disco ones, will have 0 as face_id) # process Metadata on FS for force_match_overrides (disco ones, will have 0 as face_id)
fnames = glob.glob( f'{mpath}force_match_overrides/*' ) fnames = glob.glob( f'{mpath}force_match_overrides/*' )
for fname in fnames: for fname in fnames:
# person derived from fname (e.g. 0_ddp_uuid*, 1_ddp, 2_mich, etc.) # person derived from fname (e.g. 0_ddp_md5*, 1_ddp, 2_mich, etc.)
match=re.search( '(\d+)_([^_]+)', fname ) match=re.search( '(\d+)_([^_]+)', fname )
face_id=match.group(1) face_id=match.group(1)
person_tag=match.group(2) person_tag=match.group(2)
@@ -2282,7 +2290,7 @@ def ReloadMetadata(job):
# if face>0, then we need to move the FS copy to a disco # if face>0, then we need to move the FS copy to a disco
if face_id: if face_id:
try: try:
os.replace( fname, f'{mpath}force_match_overrides/0_{p.tag}_{uuid.uuid4()}' ) os.replace( fname, f'{mpath}force_match_overrides/0_{p.tag}_{md5face(face_data)}' )
except Exception as ex: except Exception as ex:
print( f"ERROR: renaming force-match metadata on filesystem failed: {ex}" ) print( f"ERROR: renaming force-match metadata on filesystem failed: {ex}" )