From 2dac6125c4fed694b51097197ffdf42815d65786 Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Fri, 13 Jan 2023 17:35:30 +1100 Subject: [PATCH] change from UUID to md5sum of face_data to allow disconnected metadata to be rematched in the future. This also removes BUG-109 where a file in DEV was deleted then copied back and deleted again, and it caused duplicate metadata reading on a subsequent pa_job_manager restart --- BUGs | 2 -- pa_job_manager.py | 22 +++++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/BUGs b/BUGs index cc5c8e5..553d4c0 100644 --- a/BUGs +++ b/BUGs @@ -4,5 +4,3 @@ BUG-100: I managed to get 2 photos matching mich in the NOT_WORKING photo (proba BUG-106: cant add trudy /pat? as refimgs via FaceDBox - seems the cropped trudy face is not sufficient to find a face, how odd... (it came from a face bbox, BUT, I have grown the face seln by 10%?) -BUG-109: add mich force override, removed it, then re-added it, then rebuild DB form scratch and metadata has a duplicate - - redo disco metadata with md5 not UUID of face dataS diff --git a/pa_job_manager.py b/pa_job_manager.py index 5fff9f9..95dd2f1 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -46,7 +46,6 @@ import re import re import sys import ffmpeg -import uuid # global debug setting @@ -970,6 +969,15 @@ def JobScanStorageDir(job): FinishJob( job, "Completed (scan for new files)" ) return +############################################################################## +# Util func to take face data and return hex representation of md5 checksum +# used for disconnected metadata (so its unique BUT also can't cause +# duplicates in rare scenarios) +############################################################################## +def md5face( face_data ): + hash_md5 = hashlib.md5() + hash_md5.update(face_data) + return hash_md5.hexdigest() ############################################################################## # DisconnectSingleNoMatchOverride( job, o ): 
takes a single NoMatch override @@ -990,7 +998,7 @@ def DisconnectSingleNoMatchOverride( job, o ): # now deal with 'renaming' the metadata on FS mpath=f'{SettingsMPath()}/no_match_overrides/' fname=f'{mpath}{o.face_id}_{ot.name}' - new_fname=f'{mpath}0_{ot.name}_{uuid.uuid4()}' + new_fname=f'{mpath}0_{ot.name}_{md5face(f.face)}' try: if os.path.exists( fname ): os.replace( fname, new_fname ) @@ -1025,7 +1033,7 @@ def DisconnectSingleForceMatchOverride( job, o ): # now deal with 'renaming' the metadata on FS path=f'{SettingsMPath()}/force_match_overrides/' fname=f'{path}{o.face_id}_{p.tag}' - new_fname=f'{path}0_{p.tag}_{uuid.uuid4()}' + new_fname=f'{path}0_{p.tag}_{md5face(f.face)}' try: if os.path.exists( fname ): os.replace( fname, new_fname ) @@ -2237,7 +2245,7 @@ def ReloadMetadata(job): # process Metadata on FS for no_match_overrides (disco ones, will have 0 as face_id) fnames = glob.glob( f'{mpath}/no_match_overrides/*' ) for fname in fnames: - # type derived from fname (e.g. 0_Too Young_uuid*, 1_Too Young, 2_Ingore Face, etc.) + # type derived from fname (e.g. 0_Too Young_md5*, 1_Too Young, 2_Ingore Face, etc.) match=re.search( '(\d+)_([^_\.]+)', fname ) face_id=match.group(1) type_name=match.group(2) @@ -2254,14 +2262,14 @@ def ReloadMetadata(job): session.add( DisconnectedNoMatchOverride( face=face_data, type_id=otype.id ) ) if face_id: try: - os.replace( fname, f'{mpath}no_match_overrides/0_{otype.name}_{uuid.uuid4()}' ) + os.replace( fname, f'{mpath}no_match_overrides/0_{otype.name}_{md5face(face_data)}' ) except Exception as ex: print( f"ERROR: renaming no-match metadata on filesystem failed: {ex}" ) # process Metadata on FS for force_match_overrides (disco ones, will have 0 as face_id) fnames = glob.glob( f'{mpath}force_match_overrides/*' ) for fname in fnames: - # person derived from fname (e.g. 0_ddp_uuid*, 1_ddp, 2_mich, etc.) + # person derived from fname (e.g. 0_ddp_md5*, 1_ddp, 2_mich, etc.) 
match=re.search( '(\d+)_([^_]+)', fname ) face_id=match.group(1) person_tag=match.group(2) @@ -2282,7 +2290,7 @@ def ReloadMetadata(job): # if face>0, then we need to move the FS copy to a disco if face_id: try: - os.replace( fname, f'{mpath}force_match_overrides/0_{p.tag}_{uuid.uuid4()}' ) + os.replace( fname, f'{mpath}force_match_overrides/0_{p.tag}_{md5face(face_data)}' ) except Exception as ex: print( f"ERROR: renaming force-match metadata on filesystem failed: {ex}" )