From 137b3f347d4a3af25e60933eb8ccb7413a8bcb3d Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Fri, 11 Jun 2021 20:26:15 +1000
Subject: [PATCH] Remove debug prints, add ClearOtherDupMessagesAndJobs()

CheckDups jobs created by restore/delete now clear stale front-end
messages, e.g. if I delete a dup by hand, the dup error disappears :)

The MAIN thing, though, is that the restore/delete paths are now fully
functional: content in the top level of a path, different paths on
import, and subdirs of import paths all work. Also tested a mass delete
via the flat view of all files, then a mass restore -- ALL WORKED :)
---
 pa_job_manager.py | 57 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/pa_job_manager.py b/pa_job_manager.py
index 9ee7462..afc1e7d 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -124,7 +124,7 @@ class Dir(Base):
         return self.in_path.path_prefix+'/'+self.rel_path

     def __repr__(self):
-        return f""
+        return f""

 class Entry(Base):
     __tablename__ = "entry"
@@ -627,28 +627,31 @@ def RestoreFile(job,restore_me):
     orig_path = session.query(Path).filter(Path.path_prefix==orig_file_details.orig_path_prefix).first()
     parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==orig_path.id).first()

+    # in case our new_rel_path is just in the top level of the path, make
+    # new_dir = parent_dir so restore_me.in_dir = new_dir after the for loop works
+    new_dir=parent_dir
+
     # e.g. restore_me's rel_path 'Import/images_to_process/1111', orig_path was 'static/Import/images_to_process', need new rel_path to be just the 1111 bit...
     new_rel_path='static/'+restore_me.in_dir.rel_path
-    new_rel_path=new_rel_path.replace(orig_file_details.orig_path_prefix+'/', '')
-    new_rel_path='1111'
+    new_rel_path=new_rel_path.replace(orig_file_details.orig_path_prefix, '')
+    if len(new_rel_path) > 0 and new_rel_path[-1] == '/':
+        new_rel_path=new_rel_path[0:-1]
-
     # okay, go through new relative path and AddDir any missing subdirs of this
     # path (think Import/Dir1/Dir2) which b/c we have orig_path in AddDir will
     # create static/Import, static/Import/Dir1, static/Import/Dir1/Dir2
     part_rel_path=""
     for dirname in new_rel_path.split("/"):
         part_rel_path += f"{dirname}"
-        ### DDP: when restoring, an original dir might have been removed, so need make it (if needed)
-        os.makedirs( dirname,mode=0o777, exist_ok=True )
         new_dir=AddDir( job, dirname, parent_dir, part_rel_path, orig_path )
         parent_dir=new_dir
         part_rel_path += "/"

     restore_me.in_dir = new_dir
+
+    ### DDP: when restoring, an original dir tree might have been removed, so we need to make it (if needed)
+    os.makedirs( os.path.dirname(restore_me.FullPathOnFS()),mode=0o777, exist_ok=True )
+
     # remove DelFile entry for this restored file
     session.query(DelFile).filter(DelFile.file_eid==restore_me.id).delete()
     session.commit()
-    CheckForDups(job)
-    print("TODO: test the check_dups above works")
     return
@@ -669,7 +672,6 @@ def MoveFileToRecycleBin(job,del_me):
     bin_path=session.query(Path).join(PathType).filter(PathType.name=='Bin').first()
     parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==bin_path.id).first()

-    print( f"need to keep this (ind): {del_me.in_dir.in_path.path_prefix}" )
     # if we ever need to restore, lets remember this file's original path
     # (use a string in case the dir/path is ever deleted from FS (and then DB) and we need to recreate)
     del_file_details = DelFile( file_eid=del_me.id,
                                 orig_path_prefix=del_me.in_dir.in_path.path_prefix )
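A minimal standalone sketch (not part of the patch) of the new_rel_path string
handling that the RestoreFile hunk above introduces, using the example values
from the code comment; the literal values here are illustrative assumptions:

    orig_path_prefix = 'static/Import/images_to_process'    # as remembered in DelFile
    in_dir_rel_path  = 'Import/images_to_process/1111'      # restore_me.in_dir.rel_path

    new_rel_path = 'static/' + in_dir_rel_path               # 'static/Import/images_to_process/1111'
    new_rel_path = new_rel_path.replace(orig_path_prefix, '')  # '/1111'
    if len(new_rel_path) > 0 and new_rel_path[-1] == '/':    # strip a trailing '/' if one is left
        new_rel_path = new_rel_path[0:-1]

    print(new_rel_path)           # '/1111'
    print(new_rel_path.split('/'))  # ['', '1111'] -- the components the AddDir loop walks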
@@ -827,10 +829,10 @@ def JobImportDir(job):
                     year, month, day, woy = GetDateFromFile(fname, stat)
                     e=AddFile( job, basename, type_str, fsize, dir, year, month, day, woy )
                 else:
+                    if DEBUG==1:
+                        print( f"DEBUG: {basename} - {stat.st_ctime} is OLDER than {dir.last_import_date}" )
                     e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==basename,Dir.eid==dir.eid).first()
                     e.exists_on_fs=True
-                    if DEBUG==1:
-                        print("DEBUG: {} - {} is OLDER than {}".format( basename, stat.st_ctime, dir.last_import_date ), basename )
                 job.current_file=basename
                 job.current_file_num+=1
         job.current_file_num += len(subdirs)
@@ -948,7 +950,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
         deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64)
         results = compareAI(deserialized_bytes, unknown_encoding)
         if results[0]:
-            print(f'Found a match between: {person.tag} and {e.name}')
+            print(f'DEBUG: Found a match between: {person.tag} and {e.name}')
             AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}')
             frl.matched=True
             return
@@ -998,7 +1000,6 @@ def JobGetFileDetails(job):
     #### I think the fix here is to get JobImportDir (or whatever makes the PATH) to add a jex for path_prefix and just pull it here, and stop 're-creating' it via SymlinkName
     path=[jex.value for jex in job.extra if jex.name == "path"][0]
     path_prefix=[jex.value for jex in job.extra if jex.name == "path_prefix"][0]
-    print( f"JobGetFileDetails({job}) -- pp={path_prefix}" )
     if DEBUG==1:
         print("DEBUG: JobGetFileDetails for path={}".format( path_prefix ) )
     p=session.query(Path).filter(Path.path_prefix==path_prefix).first()
@@ -1083,8 +1084,22 @@ def GenVideoThumbnail(job, file):
             return None
     return thumbnail

+# utility function to clear any other future Duplicate messages, called if we
+# either create a "new" CheckDups (often del/restore related), OR because we
+# are actually handling the dups now from a front-end click through to
+# /removedups, but some other job has since created another dup message...
+def ClearOtherDupMessagesAndJobs():
+    msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups')
+    for msg in msgs:
+        session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete()
+    cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
+    for j in cd_jobs:
+        FinishJob(j, "New CheckForDups job/removal supersedes this job, withdrawing it", "Withdrawn")
+    session.commit()
+
 def CheckForDups(job):
     AddLogForJob( job, f"Check for duplicates" )
+    ClearOtherDupMessagesAndJobs()
     res = session.execute( "select count(e1.id) from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1, entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and p1.type_id != (select id from path_type where name = 'Bin') and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid and p2.type_id != (select id from path_type where name = 'Bin') and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb" )
     for row in res:
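ClearOtherDupMessagesAndJobs deletes the front-end messages one row at a time;
SQLAlchemy's Query.delete() refuses queries that use join(), which is presumably
why. A minimal sketch of a bulk alternative, assuming the message table carries
a job_id foreign key (the join in the patch implies such a column, but the name
job_id is an assumption):

    # hypothetical bulk delete; 'job_id' is an assumed FK column name
    checkdups_ids = session.query(Job.id).filter(Job.name=='checkdups')
    session.query(PA_JobManager_FE_Message).filter(
        PA_JobManager_FE_Message.job_id.in_(checkdups_ids)
    ).delete(synchronize_session=False)
    session.commit()

synchronize_session=False is required here because the IN-subquery criteria
cannot be evaluated against objects already loaded in the session.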
@@ -1099,13 +1114,7 @@ def CheckForDups(job):
 def RemoveDups(job):
     AddLogForJob(job, f"INFO: Starting Remove Duplicates job...")
     # as checkdups covers all dups, delete all future dups messages, and Withdraw future checkdups jobs
-    msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups')
-    for msg in msgs:
-        session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete()
-    cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
-    for j in cd_jobs:
-        FinishJob(j, "Just removed duplicates - so no need to do any other checkdups, we will force 1 last one after the remove step", "Withdrawn")
-    session.commit()
+    ClearOtherDupMessagesAndJobs()

     dup_cnt=0
     for jex in job.extra:
@@ -1173,8 +1182,11 @@ def JobDeleteFiles(job):
     for jex in job.extra:
         if 'eid-' in jex.name:
             del_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
-            AddLogForJob(job, f"INFO: Removing file: #{del_me.id} -> {del_me}" )
+            AddLogForJob(job, f"INFO: Removing file: #{del_me.id} -> {del_me.name}" )
             MoveFileToRecycleBin(job,del_me)

+    now=datetime.now(pytz.utc)
+    next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
+    session.add(next_job)
     FinishJob(job, f"Finished deleting selected file(s)")
     return
@@ -1185,6 +1197,9 @@ def JobRestoreFiles(job):
             restore_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
-            AddLogForJob(job, f"INFO: Removing file: #{restore_me.id} -> {restore_me}" )
+            AddLogForJob(job, f"INFO: Restoring file: #{restore_me.id} -> {restore_me.name}" )
             RestoreFile(job,restore_me)

+    now=datetime.now(pytz.utc)
+    next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
+    session.add(next_job)
     FinishJob(job, f"Finished restoring selected file(s)")
     return
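Both JobDeleteFiles and JobRestoreFiles now queue the same follow-up checkdups
job by hand. A minimal sketch of how that duplicated construction could be
factored out; QueueCheckDupsJob is a hypothetical name, and the Job fields
mirror exactly what the two hunks above pass:

    from datetime import datetime
    import pytz

    # hypothetical helper, not in the patch: queue a follow-up checkdups job
    def QueueCheckDupsJob():
        now = datetime.now(pytz.utc)
        next_job = Job(start_time=now, last_update=now, name="checkdups",
                       state="New", wait_for=None, pa_job_state="New",
                       current_file_num=0)
        session.add(next_job)

    # usage at the end of JobDeleteFiles / JobRestoreFiles, before FinishJob():
    #     QueueCheckDupsJob()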