From 137b3f347d4a3af25e60933eb8ccb7413a8bcb3d Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Fri, 11 Jun 2021 20:26:15 +1000
Subject: [PATCH] Remove debug prints, add ClearOtherDupMessagesAndJobs()

CheckDups jobs created by restore/delete now clear stale front-end
messages, e.g. if I delete a dup by hand, the dup error disappears :)

The MAIN thing, though, is that the restore/delete paths are now fully
functional: content in the top level of a path, different paths on
import, and subdirs of import paths all work. Also tested a mass delete
via the flat view of all files, then a mass restore -- ALL WORKED :)
---
 pa_job_manager.py | 57 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/pa_job_manager.py b/pa_job_manager.py
index 9ee7462..afc1e7d 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -124,7 +124,7 @@ class Dir(Base):
         return self.in_path.path_prefix+'/'+self.rel_path

     def __repr__(self):
-        return f""
+        return f""

 class Entry(Base):
     __tablename__ = "entry"
@@ -627,28 +627,31 @@ def RestoreFile(job,restore_me):
     orig_path = session.query(Path).filter(Path.path_prefix==orig_file_details.orig_path_prefix).first()
     parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==orig_path.id).first()

+    # in case our new_rel_path is just in the top level of the path, make
+    # new_dir = parent_dir so restore_me.in_dir = new_dir after the for loop works
+    new_dir=parent_dir
+
     # e.g. restore_me's rel_path 'Import/images_to_process/1111', orig_path was 'static/Import/images_to_process', need new rel_path to be just the 1111 bit...
     new_rel_path='static/'+restore_me.in_dir.rel_path
-    new_rel_path=new_rel_path.replace(orig_file_details.orig_path_prefix+'/', '')
-    new_rel_path='1111'
+    new_rel_path=new_rel_path.replace(orig_file_details.orig_path_prefix, '')
+    if len(new_rel_path) > 0 and new_rel_path[-1] == '/':
+        new_rel_path=new_rel_path[0:-1]
-
     # okay, go through new relative path and AddDir any missing subdirs of this
     # path (think Import/Dir1/Dir2) which b/c we have orig_path in AddDir will
     # create static/Import, static/Import/Dir1, static/Import/Dir1/Dir2
     part_rel_path=""
     for dirname in new_rel_path.split("/"):
         part_rel_path += f"{dirname}"
-        ### DDP: when restoring, an original dir might have been removed, so need make it (if needed)
-        os.makedirs( dirname,mode=0o777, exist_ok=True )
         new_dir=AddDir( job, dirname, parent_dir, part_rel_path, orig_path )
         parent_dir=new_dir
         part_rel_path += "/"

     restore_me.in_dir = new_dir
+
+    ### DDP: when restoring, an original dir tree might have been removed, so we need to make it (if needed)
+    os.makedirs( os.path.dirname(restore_me.FullPathOnFS()),mode=0o777, exist_ok=True )
+
     # remove DelFile entry for this restored file
     session.query(DelFile).filter(DelFile.file_eid==restore_me.id).delete()
     session.commit()
-    CheckForDups(job)
-    print("TODO: test the check_dups above works")
     return
@@ -669,7 +672,6 @@ def MoveFileToRecycleBin(job,del_me):
     bin_path=session.query(Path).join(PathType).filter(PathType.name=='Bin').first()
     parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==bin_path.id).first()

-    print( f"need to keep this (ind): {del_me.in_dir.in_path.path_prefix}" )
     # if we ever need to restore, lets remember this file's original path
     # (use a string in case the dir/path is ever deleted from FS (and then DB) and we need to recreate)
     del_file_details = DelFile( file_eid=del_me.id,
                                 orig_path_prefix=del_me.in_dir.in_path.path_prefix )
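A minimal standalone sketch (not part of the patch) of the new_rel_path string
handling that the RestoreFile hunk above introduces, using the example values
from the code comment; the literal values here are illustrative assumptions:

    orig_path_prefix = 'static/Import/images_to_process'    # as remembered in DelFile
    in_dir_rel_path  = 'Import/images_to_process/1111'      # restore_me.in_dir.rel_path

    new_rel_path = 'static/' + in_dir_rel_path               # 'static/Import/images_to_process/1111'
    new_rel_path = new_rel_path.replace(orig_path_prefix, '')  # '/1111'
    if len(new_rel_path) > 0 and new_rel_path[-1] == '/':    # strip a trailing '/' if one is left
        new_rel_path = new_rel_path[0:-1]

    print(new_rel_path)           # '/1111'
    print(new_rel_path.split('/'))  # ['', '1111'] -- the components the AddDir loop walks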
@@ -827,10 +829,10 @@ def JobImportDir(job):
                     year, month, day, woy = GetDateFromFile(fname, stat)
                     e=AddFile( job, basename, type_str, fsize, dir, year, month, day, woy )
                 else:
+                    if DEBUG==1:
+                        print( f"DEBUG: {basename} - {stat.st_ctime} is OLDER than {dir.last_import_date}" )
                     e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==basename,Dir.eid==dir.eid).first()
                     e.exists_on_fs=True
-                    if DEBUG==1:
-                        print("DEBUG: {} - {} is OLDER than {}".format( basename, stat.st_ctime, dir.last_import_date ), basename )
                 job.current_file=basename
                 job.current_file_num+=1
         job.current_file_num += len(subdirs)
@@ -948,7 +950,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
         deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64)
         results = compareAI(deserialized_bytes, unknown_encoding)
         if results[0]:
-            print(f'Found a match between: {person.tag} and {e.name}')
+            print(f'DEBUG: Found a match between: {person.tag} and {e.name}')
             AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}')
             frl.matched=True
             return
@@ -998,7 +1000,6 @@ def JobGetFileDetails(job):
     #### I think the fix here is to get JobImportDir (or whatever makes the PATH) to add a jex for path_prefix and just pull it here, and stop 're-creating' it via SymlinkName
     path=[jex.value for jex in job.extra if jex.name == "path"][0]
     path_prefix=[jex.value for jex in job.extra if jex.name == "path_prefix"][0]
-    print( f"JobGetFileDetails({job}) -- pp={path_prefix}" )
     if DEBUG==1:
         print("DEBUG: JobGetFileDetails for path={}".format( path_prefix ) )
     p=session.query(Path).filter(Path.path_prefix==path_prefix).first()
@@ -1083,8 +1084,22 @@ def GenVideoThumbnail(job, file):
             return None
     return thumbnail

+# utility function to clear any other future Duplicate messages, called if we
+# either create a "new" CheckDups (often del/restore related), OR because we
+# are actually handling the dups now from a front-end click through to
+# /removedups, but some other job has since created another dup message...
+def ClearOtherDupMessagesAndJobs():
+    msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups')
+    for msg in msgs:
+        session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete()
+    cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
+    for j in cd_jobs:
+        FinishJob(j, "New CheckForDups job/removal supersedes this job, withdrawing it", "Withdrawn")
+    session.commit()
+
 def CheckForDups(job):
     AddLogForJob( job, f"Check for duplicates" )
+    ClearOtherDupMessagesAndJobs()
     res = session.execute( "select count(e1.id) from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1, entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and p1.type_id != (select id from path_type where name = 'Bin') and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid and p2.type_id != (select id from path_type where name = 'Bin') and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb" )
     for row in res:
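ClearOtherDupMessagesAndJobs deletes the front-end messages one row at a time;
SQLAlchemy's Query.delete() refuses queries that use join(), which is presumably
why. A minimal sketch of a bulk alternative, assuming the message table carries
a job_id foreign key (the join in the patch implies such a column, but the name
job_id is an assumption):

    # hypothetical bulk delete; 'job_id' is an assumed FK column name
    checkdups_ids = session.query(Job.id).filter(Job.name=='checkdups')
    session.query(PA_JobManager_FE_Message).filter(
        PA_JobManager_FE_Message.job_id.in_(checkdups_ids)
    ).delete(synchronize_session=False)
    session.commit()

synchronize_session=False is required here because the IN-subquery criteria
cannot be evaluated against objects already loaded in the session.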
@@ -1099,13 +1114,7 @@ def CheckForDups(job):
 def RemoveDups(job):
     AddLogForJob(job, f"INFO: Starting Remove Duplicates job...")
     # as checkdups covers all dups, delete all future dups messages, and Withdraw future checkdups jobs
-    msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups')
-    for msg in msgs:
-        session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete()
-    cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
-    for j in cd_jobs:
-        FinishJob(j, "Just removed duplicates - so no need to do any other checkdups, we will force 1 last one after the remove step", "Withdrawn")
-    session.commit()
+    ClearOtherDupMessagesAndJobs()

     dup_cnt=0
     for jex in job.extra:
@@ -1173,8 +1182,11 @@ def JobDeleteFiles(job):
     for jex in job.extra:
         if 'eid-' in jex.name:
             del_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
-            AddLogForJob(job, f"INFO: Removing file: #{del_me.id} -> {del_me}" )
+            AddLogForJob(job, f"INFO: Removing file: #{del_me.id} -> {del_me.name}" )
             MoveFileToRecycleBin(job,del_me)

+    now=datetime.now(pytz.utc)
+    next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
+    session.add(next_job)
     FinishJob(job, f"Finished deleting selected file(s)")
     return
@@ -1185,6 +1197,9 @@ def JobRestoreFiles(job):
             restore_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
-            AddLogForJob(job, f"INFO: Removing file: #{restore_me.id} -> {restore_me}" )
+            AddLogForJob(job, f"INFO: Restoring file: #{restore_me.id} -> {restore_me.name}" )
             RestoreFile(job,restore_me)

+    now=datetime.now(pytz.utc)
+    next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
+    session.add(next_job)
     FinishJob(job, f"Finished restoring selected file(s)")
     return
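Both JobDeleteFiles and JobRestoreFiles now queue the same follow-up checkdups
job by hand. A minimal sketch of how that duplicated construction could be
factored out; QueueCheckDupsJob is a hypothetical name, and the Job fields
mirror exactly what the two hunks above pass:

    from datetime import datetime
    import pytz

    # hypothetical helper, not in the patch: queue a follow-up checkdups job
    def QueueCheckDupsJob():
        now = datetime.now(pytz.utc)
        next_job = Job(start_time=now, last_update=now, name="checkdups",
                       state="New", wait_for=None, pa_job_state="New",
                       current_file_num=0)
        session.add(next_job)

    # usage at the end of JobDeleteFiles / JobRestoreFiles, before FinishJob():
    #     QueueCheckDupsJob()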