removed debugs, created ClearOtherDupMessagesAndJobs() so the CheckDups jobs created by restore/delete remove stale front-end messages, e.g. delete a dup by hand and the dup error disappears :) MAIN thing though is that the crazy restore/delete paths are now fully functional: tested with content in the top level of a path, with different paths on import, and in subdirs of import paths; also tested a mass delete via the flat view of all files, then a mass restore -- ALL WORKED :)

2021-06-11 20:26:15 +10:00
parent c3398ef77a
commit 137b3f347d


@@ -124,7 +124,7 @@ class Dir(Base):
return self.in_path.path_prefix+'/'+self.rel_path
def __repr__(self):
return f"<eid: {self.eid}, last_import_date: {self.last_import_date}, files: {self.files}>"
return f"<eid: {self.eid}, rel_path: {self.rel_path}, in_path={self.in_path}, last_import_date: {self.last_import_date}, files: {self.files}>"
class Entry(Base):
__tablename__ = "entry"
@@ -627,28 +627,31 @@ def RestoreFile(job,restore_me):
orig_path = session.query(Path).filter(Path.path_prefix==orig_file_details.orig_path_prefix).first()
parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==orig_path.id).first()
# in case our new_rel_path is just the top level of the path, set
# new_dir = parent_dir so the restore_me.in_dir = new_dir assignment after the for loop still works
new_dir=parent_dir
# e.g. restore_me's rel_path was 'Import/images_to_process/1111' and orig_path was 'static/Import/images_to_process'; the new rel_path needs to be just the '1111' bit...
new_rel_path='static/'+restore_me.in_dir.rel_path
new_rel_path=new_rel_path.replace(orig_file_details.orig_path_prefix+'/', '')
new_rel_path='1111'
new_rel_path=new_rel_path.replace(orig_file_details.orig_path_prefix, '')
if len(new_rel_path) > 0 and new_rel_path[-1] == '/':
new_rel_path=new_rel_path[0:-1]
# okay, go through the new relative path and AddDir any missing subdirs of this
# path (think Import/Dir1/Dir2) which, because we pass orig_path to AddDir, will
# create static/Import, static/Import/Dir1, static/Import/Dir1/Dir2
part_rel_path=""
for dirname in new_rel_path.split("/"):
part_rel_path += f"{dirname}"
### DDP: when restoring, an original dir might have been removed, so we need to make it (if needed)
os.makedirs( dirname,mode=0o777, exist_ok=True )
new_dir=AddDir( job, dirname, parent_dir, part_rel_path, orig_path )
parent_dir=new_dir
part_rel_path += "/"
restore_me.in_dir = new_dir
### DDP: when restoring, an original dir tree might have been removed, so we need to make it (if needed)
os.makedirs( os.path.dirname(restore_me.FullPathOnFS()),mode=0o777, exist_ok=True )
# remove DelFile entry for this restored file
session.query(DelFile).filter(DelFile.file_eid==restore_me.id).delete()
session.commit()
CheckForDups(job)
print("TODO: test the check_dups above works")
return
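The string arithmetic above is easy to get wrong, so here is a minimal, self-contained sketch of the intent, assuming the goal is to reduce the restored file's path to just the portion below the original path prefix. The helper name and literals are illustrative, not from the codebase, and strip('/') is used here in place of the exact slash handling in the diff:

def rebuild_rel_path(rel_path, orig_path_prefix):
    # Prepend 'static/' (the on-disk root the Path rows use), cut away the
    # original path prefix, then drop any leftover separator so only the
    # sub-path below the prefix survives.
    new_rel_path = 'static/' + rel_path
    new_rel_path = new_rel_path.replace(orig_path_prefix, '')
    return new_rel_path.strip('/')

# Example from the in-code comment: only the '1111' bit should survive,
# and top-level content reduces to the empty string.
print(rebuild_rel_path('Import/images_to_process/1111',
                       'static/Import/images_to_process'))  # -> '1111'
print(rebuild_rel_path('Import/images_to_process',
                       'static/Import/images_to_process'))  # -> ''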
# Function that moves a file we are "deleting" to the recycle bin, it moves the
@@ -669,7 +672,6 @@ def MoveFileToRecycleBin(job,del_me):
bin_path=session.query(Path).join(PathType).filter(PathType.name=='Bin').first()
parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==bin_path.id).first()
print( f"need to keep this (ind): {del_me.in_dir.in_path.path_prefix}" )
# if we ever need to restore, let's remember this file's original path
# (use a string in case the dir/path is ever deleted from FS (and then DB) and we need to recreate)
del_file_details = DelFile( file_eid=del_me.id, orig_path_prefix=del_me.in_dir.in_path.path_prefix )
@@ -827,10 +829,10 @@ def JobImportDir(job):
year, month, day, woy = GetDateFromFile(fname, stat)
e=AddFile( job, basename, type_str, fsize, dir, year, month, day, woy )
else:
if DEBUG==1:
print( f"DEBUG: { basename} - {stat.st_ctime} is OLDER than {dir.last_import_date}" )
e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==basename,Dir.eid==dir.eid).first()
e.exists_on_fs=True
if DEBUG==1:
print("DEBUG: {} - {} is OLDER than {}".format( basename, stat.st_ctime, dir.last_import_date ), basename )
job.current_file=basename
job.current_file_num+=1
job.current_file_num += len(subdirs)
@@ -948,7 +950,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64)
results = compareAI(deserialized_bytes, unknown_encoding)
if results[0]:
print(f'Found a match between: {person.tag} and {e.name}')
print(f'DEBUG: Found a match between: {person.tag} and {e.name}')
AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}')
frl.matched=True
return
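For context on the frombuffer call above: it only reconstructs the encoding correctly if the stored bytes were produced from a float64 array in the first place. A minimal round-trip sketch; the 128-dim size and variable names are illustrative, only the frombuffer/dtype usage mirrors the diff:

import numpy

# A face encoding like the ones stored on refimg.encodings (128 float64
# values is typical for face-recognition libraries).
encoding = numpy.random.rand(128).astype(numpy.float64)

# Serialize to raw bytes for storage in a BLOB column...
stored_bytes = encoding.tobytes()

# ...and deserialize exactly as lookForPersonInImage does.
restored = numpy.frombuffer(stored_bytes, dtype=numpy.float64)

assert numpy.array_equal(encoding, restored)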
@@ -998,7 +1000,6 @@ def JobGetFileDetails(job):
#### I think the fix here is to get JobImportDir (or whatever makes the PATH) to add a jex for path_prefix and just pull it here, and stop 're-creating' it via SymlinkName
path=[jex.value for jex in job.extra if jex.name == "path"][0]
path_prefix=[jex.value for jex in job.extra if jex.name == "path_prefix"][0]
print( f"JobGetFileDetails({job}) -- pp={path_prefix}" )
if DEBUG==1:
print("DEBUG: JobGetFileDetails for path={}".format( path_prefix ) )
p=session.query(Path).filter(Path.path_prefix==path_prefix).first()
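The [jex.value for jex in job.extra if jex.name == "path"][0] idiom above raises a bare IndexError when the extra was never recorded. A hypothetical helper (not in the codebase) with the same behaviour but a clearer failure mode:

def get_job_extra(job, name):
    # Return the value of the first job extra with the given name,
    # mirroring the list-comprehension-plus-[0] idiom in JobGetFileDetails.
    for jex in job.extra:
        if jex.name == name:
            return jex.value
    raise KeyError(f"job {job} has no extra named {name!r}")

path = get_job_extra(job, "path")
path_prefix = get_job_extra(job, "path_prefix")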
@@ -1083,8 +1084,22 @@ def GenVideoThumbnail(job, file):
return None
return thumbnail
# utility function to clear any other future Duplicate messages, called if we
# either create a "new" CheckDups (often del/restore related), OR because we
# are actually handling the dups now from a front-end click through to
# /removedups, but some other job has since created another dup message...
def ClearOtherDupMessagesAndJobs():
msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups')
for msg in msgs:
session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete()
cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
for j in cd_jobs:
FinishJob(j, "New CheckForDups job/removal supercedes this job, withdrawing it", "Withdrawn")
session.commit()
def CheckForDups(job):
AddLogForJob( job, f"Check for duplicates" )
ClearOtherDupMessagesAndJobs()
res = session.execute( "select count(e1.id) from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1, entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and p1.type_id != (select id from path_type where name = 'Bin') and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid and p2.type_id != (select id from path_type where name = 'Bin') and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb" )
for row in res:
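Since the raw dup-detection query above is a single long line, here it is again reformatted purely for readability (identical semantics: count pairs of distinct entries, both outside the 'Bin' path type, that share the same hash and size_mb):

DUP_COUNT_SQL = """
select count(e1.id)
from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1,
     entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2
where e1.id = f1.eid and e2.id = f2.eid
  and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id
  and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id
  and p1.type_id != (select id from path_type where name = 'Bin')
  and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid
  and p2.type_id != (select id from path_type where name = 'Bin')
  and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid
  and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb
"""
res = session.execute(DUP_COUNT_SQL)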
@@ -1099,13 +1114,7 @@ def CheckForDups(job):
def RemoveDups(job):
AddLogForJob(job, f"INFO: Starting Remove Duplicates job...")
# as checkdups covers all dups, delete all future dups messages, and Withdraw future checkdups jobs
msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups')
for msg in msgs:
session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete()
cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
for j in cd_jobs:
FinishJob(j, "Just removed duplicates - so no need to do any other checkdups, we will force 1 last one after the remove step", "Withdrawn")
session.commit()
ClearOtherDupMessagesAndJobs()
dup_cnt=0
for jex in job.extra:
@@ -1173,8 +1182,11 @@ def JobDeleteFiles(job):
for jex in job.extra:
if 'eid-' in jex.name:
del_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
AddLogForJob(job, f"INFO: Removing file: #{del_me.id} -> {del_me}" )
AddLogForJob(job, f"INFO: Removing file: #{del_me.id} -> {del_me.name}" )
MoveFileToRecycleBin(job,del_me)
now=datetime.now(pytz.utc)
next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
session.add(next_job)
FinishJob(job, f"Finished deleting selected file(s)")
return
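Both JobDeleteFiles (above) and JobRestoreFiles (below) now end by queuing a fresh 'checkdups' job. A sketch of that follow-up-job pattern pulled out into a function; the Job fields, datetime and pytz usage are taken from the diff, the wrapper itself is illustrative:

from datetime import datetime
import pytz

def queue_checkdups_job(session, Job):
    # Queue a fresh 'checkdups' job so any dup messages made stale by the
    # delete/restore we just finished get re-checked (and cleared via
    # ClearOtherDupMessagesAndJobs when that job runs).
    now = datetime.now(pytz.utc)
    next_job = Job(start_time=now, last_update=now, name="checkdups",
                   state="New", wait_for=None, pa_job_state="New",
                   current_file_num=0)
    session.add(next_job)
    return next_job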
@@ -1185,6 +1197,9 @@ def JobRestoreFiles(job):
restore_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
AddLogForJob(job, f"INFO: Removing file: #{restore_me.id} -> {restore_me}" )
RestoreFile(job,restore_me)
now=datetime.now(pytz.utc)
next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
session.add(next_job)
FinishJob(job, f"Finished restoring selected file(s)")
return