Cleaned up some log / debug output. Fixed a bug where we stripped a trailing comma from the list of hashes even when there was none, so the last hash was truncated and could never be found. Also created convenience functions to remove a file / dir from the DB for the exists_on_fs code, and re-used them so that deleting duplicates also removes the files from the DB.
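For context, a minimal sketch of the hash-splitting bug this commit fixes; the value of hashes below is purely illustrative:

    # Illustrative only: the job carries a comma-separated list of hashes with no trailing comma.
    hashes = "aaa111,bbb222,ccc333"

    # Old behaviour: dropping the last character (meant to strip a trailing comma)
    # truncates the final hash, so it can never be matched in the DB.
    print(hashes[0:-1].split(","))   # ['aaa111', 'bbb222', 'ccc33']

    # Fixed behaviour: split the string as-is.
    print(hashes.split(","))         # ['aaa111', 'bbb222', 'ccc333']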
@@ -1,4 +1,4 @@
 ###
 #
 # This file controls the 'external' job control manager, that (periodically #
 # looks / somehow is pushed an event?) picks up new jobs, and processes them.
@@ -483,23 +483,34 @@ def ResetExistsOnFS(job, path):
     session.add(reset_file)
     return
 
+# Convenience function to remove a file from the database - and its associated links
+# used when scanning and a file has been removed out from under PA, or
+# when we remove duplicates
+def RemoveFileFromDB(id):
+    session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
+    session.query(File).filter(File.eid==id).delete()
+    session.query(Entry).filter(Entry.id==id).delete()
+    return
+
+# Convenience function to remove a dir from the database - and its associated links
+def RemoveDirFromDB(id):
+    session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
+    session.query(Dir).filter(Dir.eid==id).delete()
+    session.query(Entry).filter(Entry.id==id).delete()
+    return
+
 def HandleAnyFSDeletions(job):
     dtype=session.query(FileType).filter(FileType.name=='Directory').first()
     rms = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id!=dtype.id).all()
     rm_cnt=0
     for rm in rms:
-        session.query(EntryDirLink).filter(EntryDirLink.entry_id==rm.id).delete()
-        session.query(File).filter(File.eid==rm.id).delete()
-        session.query(Entry).filter(Entry.id==rm.id).delete()
+        RemoveFileFromDB(rm.id)
         AddLogForJob( job, f"INFO: Removing {rm.name} from system as it is no longer on the file system")
         rm_cnt+=1
 
     rmdirs = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id==1).order_by(Entry.id.desc()).all()
     for rmdir in rmdirs:
         print(f"We have a directory ({rmdir.name}) to delete from DB as it no longer exists on fs")
-        session.query(EntryDirLink).filter(EntryDirLink.entry_id==rmdir.id).delete()
-        session.query(Dir).filter(Dir.eid==rmdir.id).delete()
-        session.query(Entry).filter(Entry.id==rmdir.id).delete()
+        RemoveFileFromDB(rmdir.id)
         AddLogForJob( job, f"INFO: Removing {rmdir.name} from system as it is no longer on the file system")
         rm_cnt+=1
     return rm_cnt
@@ -635,6 +646,7 @@ def GenHashAndThumb(job, e):
     session.commit()
     stat = os.stat( e.in_dir[0].path_prefix + '/' + e.name )
     if stat.st_ctime < e.file_details[0].last_hash_date:
         if DEBUG==1:
             print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
         job.current_file_num+=1
         return
@@ -876,19 +888,19 @@ def RemoveDups(job):
             del_me_lst = []
             for f in files:
                 if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
-                    AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                    AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
                 elif f.file_details[0].eid == int(keeping):
                     found = f
                 else:
                     del_me_lst.append(f)
             if found == None:
-                AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
+                AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" )
             else:
                 AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
                 for del_me in del_me_lst:
-                    AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
+                    AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
                     os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
                     dup_cnt += 1
+                    RemoveFileFromDB(del_me.id)
 
     if 'kdid-' in jex.name:
         _, which = jex.name.split('-')
@@ -896,23 +908,24 @@ def RemoveDups(job):
         keeping=jex.value
         tmp=session.query(Dir).filter(Dir.eid==keeping).first()
         AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
-        for hash in hashes[0:-1].split(","):
+        for hash in hashes.split(","):
             files=session.query(Entry).join(File).filter(File.hash==hash).all()
             found=None
             for f in files:
                 if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
-                    AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                    AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
                 if f.in_dir[0].eid == int(keeping):
                     found=f
                 else:
                     del_me=f
 
             if found == None:
-                AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
+                AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" )
             else:
                 AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
-                AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
+                AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
                 os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
+                RemoveFileFromDB(del_me.id)
                 dup_cnt += 1
 
     FinishJob(job, f"Finished removing {dup_cnt} duplicate files" )
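For reference, a minimal sketch (not part of the commit) of the call pattern the duplicate-removal path now follows, using the helper and models from the diff above; the remove_duplicate wrapper and the explicit commit are illustrative assumptions:

    # Illustrative sketch only: mirrors the pattern used in RemoveDups above.
    # 'dup' is an Entry row whose on-disk file we have decided to delete.
    def remove_duplicate(dup):
        path = dup.in_dir[0].path_prefix + '/' + dup.name
        os.remove(path)           # delete the file from disk first
        RemoveFileFromDB(dup.id)  # then purge its EntryDirLink, File and Entry rows
        session.commit()          # assumption: the caller commits the session afterwards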