diff --git a/pa_job_manager.py b/pa_job_manager.py
index 8f500c0..d68c793 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -1,4 +1,4 @@
-###
+#
 # This file controls the 'external' job control manager, that (periodically
 # # looks / somehow is pushed an event?) picks up new jobs, and processes them.
 
@@ -483,23 +483,34 @@ def ResetExistsOnFS(job, path):
         session.add(reset_file)
     return
 
+# Convenience function to remove a file from the database - and its associated
+# links. Used when a scan finds a file has been removed out from under PA, or
+# when we remove duplicates.
+def RemoveFileFromDB(id):
+    session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
+    session.query(File).filter(File.eid==id).delete()
+    session.query(Entry).filter(Entry.id==id).delete()
+    return
+
+# Convenience function to remove a dir from the database - and its associated links.
+def RemoveDirFromDB(id):
+    session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
+    session.query(Dir).filter(Dir.eid==id).delete()
+    session.query(Entry).filter(Entry.id==id).delete()
+    return
+
 def HandleAnyFSDeletions(job):
     dtype=session.query(FileType).filter(FileType.name=='Directory').first()
     rms = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id!=dtype.id).all()
     rm_cnt=0
     for rm in rms:
-        session.query(EntryDirLink).filter(EntryDirLink.entry_id==rm.id).delete()
-        session.query(File).filter(File.eid==rm.id).delete()
-        session.query(Entry).filter(Entry.id==rm.id).delete()
+        RemoveFileFromDB(rm.id)
         AddLogForJob( job, f"INFO: Removing {rm.name} from system as it is no longer on the file system")
         rm_cnt+=1
 
     rmdirs = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id==1).order_by(Entry.id.desc()).all()
     for rmdir in rmdirs:
-        print(f"We have a directory ({rmdir.name}) to delete from DB as it no longer exists on fs")
-        session.query(EntryDirLink).filter(EntryDirLink.entry_id==rmdir.id).delete()
-        session.query(Dir).filter(Dir.eid==rmdir.id).delete()
-        session.query(Entry).filter(Entry.id==rmdir.id).delete()
+        RemoveDirFromDB(rmdir.id)
         AddLogForJob( job, f"INFO: Removing {rmdir.name} from system as it is no longer on the file system")
         rm_cnt+=1
     return rm_cnt
@@ -635,7 +646,8 @@ def GenHashAndThumb(job, e):
         session.commit()
     stat = os.stat( e.in_dir[0].path_prefix + '/' + e.name )
     if stat.st_ctime < e.file_details[0].last_hash_date:
-        print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
+        if DEBUG==1:
+            print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
         job.current_file_num+=1
         return
 
@@ -876,19 +888,20 @@ def RemoveDups(job):
             del_me_lst = []
             for f in files:
                 if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
-                    AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                    AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignoring file")
ignorning file") elif f.file_details[0].eid == int(keeping): found = f else: del_me_lst.append(f) if found == None: - AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" ) + AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" ) else: AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" ) for del_me in del_me_lst: - AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" ) + AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" ) os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name ) - dup_cnt += 1 + RemoveFileFromDB(del_me.id) if 'kdid-' in jex.name: _, which = jex.name.split('-') @@ -896,23 +908,24 @@ def RemoveDups(job): keeping=jex.value tmp=session.query(Dir).filter(Dir.eid==keeping).first() AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" ) - for hash in hashes[0:-1].split(","): + for hash in hashes.split(","): files=session.query(Entry).join(File).filter(File.hash==hash).all() found=None for f in files: if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False: - AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file") + AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file") if f.in_dir[0].eid == int(keeping): found=f else: del_me=f if found == None: - AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" ) + AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" ) else: AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" ) - AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" ) + AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" ) os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name ) + RemoveFileFromDB(del_me.id) dup_cnt += 1 FinishJob(job, f"Finished removing {dup_cnt} duplicate files" )