cleaned up some logs / debug output; fixed a bug where we stripped a trailing comma off the list of hashes when there was none, so the last hash was truncated and never found; also added convenience functions to remove a file / dir from the DB for the exists_on_fs code, and re-used them so deleting duplicates also removes the files from the DB
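For context, a minimal sketch of the hash-list fix below (hypothetical values, not code from this commit): the job value is a comma-separated list of hashes with no trailing comma, so slicing off the last character before splitting truncated the final hash and the lookup for it always failed.

    hashes = "abc123,def456"     # hypothetical job value - no trailing comma
    hashes[0:-1].split(",")      # old behaviour: ['abc123', 'def45'] - last hash truncated
    hashes.split(",")            # new behaviour: ['abc123', 'def456'] - every hash intact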

2021-03-28 16:30:46 +11:00
parent 5f42e19bcd
commit 5e6d63d668


@@ -1,4 +1,4 @@
###
#
# This file controls the 'external' job control manager, that (periodically
# looks / somehow is pushed an event?) picks up new jobs, and processes them.
@@ -483,23 +483,34 @@ def ResetExistsOnFS(job, path):
session.add(reset_file)
return
# Convenience function to remove a file from the database - and its associated links
# used when scanning and a file has been removed out from under PA, or
# when we remove duplicates
def RemoveFileFromDB(id):
session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
session.query(File).filter(File.eid==id).delete()
session.query(Entry).filter(Entry.id==id).delete()
return
# Convenience function to remove a dir from the database - and its associated links
def RemoveDirFromDB(id):
session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
session.query(Dir).filter(Dir.eid==id).delete()
session.query(Entry).filter(Entry.id==id).delete()
return
def HandleAnyFSDeletions(job):
dtype=session.query(FileType).filter(FileType.name=='Directory').first()
rms = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id!=dtype.id).all()
rm_cnt=0
for rm in rms:
session.query(EntryDirLink).filter(EntryDirLink.entry_id==rm.id).delete()
session.query(File).filter(File.eid==rm.id).delete()
session.query(Entry).filter(Entry.id==rm.id).delete()
RemoveFileFromDB(rm.id)
AddLogForJob( job, f"INFO: Removing {rm.name} from system as it is no longer on the file system")
rm_cnt+=1
rmdirs = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id==1).order_by(Entry.id.desc()).all()
for rmdir in rmdirs:
print(f"We have a directory ({rmdir.name}) to delete from DB as it no longer exists on fs")
session.query(EntryDirLink).filter(EntryDirLink.entry_id==rmdir.id).delete()
session.query(Dir).filter(Dir.eid==rmdir.id).delete()
session.query(Entry).filter(Entry.id==rmdir.id).delete()
RemoveDirFromDB(rmdir.id)
AddLogForJob( job, f"INFO: Removing {rmdir.name} from system as it is no longer on the file system")
rm_cnt+=1
return rm_cnt
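A minimal sketch of the call pattern the new helpers enable (hypothetical ids, not code from this commit); note that both helpers issue their DELETEs on the shared session and leave committing to the caller.

    # entry_id / dir_id are assumed to be valid Entry.id values
    RemoveFileFromDB(entry_id)   # removes the Entry, its File row and its EntryDirLink rows
    RemoveDirFromDB(dir_id)      # removes the Entry, its Dir row and its EntryDirLink rows
    session.commit()             # neither helper commits; persisting is left to the caller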
@@ -635,7 +646,8 @@ def GenHashAndThumb(job, e):
session.commit()
stat = os.stat( e.in_dir[0].path_prefix + '/' + e.name )
if stat.st_ctime < e.file_details[0].last_hash_date:
print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
if DEBUG==1:
print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
job.current_file_num+=1
return
@@ -876,19 +888,19 @@ def RemoveDups(job):
del_me_lst = []
for f in files:
if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
elif f.file_details[0].eid == int(keeping):
found = f
else:
del_me_lst.append(f)
if found == None:
AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" )
else:
AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
for del_me in del_me_lst:
AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
dup_cnt += 1
RemoveFileFromDB(del_me.id)
if 'kdid-' in jex.name:
_, which = jex.name.split('-')
@@ -896,23 +908,24 @@ def RemoveDups(job):
keeping=jex.value
tmp=session.query(Dir).filter(Dir.eid==keeping).first()
AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
for hash in hashes[0:-1].split(","):
for hash in hashes.split(","):
files=session.query(Entry).join(File).filter(File.hash==hash).all()
found=None
for f in files:
if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
if f.in_dir[0].eid == int(keeping):
found=f
else:
del_me=f
if found == None:
AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" )
else:
AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
RemoveFileFromDB(del_me.id)
dup_cnt += 1
FinishJob(job, f"Finished removing {dup_cnt} duplicate files" )