Cleaned up some log / debug output. Fixed a bug where we stripped a trailing comma from the list of hashes even when there was none, so the last hash was truncated and could never be found. Also created convenience functions to remove a file / dir from the DB for the exists_on_fs code, and re-used them so that deleting duplicates also removes the files from the DB.
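For context, a minimal sketch of the hash-splitting bug this commit fixes; the value of hashes below is purely illustrative:

    # Illustrative only: the job carries a comma-separated list of hashes with no trailing comma.
    hashes = "aaa111,bbb222,ccc333"

    # Old behaviour: dropping the last character (meant to strip a trailing comma)
    # truncates the final hash, so it can never be matched in the DB.
    print(hashes[0:-1].split(","))   # ['aaa111', 'bbb222', 'ccc33']

    # Fixed behaviour: split the string as-is.
    print(hashes.split(","))         # ['aaa111', 'bbb222', 'ccc333']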
@@ -1,4 +1,4 @@
 ###
 #
 # This file controls the 'external' job control manager, that (periodically #
 # looks / somehow is pushed an event?) picks up new jobs, and processes them.
@@ -483,23 +483,34 @@ def ResetExistsOnFS(job, path):
     session.add(reset_file)
     return
 
+# Convenience function to remove a file from the database - and its associated links
+# used when scanning and a file has been removed out from under PA, or
+# when we remove duplicates
+def RemoveFileFromDB(id):
+    session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
+    session.query(File).filter(File.eid==id).delete()
+    session.query(Entry).filter(Entry.id==id).delete()
+    return
+
+# Convenience function to remove a dir from the database - and its associated links
+def RemoveDirFromDB(id):
+    session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
+    session.query(Dir).filter(Dir.eid==id).delete()
+    session.query(Entry).filter(Entry.id==id).delete()
+    return
+
 def HandleAnyFSDeletions(job):
     dtype=session.query(FileType).filter(FileType.name=='Directory').first()
     rms = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id!=dtype.id).all()
     rm_cnt=0
     for rm in rms:
-        session.query(EntryDirLink).filter(EntryDirLink.entry_id==rm.id).delete()
-        session.query(File).filter(File.eid==rm.id).delete()
-        session.query(Entry).filter(Entry.id==rm.id).delete()
+        RemoveFileFromDB(rm.id)
         AddLogForJob( job, f"INFO: Removing {rm.name} from system as it is no longer on the file system")
         rm_cnt+=1
 
     rmdirs = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id==1).order_by(Entry.id.desc()).all()
     for rmdir in rmdirs:
         print(f"We have a directory ({rmdir.name}) to delete from DB as it no longer exists on fs")
-        session.query(EntryDirLink).filter(EntryDirLink.entry_id==rmdir.id).delete()
-        session.query(Dir).filter(Dir.eid==rmdir.id).delete()
-        session.query(Entry).filter(Entry.id==rmdir.id).delete()
+        RemoveFileFromDB(rmdir.id)
         AddLogForJob( job, f"INFO: Removing {rmdir.name} from system as it is no longer on the file system")
         rm_cnt+=1
     return rm_cnt
@@ -635,6 +646,7 @@ def GenHashAndThumb(job, e):
     session.commit()
     stat = os.stat( e.in_dir[0].path_prefix + '/' + e.name )
     if stat.st_ctime < e.file_details[0].last_hash_date:
         if DEBUG==1:
             print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
         job.current_file_num+=1
         return
@@ -876,19 +888,19 @@ def RemoveDups(job):
             del_me_lst = []
             for f in files:
                 if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
-                    AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                    AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
                 elif f.file_details[0].eid == int(keeping):
                     found = f
                 else:
                     del_me_lst.append(f)
             if found == None:
-                AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
+                AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" )
             else:
                 AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
                 for del_me in del_me_lst:
-                    AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
+                    AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
                     os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
                     dup_cnt += 1
+                    RemoveFileFromDB(del_me.id)
 
     if 'kdid-' in jex.name:
         _, which = jex.name.split('-')
@@ -896,23 +908,24 @@ def RemoveDups(job):
         keeping=jex.value
         tmp=session.query(Dir).filter(Dir.eid==keeping).first()
         AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
-        for hash in hashes[0:-1].split(","):
+        for hash in hashes.split(","):
             files=session.query(Entry).join(File).filter(File.hash==hash).all()
             found=None
             for f in files:
                 if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
-                    AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                    AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
                 if f.in_dir[0].eid == int(keeping):
                     found=f
                 else:
                     del_me=f
 
             if found == None:
-                AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
+                AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" )
             else:
                 AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
-                AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
+                AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
                 os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
+                RemoveFileFromDB(del_me.id)
                 dup_cnt += 1
 
     FinishJob(job, f"Finished removing {dup_cnt} duplicate files" )
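For reference, a minimal sketch (not part of the commit) of the call pattern the duplicate-removal path now follows, using the helper and models from the diff above; the remove_duplicate wrapper and the explicit commit are illustrative assumptions:

    # Illustrative sketch only: mirrors the pattern used in RemoveDups above.
    # 'dup' is an Entry row whose on-disk file we have decided to delete.
    def remove_duplicate(dup):
        path = dup.in_dir[0].path_prefix + '/' + dup.name
        os.remove(path)           # delete the file from disk first
        RemoveFileFromDB(dup.id)  # then purge its EntryDirLink, File and Entry rows
        session.commit()          # assumption: the caller commits the session afterwards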