Cleaned up some logs / debug output. Fixed a bug where we always stripped the last character off the comma-separated list of hashes: when there was no trailing comma this truncated the final hash, so we then failed to find that hash in the DB. Also created convenience functions to remove a file / dir from the DB for the exists_on_fs code, and re-used them so that deleting duplicates also removes the files from the DB.
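
For reference, a quick illustration of the hash-list bug fixed below: the old code always sliced off the last character before splitting, which is only correct when the string ends with a comma. A minimal repro (the hash values are made up):

    hashes = "aaa111,bbb222,ccc333"     # no trailing comma
    print(hashes[0:-1].split(","))      # ['aaa111', 'bbb222', 'ccc33'] - last hash truncated, DB lookup fails
    print(hashes.split(","))            # ['aaa111', 'bbb222', 'ccc333'] - correct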
@@ -1,4 +1,4 @@
-###
+
 #
 # This file controls the 'external' job control manager, that (periodically #
 # looks / somehow is pushed an event?) picks up new jobs, and processes them.
@@ -483,23 +483,34 @@ def ResetExistsOnFS(job, path):
     session.add(reset_file)
     return
 
+# Convenience function to remove a file from the database - and its associated links.
+# Used when scanning finds a file has been removed out from under PA, or
+# when we remove duplicates.
+def RemoveFileFromDB(id):
+    session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
+    session.query(File).filter(File.eid==id).delete()
+    session.query(Entry).filter(Entry.id==id).delete()
+    return
+
+# Convenience function to remove a dir from the database - and its associated links.
+def RemoveDirFromDB(id):
+    session.query(EntryDirLink).filter(EntryDirLink.entry_id==id).delete()
+    session.query(Dir).filter(Dir.eid==id).delete()
+    session.query(Entry).filter(Entry.id==id).delete()
+    return
+
 def HandleAnyFSDeletions(job):
     dtype=session.query(FileType).filter(FileType.name=='Directory').first()
     rms = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id!=dtype.id).all()
     rm_cnt=0
     for rm in rms:
-        session.query(EntryDirLink).filter(EntryDirLink.entry_id==rm.id).delete()
-        session.query(File).filter(File.eid==rm.id).delete()
-        session.query(Entry).filter(Entry.id==rm.id).delete()
+        RemoveFileFromDB(rm.id)
         AddLogForJob( job, f"INFO: Removing {rm.name} from system as it is no longer on the file system")
         rm_cnt+=1
 
     rmdirs = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id==1).order_by(Entry.id.desc()).all()
     for rmdir in rmdirs:
-        print(f"We have a directory ({rmdir.name}) to delete from DB as it no longer exists on fs")
-        session.query(EntryDirLink).filter(EntryDirLink.entry_id==rmdir.id).delete()
-        session.query(Dir).filter(Dir.eid==rmdir.id).delete()
-        session.query(Entry).filter(Entry.id==rmdir.id).delete()
+        RemoveDirFromDB(rmdir.id)
         AddLogForJob( job, f"INFO: Removing {rmdir.name} from system as it is no longer on the file system")
         rm_cnt+=1
     return rm_cnt
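
Side note on the new helpers: SQLAlchemy's Query.delete() issues a single bulk DELETE without loading objects and does not run the ORM's relationship cascades, which is presumably why the helpers delete the EntryDirLink rows explicitly before the Entry row; as in the diff, committing is left to the caller. A self-contained sketch of that bulk-delete behaviour (the Item model and in-memory database are illustrative only, not from this repo):

    from sqlalchemy import Column, Integer, String, create_engine
    from sqlalchemy.orm import declarative_base, Session

    Base = declarative_base()

    class Item(Base):
        __tablename__ = "items"
        id = Column(Integer, primary_key=True)
        name = Column(String)

    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    session = Session(engine)
    session.add_all([Item(name="a"), Item(name="b")])
    session.commit()

    # Bulk delete: one DELETE statement, no objects loaded, no ORM cascades -
    # hence deleting the link rows by hand, as RemoveFileFromDB does above.
    session.query(Item).filter(Item.name == "a").delete()
    session.commit()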
@@ -635,7 +646,8 @@ def GenHashAndThumb(job, e):
     session.commit()
     stat = os.stat( e.in_dir[0].path_prefix + '/' + e.name )
     if stat.st_ctime < e.file_details[0].last_hash_date:
-        print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
+        if DEBUG==1:
+            print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
         job.current_file_num+=1
         return
 
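
The OPTIM message is now emitted only when a module-level DEBUG flag is set; a tiny sketch of that gating pattern (only the DEBUG name comes from the diff, the rest is illustrative):

    DEBUG = 0   # module-level switch, assumed to be defined elsewhere in the file

    def debug_print(msg):
        # Chatty OPTIM/diagnostic output only when explicitly enabled.
        if DEBUG == 1:
            print(msg)

    debug_print("OPTIM: only shown when DEBUG is 1")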
@@ -876,19 +888,20 @@ def RemoveDups(job):
            del_me_lst = []
            for f in files:
                if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
-                   AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                   AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignoring file")
                elif f.file_details[0].eid == int(keeping):
                    found = f
                else:
                    del_me_lst.append(f)
            if found == None:
-               AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
+               AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it" )
            else:
                AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
                for del_me in del_me_lst:
-                   AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
+                   AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
                    os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
-                   dup_cnt += 1
+                   RemoveFileFromDB(del_me.id)
+                   dup_cnt += 1
 
        if 'kdid-' in jex.name:
            _, which = jex.name.split('-')
@@ -896,23 +909,24 @@ def RemoveDups(job):
            keeping=jex.value
            tmp=session.query(Dir).filter(Dir.eid==keeping).first()
            AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
-           for hash in hashes[0:-1].split(","):
+           for hash in hashes.split(","):
                files=session.query(Entry).join(File).filter(File.hash==hash).all()
                found=None
                for f in files:
                    if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False:
-                       AddLogForJob( job, "ERROR: file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                       AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignoring file")
                    if f.in_dir[0].eid == int(keeping):
                        found=f
                    else:
                        del_me=f
 
                if found == None:
-                   AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
+                   AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it" )
                else:
                    AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
-                   AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
+                   AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
                    os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
+                   RemoveFileFromDB(del_me.id)
                    dup_cnt += 1
 
    FinishJob(job, f"Finished removing {dup_cnt} duplicate files" )
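
For orientation, the per-path branch keeps whichever copy of each hash lives in the chosen directory and removes the rest; a standalone sketch of that selection logic, using made-up (hash, dir_id, path) tuples in place of the Entry/File rows:

    records = [
        ("aaa111", 7, "/photos/keep/a.jpg"),
        ("aaa111", 9, "/photos/dups/a.jpg"),
    ]
    keeping = 7   # directory id whose copies survive, like jex.value above

    for hash in {r[0] for r in records}:
        found, del_me = None, None
        for rec in (r for r in records if r[0] == hash):
            if rec[1] == keeping:
                found = rec
            else:
                del_me = rec
        if found is None:
            print(f"cannot find a copy of hash={hash} in dir {keeping}")
        elif del_me is not None:
            print(f"keep {found[2]}, remove {del_me[2]}")

Worth noting: the per-file branch above collects every duplicate in del_me_lst, while this per-path branch tracks a single del_me, so it removes at most one duplicate per hash per run.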