Cleaned up some debug statements, added logging, and committed code that verifies the os.remove(file) target matches the detected duplicate; the file removal is now actually performed.

This commit is contained in:
2021-02-17 21:46:29 +11:00
parent 6ef383f732
commit 26c2ef717c

View File

@@ -638,7 +638,6 @@ def GenHashAndThumb(job, e):
def ProcessAI(job, e):
if e.type.name != 'Image':
job.current_file_num+=1
print("DDP: ProcessAI: adding 1 to current_file_num as we have a non-image file")
return
file = e.in_dir[0].path_prefix + '/' + e.name
@@ -829,6 +828,7 @@ def GenVideoThumbnail(job, file):
def CheckForDups(job):
path=[jex.value for jex in job.extra if jex.name == "path"][0]
path='static'+'/'+os.path.basename(path[0:-1])
AddLogForJob( job, f"Check for duplicates in import path: {path}" )
res = session.execute( f"select count(e1.name) as count from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and d1.path_prefix like '%{path}%' and f1.hash = f2.hash and e1.id != e2.id" )
for row in res:
@@ -842,23 +842,64 @@ def CheckForDups(job):
def RemoveDups(job):
    """Remove duplicate files identified by an earlier CheckForDups run.

    The work is driven by ``job.extra`` entries:
      * ``fe_msg_id``   - id of the frontend message to clear before starting.
      * ``kfid-<n>``    - keep the file with this DB id and delete every other
                          file sharing the hash stored in ``kfhash-<n>``.
      * ``kdid-<n>``    - keep the copies living in the dir with this DB id and
                          delete copies elsewhere, for every hash in the
                          comma-separated (trailing-comma) list ``kdhash-<n>``.

    Files are only deleted after the keeper for a hash has been positively
    located; missing files on disk are logged and skipped.  Finishes the job
    with a summary count of removed duplicates.
    """
    # Clear the frontend message that offered the "remove duplicates" action.
    fe_msg_id = [jex.value for jex in job.extra if jex.name == "fe_msg_id"][0]
    session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id == fe_msg_id).delete()
    session.commit()
    AddLogForJob(job, f"INFO: Starting Remove Duplicates job...")
    dup_cnt = 0
    for jex in job.extra:
        if jex.name.startswith('kfid-'):
            # Keep one specific file id; delete all other files with the same hash.
            pfx, which = jex.name.split('-')
            file_hash = [x.value for x in job.extra if x.name == f"kfhash-{which}"][0]
            AddLogForJob(job, f"deleting duplicate files with hash: {file_hash} but keeping file with DB id={jex.value}")
            files = session.query(Entry).join(File).filter(File.hash == file_hash).all()
            keeping = int(jex.value)
            found = None
            del_me_lst = []
            for f in files:
                full_path = f.in_dir[0].path_prefix + '/' + f.name
                if not os.path.isfile(full_path):
                    # Was missing the f prefix, so the placeholders were logged literally.
                    AddLogForJob(job, f"ERROR: file (DB id: {f.eid} - {full_path}) does not exist? ignoring file")
                elif f.file_details[0].eid == keeping:
                    found = f
                else:
                    del_me_lst.append(f)
            if found is None:
                AddLogForJob(job, f"ERROR: Cannot find file with hash={file_hash} to process - skipping it)")
            else:
                # Only delete once we are certain the keeper exists.
                AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}")
                for del_me in del_me_lst:
                    AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}")
                    os.remove(del_me.in_dir[0].path_prefix + '/' + del_me.name)
                    dup_cnt += 1
        elif jex.name.startswith('kdid-'):
            # Keep the copies in one directory; delete copies elsewhere for each hash.
            pfx, which = jex.name.split('-')
            hashes = [x.value for x in job.extra if x.name == f"kdhash-{which}"][0]
            AddLogForJob(job, f"deleting duplicate files with hashes: {hashes[0:40]}... but keeping files in dir id={jex.value}")
            keeping = int(jex.value)
            keep_dir = session.query(Dir).filter(Dir.eid == keeping).first()
            AddLogForJob(job, f"Keeping files in {keep_dir.path_prefix}")
            # The hash list carries a trailing comma, hence the [0:-1] trim.
            for file_hash in hashes[0:-1].split(","):
                files = session.query(Entry).join(File).filter(File.hash == file_hash).all()
                found = None
                # Accumulate ALL deletion candidates; the old scalar `del_me`
                # was overwritten each iteration, so only the last duplicate
                # of each hash was ever logged and removed.
                del_me_lst = []
                for f in files:
                    full_path = f.in_dir[0].path_prefix + '/' + f.name
                    if not os.path.isfile(full_path):
                        # elif below: a missing file must not fall through into
                        # the delete list (os.remove would raise FileNotFoundError).
                        AddLogForJob(job, f"ERROR: file (DB id: {f.eid} - {full_path}) does not exist? ignoring file")
                    elif f.in_dir[0].eid == keeping:
                        found = f
                    else:
                        del_me_lst.append(f)
                if found is None:
                    AddLogForJob(job, f"ERROR: Cannot find file with hash={file_hash} to process - skipping it)")
                else:
                    AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}")
                    for del_me in del_me_lst:
                        AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}")
                        os.remove(del_me.in_dir[0].path_prefix + '/' + del_me.name)
                        dup_cnt += 1
    FinishJob(job, f"Finished removing {dup_cnt} duplicate files")
    return