# Review notes (text ahead of the first "diff --git" line is ignored by git-apply/patch):
# - Patch re-flowed to one diff line per source line; blank context lines are
#   reconstructed to match the original hunk line counts.
# - RemoveDups fixes, confined to added (+) lines:
#     * the "does not exist" log messages are now real f-strings and report the
#       same DB id (file_details[0].eid) that the keep-file comparison uses;
#     * the keep-directory (kdid-) branch skips files missing on disk instead of
#       queueing them for os.remove, and collects *all* other duplicates per hash
#       in a list that is reset for every hash (previously a single, possibly
#       unbound or stale, variable);
#     * dropped the unused second os.path.isfile() call; `is None` comparisons.
# - Hunk 3 new-side length updated accordingly (64 -> 68); the post-image blob id
#   on the "index" line predates these review fixes.
diff --git a/pa_job_manager.py b/pa_job_manager.py
index 47bc6e6..4bd27b3 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -638,7 +638,6 @@ def GenHashAndThumb(job, e):
 def ProcessAI(job, e):
     if e.type.name != 'Image':
         job.current_file_num+=1
-        print("DDP: ProcessAI: adding 1 to current_file_num as we have a non-image file")
         return
 
     file = e.in_dir[0].path_prefix + '/' + e.name
@@ -829,6 +828,7 @@ def GenVideoThumbnail(job, file):
 def CheckForDups(job):
     path=[jex.value for jex in job.extra if jex.name == "path"][0]
     path='static'+'/'+os.path.basename(path[0:-1])
+    AddLogForJob( job, f"Check for duplicates in import path: {path}" )
 
     res = session.execute( f"select count(e1.name) as count from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and d1.path_prefix like '%{path}%' and f1.hash = f2.hash and e1.id != e2.id" )
     for row in res:
@@ -842,23 +842,68 @@ def CheckForDups(job):
 def RemoveDups(job):
     # clear FE message we are deleting dups for this now...
     fe_msg_id =[jex.value for jex in job.extra if jex.name == "fe_msg_id"][0]
-    print( f"need to clear FE message: {fe_msg_id}")
     msg=session.query(PA_JobManager_FE_Message).get(fe_msg_id)
     session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==fe_msg_id).delete()
     session.commit()
 
-    if DEBUG:
-        AddLogForJob(job, f"DEBUG: Starting Remove Duplicates job...")
+    AddLogForJob(job, f"INFO: Starting Remove Duplicates job...")
+    dup_cnt=0
     for jex in job.extra:
         if 'kfid-' in jex.name:
             pfx, which = jex.name.split('-')
             hash=[jex.value for jex in job.extra if jex.name == f"kfhash-{which}"][0]
-            AddLogForJob(job, f"deleting some files with hash: {hash} but keeping file id={jex.value}" )
+            AddLogForJob(job, f"deleting duplicate files with hash: {hash} but keeping file with DB id={jex.value}" )
+            files=session.query(Entry).join(File).filter(File.hash==hash).all()
+            keeping=jex.value
+            found=None
+            del_me_lst = []
+            # sort every entry with this hash into: missing on disk (skipped), the keeper, or to-delete
+            for f in files:
+                if not os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name):
+                    AddLogForJob( job, f"ERROR: file (DB id: {f.file_details[0].eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                elif f.file_details[0].eid == int(keeping):
+                    found = f
+                else:
+                    del_me_lst.append(f)
+            # only delete once the file to keep has been confirmed present on disk
+            if found is None:
+                AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
+            else:
+                AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
+                for del_me in del_me_lst:
+                    AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
+                    os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
+                    dup_cnt += 1
+
         if 'kdid-' in jex.name:
             pfx, which = jex.name.split('-')
-            hash=[jex.value for jex in job.extra if jex.name == f"kfhash-{which}"][0]
-            AddLogForJob(job, f"deleting some files with hashes: {hash[0:40]}... but keeping files in dir id={jex.value}" )
-    FinishJob(job, f"FAKE finished removal, have not actually done the deletes yet - that will be the last bit" )
+            hashes=[jex.value for jex in job.extra if jex.name == f"kdhash-{which}"][0]
+            keeping=jex.value
+            tmp=session.query(Dir).filter(Dir.eid==keeping).first()
+            AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
+            for hash in hashes[0:-1].split(","):
+                files=session.query(Entry).join(File).filter(File.hash==hash).all()
+                found=None
+                # fresh list per hash: never reuse a delete candidate left over from a previous hash
+                del_me_lst = []
+                for f in files:
+                    if not os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name):
+                        AddLogForJob( job, f"ERROR: file (DB id: {f.file_details[0].eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file")
+                    elif f.in_dir[0].eid == int(keeping):
+                        found=f
+                    else:
+                        del_me_lst.append(f)
+
+                if found is None:
+                    AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )
+                else:
+                    AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" )
+                    for del_me in del_me_lst:
+                        AddLogForJob(job, f"Remove duplicate file: {del_me.in_dir[0].path_prefix}/{del_me.name}" )
+                        os.remove( del_me.in_dir[0].path_prefix+'/'+del_me.name )
+                        dup_cnt += 1
+
+    FinishJob(job, f"Finished removing {dup_cnt} duplicate files" )
     return
 
 