From e6fd9a9b13631cc682de9d63823c7a3bb7a70fbc Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Fri, 2 Apr 2021 16:19:40 +1100
Subject: [PATCH] made just one checkdups/rmdups front-end message once we have dups
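
Previously the front-end duplicates message carried a fe_msg_id that
rm_dups had to thread back through the job extras so the backend could
delete that one message, and each checkdups job only scanned its own
import path. checkdups no longer takes a path: it scans the whole
library for matching hashes, so a single front-end message covers every
pending import. RemoveDups therefore deletes all checkdups messages,
withdraws any still-queued checkdups jobs, and queues one fresh
checkdups job after the removals to confirm no duplicates remain.

In outline the new RemoveDups flow is (a sketch only; these helper
names are shorthand for the inline code in the diff, not real
functions):

    def RemoveDups(job):
        delete_all_checkdups_fe_messages()  # one message now covers all dups
        withdraw_queued_checkdups_jobs()    # FinishJob(..., "Withdrawn")
        remove_selected_duplicates(job)     # driven by kfid-/kdid-/kdhash- extras
        queue_followup_checkdups()          # final pass to verify none remain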
f"Check for duplicates in import path: {path}" ) - res = session.execute( f"select count(e1.name) as count from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and d1.path_prefix like '%{path}%' and f1.hash = f2.hash and e1.id != e2.id" ) + res = session.execute( f"select count(e1.name) as count from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id" ) for row in res: if row.count > 0: AddLogForJob(job, f"Found duplicates, Creating Status message in front-end for attention") - msg_id=MessageToFE( job.id, "danger", 'replaceme' ) - session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg_id).update( { 'message' : f'Found duplicate(s), click  here to finalise import by removing duplicates' } ) - FinishJob(job, f"Finished Looking for Duplicates") + MessageToFE( job.id, "danger", f'Found duplicate(s), click  here to finalise import by removing duplicates' ) + else: + FinishJob(job, f"No duplicates found") + FinishJob(job, f"Finished looking for duplicates") return def RemoveFileFromFS( del_me ): @@ -884,12 +881,17 @@ def RemoveFileFromFS( del_me ): return def RemoveDups(job): - # clear FE message we are deleting dups for this now... - fe_msg_id =[jex.value for jex in job.extra if jex.name == "fe_msg_id"][0] - session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==fe_msg_id).delete() + AddLogForJob(job, f"INFO: Starting Remove Duplicates job...") + # as checkdups covers all dups, delete all future dups messages, and Withdraw future checkdups jobs + msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups') + for msg in msgs: + session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete() + cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all() + for j in cd_jobs: + FinishJob(j, "Just removed duplicates - so no need to do any other checkdups, we will force 1 last one after the remove step", "Withdrawn") + print("here-loop") session.commit() - AddLogForJob(job, f"INFO: Starting Remove Duplicates job...") dup_cnt=0 for jex in job.extra: if 'kfid-' in jex.name: @@ -925,6 +927,7 @@ def RemoveDups(job): for hash in hashes.split(","): files=session.query(Entry).join(File).filter(File.hash==hash).all() found=None + del_me=None for f in files: if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False: AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? 
@@ -943,8 +945,14 @@ def RemoveDups(job):
             dup_cnt += 1
 
     FinishJob(job, f"Finished removing {dup_cnt} duplicate files" )
-    return
 
+    # Need to put another checkdups job in now to force / validate we have no dups
+    now=datetime.now(pytz.utc)
+    next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
+    session.add(next_job)
+    session.commit()
+    AddLogForJob(job, "adding job id={} {} to confirm there are no more duplicates".format( next_job.id, next_job.name ) )
+    return
 
 if __name__ == "__main__":
     print("INFO: PA job manager starting - listening on {}:{}".format( PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT) )