From 7fa0d3e02b4e17d124c63691261f7faa73953c6a Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Sun, 14 Feb 2021 16:20:17 +1100
Subject: [PATCH] fixed primary key bug with PA_JobManager_FE_Message, made
 MessageToFE return the new message id so it can be used in the link to
 rm_dups, added code to run rm_dups as a job; it does everything but the
 actual deletes now, but the data is ready to be used

---
 pa_job_manager.py | 46 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/pa_job_manager.py b/pa_job_manager.py
index fc2084a..47bc6e6 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -220,7 +220,7 @@ class Job(Base):
 class PA_JobManager_FE_Message(Base):
     __tablename__ = "pa_job_manager_fe_message"
     id = Column(Integer, Sequence('pa_job_manager_fe_message_id_seq'), primary_key=True )
-    job_id = Column(Integer, ForeignKey('job.id'), primary_key=True )
+    job_id = Column(Integer, ForeignKey('job.id') )
     alert = Column(String)
     message = Column(String)
     def __repr__(self):
@@ -233,7 +233,7 @@ def MessageToFE( job_id, alert, message ):
     msg = PA_JobManager_FE_Message( job_id=job_id, alert=alert, message=message)
     session.add(msg)
     session.commit()
-    return
+    return msg.id

 def ProcessImportDirs(parent_job):
     settings = session.query(Settings).first()
@@ -309,6 +309,8 @@ def RunJob(job):
         JobGetFileDetails(job)
     elif job.name == "checkdups":
         CheckForDups(job)
+    elif job.name == "rmdups":
+        RemoveDups(job)
     elif job.name == "processai":
         JobProcessAI(job)
     else:
@@ -316,7 +318,7 @@ def RunJob(job):
     # okay, we finished a job, so check for any jobs that are dependant on this and run them...
     # session.close()
     if job.pa_job_state != "Completed":
-        FinishJob(job, "PA Job Manager - This is a catchall to close of a Job, this sould never be seen and implies a job did not complete formally?", "Failed" )
+        FinishJob(job, "PA Job Manager - This is a catchall to close off a Job, this should never be seen and implies a job did not actually complete?", "Failed" )
     HandleJobs()
     return

@@ -831,16 +833,42 @@ def CheckForDups(job):
     res = session.execute( f"select count(e1.name) as count from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and d1.path_prefix like '%{path}%' and f1.hash = f2.hash and e1.id != e2.id" )
     for row in res:
         if row.count > 0:
-            MessageToFE( job.id, "danger", "Found duplicate(s), click  here to finalise import by removing duplicates" )
+            AddLogForJob(job, f"Found duplicates, creating status message in front-end for attention")
+            msg_id=MessageToFE( job.id, "danger", 'replaceme' )
+            session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg_id).update( { 'message' : f'Found duplicate(s), click  here to finalise import by removing duplicates' } )
+    FinishJob(job, f"Finished Looking for Duplicates")
+    return
+
+def RemoveDups(job):
+    # clear the FE message, we are dealing with the duplicates for this job now...
+    fe_msg_id = [jex.value for jex in job.extra if jex.name == "fe_msg_id"][0]
+    print( f"need to clear FE message: {fe_msg_id}")
+    msg=session.query(PA_JobManager_FE_Message).get(fe_msg_id)
+    session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==fe_msg_id).delete()
+    session.commit()
+
+    if DEBUG:
+        AddLogForJob(job, f"DEBUG: Starting Remove Duplicates job...")
+    for jex in job.extra:
+        if 'kfid-' in jex.name:
+            pfx, which = jex.name.split('-')
+            hash=[jex.value for jex in job.extra if jex.name == f"kfhash-{which}"][0]
+            AddLogForJob(job, f"deleting some files with hash: {hash} but keeping file id={jex.value}" )
+        if 'kdid-' in jex.name:
+            pfx, which = jex.name.split('-')
+            hash=[jex.value for jex in job.extra if jex.name == f"kfhash-{which}"][0]
+            AddLogForJob(job, f"deleting some files with hashes: {hash[0:40]}... but keeping files in dir id={jex.value}" )
+    FinishJob(job, f"FAKE finished removal, have not actually done the deletes yet - that will be the last bit" )
+    return
+
 if __name__ == "__main__":
     print("INFO: PA job manager starting - listening on {}:{}".format( PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT) )
-##### have to test the the lines below (to force a scan on startup)
-    now=datetime.now(pytz.utc)
-    job=Job(start_time=now, last_update=now, name="scannow", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=0 )
-    session.add(job)
-    session.commit()
+#    now=datetime.now(pytz.utc)
+#    job=Job(start_time=now, last_update=now, name="scannow", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=0 )
+#    session.add(job)
+#    session.commit()
     HandleJobs()
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
         s.bind((PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT))
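
Note on the MessageToFE change above: the subject line says the returned id is meant to be wired into the link to rm_dups. A minimal sketch of how the create-then-update step in CheckForDups might eventually fill that link in; the route and anchor markup here are assumptions for illustration only and are not defined anywhere in this patch:

# Hypothetical illustration - /job/rmdups?fe_msg_id=... is an assumed front-end
# route; only the create-then-update pattern comes from CheckForDups above.
msg_id = MessageToFE(job.id, "danger", 'replaceme')
link = f'<a href="/job/rmdups?fe_msg_id={msg_id}">here</a>'   # assumed FE route
session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id == msg_id).update(
    {'message': f'Found duplicate(s), click {link} to finalise import by removing duplicates'})
session.commit()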
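
RemoveDups above stops short of the actual deletes, as the commit message says. A hedged sketch of what that last step might look like for the kfid-/kfhash- pairs, reusing the module's existing session and AddLogForJob; the ORM classes File and Entry (guessed from the file and entry tables in CheckForDups' SQL) and the reading of kfid as the kept entry id are assumptions, not something this patch defines:

# Sketch only - File/Entry model names and columns are assumed from the table
# names in CheckForDups' SQL; entry_dir_link rows would also need cleaning up
# before the entry rows can be removed without breaking foreign keys.
def remove_dup_files_sketch(job):
    for jex in job.extra:
        if 'kfid-' not in jex.name:
            continue
        _, which = jex.name.split('-')
        keep_id = int(jex.value)
        dup_hash = [x.value for x in job.extra if x.name == f"kfhash-{which}"][0]
        # every entry that shares the hash, except the one we keep
        victims = session.query(File.eid).filter(File.hash == dup_hash, File.eid != keep_id).all()
        for (eid,) in victims:
            session.query(File).filter(File.eid == eid).delete()
            session.query(Entry).filter(Entry.id == eid).delete()
            AddLogForJob(job, f"removed duplicate entry id={eid} for hash {dup_hash[0:12]}...")
    session.commit()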