New BUG (very minor), reordered TODOs, and added basic stale job handling - stale jobs are detected, and can be cancelled or restarted from the GUI

This commit is contained in:
2022-01-11 13:18:21 +11:00
parent bf04c862d6
commit a67c20d72b
6 changed files with 90 additions and 25 deletions

View File

@@ -558,7 +558,7 @@ def JobsForPaths( parent_job, paths, ptype ):
session.commit()
if parent_job:
AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job4.id, job4.id, job4.name, job4.wait_for ) )
HandleJobs()
HandleJobs(False)
return
##############################################################################
@@ -636,7 +636,7 @@ def RunJob(job):
# session.close()
if job.pa_job_state != "Completed":
FinishJob(job, "PA Job Manager - This is a catchall to close of a Job, this should never be seen and implies a job did not actually complete?", "Failed" )
HandleJobs()
HandleJobs(False)
return
##############################################################################
@@ -665,13 +665,26 @@ def FinishJob(job, last_log, state="Completed", pa_job_state="Completed"):
return
##############################################################################
# HandleJobs(): go through each job, if it New, then tackle it --
# TODO: why not only retrieve New jobs from DB?
# HandleJobs(first_run): go through each job, and if it is New, then tackle it --
# if first_run is True, then we are restarting the job manager and any job
# that was "In Progress" is stale and should be handled -- mark it as Stale,
# which allows the user in the F/E to cancel or restart it
##############################################################################
def HandleJobs():
if DEBUG:
print("INFO: PA job manager is scanning for new jobs to process")
for job in session.query(Job).all():
def HandleJobs(first_run=False):
if first_run:
print("INFO: PA job manager is starting up - check for stale jobs" )
else:
if DEBUG:
print("INFO: PA job manager is scanning for new jobs to process")
for job in session.query(Job).filter(Job.pa_job_state != 'Complete').all():
if first_run and job.pa_job_state == 'In Progress':
print( f"INFO: Found stale job#{job.id} - {job.name}" )
job.pa_job_state = 'Stale'
session.add(job)
AddLogForJob( job, "ERROR: Job has been marked stale as it did not complete" )
MessageToFE( job.id, "danger", f'Stale job, click&nbsp; <a href="javascript:document.body.innerHTML+=\'<form id=_fm method=POST action=/stale_jobs></form>\'; document.getElementById(\'_fm\').submit();">here</a>&nbsp;to restart or cancel' )
session.commit()
continue
if job.pa_job_state == 'New':
if job.wait_for != None:
j2 = session.query(Job).get(job.wait_for)
@@ -1657,7 +1670,7 @@ def JobMoveFiles(job):
# Sanity check, if prefix starts with /, reject it -> no /etc/shadow potentials
# Sanity check, if .. in prefix or suffix, reject it -> no ../../etc/shadow potentials
# Sanity check, if // in prefix or suffix, reject it -> not sure the code wouldn't try to make empty dirs, and I don't want to chase /////// cases; any 2 in a row is enough to reject
if '..' in prefix or '..' in suffix or prefix[0] == '/' or '//' in prefix or '//' in suffix:
if '..' in prefix or '..' in suffix or (prefix and prefix[0] == '/') or '//' in prefix or '//' in suffix:
FinishJob( job, f"ERROR: Not processing move as the paths contain illegal chars", "Failed" )
return
# also remove unnecessary slashes, just in case
@@ -1917,10 +1930,10 @@ if __name__ == "__main__":
InitialValidationChecks()
HandleJobs()
HandleJobs(True)
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind((PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT))
s.listen()
while True:
conn, addr = s.accept()
HandleJobs()
HandleJobs(False)