scans now put AI back on by default, and optimise to only call path-level scans once per path, rather than once per dir in path

This commit is contained in:
2022-01-16 12:51:25 +11:00
parent 684cc27df9
commit 096d4f79cf
4 changed files with 49 additions and 64 deletions

View File

@@ -160,8 +160,7 @@ class Entry(Base):
file_details = relationship( "File", uselist=False )
in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
# DDP: I think I need to test this outside of larger code, I think a
# directory has a full path of real full Path + '/' + self.name
# return the full path of this entry on the filesystem
def FullPathOnFS(self):
if self.in_dir:
s=self.in_dir.in_path.path_prefix + '/'
@@ -527,54 +526,36 @@ def JobsForPaths( parent_job, paths, ptype ):
if p:
cfn=p.num_files
jex=JobExtra( name="path", value=path )
jex2=JobExtra( name="path_type", value=ptype.id )
job=Job(start_time=now, last_update=now, name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=cfn )
job.extra.append(jex)
job.extra.append(jex2)
session.add(job)
job1=Job(start_time=now, last_update=now, name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=cfn )
job1.extra.append( JobExtra( name="path", value=path ) )
job1.extra.append( JobExtra( name="path_type", value=ptype.id ) )
session.add(job1)
session.commit()
if parent_job:
AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a>".format( job.id, job.id, job.name ) )
AddLogForJob(parent_job, f"adding <a href='/job/{job1.id}'>job id={job1.id} {job1.name}</a>")
# force commit to make job.id be valid in use of wait_for later
session.commit()
jex2=JobExtra( name="path", value=path )
job2=Job(start_time=now, last_update=now, name="getfiledetails", state="New", wait_for=job.id, pa_job_state="New", current_file_num=0 )
job2.extra.append(jex2)
job2=Job(start_time=now, last_update=now, name="getfiledetails", state="New", wait_for=job1.id, pa_job_state="New", current_file_num=0 )
job2.extra.append( JobExtra( name="path", value=path ) )
session.add(job2)
session.commit()
if parent_job:
AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job2.id, job2.id, job2.name, job2.wait_for ) )
AddLogForJob(parent_job, f"adding <a href='/job/{job2.id}'>job id={job2.id} {job2.name}</a> (wait for: {job2.wait_for})")
"""
job3=Job(start_time=now, last_update=now, name="run_ai_on_path", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
job3.extra.append( JobExtra( name="person", value="all" ) )
job3.extra.append( JobExtra( name="ptype", value=ptype.name ) )
session.add(job3)
session.commit()
if parent_job:
AddLogForJob(parent_job, f"adding <a href='/job/{job3.id}'>job id={job3.id} {job3.name}</a> (wait for: {job3.wait_for})")
# make a wrapper to do these few lines (up to run_ai_on) in pa_job_manager and then call it in ai.py in the f/end and here...
jex.append( JobExtra( name=f"person", value="all" ) )
paths=Path.query.join(PathType).filter(PathType.name=='Import').all()
path_cnt=0
for p in paths:
d = Dir.query.join(PathDirLink).filter(PathDirLink.path_id==p.id).filter(Dir.rel_path=='').first()
jex.append( JobExtra( name=f"eid-{path_cnt}", value=f"{d.eid}" ) )
path_cnt+=1
job=NewJob( "run_ai_on", 0, None, jex )
jex3=JobExtra( name="path", value=path )
job3=Job(start_time=now, last_update=now, name="processai", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
job3.extra.append(jex4)
session.add(job3)
session.commit()
if parent_job:
AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job3.id, job3.id, job3.name, job3.wait_for ) )
"""
### FIXME: wait for job3 not job2!
job4=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
session.add(job4)
session.commit()
if parent_job:
AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job4.id, job4.id, job4.name, job4.wait_for ) )
HandleJobs(False)
job4=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=job3.id, pa_job_state="New", current_file_num=0 )
session.add(job4)
session.commit()
if parent_job:
AddLogForJob(parent_job, f"adding <a href='/job/{job4.id}'>job id={job4.id} {job4.name}</a> (wait for: {job4.wait_for})" )
HandleJobs(False)
return
##############################################################################
@@ -686,6 +667,8 @@ def RunJob(job):
JobRestoreFiles(job)
elif job.name == "run_ai_on":
JobRunAIOn(job)
elif job.name == "run_ai_on_path":
JobRunAIOnPath(job)
elif job.name == "transform_image":
JobTransformImage(job)
elif job.name == "clean_bin":
@@ -1388,6 +1371,22 @@ def AddToJobImageCount(job, entry ):
job.num_files += 1
return
####################################################################################################################################
# JobRunAIOnPath(): job that uses the path type (Import or Storage) to add to the job extras the eid of the root Dir
# of each Path of that type (as eid-0, eid-1, ...), so we can then just call JobRunAIOn
####################################################################################################################################
def JobRunAIOnPath(job):
    # the job's "ptype" extra names the path type to run AI over; the first match is used
    # (raises IndexError if the job carries no "ptype" extra)
    ptype_name = [extra.value for extra in job.extra if extra.name == "ptype"][0]
    matching_paths = session.query(Path).join(PathType).filter(PathType.name==ptype_name).all()
    # add one "eid-<n>" extra per matching path, holding the eid of that path's root Dir (rel_path == '')
    for idx, path in enumerate(matching_paths):
        root_dir = session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==path.id).filter(Dir.rel_path=='').first()
        job.extra.append( JobExtra( name=f"eid-{idx}", value=f"{root_dir.eid}" ) )
    # with the eid-<n> extras in place, the regular run_ai_on handler does the actual work
    JobRunAIOn(job)
####################################################################################################################################
# JobRunAIOn(): job that grabs relevant refimgs to scan against the given set of entries (from seln made in F/E)
####################################################################################################################################