Scans now put AI jobs back on by default, and are optimised to call path-level AI scans once per path rather than once per dir in the path

This commit is contained in:
2022-01-16 12:51:25 +11:00
parent 684cc27df9
commit 096d4f79cf
4 changed files with 49 additions and 64 deletions

TODO
View File

@@ -1,9 +1,4 @@
 ## GENERAL
-* put back AI jobs auto after scan (& setting for this?)
-* CleanUpInDir()
-* when it hits an error should stop and not keep cleaning
 *** Need to double-check scheduled jobs running in PROD (can use new pa_job_manager.log)
 * only show say last week of jobs, or last 50? and archive the rest into an archived_jobs table
@@ -13,6 +8,7 @@
 - [DONE] joblog page should show last X logs, <show all button>, newest X logs,
 - [DONE/TEST?] need to use sm-txt class more as the space is too constrained
 - make clickable sort toggles (or just give in and do a dataTable?)
+- also allow joblog search
 - [DONE] need to archive jobs
 * per file you could select an unknown face and add it as a ref img to an existing person, or make a new person and attach?
@@ -83,9 +79,6 @@
 * consider size of bbox of face / 'high-quality' faces -- if face is too small in image, dont match it
 ### UI
-??? ipads can't do selections and contextMenus, do I want to re-factor to cater for this?
-- partial fix, double-click / tap allows viewing (most useful context-menu feature)
 * viewer needs to allow toggle to scan_model (and prob. right-click on file... AI (with CNN) AI (with hog)
 - make the form-select AI_Model actually do the change (but need more mem on mara really)
@@ -98,19 +91,13 @@
 file details is sort of crap - only works on import path
 - probably better to have a different 'view', e.g. folders/flat/detailed
-job management:
-joblog needs to show only say 25 log lines (last?) by default and allow see start/end, page by page, and search
-clear all jobs (via GUI)
-clear old jobs? (via b/end?)
--> auto delete jobs older than ???
-clear FE messages (at least dup ones on File data deletion) -- in backend
 timelineview? (I think maybe sunburst for large amounts of files, then maybe something more timeline-series for drilling in?)
 (vertical timeline, date has thumbnails (small) horizontally along
 a page, etc.?
 https://www.highcharts.com/docs/chart-and-series-types/timeline-series
 https://www.highcharts.com/demo/sunburst
 https://www.highcharts.com/demo/heatmap
 https://www.highcharts.com/demo/packed-bubble-split
 ### SORTER

View File

@@ -107,7 +107,7 @@ class Duplicates(PA):
             return True
         return False
-    # this stores this object into the keep from same path list (DDP: sometimes there can be more than 1 SP, e.g SP to SP to IP)
+    # this stores this object into the keep from same path list (sometimes there can be more than 1 SP, e.g SP to SP to IP)
     # for now, by not dealing with the extra SP, we will just delete the IP, and force a check_dups after deleting, it will then
     # pick up and process the SP to SP - if still needed -- if there is only SP1 to SP2, then the per_path_dup will pick it up and
     # I believe this will all work, but doesn't hurt to do an extra check_dups again
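The deletion/re-check flow described in the comment above can be illustrated with a small, hedged sketch. This is not the real `Duplicates` class — the dict-based copies, `dedup_pass`, and `resolve_dups` are illustrative stand-ins — but it shows the intent: given duplicate copies of one file tagged by path type (`SP` = storage path, `IP` = import path), delete the IP copies first, then re-run the check so any remaining SP-to-SP duplicate is picked up by a later pass.

```python
# Hypothetical model of the check_dups / extra-check_dups behaviour.
# A "copy" is a dict with 'path' and 'ptype' ('SP' or 'IP').

def dedup_pass(copies):
    """One check_dups-style pass: if any SP copy exists, drop all IP copies.
    Returns (kept_copies, deleted_copies)."""
    if any(c["ptype"] == "SP" for c in copies):
        kept = [c for c in copies if c["ptype"] != "IP"]
        deleted = [c for c in copies if c["ptype"] == "IP"]
        return kept, deleted
    return list(copies), []

def resolve_dups(copies):
    """Run passes until nothing more is deleted, mimicking the forced
    extra check_dups mentioned in the comment above."""
    deleted_all = []
    while True:
        copies, deleted = dedup_pass(copies)
        deleted_all.extend(deleted)
        if not deleted:
            break
    return copies, deleted_all

# SP -> SP -> IP: the first pass removes only the IP copy; the remaining
# SP-to-SP pair is left for the per-path duplicate handling.
copies = [{"path": "/store/a.jpg",  "ptype": "SP"},
          {"path": "/store2/a.jpg", "ptype": "SP"},
          {"path": "/import/a.jpg", "ptype": "IP"}]
kept, deleted = resolve_dups(copies)
```

As the comment says, the SP1-to-SP2 pair deliberately survives this step so the per-path duplicate logic can deal with it.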

View File

@@ -142,7 +142,6 @@ class Options(PA):
         pref.st_offset=self.offset
         pref.size=self.size
         pref.folders=self.folders
-        # DDP: does this need to save changed preferences here???
         db.session.add(pref)
         db.session.commit()

View File

@@ -160,8 +160,7 @@ class Entry(Base):
     file_details = relationship( "File", uselist=False )
     in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
-    # DDP: I think I need to test this outside of larger code, I think a
-    # directory has a full path of real full Path + '/' + self.name
+    # return the full path of this entry on the filesystem
     def FullPathOnFS(self):
         if self.in_dir:
             s=self.in_dir.in_path.path_prefix + '/'
@@ -527,54 +526,36 @@ def JobsForPaths( parent_job, paths, ptype ):
         if p:
             cfn=p.num_files
-        jex=JobExtra( name="path", value=path )
-        jex2=JobExtra( name="path_type", value=ptype.id )
-        job=Job(start_time=now, last_update=now, name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=cfn )
-        job.extra.append(jex)
-        job.extra.append(jex2)
-        session.add(job)
+        job1=Job(start_time=now, last_update=now, name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=cfn )
+        job1.extra.append( JobExtra( name="path", value=path ) )
+        job1.extra.append( JobExtra( name="path_type", value=ptype.id ) )
+        session.add(job1)
         session.commit()
         if parent_job:
-            AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a>".format( job.id, job.id, job.name ) )
+            AddLogForJob(parent_job, f"adding <a href='/job/{job1.id}'>job id={job1.id} {job1.name}</a>")
         # force commit to make job.id be valid in use of wait_for later
         session.commit()
-        jex2=JobExtra( name="path", value=path )
-        job2=Job(start_time=now, last_update=now, name="getfiledetails", state="New", wait_for=job.id, pa_job_state="New", current_file_num=0 )
-        job2.extra.append(jex2)
+        job2=Job(start_time=now, last_update=now, name="getfiledetails", state="New", wait_for=job1.id, pa_job_state="New", current_file_num=0 )
+        job2.extra.append( JobExtra( name="path", value=path ) )
         session.add(job2)
         session.commit()
         if parent_job:
-            AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job2.id, job2.id, job2.name, job2.wait_for ) )
-        """
-        # make a wrapper to do these few lines (up to run_ai_on) in pa_job_manager and then call it in ai.py in the f/end and here...
-        jex.append( JobExtra( name=f"person", value="all" ) )
-        paths=Path.query.join(PathType).filter(PathType.name=='Import').all()
-        path_cnt=0
-        for p in paths:
-            d = Dir.query.join(PathDirLink).filter(PathDirLink.path_id==p.id).filter(Dir.rel_path=='').first()
-            jex.append( JobExtra( name=f"eid-{path_cnt}", value=f"{d.eid}" ) )
-            path_cnt+=1
-        job=NewJob( "run_ai_on", 0, None, jex )
-        jex3=JobExtra( name="path", value=path )
-        job3=Job(start_time=now, last_update=now, name="processai", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
-        job3.extra.append(jex4)
-        session.add(job3)
-        session.commit()
-        if parent_job:
-            AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job3.id, job3.id, job3.name, job3.wait_for ) )
-        """
-        ### FIXME: wait for job3 not job2!
-        job4=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
+            AddLogForJob(parent_job, f"adding <a href='/job/{job2.id}'>job id={job2.id} {job2.name}</a> (wait for: {job2.wait_for})")
+        job3=Job(start_time=now, last_update=now, name="run_ai_on_path", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
+        job3.extra.append( JobExtra( name="person", value="all" ) )
+        job3.extra.append( JobExtra( name="ptype", value=ptype.name ) )
+        session.add(job3)
+        session.commit()
+        if parent_job:
+            AddLogForJob(parent_job, f"adding <a href='/job/{job3.id}'>job id={job3.id} {job3.name}</a> (wait for: {job3.wait_for})")
+        job4=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=job3.id, pa_job_state="New", current_file_num=0 )
         session.add(job4)
         session.commit()
         if parent_job:
-            AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job4.id, job4.id, job4.name, job4.wait_for ) )
+            AddLogForJob(parent_job, f"adding <a href='/job/{job4.id}'>job id={job4.id} {job4.name}</a> (wait for: {job4.wait_for})" )
     HandleJobs(False)
     return
 ##############################################################################
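After this change, `JobsForPaths` chains four jobs per path via `wait_for`: importdir → getfiledetails → run_ai_on_path → checkdups. A minimal, self-contained sketch of that chaining — `FakeJob`, `chain_jobs`, and `run_ready_jobs` are illustrative stand-ins, not the real `Job` model or `HandleJobs` — shows why each job waits on the previous one's id:

```python
# Stand-in for the Job model: only id, name, wait_for, state.
class FakeJob:
    _next_id = 1
    def __init__(self, name, wait_for=None):
        self.id = FakeJob._next_id
        FakeJob._next_id += 1
        self.name = name
        self.wait_for = wait_for   # id of the job this one must run after
        self.state = "New"

def chain_jobs(names):
    """Create one job per name, each waiting on the previous job's id,
    like the importdir -> getfiledetails -> run_ai_on_path -> checkdups chain."""
    jobs, prev = [], None
    for name in names:
        job = FakeJob(name, wait_for=prev.id if prev else None)
        jobs.append(job)
        prev = job
    return jobs

def run_ready_jobs(jobs):
    """Tiny HandleJobs-style loop: a job runs once its wait_for job is done."""
    order, done = [], set()
    while len(done) < len(jobs):
        for job in jobs:
            if job.id in done:
                continue
            if job.wait_for is None or job.wait_for in done:
                order.append(job.name)
                done.add(job.id)
    return order

order = run_ready_jobs(
    chain_jobs(["importdir", "getfiledetails", "run_ai_on_path", "checkdups"]))
```

The commit in between each real job creation serves the same purpose as the `prev.id` read here: the id must exist before the next job can wait on it, which is what the "force commit to make job.id be valid" comment is about.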
@@ -686,6 +667,8 @@ def RunJob(job):
         JobRestoreFiles(job)
     elif job.name == "run_ai_on":
         JobRunAIOn(job)
+    elif job.name == "run_ai_on_path":
+        JobRunAIOnPath(job)
     elif job.name == "transform_image":
         JobTransformImage(job)
     elif job.name == "clean_bin":
@@ -1388,6 +1371,22 @@ def AddToJobImageCount(job, entry ):
         job.num_files += 1
     return
+####################################################################################################################################
+# JobRunAIOnPath(): job that uses path type (Import or Storage) to add to the job extras - dir_eid of the root Dir of the Path
+# so we can then just call JobRunAIOn
+####################################################################################################################################
+def JobRunAIOnPath(job):
+    which_ptype=[jex.value for jex in job.extra if jex.name == "ptype"][0]
+    paths=session.query(Path).join(PathType).filter(PathType.name==which_ptype).all()
+    path_cnt=0
+    for p in paths:
+        d = session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==p.id).filter(Dir.rel_path=='').first()
+        job.extra.append( JobExtra( name=f"eid-{path_cnt}", value=f"{d.eid}" ) )
+        path_cnt+=1
+    JobRunAIOn(job)
+    return
 ####################################################################################################################################
 # JobRunAIOn(): job that grabs relevant refimgs to scan against the given set of entries (from seln made in F/E)
 ####################################################################################################################################
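What the new `JobRunAIOnPath` does can be sketched with plain data structures (the real code queries `Path`/`Dir` through SQLAlchemy; `build_eid_extras` and the dict-based paths below are hypothetical stand-ins): for every path of the requested type it finds the root dir (`rel_path == ''`) and attaches its eid to the job as extras named `eid-0`, `eid-1`, ..., which `JobRunAIOn` then consumes.

```python
# Hypothetical, ORM-free model of the eid-N extras that JobRunAIOnPath builds.
# Each path dict stands in for a Path row joined to its root Dir.

def build_eid_extras(paths, which_ptype):
    """paths: list of dicts with 'ptype' (path type name) and 'root_eid'
    (eid of the Dir whose rel_path is '').  Returns JobExtra-like dicts."""
    extras = []
    path_cnt = 0
    for p in paths:
        if p["ptype"] != which_ptype:
            continue  # the real code filters by PathType.name in the query
        extras.append({"name": f"eid-{path_cnt}", "value": str(p["root_eid"])})
        path_cnt += 1
    return extras

paths = [{"ptype": "Import",  "root_eid": 101},
         {"ptype": "Storage", "root_eid": 202},
         {"ptype": "Import",  "root_eid": 303}]
extras = build_eid_extras(paths, "Import")
```

This is also why the per-path scan now runs once per path rather than once per dir: only the root dir eid of each matching path is queued.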