From c543f21f3e822341a7e0f7ca3568fb1112747cb4 Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Sun, 17 Jan 2021 15:20:32 +1100 Subject: [PATCH] added TODO thoughts --- TODO | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/TODO b/TODO index 73ef164..bbf30ef 100644 --- a/TODO +++ b/TODO @@ -1,18 +1,13 @@ ### DB - * create tables ### BACKEND * need a "batch" processing system that uses ionice to minimise load on mara and is threaded and used DB to interact with gunicorn'd pa - DB tables to share between pa and pa_jobs: - create table pa_job_state ( state string, num_jobs_active integer, num_jobs_complete ) - create table pa_jobs ( job_id, <...> ) - - pa_jobs (threaded python add separate from photoassistant) - takes over files.py + DONE: pa_jobs (threaded python add separate from photoassistant) + DONE: takes over files.py has ai.py needs broad jobs to: - find files in {import_dir & storage_dir} + DONE: find files in {import_dir & storage_dir} calc thumbs/hashes { " } run AI against { " } move files from import_dir to appropriate sub_dir in storage_dir (list will come from pa web) @@ -25,16 +20,71 @@ but not sure if any job won't have passes, num files, etc. BUT if it ever does this is how we deal with it on start-up: - set state: initialising - get settings, if no import_dir -> set state: Configuration Needed + DONE: set state: initialising + DONE: get settings, if no import_dir -> set state: Configuration Needed check DB for any files -> if some, set state: awaiting jobs -> if none, set state: Find Files (run that code) -> when job finished, then Import Files (import_dir only) -> when job finished, the set state: awating jobs - implications, pa_job needs to know if it depends on another, e.g. 
do find before import (in example above) - pa web needs to show status for the job engine IN YOUR FACE when not at awaiting jobs yet, afterwards, maybe a simple jobs() that is clickable on the gui? + DONE: implications, pa_job needs to know if it depends on another, e.g. do find before import (in example above) + DONE: pa web needs to show status for the job engine IN YOUR FACE when not at awaiting jobs yet, afterwards, maybe a simple jobs() that is clickable on the gui? + + +PROPOSED CHANGES: + DIR 1<-to->M FILE + DIR -> path_prefix (move from file), num_files_in_dir + would stat it specifically, rather than each file when scan for new (so last_scan date moves here from a generic settings) + FILE -> fname, size, type (vid/img, etc.), hash, thumb, has_unidentified_face + ?has_face?, + + in FILE_PERSON_LINK add: + refimg, link to AI_scan + AI_SCAN: + date of scan + version of code? + settings used + + NewJob should occur per path (or potentially all paths in import_dir), then you know #files for new non-scan jobs + if we make jobs be minimum, then ditch pass, and just use wait_for... + + + Jobs should be: + scan for files in DIR -> returns knows num_files in DIR + get thumbs for files (in DIR) + TODO: The 2 above lines are in GenerateFileData AND work on all import_dir paths at once, need to split this up (so our current setup would be 5 jobs (1 fail) on borric): + Job-1: Scan images_to_process -> success (num_files_1) + Job-2: Scan C: -> fail (report back to web) + Job-3: scan new_image_dir -> success (num_files_2) + Job-4 (wait on 1): Gen thumbs images_to_process (on num_files_1) + Job-5 (wait on 3): Gen thumbs new_image_dir (on num_files_2) + (worst case if a job waited on job-2, and 2 failed, then auto-fail it.) + + process AI (<1 person>) for files (in DIR), e.g. 
+ Job-7: scan 'cam' in images_to_process (num_files_1) + Job-8 (wait for 7): scan 'cam' in new_image_dir (num_files_2) + Job-9: scan 'dad' in images_to_process (num_files_1) + Job-10 (wait for 9): scan 'dad' in new_image_dir (num_files_2) + etc. + this way we ditch passes + + + + num jobs active, num jobs completed, let's bin them from the pa_job_manager table -> calculate them every time (simple select count(1) from job where pa_job_state == "Completed") + FE does not really care what 'state' the job engine is in anyway, so maybe we bin that table, make it a local class to pa_job_manager? + + When AI kicks in, it processes per person per DIR, only compares to an image if it has_unidentified_face + + Admin + -> reset face_flag + + AI + -> rescan for --> can do this on new image only + (optimised to only scan has_unidentified_face)... BUT if you change + thresholds on AI, or we get a new/better one some day, then it can rescan + all images with faces, or if we 'reset face_flag' rescan all images + ### UI @@ -42,8 +92,10 @@ ### AI * store reference images (UI allows this now) * check images + * allow for threshold/settings to be tweaked from the GUI + - it would be good to then say, just run the scanner against this image or maybe this DIR, to see how it IDs ppl ### SORTER * date stuff * exif processing? - * location stuff + * location stuff - test a new photo from my camera out