diff --git a/TODO b/TODO
index 338c16a..5863299 100644
--- a/TODO
+++ b/TODO
@@ -8,18 +8,16 @@
     (file_refimg_link --> file_refimg_link needs a face_num?)
 
 ### BACKEND
+    scan storage_dir
+    * need to be smart not to 'rescan' import_dir if it is inside storage_dir
+    * need to find / remove duplicate files within storage_dir itself, and between import_dir and storage_dir
+    * so we can AI that and search all photos
+
 -- started on some basic optimisations (commit logs every 100 logs, not each log)
     with debugs: import == 04:11, getfiledetails == 0:35:35
     without debugs: import == 04:03, getfiledetails == 0:35:36 -- not a sig diff
     with exifread & debug: import == 04:26
-    * CheckForDups():
-        in files.py
-        -> need to process the form and ACT on it (by deleting files)
-
-    * try again with walk to go through loop once quickly just to add up files,
-    * then start the import dir counting up / progress
-
     *** Need to use thread-safe sessions per Thread, half-assed version did not work
         need a manual button to restart it in the GUI,
@@ -43,8 +41,6 @@
     need some way to multiselect images [DONE]
         and then get them into a new "folder"
-        Will also need a 'real storage path' - one day allow scanning this too if we want
-        (so we can AI that and search all photos)
     timelineview?
     (I think maybe sunburst for large amounts of files, then maybe something more timeline-series for drilling in?)
     (vertical timeline, date has thumbnails (small) horizontally along
@@ -60,12 +56,7 @@
 ### SORTER
-    * duplicate files - this sql finds them:
-        select d1.path_prefix, e1.name, f1.hash, d2.path_prefix, e2.name, f2.hash from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by f1.hash, f2.hash;
-
-    * date stuff
     * exif processing?
     * location stuff
         test a new photo from my camera out -- image is in dir, need to look at exifread output
-    * also need to find / remove duplicate files from inside import_dir and in storage_dir
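On the duplicate-file items above: the SQL dropped from SORTER already finds matching file.hash pairs inside the database. A filesystem-side equivalent, as a minimal sketch only (find_duplicates and its arguments are hypothetical helpers, not the project's API), could hash and group files like this:

    import hashlib
    import os
    from collections import defaultdict

    def find_duplicates(*roots):
        """Group files by content hash; return only hashes seen more than once."""
        by_hash = defaultdict(list)
        for root in roots:
            for dirpath, _subdirs, files in os.walk(root):
                for name in files:
                    path = os.path.join(dirpath, name)
                    h = hashlib.sha256()
                    with open(path, "rb") as f:
                        # hash in 1 MiB chunks so big photos/videos are never fully in memory
                        for chunk in iter(lambda: f.read(1 << 20), b""):
                            h.update(chunk)
                    by_hash[h.hexdigest()].append(path)
        return {k: v for k, v in by_hash.items() if len(v) > 1}

    # e.g. find_duplicates(import_dir, storage_dir) covers both trees in one pass

Calling it with both roots at once handles duplicates within storage_dir itself as well as between import_dir and storage_dir, which is the split the BACKEND items describe.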
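For the "thread-safe sessions per Thread" item, SQLAlchemy's scoped_session is the stock way to hand each worker thread its own Session. A minimal sketch, assuming nothing about the project's real engine setup (the sqlite URL and run_job below are placeholders):

    from sqlalchemy import create_engine
    from sqlalchemy.orm import scoped_session, sessionmaker

    engine = create_engine("sqlite:///pa.db")  # placeholder URL, not the real config
    Session = scoped_session(sessionmaker(bind=engine))

    def run_job():
        session = Session()   # same thread gets the same Session; a new thread gets its own
        try:
            # ... the job's queries / session.add() / commits go here ...
            session.commit()
        finally:
            Session.remove()  # discard this thread's session when the job finishes

If the current code shares a single module-level session across threads, that sharing is the usual cause of the kind of failure noted above; routing every thread through the scoped_session registry avoids it.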
diff --git a/pa_job_manager.py b/pa_job_manager.py
index 4bd27b3..fa94f77 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -535,20 +535,29 @@ def JobImportDir(job):
     symlink=CreateSymlink(job,path)
     ResetExistsOnFS(job, symlink)
 
-    overall_file_cnt=0
     walk=os.walk(path, topdown=True)
-    # root == path of dir, files are in dir... subdirs are in dir
-    parent_dir=None
-    for root, subdirs, files in walk:
+    ftree=list(walk)
+
+    # go through the data once up front to work out overall_file_cnt so the progress bar works from the first import
+    overall_file_cnt=0
+    for root, subdirs, files in ftree:
         overall_file_cnt+= len(subdirs) + len(files)
-        if root == path:
-            pp = symlink
-        else:
+
+    parent_dir=None
+    dir=AddDir(job, os.path.basename(symlink), symlink, parent_dir)
+    dir.num_files=overall_file_cnt
+    # session.add() in case this dir was already imported (AddDir won't re-add it) and the file count may differ from last time
+    session.add(dir)
+    job.num_files=overall_file_cnt
+    AddLogForJob(job, f"Found {overall_file_cnt} file(s) to process")
+    session.commit()
+
+    # root == path of dir; files and subdirs are in dir
+    for root, subdirs, files in ftree:
+        # the root dir was already created above while working out num_files for the whole os.walk
+        if root != path:
             pp=SymlinkName( path, root )+'/'+os.path.basename(root)
-        if root[-1]=="/":
-            root=root[0:-1]
-
-        dir=AddDir(job, os.path.basename(root), pp, parent_dir)
+            dir=AddDir(job, os.path.basename(root), pp, parent_dir)
 
         for basename in files:
             # commit every 100 files to see progress being made but not hammer the database
             if job.current_file_num % 100 == 0:
@@ -585,11 +594,6 @@
 
     rm_cnt=HandleAnyFSDeletions(job)
 
-    # reset overall path with overall_file_cnt, we use this for future jobs
-    # to measure progress when dealing with this path
-    import_dir=session.query(Dir).filter(Dir.path_prefix==symlink).first()
-    import_dir.num_files=overall_file_cnt
-    session.add(import_dir)
     FinishJob(job, f"Finished Importing: {path} - Processed {overall_file_cnt} files, Removed {rm_cnt} file(s)")
 
     return
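The JobImportDir restructure above boils down to a two-pass pattern: materialise os.walk() into a list, total everything first so the progress bar has a denominator from the very first import, then do the real work with batched commits. A stripped-down sketch of just that pattern (process and report_progress are stand-in callables, not the job manager's API):

    import os

    def import_tree(path, process, report_progress, batch=100):
        ftree = list(os.walk(path, topdown=True))  # walk once, iterate twice
        # pass 1: counting only, so the total is known before any work starts
        total = sum(len(subdirs) + len(files) for _root, subdirs, files in ftree)
        report_progress(0, total)
        done = 0
        # pass 2: the real work, reporting in batches rather than per file
        for root, subdirs, files in ftree:
            done += len(subdirs)                   # dirs count toward the total too
            for name in files:
                process(os.path.join(root, name))
                done += 1
                if done % batch == 0:              # cheap progress without hammering the DB
                    report_progress(done, total)
        report_progress(done, total)

The trade-off: list(walk) holds the whole directory listing in memory, and anything deleted between the counting pass and the work pass is still in ftree; for photo-import-sized trees both are usually acceptable.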