Fixed an issue where we can't iterate over os.walk more than once: we now convert it to a list, go through that list once to compute overall_file_cnt, and then commit this count to the Job and Dir structures so it is known on the joblog page — e.g. we get the progress bar on the first import as soon as the os.walk is finished. Also cleaned up the code to remove unnecessary string manipulation around the symlink name inside the loop, since that case can't occur there. Finally, updated the TODO to remove this item (progress bar on first import) and a few other stale TODOs, and added a new section on scanning storage_dir — the next big thing...

This commit is contained in:
2021-02-22 18:24:34 +11:00
parent 1320ecba20
commit fe1c01e2f4
2 changed files with 25 additions and 30 deletions

View File

@@ -535,20 +535,29 @@ def JobImportDir(job):
symlink=CreateSymlink(job,path)
ResetExistsOnFS(job, symlink)
overall_file_cnt=0
walk=os.walk(path, topdown=True)
# root == path of dir, files are in dir... subdirs are in dir
parent_dir=None
for root, subdirs, files in walk:
ftree=list(walk)
# go through data once to work out file_cnt so progress bar works from first import
overall_file_cnt=0
for root, subdirs, files in ftree:
overall_file_cnt+= len(subdirs) + len(files)
if root == path:
pp = symlink
else:
parent_dir=None
dir=AddDir(job, os.path.basename(symlink), symlink, parent_dir)
dir.num_files=overall_file_cnt
# session.add in case we already have imported this dir (as AddDir wont) & now we might have diff num of files to last time,
session.add(dir)
job.num_files=overall_file_cnt
AddLogForJob(job, f"Found {overall_file_cnt} file(s) to process")
session.commit()
# root == path of dir, files are in dir... subdirs are in dir
for root, subdirs, files in ftree:
# already create root above to work out num_files for whole os.walk
if root != path:
pp=SymlinkName( path, root )+'/'+os.path.basename(root)
if root[-1]=="/":
root=root[0:-1]
dir=AddDir(job, os.path.basename(root), pp, parent_dir)
dir=AddDir(job, os.path.basename(root), pp, parent_dir)
for basename in files:
# commit every 100 files to see progress being made but not hammer the database
if job.current_file_num % 100 == 0:
@@ -585,11 +594,6 @@ def JobImportDir(job):
rm_cnt=HandleAnyFSDeletions(job)
# reset overall path with overall_file_cnt, we use this for future jobs
# to measure progress when dealing with this path
import_dir=session.query(Dir).filter(Dir.path_prefix==symlink).first()
import_dir.num_files=overall_file_cnt
session.add(import_dir)
FinishJob(job, f"Finished Importing: {path} - Processed {overall_file_cnt} files, Removed {rm_cnt} file(s)")
return