Fixed an issue where we can't run a for loop over os.walk more than once (it returns a generator, which is exhausted after the first pass), so we now convert it to a list, go through that list once to get overall_file_cnt, and then commit this to the job and Dir structure so that it is known on the joblog page, e.g. we get the progress bar on the first import once the os.walk is finished. Also cleaned up the code to remove unnecessary string manipulation around the symlink name in the loop; it can't happen there. Finally, updated TODO to remove this todo item (progress bar on first import) and a few other old TODOs, and added a new section on scanning storage_dir, the next big thing...
This commit is contained in:
@@ -535,20 +535,29 @@ def JobImportDir(job):
|
||||
symlink=CreateSymlink(job,path)
|
||||
ResetExistsOnFS(job, symlink)
|
||||
|
||||
overall_file_cnt=0
|
||||
walk=os.walk(path, topdown=True)
|
||||
# root == path of dir, files are in dir... subdirs are in dir
|
||||
parent_dir=None
|
||||
for root, subdirs, files in walk:
|
||||
ftree=list(walk)
|
||||
|
||||
# go through data once to work out file_cnt so progress bar works from first import
|
||||
overall_file_cnt=0
|
||||
for root, subdirs, files in ftree:
|
||||
overall_file_cnt+= len(subdirs) + len(files)
|
||||
if root == path:
|
||||
pp = symlink
|
||||
else:
|
||||
|
||||
parent_dir=None
|
||||
dir=AddDir(job, os.path.basename(symlink), symlink, parent_dir)
|
||||
dir.num_files=overall_file_cnt
|
||||
# session.add in case we already have imported this dir (as AddDir wont) & now we might have diff num of files to last time,
|
||||
session.add(dir)
|
||||
job.num_files=overall_file_cnt
|
||||
AddLogForJob(job, f"Found {overall_file_cnt} file(s) to process")
|
||||
session.commit()
|
||||
|
||||
# root == path of dir, files are in dir... subdirs are in dir
|
||||
for root, subdirs, files in ftree:
|
||||
# already create root above to work out num_files for whole os.walk
|
||||
if root != path:
|
||||
pp=SymlinkName( path, root )+'/'+os.path.basename(root)
|
||||
if root[-1]=="/":
|
||||
root=root[0:-1]
|
||||
|
||||
dir=AddDir(job, os.path.basename(root), pp, parent_dir)
|
||||
dir=AddDir(job, os.path.basename(root), pp, parent_dir)
|
||||
for basename in files:
|
||||
# commit every 100 files to see progress being made but not hammer the database
|
||||
if job.current_file_num % 100 == 0:
|
||||
@@ -585,11 +594,6 @@ def JobImportDir(job):
|
||||
|
||||
rm_cnt=HandleAnyFSDeletions(job)
|
||||
|
||||
# reset overall path with overall_file_cnt, we use this for future jobs
|
||||
# to measure progress when dealing with this path
|
||||
import_dir=session.query(Dir).filter(Dir.path_prefix==symlink).first()
|
||||
import_dir.num_files=overall_file_cnt
|
||||
session.add(import_dir)
|
||||
FinishJob(job, f"Finished Importing: {path} - Processed {overall_file_cnt} files, Removed {rm_cnt} file(s)")
|
||||
return
|
||||
|
||||
|
||||
Reference in New Issue
Block a user