Fixed bugs 12, 13, and 14; fixed the new thumbnail code; fixed duplicate DB entries for directories; fixed the broken directory-optimising code by removing directory optimising entirely

This commit is contained in:
2021-01-23 00:00:47 +11:00
parent 8e9c7408a8
commit bfe0613167
2 changed files with 19 additions and 33 deletions

1
BUGs
View File

@@ -5,4 +5,3 @@
BUG-8: cascading failure jobs are needed BUG-8: cascading failure jobs are needed
BUG-10: gen*details will stop if no change to top-level dir, but not look in subdirs BUG-10: gen*details will stop if no change to top-level dir, but not look in subdirs
BUG-11: Ai ref img jobs are looping, needs fix BUG-11: Ai ref img jobs are looping, needs fix
BUG-14: JobImportDir optimises incorrectly, needs to check every dir for any change, not just root

View File

@@ -230,7 +230,7 @@ def ProcessImportDirs(parent_job=None):
for path in paths: for path in paths:
# make new Job; HandleJobs will make them run later # make new Job; HandleJobs will make them run later
jex=JobExtra( name="path", value=path ) jex=JobExtra( name="path", value=path )
job=Job(start_time='now()', last_update='now()', name="importdir", state="New", wait_for=None, pa_job_state="New" ) job=Job(start_time='now()', last_update='now()', name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=0 )
job.extra.append(jex) job.extra.append(jex)
session.add(job) session.add(job)
session.commit() session.commit()
@@ -375,11 +375,8 @@ def CreateSymlink(job,path):
return symlink return symlink
def AddDir(job, dirname, path_prefix, in_dir): def AddDir(job, dirname, path_prefix, in_dir):
# see if this exists already dir=session.query(Dir).filter(Dir.path_prefix==path_prefix).first()
dir=session.query(Dir).filter(Dir.path_prefix==dirname).first()
if dir: if dir:
if DEBUG==1:
print("Found {} returning DB object".format(dirname))
return dir return dir
dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0, last_hash_date=0 ) dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0, last_hash_date=0 )
dtype=session.query(FileType).filter(FileType.name=='Directory').first() dtype=session.query(FileType).filter(FileType.name=='Directory').first()
@@ -389,12 +386,16 @@ def AddDir(job, dirname, path_prefix, in_dir):
if in_dir: if in_dir:
e.in_dir.append(in_dir) e.in_dir.append(in_dir)
if DEBUG==1: if DEBUG==1:
print("AddDir: created {}".format(dirname)) print(f"AddDir: created d={dirname}, pp={path_prefix}")
AddLogForJob(job, "DEBUG: AddDir: {} in (dir_id={})".format(dirname, in_dir) ) AddLogForJob(job, "DEBUG: AddDir: {} in (dir_id={})".format(dirname, in_dir) )
session.add(e) session.add(e)
return dir return dir
def AddFile(job, fname, type_str, fsize, in_dir ): def AddFile(job, fname, type_str, fsize, in_dir ):
# see if this exists already
e=session.query(Entry).filter(Entry.name==fname).first()
if e:
return e
ftype = session.query(FileType).filter(FileType.name==type_str).first() ftype = session.query(FileType).filter(FileType.name==type_str).first()
e=Entry( name=fname, type=ftype ) e=Entry( name=fname, type=ftype )
f=File( size_mb=fsize ) f=File( size_mb=fsize )
@@ -426,21 +427,16 @@ def JobImportDir(job):
# root == path of dir, files are in dir... subdirs are in dir # root == path of dir, files are in dir... subdirs are in dir
parent_dir=None parent_dir=None
for root, subdirs, files in walk: for root, subdirs, files in walk:
print(f"walk: r={root} s={subdirs} f={files}")
overall_file_cnt+= len(subdirs) + len(files) overall_file_cnt+= len(subdirs) + len(files)
if root == path: if root == path:
pp = symlink pp = symlink
else: else:
pp=SymlinkName( path, root )+'/'+os.path.basename(root) pp=SymlinkName( path, root )+'/'+os.path.basename(root)
if root[-1]=="/":
root=root[0:-1]
dir=AddDir(job, os.path.basename(root), pp, parent_dir) dir=AddDir(job, os.path.basename(root), pp, parent_dir)
parent_dir=dir parent_dir=dir
stat = os.stat( dir.path_prefix )
# check any modificaiton on fs, since last import, if none we are done
if dir.last_import_date > 0 and stat.st_ctime < dir.last_import_date:
if DEBUG==1:
print( "DEBUG: Directory has not been altered since the last import, just ignore contents" )
job.current_file_num=dir.num_files
job.num_files+=dir.num_files
continue
for basename in files: for basename in files:
fname=dir.path_prefix+'/'+basename fname=dir.path_prefix+'/'+basename
stat = os.stat(fname) stat = os.stat(fname)
@@ -471,13 +467,17 @@ def JobImportDir(job):
return return
def JobProcessAI(job): def JobProcessAI(job):
print("DDP: disabled while fixing other bugs")
FinishJob(job, "Finished Processesing AI")
return
path=[jex.value for jex in job.extra if jex.name == "path"][0] path=[jex.value for jex in job.extra if jex.name == "path"][0]
path = SymlinkName(path, '/') path = SymlinkName(path, '/')
print('REMOVE AFTER TESTING ON WINDOWS... path=',path) print('REMOVE AFTER TESTING ON WINDOWS... path=',path)
d=session.query(Dir).filter(Dir.path_prefix==path).first() d=session.query(Dir).filter(Dir.path_prefix==path).first()
job.num_files=d.num_files job.num_files=d.num_files
for e in FilesInDir( path ): for e in FilesInDir( path ):
ProcessFilesInDir(job, e, ProcessAI, lambda a: True) ProcessFilesInDir(job, e, ProcessAI)
FinishJob(job, "Finished Processesing AI") FinishJob(job, "Finished Processesing AI")
return return
@@ -495,19 +495,6 @@ def GenHashAndThumb(job, e):
job.current_file_num+=1 job.current_file_num+=1
return return
def HashAndThumbDirHasNew(dir):
session.add(dir)
stat = os.stat( dir.path_prefix )
# check any modificaiton on fs, since last import, if none we are done
if stat.st_ctime < dir.last_hash_date:
dir.last_hash_date = time.time()
AddLogForJob(job, "skip {} as it has not changed since last hashing".format(dir.path_prefix))
if DEBUG==1:
print ("DEBUG: skip this dir {} as it has not changed since last hashing".format(dir.path_prefix))
return 0
dir.last_hash_date = time.time()
return 1
def ProcessAI(job, e): def ProcessAI(job, e):
if e.type.name != 'Image': if e.type.name != 'Image':
job.current_file_num+=1 job.current_file_num+=1
@@ -564,7 +551,7 @@ def compareAI(known_encoding, unknown_encoding):
return results return results
def ProcessFilesInDir(job, e, file_func, go_into_dir_func): def ProcessFilesInDir(job, e, file_func):
if DEBUG==1: if DEBUG==1:
print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir[0].path_prefix)) print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir[0].path_prefix))
if e.type.name != 'Directory': if e.type.name != 'Directory':
@@ -573,7 +560,7 @@ def ProcessFilesInDir(job, e, file_func, go_into_dir_func):
dir=session.query(Dir).filter(Dir.eid==e.id).first() dir=session.query(Dir).filter(Dir.eid==e.id).first()
job.current_file_num+=1 job.current_file_num+=1
# if this func returns # if this func returns
if not go_into_dir_func(dir): if not go_into_dir_func(job,dir):
return return
for sub in dir.files: for sub in dir.files:
ProcessFilesInDir(job, sub, file_func, go_into_dir_func) ProcessFilesInDir(job, sub, file_func, go_into_dir_func)
@@ -598,7 +585,7 @@ def JobGetFileDetails(job):
job.num_files = dir.num_files job.num_files = dir.num_files
session.commit() session.commit()
for e in FilesInDir( path ): for e in FilesInDir( path ):
ProcessFilesInDir(job, e, GenHashAndThumb, HashAndThumbDirHasNew ) ProcessFilesInDir(job, e, GenHashAndThumb )
FinishJob(job, "File Details job finished") FinishJob(job, "File Details job finished")
session.commit() session.commit()
return return
@@ -641,6 +628,7 @@ def GenImageThumbnail(job, file):
im.save(img_bytearray, format='JPEG') im.save(img_bytearray, format='JPEG')
img_bytearray = img_bytearray.getvalue() img_bytearray = img_bytearray.getvalue()
thumbnail = base64.b64encode(img_bytearray) thumbnail = base64.b64encode(img_bytearray)
thumbnail = str(thumbnail)[2:-1]
except: except:
print('WARNING: NO EXIF TAGS?!?!?!?') print('WARNING: NO EXIF TAGS?!?!?!?')
AddLogForJob(job, "WARNING: No EXIF TAF found for: {}".format(file)) AddLogForJob(job, "WARNING: No EXIF TAF found for: {}".format(file))
@@ -655,7 +643,6 @@ def GenVideoThumbnail(job, file):
res, im_ar = vcap.read() res, im_ar = vcap.read()
im_ar = cv2.resize(im_ar, (160, 90), 0, 0, cv2.INTER_LINEAR) im_ar = cv2.resize(im_ar, (160, 90), 0, 0, cv2.INTER_LINEAR)
res, thumb_buf = cv2.imencode('.jpeg', im_ar) res, thumb_buf = cv2.imencode('.jpeg', im_ar)
# bt = thumb_buf.tostring()
bt = thumb_buf.tobytes() bt = thumb_buf.tobytes()
thumbnail = base64.b64encode(bt) thumbnail = base64.b64encode(bt)
thumbnail = str(thumbnail)[2:-1] thumbnail = str(thumbnail)[2:-1]