# Patch: pa_job_manager clean-up.
#  - BUGs: close BUG-14 (remove its line).
#  - JobImportDir: drop the "dir unchanged since last import" skip (the BUG-14
#    mis-optimisation) and initialise Job counters at creation.
#  - AddDir: look up by path_prefix (was wrongly keyed on dirname).
#  - AddFile: return an existing Entry instead of duplicating it.
#  - JobProcessAI: short-circuited while other bugs are being fixed.
#  - ProcessFilesInDir: remove the go_into_dir_func callback mechanism
#    (HashAndThumbDirHasNew deleted, call sites updated).
#
# NOTE(review): the original patch was mangled onto single lines (unapplyable)
# and its -573 hunk still referenced go_into_dir_func after removing the
# parameter, and the recursion still passed 4 args. Reconstructed below with
# that hunk fixed and subsequent hunk offsets renumbered accordingly.

diff --git a/BUGs b/BUGs
index 77ce3cf..2fa7c2f 100644
--- a/BUGs
+++ b/BUGs
@@ -5,4 +5,3 @@
 BUG-8: cascading failure jobs are needed
 BUG-10: gen*details will stop if no change to top-level dir, but not look in subdirs
 BUG-11: Ai ref img jobs are looping, needs fix
-BUG-14: JobImportDir optimises incorrectly, needs to check every dir for any change, not just root
diff --git a/pa_job_manager.py b/pa_job_manager.py
index e5f7e06..9d2f4ec 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -230,7 +230,7 @@ def ProcessImportDirs(parent_job=None):
     for path in paths:
         # make new Job; HandleJobs will make them run later
         jex=JobExtra( name="path", value=path )
-        job=Job(start_time='now()', last_update='now()', name="importdir", state="New", wait_for=None, pa_job_state="New" )
+        job=Job(start_time='now()', last_update='now()', name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=0 )
         job.extra.append(jex)
         session.add(job)
         session.commit()
@@ -375,11 +375,8 @@ def CreateSymlink(job,path):
     return symlink
 
 def AddDir(job, dirname, path_prefix, in_dir):
-    # see if this exists already
-    dir=session.query(Dir).filter(Dir.path_prefix==dirname).first()
+    dir=session.query(Dir).filter(Dir.path_prefix==path_prefix).first()
     if dir:
-        if DEBUG==1:
-            print("Found {} returning DB object".format(dirname))
         return dir
     dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0, last_hash_date=0 )
     dtype=session.query(FileType).filter(FileType.name=='Directory').first()
@@ -389,12 +386,16 @@ def AddDir(job, dirname, path_prefix, in_dir):
     if in_dir:
         e.in_dir.append(in_dir)
     if DEBUG==1:
-        print("AddDir: created {}".format(dirname))
+        print(f"AddDir: created d={dirname}, pp={path_prefix}")
     AddLogForJob(job, "DEBUG: AddDir: {} in (dir_id={})".format(dirname, in_dir) )
     session.add(e)
     return dir
 
 def AddFile(job, fname, type_str, fsize, in_dir ):
+    # see if this exists already
+    e=session.query(Entry).filter(Entry.name==fname).first()
+    if e:
+        return e
     ftype = session.query(FileType).filter(FileType.name==type_str).first()
     e=Entry( name=fname, type=ftype )
     f=File( size_mb=fsize )
@@ -426,21 +427,16 @@ def JobImportDir(job):
     # root == path of dir, files are in dir... subdirs are in dir
     parent_dir=None
     for root, subdirs, files in walk:
+        print(f"walk: r={root} s={subdirs} f={files}")
         overall_file_cnt+= len(subdirs) + len(files)
         if root == path:
             pp = symlink
         else:
             pp=SymlinkName( path, root )+'/'+os.path.basename(root)
+        if root[-1]=="/":
+            root=root[0:-1]
         dir=AddDir(job, os.path.basename(root), pp, parent_dir)
         parent_dir=dir
-        stat = os.stat( dir.path_prefix )
-        # check any modificaiton on fs, since last import, if none we are done
-        if dir.last_import_date > 0 and stat.st_ctime < dir.last_import_date:
-            if DEBUG==1:
-                print( "DEBUG: Directory has not been altered since the last import, just ignore contents" )
-            job.current_file_num=dir.num_files
-            job.num_files+=dir.num_files
-            continue
         for basename in files:
             fname=dir.path_prefix+'/'+basename
             stat = os.stat(fname)
@@ -471,13 +467,17 @@ def JobImportDir(job):
     return
 
 def JobProcessAI(job):
+    print("DDP: disabled while fixing other bugs")
+    FinishJob(job, "Finished Processesing AI")
+    return
+
     path=[jex.value for jex in job.extra if jex.name == "path"][0]
     path = SymlinkName(path, '/')
     print('REMOVE AFTER TESTING ON WINDOWS... path=',path)
     d=session.query(Dir).filter(Dir.path_prefix==path).first()
     job.num_files=d.num_files
    for e in FilesInDir( path ):
-        ProcessFilesInDir(job, e, ProcessAI, lambda a: True)
+        ProcessFilesInDir(job, e, ProcessAI)
     FinishJob(job, "Finished Processesing AI")
     return
 
@@ -495,19 +495,6 @@ def GenHashAndThumb(job, e):
     job.current_file_num+=1
     return
 
-def HashAndThumbDirHasNew(dir):
-    session.add(dir)
-    stat = os.stat( dir.path_prefix )
-    # check any modificaiton on fs, since last import, if none we are done
-    if stat.st_ctime < dir.last_hash_date:
-        dir.last_hash_date = time.time()
-        AddLogForJob(job, "skip {} as it has not changed since last hashing".format(dir.path_prefix))
-        if DEBUG==1:
-            print ("DEBUG: skip this dir {} as it has not changed since last hashing".format(dir.path_prefix))
-        return 0
-    dir.last_hash_date = time.time()
-    return 1
-
 def ProcessAI(job, e):
     if e.type.name != 'Image':
         job.current_file_num+=1
@@ -565,6 +552,6 @@ def compareAI(known_encoding, unknown_encoding):
     return results
 
-def ProcessFilesInDir(job, e, file_func, go_into_dir_func):
+def ProcessFilesInDir(job, e, file_func):
     if DEBUG==1:
         print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir[0].path_prefix))
     if e.type.name != 'Directory':
@@ -573,7 +560,4 @@ def ProcessFilesInDir(job, e, file_func, go_into_dir_func):
     dir=session.query(Dir).filter(Dir.eid==e.id).first()
     job.current_file_num+=1
-    # if this func returns
-    if not go_into_dir_func(dir):
-        return
     for sub in dir.files:
-        ProcessFilesInDir(job, sub, file_func, go_into_dir_func)
+        ProcessFilesInDir(job, sub, file_func)
@@ -598,7 +582,7 @@ def JobGetFileDetails(job):
     job.num_files = dir.num_files
     session.commit()
     for e in FilesInDir( path ):
-        ProcessFilesInDir(job, e, GenHashAndThumb, HashAndThumbDirHasNew )
+        ProcessFilesInDir(job, e, GenHashAndThumb )
     FinishJob(job, "File Details job finished")
     session.commit()
     return
@@ -641,6 +625,7 @@ def GenImageThumbnail(job, file):
         im.save(img_bytearray, format='JPEG')
         img_bytearray = img_bytearray.getvalue()
         thumbnail = base64.b64encode(img_bytearray)
+        thumbnail = str(thumbnail)[2:-1]
     except:
         print('WARNING: NO EXIF TAGS?!?!?!?')
         AddLogForJob(job, "WARNING: No EXIF TAF found for: {}".format(file))
@@ -655,7 +640,6 @@ def GenVideoThumbnail(job, file):
     res, im_ar = vcap.read()
     im_ar = cv2.resize(im_ar, (160, 90), 0, 0, cv2.INTER_LINEAR)
     res, thumb_buf = cv2.imencode('.jpeg', im_ar)
-#    bt = thumb_buf.tostring()
     bt = thumb_buf.tobytes()
     thumbnail = base64.b64encode(bt)
     thumbnail = str(thumbnail)[2:-1]