From 4a55e98f5bc51f59c1a3846d5dcc7f1e0e27e8c4 Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Sat, 10 Apr 2021 11:48:56 +1000
Subject: [PATCH] first pass at removing explicit use of path_prefix, the
 remaining code either uses it to search so needs new path_dir_link table, or
 is AddDir which I need to think about

---
 pa_job_manager.py | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/pa_job_manager.py b/pa_job_manager.py
index 8446d96..1d9fdbe 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -85,6 +85,9 @@ class Dir(Base):
     last_import_date = Column(Float)
     files = relationship("Entry", secondary="entry_dir_link")
 
+    def PathOnFS(self):
+        return self.path_prefix
+
     def __repr__(self):
         return f""
 
@@ -99,6 +102,9 @@ class Entry(Base):
     file_details = relationship( "File", uselist=False )
     in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
 
+    def FullPathOnFS(self):
+        return self.in_dir.path_prefix + '/' + self.name
+
     def __repr__(self):
         return f""
 
@@ -457,7 +463,6 @@ def AddDir(job, dirname, path_prefix, in_dir):
 def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ):
     e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==fname,Dir.eid==in_dir.eid).first()
     if e:
-        print( f"################################################ FILE EXISTS ALREADY: {fname} -- {in_dir.path_prefix} {e}" )
         e.exists_on_fs=True
         return e
     ftype = session.query(FileType).filter(FileType.name==type_str).first()
@@ -582,7 +587,7 @@ def JobImportDir(job):
             # commit every 100 files to see progress being made but not hammer the database
             if job.current_file_num % 100 == 0:
                 session.commit()
-            fname=dir.path_prefix+'/'+basename
+            fname=dir.PathOnFS()+'/'+basename
             stat = os.stat(fname)
 
             if stat.st_ctime > dir.last_import_date:
@@ -643,20 +648,20 @@ def GenHashAndThumb(job, e):
     # commit every 100 files to see progress being made but not hammer the database
     if job.current_file_num % 100 == 0:
         session.commit()
-    stat = os.stat( e.in_dir.path_prefix + '/' + e.name )
+    stat = os.stat( e.FullPathOnFS() )
     if stat.st_ctime < e.file_details.last_hash_date:
         if DEBUG==1:
             print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
         job.current_file_num+=1
         return
 
-    e.file_details.hash = md5( job, e.in_dir.path_prefix+'/'+ e.name )
+    e.file_details.hash = md5( job, e.FullPathOnFS() )
     if DEBUG==1:
         print( f"{e.name} - hash={e.file_details.hash}" )
     if e.type.name == 'Image':
-        e.file_details.thumbnail = GenImageThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
+        e.file_details.thumbnail = GenImageThumbnail( job, e.FullPathOnFS() )
     elif e.type.name == 'Video':
-        e.file_details.thumbnail = GenVideoThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
+        e.file_details.thumbnail = GenVideoThumbnail( job, e.FullPathOnFS() )
     elif e.type.name == 'Unknown':
         job.current_file_num+=1
         e.file_details.last_hash_date = time.time()
@@ -667,7 +672,7 @@ def ProcessAI(job, e):
         job.current_file_num+=1
         return
 
-    file = e.in_dir.path_prefix + '/' + e.name
+    file = e.FullPathOnFS()
    stat = os.stat(file)
     # find if file is newer than when we found faces before (fyi: first time faces_created_on == 0)
     if stat.st_ctime > e.file_details.faces_created_on:
@@ -709,7 +714,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
     if not frl:
         frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details.eid)
     else:
-        stat=os.stat(e.in_dir.path_prefix+'/'+ e.name)
+        stat=os.stat( e.FullPathOnFS() )
         # file & refimg are not newer then we dont need to check
         if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
             print(f"OPTIM: lookForPersonInImage: file {e.name} has a previous match for: {refimg.fname}, and the file & refimg haven't changed")
@@ -757,7 +762,7 @@ def compareAI(known_encoding, unknown_encoding):
 
 def ProcessFilesInDir(job, e, file_func):
     if DEBUG==1:
-        print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir.path_prefix))
+        print("DEBUG: files in dir - process: {}".format(e.FullPathOnFS()) )
     if e.type.name != 'Directory':
         file_func(job, e)
     else:
@@ -873,7 +878,7 @@ def RemoveFileFromFS( del_me ):
         m=re.search( r'^static/(.+)', del_me.in_dir.path_prefix)
         dst_dir=settings.recycle_bin_path + m[1] + '/'
         os.makedirs( dst_dir,mode=0o777, exist_ok=True )
-        src=del_me.in_dir.path_prefix+'/'+del_me.name
+        src=del_me.FullPathOnFS()
         dst=dst_dir + '/' + del_me.name
         os.replace( src, dst )
     except Exception as e:
@@ -903,8 +908,8 @@ def RemoveDups(job):
             found=None
             del_me_lst = []
             for f in files:
-                if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
-                    AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
+                if os.path.isfile( f.FullPathOnFS() ) == False:
+                    AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
                 elif f.file_details.eid == int(keeping):
                     found = f
                 else:
@@ -912,9 +917,9 @@ def RemoveDups(job):
             if found == None:
                 AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" )
             else:
-                AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
+                AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
                 for del_me in del_me_lst:
-                    AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
+                    AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.FullPathOnFS()}" )
                     RemoveFileFromFS( del_me )
                     RemoveFileFromDB(del_me.id)
 
@@ -923,14 +928,14 @@ def RemoveDups(job):
             hashes=[jex.value for jex in job.extra if jex.name == f"kdhash-{which}"][0]
             keeping=jex.value
             tmp=session.query(Dir).filter(Dir.eid==keeping).first()
-            AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
+            AddLogForJob(job, f"Keeping files in {tmp.PathOnFS()}" )
             for hash in hashes.split(","):
                 files=session.query(Entry).join(File).filter(File.hash==hash).all()
                 found=None
                 del_me=None
                 for f in files:
-                    if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
-                        AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
+                    if os.path.isfile(f.FullPathOnFS()) == False:
+                        AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
                     if f.in_dir.eid == int(keeping):
                         found=f
                     else:
@@ -939,8 +944,8 @@ def RemoveDups(job):
                 if found == None:
                     AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" )
                 else:
-                    AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
-                    AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
+                    AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
+                    AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.FullPathOnFS()}" )
                     RemoveFileFromFS( del_me )
                     RemoveFileFromDB(del_me.id)
                     dup_cnt += 1
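
Note on the path_dir_link table mentioned in the subject line: the remaining path_prefix users search Dir rows by filesystem path, so one possible follow-up is a small lookup table mapping a path to its Dir row. A minimal sketch, assuming the same SQLAlchemy declarative Base used above; the table name, column names, and the foreign-key target are illustrative assumptions, not part of this patch:

    from sqlalchemy import Column, Integer, String, ForeignKey

    class PathDirLink(Base):
        # Hypothetical lookup table (names assumed): maps a filesystem path
        # to the Dir entry living there, so searches stop reading
        # Dir.path_prefix directly.
        __tablename__ = 'path_dir_link'
        id = Column(Integer, primary_key=True)
        path = Column(String, unique=True, index=True)
        dir_eid = Column(Integer, ForeignKey('dir.eid'))  # FK target table name assumed

Search-by-path callers could then resolve a Dir without touching path_prefix, e.g. session.query(Dir).join(PathDirLink, PathDirLink.dir_eid == Dir.eid).filter(PathDirLink.path == wanted_path).first().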