From 4a55e98f5bc51f59c1a3846d5dcc7f1e0e27e8c4 Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Sat, 10 Apr 2021 11:48:56 +1000
Subject: [PATCH] first pass at removing explicit use of path_prefix, the
 remaining code either uses it to search so needs new path_dir_link table, or
 is AddDir which I need to think about

---
 pa_job_manager.py | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/pa_job_manager.py b/pa_job_manager.py
index 8446d96..1d9fdbe 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -85,6 +85,9 @@ class Dir(Base):
     last_import_date = Column(Float)
     files = relationship("Entry", secondary="entry_dir_link")
 
+    def PathOnFS(self):
+        return self.path_prefix
+
     def __repr__(self):
         return f""
 
@@ -99,6 +102,9 @@ class Entry(Base):
     file_details = relationship( "File", uselist=False )
     in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
 
+    def FullPathOnFS(self):
+        return self.in_dir.path_prefix + '/' + self.name
+
     def __repr__(self):
         return f""
 
@@ -457,7 +463,6 @@ def AddDir(job, dirname, path_prefix, in_dir):
 def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ):
     e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==fname,Dir.eid==in_dir.eid).first()
     if e:
-        print( f"################################################ FILE EXISTS ALREADY: {fname} -- {in_dir.path_prefix} {e}" )
         e.exists_on_fs=True
         return e
     ftype = session.query(FileType).filter(FileType.name==type_str).first()
@@ -582,7 +587,7 @@ def JobImportDir(job):
             # commit every 100 files to see progress being made but not hammer the database
             if job.current_file_num % 100 == 0:
                 session.commit()
-            fname=dir.path_prefix+'/'+basename
+            fname=dir.PathOnFS()+'/'+basename
             stat = os.stat(fname)
 
             if stat.st_ctime > dir.last_import_date:
@@ -643,20 +648,20 @@ def GenHashAndThumb(job, e):
     # commit every 100 files to see progress being made but not hammer the database
     if job.current_file_num % 100 == 0:
         session.commit()
-    stat = os.stat( e.in_dir.path_prefix + '/' + e.name )
+    stat = os.stat( e.FullPathOnFS() )
     if stat.st_ctime < e.file_details.last_hash_date:
         if DEBUG==1:
             print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
         job.current_file_num+=1
         return
 
-    e.file_details.hash = md5( job, e.in_dir.path_prefix+'/'+ e.name )
+    e.file_details.hash = md5( job, e.FullPathOnFS() )
     if DEBUG==1:
         print( f"{e.name} - hash={e.file_details.hash}" )
     if e.type.name == 'Image':
-        e.file_details.thumbnail = GenImageThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
+        e.file_details.thumbnail = GenImageThumbnail( job, e.FullPathOnFS() )
     elif e.type.name == 'Video':
-        e.file_details.thumbnail = GenVideoThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
+        e.file_details.thumbnail = GenVideoThumbnail( job, e.FullPathOnFS() )
     elif e.type.name == 'Unknown':
         job.current_file_num+=1
         e.file_details.last_hash_date = time.time()
@@ -667,7 +672,7 @@ def ProcessAI(job, e):
         job.current_file_num+=1
         return
 
-    file = e.in_dir.path_prefix + '/' + e.name
+    file = e.FullPathOnFS()
    stat = os.stat(file)
     # find if file is newer than when we found faces before (fyi: first time faces_created_on == 0)
     if stat.st_ctime > e.file_details.faces_created_on:
@@ -709,7 +714,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
     if not frl:
         frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details.eid)
     else:
-        stat=os.stat(e.in_dir.path_prefix+'/'+ e.name)
+        stat=os.stat( e.FullPathOnFS() )
         # file & refimg are not newer then we dont need to check
         if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
             print(f"OPTIM: lookForPersonInImage: file {e.name} has a previous match for: {refimg.fname}, and the file & refimg haven't changed")
@@ -757,7 +762,7 @@ def compareAI(known_encoding, unknown_encoding):
 
 def ProcessFilesInDir(job, e, file_func):
     if DEBUG==1:
-        print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir.path_prefix))
+        print("DEBUG: files in dir - process: {}".format(e.FullPathOnFS()) )
     if e.type.name != 'Directory':
         file_func(job, e)
     else:
@@ -873,7 +878,7 @@ def RemoveFileFromFS( del_me ):
         m=re.search( r'^static/(.+)', del_me.in_dir.path_prefix)
         dst_dir=settings.recycle_bin_path + m[1] + '/'
         os.makedirs( dst_dir,mode=0o777, exist_ok=True )
-        src=del_me.in_dir.path_prefix+'/'+del_me.name
+        src=del_me.FullPathOnFS()
         dst=dst_dir + '/' + del_me.name
         os.replace( src, dst )
     except Exception as e:
@@ -903,8 +908,8 @@ def RemoveDups(job):
             found=None
             del_me_lst = []
             for f in files:
-                if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
-                    AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
+                if os.path.isfile( f.FullPathOnFS() ) == False:
+                    AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
                 elif f.file_details.eid == int(keeping):
                     found = f
                 else:
@@ -912,9 +917,9 @@ def RemoveDups(job):
             if found == None:
                 AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" )
             else:
-                AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
+                AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
                 for del_me in del_me_lst:
-                    AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
+                    AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.FullPathOnFS()}" )
                     RemoveFileFromFS( del_me )
                     RemoveFileFromDB(del_me.id)
 
@@ -923,14 +928,14 @@ def RemoveDups(job):
             hashes=[jex.value for jex in job.extra if jex.name == f"kdhash-{which}"][0]
             keeping=jex.value
             tmp=session.query(Dir).filter(Dir.eid==keeping).first()
-            AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
+            AddLogForJob(job, f"Keeping files in {tmp.PathOnFS()}" )
             for hash in hashes.split(","):
                 files=session.query(Entry).join(File).filter(File.hash==hash).all()
                 found=None
                 del_me=None
                 for f in files:
-                    if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
-                        AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
+                    if os.path.isfile(f.FullPathOnFS()) == False:
+                        AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
                     if f.in_dir.eid == int(keeping):
                         found=f
                     else:
@@ -939,8 +944,8 @@ def RemoveDups(job):
                 if found == None:
                     AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" )
                 else:
-                    AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
-                    AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
+                    AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
+                    AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.FullPathOnFS()}" )
                     RemoveFileFromFS( del_me )
                     RemoveFileFromDB(del_me.id)
                     dup_cnt += 1
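
Note on the path_dir_link table mentioned in the subject line: the remaining path_prefix users search Dir rows by filesystem path, so one possible follow-up is a small lookup table mapping a path to its Dir row. A minimal sketch, assuming the same SQLAlchemy declarative Base used above; the table name, column names, and the foreign-key target are illustrative assumptions, not part of this patch:

    from sqlalchemy import Column, Integer, String, ForeignKey

    class PathDirLink(Base):
        # Hypothetical lookup table (names assumed): maps a filesystem path
        # to the Dir entry living there, so searches stop reading
        # Dir.path_prefix directly.
        __tablename__ = 'path_dir_link'
        id = Column(Integer, primary_key=True)
        path = Column(String, unique=True, index=True)
        dir_eid = Column(Integer, ForeignKey('dir.eid'))  # FK target table name assumed

Search-by-path callers could then resolve a Dir without touching path_prefix, e.g. session.query(Dir).join(PathDirLink, PathDirLink.dir_eid == Dir.eid).filter(PathDirLink.path == wanted_path).first().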