First pass at removing explicit use of path_prefix. The remaining code either uses it for searching (and so needs the new path_dir_link table) or is AddDir, which I still need to think about.

This commit is contained in:
2021-04-10 11:48:56 +10:00
parent fddd47a1ab
commit 4a55e98f5b

View File

@@ -85,6 +85,9 @@ class Dir(Base):
last_import_date = Column(Float)
files = relationship("Entry", secondary="entry_dir_link")
def PathOnFS(self):
    """Return this directory's location on the filesystem.

    Currently this is just the stored path_prefix; callers should use
    this accessor rather than touching path_prefix directly.
    """
    fs_path = self.path_prefix
    return fs_path
def __repr__(self):
    """Debug representation listing the Dir row's key fields."""
    return (
        f"<eid: {self.eid}, path_prefix: {self.path_prefix}, "
        f"num_files: {self.num_files}, last_import_date: {self.last_import_date}, "
        f"files: {self.files}>"
    )
@@ -99,6 +102,9 @@ class Entry(Base):
file_details = relationship( "File", uselist=False )
in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
def FullPathOnFS(self):
    """Return the entry's full filesystem path: its directory's path plus its name.

    Uses Dir.PathOnFS() instead of reading in_dir.path_prefix directly, for
    consistency with the rest of the file (the path_prefix attribute is being
    phased out as a direct access point). Behavior is unchanged, since
    PathOnFS() currently returns path_prefix.
    """
    return self.in_dir.PathOnFS() + '/' + self.name
def __repr__(self):
    """Debug representation listing the Entry row's key fields and relations."""
    return (
        f"<id: {self.id}, name: {self.name}, type={self.type}, "
        f"exists_on_fs={self.exists_on_fs}, dir_details={self.dir_details}, "
        f"file_details={self.file_details}, in_dir={self.in_dir}>"
    )
@@ -457,7 +463,6 @@ def AddDir(job, dirname, path_prefix, in_dir):
def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ):
e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==fname,Dir.eid==in_dir.eid).first()
if e:
print( f"################################################ FILE EXISTS ALREADY: {fname} -- {in_dir.path_prefix} {e}" )
e.exists_on_fs=True
return e
ftype = session.query(FileType).filter(FileType.name==type_str).first()
@@ -582,7 +587,7 @@ def JobImportDir(job):
# commit every 100 files to see progress being made but not hammer the database
if job.current_file_num % 100 == 0:
session.commit()
fname=dir.path_prefix+'/'+basename
fname=dir.PathOnFS()+'/'+basename
stat = os.stat(fname)
if stat.st_ctime > dir.last_import_date:
@@ -643,20 +648,20 @@ def GenHashAndThumb(job, e):
# commit every 100 files to see progress being made but not hammer the database
if job.current_file_num % 100 == 0:
session.commit()
stat = os.stat( e.in_dir.path_prefix + '/' + e.name )
stat = os.stat( e.FullPathOnFS() )
if stat.st_ctime < e.file_details.last_hash_date:
if DEBUG==1:
print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
job.current_file_num+=1
return
e.file_details.hash = md5( job, e.in_dir.path_prefix+'/'+ e.name )
e.file_details.hash = md5( job, e.FullPathOnFS() )
if DEBUG==1:
print( f"{e.name} - hash={e.file_details.hash}" )
if e.type.name == 'Image':
e.file_details.thumbnail = GenImageThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
e.file_details.thumbnail = GenImageThumbnail( job, e.FullPathOnFS() )
elif e.type.name == 'Video':
e.file_details.thumbnail = GenVideoThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
e.file_details.thumbnail = GenVideoThumbnail( job, e.FullPathOnFS() )
elif e.type.name == 'Unknown':
job.current_file_num+=1
e.file_details.last_hash_date = time.time()
@@ -667,7 +672,7 @@ def ProcessAI(job, e):
job.current_file_num+=1
return
file = e.in_dir.path_prefix + '/' + e.name
file = e.FullPathOnFS()
stat = os.stat(file)
# find if file is newer than when we found faces before (fyi: first time faces_created_on == 0)
if stat.st_ctime > e.file_details.faces_created_on:
@@ -709,7 +714,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
if not frl:
frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details.eid)
else:
stat=os.stat(e.in_dir.path_prefix+'/'+ e.name)
stat=os.stat( e.FullPathOnFS() )
# if neither the file nor the refimg is newer, we don't need to re-check
if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
print(f"OPTIM: lookForPersonInImage: file {e.name} has a previous match for: {refimg.fname}, and the file & refimg haven't changed")
@@ -757,7 +762,7 @@ def compareAI(known_encoding, unknown_encoding):
def ProcessFilesInDir(job, e, file_func):
if DEBUG==1:
print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir.path_prefix))
print("DEBUG: files in dir - process: {}".format(e.FullPathOnFS()) )
if e.type.name != 'Directory':
file_func(job, e)
else:
@@ -873,7 +878,7 @@ def RemoveFileFromFS( del_me ):
m=re.search( r'^static/(.+)', del_me.in_dir.path_prefix)
dst_dir=settings.recycle_bin_path + m[1] + '/'
os.makedirs( dst_dir,mode=0o777, exist_ok=True )
src=del_me.in_dir.path_prefix+'/'+del_me.name
src=del_me.FullPathOnFS()
dst=dst_dir + '/' + del_me.name
os.replace( src, dst )
except Exception as e:
@@ -903,8 +908,8 @@ def RemoveDups(job):
found=None
del_me_lst = []
for f in files:
if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
if os.path.isfile( f.FullPathOnFS() ) == False:
AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
elif f.file_details.eid == int(keeping):
found = f
else:
@@ -912,9 +917,9 @@ def RemoveDups(job):
if found == None:
AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" )
else:
AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
for del_me in del_me_lst:
AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.FullPathOnFS()}" )
RemoveFileFromFS( del_me )
RemoveFileFromDB(del_me.id)
@@ -923,14 +928,14 @@ def RemoveDups(job):
hashes=[jex.value for jex in job.extra if jex.name == f"kdhash-{which}"][0]
keeping=jex.value
tmp=session.query(Dir).filter(Dir.eid==keeping).first()
AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
AddLogForJob(job, f"Keeping files in {tmp.PathOnFS()}" )
for hash in hashes.split(","):
files=session.query(Entry).join(File).filter(File.hash==hash).all()
found=None
del_me=None
for f in files:
if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
if os.path.isfile(f.FullPathOnFS()) == False:
AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
if f.in_dir.eid == int(keeping):
found=f
else:
@@ -939,8 +944,8 @@ def RemoveDups(job):
if found == None:
AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" )
else:
AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.FullPathOnFS()}" )
RemoveFileFromFS( del_me )
RemoveFileFromDB(del_me.id)
dup_cnt += 1