fixed up dup code to work with paths, added path_types throughout, and updated the TODO to be clear on what's next

2021-04-17 17:43:42 +10:00
parent 477aa4e5b8
commit 3237e3bf8f
6 changed files with 68 additions and 77 deletions
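
Note: the diff below imports PathType from path and looks up the 'Import' and 'Storage' rows by name. The model itself is not part of this commit, so the sketch here is only an assumption of its shape (a Flask-SQLAlchemy lookup table with id and name columns, matching the Model.query style used elsewhere in dups.py), included to make the new query lines easier to read.

# Assumed sketch of the PathType model defined in path.py (not in this commit):
# a small lookup table mapping a path-type name to an id.
from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()

class PathType(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(32), unique=True)   # e.g. 'Import' or 'Storage'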

dups.py (49 changed lines)

@@ -23,6 +23,7 @@ import re
 from job import Job, JobExtra, Joblog, NewJob
 from settings import Settings
 from shared import SymlinkName
+from path import PathType
 ################################################################################
 # DupRow class is a simple 'struct' to keep data per duplicate file / just to
@@ -91,47 +92,23 @@ class Duplicates:
         self.per_path_dups=[]
         self.preferred_file={}
         self.preferred_path={}
-        self.all_paths=[]
-        self.storage_paths=[]
-        self.import_paths=[]
         self.hashes_processed={}
         self.uniq_dups=0
         self.total_dups=0
-        # pull apart the storage path Setting, and make array of each for use in TrimmedPath()
-        settings=Settings.query.first()
-        paths = settings.storage_path.split("#")
-        for path in paths:
-            prefix = SymlinkName(path,path+'/')
-            self.storage_paths.append(prefix)
-            self.all_paths.append(prefix)
-        # pull apart the import path Setting, and make array of each for use in TrimmedPath()
-        paths = settings.import_path.split("#")
-        for path in paths:
-            prefix = SymlinkName(path,path+'/')
-            self.import_paths.append(prefix)
-            self.all_paths.append(prefix)
-    # Strip the front of the path (any match on a storage or import path) is
-    # removed. Just to make it easier to read when we display in the web page
-    def TrimmedPath( self, path ):
-        for p in self.all_paths:
-            if re.match( f"^{p}", path ):
-                return path.replace(p, '' )
-        return path
+        self.import_ptype_id = PathType.query.filter(PathType.name=='Import').first().id
+        self.storage_ptype_id = PathType.query.filter(PathType.name=='Storage').first().id
     # is this file in the import path?
-    def InImportPath( self, path ):
-        for p in self.import_paths:
-            if re.match( f"^{p}", path ):
-                return True
+    def InImportPath( self, path_type ):
+        if path_type == self.import_ptype_id:
+            return True
         return False
     # is this file in the storage path?
-    def InStoragePath( self, path ):
-        for p in self.storage_paths:
-            if re.match( f"^{p}", path ):
-                return True
+    def InStoragePath( self, path_type ):
+        if path_type == self.storage_ptype_id:
+            return True
         return False
     # this stores this object into the keep from same path list (DDP: sometimes there can be more than 1 SP, e.g SP to SP to IP)
@@ -160,11 +137,11 @@ class Duplicates:
     # and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
     # and then puts the import path file in the delete list (self.ip_to_sp_dups_keep) via self.DelInIPSPDups()
     def DupInImportAndStoragePath( self, row, dr1, dr2 ):
-        if self.InStoragePath(row.path1) and self.InImportPath(row.path2):
+        if self.InStoragePath(row.path_type1) and self.InImportPath(row.path_type2):
             self.KeepInIPSPDups( dr1 )
             self.DelInIPSPDups( dr2 )
             return True
-        if self.InStoragePath(row.path2) and self.InImportPath(row.path1):
+        if self.InStoragePath(row.path_type2) and self.InImportPath(row.path_type1):
             self.KeepInIPSPDups( dr2 )
             self.DelInIPSPDups( dr1 )
             return True
@@ -174,8 +151,8 @@ class Duplicates:
     # we process these into appropriate data structures on this first pass
     def AddDup( self, row ):
         self.hashes_processed[row.hash]=1
-        dr1=DupRow( row.hash, row.fname1, self.TrimmedPath(row.path1), row.did1, row.id1 )
-        dr2=DupRow( row.hash, row.fname2, self.TrimmedPath(row.path2), row.did2, row.id2 )
+        dr1=DupRow( row.hash, row.fname1, row.rel_path1, row.did1, row.id1 )
+        dr2=DupRow( row.hash, row.fname2, row.rel_path2, row.did2, row.id2 )
         # if in both import and storage path, just keep the storage path file,
         # and del import path file.
         if self.DupInImportAndStoragePath( row, dr1, dr2 ):
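
Net effect of the dups.py changes: the old code rebuilt storage/import prefix lists from Settings on every Duplicates() construction and regex-matched each file path against them (TrimmedPath and the loop bodies of InImportPath/InStoragePath), while the new code expects the query to supply a pre-trimmed rel_path plus a path_type id per file, so classification becomes a single integer comparison against the cached PathType ids. A minimal stand-alone sketch of the new check, with hard-coded ids standing in for the real PathType rows (an assumption, for illustration only):

# Sketch only: hard-coded ids stand in for the PathType rows cached in __init__.
import re

IMPORT_PTYPE_ID = 1
STORAGE_PTYPE_ID = 2

def in_import_path(path_type):
    # new style: one integer comparison per file
    return path_type == IMPORT_PTYPE_ID

def in_storage_path(path_type):
    return path_type == STORAGE_PTYPE_ID

def in_import_path_old(path, import_paths):
    # old style, for contrast: regex-match the absolute path against every configured prefix
    return any(re.match(f"^{p}", path) for p in import_paths)

print(in_import_path(IMPORT_PTYPE_ID))                    # True
print(in_import_path_old("/import/a.jpg", ["/import/"]))  # True

Besides being cheaper per row, the id comparison keeps the duplicate logic independent of how the storage_path/import_path settings happen to be spelled.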