Fixed up the duplicate-handling code to work with paths, added path_types throughout, and updated the TODO to be clear on what comes next
This commit is contained in:
49
dups.py
49
dups.py
@@ -23,6 +23,7 @@ import re
|
||||
from job import Job, JobExtra, Joblog, NewJob
|
||||
from settings import Settings
|
||||
from shared import SymlinkName
|
||||
from path import PathType
|
||||
|
||||
################################################################################
|
||||
# DupRow class is a simple 'struct' to keep data per duplicate file / just to
|
||||
@@ -91,47 +92,23 @@ class Duplicates:
|
||||
self.per_path_dups=[]
|
||||
self.preferred_file={}
|
||||
self.preferred_path={}
|
||||
self.all_paths=[]
|
||||
self.storage_paths=[]
|
||||
self.import_paths=[]
|
||||
self.hashes_processed={}
|
||||
self.uniq_dups=0
|
||||
self.total_dups=0
|
||||
|
||||
# pull apart the storage path Setting, and make array of each for use in TrimmedPath()
|
||||
settings=Settings.query.first()
|
||||
paths = settings.storage_path.split("#")
|
||||
for path in paths:
|
||||
prefix = SymlinkName(path,path+'/')
|
||||
self.storage_paths.append(prefix)
|
||||
self.all_paths.append(prefix)
|
||||
# pull apart the import path Setting, and make array of each for use in TrimmedPath()
|
||||
paths = settings.import_path.split("#")
|
||||
for path in paths:
|
||||
prefix = SymlinkName(path,path+'/')
|
||||
self.import_paths.append(prefix)
|
||||
self.all_paths.append(prefix)
|
||||
|
||||
# Strip the front of the path (any match on a storage or import path) is
|
||||
# removed. Just to make it easier to read when we display in the web page
|
||||
def TrimmedPath( self, path ):
|
||||
for p in self.all_paths:
|
||||
if re.match( f"^{p}", path ):
|
||||
return path.replace(p, '' )
|
||||
return path
|
||||
self.import_ptype_id = PathType.query.filter(PathType.name=='Import').first().id
|
||||
self.storage_ptype_id = PathType.query.filter(PathType.name=='Storage').first().id
|
||||
|
||||
# is this file in the import path?
|
||||
def InImportPath( self, path ):
|
||||
for p in self.import_paths:
|
||||
if re.match( f"^{p}", path ):
|
||||
return True
|
||||
def InImportPath( self, path_type ):
|
||||
if path_type == self.import_ptype_id:
|
||||
return True
|
||||
return False
|
||||
|
||||
# is this file in the storage path?
|
||||
def InStoragePath( self, path ):
|
||||
for p in self.storage_paths:
|
||||
if re.match( f"^{p}", path ):
|
||||
return True
|
||||
def InStoragePath( self, path_type ):
|
||||
if path_type == self.storage_ptype_id:
|
||||
return True
|
||||
return False
|
||||
|
||||
# this stores this object in the "keep from same path" list (DDP: sometimes there can be more than 1 SP, e.g. SP to SP to IP)
|
||||
@@ -160,11 +137,11 @@ class Duplicates:
|
||||
# and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
|
||||
# and then puts the import path file in the delete list (the delete-side counterpart of self.ip_to_sp_dups_keep -- TODO confirm attribute name) via self.DelInIPSPDups()
|
||||
def DupInImportAndStoragePath( self, row, dr1, dr2 ):
|
||||
if self.InStoragePath(row.path1) and self.InImportPath(row.path2):
|
||||
if self.InStoragePath(row.path_type1) and self.InImportPath(row.path_type2):
|
||||
self.KeepInIPSPDups( dr1 )
|
||||
self.DelInIPSPDups( dr2 )
|
||||
return True
|
||||
if self.InStoragePath(row.path2) and self.InImportPath(row.path1):
|
||||
if self.InStoragePath(row.path_type2) and self.InImportPath(row.path_type1):
|
||||
self.KeepInIPSPDups( dr2 )
|
||||
self.DelInIPSPDups( dr1 )
|
||||
return True
|
||||
@@ -174,8 +151,8 @@ class Duplicates:
|
||||
# we process these into appropriate data structures on this first pass
|
||||
def AddDup( self, row ):
|
||||
self.hashes_processed[row.hash]=1
|
||||
dr1=DupRow( row.hash, row.fname1, self.TrimmedPath(row.path1), row.did1, row.id1 )
|
||||
dr2=DupRow( row.hash, row.fname2, self.TrimmedPath(row.path2), row.did2, row.id2 )
|
||||
dr1=DupRow( row.hash, row.fname1, row.rel_path1, row.did1, row.id1 )
|
||||
dr2=DupRow( row.hash, row.fname2, row.rel_path2, row.did2, row.id2 )
|
||||
# if in both import and storage path, just keep the storage path file,
|
||||
# and del import path file.
|
||||
if self.DupInImportAndStoragePath( row, dr1, dr2 ):
|
||||
|
||||
Reference in New Issue
Block a user