Renamed self.KeepInSameDups to self.KeepInIPSPDups, as it handles import path (ip) to storage path (sp) duplicates, not same-directory duplicates
This commit is contained in:
27
dups.py
27
dups.py
@@ -134,16 +134,15 @@ class Duplicates:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# this stores this object into the keep from same path list (DDP: could there be more than 1)
|
# this stores this object into the keep from same path list (DDP: could there be more than 1)
|
||||||
def KeepInSameDups( self, obj ):
|
def KeepInIPSPDups( self, obj ):
|
||||||
if obj.h not in self.ip_to_sp_dups_keep:
|
if obj.h not in self.ip_to_sp_dups_keep:
|
||||||
self.ip_to_sp_dups_keep[obj.h]= obj
|
self.ip_to_sp_dups_keep[obj.h]= obj
|
||||||
self.overall_dup_cnt += 1
|
|
||||||
self.overall_dup_sets += 1
|
self.overall_dup_sets += 1
|
||||||
return
|
return
|
||||||
|
|
||||||
# this stores this object into the Delete from same path list (if it is not
|
# this stores this object into the Delete from same path list (if it is not
|
||||||
# already there)
|
# already there)
|
||||||
def DelInSameDups( self, obj ):
|
def DelInIPSPDups( self, obj ):
|
||||||
if obj.h not in self.ip_to_sp_dups_del:
|
if obj.h not in self.ip_to_sp_dups_del:
|
||||||
self.ip_to_sp_dups_del[obj.h]=[]
|
self.ip_to_sp_dups_del[obj.h]=[]
|
||||||
self.ip_to_sp_dups_del[obj.h].append( obj )
|
self.ip_to_sp_dups_del[obj.h].append( obj )
|
||||||
@@ -157,16 +156,16 @@ class Duplicates:
|
|||||||
return
|
return
|
||||||
|
|
||||||
# this function takes a duplicate file (in the import path and the storage path)
|
# this function takes a duplicate file (in the import path and the storage path)
|
||||||
# and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInSameDups()
|
# and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
|
||||||
# and then puts the import path file in the delete list (self.ip_to_sp_dups_del) via self.DelInSameDups()
|
# and then puts the import path file in the delete list (self.ip_to_sp_dups_del) via self.DelInIPSPDups()
|
||||||
def DupInImportAndStoragePath( self, row, dr1, dr2 ):
|
def DupInImportAndStoragePath( self, row, dr1, dr2 ):
|
||||||
if self.InStoragePath(row.path1) and self.InImportPath(row.path2):
|
if self.InStoragePath(row.path1) and self.InImportPath(row.path2):
|
||||||
self.KeepInSameDups( dr1 )
|
self.KeepInIPSPDups( dr1 )
|
||||||
self.DelInSameDups( dr2 )
|
self.DelInIPSPDups( dr2 )
|
||||||
return True
|
return True
|
||||||
if self.InStoragePath(row.path2) and self.InImportPath(row.path1):
|
if self.InStoragePath(row.path2) and self.InImportPath(row.path1):
|
||||||
self.KeepInSameDups( dr2 )
|
self.KeepInIPSPDups( dr2 )
|
||||||
self.DelInSameDups( dr1 )
|
self.DelInIPSPDups( dr1 )
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -180,9 +179,6 @@ class Duplicates:
|
|||||||
if self.DupInImportAndStoragePath( row, dr1, dr2 ):
|
if self.DupInImportAndStoragePath( row, dr1, dr2 ):
|
||||||
return
|
return
|
||||||
|
|
||||||
# if we are here, we have duplicates either in the storage path or in
|
|
||||||
# the import path
|
|
||||||
|
|
||||||
# if the hash is not in dups_to_process, create the list / append
|
# if the hash is not in dups_to_process, create the list / append
|
||||||
if row.hash not in self.dups_to_process:
|
if row.hash not in self.dups_to_process:
|
||||||
self.dups_to_process[row.hash]=[]
|
self.dups_to_process[row.hash]=[]
|
||||||
@@ -208,7 +204,7 @@ class Duplicates:
|
|||||||
return
|
return
|
||||||
|
|
||||||
def AddDupPath(self, hash):
|
def AddDupPath(self, hash):
|
||||||
# this gets complex, if this hash is also in a sahred imp / sp - then dont deal with it now, let the imp files be deleted and
|
# this gets complex, if this hash is also in a shared imp / sp - then dont deal with it now, let the imp files be deleted and
|
||||||
# the repeat check_dups validation step will catch it as a cleaner (potential) case of still more duplicates just in sp
|
# the repeat check_dups validation step will catch it as a cleaner (potential) case of still more duplicates just in sp
|
||||||
if hash in self.ip_to_sp_dups_keep:
|
if hash in self.ip_to_sp_dups_keep:
|
||||||
return
|
return
|
||||||
@@ -228,14 +224,15 @@ class Duplicates:
|
|||||||
self.preferred_path[dpr.did1]=1
|
self.preferred_path[dpr.did1]=1
|
||||||
if re.search( r'\d{4}/\d{8}', dpr.d2):
|
if re.search( r'\d{4}/\d{8}', dpr.d2):
|
||||||
self.preferred_path[dpr.did2]=1
|
self.preferred_path[dpr.did2]=1
|
||||||
|
return
|
||||||
|
|
||||||
def SecondPass(self):
|
def SecondPass(self):
|
||||||
for hash in self.dups_to_process:
|
for hash in self.dups_to_process:
|
||||||
# more than 2 files (just ask per file) OR only 2 copies, and files are in same dir (so must be diff name, so just ask) OR content same, filename different (ask per file)
|
# more than 2 files (just ask per file) OR only 2 copies, and files are in same dir (so must be diff name, so just ask) OR content same, filename different (ask per file)
|
||||||
if (len(self.dups_to_process[hash]) > 2) or (self.dups_to_process[hash][0].f != self.dups_to_process[hash][1].f) or (self.dups_to_process[hash][0].d == self.dups_to_process[hash][1].d):
|
if (len(self.dups_to_process[hash]) > 2) or (self.dups_to_process[hash][0].f != self.dups_to_process[hash][1].f) or (self.dups_to_process[hash][0].d == self.dups_to_process[hash][1].d):
|
||||||
self.per_file_dups.append(self.dups_to_process[hash])
|
|
||||||
self.overall_dup_cnt += len(self.dups_to_process[hash])
|
|
||||||
self.overall_dup_sets += 1
|
self.overall_dup_sets += 1
|
||||||
|
self.overall_dup_cnt += len(self.dups_to_process[hash])
|
||||||
|
self.per_file_dups.append(self.dups_to_process[hash])
|
||||||
for el in self.dups_to_process[hash]:
|
for el in self.dups_to_process[hash]:
|
||||||
if re.search( r'\d{4}/\d{8}', el.d):
|
if re.search( r'\d{4}/\d{8}', el.d):
|
||||||
self.preferred_file[hash] = el.id
|
self.preferred_file[hash] = el.id
|
||||||
|
|||||||
Reference in New Issue
Block a user