diff --git a/dups.py b/dups.py
index 93d2fa9..774272a 100644
--- a/dups.py
+++ b/dups.py
@@ -24,6 +24,7 @@
 from job import Job, JobExtra, Joblog, NewJob
 from settings import Settings
 from shared import SymlinkName
+################################################################################
 # DupRow class is a simple 'struct' to keep data per duplicate file / just to
 # avoid using python list/dicts intermixed, and be able to consistently use
 # dot-notation of fields
@@ -35,7 +36,28 @@ class DupRow:
         self.d=dir
         self.did=did
         self.id=fid
-
+        return
+
+    def __repr__(self):
+        return f"DupRow( id: {self.id}, did: {self.did} )"
+
+################################################################################
+# DupPathRow class is a simple 'struct' to keep data per pair of paths that hold
+# duplicate files, just to avoid using python list/dicts intermixed, and be able
+# to consistently use dot-notation of fields
+class DupPathRow:
+    def __init__(self, count, d1, d2, did1, did2, hashes ):
+        self.count=count
+        self.d1=d1
+        self.d2=d2
+        self.did1=did1
+        self.did2=did2
+        self.hashes=hashes
+
+    def __repr__(self):
+        return f"DupPathRow( did1: {self.did1}, did2: {self.did2} )"
+
+################################################################################
 # Duplicates class is used with one instance/object to process all the
 # 'duplicate' data from the Database, and parse it into more usable data
 # structures. This is needed also, as the database content shows duplicates
@@ -67,10 +89,13 @@ class Duplicates:
         self.dups_to_process={}
         self.per_file_dups=[]
         self.per_path_dups=[]
-        self.preferred={}
+        self.preferred_file={}
+        self.preferred_path={}
         self.all_paths=[]
         self.storage_paths=[]
         self.import_paths=[]
+        self.overall_dup_cnt=0
+        self.overall_dup_sets=0
 
         # pull apart the storage path Setting, and make array of each for use in TrimmedPath()
         settings=Settings.query.first()
@@ -108,12 +133,10 @@ class Duplicates:
                 return True
         return False
 
-    # this stores this object into the keep from same path list (only ever 1)
+    # this stores this object into the keep from same path list (DDP: could there be more than 1?)
    def KeepInSameDups( self, obj ):
         if obj.h not in self.ip_to_sp_dups_keep:
             self.ip_to_sp_dups_keep[obj.h]= obj
-        else:
-            print( f"DDP: we need to cater for this - 2 files to keep in the storage path, if they are different, then pull these out of here and put them in the in_same_dup list to manually process" )
         return
 
     # this stores this object into the Delete from same path list (if it is not
@@ -180,98 +203,95 @@
                 self.dups_to_process[row.hash].append( dr2 )
         return
 
+    def SecondPass(self):
+        print("################################## second pass starting")
+        d1=""
+        d2=""
+        did1=""
+        did2=""
+        msg=""
+        dup_cnt=1
+        hashes=""
+        for hash in self.dups_to_process:
+            if self.overall_dup_cnt<2:
+                print(f"process {hash}")
+            # more than 2 files (just ask per file) OR
+            # only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
+            # content same, filename different (just ask per file)
+            if (len(self.dups_to_process[hash]) > 2) or (self.dups_to_process[hash][0].f != self.dups_to_process[hash][1].f) or (self.dups_to_process[hash][0].d == self.dups_to_process[hash][1].d):
+                self.per_file_dups.append(self.dups_to_process[hash])
+                self.overall_dup_cnt += len(self.dups_to_process[hash])
+                self.overall_dup_sets += 1
+                if self.overall_dup_cnt<2:
+                    print( f"process as len(el)={len(self.dups_to_process[hash])}" )
+                for el in self.dups_to_process[hash]:
+                    if re.search( r'\d{4}/\d{8}', el.d):
+                        self.preferred_file[hash] = el.id
+                        if self.overall_dup_cnt<25:
+                            print( f"{self.dups_to_process[hash]} <- keeping {el.id} -- {self.preferred_file[hash]}" )
+            # by here we have only 2 files, with the same name, different path
+            # (MOST COMMON, and I think we dont care per file, just per path)
+            elif d1 != self.dups_to_process[hash][0].d:
+                if d1 != '':
+                    self.overall_dup_cnt += dup_cnt
+                    self.overall_dup_sets += 1
+                    self.per_path_dups.append( DupPathRow( dup_cnt, d1, d2, did1, did2, hashes ) )
+                    if re.search( r'\d{4}/\d{8}', d1):
+                        self.preferred_path[did1]=1
+                    if re.search( r'\d{4}/\d{8}', d2):
+                        self.preferred_path[did2]=1
+                dup_cnt=1
+                d1 = self.dups_to_process[hash][0].d
+                d2 = self.dups_to_process[hash][1].d
+                did1 = self.dups_to_process[hash][0].did
+                did2 = self.dups_to_process[hash][1].did
+                msg=f"duplicates found in {d1} and {d2}"
+                hashes = f"{hash},"
+            else:
+                dup_cnt += 1
+                hashes += f"{hash},"
+
+        if d1 != '':
+            self.overall_dup_cnt += dup_cnt
+            self.overall_dup_sets += 1    # the final, unflushed path pair is one more set (matches the += 1 inside the loop)
+            self.per_path_dups.append( DupPathRow( dup_cnt, d1, d2, did1, did2, hashes ) )
+        print("#################### second pass FINISHED")
+        return
+
     # quick debugger to see the data in the data structure
     def Dump(self):
         if len(self.ip_to_sp_dups_keep) > 0:
             print( "############ Files that are in both Import and Storage Paths ###########")
-            cnt=0
             for h in self.ip_to_sp_dups_keep:
-                cnt +=1
                 if len(self.ip_to_sp_dups_del[h])>2:
                     print( f"hash={h}, keep: {self.ip_to_sp_dups_keep[h]}" )
                     for d in self.ip_to_sp_dups_del[h]:
                         print( f"Del: {d}" )
-            print( f"{cnt} sets of duplicate files to delete at least 1, anything with 2 or more dups is printed above explicitly" )
+            print( f"{len(self.ip_to_sp_dups_keep)} sets of duplicate files to delete at least 1, anything with 2 or more dups is printed above explicitly" )
+
         if len(self.dups_to_process) > 0:
             print( "############ Duplicate Files that are needing to be futher processed ###########")
-            cnt=0
             for h in self.dups_to_process:
-                cnt +=1
-                if len(self.dups_to_process[h])>2:
-                    print( f"hash={h}, keep 1 of these: ", end='')
-                    for d in self.dups_to_process[h]:
-                        print( f"{d.id}, ", end='' )
-                    print ("")
-            print( f"{cnt} sets of duplicate files to delete at least 1, anything with 2 or more dups is printed above explicitly" )
+                print( f"hash={h}, keep 1 of these: ", end='')
+                for d in self.dups_to_process[h]:
+                    print( f"{d.id}, ", end='' )
+                print ("")
+            print( f"{len(self.dups_to_process)} sets of duplicate files to delete at least 1, anything with 2 or more dups is printed above explicitly" )
+
+        if len(self.preferred_file) > 0:
+            for h in self.preferred_file:
+                print( f"hash={h}, keep this one: {self.preferred_file[h]} from ", end='' )
+                for d in self.dups_to_process[h]:
+                    print( f"{d.id}, ", end='' )
+                print ("")
+            print( f"{len(self.preferred_file)} duplicate files we will keep as they match the regexp" )
+
+        if len(self.per_path_dups) > 0:
+            for pair in self.per_path_dups:
+                print( f"{pair.count} dups in dir1: {pair.did1} dir2: {pair.did2}" )
+                if pair.did1 in self.preferred_path:
+                    print("Keep dir1")
+                if pair.did2 in self.preferred_path:
+                    print("Keep dir2")
+            print( f"{len(self.per_path_dups)} duplicate files in per path dups" )
         return
-
-
-"""
-@app.route("/fix_dups", methods=["POST"])
-def fix_dups():
-    rows = db.engine.execute( "select e1.id as id1, f1.hash, d1.path_prefix as path1, d1.eid as did1, e1.name as fname1, e2.id as id2, d2.path_prefix as path2, d2.eid as did2, e2.name as fname2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb order by path1, fname1" )
-
-    if rows.returns_rows == False:
-        st.SetAlert("success")
-        st.SetMessage(f"Err, no dups - should now clear the FE 'danger' message?")
-        return render_template("base.html")
-
-    jexes = JobExtra.query.join(Job).join(PA_JobManager_Message).filter(PA_JobManager_Message.id==request.form['fe_msg_id']).all()
-    path=[jex.value for jex in jexes if jex.name == "path"][0]
-    prefix = SymlinkName(path,path+'/')
-    if 'pagesize' not in request.form:
-        pagesize=int([jex.value for jex in jexes if jex.name == "pagesize"][0])
-    else:
-        pagesize=int(request.form['pagesize'])
-    dups={}
-    for row in rows:
-        AddDup( prefix+'/', row, dups )
-
-    d1=""
-    d2=""
-    did1=""
-    did2=""
-    str=""
-    dup_cnt=1
-    preferred={}
-    per_file_dups=[]
-    per_path_dups=[]
-    hashes=""
-    overall_dup_cnt=0
-    overall_dup_sets=0
-    for hash in dups:
-        # more than 2 files (just ask per file) OR
-        # only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
-        # content same, filename different (just ask per file)
-        if (len(dups[hash]) > 2) or (dups[hash][0]['d'] == dups[hash][1]['d']) or (dups[hash][0]['f'] != dups[hash][1]['f']):
-            per_file_dups.append(dups[hash])
-            overall_dup_cnt += len(dups[hash])
-            overall_dup_sets += 1
-            for el in dups[hash]:
-                if re.search( '\d{4}/\d{8}', el['d']):
-                    preferred[hash] = el['id']
-                    if overall_dup_cnt<5:
-                        print( f"{dups[hash]} <- keeping {el['d']} -- {preferred[hash]}" )
-        # by here we have only 2 files, with the same name, different path
-        # (MOST COMMON, and I think we dont care per file, just per path)
-        elif d1 != dups[hash][0]['d']:
-            if d1 != '':
-                overall_dup_cnt += dup_cnt
-                overall_dup_sets += 1
-                per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
-            dup_cnt=1
-            d1 = dups[hash][0]['d']
-            d2 = dups[hash][1]['d']
-            did1 = dups[hash][0]['did']
-            did2 = dups[hash][1]['did']
-            str=f"duplicates found in {d1} and {d2}"
-            hashes = f"{hash},"
-        else:
-            dup_cnt += 1
-            hashes += f"{hash},"
-
-    if d1 != '':
-        overall_dup_cnt += dup_cnt
-        overall_dup_sets += dup_cnt
-        per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
-"""
diff --git a/files.py b/files.py
index 60b16c5..631c3d1 100644
--- a/files.py
+++ b/files.py
@@ -281,66 +281,10 @@ def fix_dups():
     for row in rows:
         D.AddDup( row )
 
-    print( D.Dump() )
+    D.SecondPass()
+#    print( D.Dump() )
 
-    d1=""
-    d2=""
-    did1=""
-    did2=""
-    str=""
-    dup_cnt=1
-    preferred={}
-    per_file_dups=[]
-    per_path_dups=[]
-    hashes=""
-    overall_dup_cnt=0
-    overall_dup_sets=0
-
-    return render_template("dups.html", per_file_dups=per_file_dups, preferred=preferred, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
-
-"""
-    dups={}
-    for row in rows:
-        AddDup( prefix+'/', row, dups )
-    for hash in dups:
-        # more than 2 files (just ask per file) OR
-        # only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
-        # content same, filename different (just ask per file)
-        if (len(dups[hash]) > 2) or (dups[hash][0]['d'] == dups[hash][1]['d']) or (dups[hash][0]['f'] != dups[hash][1]['f']):
-            per_file_dups.append(dups[hash])
-            overall_dup_cnt += len(dups[hash])
-            overall_dup_sets += 1
-            for el in dups[hash]:
-                if re.search( '\d{4}/\d{8}', el['d']):
-                    preferred[hash] = el['id']
-                    if overall_dup_cnt<5:
-                        print( f"{dups[hash]} <- keeping {el['d']} -- {preferred[hash]}" )
-        # by here we have only 2 files, with the same name, different path
-        # (MOST COMMON, and I think we dont care per file, just per path)
-        elif d1 != dups[hash][0]['d']:
-            if d1 != '':
-                overall_dup_cnt += dup_cnt
-                overall_dup_sets += 1
-                per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
-            dup_cnt=1
-            d1 = dups[hash][0]['d']
-            d2 = dups[hash][1]['d']
-            did1 = dups[hash][0]['did']
-            did2 = dups[hash][1]['did']
-            str=f"duplicates found in {d1} and {d2}"
-            hashes = f"{hash},"
-        else:
-            dup_cnt += 1
-            hashes += f"{hash},"
-
-    if d1 != '':
-        overall_dup_cnt += dup_cnt
-        overall_dup_sets += dup_cnt
-        per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
-
-    return render_template("dups.html", per_file_dups=per_file_dups, preferred=preferred, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
-
-"""
+    return render_template("dups.html", per_file_dups=D.per_file_dups, preferred=D.preferred_file, per_path_dups=D.per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=D.overall_dup_cnt, overall_dup_sets=D.overall_dup_sets, pagesize=pagesize )
 
 @app.route("/rm_dups", methods=["POST"])
 def rm_dups():
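
Note, separate from the patch above: the per-hash classification rules that SecondPass() applies are easy to sanity-check in isolation. The sketch below is an illustrative, self-contained approximation only; the DupRow namedtuple, the classify() helper and the sample data are hypothetical stand-ins, not the application's classes or database rows.

# sanity_check_dups.py -- standalone sketch, not part of dups.py
import re
from collections import namedtuple

# Stand-in for the real DupRow: hash, filename, directory, dir id, file id
DupRow = namedtuple("DupRow", "h f d did id")

# Same pattern SecondPass uses to prefer copies stored under YYYY/YYYYMMDD dirs
DATED_DIR = re.compile(r'\d{4}/\d{8}')

def classify(dups_by_hash):
    per_file, per_path, preferred_file = [], {}, {}
    for h, rows in dups_by_hash.items():
        more_than_two = len(rows) > 2
        same_dir      = rows[0].d == rows[1].d
        diff_name     = rows[0].f != rows[1].f
        if more_than_two or same_dir or diff_name:
            # handled per file; remember any copy already sitting in a dated dir
            per_file.append(rows)
            for r in rows:
                if DATED_DIR.search(r.d):
                    preferred_file[h] = r.id
        else:
            # exactly 2 copies, same name, different dirs: group by the dir pair
            per_path.setdefault((rows[0].did, rows[1].did), []).append(h)
    return per_file, per_path, preferred_file

if __name__ == "__main__":
    sample = {
        "aa11": [DupRow("aa11", "x.jpg", "import/misc",   7, 1),
                 DupRow("aa11", "x.jpg", "2019/20190409", 9, 2)],
        "bb22": [DupRow("bb22", "a.jpg", "import/misc",   7, 3),
                 DupRow("bb22", "a.jpg", "import/other",  8, 4),
                 DupRow("bb22", "a.jpg", "2019/20190409", 9, 5)],
    }
    per_file, per_path, preferred = classify(sample)
    print(per_file)    # bb22's three copies -> handled per file
    print(per_path)    # {(7, 9): ['aa11']}  -> one directory pair
    print(preferred)   # {'bb22': 5}         -> the copy in 2019/20190409

One deliberate difference: the patch relies on the query's ORDER BY so that hashes from the same directory pair arrive consecutively and get flushed into one DupPathRow, whereas the sketch groups by the (did1, did2) key, which gives the same per-path grouping without depending on row order.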