From 7014eb0f35dca5c5fe35f8e5a000a544a9aee9aa Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Sat, 13 Feb 2021 20:21:08 +1100
Subject: [PATCH] now have a partial fix_dups path: it shows the content in a
 much more reasonable manner and allows the GUI to select the files/paths to
 keep. HOWEVER, the form POST is not enabled and I still need to process the
 form data -- right now, not sure how to know which files to delete vs keep
 -> will need hidden vars of the options, not just the to_keep, then process
 them

---
 TODO     |  4 ++-
 files.py | 90 ++++++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 77 insertions(+), 17 deletions(-)

diff --git a/TODO b/TODO
index 3e5207d..338c16a 100644
--- a/TODO
+++ b/TODO
@@ -13,7 +13,9 @@
 - without debugs: import == 04:03, getfiledetails == 0:35:36 -- not a sig diff
 - with exifread & debug: import == 04:26
 
-	* CheckForDups() needs to allow the f/end to actually do the work, and then clear the MessageToFE() as well
+	* CheckForDups():
+		in files.py
+		-> need to process the form and ACT on it (by deleting files)
 
 	* try again with walk to go through loop once quickly just to add up files,
 	* then start the import dir counting up / progress

diff --git a/files.py b/files.py
index eac4477..37716fc 100644
--- a/files.py
+++ b/files.py
@@ -159,28 +159,86 @@ def forcescan():
     st.SetMessage("force scan & rebuild data for files in: Job #{} (Click the link to follow progress)".format( job.id, job.id) )
     return render_template("base.html")
+
+def TrimmedPath( prefix, path ):
+    return path.replace(prefix, '' )
+
+def AddDup( prefix, row, dups ):
+    if row.hash not in dups:
+        dups[row.hash]=[]
+        dups[row.hash].append( { 'f': row.fname1, 'd':TrimmedPath(prefix, row.path1) } )
+        dups[row.hash].append( { 'f': row.fname2, 'd':TrimmedPath(prefix, row.path2) } )
+    else:
+        # process path1 / fname1 -- if that combo is not in the dups[hash], add it
+        found=0
+        for dup in dups[row.hash]:
+            if dup['f'] == row.fname1 and dup['d'] == TrimmedPath(prefix, row.path1):
+                found=1
+                break
+        if not found:
+            dups[row.hash].append( { 'f': row.fname1, 'd':TrimmedPath(prefix, row.path1) } )
+
+        # process path2 / fname2 -- if that combo is not in the dups[hash], add it
+        found=0
+        for dup in dups[row.hash]:
+            if dup['f'] == row.fname2 and dup['d'] == TrimmedPath(prefix, row.path2):
+                found=1
+                break
+        if not found:
+            dups[row.hash].append( { 'f': row.fname2, 'd':TrimmedPath(prefix, row.path2) } )
+    return
+
 @app.route("/fix_dups", methods=["GET"])
 def fix_dups():
-#    dups = db.engine.execute.session.execute( "select d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as name2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" )
+    rows = db.engine.execute( "select f1.hash, d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as fname2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" ).fetchall()
 
-#    if len(dups) > 0:
-#        ActionForFE( job, dups, "danger", "Found duplicate(s), click here to finalise import by removing duplicates" )
-#
p1="" -# done=list() -# for dup in dups: -# if p1 != dup.path1: -# p1 = dup.path1 -# p2 = dup.path2 -# # this is the flip-side of a previous p1 <-> p2 dup (this p2 is a previous p1) -# if p2 in done: -# continue -# done.append(p1) -# print(f"Duplicates in: {p1} <-> {p2}") + if rows.returns_rows == False: + st.SetAlert("success") + st.SetMessage(f"Err, no dups - should now clear the FE 'danger' message?") + return render_template("base.html") + # use import_path setting to remove the dup path prefix of static/basename() + # -- static isn't really seen, and the import path basename is always going to be the same + s=Settings.query.first() + print (s.import_path) + if s.import_path[-1] == '/': + prefix = os.path.basename(s.import_path[0:-1]) + else: + prefix = os.path.basename(s.import_path) + prefix=f"static/{prefix}/" + dups={} + for row in rows: + AddDup( prefix, row, dups ) + + d1="" + d2="" + str="" + dup_cnt=1 + per_file_dups=[] + per_path_dups=[] + for hash in dups: + if len(dups[hash]) > 2: + per_file_dups.append(dups[hash]) + elif dups[hash][0]['d'] == dups[hash][1]['d']: + per_file_dups.append(dups[hash]) + elif dups[hash][0]['f'] != dups[hash][1]['f']: + per_file_dups.append(dups[hash]) + # by here we have only 2 files, with the same name, different path + # (MOST COMMON, and I think we dont care per file, just per path) + elif d1 != dups[hash][0]['d']: + if d1 != '': + dup_cnt=1 + per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2 }) + d1 = dups[hash][0]['d'] + d2 = dups[hash][1]['d'] + str=f"duplicates found in {d1} and {d2}" + else: + dup_cnt += 1 + + per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2 }) st.SetAlert("warning") st.SetMessage("Not Yet!") - return render_template("base.html") - + return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups) @app.route("/move_files", methods=["POST"]) def move_files():