okay, the fix_dups page now has functioning pagination, highlights files whose paths match the "good" regex in green, and marks a file yellow when we can't find the right one to keep, so it clearly shows where to really pay attention. Also has a DBox-based help page, and is overall just a better UI/UX

2021-03-06 17:18:11 +11:00
parent b9dea327d0
commit 76aee3a10a
4 changed files with 77 additions and 52 deletions


@@ -15,6 +15,7 @@ import base64
 import numpy
 import cv2
 import time
+import re
 ################################################################################
 # Local Class imports
@@ -271,7 +272,10 @@ def fix_dups():
     jexes = JobExtra.query.join(Job).join(PA_JobManager_Message).filter(PA_JobManager_Message.id==request.form['fe_msg_id']).all()
     path=[jex.value for jex in jexes if jex.name == "path"][0]
     prefix = SymlinkName(path,path+'/')
-    pagesize=int([jex.value for jex in jexes if jex.name == "pagesize"][0])
+    if 'pagesize' not in request.form:
+        pagesize=int([jex.value for jex in jexes if jex.name == "pagesize"][0])
+    else:
+        pagesize=int(request.form['pagesize'])
     dups={}
     for row in rows:
         AddDup( prefix+'/', row, dups )
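The new branch above gives a pagesize posted with the form precedence over the value stored against the job. A minimal standalone sketch of that precedence (resolve_pagesize and its arguments are illustrative names, not code from this repo):

def resolve_pagesize(form, jexes):
    # Prefer an explicit value posted with the request, e.g. from the
    # pagination controls added to the dups page.
    if 'pagesize' in form:
        return int(form['pagesize'])
    # Otherwise fall back to the value stored as a JobExtra row.
    return int(next(jex.value for jex in jexes if jex.name == "pagesize"))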
@@ -282,27 +286,25 @@ def fix_dups():
     did2=""
     str=""
     dup_cnt=1
+    preferred={}
     per_file_dups=[]
     per_path_dups=[]
     hashes=""
     overall_dup_cnt=0
     overall_dup_sets=0
     for hash in dups:
-        # more than 2 files (just ask per file)
-        if len(dups[hash]) > 2:
-            per_file_dups.append(dups[hash])
-            overall_dup_cnt += len(dups[hash])
-            overall_dup_sets += 1
-        # only 2 copies, and files are in same dir (so must be diff name, so just ask)
-        elif dups[hash][0]['d'] == dups[hash][1]['d']:
-            per_file_dups.append(dups[hash])
-            overall_dup_cnt += len(dups[hash])
-            overall_dup_sets += 1
-        # content same, filename different (just ask per file)
-        elif dups[hash][0]['f'] != dups[hash][1]['f']:
+        # more than 2 files (just ask per file) OR
+        # only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
+        # content same, filename different (just ask per file)
+        if (len(dups[hash]) > 2) or (dups[hash][0]['d'] == dups[hash][1]['d']) or (dups[hash][0]['f'] != dups[hash][1]['f']):
             per_file_dups.append(dups[hash])
             overall_dup_cnt += len(dups[hash])
             overall_dup_sets += 1
+            for el in dups[hash]:
+                if re.search( '\d{4}/\d{8}', el['d']):
+                    preferred[hash] = el['id']
+                    if overall_dup_cnt<5:
+                        print( f"{dups[hash]} <- keeping {el['d']} -- {preferred[hash]}" )
         # by here we have only 2 files, with the same name, different path
         # (MOST COMMON, and I think we dont care per file, just per path)
         elif d1 != dups[hash][0]['d']:
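The preferred-copy test above keys off a four-digit directory followed by an eight-digit one (a YYYY/YYYYMMDD archive layout, on my reading) as the copy to keep. A quick illustration with made-up paths:

import re

for d in ("2021/20210306", "holiday/pics"):
    print(d, bool(re.search(r'\d{4}/\d{8}', d)))
# prints: 2021/20210306 True
#         holiday/pics False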
@@ -326,7 +328,7 @@ def fix_dups():
     overall_dup_sets += dup_cnt
     per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
-    return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
+    return render_template("dups.html", per_file_dups=per_file_dups, preferred=preferred, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )

 @app.route("/rm_dups", methods=["POST"])
 def rm_dups():
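dups.html itself isn't in this diff, but the preferred dict it now receives maps each content hash to the id of the file matched by the regex, which is what drives the green/yellow highlighting from the commit message. A rough Python sketch of that decision (the real logic presumably lives in the template):

def row_colour(file_hash, file_id, preferred):
    # Green: this file matched the "good" path regex, so it's the one to keep.
    if preferred.get(file_hash) == file_id:
        return "green"
    # Yellow: no preferred copy found for this set, so it needs a closer look.
    if file_hash not in preferred:
        return "yellow"
    # Otherwise it's a duplicate of a preferred file; no highlight.
    return None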