okay, the fix_dups page now has functioning pagination, highlights files whose paths match the "good" regex in green, and marks a file yellow when we can't find the right one to keep, so it clearly shows where to really pay attention. Also has a DBox-based help page, and is overall just a better UI/UX

2021-03-06 17:18:11 +11:00
parent b9dea327d0
commit 76aee3a10a
4 changed files with 77 additions and 52 deletions


@@ -15,6 +15,7 @@ import base64
 import numpy
 import cv2
 import time
+import re
 ################################################################################
 # Local Class imports
@@ -271,7 +272,10 @@ def fix_dups():
     jexes = JobExtra.query.join(Job).join(PA_JobManager_Message).filter(PA_JobManager_Message.id==request.form['fe_msg_id']).all()
     path=[jex.value for jex in jexes if jex.name == "path"][0]
     prefix = SymlinkName(path,path+'/')
-    pagesize=int([jex.value for jex in jexes if jex.name == "pagesize"][0])
+    if 'pagesize' not in request.form:
+        pagesize=int([jex.value for jex in jexes if jex.name == "pagesize"][0])
+    else:
+        pagesize=int(request.form['pagesize'])
     dups={}
     for row in rows:
         AddDup( prefix+'/', row, dups )
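The new branch above gives a pagesize posted with the form precedence over the value stored against the job. A minimal standalone sketch of that precedence (resolve_pagesize and its arguments are illustrative names, not code from this repo):

def resolve_pagesize(form, jexes):
    # Prefer an explicit value posted with the request, e.g. from the
    # pagination controls added to the dups page.
    if 'pagesize' in form:
        return int(form['pagesize'])
    # Otherwise fall back to the value stored as a JobExtra row.
    return int(next(jex.value for jex in jexes if jex.name == "pagesize"))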
@@ -282,27 +286,25 @@ def fix_dups():
     did2=""
     str=""
     dup_cnt=1
+    preferred={}
     per_file_dups=[]
     per_path_dups=[]
     hashes=""
     overall_dup_cnt=0
     overall_dup_sets=0
     for hash in dups:
-        # more than 2 files (just ask per file)
-        if len(dups[hash]) > 2:
-            per_file_dups.append(dups[hash])
-            overall_dup_cnt += len(dups[hash])
-            overall_dup_sets += 1
-        # only 2 copies, and files are in same dir (so must be diff name, so just ask)
-        elif dups[hash][0]['d'] == dups[hash][1]['d']:
-            per_file_dups.append(dups[hash])
-            overall_dup_cnt += len(dups[hash])
-            overall_dup_sets += 1
-        # content same, filename different (just ask per file)
-        elif dups[hash][0]['f'] != dups[hash][1]['f']:
+        # more than 2 files (just ask per file) OR
+        # only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
+        # content same, filename different (just ask per file)
+        if (len(dups[hash]) > 2) or (dups[hash][0]['d'] == dups[hash][1]['d']) or (dups[hash][0]['f'] != dups[hash][1]['f']):
             per_file_dups.append(dups[hash])
             overall_dup_cnt += len(dups[hash])
             overall_dup_sets += 1
+            for el in dups[hash]:
+                if re.search( '\d{4}/\d{8}', el['d']):
+                    preferred[hash] = el['id']
+                    if overall_dup_cnt<5:
+                        print( f"{dups[hash]} <- keeping {el['d']} -- {preferred[hash]}" )
         # by here we have only 2 files, with the same name, different path
         # (MOST COMMON, and I think we dont care per file, just per path)
         elif d1 != dups[hash][0]['d']:
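The preferred-copy test above keys off a four-digit directory followed by an eight-digit one (a YYYY/YYYYMMDD archive layout, on my reading) as the copy to keep. A quick illustration with made-up paths:

import re

for d in ("2021/20210306", "holiday/pics"):
    print(d, bool(re.search(r'\d{4}/\d{8}', d)))
# prints: 2021/20210306 True
#         holiday/pics False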
@@ -326,7 +328,7 @@ def fix_dups():
     overall_dup_sets += dup_cnt
     per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
-    return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
+    return render_template("dups.html", per_file_dups=per_file_dups, preferred=preferred, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )

 @app.route("/rm_dups", methods=["POST"])
 def rm_dups():
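dups.html itself isn't in this diff, but the preferred dict it now receives maps each content hash to the id of the file matched by the regex, which is what drives the green/yellow highlighting from the commit message. A rough Python sketch of that decision (the real logic presumably lives in the template):

def row_colour(file_hash, file_id, preferred):
    # Green: this file matched the "good" path regex, so it's the one to keep.
    if preferred.get(file_hash) == file_id:
        return "green"
    # Yellow: no preferred copy found for this set, so it needs a closer look.
    if file_hash not in preferred:
        return "yellow"
    # Otherwise it's a duplicate of a preferred file; no highlight.
    return None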