first pass of using Duplicate class, rather than files doing all the dup work. The html still is shown preferreds, and does not know there are preferred files and preferred dirs yet

This commit is contained in:
2021-03-15 20:36:10 +11:00
parent 046c512e6b
commit 08dc646371
2 changed files with 110 additions and 146 deletions

View File

@@ -281,66 +281,10 @@ def fix_dups():
for row in rows:
D.AddDup( row )
print( D.Dump() )
D.SecondPass()
# print( D.Dump() )
d1=""
d2=""
did1=""
did2=""
str=""
dup_cnt=1
preferred={}
per_file_dups=[]
per_path_dups=[]
hashes=""
overall_dup_cnt=0
overall_dup_sets=0
return render_template("dups.html", per_file_dups=per_file_dups, preferred=preferred, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
"""
dups={}
for row in rows:
AddDup( prefix+'/', row, dups )
for hash in dups:
# more than 2 files (just ask per file) OR
# only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
# content same, filename different (just ask per file)
if (len(dups[hash]) > 2) or (dups[hash][0]['d'] == dups[hash][1]['d']) or (dups[hash][0]['f'] != dups[hash][1]['f']):
per_file_dups.append(dups[hash])
overall_dup_cnt += len(dups[hash])
overall_dup_sets += 1
for el in dups[hash]:
if re.search( '\d{4}/\d{8}', el['d']):
preferred[hash] = el['id']
if overall_dup_cnt<5:
print( f"{dups[hash]} <- keeping {el['d']} -- {preferred[hash]}" )
# by here we have only 2 files, with the same name, different path
# (MOST COMMON, and I think we dont care per file, just per path)
elif d1 != dups[hash][0]['d']:
if d1 != '':
overall_dup_cnt += dup_cnt
overall_dup_sets += 1
per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
dup_cnt=1
d1 = dups[hash][0]['d']
d2 = dups[hash][1]['d']
did1 = dups[hash][0]['did']
did2 = dups[hash][1]['did']
str=f"duplicates found in {d1} and {d2}"
hashes = f"{hash},"
else:
dup_cnt += 1
hashes += f"{hash},"
if d1 != '':
overall_dup_cnt += dup_cnt
overall_dup_sets += dup_cnt
per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
return render_template("dups.html", per_file_dups=per_file_dups, preferred=preferred, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
"""
return render_template("dups.html", per_file_dups=D.per_file_dups, preferred=D.preferred_file, per_path_dups=D.per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=D.overall_dup_cnt, overall_dup_sets=D.overall_dup_sets, pagesize=pagesize )
@app.route("/rm_dups", methods=["POST"])
def rm_dups():