Now have a partial fix_dups path: it shows the content in a much more reasonable manner and allows the GUI to select the files/paths to keep. HOWEVER, the form POST is not enabled, and I still need to process the form data — right now I'm not sure how to know which files to delete vs keep; will need hidden vars for the options, not just the to_keep, then process them.

This commit is contained in:
2021-02-13 20:21:08 +11:00
parent 0fe5d97317
commit 7014eb0f35
2 changed files with 77 additions and 17 deletions

View File

@@ -159,28 +159,86 @@ def forcescan():
st.SetMessage("force scan & rebuild data for files in:&nbsp;<a href=/job/{}>Job #{}</a>&nbsp;(Click the link to follow progress)".format( job.id, job.id) )
return render_template("base.html")
def TrimmedPath( prefix, path ):
    """Return *path* with every occurrence of *prefix* removed.

    Used to drop the "static/<import-dir>/" noise from directory paths
    before they are shown to the user.
    """
    trimmed = path.replace(prefix, '')
    return trimmed
def AddDup( prefix, row, dups ):
    """Collect one duplicate-pair DB row into *dups*, keyed by content hash.

    Each row names two files with the same hash (fname1/path1 and
    fname2/path2).  Both sides are appended to dups[row.hash] as
    {'f': filename, 'd': trimmed-directory} dicts, skipping entries that
    are already recorded — the SQL self-join reports every pair twice
    (A,B and B,A), so de-duplication here is essential.

    Fixes over the previous version: the membership scans used `continue`
    where `break` was intended (they never short-circuited), the
    path1/path2 handling was copy-pasted, and the first-insert branch
    skipped the duplicate check entirely.
    """
    entries = dups.setdefault(row.hash, [])
    for fname, path in ((row.fname1, row.path1), (row.fname2, row.path2)):
        # Same trimming rule as TrimmedPath(): drop the import-path prefix.
        entry = { 'f': fname, 'd': path.replace(prefix, '') }
        if entry not in entries:
            entries.append(entry)
@app.route("/fix_dups", methods=["GET"])
def fix_dups():
    """Find files that share a content hash and render them for de-duplication.

    Splits the duplicates into two template inputs:
      per_file_dups -- irregular cases (3+ copies, two copies in the same
                       directory, or copies under different names), listed
                       file by file.
      per_path_dups -- the common case of one file name duplicated across
                       exactly two directories, collapsed into one row per
                       directory pair with a count of affected files.
    """
    # Every pair of distinct file entries with an identical hash.  Each pair
    # appears twice (A,B and B,A); AddDup() de-duplicates while collecting.
    rows = db.engine.execute( "select f1.hash, d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as fname2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" )
    if not rows.returns_rows:
        st.SetAlert("success")
        st.SetMessage(f"Err, no dups - should now clear the FE 'danger' message?")
        return render_template("base.html")

    # Strip the noise prefix "static/<basename(import_path)>/" from every
    # path: "static" is never shown and the import-path basename is common
    # to all rows.  rstrip() also copes with a missing/empty import_path
    # where the old [-1] index would have raised IndexError.
    s = Settings.query.first()
    prefix = f"static/{os.path.basename(s.import_path.rstrip('/'))}/"

    dups = {}
    for row in rows:
        AddDup( prefix, row, dups )

    per_file_dups = []
    per_path_dups = []
    d1 = ""
    d2 = ""
    dup_cnt = 1
    # Rows were ordered by path1/fname1, and dict preserves insertion order,
    # so hashes belonging to the same directory pair arrive consecutively.
    for file_hash in dups:
        copies = dups[file_hash]
        if len(copies) > 2:
            # more than two copies: too irregular to collapse per path
            per_file_dups.append(copies)
        elif copies[0]['d'] == copies[1]['d']:
            # two copies inside the same directory (names must differ)
            per_file_dups.append(copies)
        elif copies[0]['f'] != copies[1]['f']:
            # same content under different names
            per_file_dups.append(copies)
        elif d1 != copies[0]['d']:
            # Most common case: same name in two directories.  A new
            # directory pair starts here; flush the previous group's count
            # BEFORE resetting it (the old code reset first, so every
            # recorded count was 1).
            if d1 != '':
                per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2})
            dup_cnt = 1
            d1 = copies[0]['d']
            d2 = copies[1]['d']
        else:
            dup_cnt += 1
    # Flush the trailing group -- but only if one was ever opened, otherwise
    # the old code appended a bogus {'count': 1, 'd1': '', 'd2': ''} row.
    if d1 != '':
        per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2})

    # TODO(WIP): /move_files form processing is not implemented yet, so the
    # page is gated behind a warning; the dups.html render below is
    # unreachable until this block is removed.
    st.SetAlert("warning")
    st.SetMessage("Not Yet!")
    return render_template("base.html")
    return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups)
@app.route("/move_files", methods=["POST"])
def move_files():