From 7014eb0f35dca5c5fe35f8e5a000a544a9aee9aa Mon Sep 17 00:00:00 2001
From: Damien De Paoli
Date: Sat, 13 Feb 2021 20:21:08 +1100
Subject: [PATCH] now have a partial fix_dups path: it shows the content in a
 much more reasonable manner and allows the GUI to select the files/paths to
 keep. HOWEVER, the form POST is not enabled and I still need to process the
 form data -- right now, not sure how to know which files to delete vs keep
 -> will need hidden vars of the options, not just the to_keep, then process
 them

---
 TODO     |  4 ++-
 files.py | 90 ++++++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 77 insertions(+), 17 deletions(-)

diff --git a/TODO b/TODO
index 3e5207d..338c16a 100644
--- a/TODO
+++ b/TODO
@@ -13,7 +13,9 @@
 - without debugs: import == 04:03, getfiledetails == 0:35:36 -- not a sig diff
 - with exifread & debug: import == 04:26
 
-	* CheckForDups() needs to allow the f/end to actually do the work, and then clear the MessageToFE() as well
+	* CheckForDups():
+		in files.py
+		-> need to process the form and ACT on it (by deleting files)
 
 	* try again with walk to go through loop once quickly just to add up files,
 	* then start the import dir counting up / progress

diff --git a/files.py b/files.py
index eac4477..37716fc 100644
--- a/files.py
+++ b/files.py
@@ -159,28 +159,86 @@ def forcescan():
     st.SetMessage("force scan & rebuild data for files in: Job #{} (Click the link to follow progress)".format( job.id, job.id) )
     return render_template("base.html")
+
+def TrimmedPath( prefix, path ):
+    return path.replace(prefix, '' )
+
+def AddDup( prefix, row, dups ):
+    if row.hash not in dups:
+        dups[row.hash]=[]
+        dups[row.hash].append( { 'f': row.fname1, 'd':TrimmedPath(prefix, row.path1) } )
+        dups[row.hash].append( { 'f': row.fname2, 'd':TrimmedPath(prefix, row.path2) } )
+    else:
+        # process path1 / fname1 -- if that combo is not in the dups[hash], add it
+        found=0
+        for dup in dups[row.hash]:
+            if dup['f'] == row.fname1 and dup['d'] == TrimmedPath(prefix, row.path1):
+                found=1
+                break
+        if not found:
+            dups[row.hash].append( { 'f': row.fname1, 'd':TrimmedPath(prefix, row.path1) } )
+
+        # process path2 / fname2 -- if that combo is not in the dups[hash], add it
+        found=0
+        for dup in dups[row.hash]:
+            if dup['f'] == row.fname2 and dup['d'] == TrimmedPath(prefix, row.path2):
+                found=1
+                break
+        if not found:
+            dups[row.hash].append( { 'f': row.fname2, 'd':TrimmedPath(prefix, row.path2) } )
+    return
+
 @app.route("/fix_dups", methods=["GET"])
 def fix_dups():
-#    dups = db.engine.execute.session.execute( "select d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as name2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" )
+    rows = db.engine.execute( "select f1.hash, d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as fname2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" ).fetchall()
 
-#    if len(dups) > 0:
-#        ActionForFE( job, dups, "danger", "Found duplicate(s), click here to finalise import by removing duplicates" )
-#
p1="" -# done=list() -# for dup in dups: -# if p1 != dup.path1: -# p1 = dup.path1 -# p2 = dup.path2 -# # this is the flip-side of a previous p1 <-> p2 dup (this p2 is a previous p1) -# if p2 in done: -# continue -# done.append(p1) -# print(f"Duplicates in: {p1} <-> {p2}") + if rows.returns_rows == False: + st.SetAlert("success") + st.SetMessage(f"Err, no dups - should now clear the FE 'danger' message?") + return render_template("base.html") + # use import_path setting to remove the dup path prefix of static/basename() + # -- static isn't really seen, and the import path basename is always going to be the same + s=Settings.query.first() + print (s.import_path) + if s.import_path[-1] == '/': + prefix = os.path.basename(s.import_path[0:-1]) + else: + prefix = os.path.basename(s.import_path) + prefix=f"static/{prefix}/" + dups={} + for row in rows: + AddDup( prefix, row, dups ) + + d1="" + d2="" + str="" + dup_cnt=1 + per_file_dups=[] + per_path_dups=[] + for hash in dups: + if len(dups[hash]) > 2: + per_file_dups.append(dups[hash]) + elif dups[hash][0]['d'] == dups[hash][1]['d']: + per_file_dups.append(dups[hash]) + elif dups[hash][0]['f'] != dups[hash][1]['f']: + per_file_dups.append(dups[hash]) + # by here we have only 2 files, with the same name, different path + # (MOST COMMON, and I think we dont care per file, just per path) + elif d1 != dups[hash][0]['d']: + if d1 != '': + dup_cnt=1 + per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2 }) + d1 = dups[hash][0]['d'] + d2 = dups[hash][1]['d'] + str=f"duplicates found in {d1} and {d2}" + else: + dup_cnt += 1 + + per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2 }) st.SetAlert("warning") st.SetMessage("Not Yet!") - return render_template("base.html") - + return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups) @app.route("/move_files", methods=["POST"]) def move_files():