Now have a partial fix_dups path: it shows the content in a much more reasonable manner and allows the GUI to select the files/paths to keep. HOWEVER, the form POST is not enabled, and I still need to process the form data -- right now, not sure how I know which files to delete vs keep -> will need hidden vars of options, not just the to_keep - then process them

2021-02-13 20:21:08 +11:00
parent 0fe5d97317
commit 7014eb0f35
2 changed files with 77 additions and 17 deletions
--- a/4
+++ b/4
@@ -13,7 +13,9 @@
        - without debugs: import == 04:03, getfiledetails == 0:35:36 -- not a sig diff
        - with exifread & debug: import == 04:26
-    * CheckForDups() needs to allow the f/end to actually do the work, and then clear the MessageToFE() as well
+    * CheckForDups():
        in files.py 
            -> need to process the form and ACT on it (by deleting files)
    * try again with walk to go through loop once quickly just to add up files,
    * then start the import dir counting up / progress
--- a/files.py
+++ b/files.py
@@ -159,28 +159,86 @@ def forcescan():
    st.SetMessage("force scan & rebuild data for files in:&nbsp;<a href=/job/{}>Job #{}</a>&nbsp;(Click the link to follow progress)".format( job.id, job.id) )
    return render_template("base.html")
 def TrimmedPath( prefix, path ):
    """Return path with a leading prefix removed.

    Only a prefix at the very start of path is stripped; a later occurrence
    of the same text inside the path is left alone (str.replace would have
    removed every occurrence and corrupted such paths). If path does not
    start with prefix it is returned unchanged.
    """
    if path.startswith(prefix):
        return path[len(prefix):]
    return path
 def AddDup( prefix, row, dups ):
    """Record both files named by a duplicate-pair row under the row's hash.

    prefix -- leading path text passed to TrimmedPath for each directory
    row    -- object exposing hash, fname1, path1, fname2 and path2
    dups   -- dict mapping hash -> list of {'f': filename, 'd': trimmed dir};
              updated in place

    The first row seen for a hash stores both of its files as-is. Later rows
    for the same hash only add a file whose (filename, dir) combination is
    not already listed. Returns None.
    """
    # trim each path once up front instead of once per comparison
    pair = (
        { 'f': row.fname1, 'd': TrimmedPath(prefix, row.path1) },
        { 'f': row.fname2, 'd': TrimmedPath(prefix, row.path2) },
    )
    if row.hash not in dups:
        dups[row.hash] = list(pair)
        return
    known = dups[row.hash]
    for entry in pair:
        # any() stops at the first match; the second entry is checked against
        # the list after the first entry may have been appended
        if not any(k['f'] == entry['f'] and k['d'] == entry['d'] for k in known):
            known.append(entry)
    return
@app.route("/fix_dups", methods=["GET"])
 def fix_dups():
-#    dups = db.engine.execute.session.execute( "select d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as name2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id  and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" )
+    rows = db.engine.execute( "select f1.hash, d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as fname2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id  and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" )
-#    if len(dups) > 0:
+    if rows.returns_rows == False:
-#            ActionForFE( job, dups, "danger", "Found duplicate(s), click <a href="/fix_dups">here</a> to finalise import by removing duplicates" )
+        st.SetAlert("success")
-#    p1="" 
+        st.SetMessage(f"Err, no dups - should now clear the FE 'danger' message?")
-#    done=list()
+        return render_template("base.html")
 #    for dup in dups:
 #        if p1 != dup.path1:
 #            p1 = dup.path1
 #            p2 = dup.path2
 #            # this is the flip-side of a previous p1 <-> p2 dup (this p2 is a previous p1)
 #            if p2 in done:
 #                continue
 #            done.append(p1)
 #            print(f"Duplicates in: {p1} <-> {p2}")
    # use import_path setting to remove the dup path prefix of static/basename(<import_path>) 
    #    -- static isn't really seen, and the import path basename is always going to be the same
    s=Settings.query.first()
    print (s.import_path)
    if s.import_path[-1] == '/':
        prefix = os.path.basename(s.import_path[0:-1])
    else:
        prefix = os.path.basename(s.import_path)
    prefix=f"static/{prefix}/"
    dups={}
    for row in rows:
        AddDup( prefix, row, dups )
    d1=""
    d2=""
    str=""
    dup_cnt=1
    per_file_dups=[]
    per_path_dups=[]
    for hash in dups:
        if len(dups[hash]) > 2:
            per_file_dups.append(dups[hash])
        elif dups[hash][0]['d'] == dups[hash][1]['d']:
            per_file_dups.append(dups[hash])
        elif dups[hash][0]['f'] != dups[hash][1]['f']:
            per_file_dups.append(dups[hash])
        # by here we have only 2 files, with the same name, different path
        # (MOST COMMON, and I think we dont care per file, just per path)
        elif d1 != dups[hash][0]['d']:
            if d1 != '':
                dup_cnt=1
                per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2 })
            d1 = dups[hash][0]['d']
            d2 = dups[hash][1]['d']
            str=f"duplicates found in {d1} and {d2}"
        else:
            dup_cnt += 1
    per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2 })
    st.SetAlert("warning")
    st.SetMessage("Not Yet!")
-    return render_template("base.html")
+    return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups)
@app.route("/move_files", methods=["POST"])
 def move_files():