diff --git a/TODO b/TODO
index 3e5207d..338c16a 100644
--- a/TODO
+++ b/TODO
@@ -13,7 +13,9 @@
- without debugs: import == 04:03, getfiledetails == 0:35:36 -- not a sig diff
- with exifread & debug: import == 04:26
- * CheckForDups() needs to allow the f/end to actually do the work, and then clear the MessageToFE() as well
+ * CheckForDups():
+ in files.py
+ -> need to process the form and ACT on it (by deleting files)
  * try again with walk: go through the loop once quickly just to add up the files,
  * then start the import, counting up against that total for progress (sketch below)
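+ * e.g. a two-pass sketch (hypothetical - ImportFile and st.SetProgress are stand-ins):
+     total = sum(len(fs) for _, _, fs in os.walk(import_dir))  # pass 1: count only
+     done = 0
+     for d, _, fs in os.walk(import_dir):                      # pass 2: import with progress
+         for f in fs:
+             ImportFile(os.path.join(d, f))
+             done += 1
+             st.SetProgress(done, total)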
diff --git a/files.py b/files.py
index eac4477..37716fc 100644
--- a/files.py
+++ b/files.py
@@ -159,28 +159,86 @@ def forcescan():
st.SetMessage("force scan & rebuild data for files in: Job #{} (Click the link to follow progress)".format( job.id, job.id) )
return render_template("base.html")
+
+def TrimmedPath( prefix, path ):
+    # strip only a leading prefix - a bare replace() would also clobber matches mid-path
+    return path[len(prefix):] if path.startswith(prefix) else path
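+# e.g. TrimmedPath("static/import/", "static/import/2019/03/") -> "2019/03/"
+# (illustrative values only)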
+
+def AddDup( prefix, row, dups ):
+    # collect both sides of a duplicate pair under their shared hash, skipping any
+    # (fname, dir) combo already recorded - the self-join returns each pair twice
+    if row.hash not in dups:
+        dups[row.hash] = []
+    for fname, path in ( (row.fname1, row.path1), (row.fname2, row.path2) ):
+        entry = { 'f': fname, 'd': TrimmedPath(prefix, path) }
+        if entry not in dups[row.hash]:
+            dups[row.hash].append(entry)
+    return
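+# once AddDup() has seen every row, dups maps hash -> entries, e.g. (illustrative):
+#   { 'ab12...': [ {'f': 'IMG_001.jpg', 'd': 'import/2019/'},
+#                  {'f': 'IMG_001.jpg', 'd': 'import/backup/'} ] }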
+
@app.route("/fix_dups", methods=["GET"])
def fix_dups():
-# dups = db.engine.execute.session.execute( "select d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as name2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" )
+    rows = db.engine.execute(
+        "select f1.hash, d1.path_prefix as path1, e1.name as fname1,"
+        " d2.path_prefix as path2, e2.name as fname2"
+        " from entry e1, file f1, dir d1, entry_dir_link edl1,"
+        " entry e2, file f2, dir d2, entry_dir_link edl2"
+        " where e1.id = f1.eid and e2.id = f2.eid"
+        " and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id"
+        " and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id"
+        " and f1.hash = f2.hash and e1.id != e2.id"
+        " order by path1, fname1;" ).fetchall()
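+    # the self-join pairs every file with every other file sharing its hash;
+    # e1/e2 are symmetric, so each pair comes back twice - AddDup() weeds out repeats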
-# if len(dups) > 0:
-# ActionForFE( job, dups, "danger", "Found duplicate(s), click here to finalise import by removing duplicates" )
-# p1=""
-# done=list()
-# for dup in dups:
-# if p1 != dup.path1:
-# p1 = dup.path1
-# p2 = dup.path2
-# # this is the flip-side of a previous p1 <-> p2 dup (this p2 is a previous p1)
-# if p2 in done:
-# continue
-# done.append(p1)
-# print(f"Duplicates in: {p1} <-> {p2}")
+    # returns_rows is True for any select, even an empty one - test the fetched list instead
+    if not rows:
+        st.SetAlert("success")
+        st.SetMessage("No dups found - TODO: also clear the FE 'danger' message here")
+        return render_template("base.html")
+    # use the import_path setting to trim the common "static/<basename>/" prefix from
+    # dup paths - "static" is never shown, and the import dir basename is the same
+    # for every file, so neither carries any information
+    s = Settings.query.first()
+    prefix = os.path.basename(s.import_path.rstrip('/'))
+    prefix = f"static/{prefix}/"
+ dups={}
+ for row in rows:
+ AddDup( prefix, row, dups )
+
+ d1=""
+ d2=""
+ str=""
+ dup_cnt=1
+ per_file_dups=[]
+ per_path_dups=[]
+ for hash in dups:
+ if len(dups[hash]) > 2:
+ per_file_dups.append(dups[hash])
+ elif dups[hash][0]['d'] == dups[hash][1]['d']:
+ per_file_dups.append(dups[hash])
+ elif dups[hash][0]['f'] != dups[hash][1]['f']:
+ per_file_dups.append(dups[hash])
+ # by here we have only 2 files, with the same name, different path
+ # (MOST COMMON, and I think we dont care per file, just per path)
+ elif d1 != dups[hash][0]['d']:
+ if d1 != '':
+ dup_cnt=1
+ per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2 })
+ d1 = dups[hash][0]['d']
+ d2 = dups[hash][1]['d']
+ str=f"duplicates found in {d1} and {d2}"
+ else:
+ dup_cnt += 1
+
+ per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2 })
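+    # per_path_dups ends up like [{'count': 12, 'd1': 'import/2019/', 'd2': 'import/backup/'}]
+    # (illustrative values); dups.html is assumed to render both lists for the user to act on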
st.SetAlert("warning")
st.SetMessage("Not Yet!")
- return render_template("base.html")
-
+ return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups)
@app.route("/move_files", methods=["POST"])
def move_files():