diff --git a/TODO b/TODO
index 03be52b..6b6e2d7 100644
--- a/TODO
+++ b/TODO
@@ -1,12 +1,11 @@
## GENERAL
- * fix_dups, etc. need to know path so we don't guess import_path or storage_path to remove the prefix from the keep/del alerts
- * pagination in dups, needs to be a drop-down and take affect on page on change
* SymlinkName - use it from shared everywhere, never do path_prefix by hand use this function
* AddJobForLog can absorb DEBUGs, etc. in fact fix up logging in general
* comment your code
* do we need to make some funcs/code into OO?
* scan_sp needs to be in scannow
* need a way for page to show we are in import_path or storage_path
+ * storage_path viewing needs to be by folder / not a big grab bag of files (by default)
## DB
Need to think about...
@@ -21,10 +20,9 @@
ignore *thumb*
scan storage_dir
- * need to find / remove duplicate files from inside storage_dir and itself, and in import_dir and in storage_dir
- implications --
- VIEWING: need to view import dir and view storage dir as separate menu items AND make it clear what you are looking at in header
- MOVING/COPYING: need to be smart, its a file move/copy depending on file systems (if import_dir/storage_dir on same fs, we can use mv - much faster)
+ * need to find / remove duplicate files from inside storage_dir and import_dir
+ -- in fact not sure what will happen if I try this right now, I think it might sort of work, only the dup display per file won't be able to
+ use jex.path for all sets of files, only those dups in the original source of the scan
-- started on some basic optimisations (commit logs every 100 logs, not each log)
- with debugs: import = 04:11, getfiledetails== 0:35:35
@@ -33,7 +31,7 @@
*** Need to use thread-safe sessions per Thread, half-assed version did not work
- need a manual button to restart it in the GUI,
+ need a manual button to restart a job in the GUI,
(based on file-level optims, just run the job as new and it will optim over already done parts and continue)
Future:
diff --git a/files.py b/files.py
index c90d524..df5f196 100644
--- a/files.py
+++ b/files.py
@@ -15,6 +15,7 @@ import base64
import numpy
import cv2
import time
+import re
################################################################################
# Local Class imports
@@ -271,7 +272,10 @@ def fix_dups():
jexes = JobExtra.query.join(Job).join(PA_JobManager_Message).filter(PA_JobManager_Message.id==request.form['fe_msg_id']).all()
path=[jex.value for jex in jexes if jex.name == "path"][0]
prefix = SymlinkName(path,path+'/')
- pagesize=int([jex.value for jex in jexes if jex.name == "pagesize"][0])
+ if 'pagesize' not in request.form:
+ pagesize=int([jex.value for jex in jexes if jex.name == "pagesize"][0])
+ else:
+ pagesize=int(request.form['pagesize'])
dups={}
for row in rows:
AddDup( prefix+'/', row, dups )
@@ -282,27 +286,25 @@ def fix_dups():
did2=""
str=""
dup_cnt=1
+ preferred={}
per_file_dups=[]
per_path_dups=[]
hashes=""
overall_dup_cnt=0
overall_dup_sets=0
for hash in dups:
- # more than 2 files (just ask per file)
- if len(dups[hash]) > 2:
- per_file_dups.append(dups[hash])
- overall_dup_cnt += len(dups[hash])
- overall_dup_sets += 1
- # only 2 copies, and files are in same dir (so must be diff name, so just ask)
- elif dups[hash][0]['d'] == dups[hash][1]['d']:
- per_file_dups.append(dups[hash])
- overall_dup_cnt += len(dups[hash])
- overall_dup_sets += 1
+ # more than 2 files (just ask per file) OR
+ # only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
# content same, filename different (just ask per file)
- elif dups[hash][0]['f'] != dups[hash][1]['f']:
+ if (len(dups[hash]) > 2) or (dups[hash][0]['d'] == dups[hash][1]['d']) or (dups[hash][0]['f'] != dups[hash][1]['f']):
per_file_dups.append(dups[hash])
overall_dup_cnt += len(dups[hash])
overall_dup_sets += 1
+ for el in dups[hash]:
+                if re.search( r'\d{4}/\d{8}', el['d']):
+                    preferred[hash] = el['id']  # NOTE(review): overwrites on each match, so the last matching dir wins — confirm intended
+ if overall_dup_cnt<5:
+ print( f"{dups[hash]} <- keeping {el['d']} -- {preferred[hash]}" )
# by here we have only 2 files, with the same name, different path
# (MOST COMMON, and I think we dont care per file, just per path)
elif d1 != dups[hash][0]['d']:
@@ -326,7 +328,7 @@ def fix_dups():
overall_dup_sets += dup_cnt
per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
- return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
+ return render_template("dups.html", per_file_dups=per_file_dups, preferred=preferred, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
@app.route("/rm_dups", methods=["POST"])
def rm_dups():
diff --git a/templates/base.html b/templates/base.html
index 177d42e..03665b4 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -11,6 +11,11 @@
+
+
+
+
+
{% import "bootstrap/wtf.html" as wtf %}