From 2cd55580a96f7a734ef463539e5abcaa81e949f2 Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Wed, 17 Mar 2021 20:04:25 +1100 Subject: [PATCH] rewrote dups.html to now use newer model, where we will auto-delete those that match the regexp, and not show a per row view of these. Also removed extra / unneeded line when processing deleting files --- dups.py | 15 ++--------- pa_job_manager.py | 1 - templates/dups.html | 63 ++++++++++++++++++++++++++++----------------- 3 files changed, 42 insertions(+), 37 deletions(-) diff --git a/dups.py b/dups.py index 774272a..e685e91 100644 --- a/dups.py +++ b/dups.py @@ -209,28 +209,18 @@ class Duplicates: d2="" did1="" did2="" - str="" dup_cnt=1 hashes="" for hash in self.dups_to_process: - if self.overall_dup_cnt<2: - print(f"process {hash}") - # more than 2 files (just ask per file) OR - # only 2 copies, and files are in same dir (so must be diff name, so just ask) OR - # content same, filename different (just ask per file) + # more than 2 files (just ask per file) OR only 2 copies, and files are in same dir (so must be diff name, so just ask) OR content same, filename different (ask per file) if (len(self.dups_to_process[hash]) > 2) or (self.dups_to_process[hash][0].f != self.dups_to_process[hash][1].f) or (self.dups_to_process[hash][0].d == self.dups_to_process[hash][1].d): self.per_file_dups.append(self.dups_to_process[hash]) self.overall_dup_cnt += len(self.dups_to_process[hash]) self.overall_dup_sets += 1 - if self.overall_dup_cnt<2: - print( f"process as len(el)={len(self.dups_to_process[hash])}" ) for el in self.dups_to_process[hash]: if re.search( '\d{4}/\d{8}', el.d): self.preferred_file[hash] = el.id - if self.overall_dup_cnt<25: - print( f"{self.dups_to_process[hash]} <- keeping {el.id} -- {self.preferred_file[hash]}" ) - # by here we have only 2 files, with the same name, different path - # (MOST COMMON, and I think we dont care per file, just per path) + # by here we have only 2 files, with the same name, different path (ask per path) elif d1 != self.dups_to_process[hash][0].d: if d1 != '': self.overall_dup_cnt += dup_cnt @@ -245,7 +235,6 @@ class Duplicates: d2 = self.dups_to_process[hash][1].d did1 = self.dups_to_process[hash][0].did did2 = self.dups_to_process[hash][1].did - str=f"duplicates found in {d1} and {d2}" hashes = f"{hash}," else: dup_cnt += 1 diff --git a/pa_job_manager.py b/pa_job_manager.py index ff25126..616ef55 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -895,7 +895,6 @@ def RemoveDups(job): elif f.file_details[0].eid == int(keeping): found = f else: - exists = os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) del_me_lst.append(f) if found == None: AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" ) diff --git a/templates/dups.html b/templates/dups.html index 7fcfc87..ea3f97f 100644 --- a/templates/dups.html +++ b/templates/dups.html @@ -48,14 +48,14 @@ function KeepFile(row, which, al) { - $('[id^=kf' + row + ']').attr('class', 'alert alert-danger sm-txt py-1') - $('[id^=kf' + row + ']').attr('class', 'alert alert-danger sm-txt py-1') + $('[id^=kf' + row + '-f]').attr('class', 'alert alert-danger sm-txt py-1') + $('[id^=kf' + row + '-f]').attr('class', 'alert alert-danger sm-txt py-1') $('#kf'+row+'-f'+which).attr('class', 'alert alert-' + al + ' py-1') $('#kfname-'+row).val( F[row.toString()+which.toString()] ) } function KeepDir(row, which) { - $('[id^=kd'+row+']').attr('class', 'alert alert-danger sm-txt py-1') + $('[id^=kd'+row+'-d]').attr('class', 'alert alert-danger sm-txt py-1') $('#kd'+row+'-d'+which).attr('class', 'alert alert-success py-1') $('#kdid-'+row).val( D[row.toString()+which.toString()] ) } @@ -66,39 +66,56 @@ {% set page=namespace(cnt=0) %} {% set pref=namespace(have="") %} + {% set count=namespace( del=0, keep=0 ) %}
Choose between these files:
{% for dups in per_file_dups %} {% set outer_loop=loop.index %} {% set pref.have="" %} -
- {% for dup in dups %} - {{dup.d}}/{{dup.f}} + {% for dup in dups %} + {% if preferred[dup.h] %} + {% set pref.have = "True" %} {% if preferred[dup.h] == dup.id %} - {% set pref.have="kf{}-f{}".format(outer_loop,loop.index) %} - {% endif %} - {% if loop.index < dups|length %} - or - {% else %} + {% set count.keep = count.keep + 1 %} + {% else %} + {% set count.del = count.del + (dups|length-1) %} {% endif %} - - {% endfor %} -
+ {% else %} + {% if page.cnt <= pagesize %} + {% if loop.index == 1 %} +
+ {% endif %} + {{dup.d}}/{{dup.f}} + {% if preferred[dup.h] == dup.id %} + {% set pref.have="kf{}-f{}".format(outer_loop,loop.index) %} + {% endif %} + {% if loop.index < dups|length %} + or + {% else %} + + + {% endif %} + + {% if loop.index == dups|length %} +
+ {% endif %} + {% endif %} + {% endif %} + {% endfor %} {% if pref.have == "" %} - {% else %} - - {% endif %} - {% set page.cnt = page.cnt + 1 %} - {% if page.cnt == pagesize %} - {% break %} + {% set page.cnt = page.cnt + 1 %} {% endif %} {% endfor %} +
+ Matched preference regexp: Keep={{count.keep}}, Deleting={{count.del}} +
+ {% if page.cnt < pagesize %}
Choose path to KEEP (same file names in 2 different directories):
{% for dup in per_path_dups %}