Rewrote dups.html to use the newer model, where we auto-delete those that match the regexp and no longer show a per-row view of them. Also removed an extra, unneeded line from the code that processes file deletions.

This commit is contained in:
2021-03-17 20:04:25 +11:00
parent 08dc646371
commit 2cd55580a9
3 changed files with 42 additions and 37 deletions

15
dups.py
View File

@@ -209,28 +209,18 @@ class Duplicates:
d2="" d2=""
did1="" did1=""
did2="" did2=""
str=""
dup_cnt=1 dup_cnt=1
hashes="" hashes=""
for hash in self.dups_to_process: for hash in self.dups_to_process:
if self.overall_dup_cnt<2: # more than 2 files (just ask per file) OR only 2 copies, and files are in same dir (so must be diff name, so just ask) OR content same, filename different (ask per file)
print(f"process {hash}")
# more than 2 files (just ask per file) OR
# only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
# content same, filename different (just ask per file)
if (len(self.dups_to_process[hash]) > 2) or (self.dups_to_process[hash][0].f != self.dups_to_process[hash][1].f) or (self.dups_to_process[hash][0].d == self.dups_to_process[hash][1].d): if (len(self.dups_to_process[hash]) > 2) or (self.dups_to_process[hash][0].f != self.dups_to_process[hash][1].f) or (self.dups_to_process[hash][0].d == self.dups_to_process[hash][1].d):
self.per_file_dups.append(self.dups_to_process[hash]) self.per_file_dups.append(self.dups_to_process[hash])
self.overall_dup_cnt += len(self.dups_to_process[hash]) self.overall_dup_cnt += len(self.dups_to_process[hash])
self.overall_dup_sets += 1 self.overall_dup_sets += 1
if self.overall_dup_cnt<2:
print( f"process as len(el)={len(self.dups_to_process[hash])}" )
for el in self.dups_to_process[hash]: for el in self.dups_to_process[hash]:
if re.search( '\d{4}/\d{8}', el.d): if re.search( '\d{4}/\d{8}', el.d):
self.preferred_file[hash] = el.id self.preferred_file[hash] = el.id
if self.overall_dup_cnt<25: # by here we have only 2 files, with the same name, different path (ask per path)
print( f"{self.dups_to_process[hash]} <- keeping {el.id} -- {self.preferred_file[hash]}" )
# by here we have only 2 files, with the same name, different path
# (MOST COMMON, and I think we dont care per file, just per path)
elif d1 != self.dups_to_process[hash][0].d: elif d1 != self.dups_to_process[hash][0].d:
if d1 != '': if d1 != '':
self.overall_dup_cnt += dup_cnt self.overall_dup_cnt += dup_cnt
@@ -245,7 +235,6 @@ class Duplicates:
d2 = self.dups_to_process[hash][1].d d2 = self.dups_to_process[hash][1].d
did1 = self.dups_to_process[hash][0].did did1 = self.dups_to_process[hash][0].did
did2 = self.dups_to_process[hash][1].did did2 = self.dups_to_process[hash][1].did
str=f"duplicates found in {d1} and {d2}"
hashes = f"{hash}," hashes = f"{hash},"
else: else:
dup_cnt += 1 dup_cnt += 1

View File

@@ -895,7 +895,6 @@ def RemoveDups(job):
elif f.file_details[0].eid == int(keeping): elif f.file_details[0].eid == int(keeping):
found = f found = f
else: else:
exists = os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name)
del_me_lst.append(f) del_me_lst.append(f)
if found == None: if found == None:
AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" ) AddLogForJob( job, f"ERROR: Cannot find file with hash={hash} to process - skipping it)" )

View File

@@ -48,14 +48,14 @@
function KeepFile(row, which, al) function KeepFile(row, which, al)
{ {
$('[id^=kf' + row + ']').attr('class', 'alert alert-danger sm-txt py-1') $('[id^=kf' + row + '-f]').attr('class', 'alert alert-danger sm-txt py-1')
$('[id^=kf' + row + ']').attr('class', 'alert alert-danger sm-txt py-1') $('[id^=kf' + row + '-f]').attr('class', 'alert alert-danger sm-txt py-1')
$('#kf'+row+'-f'+which).attr('class', 'alert alert-' + al + ' py-1') $('#kf'+row+'-f'+which).attr('class', 'alert alert-' + al + ' py-1')
$('#kfname-'+row).val( F[row.toString()+which.toString()] ) $('#kfname-'+row).val( F[row.toString()+which.toString()] )
} }
function KeepDir(row, which) function KeepDir(row, which)
{ {
$('[id^=kd'+row+']').attr('class', 'alert alert-danger sm-txt py-1') $('[id^=kd'+row+'-d]').attr('class', 'alert alert-danger sm-txt py-1')
$('#kd'+row+'-d'+which).attr('class', 'alert alert-success py-1') $('#kd'+row+'-d'+which).attr('class', 'alert alert-success py-1')
$('#kdid-'+row).val( D[row.toString()+which.toString()] ) $('#kdid-'+row).val( D[row.toString()+which.toString()] )
} }
@@ -66,39 +66,56 @@
<input type="hidden" name="fe_msg_id" value={{fe_msg_id}}> <input type="hidden" name="fe_msg_id" value={{fe_msg_id}}>
{% set page=namespace(cnt=0) %} {% set page=namespace(cnt=0) %}
{% set pref=namespace(have="") %} {% set pref=namespace(have="") %}
{% set count=namespace( del=0, keep=0 ) %}
<h5>Choose between these files:</h5> <h5>Choose between these files:</h5>
{% for dups in per_file_dups %} {% for dups in per_file_dups %}
{% set outer_loop=loop.index %} {% set outer_loop=loop.index %}
{% set pref.have="" %} {% set pref.have="" %}
<div class="col-lg-12 py-2"> {% for dup in dups %}
{% for dup in dups %} {% if preferred[dup.h] %}
<alert id="kf{{outer_loop}}-f{{loop.index}}" style="cursor: pointer;" class="alert" {% set pref.have = "True" %}
onClick="KeepFile({{outer_loop}},{{loop.index}},'success')">{{dup.d}}/{{dup.f}}</alert>
{% if preferred[dup.h] == dup.id %} {% if preferred[dup.h] == dup.id %}
{% set pref.have="kf{}-f{}".format(outer_loop,loop.index) %}
{% endif %}
{% if loop.index < dups|length %}
or
{% else %}
<input type="hidden" name="kfhash-{{outer_loop}}" value="{{dup.h}}"> <input type="hidden" name="kfhash-{{outer_loop}}" value="{{dup.h}}">
<input type="hidden" id="kfname-{{outer_loop}}" name="kfname-{{outer_loop}}" value=""> <input type="hidden" id="kfname-{{outer_loop}}" name="kfname-{{outer_loop}}" value="">
{% set count.keep = count.keep + 1 %}
{% else %}
{% set count.del = count.del + (dups|length-1) %}
{% endif %} {% endif %}
<script> {% else %}
F[{{outer_loop}}{{loop.index}}]="{{dup.id}}" {% if page.cnt <= pagesize %}
</script> {% if loop.index == 1 %}
{% endfor %} <div class="col-lg-12 py-2">
</div class="col-lg-12"> {% endif %}
<alert id="kf{{outer_loop}}-f{{loop.index}}" style="cursor: pointer;" class="alert"
onClick="KeepFile({{outer_loop}},{{loop.index}},'success')">{{dup.d}}/{{dup.f}}</alert>
{% if preferred[dup.h] == dup.id %}
{% set pref.have="kf{}-f{}".format(outer_loop,loop.index) %}
{% endif %}
{% if loop.index < dups|length %}
or
{% else %}
<input type="hidden" name="kfhash-{{outer_loop}}" value="{{dup.h}}">
<input type="hidden" id="kfname-{{outer_loop}}" name="kfname-{{outer_loop}}" value="">
{% endif %}
<script>
F[{{outer_loop}}{{loop.index}}]="{{dup.id}}"
</script>
{% if loop.index == dups|length %}
</div class="col-lg-12">
{% endif %}
{% endif %}
{% endif %}
{% endfor %}
{% if pref.have == "" %} {% if pref.have == "" %}
<script>KeepFile( {{outer_loop}}, 1,'warning' )</script> <script>KeepFile( {{outer_loop}}, 1,'warning' )</script>
{% else %} {% set page.cnt = page.cnt + 1 %}
<script>$("#{{pref.have}}").click()</script>
{% endif %}
{% set page.cnt = page.cnt + 1 %}
{% if page.cnt == pagesize %}
{% break %}
{% endif %} {% endif %}
{% endfor %} {% endfor %}
<div class="col-lg-12">
Matched preference regexp: Keep={{count.keep}}, Deleting={{count.del}}
</div>
{% if page.cnt < pagesize %} {% if page.cnt < pagesize %}
<h5 class="mt-3">Choose path to KEEP (same file names in 2 different directories):</h5> <h5 class="mt-3">Choose path to KEEP (same file names in 2 different directories):</h5>
{% for dup in per_path_dups %} {% for dup in per_path_dups %}