okay fix_dups page now has functioning pagination, highlights regex matching "good" files as green, and just a file as yellow if we can't find the right one, so easily shows where to really pay attention. Has DBox based help page, and overall just a better UI/UX
This commit is contained in:
12
TODO
12
TODO
@@ -1,12 +1,11 @@
|
||||
## GENERAL
|
||||
* fix_dups, etc. need to know path so we don't guess import_path or storage_path to remove the prefix from the keep/del alerts
|
||||
* pagination in dups, needs to be a drop-down and take effect on page on change
|
||||
* SymlinkName - use it from shared everywhere, never do path_prefix by hand use this function
|
||||
* AddJobForLog can absorb DEBUGs, etc. in fact fix up logging in general
|
||||
* comment your code
|
||||
* do we need to make some funcs/code into OO?
|
||||
* scan_sp needs to be in scannow
|
||||
* need a way for page to show we are in import_path or storage_path
|
||||
* storage_path viewing needs to be by folder / not a big grab bag of files (by default)
|
||||
|
||||
## DB
|
||||
Need to think about...
|
||||
@@ -21,10 +20,9 @@
|
||||
ignore *thumb*
|
||||
|
||||
scan storage_dir
|
||||
* need to find / remove duplicate files from inside storage_dir and itself, and in import_dir and in storage_dir
|
||||
implications --
|
||||
VIEWING: need to view import dir and view storage dir as separate menu items AND make it clear what you are looking at in header
|
||||
MOVING/COPYING: need to be smart, it's a file move/copy depending on file systems (if import_dir/storage_dir on same fs, we can use mv - much faster)
|
||||
* need to find / remove duplicate files from inside storage_dir and import_dir
|
||||
-- in fact not sure what will happen if I try this right now, I think it might sort of work, only the dup display per file won't be able to
|
||||
use jex.path for all sets of files, only those dups in the original source of the scan
|
||||
|
||||
-- started on some basic optimisations (commit logs every 100 logs, not each log)
|
||||
- with debugs: import = 04:11, getfiledetails== 0:35:35
|
||||
@@ -33,7 +31,7 @@
|
||||
|
||||
*** Need to use thread-safe sessions per Thread, half-assed version did not work
|
||||
|
||||
need a manual button to restart it in the GUI,
|
||||
need a manual button to restart a job in the GUI,
|
||||
(based on file-level optims, just run the job as new and it will optim over already done parts and continue)
|
||||
|
||||
Future:
|
||||
|
||||
26
files.py
26
files.py
@@ -15,6 +15,7 @@ import base64
|
||||
import numpy
|
||||
import cv2
|
||||
import time
|
||||
import re
|
||||
|
||||
################################################################################
|
||||
# Local Class imports
|
||||
@@ -271,7 +272,10 @@ def fix_dups():
|
||||
jexes = JobExtra.query.join(Job).join(PA_JobManager_Message).filter(PA_JobManager_Message.id==request.form['fe_msg_id']).all()
|
||||
path=[jex.value for jex in jexes if jex.name == "path"][0]
|
||||
prefix = SymlinkName(path,path+'/')
|
||||
if 'pagesize' not in request.form:
|
||||
pagesize=int([jex.value for jex in jexes if jex.name == "pagesize"][0])
|
||||
else:
|
||||
pagesize=int(request.form['pagesize'])
|
||||
dups={}
|
||||
for row in rows:
|
||||
AddDup( prefix+'/', row, dups )
|
||||
@@ -282,27 +286,25 @@ def fix_dups():
|
||||
did2=""
|
||||
str=""
|
||||
dup_cnt=1
|
||||
preferred={}
|
||||
per_file_dups=[]
|
||||
per_path_dups=[]
|
||||
hashes=""
|
||||
overall_dup_cnt=0
|
||||
overall_dup_sets=0
|
||||
for hash in dups:
|
||||
# more than 2 files (just ask per file)
|
||||
if len(dups[hash]) > 2:
|
||||
per_file_dups.append(dups[hash])
|
||||
overall_dup_cnt += len(dups[hash])
|
||||
overall_dup_sets += 1
|
||||
# only 2 copies, and files are in same dir (so must be diff name, so just ask)
|
||||
elif dups[hash][0]['d'] == dups[hash][1]['d']:
|
||||
per_file_dups.append(dups[hash])
|
||||
overall_dup_cnt += len(dups[hash])
|
||||
overall_dup_sets += 1
|
||||
# more than 2 files (just ask per file) OR
|
||||
# only 2 copies, and files are in same dir (so must be diff name, so just ask) OR
|
||||
# content same, filename different (just ask per file)
|
||||
elif dups[hash][0]['f'] != dups[hash][1]['f']:
|
||||
if (len(dups[hash]) > 2) or (dups[hash][0]['d'] == dups[hash][1]['d']) or (dups[hash][0]['f'] != dups[hash][1]['f']):
|
||||
per_file_dups.append(dups[hash])
|
||||
overall_dup_cnt += len(dups[hash])
|
||||
overall_dup_sets += 1
|
||||
for el in dups[hash]:
|
||||
if re.search( '\d{4}/\d{8}', el['d']):
|
||||
preferred[hash] = el['id']
|
||||
if overall_dup_cnt<5:
|
||||
print( f"{dups[hash]} <- keeping {el['d']} -- {preferred[hash]}" )
|
||||
# by here we have only 2 files, with the same name, different path
|
||||
# (MOST COMMON, and I think we dont care per file, just per path)
|
||||
elif d1 != dups[hash][0]['d']:
|
||||
@@ -326,7 +328,7 @@ def fix_dups():
|
||||
overall_dup_sets += dup_cnt
|
||||
per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
|
||||
|
||||
return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
|
||||
return render_template("dups.html", per_file_dups=per_file_dups, preferred=preferred, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets, pagesize=pagesize )
|
||||
|
||||
@app.route("/rm_dups", methods=["POST"])
|
||||
def rm_dups():
|
||||
|
||||
@@ -11,6 +11,11 @@
|
||||
<link rel="stylesheet" href="https://cdn.datatables.net/1.10.22/css/dataTables.bootstrap4.min.css">
|
||||
<link rel="shortcut icon" href="{{ url_for('static', filename='favicon.ico') }}">
|
||||
<script src="https://kit.fontawesome.com/9b4c7cf470.js" crossorigin="anonymous"></script>
|
||||
<!-- code to get bootstrap & bootstrap datatable to work -->
|
||||
<script src="https://code.jquery.com/jquery-3.5.1.min.js" integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin="anonymous"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@4.5.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-ho+j7jyWK8fNQe+A12Hb8AhRq26LrZ/JpcUGGOn+Y7RsweNrtN/tE3MoK7ZeZDyx" crossorigin="anonymous"></script>
|
||||
<script src="https://cdn.datatables.net/1.10.22/js/jquery.dataTables.min.js"></script>
|
||||
<script src="https://cdn.datatables.net/1.10.22/js/dataTables.bootstrap4.min.js"></script>
|
||||
{% import "bootstrap/wtf.html" as wtf %}
|
||||
<style>
|
||||
.highlight { box-shadow: 0 0 7px 4px #5bc0de }
|
||||
@@ -21,7 +26,7 @@
|
||||
|
||||
<!-- Modal Dialog Box, jquery used to show / set content -->
|
||||
<div id="dbox" class="modal fade" tabindex="-1" role="dialog">
|
||||
<div class="modal-dialog mw-100 w-100">
|
||||
<div class="modal-dialog">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h5 id="dbox-title" class="modal-title"></h5>
|
||||
@@ -135,11 +140,6 @@
|
||||
{% endblock main_content %}
|
||||
|
||||
{% if not InDBox %}
|
||||
<!-- code to get bootstrap & bootstrap datatable to work -->
|
||||
<script src="https://code.jquery.com/jquery-3.5.1.min.js" integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin="anonymous"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@4.5.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-ho+j7jyWK8fNQe+A12Hb8AhRq26LrZ/JpcUGGOn+Y7RsweNrtN/tE3MoK7ZeZDyx" crossorigin="anonymous"></script>
|
||||
<script src="https://cdn.datatables.net/1.10.22/js/jquery.dataTables.min.js"></script>
|
||||
<script src="https://cdn.datatables.net/1.10.22/js/dataTables.bootstrap4.min.js"></script>
|
||||
<script>
|
||||
function SetViewingOptionsForSearchForm()
|
||||
{
|
||||
|
||||
@@ -1,19 +1,15 @@
|
||||
{% extends "base.html" %} {% block main_content %}
|
||||
<div class="container-fluid">
|
||||
|
||||
<h3 class="offset-lg-2">{{page_title}}</h3>
|
||||
<div class="alert alert-info">Duplicate files have been detected. They have the same binary content,
|
||||
but either have a different name, there are 3 or more copies or are stored in two different
|
||||
directories. Choose between the options below. NOTE: after you click
|
||||
'Delete Duplicates', the files / directories in red will be deleted from the file
|
||||
system, those in green will remain
|
||||
</div>
|
||||
<form class="d-flex justify-content-center form-inline">
|
||||
<form id="psform" class="d-flex justify-content-center form-inline" method="POST" action="">
|
||||
<input type="hidden" name="fe_msg_id" value="{{fe_msg_id}}"></input>
|
||||
<h5>
|
||||
<div class="form-group">
|
||||
<label for="pagesize">{{overall_dup_sets}} sets/dirs of files
|
||||
containing {{overall_dup_cnt}} files -- Showing </label>
|
||||
<select class="form form-control" name="pagesize">
|
||||
{% for o in "5", "10", "20", "50" %}
|
||||
<select id="pagesize" class="form form-control" name="pagesize" onChange="ResetPageSize()">
|
||||
{% for o in "5", "10", "15", "20", "25", "50", "75", "100", "200" %}
|
||||
<option
|
||||
{% if o|int == pagesize %}
|
||||
selected
|
||||
@@ -22,25 +18,65 @@
|
||||
{% endfor %}
|
||||
</select>
|
||||
duplicates at a time
|
||||
<button class="button btn-info" onClick="$('#dbox').modal('show'); return false;"><i class="fas fa-info-circle"></i></button>
|
||||
</div>
|
||||
</h5>
|
||||
</form>
|
||||
<script>
|
||||
let D=[]
|
||||
let F=[]
|
||||
|
||||
function ResetPageSize()
|
||||
{
|
||||
console.log( $("#pagesize").val() )
|
||||
$("#psform").submit()
|
||||
return false;
|
||||
}
|
||||
|
||||
$('#dbox-title').html('Duplicate Files Processing (info)')
|
||||
div=`
|
||||
<p>The duplicates are shown below ({{pagesize}} at a time) in a per set of files or sets of directories</p>
|
||||
<p>The per file sets are shown with the title 'Choose between these files' and show a series of duplicate files in a row, that comprise one of:</p>
|
||||
<ul>
|
||||
<li>3 or more of the same file</li>
|
||||
<li>different file names in the same or different directories</li>
|
||||
</ul>
|
||||
<p>The per directory sets are shown with the title 'Choose path to KEEP...' and show a series of directories in a row, that contain duplicate files. In this view duplicates have the same file name and any other files in these directories that are not duplicates will not be deleted</p>
|
||||
<p>TO BE CLEAR: after you click the "Delete Duplicates" button, the files in red and duplicate files in directories in red will be deleted from the file system</p>
|
||||
`
|
||||
$('#dbox-content').html(div)
|
||||
|
||||
function KeepFile(row, which, al)
|
||||
{
|
||||
$('[id^=kf' + row + ']').attr('class', 'alert alert-danger sm-txt py-1')
|
||||
$('[id^=kf' + row + ']').attr('class', 'alert alert-danger sm-txt py-1')
|
||||
$('#kf'+row+'-f'+which).attr('class', 'alert alert-' + al + ' py-1')
|
||||
$('#kfname-'+row).val( F[row.toString()+which.toString()] )
|
||||
}
|
||||
function KeepDir(row, which)
|
||||
{
|
||||
$('[id^=kd'+row+']').attr('class', 'alert alert-danger sm-txt py-1')
|
||||
$('#kd'+row+'-d'+which).attr('class', 'alert alert-success py-1')
|
||||
$('#kdid-'+row).val( D[row.toString()+which.toString()] )
|
||||
}
|
||||
</script>
|
||||
<div class="row">
|
||||
<form class="form form-inline col-lg-12" action="{{url_for('rm_dups')}}" method="POST">
|
||||
{# pass this through so that the back-end can delete this message when it rm_dups #}
|
||||
<input type="hidden" name="fe_msg_id" value={{fe_msg_id}}>
|
||||
{% set page=namespace(cnt=0) %}
|
||||
{% set pref=namespace(have="") %}
|
||||
<h5>Choose between these files:</h5>
|
||||
{% for dups in per_file_dups %}
|
||||
{% set outer_loop=loop.index %}
|
||||
{% set pref.have="" %}
|
||||
<div class="col-lg-12 py-2">
|
||||
{% for dup in dups %}
|
||||
<alert id="kf{{outer_loop}}-f{{loop.index}}" style="cursor: pointer;" class="alert"
|
||||
onClick="KeepFile({{outer_loop}},{{loop.index}})">{{dup.d}}/{{dup.f}}</alert>
|
||||
onClick="KeepFile({{outer_loop}},{{loop.index}},'success')">{{dup.d}}/{{dup.f}}</alert>
|
||||
{% if preferred[dup.h] == dup.id %}
|
||||
{% set pref.have="kf{}-f{}".format(outer_loop,loop.index) %}
|
||||
{% endif %}
|
||||
{% if loop.index < dups|length %}
|
||||
or
|
||||
{% else %}
|
||||
@@ -52,6 +88,11 @@
|
||||
</script>
|
||||
{% endfor %}
|
||||
</div class="col-lg-12">
|
||||
{% if pref.have == "" %}
|
||||
<script>KeepFile( {{outer_loop}}, 1,'warning' )</script>
|
||||
{% else %}
|
||||
<script>$("#{{pref.have}}").click()</script>
|
||||
{% endif %}
|
||||
{% set page.cnt = page.cnt + 1 %}
|
||||
{% if page.cnt == pagesize %}
|
||||
{% break %}
|
||||
@@ -85,22 +126,6 @@
|
||||
{% endblock main_content %}
|
||||
{% block script_content %}
|
||||
<script>
|
||||
function KeepFile(row, which)
|
||||
{
|
||||
$('[id^=kf' + row + ']').attr('class', 'alert alert-danger sm-txt py-1')
|
||||
$('#kf'+row+'-f'+which).attr('class', 'alert alert-success py-1')
|
||||
$('#kfname-'+row).val( F[row.toString()+which.toString()] )
|
||||
}
|
||||
|
||||
function KeepDir(row, which)
|
||||
{
|
||||
$('[id^=kd'+row+']').attr('class', 'alert alert-danger sm-txt py-1')
|
||||
$('#kd'+row+'-d'+which).attr('class', 'alert alert-success py-1')
|
||||
$('#kdid-'+row).val( D[row.toString()+which.toString()] )
|
||||
}
|
||||
|
||||
// force choose last of each keep file set
|
||||
$('[id$=f1]').each( function () { $(this).siblings( '.alert' ).last().click() } )
|
||||
// force choose last of each dir set
|
||||
$('[id$=d2]').each( function() { $(this).click() } )
|
||||
</script>
|
||||
|
||||
Reference in New Issue
Block a user