fixed BUG-21: file structure broken after rescan; also the beginnings of a new job that checks for duplicate files, has the back-end job signal the front-end when duplicates are found, and stubs in the basic /fix_dups route in the front-end (not built yet)
BUGs
@@ -1,61 +1,2 @@
-### Next: 21
+### Next: 22

-BUG-21: the data structure of dirs/files is actually quite broken on a real import (it seems to happen on second/subsequent imports -- the code that removes deleted files is not seeing all files?)
-
-pa=# select count(edl.dir_eid), d.path_prefix from entry_dir_link edl, dir d where edl.dir_eid = d.eid and edl.dir_eid in ( select eid from dir ) group by d.path_prefix;
- count |                     path_prefix
--------+----------------------------------------------------
-   101 | static/CAM_UPLOADS/M's Phone/Camera Roll
-     1 | static/CAM_UPLOADS/M's Phone
-  2979 | static/CAM_UPLOADS/Mandy's Phone/Camera Roll
-     1 | static/CAM_UPLOADS
-   675 | static/CAM_UPLOADS/M's Galaxy A51/Camera Roll
-  3656 | static/CAM_UPLOADS/Damien's Phone/Camera Roll
-     1 | static/CAM_UPLOADS/Damien's Phone/Camera Roll/0000
-     1 | static/CAM_UPLOADS/M's Galaxy A51
-     1 | static/CAM_UPLOADS/Mandy's Phone
-     1 | static/CAM_UPLOADS/Damien's Phone
-(10 rows)
-
-pa=# select * from dir;
- eid  |                     path_prefix                     | num_files |  last_import_date
-------+------------------------------------------------------+-----------+--------------------
-    2 | static/CAM_UPLOADS/Mandy's Phone                     |         1 | 1613024867.8238187
-    3 | static/CAM_UPLOADS/Mandy's Phone/Camera Roll         |      2999 | 1613024872.385247
- 3003 | static/CAM_UPLOADS/M's Galaxy A51                    |         1 | 1613024872.387184
- 3004 | static/CAM_UPLOADS/M's Galaxy A51/Camera Roll        |      2400 | 1613024875.9811678
- 5405 | static/CAM_UPLOADS/Damien's Phone                    |         1 | 1613024875.983697
- 5406 | static/CAM_UPLOADS/Damien's Phone/Camera Roll        |      3658 | 1613024883.1730359
- 9058 | static/CAM_UPLOADS/Damien's Phone/Camera Roll/0000   |         1 | 1613024883.1779747
- 9066 | static/CAM_UPLOADS/M's Phone                         |         1 | 1613024883.1806386
- 9067 | static/CAM_UPLOADS/M's Phone/Camera Roll              |       101 | 1613024883.3877454
-    1 | static/CAM_UPLOADS                                   |      9167 | 1613024867.8217578
-
-############
-SO file counts are really broken. A couple of dirs are only slightly off, but take M's Galaxy A51/Camera Roll: entry_dir_link has 675 entries linked to it, while the file-tree walk saw 2400 (dir.num_files) -- and an ls -lR confirms 2400 files on disk...
-
-checking by hand with the first file in the M's Galaxy A51 dir:
-
-ddp@mara:~/src/photoassistant$ ls -l /export/docker/storage/photos/CAM_UPLOADS/M\'s\ Galaxy\ A51/Camera\ Roll/20190105_175219.jpg
--rw-r--r-- 1 mythtv mythtv 2.6M Dec 28 20:37 "/export/docker/storage/photos/CAM_UPLOADS/M's Galaxy A51/Camera Roll/20190105_175219.jpg"
-
-pa=# select * from entry where name = '20190105_175219.jpg';
- id  |        name         | type_id | exists_on_fs
------+---------------------+---------+--------------
- 951 | 20190105_175219.jpg |       2 | t
-(1 row)
-
-pa=# select * from entry_dir_link where entry_id = 951;
- entry_id | dir_eid
-----------+---------
-      951 |       3
-(1 row)
-
-pa=# select * from dir where eid = 3;
- eid |                 path_prefix                  | num_files | last_import_date
------+----------------------------------------------+-----------+-------------------
-   3 | static/CAM_UPLOADS/Mandy's Phone/Camera Roll |      2999 | 1613024872.385247
-
-so the file really is in the M's Galaxy A51 dir on disk, but in the DB it is linked to the Mandy's Phone/Camera Roll dir instead...
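
A query along these lines (hypothetical -- it just combines the two checks above into one) would surface the per-directory mismatch directly, comparing entry_dir_link rows per dir against the num_files recorded by the walk:

    pa=# select d.eid, d.path_prefix, d.num_files, count(edl.entry_id) as linked
           from dir d left join entry_dir_link edl on edl.dir_eid = d.eid
          group by d.eid, d.path_prefix, d.num_files
         having count(edl.entry_id) <> d.num_files
          order by d.path_prefix;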
TODO
@@ -13,6 +13,8 @@
 - without debugs: import == 04:03, getfiledetails == 0:35:36 -- not a sig diff
 - with exifread & debug: import == 04:26

+* CheckForDups() needs to allow the f/end to actually do the work, and then clear the MessageToFE() as well

 * try again with walk to go through loop once quickly just to add up files,
 * then start the import dir counting up / progress

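For the two-pass walk item above, a minimal sketch of the first, counting-only pass -- the function name and the way the total feeds the job are assumptions, not the repo's API:

    import os

    def count_import_files(top):
        # cheap first pass: only count files, so job.num_files / the progress
        # bar can be set before the real import walk starts
        total = 0
        for _root, _subdirs, files in os.walk(top):
            total += len(files)
        return total

    # the second pass would then be the existing JobImportDir() walk, ticking
    # job.current_file_num up against the total computed here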
files.py
@@ -159,6 +159,29 @@ def forcescan():
     st.SetMessage("force scan & rebuild data for files in: <a href=/job/{}>Job #{}</a> (Click the link to follow progress)".format( job.id, job.id) )
     return render_template("base.html")

+@app.route("/fix_dups", methods=["GET"])
+def fix_dups():
+    # dups = db.engine.execute.session.execute( "select d1.path_prefix as path1, e1.name as fname1, d2.path_prefix as path2, e2.name as name2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" )
+
+    # if len(dups) > 0:
+    #     ActionForFE( job, dups, "danger", "Found duplicate(s), click <a href="/fix_dups">here</a> to finalise import by removing duplicates" )
+    #     p1=""
+    #     done=list()
+    #     for dup in dups:
+    #         if p1 != dup.path1:
+    #             p1 = dup.path1
+    #             p2 = dup.path2
+    #             # this is the flip-side of a previous p1 <-> p2 dup (this p2 is a previous p1)
+    #             if p2 in done:
+    #                 continue
+    #             done.append(p1)
+    #             print(f"Duplicates in: {p1} <-> {p2}")
+
+    st.SetAlert("warning")
+    st.SetMessage("Not Yet!")
+    return render_template("base.html")


 @app.route("/move_files", methods=["POST"])
 def move_files():
     st.SetAlert("warning")
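Where the commented-out block above seems to be heading, sketched as runnable code. This assumes session.execute() returns rows exposing the aliased columns (path1, fname1, path2) and keeps the same "report each directory pair once" bookkeeping -- an illustration, not the finished route:

    @app.route("/fix_dups", methods=["GET"])
    def fix_dups():
        # every A<->B duplicate also appears as B<->A, so remember reported paths
        dups = session.execute(
            "select d1.path_prefix as path1, e1.name as fname1, "
            "       d2.path_prefix as path2, e2.name as name2 "
            "  from entry e1, file f1, dir d1, entry_dir_link edl1, "
            "       entry e2, file f2, dir d2, entry_dir_link edl2 "
            " where e1.id = f1.eid and e2.id = f2.eid "
            "   and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id "
            "   and d2.eid = edl2.dir_eid and edl2.entry_id = e2.id "
            "   and f1.hash = f2.hash and e1.id != e2.id "
            " order by path1, fname1" ).fetchall()

        done = list()
        pairs = list()
        for dup in dups:
            if dup.path2 in done:      # flip-side of a pair already recorded
                continue
            if dup.path1 not in done:
                done.append(dup.path1)
                pairs.append((dup.path1, dup.path2))

        st.SetAlert("warning")
        st.SetMessage("Duplicates found in {} directory pair(s) -- removal not implemented yet".format(len(pairs)))
        return render_template("base.html")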
@@ -233,8 +233,9 @@ def MessageToFE( job_id, alert, message ):
     msg = PA_JobManager_FE_Message( job_id=job_id, alert=alert, message=message)
     session.add(msg)
     session.commit()
+    return

-def ProcessImportDirs(parent_job=None):
+def ProcessImportDirs(parent_job):
     settings = session.query(Settings).first()
     if settings == None:
         raise Exception("Cannot create file data with no settings / import path is missing")
@@ -263,15 +264,23 @@ def ProcessImportDirs(parent_job=None):
         session.commit()
         if parent_job:
             AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job2.id, job2.id, job2.name, job2.wait_for ) )
-        """
+
         jex3=JobExtra( name="path", value=path )
-        job3=Job(start_time=now, last_update=now, name="processai", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
+        job3=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
         job3.extra.append(jex3)
         session.add(job3)
         session.commit()
         if parent_job:
             AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job3.id, job3.id, job3.name, job3.wait_for ) )
         """
+        jex4=JobExtra( name="path", value=path )
+        job4=Job(start_time=now, last_update=now, name="processai", state="New", wait_for=job2.id, pa_job_state="New", current_file_num=0 )
+        job4.extra.append(jex4)
+        session.add(job4)
+        session.commit()
+        if parent_job:
+            AddLogForJob(parent_job, "adding <a href='/job/{}'>job id={} {}</a> (wait for: {})".format( job4.id, job4.id, job4.name, job4.wait_for ) )
+        """
     HandleJobs()
     return
@@ -298,6 +307,8 @@ def RunJob(job):
         JobImportDir(job)
     elif job.name =="getfiledetails":
         JobGetFileDetails(job)
+    elif job.name == "checkdups":
+        CheckForDups(job)
     elif job.name == "processai":
         JobProcessAI(job)
     else:
@@ -429,6 +440,7 @@ def AddDir(job, dirname, path_prefix, in_dir):
 def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ):
     e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==fname,Dir.eid==in_dir.eid).first()
     if e:
+        print( f"################################################ FILE EXISTS ALREADY: {fname} -- {in_dir.path_prefix} {e}" )
         e.exists_on_fs=True
         return e
     ftype = session.query(FileType).filter(FileType.name==type_str).first()
@@ -503,7 +515,6 @@ def GetDateFromFile(file, stat):
     year, month, day, _, _, _, _, _, _ = datetime.fromtimestamp(stat.st_ctime).timetuple()
     c=date(year, month, day).isocalendar()
     woy=c[1]
-    print(f"DEL ME: year={year}, month={month}, day={day}")
     return year, month, day, woy
@@ -536,7 +547,6 @@ def JobImportDir(job):
             root=root[0:-1]

         dir=AddDir(job, os.path.basename(root), pp, parent_dir)
-        parent_dir=dir
         for basename in files:
             # commit every 100 files to see progress being made but not hammer the database
             if job.current_file_num % 100 == 0:
@@ -559,15 +569,15 @@ def JobImportDir(job):
                 year, month, day, woy = GetDateFromFile(fname, stat)
                 e=AddFile( job, basename, type_str, fsize, dir, year, month, day, woy )
             else:
-                e=session.query(Entry).filter(Entry.name==basename).first()
+                e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==basename,Dir.eid==dir.eid).first()
                 e.exists_on_fs=True
                 if DEBUG==1:
                     print("DEBUG: {} - {} is OLDER than {}".format( basename, stat.st_ctime, dir.last_import_date ), basename )
             job.current_file=basename
             job.current_file_num+=1

         dir.num_files=len(files)+len(subdirs)
         dir.last_import_date = time.time()
+        parent_dir=dir
     job.num_files=overall_file_cnt
     job.current_file_num=overall_file_cnt
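This else-branch change looks like the core of the BUG-21 fix: on a rescan, the old lookup matched the first Entry with that filename anywhere in the DB, so a file walked in M's Galaxy A51/Camera Roll could "find" the row already linked to Mandy's Phone/Camera Roll and never get its own link. Side by side, with the behaviour spelled out in comments (names as in the diff):

    # before: any Entry with a matching name counts, regardless of directory
    e = session.query(Entry).filter(Entry.name==basename).first()

    # after: only an Entry already linked (via EntryDirLink) to the directory
    # currently being walked is treated as "already imported"
    e = session.query(Entry).join(EntryDirLink).join(Dir).filter(
            Entry.name==basename, Dir.eid==dir.eid ).first()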
@@ -814,9 +824,24 @@ def GenVideoThumbnail(job, file):
         return None
     return thumbnail

+def CheckForDups(job):
+    path=[jex.value for jex in job.extra if jex.name == "path"][0]
+    path='static'+'/'+os.path.basename(path[0:-1])
+    AddLogForJob( job, f"Check for duplicates in import path: {path}" )
+    res = session.execute( f"select count(e1.name) as count from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and d1.path_prefix like '%{path}%' and f1.hash = f2.hash and e1.id != e2.id" )
+    for row in res:
+        if row.count > 0:
+            MessageToFE( job.id, "danger", "Found duplicate(s), click <a href='/fix_dups'>here</a> to finalise import by removing duplicates" )
+
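One caveat on the raw SQL above: path is spliced in with an f-string, so a quote in a directory name would break the statement. A sketch of the same count using a bound parameter instead (sqlalchemy.text is assumed to be importable here; it is not shown in this hunk):

    from sqlalchemy import text

    dup_count_sql = text(
        "select count(e1.name) as count "
        "  from entry e1, file f1, dir d1, entry_dir_link edl1, "
        "       entry e2, file f2, dir d2, entry_dir_link edl2 "
        " where e1.id = f1.eid and e2.id = f2.eid "
        "   and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id "
        "   and d2.eid = edl2.dir_eid and edl2.entry_id = e2.id "
        "   and d1.path_prefix like :prefix "
        "   and f1.hash = f2.hash and e1.id != e2.id" )

    res = session.execute( dup_count_sql, {"prefix": f"%{path}%"} )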
 if __name__ == "__main__":
     print("INFO: PA job manager starting - listening on {}:{}".format( PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT) )
-    ProcessImportDirs()
+    ##### have to test the lines below (to force a scan on startup)
+    now=datetime.now(pytz.utc)
+    job=Job(start_time=now, last_update=now, name="scannow", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=0 )
+    session.add(job)
+    session.commit()
+    HandleJobs()
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
         s.bind((PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT))
         s.listen()
@@ -110,10 +110,15 @@
 {% if GetJM_Message() != None %}
   {% set msg=GetJM_Message() %}
   <div class="row alert alert-{{msg.alert}}">
-    <a href="{{url_for('joblog', id=msg.job_id)}}">Job #{{msg.job_id}}</a>: {{msg.message|safe}}
+    {% if msg.job_id %}
+      <a href="{{url_for('joblog', id=msg.job_id)}}">Job #{{msg.job_id}}</a>:
+    {% endif %}
+    {{msg.message|safe}}
   </div>
+  {% if msg.alert != "danger" %}
   {% set dont_print=ClearJM_Message(msg.id) %}
   {% endif %}
+  {% endif %}
 {% endif %}

 {% block main_content %}
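
Per the TODO, the front-end is eventually meant to do the dedup work and then clear the sticky message itself. A hypothetical sketch of that final step, reusing the PA_JobManager_FE_Message model from files.py -- nothing below exists in this commit yet:

    def clear_duplicate_warning():
        # hypothetical helper for the finished /fix_dups: drop the persistent
        # "danger" alert once the duplicates have actually been handled
        msg = session.query(PA_JobManager_FE_Message).filter_by(alert="danger").first()
        if msg:
            session.delete(msg)
            session.commit()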