Files
photoassistant/files.py

297 lines
13 KiB
Python

from wtforms import SubmitField, StringField, HiddenField, validators, Form
from flask_wtf import FlaskForm
from flask import request, render_template, redirect, send_from_directory
from main import db, app, ma
from sqlalchemy import Sequence
from sqlalchemy.exc import SQLAlchemyError
from status import st, Status
import os
import glob
from PIL import Image
from pymediainfo import MediaInfo
import hashlib
import exifread
import base64
import numpy
import cv2
import time
################################################################################
# Local Class imports
################################################################################
from job import Job, JobExtra, Joblog, NewJob
from person import Person, PersonRefimgLink
from refimg import Refimg
from settings import Settings
################################################################################
# Class describing File in the database, and via sqlalchemy, connected to the DB as well
# This has to match one-for-one the DB table
################################################################################
class EntryDirLink(db.Model):
__tablename__ = "entry_dir_link"
entry_id = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
dir_eid = db.Column(db.Integer, db.ForeignKey("dir.eid"), primary_key=True )
def __repr__(self):
return "<entry_id: {}, dir_eid: {}>".format(self.entry_id, self.dir_eid)
class Dir(db.Model):
__tablename__ = "dir"
eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
path_prefix = db.Column(db.String, unique=True, nullable=False )
def __repr__(self):
return "<eid: {}, path_prefix: {}>".format(self.eid, self.path_prefix)
class Entry(db.Model):
__tablename__ = "entry"
id = db.Column(db.Integer, db.Sequence('file_id_seq'), primary_key=True )
name = db.Column(db.String, unique=False, nullable=False )
type_id = db.Column(db.Integer, db.ForeignKey("file_type.id"))
type = db.relationship("FileType")
dir_details = db.relationship( "Dir")
file_details = db.relationship( "File" )
in_dir = db.relationship ("Dir", secondary="entry_dir_link" )
def __repr__(self):
return "<id: {}, name: {}, type={}, dir_details={}, file_details={}, in_dir={}>".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir)
class FileRefimgLink(db.Model):
__tablename__ = "file_refimg_link"
file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True)
refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
when_processed = db.Column(db.Float)
matched = db.Column(db.Boolean)
def __repr__(self):
return f"<file_id: {self.file_id}, refimg_id: {self.refimg_id} when_processed={self.when_processed}, matched={self.matched}"
class File(db.Model):
__tablename__ = "file"
eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
size_mb = db.Column(db.Integer, unique=False, nullable=False)
thumbnail = db.Column(db.String, unique=False, nullable=True)
hash = db.Column(db.Integer)
year = db.Column(db.Integer)
month = db.Column(db.Integer)
day = db.Column(db.Integer)
woy = db.Column(db.Integer)
def __repr__(self):
return f"<eid: {self.eid}, size_mb={self.size_mb}, hash={self.hash}, year={self.year}, month={self.month}, day={self.day}, woy={self.woy}>"
class FileType(db.Model):
__tablename__ = "file_type"
id = db.Column(db.Integer, db.Sequence('file_type_id_seq'), primary_key=True )
name = db.Column(db.String, unique=True, nullable=False )
def __repr__(self):
return "<id: {}, name={}>".format(self.id, self.name )
def ViewingOptions( request ):
noo="Oldest"
grouping="Day"
how_many="50"
offset=0
size=128
if request.method=="POST":
noo=request.form['noo']
how_many=request.form['how_many']
offset=int(request.form['offset'])
grouping=request.form['grouping']
size = request.form['size']
if 'prev' in request.form:
offset -= int(how_many)
if offset < 0:
offset=0
if 'next' in request.form:
offset += int(how_many)
return noo, grouping, how_many, offset, size
################################################################################
# /file_list -> show detailed file list of files from import_path(s)
################################################################################
@app.route("/file_list", methods=["GET"])
def file_list():
return render_template("file_list.html", page_title='View Files (details)', entry_data=Entry.query.order_by(Entry.name).all())
################################################################################
# /files -> show thumbnail view of files from import_path(s)
################################################################################
@app.route("/files", methods=["GET", "POST"])
def files():
noo, grouping, how_many, offset, size = ViewingOptions( request )
entries=[]
if noo == "Oldest":
entries=Entry.query.join(File).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all()
else:
entries=Entry.query.join(File).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all()
return render_template("files.html", page_title='View Files', entry_data=entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size )
################################################################################
# /search -> show thumbnail view of files from import_path(s)
################################################################################
@app.route("/search", methods=["GET","POST"])
def search():
noo, grouping, how_many, offset, size = ViewingOptions( request )
file_data=Entry.query.join(File).filter(Entry.name.ilike(f"%{request.form['term']}%")).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all()
ai_data=Entry.query.join(File).join(FileRefimgLink).join(Refimg).join(PersonRefimgLink).join(Person).filter(FileRefimgLink.matched==True).filter(Person.tag.ilike(f"%{request.form['term']}%")).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all()
all_entries = file_data + ai_data
return render_template("files.html", page_title='View Files', search_term=request.form['term'], entry_data=all_entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size )
################################################################################
# /files/scannow -> allows us to force a check for new files
################################################################################
@app.route("/files/scannow", methods=["GET"])
def scannow():
job=NewJob("scannow" )
st.SetAlert("success")
st.SetMessage("scanning for new files in:&nbsp;<a href=/job/{}>Job #{}</a>&nbsp;(Click the link to follow progress)".format( job.id, job.id) )
return render_template("base.html")
################################################################################
# /files/forcescan -> deletes old data in DB, and does a brand new scan
################################################################################
@app.route("/files/forcescan", methods=["GET"])
def forcescan():
job=NewJob("forcescan" )
st.SetAlert("success")
st.SetMessage("force scan & rebuild data for files in:&nbsp;<a href=/job/{}>Job #{}</a>&nbsp;(Click the link to follow progress)".format( job.id, job.id) )
return render_template("base.html")
def TrimmedPath( prefix, path ):
return path.replace(prefix, '' )
def AddDup( prefix, row, dups ):
if row.hash not in dups:
dups[row.hash]=[]
dups[row.hash].append( { 'f': row.fname1, 'd':TrimmedPath(prefix, row.path1), 'did': row.did1, 'h':row.hash, 'id':row.id1 } )
dups[row.hash].append( { 'f': row.fname2, 'd':TrimmedPath(prefix, row.path2), 'did': row.did2, 'h':row.hash, 'id':row.id2 } )
else:
# process path1 / fname1 -- if that combo is not in the dups[hash], add it
found=0
for dup in dups[row.hash]:
if dup['f'] == row.fname1 and dup['d'] == TrimmedPath(prefix, row.path1):
found=1
continue
if not found:
dups[row.hash].append( { 'f': row.fname1, 'd':TrimmedPath(prefix, row.path1), 'did': row.did1, 'h':row.hash, 'id':row.id1 } )
# process path2 / fname2 -- if that combo is not in the dups[hash], add it
found=0
for dup in dups[row.hash]:
if dup['f'] == row.fname2 and dup['d'] == TrimmedPath(prefix, row.path2):
found=1
continue
if not found:
dups[row.hash].append( { 'f': row.fname2, 'd':TrimmedPath(prefix, row.path2), 'did': row.did2, 'h':row.hash, 'id':row.id2 } )
return
@app.route("/fix_dups", methods=["POST"])
def fix_dups():
rows = db.engine.execute( "select e1.id as id1, f1.hash, d1.path_prefix as path1, d1.eid as did1, e1.name as fname1, e2.id as id2, d2.path_prefix as path2, d2.eid as did2, e2.name as fname2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id order by path1, fname1;" )
if rows.returns_rows == False:
st.SetAlert("success")
st.SetMessage(f"Err, no dups - should now clear the FE 'danger' message?")
return render_template("base.html")
# use import_path setting to remove the dup path prefix of static/basename(<import_path>)
# -- static isn't really seen, and the import path basename is always going to be the same
s=Settings.query.first()
if s.import_path[-1] == '/':
prefix = os.path.basename(s.import_path[0:-1])
else:
prefix = os.path.basename(s.import_path)
prefix=f"static/{prefix}/"
dups={}
for row in rows:
AddDup( prefix, row, dups )
d1=""
d2=""
did1=""
did2=""
str=""
dup_cnt=1
per_file_dups=[]
per_path_dups=[]
hashes=""
for hash in dups:
if len(dups[hash]) > 2:
per_file_dups.append(dups[hash])
elif dups[hash][0]['d'] == dups[hash][1]['d']:
per_file_dups.append(dups[hash])
elif dups[hash][0]['f'] != dups[hash][1]['f']:
per_file_dups.append(dups[hash])
# by here we have only 2 files, with the same name, different path
# (MOST COMMON, and I think we dont care per file, just per path)
elif d1 != dups[hash][0]['d']:
if d1 != '':
dup_cnt=1
per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
d1 = dups[hash][0]['d']
d2 = dups[hash][1]['d']
did1 = dups[hash][0]['did']
did2 = dups[hash][1]['did']
str=f"duplicates found in {d1} and {d2}"
hashes = f"{hash},"
else:
dup_cnt += 1
hashes += f"{hash},"
if d1 != '':
per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
print( f"msg={request.form['fe_msg_id']}" )
return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'] )
@app.route("/rm_dups", methods=["POST"])
def rm_dups():
jex=[]
for el in request.form:
if 'kfhash-' in el:
# get which row/number kf it is...
pfx, which = el.split('-')
jex.append( JobExtra( name=f"kfid-{which}", value=request.form['kfname-'+which] ) )
jex.append( JobExtra( name=f"kfhash-{which}", value=request.form[el] ) )
if 'kdhash-' in el:
# get which row/number kd it is...
pfx, which = el.split('-')
jex.append( JobExtra( name=f"kdid-{which}", value=request.form['kdid-'+which] ) )
jex.append( JobExtra( name=f"kdhash-{which}", value=request.form[el] ) )
fe_msg_id=request.form['fe_msg_id']
# allow backend to delete FE message once delete is being processed
jex.append( JobExtra( name="fe_msg_id", value=fe_msg_id ) )
job=NewJob( "rmdups", 0, None, jex )
st.SetAlert("success")
st.SetMessage( f"Created&nbsp;<a href=/job/{job.id}>Job #{job.id}</a>&nbsp;to delete duplicate files")
return render_template("base.html")
@app.route("/move_files", methods=["POST"])
def move_files():
st.SetAlert("warning")
st.SetMessage("Not Yet!")
return render_template("base.html")
################################################################################
# /static -> returns the contents of any file referenced inside /static.
# we create/use symlinks in static/ to reference the images to show
################################################################################
@app.route("/static/<filename>")
def custom_static(filename):
return send_from_directory("static/", filename)