from wtforms import SubmitField, StringField, HiddenField, validators, Form
from flask_wtf import FlaskForm
from flask import request, render_template, redirect, send_from_directory
from main import db, app, ma
from sqlalchemy import Sequence
from sqlalchemy.exc import SQLAlchemyError
from status import st, Status
import os
import glob
from PIL import Image
from pymediainfo import MediaInfo
import hashlib
import exifread
import base64
import numpy
import cv2
import time

################################################################################
# Local Class imports
################################################################################
from job import Job, JobExtra, Joblog, NewJob
from person import Person, PersonRefimgLink
from refimg import Refimg
from settings import Settings
from shared import SymlinkName

# NOTE(review): `File` is used by the queries and relationships below, but it is
# neither defined nor imported in the part of this file that is visible here --
# confirm where it comes from.

################################################################################
# Classes describing the file-related tables in the database, and via
# sqlalchemy, connected to the DB as well
# These have to match one-for-one the DB tables
################################################################################
class EntryDirLink(db.Model):
    """Link table row pairing an entry with a dir (composite primary key)."""
    __tablename__ = "entry_dir_link"
    entry_id = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True)
    dir_eid = db.Column(db.Integer, db.ForeignKey("dir.eid"), primary_key=True)

    def __repr__(self):
        return f"<entry_id: {self.entry_id}, dir_eid: {self.dir_eid}>"


class Dir(db.Model):
    """Row of the "dir" table; its key is also a foreign key to entry.id."""
    __tablename__ = "dir"
    eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True)
    path_prefix = db.Column(db.String, unique=True, nullable=False)

    def __repr__(self):
        return f"<eid: {self.eid}, path_prefix: {self.path_prefix}>"


class Entry(db.Model):
    """Row of the "entry" table, with relationships to its type, Dir/File rows
    and (through entry_dir_link) the Dir rows it is linked to."""
    __tablename__ = "entry"
    id = db.Column(db.Integer, db.Sequence('file_id_seq'), primary_key=True)
    name = db.Column(db.String, unique=False, nullable=False)
    type_id = db.Column(db.Integer, db.ForeignKey("file_type.id"))
    type = db.relationship("FileType")
    dir_details = db.relationship("Dir")
    file_details = db.relationship("File")
    in_dir = db.relationship("Dir", secondary="entry_dir_link")

    def __repr__(self):
        # the related objects are rendered via their own __repr__
        return (
            f"<id: {self.id}, name: {self.name}, type={self.type}, "
            f"dir_details={self.dir_details}, file_details={self.file_details}, "
            f"in_dir={self.in_dir}>"
        )


class FileRefimgLink(db.Model):
    """Link table row pairing a file with a reference image, recording when the
    pair was processed and whether it matched."""
    __tablename__ = "file_refimg_link"
    file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True)
    refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
    when_processed = db.Column(db.Float)
    matched = db.Column(db.Boolean)

    def __repr__(self):
        return (
            f"<file_id: {self.file_id}, refimg_id: {self.refimg_id}, "
            f"when_processed: {self.when_processed}, matched: {self.matched}>"
        )


class FileType(db.Model):
    """Row of the "file_type" table: an id and a unique name."""
    __tablename__ = "file_type"
    id = db.Column(db.Integer, db.Sequence('file_type_id_seq'), primary_key=True)
    name = db.Column(db.String, unique=True, nullable=False)

    def __repr__(self):
        return f"<id: {self.id}, name={self.name}>"


def ViewingOptions( request ):
    """Work out the viewing options for a request.

    For anything other than a POST the defaults are returned.  For a POST the
    values come from the submitted form, and the offset is moved back/forward
    by `how_many` when the 'prev'/'next' buttons are present in the form.

    Returns the tuple (noo, grouping, how_many, offset, size).  Note that the
    form-supplied values other than `offset` are returned as submitted (no
    int conversion), matching the string default of `how_many`.
    """
    noo = "Oldest"
    grouping = "None"
    how_many = "50"
    offset = 0
    size = 128
    if request.method == "POST":
        form = request.form
        noo = form['noo']
        how_many = form['how_many']
        offset = int(form['offset'])
        grouping = form['grouping']
        size = form['size']
        if 'prev' in form:
            offset -= int(how_many)
        # never page before the first entry
        if offset < 0:
            offset = 0
        if 'next' in form:
            offset += int(how_many)
    return noo, grouping, how_many, offset, size


def _entries_for_paths( paths, noo, how_many, offset ):
    """Collect the entries to show for each path in `paths`.

    One query is run per path (offset/limit are applied per path, not to the
    combined list), ordered by file date ascending when `noo` is "Oldest" and
    descending otherwise, then by entry name.
    """
    if noo == "Oldest":
        ordering = (File.year, File.month, File.day, Entry.name)
    else:
        ordering = (File.year.desc(), File.month.desc(), File.day.desc(), Entry.name)

    entries = []
    for path in paths:
        # the Dir.path_prefix values to match start with whatever SymlinkName returns
        prefix = SymlinkName(path, path+'/')
        query = Entry.query.join(File).join(EntryDirLink).join(Dir)
        query = query.filter(Dir.path_prefix.like(prefix+'%'))
        entries += query.order_by(*ordering).offset(offset).limit(how_many).all()
    return entries


################################################################################
# /file_list_ip -> show detailed file list of all entries, ordered by name
################################################################################
@app.route("/file_list_ip", methods=["GET"])
def file_list_ip():
    """Render file_list.html with every Entry, ordered by name."""
    return render_template("file_list.html", page_title='View File Details (Import Path)', entry_data=Entry.query.order_by(Entry.name).all())


################################################################################
# /files_ip -> show thumbnail view of files from import_path(s)
################################################################################
@app.route("/files_ip", methods=["GET", "POST"])
def files_ip():
    """Render files.html for the entries under each '#'-separated import path."""
    noo, grouping, how_many, offset, size = ViewingOptions( request )
    # per import path, add entries to view
    settings = Settings.query.first()
    paths = settings.import_path.split("#")
    entries = _entries_for_paths( paths, noo, how_many, offset )
    return render_template("files.html", page_title='View Files (Import Path)', entry_data=entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size )


################################################################################
# /files_sp -> show thumbnail view of files from storage_path(s)
################################################################################
@app.route("/files_sp", methods=["GET", "POST"])
def files_sp():
    """Render files.html for the entries under each '#'-separated storage path."""
    noo, grouping, how_many, offset, size = ViewingOptions( request )
    # per storage path, add entries to view
    settings = Settings.query.first()
    paths = settings.storage_path.split("#")
    entries = _entries_for_paths( paths, noo, how_many, offset )
    return render_template("files.html", page_title='View Files (Storage Path)', entry_data=entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size )

################################################################################
# /search -> show thumbnail view of files whose name, or whose matched
# person tag, contains the search term
################################################################################
@app.route("/search", methods=["GET","POST"])
def search():
    """Render files.html with entries matching the submitted 'term'.

    Two result sets are concatenated: entries whose name contains the term,
    and entries linked (via a matched FileRefimgLink) to a Person whose tag
    contains the term.  Both are always ordered newest first and each has
    offset/limit applied separately.
    """
    noo, grouping, how_many, offset, size = ViewingOptions( request )
    term = request.form['term']
    pattern = f"%{term}%"
    newest_first = (File.year.desc(), File.month.desc(), File.day.desc(), Entry.name)

    file_data = ( Entry.query.join(File)
                  .filter(Entry.name.ilike(pattern))
                  .order_by(*newest_first).offset(offset).limit(how_many).all() )
    ai_data = ( Entry.query.join(File).join(FileRefimgLink).join(Refimg).join(PersonRefimgLink).join(Person)
                .filter(FileRefimgLink.matched==True)
                .filter(Person.tag.ilike(pattern))
                .order_by(*newest_first).offset(offset).limit(how_many).all() )

    all_entries = file_data + ai_data
    return render_template("files.html", page_title='View Files', search_term=term, entry_data=all_entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size )


# NOTE(review): in the three scan views below job.id is passed to format() twice
# but each message has a single placeholder, and the text mentions a link that
# the string does not contain -- confirm whether link markup went missing.

################################################################################
# /files/scannow -> allows us to force a check for new files
################################################################################
@app.route("/files/scannow", methods=["GET"])
def scannow():
    """Create a "scannow" job and show a success message naming it."""
    job = NewJob("scannow" )
    st.SetAlert("success")
    st.SetMessage("scanning for new files in: Job #{} (Click the link to follow progress)".format( job.id, job.id) )
    return render_template("base.html")


################################################################################
# /files/forcescan -> deletes old data in DB, and does a brand new scan
################################################################################
@app.route("/files/forcescan", methods=["GET"])
def forcescan():
    """Create a "forcescan" job and show a success message naming it."""
    job = NewJob("forcescan" )
    st.SetAlert("success")
    st.SetMessage("force scan & rebuild data for files in: Job #{} (Click the link to follow progress)".format( job.id, job.id) )
    return render_template("base.html")


################################################################################
# /files/scan_sp -> allows us to force a check for new files
################################################################################
@app.route("/files/scan_sp", methods=["GET"])
def scan_sp():
    """Create a "scan_sp" job and show a success message naming it."""
    job = NewJob("scan_sp" )
    st.SetAlert("success")
    st.SetMessage("scanning for new files in: Job #{} (Click the link to follow progress)".format( job.id, job.id) )
    return render_template("base.html")


def TrimmedPath( prefix, path ):
    """Return `path` with every occurrence of `prefix` removed."""
    return path.replace(prefix, '' )


def _dup_record( prefix, fname, path, did, file_hash, eid ):
    """Build the dict describing one copy of a duplicated file."""
    return { 'f': fname, 'd': TrimmedPath(prefix, path), 'did': did, 'h': file_hash, 'id': eid }


def AddDup( prefix, row, dups ):
    """Record both files of a duplicate-pair `row` in `dups`, keyed by hash.

    `dups` maps a hash to a list of copy records (see _dup_record) and is
    modified in place.  The first row seen for a hash adds both of its files;
    later rows for that hash only add a file whose (name, trimmed dir)
    combination is not in the list yet.
    """
    first = _dup_record( prefix, row.fname1, row.path1, row.did1, row.hash, row.id1 )
    second = _dup_record( prefix, row.fname2, row.path2, row.did2, row.hash, row.id2 )

    if row.hash not in dups:
        dups[row.hash] = [ first, second ]
        return

    known = dups[row.hash]
    # process path1/fname1 then path2/fname2 -- add each combo only if missing
    for candidate in (first, second):
        already_there = any( dup['f'] == candidate['f'] and dup['d'] == candidate['d'] for dup in known )
        if not already_there:
            known.append( candidate )
    return


@app.route("/fix_dups", methods=["POST"])
def fix_dups():
    """Find files with identical hashes and render dups.html to resolve them.

    Duplicate sets are split in two groups:
      * per_file_dups -- sets that have to be decided file by file (more than
        2 copies, both copies in the same dir, or differing file names);
      * per_path_dups -- runs of sets made of exactly 2 same-named files in
        different dirs, summarised per dir pair with a count and the
        comma-terminated list of hashes.
    When no duplicates are found a success message is shown on base.html.
    """
    rows = db.engine.execute(
        "select e1.id as id1, f1.hash, d1.path_prefix as path1, d1.eid as did1, e1.name as fname1, "
        "e2.id as id2, d2.path_prefix as path2, d2.eid as did2, e2.name as fname2 "
        "from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 "
        "where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id "
        "and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id "
        "order by path1, fname1;"
    )

    # use import_path setting to remove the dup path prefix of static/basename()
    # -- static isn't really seen, and the import path basename is always going to be the same
    s = Settings.query.first()
    import_path = s.import_path
    if import_path.endswith('/'):
        import_path = import_path[:-1]
    prefix = f"static/{os.path.basename(import_path)}/"

    dups = {}
    for row in rows:
        AddDup( prefix, row, dups )

    # returns_rows is true for any SELECT, even one yielding zero rows, so
    # "no duplicates" has to be detected from what was actually collected
    if not dups:
        st.SetAlert("success")
        st.SetMessage("Err, no dups - should now clear the FE 'danger' message?")
        return render_template("base.html")

    per_file_dups = []
    per_path_dups = []
    overall_dup_cnt = 0
    overall_dup_sets = 0
    # state of the dir pair currently being accumulated ('' == none yet)
    d1 = ""
    d2 = ""
    did1 = ""
    did2 = ""
    hashes = ""
    dup_cnt = 1

    for file_hash, copies in dups.items():
        first, second = copies[0], copies[1]
        # ask per file when: more than 2 copies; or only 2 copies in the same
        # dir (so names must differ); or content same but filename different
        if len(copies) > 2 or first['d'] == second['d'] or first['f'] != second['f']:
            per_file_dups.append(copies)
            overall_dup_cnt += len(copies)
            overall_dup_sets += 1
        # by here we have only 2 files, with the same name, different path
        # (MOST COMMON, and I think we dont care per file, just per path)
        elif d1 != first['d']:
            # a new first dir starts: flush the pair accumulated so far, if any
            if d1 != '':
                overall_dup_cnt += dup_cnt
                overall_dup_sets += 1
                per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })
            dup_cnt = 1
            d1 = first['d']
            d2 = second['d']
            did1 = first['did']
            did2 = second['did']
            hashes = f"{file_hash},"
        else:
            dup_cnt += 1
            hashes += f"{file_hash},"

    # flush the last accumulated dir pair
    if d1 != '':
        overall_dup_cnt += dup_cnt
        # NOTE(review): the flush inside the loop adds 1 to overall_dup_sets,
        # this one adds dup_cnt -- confirm which of the two is intended
        overall_dup_sets += dup_cnt
        per_path_dups.append({'count': dup_cnt, 'd1': d1, 'd2': d2, 'did1': did1, 'did2': did2, 'hashes' : hashes })

    return render_template("dups.html", per_file_dups=per_file_dups, per_path_dups=per_path_dups, fe_msg_id=request.form['fe_msg_id'], overall_dup_cnt=overall_dup_cnt, overall_dup_sets=overall_dup_sets )


@app.route("/rm_dups", methods=["POST"])
def rm_dups():
    """Turn the submitted keep-file (kf*) / keep-dir (kd*) form fields into
    JobExtra entries and create an "rmdups" job with them."""
    jex = []
    for el in request.form:
        if 'kfhash-' in el:
            # get which row/number kf it is...
            pfx, which = el.split('-')
            # NOTE(review): this reads 'kfname-<n>' but stores it as 'kfid-<n>',
            # whereas the kd branch reads 'kdid-<n>' -- confirm the field name
            jex.append( JobExtra( name=f"kfid-{which}", value=request.form['kfname-'+which] ) )
            jex.append( JobExtra( name=f"kfhash-{which}", value=request.form[el] ) )
        if 'kdhash-' in el:
            # get which row/number kd it is...
            pfx, which = el.split('-')
            jex.append( JobExtra( name=f"kdid-{which}", value=request.form['kdid-'+which] ) )
            jex.append( JobExtra( name=f"kdhash-{which}", value=request.form[el] ) )

    fe_msg_id = request.form['fe_msg_id']
    # allow backend to delete FE message once delete is being processed
    jex.append( JobExtra( name="fe_msg_id", value=fe_msg_id ) )

    job = NewJob( "rmdups", 0, None, jex )
    st.SetAlert("success")
    st.SetMessage( f"Created Job #{job.id} to delete duplicate files")
    return render_template("base.html")


@app.route("/move_files", methods=["POST"])
def move_files():
    """Placeholder: moving files is not implemented, only a warning is shown."""
    st.SetAlert("warning")
    st.SetMessage("Not Yet!")
    return render_template("base.html")


################################################################################
# /static -> returns the contents of any file referenced inside /static.
# we create/use symlinks in static/ to reference the images to show
################################################################################
@app.route("/static/<path:filename>")
def custom_static(filename):
    """Serve `filename` from the static/ directory.

    The rule has to declare <path:filename>: without a URL variable Flask
    calls the view with no arguments, and the `path` converter is what lets
    the name contain '/' for files in sub-directories of static/.
    """
    return send_from_directory("static/", filename)