From 095b7c83336ad43053c09881284afbb828dd9982 Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Sun, 11 Apr 2021 15:35:01 +1000 Subject: [PATCH] Path in the DB is complete, still have hacks around displaying folders (hardcoded path name) --- files.py | 52 +++++++++++++++++------- pa_job_manager.py | 86 ++++++++++++++++++++++++++-------------- tables.sql | 11 ++++- templates/file_list.html | 4 +- templates/files.html | 12 +++--- 5 files changed, 113 insertions(+), 52 deletions(-) diff --git a/files.py b/files.py index 854bb0e..734c3b8 100644 --- a/files.py +++ b/files.py @@ -33,21 +33,39 @@ from dups import Duplicates # Class describing File in the database, and via sqlalchemy, connected to the DB as well # This has to match one-for-one the DB table ################################################################################ +class PathDirLink(db.Model): + __tablename__ = "path_dir_link" + path_id = db.Column(db.Integer, db.ForeignKey("path.id"), primary_key=True ) + dir_eid = db.Column(db.Integer, db.ForeignKey("dir.eid"), primary_key=True ) + + def __repr__(self): + return f"" + +class Path(db.Model): + __tablename__ = "path" + id = db.Column(db.Integer, db.Sequence('path_id_seq'), primary_key=True ) + path_prefix = db.Column(db.String, unique=True, nullable=False ) + num_files = db.Column(db.Integer) + + def __repr__(self): + return f"" + class EntryDirLink(db.Model): __tablename__ = "entry_dir_link" entry_id = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True ) dir_eid = db.Column(db.Integer, db.ForeignKey("dir.eid"), primary_key=True ) def __repr__(self): - return "".format(self.entry_id, self.dir_eid) + return f"" class Dir(db.Model): __tablename__ = "dir" eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True ) - path_prefix = db.Column(db.String, unique=True, nullable=False ) + rel_path = db.Column(db.String, unique=True ) + in_path = db.relationship("Path", secondary="path_dir_link", uselist=False) def __repr__(self): - return "".format(self.eid, self.path_prefix) + return f"" class Entry(db.Model): __tablename__ = "entry" @@ -136,7 +154,12 @@ def ViewingOptions( request ): offset=int(request.form['offset']) grouping=request.form['grouping'] size = request.form['size'] - folders = request.form['folders'] + # seems html cant do boolean, but uses strings so convert + if request.form['folders'] == "False": + folders=False + if request.form['folders'] == "True": + folders=True + cwd = request.form['cwd'] print( f"setting cwd basedon form: {cwd}" ) if 'prev' in request.form: @@ -171,9 +194,9 @@ def files_ip(): prefix = SymlinkName(path,path+'/') if noo == "Oldest": - entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.like(prefix+'%')).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all() + entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all() else: - entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.like(prefix+'%')).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all() + entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all() return render_template("files.html", page_title='View Files (Import Path)', entry_data=entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size, folders=folders, cwd=cwd ) @@ -194,10 +217,11 @@ def files_sp(): prefix = SymlinkName(path,path+'/') if noo == "Oldest": - entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.like(prefix+'%')).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all() - entries+=Entry.query.join(Dir).join(EntryDirLink).filter(Dir.path_prefix.like(prefix+'%')).order_by(Entry.name).offset(offset).limit(how_many).all() + entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all() + entries+=Entry.query.join(Dir).join(EntryDirLink).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(Entry.name).offset(offset).limit(how_many).all() else: - entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.like(prefix+'%')).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all() + entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all() + entries+=Entry.query.join(Dir).join(EntryDirLink).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(Entry.name).offset(offset).limit(how_many).all() return render_template("files.html", page_title='View Files (Storage Path)', entry_data=entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size, folders=folders, cwd=cwd ) @@ -209,12 +233,6 @@ def search(): noo, grouping, how_many, offset, size, folders, cwd = ViewingOptions( request ) - # seems html cant do boolean, but uses strings so convert - if folders == "False": - folders=False - if folders == "True": - folders=True - print( f"folders={folders}, type={type(folders)}" ) file_data=Entry.query.join(File).filter(Entry.name.ilike(f"%{request.form['term']}%")).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all() @@ -326,9 +344,13 @@ def custom_static(filename): ################################################################################ @app.template_filter('TopLevelFolderOf') def _jinja2_filter_toplevelfolderof(path, cwd): + print( f"TopLevelFolderOf( {path}, {cwd} -- dirname={os.path.dirname(path)}" ) + if os.path.dirname(path) == cwd: + print("---TopLevelFolderOf is true") return True else: + print("---TopLevelFolderOf is false") return False ############################################################################### diff --git a/pa_job_manager.py b/pa_job_manager.py index 1d9fdbe..f3fb044 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -1,4 +1,3 @@ - # # This file controls the 'external' job control manager, that (periodically # # looks / somehow is pushed an event?) picks up new jobs, and processes them. @@ -11,9 +10,10 @@ # ### -### SQLALCHEMY IMPORTS ### # pylint: disable=no-member +### SQLALCHEMY IMPORTS ### + from sqlalchemy.ext.declarative import declarative_base from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary, Boolean from sqlalchemy.exc import SQLAlchemyError @@ -28,6 +28,8 @@ from sqlalchemy.orm import scoped_session from shared import DB_URL, PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT, THUMBSIZE, SymlinkName from datetime import datetime, timedelta, date + +### PYTHON LIB IMPORTS ### import pytz import time import os @@ -47,10 +49,12 @@ import re import sys -DEBUG=0 +# global debug setting +DEBUG=1 +# this is required to handle the duplicate processing code sys.setrecursionlimit(50000) -# an Manager, which the Session will use for connection resources +# a Manager, which the Session will use for connection resources some_engine = create_engine(DB_URL) # create a configured "Session" class @@ -69,6 +73,14 @@ Base = declarative_base() # Class describing File in the database, and via sqlalchemy, connected to the DB as well # This has to match one-for-one the DB table ################################################################################ +class PathDirLink(Base): + __tablename__ = "path_dir_link" + path_id = Column(Integer, ForeignKey("path.id"), primary_key=True ) + dir_eid = Column(Integer, ForeignKey("dir.eid"), primary_key=True ) + + def __repr__(self): + return f"" + class EntryDirLink(Base): __tablename__ = "entry_dir_link" entry_id = Column(Integer, ForeignKey("entry.id"), primary_key=True ) @@ -77,19 +89,25 @@ class EntryDirLink(Base): def __repr__(self): return f"" +class Path(Base): + __tablename__ = "path" + id = Column(Integer, Sequence('path_id_seq'), primary_key=True ) + path_prefix = Column(String, unique=True, nullable=False ) + num_files = Column(Integer) + class Dir(Base): __tablename__ = "dir" eid = Column(Integer, ForeignKey("entry.id"), primary_key=True ) - path_prefix = Column(String, unique=True, nullable=False ) - num_files = Column(Integer) + rel_path = Column(String, unique=True, nullable=False ) + in_path = relationship("Path", secondary="path_dir_link", uselist=False) last_import_date = Column(Float) files = relationship("Entry", secondary="entry_dir_link") def PathOnFS(self): - return self.path_prefix + return self.in_path.path_prefix+'/'+self.rel_path def __repr__(self): - return f"" + return f"" class Entry(Base): __tablename__ = "entry" @@ -103,7 +121,12 @@ class Entry(Base): in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False ) def FullPathOnFS(self): - return self.in_dir.path_prefix + '/' + self.name + print( f"(FullPathOnFS: pp={self.in_dir.in_path.path_prefix}, rp={self.in_dir.rel_path}, n={self.name}" ) + s=self.in_dir.in_path.path_prefix + '/' + if len(self.in_dir.rel_path) > 0: + s += self.in_dir.rel_path + '/' + s += self.name + return s def __repr__(self): return f"" @@ -266,10 +289,10 @@ def JobsForPaths( parent_job, paths ): now=datetime.now(pytz.utc) # make new set of Jobs per path... HandleJobs will make them run later for path in paths: - d=session.query(Dir).filter(Dir.path_prefix==SymlinkName(path,path+'/')).first() + p=session.query(Path).filter(Path.path_prefix==SymlinkName(path,path+'/')).first() cfn=0 - if d: - cfn=d.num_files + if p: + cfn=p.num_files jex=JobExtra( name="path", value=path ) job=Job(start_time=now, last_update=now, name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=cfn ) @@ -423,6 +446,8 @@ def JobForceScan(job): JobProgressState( job, "In Progress" ) session.query(FileRefimgLink).delete() session.query(EntryDirLink).delete() + session.query(PathDirLink).delete() + session.query(Path).delete() session.query(Dir).delete() session.query(File).delete() session.query(Entry).delete() @@ -441,13 +466,13 @@ def CreateSymlink(job,path): os.symlink(path, symlink) return symlink -def AddDir(job, dirname, path_prefix, in_dir): - dir=session.query(Dir).filter(Dir.path_prefix==path_prefix).first() +def AddDir(job, dirname, in_dir, rel_path, in_path ): + dir=session.query(Dir).join(PathDirLink).join(Path).filter(Path.id==in_path.id).filter(Dir.rel_path==rel_path).first() if dir: e=session.query(Entry).get(dir.eid) e.exists_on_fs=True return dir - dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0 ) + dir=Dir( last_import_date=0, rel_path=rel_path, in_path=in_path ) dtype=session.query(FileType).filter(FileType.name=='Directory').first() e=Entry( name=dirname, type=dtype, exists_on_fs=True ) e.dir_details=dir @@ -455,7 +480,7 @@ def AddDir(job, dirname, path_prefix, in_dir): if in_dir: e.in_dir=in_dir if DEBUG==1: - print(f"AddDir: created d={dirname}, pp={path_prefix}") + print(f"AddDir: created d={dirname}, rp={rel_path}") AddLogForJob(job, f"DEBUG: Process new dir: {dirname}") session.add(e) return dir @@ -477,7 +502,7 @@ def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ): # reset exists_on_fs to False for everything in this import path, if we find it on the FS in the walk below, it goes back to True, anything that # is still false, has been deleted def ResetExistsOnFS(job, path): - reset_dirs = session.query(Entry).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.ilike(path+'%')).all() + reset_dirs = session.query(Entry).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==path).all() for reset_dir in reset_dirs: reset_dir.exists_on_fs=False session.add(reset_dir) @@ -558,6 +583,9 @@ def JobImportDir(job): FinishJob( job, "Finished Importing: {} -- Path does not exist".format( path), "Failed" ) return symlink=CreateSymlink(job,path) + + path_obj=Path( path_prefix=symlink, num_files=0 ) + session.add(path_obj) ResetExistsOnFS(job, symlink) walk=os.walk(path, topdown=True) @@ -567,10 +595,11 @@ def JobImportDir(job): overall_file_cnt=0 for root, subdirs, files in ftree: overall_file_cnt+= len(subdirs) + len(files) + path_obj.num_files=overall_file_cnt parent_dir=None - dir=AddDir(job, os.path.basename(symlink), symlink, parent_dir) - dir.num_files=overall_file_cnt + # rel_path is always '' at the top of the path objects path_prefix for the first dir + dir=AddDir(job, os.path.basename(symlink), parent_dir, '', path_obj) # session.add in case we already have imported this dir (as AddDir wont) & now we might have diff num of files to last time, session.add(dir) job.num_files=overall_file_cnt @@ -582,7 +611,9 @@ def JobImportDir(job): # already create root above to work out num_files for whole os.walk if root != path: pp=SymlinkName( path, root )+'/'+os.path.basename(root) - dir=AddDir(job, os.path.basename(root), pp, parent_dir) + print( F"pp={pp}, root={root}, symlink={symlink}" ) + rel_path=pp.replace(symlink+'/','') + dir=AddDir(job, os.path.basename(root), parent_dir, rel_path, path_obj) for basename in files: # commit every 100 files to see progress being made but not hammer the database if job.current_file_num % 100 == 0: @@ -612,8 +643,6 @@ def JobImportDir(job): job.current_file_num+=1 job.current_file_num += len(subdirs) dir.last_import_date = time.time() - if parent_dir != None: - dir.num_files=len(files)+len(subdirs) parent_dir=dir job.num_files=overall_file_cnt @@ -623,7 +652,7 @@ def JobImportDir(job): return def RunFuncOnFilesInPath( job, path, file_func ): - d=session.query(Dir).filter(Dir.path_prefix==path).first() + d = session.query(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==path).filter(Dir.rel_path=='').first() for e in d.files: ProcessFilesInDir(job, e, file_func) @@ -631,8 +660,8 @@ def RunFuncOnFilesInPath( job, path, file_func ): def JobProcessAI(job): path=[jex.value for jex in job.extra if jex.name == "path"][0] path = SymlinkName(path, '/') - d=session.query(Dir).filter(Dir.path_prefix==path).first() - job.num_files=d.num_files + p = session.query(Path).filter(Path.path_prefix==path).first() + job.num_files=p.num_files people = session.query(Person).all() for person in people: @@ -777,9 +806,9 @@ def JobGetFileDetails(job): path=SymlinkName( path, path ) if DEBUG==1: print("DEBUG: JobGetFileDetails for path={}".format( path ) ) - dir=session.query(Dir).filter(Dir.path_prefix==path).first() + p=session.query(Path).filter(Path.path_prefix==path).first() job.current_file_num = 0 - job.num_files = dir.num_files + job.num_files = p.num_files session.commit() RunFuncOnFilesInPath( job, path, GenHashAndThumb ) FinishJob(job, "File Details job finished") @@ -875,8 +904,7 @@ def CheckForDups(job): def RemoveFileFromFS( del_me ): try: settings = session.query(Settings).first() - m=re.search( r'^static/(.+)', del_me.in_dir.path_prefix) - dst_dir=settings.recycle_bin_path + m[1] + '/' + dst_dir=settings.recycle_bin_path + '/' + del_me.in_dir.in_path.path_prefix.replace('static/','') + '/' + del_me.in_dir.rel_path + '/' os.makedirs( dst_dir,mode=0o777, exist_ok=True ) src=del_me.FullPathOnFS() dst=dst_dir + '/' + del_me.name diff --git a/tables.sql b/tables.sql index 129507a..0e194fa 100644 --- a/tables.sql +++ b/tables.sql @@ -4,6 +4,9 @@ create table SETTINGS( ID integer, IMPORT_PATH varchar, STORAGE_PATH varchar, RE create table FILE_TYPE ( ID integer, NAME varchar(32) unique, constraint PK_FILE_TYPE_ID primary key(ID) ); +create table PATH ( ID integer, PATH_PREFIX varchar(1024), NUM_FILES integer, + constraint PK_PATH_ID primary key(ID) ); + create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer, EXISTS_ON_FS boolean, constraint PK_ENTRY_ID primary key(ID), constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) ); @@ -12,10 +15,15 @@ create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL va constraint PK_FILE_ID primary key(EID), constraint FK_FILE_ENTRY_ID foreign key (EID) references ENTRY(ID) ); -create table DIR ( EID integer, PATH_PREFIX varchar(256), NUM_FILES integer, LAST_IMPORT_DATE float, +create table DIR ( EID integer, PATH_PREFIX varchar(256), REL_PATH varchar(256), NUM_FILES integer, LAST_IMPORT_DATE float, constraint PK_DIR_EID primary key(EID), constraint FK_DIR_ENTRY_ID foreign key (EID) references ENTRY(ID) ); +create table PATH_DIR_LINK ( path_id integer, dir_eid integer, + constraint PK_PDL_path_id_dir_eid primary key (path_id, dir_eid), + constraint FK_PDL_PATH_ID foreign key (PATH_ID) references PATH(ID), + constraint FK_PDL_DIR_EID foreign key (DIR_EID) references DIR(EID) ); + create table ENTRY_DIR_LINK ( entry_id integer, dir_eid integer, constraint PK_EDL_entry_id_dir_eid primary key (entry_id, dir_eid), constraint FK_EDL_ENTRY_ID foreign key (ENTRY_ID) references ENTRY(ID), @@ -55,6 +63,7 @@ create table PA_JOB_MANAGER_FE_MESSAGE ( ID integer, JOB_ID integer, ALERT varch constraint PA_JOB_MANAGER_FE_ACKS_ID primary key(ID), constraint FK_PA_JOB_MANAGER_FE_MESSAGE_JOB_ID foreign key(JOB_ID) references JOB(ID) ); +create sequence PATH_ID_SEQ; create sequence FILE_ID_SEQ; create sequence FILE_TYPE_ID_SEQ; create sequence JOBEXTRA_ID_SEQ; diff --git a/templates/file_list.html b/templates/file_list.html index 7eae553..7591ef0 100644 --- a/templates/file_list.html +++ b/templates/file_list.html @@ -13,10 +13,10 @@
{% if obj.type.name=="Image" %} {% set icon="fa-file-image" %} - + {% elif obj.type.name == "Video" %} {% set icon="fa-film" %} - + {% elif obj.type.name == "Directory" %} {% set icon="fa-folder" %} {% else %} diff --git a/templates/files.html b/templates/files.html index f175516..de5918e 100644 --- a/templates/files.html +++ b/templates/files.html @@ -97,6 +97,7 @@ {% endif %} {% for obj in entry_data %} {% if loop.index==1 and folders %} + FIXME: hardcoded 'static/storage' {% if cwd != 'static/storage' %}
@@ -136,13 +137,13 @@ {% endif %} {% endif %} {% if obj.type.name != "Directory" %} - {% if (not folders) or ((obj.in_dir.path_prefix+'/'+obj.name) | TopLevelFolderOf(cwd)) %} + {% if (not folders) or ((obj.in_dir.in_path.path_prefix+'/'+obj.in_dir.rel_path+'/'+obj.name) | TopLevelFolderOf(cwd)) %}
{% if obj.type.name=="Image" %} - + {% elif obj.type.name == "Video" %}
- +
@@ -153,8 +154,9 @@ {% endif %} {% else %} {% if folders %} - {% if (cwd != obj.dir_details.path_prefix) and (obj.dir_details.path_prefix | TopLevelFolderOf(cwd)) %} -
+ {# if not the top-level of the path and it is a top-level folder, then display a folder icon #} + {% if (cwd != obj.dir_details.in_path.path_prefix or obj.dir_details.rel_path|length > 0) and ((obj.dir_details.in_path.path_prefix+'/'+obj.dir_details.rel_path) | TopLevelFolderOf(cwd)) %} +
{{obj.name}}