Path in the DB is complete, still have hacks around displaying folders (hardcoded path name)

This commit is contained in:
2021-04-11 15:35:01 +10:00
parent 4a55e98f5b
commit 095b7c8333
5 changed files with 113 additions and 52 deletions

View File

@@ -33,21 +33,39 @@ from dups import Duplicates
# Class describing File in the database, and via sqlalchemy, connected to the DB as well
# This has to match one-for-one the DB table
################################################################################
class PathDirLink(db.Model):
    """Association table row linking one Path (path.id) to one Dir (dir.eid)."""

    __tablename__ = "path_dir_link"

    # the two foreign keys together form the composite primary key
    path_id = db.Column(db.Integer, db.ForeignKey("path.id"), primary_key=True)
    dir_eid = db.Column(db.Integer, db.ForeignKey("dir.eid"), primary_key=True)

    def __repr__(self):
        path_id, dir_eid = self.path_id, self.dir_eid
        return f"<path_id: {path_id}, dir_eid: {dir_eid}>"
class Path(db.Model):
    """Row of the "path" table: a unique path_prefix plus a file counter."""

    __tablename__ = "path"

    # id values are drawn from the path_id_seq sequence
    id = db.Column(db.Integer, db.Sequence('path_id_seq'), primary_key=True)
    # must be present and unique across all paths
    path_prefix = db.Column(db.String, unique=True, nullable=False)
    # NOTE(review): presumably maintained by the import job rather than by the web app -- confirm
    num_files = db.Column(db.Integer)

    def __repr__(self):
        fields = (self.id, self.path_prefix, self.num_files)
        return "<id: {}, path_prefix: {}, num_files={}>".format(*fields)
class EntryDirLink(db.Model):
    """Association table row linking one Entry (entry.id) to one Dir (dir.eid)."""

    __tablename__ = "entry_dir_link"

    # the two foreign keys together form the composite primary key
    entry_id = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True)
    dir_eid = db.Column(db.Integer, db.ForeignKey("dir.eid"), primary_key=True)

    def __repr__(self):
        # a single return: the earlier str.format() variant produced the same
        # text and left this f-string unreachable
        return f"<entry_id: {self.entry_id}, dir_eid: {self.dir_eid}>"
# ORM mapping for the "dir" table; eid is both the primary key and a foreign key to entry.id.
class Dir(db.Model):
__tablename__ = "dir"
eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
# NOTE(review): path_prefix looks superseded by rel_path + in_path below, yet it is still
# declared NOT NULL here -- confirm whether this column should remain mapped
path_prefix = db.Column(db.String, unique=True, nullable=False )
# directory location relative to the owning Path's path_prefix
# NOTE(review): unique=True means two different Paths could not both own a dir with the same
# rel_path (e.g. the same sub-folder name under two roots) -- confirm that is intended
rel_path = db.Column(db.String, unique=True )
# the single Path this dir is linked to through the path_dir_link table (uselist=False => scalar)
in_path = db.relationship("Path", secondary="path_dir_link", uselist=False)
def __repr__(self):
return "<eid: {}, path_prefix: {}>".format(self.eid, self.path_prefix)
# NOTE(review): as written this second return can never execute because the return above
# always exits first -- only one of the two should remain
return f"<eid: {self.eid}, rel_path: {self.rel_path}, in_path: {self.in_path}>"
class Entry(db.Model):
__tablename__ = "entry"
@@ -136,7 +154,12 @@ def ViewingOptions( request ):
offset=int(request.form['offset'])
grouping=request.form['grouping']
size = request.form['size']
folders = request.form['folders']
# seems html cant do boolean, but uses strings so convert
if request.form['folders'] == "False":
folders=False
if request.form['folders'] == "True":
folders=True
cwd = request.form['cwd']
print( f"setting cwd basedon form: {cwd}" )
if 'prev' in request.form:
@@ -171,9 +194,9 @@ def files_ip():
prefix = SymlinkName(path,path+'/')
if noo == "Oldest":
entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.like(prefix+'%')).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all()
entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all()
else:
entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.like(prefix+'%')).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all()
entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all()
return render_template("files.html", page_title='View Files (Import Path)', entry_data=entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size, folders=folders, cwd=cwd )
@@ -194,10 +217,11 @@ def files_sp():
prefix = SymlinkName(path,path+'/')
if noo == "Oldest":
entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.like(prefix+'%')).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all()
entries+=Entry.query.join(Dir).join(EntryDirLink).filter(Dir.path_prefix.like(prefix+'%')).order_by(Entry.name).offset(offset).limit(how_many).all()
entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(File.year,File.month,File.day,Entry.name).offset(offset).limit(how_many).all()
entries+=Entry.query.join(Dir).join(EntryDirLink).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(Entry.name).offset(offset).limit(how_many).all()
else:
entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.like(prefix+'%')).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all()
entries+=Entry.query.join(File).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all()
entries+=Entry.query.join(Dir).join(EntryDirLink).join(PathDirLink).join(Path).filter(Path.path_prefix==prefix).order_by(Entry.name).offset(offset).limit(how_many).all()
return render_template("files.html", page_title='View Files (Storage Path)', entry_data=entries, noo=noo, grouping=grouping, how_many=how_many, offset=offset, size=size, folders=folders, cwd=cwd )
@@ -209,12 +233,6 @@ def search():
noo, grouping, how_many, offset, size, folders, cwd = ViewingOptions( request )
# seems html cant do boolean, but uses strings so convert
if folders == "False":
folders=False
if folders == "True":
folders=True
print( f"folders={folders}, type={type(folders)}" )
file_data=Entry.query.join(File).filter(Entry.name.ilike(f"%{request.form['term']}%")).order_by(File.year.desc(),File.month.desc(),File.day.desc(),Entry.name).offset(offset).limit(how_many).all()
@@ -326,9 +344,13 @@ def custom_static(filename):
################################################################################
@app.template_filter('TopLevelFolderOf')
def _jinja2_filter_toplevelfolderof(path, cwd):
    """Jinja filter: tell whether *path* sits directly inside *cwd*.

    Args:
        path: path of a file or folder, as a string.
        cwd: path of the folder currently being displayed.

    Returns:
        True when the parent directory of *path* (os.path.dirname) is exactly
        *cwd*, otherwise False.
    """
    parent = os.path.dirname(path)
    is_top_level = parent == cwd
    # one trace line per call (instead of three) that also carries the result
    print( f"TopLevelFolderOf( {path}, {cwd} ) -- dirname={parent}, result={is_top_level}" )
    return is_top_level
###############################################################################

View File

@@ -1,4 +1,3 @@
#
# This file controls the 'external' job control manager, that (periodically #
# looks / somehow is pushed an event?) picks up new jobs, and processes them.
@@ -11,9 +10,10 @@
#
###
### SQLALCHEMY IMPORTS ###
# pylint: disable=no-member
### SQLALCHEMY IMPORTS ###
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary, Boolean
from sqlalchemy.exc import SQLAlchemyError
@@ -28,6 +28,8 @@ from sqlalchemy.orm import scoped_session
from shared import DB_URL, PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT, THUMBSIZE, SymlinkName
from datetime import datetime, timedelta, date
### PYTHON LIB IMPORTS ###
import pytz
import time
import os
@@ -47,10 +49,12 @@ import re
import sys
DEBUG=0
# global debug setting
DEBUG=1
# this is required to handle the duplicate processing code
sys.setrecursionlimit(50000)
# an Manager, which the Session will use for connection resources
# a Manager, which the Session will use for connection resources
some_engine = create_engine(DB_URL)
# create a configured "Session" class
@@ -69,6 +73,14 @@ Base = declarative_base()
# Class describing File in the database, and via sqlalchemy, connected to the DB as well
# This has to match one-for-one the DB table
################################################################################
class PathDirLink(Base):
    """Association table row linking one Path (path.id) to one Dir (dir.eid)."""

    __tablename__ = "path_dir_link"

    # the two foreign keys together form the composite primary key
    path_id = Column(Integer, ForeignKey("path.id"), primary_key=True)
    dir_eid = Column(Integer, ForeignKey("dir.eid"), primary_key=True)

    def __repr__(self):
        path_id, dir_eid = self.path_id, self.dir_eid
        return f"<path_id: {path_id}, dir_eid: {dir_eid}>"
class EntryDirLink(Base):
__tablename__ = "entry_dir_link"
entry_id = Column(Integer, ForeignKey("entry.id"), primary_key=True )
@@ -77,19 +89,25 @@ class EntryDirLink(Base):
def __repr__(self):
return f"<entry_id: {self.entry_id}, dir_eid: {self.dir_eid}>"
class Path(Base):
    """Row of the "path" table: a unique path_prefix plus a file counter."""

    __tablename__ = "path"

    # id values are drawn from the path_id_seq sequence
    id = Column(Integer, Sequence('path_id_seq'), primary_key=True )
    # must be present and unique across all paths
    path_prefix = Column(String, unique=True, nullable=False )
    # number of entries counted for this path
    num_files = Column(Integer)

    def __repr__(self):
        # added so Path prints readably, like the other models in this file
        return f"<id: {self.id}, path_prefix: {self.path_prefix}, num_files={self.num_files}>"
# ORM mapping for the "dir" table; eid is both the primary key and a foreign key to entry.id.
class Dir(Base):
__tablename__ = "dir"
eid = Column(Integer, ForeignKey("entry.id"), primary_key=True )
# NOTE(review): path_prefix and num_files look superseded (prefix and count now appear to
# live on Path) -- confirm whether these two columns should remain mapped
path_prefix = Column(String, unique=True, nullable=False )
num_files = Column(Integer)
# directory location relative to the owning Path's path_prefix
# NOTE(review): unique=True means two different Paths could not both own a dir with the same
# rel_path -- confirm that is intended
rel_path = Column(String, unique=True, nullable=False )
# the single Path this dir is linked to through the path_dir_link table (uselist=False => scalar)
in_path = relationship("Path", secondary="path_dir_link", uselist=False)
# float timestamp of the last import
last_import_date = Column(Float)
# Entry rows linked to this dir through the entry_dir_link table
files = relationship("Entry", secondary="entry_dir_link")
# Returns this directory's path as a string.
def PathOnFS(self):
return self.path_prefix
# NOTE(review): unreachable as written because of the return above; if this becomes the live
# line, an empty rel_path would produce a trailing '/' -- confirm callers accept that
return self.in_path.path_prefix+'/'+self.rel_path
def __repr__(self):
return f"<eid: {self.eid}, path_prefix: {self.path_prefix}, num_files: {self.num_files}, last_import_date: {self.last_import_date}, files: {self.files}>"
# NOTE(review): unreachable as written because of the return above. Both variants format
# self.files; if an Entry's repr in turn formats its in_dir, repr() of a non-empty dir could
# recurse without end -- verify
return f"<eid: {self.eid}, last_import_date: {self.last_import_date}, files: {self.files}>"
class Entry(Base):
__tablename__ = "entry"
@@ -103,7 +121,12 @@ class Entry(Base):
in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
def FullPathOnFS(self):
    """Return the path of this entry as a string.

    The result is built from the owning Path's path_prefix, the containing
    dir's rel_path (left out when it is empty or unset) and the entry's own
    name, joined with '/'.
    """
    in_dir = self.in_dir
    print( f"(FullPathOnFS: pp={in_dir.in_path.path_prefix}, rp={in_dir.rel_path}, n={self.name}" )
    parts = [in_dir.in_path.path_prefix]
    # skip an empty/None rel_path so the result never contains '//'
    if in_dir.rel_path:
        parts.append(in_dir.rel_path)
    parts.append(self.name)
    return '/'.join(parts)
def __repr__(self):
return f"<id: {self.id}, name: {self.name}, type={self.type}, exists_on_fs={self.exists_on_fs}, dir_details={self.dir_details}, file_details={self.file_details}, in_dir={self.in_dir}>"
@@ -266,10 +289,10 @@ def JobsForPaths( parent_job, paths ):
now=datetime.now(pytz.utc)
# make new set of Jobs per path... HandleJobs will make them run later
for path in paths:
d=session.query(Dir).filter(Dir.path_prefix==SymlinkName(path,path+'/')).first()
p=session.query(Path).filter(Path.path_prefix==SymlinkName(path,path+'/')).first()
cfn=0
if d:
cfn=d.num_files
if p:
cfn=p.num_files
jex=JobExtra( name="path", value=path )
job=Job(start_time=now, last_update=now, name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=cfn )
@@ -423,6 +446,8 @@ def JobForceScan(job):
JobProgressState( job, "In Progress" )
session.query(FileRefimgLink).delete()
session.query(EntryDirLink).delete()
session.query(PathDirLink).delete()
session.query(Path).delete()
session.query(Dir).delete()
session.query(File).delete()
session.query(Entry).delete()
@@ -441,13 +466,13 @@ def CreateSymlink(job,path):
os.symlink(path, symlink)
return symlink
def AddDir(job, dirname, path_prefix, in_dir):
dir=session.query(Dir).filter(Dir.path_prefix==path_prefix).first()
def AddDir(job, dirname, in_dir, rel_path, in_path ):
dir=session.query(Dir).join(PathDirLink).join(Path).filter(Path.id==in_path.id).filter(Dir.rel_path==rel_path).first()
if dir:
e=session.query(Entry).get(dir.eid)
e.exists_on_fs=True
return dir
dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0 )
dir=Dir( last_import_date=0, rel_path=rel_path, in_path=in_path )
dtype=session.query(FileType).filter(FileType.name=='Directory').first()
e=Entry( name=dirname, type=dtype, exists_on_fs=True )
e.dir_details=dir
@@ -455,7 +480,7 @@ def AddDir(job, dirname, path_prefix, in_dir):
if in_dir:
e.in_dir=in_dir
if DEBUG==1:
print(f"AddDir: created d={dirname}, pp={path_prefix}")
print(f"AddDir: created d={dirname}, rp={rel_path}")
AddLogForJob(job, f"DEBUG: Process new dir: {dirname}")
session.add(e)
return dir
@@ -477,7 +502,7 @@ def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ):
# reset exists_on_fs to False for everything in this import path, if we find it on the FS in the walk below, it goes back to True, anything that
# is still false, has been deleted
def ResetExistsOnFS(job, path):
reset_dirs = session.query(Entry).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.ilike(path+'%')).all()
reset_dirs = session.query(Entry).join(EntryDirLink).join(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==path).all()
for reset_dir in reset_dirs:
reset_dir.exists_on_fs=False
session.add(reset_dir)
@@ -558,6 +583,9 @@ def JobImportDir(job):
FinishJob( job, "Finished Importing: {} -- Path does not exist".format( path), "Failed" )
return
symlink=CreateSymlink(job,path)
path_obj=Path( path_prefix=symlink, num_files=0 )
session.add(path_obj)
ResetExistsOnFS(job, symlink)
walk=os.walk(path, topdown=True)
@@ -567,10 +595,11 @@ def JobImportDir(job):
overall_file_cnt=0
for root, subdirs, files in ftree:
overall_file_cnt+= len(subdirs) + len(files)
path_obj.num_files=overall_file_cnt
parent_dir=None
dir=AddDir(job, os.path.basename(symlink), symlink, parent_dir)
dir.num_files=overall_file_cnt
# rel_path is always '' at the top of the path objects path_prefix for the first dir
dir=AddDir(job, os.path.basename(symlink), parent_dir, '', path_obj)
# session.add in case we already have imported this dir (as AddDir wont) & now we might have diff num of files to last time,
session.add(dir)
job.num_files=overall_file_cnt
@@ -582,7 +611,9 @@ def JobImportDir(job):
# already create root above to work out num_files for whole os.walk
if root != path:
pp=SymlinkName( path, root )+'/'+os.path.basename(root)
dir=AddDir(job, os.path.basename(root), pp, parent_dir)
print( F"pp={pp}, root={root}, symlink={symlink}" )
rel_path=pp.replace(symlink+'/','')
dir=AddDir(job, os.path.basename(root), parent_dir, rel_path, path_obj)
for basename in files:
# commit every 100 files to see progress being made but not hammer the database
if job.current_file_num % 100 == 0:
@@ -612,8 +643,6 @@ def JobImportDir(job):
job.current_file_num+=1
job.current_file_num += len(subdirs)
dir.last_import_date = time.time()
if parent_dir != None:
dir.num_files=len(files)+len(subdirs)
parent_dir=dir
job.num_files=overall_file_cnt
@@ -623,7 +652,7 @@ def JobImportDir(job):
return
def RunFuncOnFilesInPath( job, path, file_func ):
d=session.query(Dir).filter(Dir.path_prefix==path).first()
d = session.query(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==path).filter(Dir.rel_path=='').first()
for e in d.files:
ProcessFilesInDir(job, e, file_func)
@@ -631,8 +660,8 @@ def RunFuncOnFilesInPath( job, path, file_func ):
def JobProcessAI(job):
path=[jex.value for jex in job.extra if jex.name == "path"][0]
path = SymlinkName(path, '/')
d=session.query(Dir).filter(Dir.path_prefix==path).first()
job.num_files=d.num_files
p = session.query(Path).filter(Path.path_prefix==path).first()
job.num_files=p.num_files
people = session.query(Person).all()
for person in people:
@@ -777,9 +806,9 @@ def JobGetFileDetails(job):
path=SymlinkName( path, path )
if DEBUG==1:
print("DEBUG: JobGetFileDetails for path={}".format( path ) )
dir=session.query(Dir).filter(Dir.path_prefix==path).first()
p=session.query(Path).filter(Path.path_prefix==path).first()
job.current_file_num = 0
job.num_files = dir.num_files
job.num_files = p.num_files
session.commit()
RunFuncOnFilesInPath( job, path, GenHashAndThumb )
FinishJob(job, "File Details job finished")
@@ -875,8 +904,7 @@ def CheckForDups(job):
def RemoveFileFromFS( del_me ):
try:
settings = session.query(Settings).first()
m=re.search( r'^static/(.+)', del_me.in_dir.path_prefix)
dst_dir=settings.recycle_bin_path + m[1] + '/'
dst_dir=settings.recycle_bin_path + '/' + del_me.in_dir.in_path.path_prefix.replace('static/','') + '/' + del_me.in_dir.rel_path + '/'
os.makedirs( dst_dir,mode=0o777, exist_ok=True )
src=del_me.FullPathOnFS()
dst=dst_dir + '/' + del_me.name

View File

@@ -4,6 +4,9 @@ create table SETTINGS( ID integer, IMPORT_PATH varchar, STORAGE_PATH varchar, RE
create table FILE_TYPE ( ID integer, NAME varchar(32) unique, constraint PK_FILE_TYPE_ID primary key(ID) );
create table PATH ( ID integer, PATH_PREFIX varchar(1024), NUM_FILES integer,
constraint PK_PATH_ID primary key(ID) );
create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer, EXISTS_ON_FS boolean,
constraint PK_ENTRY_ID primary key(ID),
constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) );
@@ -12,10 +15,15 @@ create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL va
constraint PK_FILE_ID primary key(EID),
constraint FK_FILE_ENTRY_ID foreign key (EID) references ENTRY(ID) );
create table DIR ( EID integer, PATH_PREFIX varchar(256), NUM_FILES integer, LAST_IMPORT_DATE float,
create table DIR ( EID integer, PATH_PREFIX varchar(256), REL_PATH varchar(256), NUM_FILES integer, LAST_IMPORT_DATE float,
constraint PK_DIR_EID primary key(EID),
constraint FK_DIR_ENTRY_ID foreign key (EID) references ENTRY(ID) );
create table PATH_DIR_LINK ( path_id integer, dir_eid integer,
constraint PK_PDL_path_id_dir_eid primary key (path_id, dir_eid),
constraint FK_PDL_PATH_ID foreign key (PATH_ID) references PATH(ID),
constraint FK_PDL_DIR_EID foreign key (DIR_EID) references DIR(EID) );
create table ENTRY_DIR_LINK ( entry_id integer, dir_eid integer,
constraint PK_EDL_entry_id_dir_eid primary key (entry_id, dir_eid),
constraint FK_EDL_ENTRY_ID foreign key (ENTRY_ID) references ENTRY(ID),
@@ -55,6 +63,7 @@ create table PA_JOB_MANAGER_FE_MESSAGE ( ID integer, JOB_ID integer, ALERT varch
constraint PA_JOB_MANAGER_FE_ACKS_ID primary key(ID),
constraint FK_PA_JOB_MANAGER_FE_MESSAGE_JOB_ID foreign key(JOB_ID) references JOB(ID) );
create sequence PATH_ID_SEQ;
create sequence FILE_ID_SEQ;
create sequence FILE_TYPE_ID_SEQ;
create sequence JOBEXTRA_ID_SEQ;

View File

@@ -13,10 +13,10 @@
<div style="position:relative; width:100%">
{% if obj.type.name=="Image" %}
{% set icon="fa-file-image" %}
<a href="{{obj.in_dir.path_prefix}}/{{obj.name}}">
<a href="{{obj.in_dir.in_path.path_prefix}}/{{obj.in_dir.rel_path}}/{{obj.name}}">
{% elif obj.type.name == "Video" %}
{% set icon="fa-film" %}
<a href="{{obj.in_dir.path_prefix}}/{{obj.name}}">
<a href="{{obj.in_dir.in_path.path_prefix}}/{{obj.in_dir.rel_path}}/{{obj.name}}">
{% elif obj.type.name == "Directory" %}
{% set icon="fa-folder" %}
{% else %}

View File

@@ -97,6 +97,7 @@
{% endif %}
{% for obj in entry_data %}
{% if loop.index==1 and folders %}
{# FIXME: hardcoded 'static/storage' #}
{% if cwd != 'static/storage' %}
<figure class="px-1 dir" dir={{cwd|ParentPath}}>
<span style="font-size:{{(size|int-22)/2}}" class="fa-stack">
@@ -136,13 +137,13 @@
{% endif %}
{% endif %}
{% if obj.type.name != "Directory" %}
{% if (not folders) or ((obj.in_dir.path_prefix+'/'+obj.name) | TopLevelFolderOf(cwd)) %}
{% if (not folders) or ((obj.in_dir.in_path.path_prefix+'/'+obj.in_dir.rel_path+'/'+obj.name) | TopLevelFolderOf(cwd)) %}
<figure id="{{obj.id}}" img="{{loop.index-1}}" class="figure mx-1" fname="{{obj.name}}" yr="{{obj.file_details.year}}" date="{{obj.file_details.year}}{{"%02d" % obj.file_details.month}}{{"%02d" % obj.file_details.day}}" details="{{obj.name}} (Date: {{obj.file_details.day}}/{{obj.file_details.month}}/{{obj.file_details.year}})">
{% if obj.type.name=="Image" %}
<a href="{{obj.in_dir.path_prefix}}/{{obj.name}}"><img class="thumb" height="{{size}}" src="data:image/jpeg;base64,{{obj.file_details.thumbnail}}"></img></a>
<a href="{{obj.in_dir.in_path.path_prefix}}/{{obj.in_dir.rel_path}}/{{obj.name}}"><img class="thumb" height="{{size}}" src="data:image/jpeg;base64,{{obj.file_details.thumbnail}}"></img></a>
{% elif obj.type.name == "Video" %}
<div style="position:relative; width:100%">
<a href="{{obj.in_dir.path_prefix}}/{{obj.name}}"><img class="thumb" style="display:block" height="{{size}}" src="data:image/jpeg;base64,{{obj.file_details.thumbnail}}"></img></a>
<a href="{{obj.in_dir.in_path.path_prefix}}/{{obj.in_dir.rel_path}}/{{obj.name}}"><img class="thumb" style="display:block" height="{{size}}" src="data:image/jpeg;base64,{{obj.file_details.thumbnail}}"></img></a>
<div style="position:absolute; top: 2; left: 2;">
<i style="font-size:32;background-color:black;color:white" class="fas fa-film"></i>
</div>
@@ -153,8 +154,9 @@
{% endif %}
{% else %}
{% if folders %}
{% if (cwd != obj.dir_details.path_prefix) and (obj.dir_details.path_prefix | TopLevelFolderOf(cwd)) %}
<figure class="px-1 dir" dir={{obj.dir_details.path_prefix}}>
{# if not the top-level of the path and it is a top-level folder, then display a folder icon #}
{% if (cwd != obj.dir_details.in_path.path_prefix or obj.dir_details.rel_path|length > 0) and ((obj.dir_details.in_path.path_prefix+'/'+obj.dir_details.rel_path) | TopLevelFolderOf(cwd)) %}
<figure class="px-1 dir" dir={{obj.dir_details.in_path.path_prefix+'/'+obj.dir_details.rel_path}}>
<i style="font-size:{{size|int-22}};" class="fas fa-folder"></i>
<figcaption class="figure-caption text-center">{{obj.name}}</figcaption>
</figure>