diff --git a/TODO b/TODO
index 0dbb7b0..cce5699 100644
--- a/TODO
+++ b/TODO
@@ -1,26 +1,19 @@
 ## GENERAL
- * need a path_details_dir_link table (path_details -> type 'import/storage/recycle'
- - need to make some FILE/DIR, etc. funcs into OO
- -- is there more to do here? (should probably see how often I reference fields of FILE/DIR, etc. directly, and get rid of them in most instances)
- - stop fudging the path in DIR (to add static), and just add 'static/' + path_details_dir.prefix + dir.rel_path
- -- this might be done, but re-consider the idea that 'static' is hard-coded? -- as long as it is in 1 spot
- -- because, I want to change the recycle bin to be .pa_bin/IMPORT/xxxx and .pa_bin/STORAGE/xxxx (to address the potential issue where import_path is /xxx/photos, and storage_path is /yyy/photos....
-    ??? is this possible to cause issues in other spots, like de-dup trim path... in fact, probably need to revisit TrimPath anyway!
- - ignore *thumb* -- but consider how we do this and don't screw up 'dir/job counts'
-   and other stuff like .pa_bin if its in storage/import folder?
- * storage_path viewing needs to be by folder / not a big grab bag of files (by default)
+ * issue where someone could name IP .../Imp/photos and SP .../Sto/photos, and then static/photos is ambiguous:
+ -- TODO: make the path prefix static/<path_type>/ so that files are in: static/<path_type>/in_path.pp/dir.rel_path/
+ -- then deleting would just change the path_prefix from static/storage to .pa_bin/storage, etc.
+ -- need to be able to view the recycle bin (should be simple when we have path_types) &&& should be able to consolidate the files_ip/files_sp/files_rb? route handling functions
+ -- could also allow undelete per file / show content as another Files->View, more like storage (i.e. show folders)
+ * storage_path viewing needs to be by folder / not a big grab bag of files (by default - DONE)
 -- BUG: issue with view by Day, etc. we print out day even if the Entry is not in the cwd
- -- mostly done. Need to toggle the view if I want, and when viewing storage area, change single-click to be view file again, and right-click to be my context menu
- -> could this also be a small symbol near the icons mentioned below (disk/?/bin)?
- * doing actual file deletes needed again [DONE]
- - decided a recycle bin would be good [DONE]
- - when we put files in recycle bin, they need to stay in the DB and just have their root/base path moved (so they can be view as per above/below) <--- TO BE DONE
-   AND need to be able to view recycle bin (should be simple when we have path_types) &&& should able to consolidate the files_ip/files_sp/files_rb? route handling functions
- - could also allow undelete per file / show content as another Files->View and more like storage (i.e. show folders)
+ -- TODO: need to toggle the view if I want, and when viewing the storage area, change single-click to be view file again, and right-click to be my context menu
+ * need a way for search results to show whether we found something in import_path or storage_path:
+ - now we can use the in_path, then have a series of icons, e.g. disk for storage, ? for import, and bin for recycling (before the blue path) -- maybe even show different colours, e.g. info for import, primary for storage and danger for bin?
+ * handle thumbs:
+ - need to ignore *thumb* -- but consider how we do this so we don't screw up 'dir/job counts',
+   and potentially other stuff like .pa_bin if it's in the storage/import folder?
 * AddJobForLog can absorb DEBUGs, etc.
   in fact fix up logging in general
 * comment your code
- * need a way for page to show we are in import_path or storage_path:
- - now we can use the in_path (which needs a type !!!), then have a series of icons, e.g. disk for storage, ? for import, and bin for recycling (before the blue path)--maybe even show different colours, e.g. info for import, primary for storage and danger for bin?
 
 ## DB
 Need to think about...
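
The first TODO item above proposes disambiguating same-named import and storage folders by keying the web prefix on path type. A minimal sketch of that layout, assuming a hypothetical PathInfo holder and STATIC_ROOT constant (neither is in this codebase):

    import os

    STATIC_ROOT = "static"  # keep 'static' hard-coded in exactly one spot

    class PathInfo:
        # hypothetical stand-in for a PATH row: its type name and symlink prefix
        def __init__(self, type_name, path_prefix):
            self.type_name = type_name
            self.path_prefix = path_prefix

    def web_path(in_path, rel_path, name):
        # proposed layout: static/<path_type>/<in_path.path_prefix>/<dir.rel_path>/<name>
        return os.path.join(STATIC_ROOT, in_path.type_name, in_path.path_prefix, rel_path, name)

    # an import path and a storage path both named 'photos' no longer collide:
    ip = PathInfo("import", "photos")
    sp = PathInfo("storage", "photos")
    assert web_path(ip, "2021", "a.jpg") != web_path(sp, "2021", "a.jpg")
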
diff --git a/dups.py b/dups.py
index d8efcb9..e97173a 100644
--- a/dups.py
+++ b/dups.py
@@ -23,6 +23,7 @@ import re
 from job import Job, JobExtra, Joblog, NewJob
 from settings import Settings
 from shared import SymlinkName
+from path import PathType
 
 ################################################################################
 # DupRow class is a simple 'struct' to keep data per duplicate file / just to
@@ -91,47 +92,23 @@ class Duplicates:
         self.per_path_dups=[]
         self.preferred_file={}
         self.preferred_path={}
-        self.all_paths=[]
-        self.storage_paths=[]
-        self.import_paths=[]
         self.hashes_processed={}
         self.uniq_dups=0
         self.total_dups=0
 
-        # pull apart the storage path Setting, and make array of each for use in TrimmedPath()
-        settings=Settings.query.first()
-        paths = settings.storage_path.split("#")
-        for path in paths:
-            prefix = SymlinkName(path,path+'/')
-            self.storage_paths.append(prefix)
-            self.all_paths.append(prefix)
-        # pull apart the import path Setting, and make array of each for use in TrimmedPath()
-        paths = settings.import_path.split("#")
-        for path in paths:
-            prefix = SymlinkName(path,path+'/')
-            self.import_paths.append(prefix)
-            self.all_paths.append(prefix)
-
-    # Strip the front of the path (any match on a storage or import path) is
-    # removed. Just to make it easier to read when we display in the web page
-    def TrimmedPath( self, path ):
-        for p in self.all_paths:
-            if re.match( f"^{p}", path ):
-                return path.replace(p, '' )
-        return path
+        self.import_ptype_id = PathType.query.filter(PathType.name=='Import').first().id
+        self.storage_ptype_id = PathType.query.filter(PathType.name=='Storage').first().id
 
     # is this file in the import path?
-    def InImportPath( self, path ):
-        for p in self.import_paths:
-            if re.match( f"^{p}", path ):
-                return True
+    def InImportPath( self, path_type ):
+        if path_type == self.import_ptype_id:
+            return True
         return False
 
     # is this file in the storage path?
-    def InStoragePath( self, path ):
-        for p in self.storage_paths:
-            if re.match( f"^{p}", path ):
-                return True
+    def InStoragePath( self, path_type ):
+        if path_type == self.storage_ptype_id:
+            return True
         return False
 
     # this stores this object into the keep from same path list (DDP: sometimes there can be more than 1 SP, e.g SP to SP to IP)
@@ -160,11 +137,11 @@ class Duplicates:
     # and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
     # and then puts the import path file in the delete list (self.ip_to_sp_dups_keep) via self.DelInIPSPDups()
     def DupInImportAndStoragePath( self, row, dr1, dr2 ):
-        if self.InStoragePath(row.path1) and self.InImportPath(row.path2):
+        if self.InStoragePath(row.path_type1) and self.InImportPath(row.path_type2):
             self.KeepInIPSPDups( dr1 )
             self.DelInIPSPDups( dr2 )
             return True
-        if self.InStoragePath(row.path2) and self.InImportPath(row.path1):
+        if self.InStoragePath(row.path_type2) and self.InImportPath(row.path_type1):
             self.KeepInIPSPDups( dr2 )
             self.DelInIPSPDups( dr1 )
             return True
@@ -174,8 +151,8 @@ class Duplicates:
     # we process these into appropriate data structures on this first pass
     def AddDup( self, row ):
         self.hashes_processed[row.hash]=1
-        dr1=DupRow( row.hash, row.fname1, self.TrimmedPath(row.path1), row.did1, row.id1 )
-        dr2=DupRow( row.hash, row.fname2, self.TrimmedPath(row.path2), row.did2, row.id2 )
+        dr1=DupRow( row.hash, row.fname1, row.rel_path1, row.did1, row.id1 )
+        dr2=DupRow( row.hash, row.fname2, row.rel_path2, row.did2, row.id2 )
         # if in both import and storage path, just keep the storage path file,
         # and del import path file.
         if self.DupInImportAndStoragePath( row, dr1, dr2 ):
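
The dups.py rewrite replaces per-file regex matching against every configured path prefix with a single integer comparison against a PATH_TYPE id cached at construction time. A side-by-side sketch of the two approaches, with stand-in values (the real id comes from the PATH_TYPE table):

    import re

    # before: O(number of paths) regex matches per file, on absolute paths
    import_paths = ["/xxx/photos/"]

    def in_import_path_old(path):
        return any(re.match(f"^{p}", path) for p in import_paths)

    # after: each duplicate row already carries its path's type_id,
    # so the test collapses to one integer comparison (id is a stand-in)
    IMPORT_PTYPE_ID = 1

    def in_import_path_new(path_type):
        return path_type == IMPORT_PTYPE_ID

    assert in_import_path_old("/xxx/photos/2021/a.jpg")
    assert in_import_path_new(IMPORT_PTYPE_ID)
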
diff --git a/files.py b/files.py
index cc9e45d..1b47d19 100644
--- a/files.py
+++ b/files.py
@@ -21,6 +21,7 @@ import re
 ################################################################################
 # Local Class imports
 ################################################################################
 from job import Job, JobExtra, Joblog, NewJob
+from path import PathType, Path
 from person import Person, PersonRefimgLink
 from refimg import Refimg
 from settings import Settings
@@ -41,15 +42,6 @@ class PathDirLink(db.Model):
     __tablename__ = "path_dir_link"
 
     def __repr__(self):
         return f""
 
-class Path(db.Model):
-    __tablename__ = "path"
-    id = db.Column(db.Integer, db.Sequence('path_id_seq'), primary_key=True )
-    path_prefix = db.Column(db.String, unique=True, nullable=False )
-    num_files = db.Column(db.Integer)
-
-    def __repr__(self):
-        return f""
-
 class EntryDirLink(db.Model):
     __tablename__ = "entry_dir_link"
     entry_id = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
@@ -277,7 +269,7 @@ def scan_sp():
 
 @app.route("/fix_dups", methods=["POST"])
 def fix_dups():
-    rows = db.engine.execute( "select e1.id as id1, f1.hash, d1.path_prefix as path1, d1.eid as did1, e1.name as fname1, e2.id as id2, d2.path_prefix as path2, d2.eid as did2, e2.name as fname2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb order by path1, fname1" )
+    rows = db.engine.execute( "select e1.id as id1, f1.hash, d1.rel_path as rel_path1, d1.eid as did1, e1.name as fname1, p1.id as path1, p1.type_id as path_type1, e2.id as id2, d2.rel_path as rel_path2, d2.eid as did2, e2.name as fname2, p2.id as path2, p2.type_id as path_type2 from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1, entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb order by path1, fname1" )
 
     if rows.returns_rows == False:
         st.SetAlert("success")
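
The reworked fix_dups query above is one long string; it is easier to check laid out clause by clause. This is the same statement, only reformatted -- each side of the duplicate pair now joins through path_dir_link to path to pick up rel_path and type_id:

    select e1.id as id1, f1.hash, d1.rel_path as rel_path1, d1.eid as did1, e1.name as fname1,
           p1.id as path1, p1.type_id as path_type1,
           e2.id as id2, d2.rel_path as rel_path2, d2.eid as did2, e2.name as fname2,
           p2.id as path2, p2.type_id as path_type2
    from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1,
         entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2
    where e1.id = f1.eid and e2.id = f2.eid
      and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id
      and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id
      and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid
      and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid
      and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb
    order by path1, fname1
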
diff --git a/pa_job_manager.py b/pa_job_manager.py
index f3fb044..0a8d52b 100644
--- a/pa_job_manager.py
+++ b/pa_job_manager.py
@@ -73,6 +73,15 @@ Base = declarative_base()
 ################################################################################
 # Class describing File in the database, and via sqlalchemy, connected to the DB as well
 # This has to match one-for-one the DB table
 ################################################################################
+class PathType(Base):
+    __tablename__ = "path_type"
+    id = Column(Integer, Sequence('path_type_id_seq'), primary_key=True )
+    name = Column(String, unique=True, nullable=False )
+
+    def __repr__(self):
+        return f""
+
+
 class PathDirLink(Base):
     __tablename__ = "path_dir_link"
     path_id = Column(Integer, ForeignKey("path.id"), primary_key=True )
@@ -92,6 +101,8 @@ class EntryDirLink(Base):
     __tablename__ = "entry_dir_link"
     entry_id = Column(Integer, ForeignKey("entry.id"), primary_key=True )
 
 class Path(Base):
     __tablename__ = "path"
     id = Column(Integer, Sequence('path_id_seq'), primary_key=True )
+    type_id = Column(Integer, ForeignKey("path_type.id"))
+    type = relationship("PathType")
     path_prefix = Column(String, unique=True, nullable=False )
     num_files = Column(Integer)
@@ -121,7 +132,6 @@ class Entry(Base):
     in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
 
     def FullPathOnFS(self):
-        print( f"(FullPathOnFS: pp={self.in_dir.in_path.path_prefix}, rp={self.in_dir.rel_path}, n={self.name}" )
         s=self.in_dir.in_path.path_prefix + '/'
         if len(self.in_dir.rel_path) > 0:
             s += self.in_dir.rel_path + '/'
@@ -274,7 +284,8 @@ def ProcessStorageDirs(parent_job):
     if settings == None:
         raise Exception("Cannot create file data with no settings / import path is missing")
     paths = settings.storage_path.split("#")
-    JobsForPaths( parent_job, paths )
+    ptype = session.query(PathType).filter(PathType.name=='Storage').first().id
+    JobsForPaths( parent_job, paths, ptype )
     return
 
 def ProcessImportDirs(parent_job):
     if settings == None:
         raise Exception("Cannot create file data with no settings / import path is missing")
     paths = settings.import_path.split("#")
-    JobsForPaths( parent_job, paths )
+    ptype = session.query(PathType).filter(PathType.name=='Import').first().id
+    JobsForPaths( parent_job, paths, ptype )
     return
 
-def JobsForPaths( parent_job, paths ):
+def JobsForPaths( parent_job, paths, ptype ):
     now=datetime.now(pytz.utc)
     # make new set of Jobs per path... HandleJobs will make them run later
     for path in paths:
@@ -295,8 +307,10 @@ def JobsForPaths( parent_job, paths, ptype ):
         cfn=p.num_files
 
         jex=JobExtra( name="path", value=path )
+        jex2=JobExtra( name="path_type", value=ptype )
         job=Job(start_time=now, last_update=now, name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=cfn )
         job.extra.append(jex)
+        job.extra.append(jex2)
         session.add(job)
         session.commit()
         if parent_job:
@@ -573,10 +587,9 @@ def GetDateFromFile(file, stat):
 def JobImportDir(job):
     JobProgressState( job, "In Progress" )
     settings = session.query(Settings).first()
-    if settings == None:
-        raise Exception("Cannot create file data with no settings / paths missing")
     path=[jex.value for jex in job.extra if jex.name == "path"][0]
-    AddLogForJob(job, "Checking Directory: {}".format( path ) )
+    path_type=[jex.value for jex in job.extra if jex.name == "path_type"][0]
+    AddLogForJob(job, f"Checking '{path_type}' Directory: {path}" )
     if DEBUG==1:
         print("DEBUG: Checking Directory: {}".format( path ) )
     if not os.path.exists( path ):
@@ -584,7 +597,7 @@
         return
 
     symlink=CreateSymlink(job,path)
-    path_obj=Path( path_prefix=symlink, num_files=0 )
+    path_obj=Path( path_prefix=symlink, num_files=0, type_id=path_type )
     session.add(path_obj)
 
     ResetExistsOnFS(job, symlink)
@@ -922,7 +935,6 @@ def RemoveDups(job):
     cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
     for j in cd_jobs:
         FinishJob(j, "Just removed duplicates - so no need to do any other checkdups, we will force 1 last one after the remove step", "Withdrawn")
-        print("here-loop")
     session.commit()
 
     dup_cnt=0
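
JobsForPaths now tags each importdir job with a second JobExtra carrying the path type id, and JobImportDir reads it back with the same list-comprehension lookup already used for "path". The round trip, sketched with plain stand-in classes rather than the real SQLAlchemy models:

    class JobExtra:
        def __init__(self, name, value):
            self.name, self.value = name, value

    class Job:
        def __init__(self):
            self.extra = []

    # enqueue: one extra for the path, one for its PATH_TYPE id
    job = Job()
    job.extra.append(JobExtra(name="path", value="/xxx/photos"))
    job.extra.append(JobExtra(name="path_type", value=2))  # e.g. id of 'Storage'

    # dequeue: the same lookups JobImportDir performs
    path = [jex.value for jex in job.extra if jex.name == "path"][0]
    path_type = [jex.value for jex in job.extra if jex.name == "path_type"][0]
    assert (path, path_type) == ("/xxx/photos", 2)
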
diff --git a/tables.sql b/tables.sql
index 0e194fa..28f9399 100644
--- a/tables.sql
+++ b/tables.sql
@@ -4,8 +4,11 @@ create table SETTINGS( ID integer, IMPORT_PATH varchar, STORAGE_PATH varchar, RE
 create table FILE_TYPE ( ID integer, NAME varchar(32) unique, constraint PK_FILE_TYPE_ID primary key(ID) );
 
-create table PATH ( ID integer, PATH_PREFIX varchar(1024), NUM_FILES integer,
-                    constraint PK_PATH_ID primary key(ID) );
+create table PATH_TYPE ( ID integer, NAME varchar(16) unique, constraint PK_PATH_TYPE_ID primary key(ID) );
+
+create table PATH ( ID integer, TYPE_ID integer, PATH_PREFIX varchar(1024), NUM_FILES integer,
+                    constraint PK_PATH_ID primary key(ID),
+                    constraint FK_PATH_TYPE_TYPE_ID foreign key (TYPE_ID) references PATH_TYPE(ID) );
 
 create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer, EXISTS_ON_FS boolean,
                     constraint PK_ENTRY_ID primary key(ID),
@@ -64,6 +67,7 @@ create table PA_JOB_MANAGER_FE_MESSAGE ( ID integer, JOB_ID integer, ALERT varch
                     constraint FK_PA_JOB_MANAGER_FE_MESSAGE_JOB_ID foreign key(JOB_ID) references JOB(ID) );
 
 create sequence PATH_ID_SEQ;
+create sequence PATH_TYPE_ID_SEQ;
 create sequence FILE_ID_SEQ;
 create sequence FILE_TYPE_ID_SEQ;
 create sequence JOBEXTRA_ID_SEQ;
@@ -75,9 +79,15 @@ create sequence SETTINGS_ID_SEQ;
 create sequence PA_JOB_MANAGER_ID_SEQ;
 create sequence PA_JOB_MANAGER_FE_MESSAGE_ID_SEQ;
 
-insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Directory' );
+-- default data for types of paths
+insert into PATH_TYPE values ( (select nextval('PATH_TYPE_ID_SEQ')), 'Import' );
+insert into PATH_TYPE values ( (select nextval('PATH_TYPE_ID_SEQ')), 'Storage' );
+insert into PATH_TYPE values ( (select nextval('PATH_TYPE_ID_SEQ')), 'Bin' );
+
+-- default data for types of files
 insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Image' );
 insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Video' );
+insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Directory' );
 insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Unknown' );
 
 -- fake data only for making testing easier
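
With PATH.TYPE_ID and the seeded PATH_TYPE rows in place, a path's type name is one join away. A small sanity-check query against the new schema (output rows depend on what has been scanned):

    -- list every registered path with its type name
    select pt.name as path_type, p.path_prefix, p.num_files
    from path p
    join path_type pt on pt.id = p.type_id
    order by pt.name, p.path_prefix;
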
diff --git a/templates/files.html b/templates/files.html
index e2d56bf..f8b3746 100644
--- a/templates/files.html
+++ b/templates/files.html
@@ -16,6 +16,13 @@
 {% endif %}
+{% if "files_ip" in request.url %}
+I
+{% elif "files_sp" in request.url %}
+S
+{% else %}
+R
+{% endif %}
 {% if folders %}
 In: {{cwd}}
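
The template picks a marker letter from the request URL: I for import, S for storage, R for recycle, matching the TODO note about per-type icons (any markup around the letters did not survive extraction and is not reconstructed here). The same dispatch in plain Python, for reference -- the route names files_ip/files_sp come from the diff:

    def path_marker(url):
        # mirror of the Jinja block above: I(mport), S(torage), R(ecycle)
        if "files_ip" in url:
            return "I"
        if "files_sp" in url:
            return "S"
        return "R"

    assert path_marker("http://localhost/files_sp?d=1") == "S"
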