fixed up the dup code to work with paths, added path_types throughout, and updated the TODO to be clear on what comes next
TODO (31 changed lines)
@@ -1,26 +1,19 @@
## GENERAL

* need a path_details_dir_link table (path_details -> type 'import/storage/recycle')
- need to make some FILE/DIR, etc. funcs into OO
-- is there more to do here? (should probably see how often I reference fields of FILE/DIR, etc. directly, and get rid of them in most instances)
- stop fudging the path in DIR (to add static), and just add 'static/' + path_details_dir.prefix + dir.rel_path
-- this might be done, but reconsider the idea that 'static' is hard-coded? -- fine as long as it is in one spot
-- because I want to change the recycle bin to be .pa_bin/IMPORT/xxxx and .pa_bin/STORAGE/xxxx (to address the potential issue where import_path is /xxx/photos and storage_path is /yyy/photos...)
??? could this cause issues in other spots, like the de-dup trim path... in fact, we probably need to revisit TrimPath anyway!
- ignore *thumb* -- but consider how we do this so we don't screw up 'dir/job counts'
  and other stuff like .pa_bin if it's in a storage/import folder?
* storage_path viewing needs to be by folder, not a big grab bag of files (by default)
* issue where someone could call IP .../Imp/photos and SP .../Sto/photos, and then static/photos is ambiguous:
-- TODO: prefix paths with static/<ptype.name>/ so that files are in static/<ptype.name>/in_path.pp/dir.rel_path/ (see the sketch after this diff)
-- then deleting (below) would just move the path_prefix from static/storage to .pa_bin/storage, etc.
-- need to be able to view the recycle bin (should be simple when we have path_types) &&& should be able to consolidate the files_ip/files_sp/files_rb? route-handling functions
-- could also allow undelete per file / show content as another Files->View, and more like storage (i.e. show folders)
* storage_path viewing needs to be by folder, not a big grab bag of files (by default - DONE)
-- BUG: issue with view by Day, etc.: we print out the day even if the Entry is not in the cwd
-- mostly done. Need to toggle the view if I want, and when viewing the storage area, change single-click to view the file again and right-click to be my context menu
-> could this also be a small symbol near the icons mentioned below (disk/?/bin)?
* doing actual file deletes is needed again [DONE]
- decided a recycle bin would be good [DONE]
- when we put files in the recycle bin, they need to stay in the DB and just have their root/base path moved (so they can be viewed as per above/below) <--- TO BE DONE
  AND need to be able to view the recycle bin (should be simple when we have path_types) &&& should be able to consolidate the files_ip/files_sp/files_rb? route-handling functions
- could also allow undelete per file / show content as another Files->View, and more like storage (i.e. show folders)
-- TODO: Need to toggle the view if I want, and when viewing the storage area, change single-click to view the file again and right-click to be my context menu
* need a way for search results to show we found something in import_path or storage_path:
- now we can use the in_path, then have a series of icons, e.g. disk for storage, ? for import, and bin for recycling (before the blue path) -- maybe even show different colours, e.g. info for import, primary for storage and danger for bin?
* handle thumbs:
- need to ignore *thumb* -- but consider how we do this so we don't screw up 'dir/job counts'
  and potentially other stuff like .pa_bin if it's in a storage/import folder?
* AddJobForLog can absorb DEBUGs, etc.; in fact, fix up logging in general
* comment your code
* need a way for the page to show we are in import_path or storage_path:
- now we can use the in_path (which needs a type!!!), then have a series of icons, e.g. disk for storage, ? for import, and bin for recycling (before the blue path) -- maybe even show different colours, e.g. info for import, primary for storage and danger for bin?

## DB
Need to think about...
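To make the static/<ptype.name>/ idea in the TODO concrete, here is a minimal sketch of how a file's URL could be assembled, assuming the Entry -> Dir -> Path -> PathType relationships this commit adds; StaticUrl is a hypothetical name, not project code.

    # Hypothetical sketch only: assembles static/<ptype.name>/<path_prefix>/<rel_path>/<name>
    # using the in_dir / in_path / type relationships defined later in this commit.
    def StaticUrl(entry):
        d = entry.in_dir
        parts = ["static", d.in_path.type.name, d.in_path.path_prefix, d.rel_path, entry.name]
        # skip empty segments (rel_path can be '') so we never emit '//'
        return "/".join(p for p in parts if p)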
dups.py (49 changed lines)
@@ -23,6 +23,7 @@ import re
from job import Job, JobExtra, Joblog, NewJob
from settings import Settings
from shared import SymlinkName
from path import PathType

################################################################################
# DupRow class is a simple 'struct' to keep data per duplicate file / just to
@@ -91,47 +92,23 @@ class Duplicates:
        self.per_path_dups=[]
        self.preferred_file={}
        self.preferred_path={}
        self.all_paths=[]
        self.storage_paths=[]
        self.import_paths=[]
        self.hashes_processed={}
        self.uniq_dups=0
        self.total_dups=0

        # pull apart the storage path Setting, and make array of each for use in TrimmedPath()
        settings=Settings.query.first()
        paths = settings.storage_path.split("#")
        for path in paths:
            prefix = SymlinkName(path,path+'/')
            self.storage_paths.append(prefix)
            self.all_paths.append(prefix)
        # pull apart the import path Setting, and make array of each for use in TrimmedPath()
        paths = settings.import_path.split("#")
        for path in paths:
            prefix = SymlinkName(path,path+'/')
            self.import_paths.append(prefix)
            self.all_paths.append(prefix)

    # Strip the front of the path (any match on a storage or import path) is
    # removed. Just to make it easier to read when we display in the web page
    def TrimmedPath( self, path ):
        for p in self.all_paths:
            if re.match( f"^{p}", path ):
                return path.replace(p, '' )
        return path
        self.import_ptype_id = PathType.query.filter(PathType.name=='Import').first().id
        self.storage_ptype_id = PathType.query.filter(PathType.name=='Storage').first().id

    # is this file in the import path?
    def InImportPath( self, path ):
        for p in self.import_paths:
            if re.match( f"^{p}", path ):
                return True
    def InImportPath( self, path_type ):
        if path_type == self.import_ptype_id:
            return True
        return False

    # is this file in the storage path?
    def InStoragePath( self, path ):
        for p in self.storage_paths:
            if re.match( f"^{p}", path ):
                return True
    def InStoragePath( self, path_type ):
        if path_type == self.storage_ptype_id:
            return True
        return False

    # this stores this object into the keep from same path list (DDP: sometimes there can be more than 1 SP, e.g SP to SP to IP)
@@ -160,11 +137,11 @@ class Duplicates:
    # and then puts the storage path file in the keep list (self.ip_to_sp_dups_keep) via self.KeepInIPSPDups()
    # and then puts the import path file in the delete list (self.ip_to_sp_dups_keep) via self.DelInIPSPDups()
    def DupInImportAndStoragePath( self, row, dr1, dr2 ):
        if self.InStoragePath(row.path1) and self.InImportPath(row.path2):
        if self.InStoragePath(row.path_type1) and self.InImportPath(row.path_type2):
            self.KeepInIPSPDups( dr1 )
            self.DelInIPSPDups( dr2 )
            return True
        if self.InStoragePath(row.path2) and self.InImportPath(row.path1):
        if self.InStoragePath(row.path_type2) and self.InImportPath(row.path_type1):
            self.KeepInIPSPDups( dr2 )
            self.DelInIPSPDups( dr1 )
            return True
@@ -174,8 +151,8 @@ class Duplicates:
    # we process these into appropriate data structures on this first pass
    def AddDup( self, row ):
        self.hashes_processed[row.hash]=1
        dr1=DupRow( row.hash, row.fname1, self.TrimmedPath(row.path1), row.did1, row.id1 )
        dr2=DupRow( row.hash, row.fname2, self.TrimmedPath(row.path2), row.did2, row.id2 )
        dr1=DupRow( row.hash, row.fname1, row.rel_path1, row.did1, row.id1 )
        dr2=DupRow( row.hash, row.fname2, row.rel_path2, row.did2, row.id2 )
        # if in both import and storage path, just keep the storage path file,
        # and del import path file.
        if self.DupInImportAndStoragePath( row, dr1, dr2 ):
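The switch above from prefix matching to path-type ids in InImportPath/InStoragePath removes a real failure mode. A small illustration with invented paths:

    # Illustration with made-up values: the old re.match-on-prefix approach
    # misclassifies a file when one configured path is a prefix of another
    # (and it also treats regex metacharacters in the prefix as patterns,
    # since the prefix was never re.escape()d). Comparing an integer
    # path_type id has neither problem.
    import re
    storage_prefix = "/data/photos"
    import_file    = "/data/photos_incoming/img.jpg"   # lives under an import path
    print(bool(re.match(f"^{storage_prefix}", import_file)))  # True -> wrongly 'in storage'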
files.py (12 changed lines)
@@ -21,6 +21,7 @@ import re
# Local Class imports
################################################################################
from job import Job, JobExtra, Joblog, NewJob
from path import PathType, Path
from person import Person, PersonRefimgLink
from refimg import Refimg
from settings import Settings
@@ -41,15 +42,6 @@ class PathDirLink(db.Model):
    def __repr__(self):
        return f"<path_id: {self.path_id}, dir_eid: {self.dir_eid}>"

class Path(db.Model):
    __tablename__ = "path"
    id = db.Column(db.Integer, db.Sequence('path_id_seq'), primary_key=True )
    path_prefix = db.Column(db.String, unique=True, nullable=False )
    num_files = db.Column(db.Integer)

    def __repr__(self):
        return f"<id: {self.id}, path_prefix: {self.path_prefix}, num_files={self.num_files}>"

class EntryDirLink(db.Model):
    __tablename__ = "entry_dir_link"
    entry_id = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
@@ -277,7 +269,7 @@ def scan_sp():

@app.route("/fix_dups", methods=["POST"])
def fix_dups():
    rows = db.engine.execute( "select e1.id as id1, f1.hash, d1.path_prefix as path1, d1.eid as did1, e1.name as fname1, e2.id as id2, d2.path_prefix as path2, d2.eid as did2, e2.name as fname2 from entry e1, file f1, dir d1, entry_dir_link edl1, entry e2, file f2, dir d2, entry_dir_link edl2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb order by path1, fname1" )
    rows = db.engine.execute( "select e1.id as id1, f1.hash, d1.rel_path as rel_path1, d1.eid as did1, e1.name as fname1, p1.id as path1, p1.type_id as path_type1, e2.id as id2, d2.rel_path as rel_path2, d2.eid as did2, e2.name as fname2, p2.id as path2, p2.type_id as path_type2 from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1, entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb order by path1, fname1" )

    if rows.returns_rows == False:
        st.SetAlert("success")
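For reference, the new fix_dups query is easier to audit when written with explicit JOIN syntax. The following is a readability sketch intended to be equivalent to the one-line query above; it is an untested assumption, not what the commit ships, and if adopted it would be passed to db.engine.execute exactly like the original.

    # Readability-only sketch; assumed equivalent to the one-line fix_dups query.
    FIX_DUPS_SQL = """
    select e1.id as id1, f1.hash, d1.rel_path as rel_path1, d1.eid as did1, e1.name as fname1,
           p1.id as path1, p1.type_id as path_type1,
           e2.id as id2, d2.rel_path as rel_path2, d2.eid as did2, e2.name as fname2,
           p2.id as path2, p2.type_id as path_type2
      from entry e1
      join file f1             on f1.eid = e1.id
      join entry_dir_link edl1 on edl1.entry_id = e1.id
      join dir d1              on d1.eid = edl1.dir_eid
      join path_dir_link pdl1  on pdl1.dir_eid = d1.eid
      join path p1             on p1.id = pdl1.path_id
      join file f2             on f2.hash = f1.hash and f2.size_mb = f1.size_mb
      join entry e2            on e2.id = f2.eid and e2.id != e1.id
      join entry_dir_link edl2 on edl2.entry_id = e2.id
      join dir d2              on d2.eid = edl2.dir_eid
      join path_dir_link pdl2  on pdl2.dir_eid = d2.eid
      join path p2             on p2.id = pdl2.path_id
     order by path1, fname1
    """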
path.py
@@ -73,6 +73,15 @@ Base = declarative_base()
# Class describing File in the database, and via sqlalchemy, connected to the DB as well
# This has to match one-for-one the DB table
################################################################################
class PathType(Base):
    __tablename__ = "path_type"
    id = Column(Integer, Sequence('path_type_id_seq'), primary_key=True )
    name = Column(String, unique=True, nullable=False )

    def __repr__(self):
        return f"<id: {self.id}, name={self.name}>"


class PathDirLink(Base):
    __tablename__ = "path_dir_link"
    path_id = Column(Integer, ForeignKey("path.id"), primary_key=True )
@@ -92,6 +101,8 @@ class EntryDirLink(Base):
class Path(Base):
    __tablename__ = "path"
    id = Column(Integer, Sequence('path_id_seq'), primary_key=True )
    type_id = Column(Integer, ForeignKey("path_type.id"))
    type = relationship("PathType")
    path_prefix = Column(String, unique=True, nullable=False )
    num_files = Column(Integer)

@@ -121,7 +132,6 @@ class Entry(Base):
    in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )

    def FullPathOnFS(self):
        print( f"(FullPathOnFS: pp={self.in_dir.in_path.path_prefix}, rp={self.in_dir.rel_path}, n={self.name})" )
        s=self.in_dir.in_path.path_prefix + '/'
        if len(self.in_dir.rel_path) > 0:
            s += self.in_dir.rel_path + '/'
@@ -274,7 +284,8 @@ def ProcessStorageDirs(parent_job):
    if settings == None:
        raise Exception("Cannot create file data with no settings / import path is missing")
    paths = settings.storage_path.split("#")
    JobsForPaths( parent_job, paths )
    ptype = session.query(PathType).filter(PathType.name=='Storage').first().id
    JobsForPaths( parent_job, paths, ptype )
    return

def ProcessImportDirs(parent_job):
@@ -282,10 +293,11 @@ def ProcessImportDirs(parent_job):
    if settings == None:
        raise Exception("Cannot create file data with no settings / import path is missing")
    paths = settings.import_path.split("#")
    JobsForPaths( parent_job, paths )
    ptype = session.query(PathType).filter(PathType.name=='Import').first().id
    JobsForPaths( parent_job, paths, ptype )
    return

def JobsForPaths( parent_job, paths ):
def JobsForPaths( parent_job, paths, ptype ):
    now=datetime.now(pytz.utc)
    # make new set of Jobs per path... HandleJobs will make them run later
    for path in paths:
@@ -295,8 +307,10 @@ def JobsForPaths( parent_job, paths ):
            cfn=p.num_files

        jex=JobExtra( name="path", value=path )
        jex2=JobExtra( name="path_type", value=ptype )
        job=Job(start_time=now, last_update=now, name="importdir", state="New", wait_for=None, pa_job_state="New", current_file_num=0, num_files=cfn )
        job.extra.append(jex)
        job.extra.append(jex2)
        session.add(job)
        session.commit()
        if parent_job:
@@ -573,10 +587,9 @@ def GetDateFromFile(file, stat):
def JobImportDir(job):
    JobProgressState( job, "In Progress" )
    settings = session.query(Settings).first()
    if settings == None:
        raise Exception("Cannot create file data with no settings / paths missing")
    path=[jex.value for jex in job.extra if jex.name == "path"][0]
    AddLogForJob(job, "Checking Directory: {}".format( path ) )
    path_type=[jex.value for jex in job.extra if jex.name == "path_type"][0]
    AddLogForJob(job, f"Checking '{path_type}' Directory: {path}" )
    if DEBUG==1:
        print("DEBUG: Checking Directory: {}".format( path ) )
    if not os.path.exists( path ):
@@ -584,7 +597,7 @@ def JobImportDir(job):
        return
    symlink=CreateSymlink(job,path)

    path_obj=Path( path_prefix=symlink, num_files=0 )
    path_obj=Path( path_prefix=symlink, num_files=0, type_id=path_type )
    session.add(path_obj)
    ResetExistsOnFS(job, symlink)

@@ -922,7 +935,6 @@ def RemoveDups(job):
    cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
    for j in cd_jobs:
        FinishJob(j, "Just removed duplicates - so no need to do any other checkdups, we will force 1 last one after the remove step", "Withdrawn")
        print("here-loop")
    session.commit()

    dup_cnt=0
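ProcessStorageDirs and ProcessImportDirs now repeat the same PathType lookup, and .first().id raises a bare AttributeError if the seed row is missing. A small hypothetical helper (PathTypeId is not project code) could centralise this with a clearer error:

    # Hypothetical helper, not part of the commit: one place to resolve a
    # PathType name to its id, with an explicit error if the row was never seeded.
    def PathTypeId(name):
        pt = session.query(PathType).filter(PathType.name == name).first()
        if pt is None:
            raise Exception(f"PATH_TYPE row missing for '{name}' - check tables.sql seed data")
        return pt.id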
tables.sql (16 changed lines)
@@ -4,8 +4,11 @@ create table SETTINGS( ID integer, IMPORT_PATH varchar, STORAGE_PATH varchar, RE

create table FILE_TYPE ( ID integer, NAME varchar(32) unique, constraint PK_FILE_TYPE_ID primary key(ID) );

create table PATH ( ID integer, PATH_PREFIX varchar(1024), NUM_FILES integer,
  constraint PK_PATH_ID primary key(ID) );
create table PATH_TYPE ( ID integer, NAME varchar(16) unique, constraint PK_PATH_TYPE_ID primary key(ID) );

create table PATH ( ID integer, TYPE_ID integer, PATH_PREFIX varchar(1024), NUM_FILES integer,
  constraint PK_PATH_ID primary key(ID),
  constraint FK_PATH_TYPE_TYPE_ID foreign key (TYPE_ID) references PATH_TYPE(ID) );

create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer, EXISTS_ON_FS boolean,
  constraint PK_ENTRY_ID primary key(ID),
@@ -64,6 +67,7 @@ create table PA_JOB_MANAGER_FE_MESSAGE ( ID integer, JOB_ID integer, ALERT varch
  constraint FK_PA_JOB_MANAGER_FE_MESSAGE_JOB_ID foreign key(JOB_ID) references JOB(ID) );

create sequence PATH_ID_SEQ;
create sequence PATH_TYPE_ID_SEQ;
create sequence FILE_ID_SEQ;
create sequence FILE_TYPE_ID_SEQ;
create sequence JOBEXTRA_ID_SEQ;
@@ -75,9 +79,15 @@ create sequence SETTINGS_ID_SEQ;
create sequence PA_JOB_MANAGER_ID_SEQ;
create sequence PA_JOB_MANAGER_FE_MESSAGE_ID_SEQ;

insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Directory' );
-- default data for types of paths
insert into PATH_TYPE values ( (select nextval('PATH_TYPE_ID_SEQ')), 'Import' );
insert into PATH_TYPE values ( (select nextval('PATH_TYPE_ID_SEQ')), 'Storage' );
insert into PATH_TYPE values ( (select nextval('PATH_TYPE_ID_SEQ')), 'Bin' );

-- default data for types of files
insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Image' );
insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Video' );
insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Directory' );
insert into FILE_TYPE values ( (select nextval('FILE_TYPE_ID_SEQ')), 'Unknown' );

-- fake data only for making testing easier
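With the TYPE_ID foreign key and the seed rows above in place, the Path.type relationship added in this commit can be sanity-checked from a session. A quick illustrative example, assuming the session setup used elsewhere in the codebase:

    # Illustrative check: every Path row should resolve to one of the seeded
    # PATH_TYPE names ('Import', 'Storage', 'Bin') via the new relationship.
    for p in session.query(Path).all():
        print(p.id, p.type.name if p.type else "untyped", p.path_prefix)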
(template diff; filename not captured)
@@ -16,6 +16,13 @@
    <input type="hidden" name="term" id="view_term" value="{{search_term}}">
{% endif %}
<div class="row">
    {% if "files_ip" in request.url %}
        I
    {% elif "files_sp" in request.url %}
        S
    {% else %}
        R
    {% endif %}
    {% if folders %}
        <div class="my-auto">
            <span class="alert alert-primary">In: {{cwd}}</span>
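The I/S/R letters above are placeholders for the icon-and-colour idea floated in the TODO (info for import, primary for storage, danger for bin). A sketch of how that mapping might be centralised in Python; the dict name and icon identifiers are invented for illustration:

    # Invented sketch: map a PathType name to an (icon, bootstrap-colour) pair
    # so the template can render a badge instead of the bare I/S/R letters.
    PATH_BADGES = {
        "Import":  ("question", "info"),
        "Storage": ("disk",     "primary"),
        "Bin":     ("bin",      "danger"),
    }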