From ac9bb7ee2e79fd3c12332a3492d4a5af0d18cc9c Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Mon, 25 Jan 2021 13:40:53 +1100 Subject: [PATCH] fixed BUG-17 (deleting files/dirs) - currently 0 BUGS :) --- BUGs | 1 - pa_job_manager.py | 56 ++++++++++++++++++++++++++++++++++++++++++----- tables.sql | 2 +- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/BUGs b/BUGs index 4882752..ae535bd 100644 --- a/BUGs +++ b/BUGs @@ -1,2 +1 @@ ### Next: 19 -BUG-17: I think it won't handle me deleting files after scan diff --git a/pa_job_manager.py b/pa_job_manager.py index 358e7ee..a8ab385 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -88,13 +88,14 @@ class Entry(Base): id = Column(Integer, Sequence('file_id_seq'), primary_key=True ) name = Column(String, unique=True, nullable=False ) type_id = Column(Integer, ForeignKey("file_type.id")) + exists_on_fs=Column(Boolean) type=relationship("FileType") dir_details = relationship( "Dir") file_details = relationship( "File" ) in_dir = relationship ("Dir", secondary="entry_dir_link" ) def __repr__(self): - return f"" + return f"" class FileRefimgLink(Base): __tablename__ = "file_refimg_link" @@ -396,10 +397,12 @@ def CreateSymlink(job,path): def AddDir(job, dirname, path_prefix, in_dir): dir=session.query(Dir).filter(Dir.path_prefix==path_prefix).first() if dir: + e=session.query(Entry).get(dir.eid) + e.exists_on_fs=True return dir dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0 ) dtype=session.query(FileType).filter(FileType.name=='Directory').first() - e=Entry( name=dirname, type=dtype ) + e=Entry( name=dirname, type=dtype, exists_on_fs=True ) e.dir_details.append(dir) # no in_dir occurs when we Add the actual Dir for the import_path (top of the tree) if in_dir: @@ -414,9 +417,11 @@ def AddFile(job, fname, type_str, fsize, in_dir ): # see if this exists already e=session.query(Entry).filter(Entry.name==fname).first() if e: + print(f"in theory we reset file: {e.name} back to on fs") + e.exists_on_fs=True return e ftype = session.query(FileType).filter(FileType.name==type_str).first() - e=Entry( name=fname, type=ftype ) + e=Entry( name=fname, type=ftype, exists_on_fs=True ) f=File( size_mb=fsize, last_hash_date=0, faces_created_on=0 ) e.file_details.append(f) e.in_dir.append(in_dir) @@ -424,6 +429,40 @@ def AddFile(job, fname, type_str, fsize, in_dir ): session.add(e) return e +# reset exists_on_fs to False for everything in this import path, if we find it on the FS in the walk below, it goes back to True, anything that +# is still false, has been deleted +def ResetExistsOnFS(job, path): + reset_dirs = session.query(Entry).join(EntryDirLink).join(Dir).filter(Dir.path_prefix.ilike(path+'%')).all() + for reset_dir in reset_dirs: + reset_dir.exists_on_fs=False + session.add(reset_dir) + reset_files = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==reset_dir.id).all() + for reset_file in reset_files: + reset_file.exists_on_fs=False + session.add(reset_file) + return + +def HandleAnyFSDeletions(job): + dtype=session.query(FileType).filter(FileType.name=='Directory').first() + rms = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id!=dtype.id).all() + rm_cnt=0 + for rm in rms: + session.query(EntryDirLink).filter(EntryDirLink.entry_id==rm.id).delete() + session.query(File).filter(File.eid==rm.id).delete() + session.query(Entry).filter(Entry.id==rm.id).delete() + AddLogForJob( job, f"INFO: Removing {rm.name} from system as it is no longer on the file system") + rm_cnt+=1 + + rmdirs = session.query(Entry).filter(Entry.exists_on_fs==False,Entry.type_id==1).order_by(Entry.id.desc()).all() + for rmdir in rmdirs: + print(f"We have a directory ({rmdir.name}) to delete from DB as it no longer exists on fs"); + session.query(EntryDirLink).filter(EntryDirLink.entry_id==rmdir.id).delete() + session.query(Dir).filter(Dir.eid==rmdir.id).delete() + session.query(Entry).filter(Entry.id==rmdir.id).delete() + AddLogForJob( job, f"INFO: Removing {rmdir.name} from system as it is no longer on the file system") + rm_cnt+=1 + return rm_cnt + def JobImportDir(job): JobProgressState( job, "In Progress" ) settings = session.query(Settings).first() @@ -437,12 +476,13 @@ def JobImportDir(job): FinishJob( job, "Finished Importing: {} -- Path does not exist".format( path), "Failed" ) return symlink=CreateSymlink(job,path) + ResetExistsOnFS(job, symlink) + overall_file_cnt=0 walk=os.walk(path, topdown=True) # root == path of dir, files are in dir... subdirs are in dir parent_dir=None for root, subdirs, files in walk: - print(f"walk: r={root} s={subdirs} f={files}") overall_file_cnt+= len(subdirs) + len(files) if root == path: pp = symlink @@ -450,6 +490,7 @@ def JobImportDir(job): pp=SymlinkName( path, root )+'/'+os.path.basename(root) if root[-1]=="/": root=root[0:-1] + dir=AddDir(job, os.path.basename(root), pp, parent_dir) parent_dir=dir for basename in files: @@ -468,6 +509,8 @@ def JobImportDir(job): fsize = round(stat.st_size/(1024*1024)) e=AddFile( job, basename, type_str, fsize, dir ) else: + e=session.query(Entry).filter(Entry.name==basename).first() + e.exists_on_fs=True if DEBUG==1: AddLogForJob(job, "DEBUG: {} - is unchanged".format( basename, basename ) ) print("DEBUG: {} - {} is OLDER than {}".format( basename, stat.st_ctime, dir.last_import_date ), basename ) @@ -475,7 +518,10 @@ def JobImportDir(job): dir.last_import_date = time.time() job.num_files=overall_file_cnt job.current_file_num=overall_file_cnt - FinishJob(job, "Finished Importing: {} - Found {} new files".format( path, overall_file_cnt ) ) + + rm_cnt=HandleAnyFSDeletions(job) + + FinishJob(job, f"Finished Importing: {path} - Processed {overall_file_cnt} files, Removed {rm_cnt} file(s)") import_dir=session.query(Dir).filter(Dir.path_prefix==symlink).first() import_dir.num_files=overall_file_cnt session.commit() diff --git a/tables.sql b/tables.sql index f1190e1..eca8706 100644 --- a/tables.sql +++ b/tables.sql @@ -4,7 +4,7 @@ create table SETTINGS( ID integer, IMPORT_PATH varchar, constraint PK_SETTINGS_I create table FILE_TYPE ( ID integer, NAME varchar(32) unique, constraint PK_FILE_TYPE_ID primary key(ID) ); -create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer, +create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer, EXISTS_ON_FS boolean, constraint PK_ENTRY_ID primary key(ID), constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) );