fixed BUG-17 (deleting files/dirs) - currently 0 BUGS :)

This commit is contained in:
2021-01-25 13:40:53 +11:00
parent 96b9a6b5ca
commit ac9bb7ee2e
3 changed files with 52 additions and 7 deletions

1
BUGs
View File

@@ -1,2 +1 @@
### Next: 19
BUG-17: I think it won't handle me deleting files after scan

View File

@@ -88,13 +88,14 @@ class Entry(Base):
id = Column(Integer, Sequence('file_id_seq'), primary_key=True )
name = Column(String, unique=True, nullable=False )
type_id = Column(Integer, ForeignKey("file_type.id"))
exists_on_fs=Column(Boolean)
type=relationship("FileType")
dir_details = relationship( "Dir")
file_details = relationship( "File" )
in_dir = relationship ("Dir", secondary="entry_dir_link" )
def __repr__(self):
return f"<id: {self.id}, name: {self.name}, type={self.type}, dir_details={self.dir_details}, file_details={self.file_details}, in_dir={self.in_dir}>"
return f"<id: {self.id}, name: {self.name}, type={self.type}, exists_on_fs={self.exists_on_fs}, dir_details={self.dir_details}, file_details={self.file_details}, in_dir={self.in_dir}>"
class FileRefimgLink(Base):
__tablename__ = "file_refimg_link"
@@ -396,10 +397,12 @@ def CreateSymlink(job,path):
def AddDir(job, dirname, path_prefix, in_dir):
dir=session.query(Dir).filter(Dir.path_prefix==path_prefix).first()
if dir:
e=session.query(Entry).get(dir.eid)
e.exists_on_fs=True
return dir
dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0 )
dtype=session.query(FileType).filter(FileType.name=='Directory').first()
e=Entry( name=dirname, type=dtype )
e=Entry( name=dirname, type=dtype, exists_on_fs=True )
e.dir_details.append(dir)
# no in_dir occurs when we Add the actual Dir for the import_path (top of the tree)
if in_dir:
@@ -414,9 +417,11 @@ def AddFile(job, fname, type_str, fsize, in_dir ):
# see if this exists already
e=session.query(Entry).filter(Entry.name==fname).first()
if e:
print(f"in theory we reset file: {e.name} back to on fs")
e.exists_on_fs=True
return e
ftype = session.query(FileType).filter(FileType.name==type_str).first()
e=Entry( name=fname, type=ftype )
e=Entry( name=fname, type=ftype, exists_on_fs=True )
f=File( size_mb=fsize, last_hash_date=0, faces_created_on=0 )
e.file_details.append(f)
e.in_dir.append(in_dir)
@@ -424,6 +429,40 @@ def AddFile(job, fname, type_str, fsize, in_dir ):
session.add(e)
return e
# Reset exists_on_fs to False for everything in this import path. If the walk
# of the filesystem that follows finds an entry, it is set back to True;
# anything that is still False afterwards has been deleted from the filesystem.
def ResetExistsOnFS(job, path):
    """Flag every Entry under an import path as not (yet) seen on the filesystem.

    job  -- the import job (not used in this function).
    path -- path prefix of the import; matched case-insensitively as a literal
            prefix of Dir.path_prefix.

    Returns None. The changes are only made on the session; nothing is
    committed here.
    """
    # Escape the LIKE wildcards so '_' and '%' in the path only match
    # themselves. Unescaped, '_' matches any single character, which could
    # reset entries that belong to a different import path.
    literal_prefix = path.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
    reset_dirs = (
        session.query(Entry)
        .join(EntryDirLink)
        .join(Dir)
        .filter(Dir.path_prefix.ilike(literal_prefix + '%', escape='\\'))
        .all()
    )
    for reset_dir in reset_dirs:
        reset_dir.exists_on_fs = False
        session.add(reset_dir)
        # also reset every entry linked to this one as its containing directory
        reset_files = (
            session.query(Entry)
            .join(EntryDirLink)
            .filter(EntryDirLink.dir_eid == reset_dir.id)
            .all()
        )
        for reset_file in reset_files:
            reset_file.exists_on_fs = False
            session.add(reset_file)
    return
def HandleAnyFSDeletions(job):
    """Remove from the DB every Entry whose exists_on_fs flag is still False.

    Non-directory entries are removed first (their EntryDirLink, File and
    Entry rows), then directory entries (their EntryDirLink, Dir and Entry
    rows). A log line is added to the job for each removal.

    Note: the queries are not restricted to a single import path; every Entry
    with exists_on_fs == False is removed.

    job -- the job the removal log lines are attached to.

    Returns the number of entries (files plus directories) removed.
    """
    dtype = session.query(FileType).filter(FileType.name == 'Directory').first()
    rm_cnt = 0

    # "== False" is intentional: it builds the SQL comparison, not a Python test
    rms = session.query(Entry).filter(Entry.exists_on_fs == False, Entry.type_id != dtype.id).all()  # noqa: E712
    for rm in rms:
        # delete the dependent rows before the Entry row itself
        session.query(EntryDirLink).filter(EntryDirLink.entry_id == rm.id).delete()
        session.query(File).filter(File.eid == rm.id).delete()
        session.query(Entry).filter(Entry.id == rm.id).delete()
        AddLogForJob( job, f"INFO: Removing {rm.name} from system as it is no longer on the file system")
        rm_cnt += 1

    # Use the looked-up Directory type id (not a hard-coded 1) so this query
    # always complements the non-directory query above.
    # Highest id first -- presumably so directories created later (deeper in
    # the tree) are removed before their parents; TODO confirm.
    rmdirs = session.query(Entry).filter(Entry.exists_on_fs == False, Entry.type_id == dtype.id).order_by(Entry.id.desc()).all()  # noqa: E712
    for rmdir in rmdirs:
        print(f"We have a directory ({rmdir.name}) to delete from DB as it no longer exists on fs")
        session.query(EntryDirLink).filter(EntryDirLink.entry_id == rmdir.id).delete()
        session.query(Dir).filter(Dir.eid == rmdir.id).delete()
        session.query(Entry).filter(Entry.id == rmdir.id).delete()
        AddLogForJob( job, f"INFO: Removing {rmdir.name} from system as it is no longer on the file system")
        rm_cnt += 1
    return rm_cnt
def JobImportDir(job):
JobProgressState( job, "In Progress" )
settings = session.query(Settings).first()
@@ -437,12 +476,13 @@ def JobImportDir(job):
FinishJob( job, "Finished Importing: {} -- Path does not exist".format( path), "Failed" )
return
symlink=CreateSymlink(job,path)
ResetExistsOnFS(job, symlink)
overall_file_cnt=0
walk=os.walk(path, topdown=True)
# root == path of dir, files are in dir... subdirs are in dir
parent_dir=None
for root, subdirs, files in walk:
print(f"walk: r={root} s={subdirs} f={files}")
overall_file_cnt+= len(subdirs) + len(files)
if root == path:
pp = symlink
@@ -450,6 +490,7 @@ def JobImportDir(job):
pp=SymlinkName( path, root )+'/'+os.path.basename(root)
if root[-1]=="/":
root=root[0:-1]
dir=AddDir(job, os.path.basename(root), pp, parent_dir)
parent_dir=dir
for basename in files:
@@ -468,6 +509,8 @@ def JobImportDir(job):
fsize = round(stat.st_size/(1024*1024))
e=AddFile( job, basename, type_str, fsize, dir )
else:
e=session.query(Entry).filter(Entry.name==basename).first()
e.exists_on_fs=True
if DEBUG==1:
AddLogForJob(job, "DEBUG: {} - is unchanged".format( basename, basename ) )
print("DEBUG: {} - {} is OLDER than {}".format( basename, stat.st_ctime, dir.last_import_date ), basename )
@@ -475,7 +518,10 @@ def JobImportDir(job):
dir.last_import_date = time.time()
job.num_files=overall_file_cnt
job.current_file_num=overall_file_cnt
FinishJob(job, "Finished Importing: {} - Found {} new files".format( path, overall_file_cnt ) )
rm_cnt=HandleAnyFSDeletions(job)
FinishJob(job, f"Finished Importing: {path} - Processed {overall_file_cnt} files, Removed {rm_cnt} file(s)")
import_dir=session.query(Dir).filter(Dir.path_prefix==symlink).first()
import_dir.num_files=overall_file_cnt
session.commit()

View File

@@ -4,7 +4,7 @@ create table SETTINGS( ID integer, IMPORT_PATH varchar, constraint PK_SETTINGS_I
create table FILE_TYPE ( ID integer, NAME varchar(32) unique, constraint PK_FILE_TYPE_ID primary key(ID) );
create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer,
create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer, EXISTS_ON_FS boolean,
constraint PK_ENTRY_ID primary key(ID),
constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) );