Stop SQLAlchemy from raising an ambiguous-join error on entry_dir_link (it links both files and directories) by removing the `files` relationship from class Dir, and instead querying the list of file Entry rows for a directory explicitly in RunFuncOnFilesInPath() and ProcessFilesInDir()

This commit is contained in:
2021-06-16 23:29:25 +10:00
parent 2b87f84695
commit 43b39af61d

View File

@@ -118,13 +118,12 @@ class Dir(Base):
rel_path = Column(String, unique=True, nullable=False ) rel_path = Column(String, unique=True, nullable=False )
in_path = relationship("Path", secondary="path_dir_link", uselist=False) in_path = relationship("Path", secondary="path_dir_link", uselist=False)
last_import_date = Column(Float) last_import_date = Column(Float)
files = relationship("Entry", secondary="entry_dir_link")
def PathOnFS(self): def PathOnFS(self):
return self.in_path.path_prefix+'/'+self.rel_path return self.in_path.path_prefix+'/'+self.rel_path
def __repr__(self): def __repr__(self):
return f"<eid: {self.eid}, rel_path: {self.rel_path}, in_path={self.in_path}, last_import_date: {self.last_import_date}, files: {self.files}>" return f"<eid: {self.eid}, rel_path: {self.rel_path}, in_path={self.in_path}, last_import_date: {self.last_import_date}>"
class Entry(Base): class Entry(Base):
__tablename__ = "entry" __tablename__ = "entry"
@@ -655,7 +654,7 @@ def RestoreFile(job,restore_me):
restore_me.in_dir = new_dir restore_me.in_dir = new_dir
AddLogForJob(job, f"Restored file: {restore_me.name} to {os.path.dirname(restore_me.FullPathOnFS())}" ) AddLogForJob(job, f"Restored file: {restore_me.name} to {os.path.dirname(restore_me.FullPathOnFS())}" )
### DDP: when restoring, an original dir tree might have been removed, so need make it (if needed) ### when restoring, an original dir tree might have been removed, so need make it (if needed)
os.makedirs( os.path.dirname(restore_me.FullPathOnFS()),mode=0o777, exist_ok=True ) os.makedirs( os.path.dirname(restore_me.FullPathOnFS()),mode=0o777, exist_ok=True )
# remove DelFile entry for this restored file # remove DelFile entry for this restored file
session.query(DelFile).filter(DelFile.file_eid==restore_me.id).delete() session.query(DelFile).filter(DelFile.file_eid==restore_me.id).delete()
@@ -780,10 +779,10 @@ def JobImportDir(job):
# create/find the Path # create/find the Path
path_obj=AddPath( job, symlink, path_type ) path_obj=AddPath( job, symlink, path_type )
session.add(path_obj) session.commit()
# for recycle bin path, we dont want to import content, just create the path/dir vars (above) in the DB # for recycle bin path, we dont want to import content, just create the path/dir vars (above) in the DB
bin_path=session.query(Path).join(PathType).filter(PathType.name=='Bin').first() bin_path=session.query(Path).join(PathType).filter(PathType.name=='Bin').first()
if path_type == bin_path.type.id: if bin_path != None and path_type == bin_path.type.id:
return return
# find all jobs waiting on me and their children, etc. and add a path_prefix jex to symlink, so we can just reference it form here on in, rather than recreate that string # find all jobs waiting on me and their children, etc. and add a path_prefix jex to symlink, so we can just reference it form here on in, rather than recreate that string
@@ -858,8 +857,10 @@ def JobImportDir(job):
def RunFuncOnFilesInPath( job, path, file_func ): def RunFuncOnFilesInPath( job, path, file_func ):
d = session.query(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==path).filter(Dir.rel_path=='').first() d = session.query(Dir).join(PathDirLink).join(Path).filter(Path.path_prefix==path).filter(Dir.rel_path=='').first()
for e in d.files: files = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==d.eid).all()
for e in files:
ProcessFilesInDir(job, e, file_func) ProcessFilesInDir(job, e, file_func)
return
def JobProcessAI(job): def JobProcessAI(job):
@@ -994,13 +995,14 @@ def compareAI(known_encoding, unknown_encoding):
def ProcessFilesInDir(job, e, file_func): def ProcessFilesInDir(job, e, file_func):
if DEBUG==1: if DEBUG==1:
print("DEBUG: ProcessFilesInDir: {e.FullPathOnFS()}") print( f"DEBUG: ProcessFilesInDir: {e.FullPathOnFS()}")
if e.type.name != 'Directory': if e.type.name != 'Directory':
file_func(job, e) file_func(job, e)
else: else:
dir=session.query(Dir).filter(Dir.eid==e.id).first() d=session.query(Dir).filter(Dir.eid==e.id).first()
job.current_file_num+=1 job.current_file_num+=1
for sub in dir.files: files = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==d.eid).all()
for sub in files:
ProcessFilesInDir(job, sub, file_func) ProcessFilesInDir(job, sub, file_func)
return return
@@ -1253,7 +1255,7 @@ def InitialValidationChecks():
sp_exists=1 sp_exists=1
break break
if not sp_exists: if not sp_exists:
print("ERROR: None of the storge paths in the settings does not exist - Please fix now"); print("ERROR: None of the storage paths in the settings exist - Please fix now");
ip_exists=0 ip_exists=0
for path in paths: for path in paths:
if os.path.exists(path): if os.path.exists(path):
@@ -1263,7 +1265,7 @@ def InitialValidationChecks():
print("ERROR: None of the import paths in the settings exist - Please fix now"); print("ERROR: None of the import paths in the settings exist - Please fix now");
paths = settings.import_path.split("#") paths = settings.import_path.split("#")
if not rbp_exists or not sp_exists or not ip_exists: if not rbp_exists or not sp_exists or not ip_exists:
print("ERROR: Existing until above errors are fixed by paths being created or settings being updated to valid paths" ) print("ERROR: Exiting until above errors are fixed by paths being created or settings being updated to valid paths" )
exit(-1) exit(-1)
return return