First pass at removing explicit use of path_prefix. The remaining code either uses it for searching (and so needs the new path_dir_link table) or is AddDir, which I still need to think about.

This commit is contained in:
2021-04-10 11:48:56 +10:00
parent fddd47a1ab
commit 4a55e98f5b

View File

@@ -85,6 +85,9 @@ class Dir(Base):
last_import_date = Column(Float)
files = relationship("Entry", secondary="entry_dir_link")
def PathOnFS(self):
    """Return this directory's location on the filesystem.

    Currently this is just the stored path_prefix; callers should use
    this accessor rather than touching path_prefix directly.
    """
    fs_path = self.path_prefix
    return fs_path
def __repr__(self):
    """Debug representation listing the Dir row's key fields."""
    return (
        f"<eid: {self.eid}, path_prefix: {self.path_prefix}, "
        f"num_files: {self.num_files}, last_import_date: {self.last_import_date}, "
        f"files: {self.files}>"
    )
@@ -99,6 +102,9 @@ class Entry(Base):
file_details = relationship( "File", uselist=False )
in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
def FullPathOnFS(self):
    """Return the entry's full filesystem path: its directory's path plus its name.

    Uses Dir.PathOnFS() instead of reading in_dir.path_prefix directly, for
    consistency with the rest of the file (the path_prefix attribute is being
    phased out as a direct access point). Behavior is unchanged, since
    PathOnFS() currently returns path_prefix.
    """
    return self.in_dir.PathOnFS() + '/' + self.name
def __repr__(self):
    """Debug representation listing the Entry row's key fields and relations."""
    return (
        f"<id: {self.id}, name: {self.name}, type={self.type}, "
        f"exists_on_fs={self.exists_on_fs}, dir_details={self.dir_details}, "
        f"file_details={self.file_details}, in_dir={self.in_dir}>"
    )
@@ -457,7 +463,6 @@ def AddDir(job, dirname, path_prefix, in_dir):
def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ):
e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==fname,Dir.eid==in_dir.eid).first()
if e:
print( f"################################################ FILE EXISTS ALREADY: {fname} -- {in_dir.path_prefix} {e}" )
e.exists_on_fs=True
return e
ftype = session.query(FileType).filter(FileType.name==type_str).first()
@@ -582,7 +587,7 @@ def JobImportDir(job):
# commit every 100 files to see progress being made but not hammer the database
if job.current_file_num % 100 == 0:
session.commit()
fname=dir.path_prefix+'/'+basename
fname=dir.PathOnFS()+'/'+basename
stat = os.stat(fname)
if stat.st_ctime > dir.last_import_date:
@@ -643,20 +648,20 @@ def GenHashAndThumb(job, e):
# commit every 100 files to see progress being made but not hammer the database
if job.current_file_num % 100 == 0:
session.commit()
stat = os.stat( e.in_dir.path_prefix + '/' + e.name )
stat = os.stat( e.FullPathOnFS() )
if stat.st_ctime < e.file_details.last_hash_date:
if DEBUG==1:
print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
job.current_file_num+=1
return
e.file_details.hash = md5( job, e.in_dir.path_prefix+'/'+ e.name )
e.file_details.hash = md5( job, e.FullPathOnFS() )
if DEBUG==1:
print( f"{e.name} - hash={e.file_details.hash}" )
if e.type.name == 'Image':
e.file_details.thumbnail = GenImageThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
e.file_details.thumbnail = GenImageThumbnail( job, e.FullPathOnFS() )
elif e.type.name == 'Video':
e.file_details.thumbnail = GenVideoThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
e.file_details.thumbnail = GenVideoThumbnail( job, e.FullPathOnFS() )
elif e.type.name == 'Unknown':
job.current_file_num+=1
e.file_details.last_hash_date = time.time()
@@ -667,7 +672,7 @@ def ProcessAI(job, e):
job.current_file_num+=1
return
file = e.in_dir.path_prefix + '/' + e.name
file = e.FullPathOnFS()
stat = os.stat(file)
# find if file is newer than when we found faces before (fyi: first time faces_created_on == 0)
if stat.st_ctime > e.file_details.faces_created_on:
@@ -709,7 +714,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
if not frl:
frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details.eid)
else:
stat=os.stat(e.in_dir.path_prefix+'/'+ e.name)
stat=os.stat( e.FullPathOnFS() )
# if neither the file nor the refimg is newer, we don't need to re-check
if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
print(f"OPTIM: lookForPersonInImage: file {e.name} has a previous match for: {refimg.fname}, and the file & refimg haven't changed")
@@ -757,7 +762,7 @@ def compareAI(known_encoding, unknown_encoding):
def ProcessFilesInDir(job, e, file_func):
if DEBUG==1:
print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir.path_prefix))
print("DEBUG: files in dir - process: {}".format(e.FullPathOnFS()) )
if e.type.name != 'Directory':
file_func(job, e)
else:
@@ -873,7 +878,7 @@ def RemoveFileFromFS( del_me ):
m=re.search( r'^static/(.+)', del_me.in_dir.path_prefix)
dst_dir=settings.recycle_bin_path + m[1] + '/'
os.makedirs( dst_dir,mode=0o777, exist_ok=True )
src=del_me.in_dir.path_prefix+'/'+del_me.name
src=del_me.FullPathOnFS()
dst=dst_dir + '/' + del_me.name
os.replace( src, dst )
except Exception as e:
@@ -903,8 +908,8 @@ def RemoveDups(job):
found=None
del_me_lst = []
for f in files:
if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
if os.path.isfile( f.FullPathOnFS() ) == False:
AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
elif f.file_details.eid == int(keeping):
found = f
else:
@@ -912,9 +917,9 @@ def RemoveDups(job):
if found == None:
AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" )
else:
AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
for del_me in del_me_lst:
AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.FullPathOnFS()}" )
RemoveFileFromFS( del_me )
RemoveFileFromDB(del_me.id)
@@ -923,14 +928,14 @@ def RemoveDups(job):
hashes=[jex.value for jex in job.extra if jex.name == f"kdhash-{which}"][0]
keeping=jex.value
tmp=session.query(Dir).filter(Dir.eid==keeping).first()
AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
AddLogForJob(job, f"Keeping files in {tmp.PathOnFS()}" )
for hash in hashes.split(","):
files=session.query(Entry).join(File).filter(File.hash==hash).all()
found=None
del_me=None
for f in files:
if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
if os.path.isfile(f.FullPathOnFS()) == False:
AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
if f.in_dir.eid == int(keeping):
found=f
else:
@@ -939,8 +944,8 @@ def RemoveDups(job):
if found == None:
AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" )
else:
AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.FullPathOnFS()}" )
RemoveFileFromFS( del_me )
RemoveFileFromDB(del_me.id)
dup_cnt += 1