First pass at removing explicit use of path_prefix. The remaining code either uses it for searching (so it needs the new path_dir_link table) or is AddDir, which I need to think about.

2021-04-10 11:48:56 +10:00
parent fddd47a1ab
commit 4a55e98f5b

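The message above defers two things: code that searches on path_prefix (waiting on a new path_dir_link table) and AddDir. The table does not exist anywhere in this commit; purely as a sketch of where that could go, assuming the same SQLAlchemy declarative style as the entry_dir_link table (name, columns, and FK target are all guesses):

    # Hypothetical sketch only -- path_dir_link is planned, not part of this commit.
    from sqlalchemy import Column, Integer, String, ForeignKey

    class PathDirLink(Base):                               # Base as used elsewhere in this file
        __tablename__ = "path_dir_link"
        id = Column(Integer, primary_key=True)
        dir_id = Column(Integer, ForeignKey("dir.eid"))    # guessed FK: Dir rows key on eid
        path = Column(String, index=True)                  # indexed so lookups avoid path_prefix scans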

@@ -85,6 +85,9 @@ class Dir(Base):
     last_import_date = Column(Float)
     files = relationship("Entry", secondary="entry_dir_link")
 
+    def PathOnFS(self):
+        return self.path_prefix
+
     def __repr__(self):
         return f"<eid: {self.eid}, path_prefix: {self.path_prefix}, num_files: {self.num_files}, last_import_date: {self.last_import_date}, files: {self.files}>"
@@ -99,6 +102,9 @@ class Entry(Base):
     file_details = relationship( "File", uselist=False )
     in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False )
 
+    def FullPathOnFS(self):
+        return self.in_dir.path_prefix + '/' + self.name
+
     def __repr__(self):
         return f"<id: {self.id}, name: {self.name}, type={self.type}, exists_on_fs={self.exists_on_fs}, dir_details={self.dir_details}, file_details={self.file_details}, in_dir={self.in_dir}>"
@@ -457,7 +463,6 @@ def AddDir(job, dirname, path_prefix, in_dir):
 def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ):
     e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==fname,Dir.eid==in_dir.eid).first()
     if e:
-        print( f"################################################ FILE EXISTS ALREADY: {fname} -- {in_dir.path_prefix} {e}" )
         e.exists_on_fs=True
         return e
     ftype = session.query(FileType).filter(FileType.name==type_str).first()
@@ -582,7 +587,7 @@ def JobImportDir(job):
         # commit every 100 files to see progress being made but not hammer the database
         if job.current_file_num % 100 == 0:
             session.commit()
-        fname=dir.path_prefix+'/'+basename
+        fname=dir.PathOnFS()+'/'+basename
         stat = os.stat(fname)
         if stat.st_ctime > dir.last_import_date:
@@ -643,20 +648,20 @@ def GenHashAndThumb(job, e):
     # commit every 100 files to see progress being made but not hammer the database
     if job.current_file_num % 100 == 0:
         session.commit()
-    stat = os.stat( e.in_dir.path_prefix + '/' + e.name )
+    stat = os.stat( e.FullPathOnFS() )
     if stat.st_ctime < e.file_details.last_hash_date:
         if DEBUG==1:
             print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
         job.current_file_num+=1
         return
-    e.file_details.hash = md5( job, e.in_dir.path_prefix+'/'+ e.name )
+    e.file_details.hash = md5( job, e.FullPathOnFS() )
     if DEBUG==1:
         print( f"{e.name} - hash={e.file_details.hash}" )
     if e.type.name == 'Image':
-        e.file_details.thumbnail = GenImageThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
+        e.file_details.thumbnail = GenImageThumbnail( job, e.FullPathOnFS() )
     elif e.type.name == 'Video':
-        e.file_details.thumbnail = GenVideoThumbnail( job, e.in_dir.path_prefix+'/'+ e.name )
+        e.file_details.thumbnail = GenVideoThumbnail( job, e.FullPathOnFS() )
     elif e.type.name == 'Unknown':
         job.current_file_num+=1
     e.file_details.last_hash_date = time.time()
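Every call site in GenHashAndThumb now goes through FullPathOnFS(). The md5() helper itself is outside this diff; a minimal sketch of a helper with that signature, assuming it hashes the file in chunks so large videos never sit wholly in memory (the job parameter is assumed to be carried only for progress/logging):

    import hashlib

    def md5(job, path):
        # Hypothetical stand-in for the project's md5 helper.
        h = hashlib.md5()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
                h.update(chunk)
        return h.hexdigest()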
@@ -667,7 +672,7 @@ def ProcessAI(job, e):
         job.current_file_num+=1
         return
-    file = e.in_dir.path_prefix + '/' + e.name
+    file = e.FullPathOnFS()
     stat = os.stat(file)
     # find if file is newer than when we found faces before (fyi: first time faces_created_on == 0)
     if stat.st_ctime > e.file_details.faces_created_on:
@@ -709,7 +714,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
     if not frl:
         frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details.eid)
     else:
-        stat=os.stat(e.in_dir.path_prefix+'/'+ e.name)
+        stat=os.stat( e.FullPathOnFS() )
         # file & refimg are not newer then we dont need to check
         if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
             print(f"OPTIM: lookForPersonInImage: file {e.name} has a previous match for: {refimg.fname}, and the file & refimg haven't changed")
@@ -757,7 +762,7 @@ def compareAI(known_encoding, unknown_encoding):
 def ProcessFilesInDir(job, e, file_func):
     if DEBUG==1:
-        print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir.path_prefix))
+        print("DEBUG: files in dir - process: {}".format(e.FullPathOnFS()) )
     if e.type.name != 'Directory':
         file_func(job, e)
     else:
@@ -873,7 +878,7 @@ def RemoveFileFromFS( del_me ):
         m=re.search( r'^static/(.+)', del_me.in_dir.path_prefix)
         dst_dir=settings.recycle_bin_path + m[1] + '/'
         os.makedirs( dst_dir,mode=0o777, exist_ok=True )
-        src=del_me.in_dir.path_prefix+'/'+del_me.name
+        src=del_me.FullPathOnFS()
         dst=dst_dir + '/' + del_me.name
         os.replace( src, dst )
     except Exception as e:
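The first context line of this hunk still reads in_dir.path_prefix directly: the regex is a search over the path, i.e. exactly the case the commit message leaves for the future path_dir_link table. Routed through the new accessor it would read (illustrative only, behaviour unchanged):

    m = re.search(r'^static/(.+)', del_me.in_dir.PathOnFS())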
@@ -903,8 +908,8 @@ def RemoveDups(job):
         found=None
         del_me_lst = []
         for f in files:
-            if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
-                AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
+            if os.path.isfile( f.FullPathOnFS() ) == False:
+                AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
             elif f.file_details.eid == int(keeping):
                 found = f
             else:
@@ -912,9 +917,9 @@ def RemoveDups(job):
         if found == None:
             AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" )
         else:
-            AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
+            AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
             for del_me in del_me_lst:
-                AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
+                AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.FullPathOnFS()}" )
                 RemoveFileFromFS( del_me )
                 RemoveFileFromDB(del_me.id)
@@ -923,14 +928,14 @@ def RemoveDups(job):
         hashes=[jex.value for jex in job.extra if jex.name == f"kdhash-{which}"][0]
         keeping=jex.value
         tmp=session.query(Dir).filter(Dir.eid==keeping).first()
-        AddLogForJob(job, f"Keeping files in {tmp.path_prefix}" )
+        AddLogForJob(job, f"Keeping files in {tmp.PathOnFS()}" )
         for hash in hashes.split(","):
             files=session.query(Entry).join(File).filter(File.hash==hash).all()
             found=None
             del_me=None
             for f in files:
-                if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False:
-                    AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file")
+                if os.path.isfile(f.FullPathOnFS()) == False:
+                    AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.FullPathOnFS()}) does not exist? ignorning file")
                 if f.in_dir.eid == int(keeping):
                     found=f
                 else:
@@ -939,8 +944,8 @@ def RemoveDups(job):
             if found == None:
                 AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" )
             else:
-                AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" )
-                AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" )
+                AddLogForJob(job, f"Keep duplicate file: {found.FullPathOnFS()}" )
+                AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.FullPathOnFS()}" )
                 RemoveFileFromFS( del_me )
                 RemoveFileFromDB(del_me.id)
                 dup_cnt += 1