removed debugs, created ClearOtherDupMessagesAndJobs() so the CheckDups jobs created by restore/delete remove stale front-end messages, e.g. delete a dup by hand and the dup error disappears :) MAIN thing though is that the crazy restore/delete paths are now fully functional: tested with content in the top level of a path, with different paths on import, and in subdirs of import paths; also tested a mass delete via the flat view of all files, then a mass restore -- ALL WORKED :)

2021-06-11 20:26:15 +10:00
parent c3398ef77a
commit 137b3f347d


@@ -124,7 +124,7 @@ class Dir(Base):
return self.in_path.path_prefix+'/'+self.rel_path
def __repr__(self):
return f"<eid: {self.eid}, last_import_date: {self.last_import_date}, files: {self.files}>"
return f"<eid: {self.eid}, rel_path: {self.rel_path}, in_path={self.in_path}, last_import_date: {self.last_import_date}, files: {self.files}>"
class Entry(Base):
__tablename__ = "entry"
@@ -627,28 +627,31 @@ def RestoreFile(job,restore_me):
orig_path = session.query(Path).filter(Path.path_prefix==orig_file_details.orig_path_prefix).first()
parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==orig_path.id).first()
# in case our new_rel_path is just the top level of the path, set
# new_dir = parent_dir so the restore_me.in_dir = new_dir assignment after the for loop still works
new_dir=parent_dir
# e.g. restore_me's rel_path was 'Import/images_to_process/1111' and orig_path was 'static/Import/images_to_process'; the new rel_path needs to be just the '1111' bit...
new_rel_path='static/'+restore_me.in_dir.rel_path
new_rel_path=new_rel_path.replace(orig_file_details.orig_path_prefix+'/', '')
new_rel_path='1111'
new_rel_path=new_rel_path.replace(orig_file_details.orig_path_prefix, '')
if len(new_rel_path) > 0 and new_rel_path[-1] == '/':
new_rel_path=new_rel_path[0:-1]
# okay, go through the new relative path and AddDir any missing subdirs of this
# path (think Import/Dir1/Dir2) which, because we pass orig_path to AddDir, will
# create static/Import, static/Import/Dir1, static/Import/Dir1/Dir2
part_rel_path=""
for dirname in new_rel_path.split("/"):
part_rel_path += f"{dirname}"
### DDP: when restoring, an original dir might have been removed, so we need to make it (if needed)
os.makedirs( dirname,mode=0o777, exist_ok=True )
new_dir=AddDir( job, dirname, parent_dir, part_rel_path, orig_path )
parent_dir=new_dir
part_rel_path += "/"
restore_me.in_dir = new_dir
### DDP: when restoring, an original dir tree might have been removed, so we need to make it (if needed)
os.makedirs( os.path.dirname(restore_me.FullPathOnFS()),mode=0o777, exist_ok=True )
# remove DelFile entry for this restored file
session.query(DelFile).filter(DelFile.file_eid==restore_me.id).delete()
session.commit()
CheckForDups(job)
print("TODO: test the check_dups above works")
return
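The string arithmetic above is easy to get wrong, so here is a minimal, self-contained sketch of the intent, assuming the goal is to reduce the restored file's path to just the portion below the original path prefix. The helper name and literals are illustrative, not from the codebase, and strip('/') is used here in place of the exact slash handling in the diff:

def rebuild_rel_path(rel_path, orig_path_prefix):
    # Prepend 'static/' (the on-disk root the Path rows use), cut away the
    # original path prefix, then drop any leftover separator so only the
    # sub-path below the prefix survives.
    new_rel_path = 'static/' + rel_path
    new_rel_path = new_rel_path.replace(orig_path_prefix, '')
    return new_rel_path.strip('/')

# Example from the in-code comment: only the '1111' bit should survive,
# and top-level content reduces to the empty string.
print(rebuild_rel_path('Import/images_to_process/1111',
                       'static/Import/images_to_process'))  # -> '1111'
print(rebuild_rel_path('Import/images_to_process',
                       'static/Import/images_to_process'))  # -> ''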
# Function that moves a file we are "deleting" to the recycle bin, it moves the
@@ -669,7 +672,6 @@ def MoveFileToRecycleBin(job,del_me):
bin_path=session.query(Path).join(PathType).filter(PathType.name=='Bin').first()
parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==bin_path.id).first()
print( f"need to keep this (ind): {del_me.in_dir.in_path.path_prefix}" )
# if we ever need to restore, let's remember this file's original path
# (use a string in case the dir/path is ever deleted from FS (and then DB) and we need to recreate)
del_file_details = DelFile( file_eid=del_me.id, orig_path_prefix=del_me.in_dir.in_path.path_prefix )
@@ -827,10 +829,10 @@ def JobImportDir(job):
year, month, day, woy = GetDateFromFile(fname, stat)
e=AddFile( job, basename, type_str, fsize, dir, year, month, day, woy )
else:
if DEBUG==1:
print( f"DEBUG: { basename} - {stat.st_ctime} is OLDER than {dir.last_import_date}" )
e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==basename,Dir.eid==dir.eid).first()
e.exists_on_fs=True
if DEBUG==1:
print("DEBUG: {} - {} is OLDER than {}".format( basename, stat.st_ctime, dir.last_import_date ), basename )
job.current_file=basename
job.current_file_num+=1
job.current_file_num += len(subdirs)
@@ -948,7 +950,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64)
results = compareAI(deserialized_bytes, unknown_encoding)
if results[0]:
print(f'Found a match between: {person.tag} and {e.name}')
print(f'DEBUG: Found a match between: {person.tag} and {e.name}')
AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}')
frl.matched=True
return
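For context on the frombuffer call above: it only reconstructs the encoding correctly if the stored bytes were produced from a float64 array in the first place. A minimal round-trip sketch; the 128-dim size and variable names are illustrative, only the frombuffer/dtype usage mirrors the diff:

import numpy

# A face encoding like the ones stored on refimg.encodings (128 float64
# values is typical for face-recognition libraries).
encoding = numpy.random.rand(128).astype(numpy.float64)

# Serialize to raw bytes for storage in a BLOB column...
stored_bytes = encoding.tobytes()

# ...and deserialize exactly as lookForPersonInImage does.
restored = numpy.frombuffer(stored_bytes, dtype=numpy.float64)

assert numpy.array_equal(encoding, restored)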
@@ -998,7 +1000,6 @@ def JobGetFileDetails(job):
#### I think the fix here is to get JobImportDir (or whatever makes the PATH) to add a jex for path_prefix and just pull it here, and stop 're-creating' it via SymlinkName
path=[jex.value for jex in job.extra if jex.name == "path"][0]
path_prefix=[jex.value for jex in job.extra if jex.name == "path_prefix"][0]
print( f"JobGetFileDetails({job}) -- pp={path_prefix}" )
if DEBUG==1:
print("DEBUG: JobGetFileDetails for path={}".format( path_prefix ) )
p=session.query(Path).filter(Path.path_prefix==path_prefix).first()
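The [jex.value for jex in job.extra if jex.name == "path"][0] idiom above raises a bare IndexError when the extra was never recorded. A hypothetical helper (not in the codebase) with the same behaviour but a clearer failure mode:

def get_job_extra(job, name):
    # Return the value of the first job extra with the given name,
    # mirroring the list-comprehension-plus-[0] idiom in JobGetFileDetails.
    for jex in job.extra:
        if jex.name == name:
            return jex.value
    raise KeyError(f"job {job} has no extra named {name!r}")

path = get_job_extra(job, "path")
path_prefix = get_job_extra(job, "path_prefix")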
@@ -1083,8 +1084,22 @@ def GenVideoThumbnail(job, file):
return None
return thumbnail
# utility function to clear any other future Duplicate messages, called if we
# either create a "new" CheckDups (often del/restore related), OR because we
# are actually handling the dups now from a front-end click through to
# /removedups, but some other job has since created another dup message...
def ClearOtherDupMessagesAndJobs():
msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups')
for msg in msgs:
session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete()
cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
for j in cd_jobs:
FinishJob(j, "New CheckForDups job/removal supercedes this job, withdrawing it", "Withdrawn")
session.commit()
def CheckForDups(job):
AddLogForJob( job, f"Check for duplicates" )
ClearOtherDupMessagesAndJobs()
res = session.execute( "select count(e1.id) from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1, entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2 where e1.id = f1.eid and e2.id = f2.eid and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id and p1.type_id != (select id from path_type where name = 'Bin') and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid and p2.type_id != (select id from path_type where name = 'Bin') and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb" )
for row in res:
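Since the raw dup-detection query above is a single long line, here it is again reformatted purely for readability (identical semantics: count pairs of distinct entries, both outside the 'Bin' path type, that share the same hash and size_mb):

DUP_COUNT_SQL = """
select count(e1.id)
from entry e1, file f1, dir d1, entry_dir_link edl1, path_dir_link pdl1, path p1,
     entry e2, file f2, dir d2, entry_dir_link edl2, path_dir_link pdl2, path p2
where e1.id = f1.eid and e2.id = f2.eid
  and d1.eid = edl1.dir_eid and edl1.entry_id = e1.id
  and edl2.dir_eid = d2.eid and edl2.entry_id = e2.id
  and p1.type_id != (select id from path_type where name = 'Bin')
  and p1.id = pdl1.path_id and pdl1.dir_eid = d1.eid
  and p2.type_id != (select id from path_type where name = 'Bin')
  and p2.id = pdl2.path_id and pdl2.dir_eid = d2.eid
  and f1.hash = f2.hash and e1.id != e2.id and f1.size_mb = f2.size_mb
"""
res = session.execute(DUP_COUNT_SQL)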
@@ -1099,13 +1114,7 @@ def CheckForDups(job):
def RemoveDups(job):
AddLogForJob(job, f"INFO: Starting Remove Duplicates job...")
# as checkdups covers all dups, delete all future dups messages, and Withdraw future checkdups jobs
msgs=session.query(PA_JobManager_FE_Message).join(Job).filter(Job.name=='checkdups')
for msg in msgs:
session.query(PA_JobManager_FE_Message).filter(PA_JobManager_FE_Message.id==msg.id).delete()
cd_jobs=session.query(Job).filter(Job.name=='checkdups').filter(Job.pa_job_state=='New').all()
for j in cd_jobs:
FinishJob(j, "Just removed duplicates - so no need to do any other checkdups, we will force 1 last one after the remove step", "Withdrawn")
session.commit()
ClearOtherDupMessagesAndJobs()
dup_cnt=0
for jex in job.extra:
@@ -1173,8 +1182,11 @@ def JobDeleteFiles(job):
for jex in job.extra:
if 'eid-' in jex.name:
del_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
AddLogForJob(job, f"INFO: Removing file: #{del_me.id} -> {del_me}" )
AddLogForJob(job, f"INFO: Removing file: #{del_me.id} -> {del_me.name}" )
MoveFileToRecycleBin(job,del_me)
now=datetime.now(pytz.utc)
next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
session.add(next_job)
FinishJob(job, f"Finished deleting selected file(s)")
return
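Both JobDeleteFiles (above) and JobRestoreFiles (below) now end by queuing a fresh 'checkdups' job. A sketch of that follow-up-job pattern pulled out into a function; the Job fields, datetime and pytz usage are taken from the diff, the wrapper itself is illustrative:

from datetime import datetime
import pytz

def queue_checkdups_job(session, Job):
    # Queue a fresh 'checkdups' job so any dup messages made stale by the
    # delete/restore we just finished get re-checked (and cleared via
    # ClearOtherDupMessagesAndJobs when that job runs).
    now = datetime.now(pytz.utc)
    next_job = Job(start_time=now, last_update=now, name="checkdups",
                   state="New", wait_for=None, pa_job_state="New",
                   current_file_num=0)
    session.add(next_job)
    return next_job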
@@ -1185,6 +1197,9 @@ def JobRestoreFiles(job):
restore_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
AddLogForJob(job, f"INFO: Removing file: #{restore_me.id} -> {restore_me}" )
RestoreFile(job,restore_me)
now=datetime.now(pytz.utc)
next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
session.add(next_job)
FinishJob(job, f"Finished restoring selected file(s)")
return