From fddd47a1ab5a990ee18cb96cb0474d79d541241c Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Sat, 10 Apr 2021 11:28:17 +1000 Subject: [PATCH] now using uselist=False to ensure file_details, dir_details, in_dir are one-to-one, and don't have a useless 1-element list. updated TODO to reflect the order in which to tackle the changeover to paths and dirs --- TODO | 13 ++++---- files.py | 6 ++-- pa_job_manager.py | 72 ++++++++++++++++++++-------------------- templates/file_list.html | 8 ++--- templates/files.html | 30 ++++++++--------- 5 files changed, 65 insertions(+), 64 deletions(-) diff --git a/TODO b/TODO index 741094b..08d4e32 100644 --- a/TODO +++ b/TODO @@ -1,14 +1,18 @@ ## GENERAL + * need a path_details_dir_link table (path_details -> type 'import/storage/recycle' + - need to make some FILE/DIR, etc. funcs into OO (hide all use of path_prefix, then we can do the below) + - stop fudging the path in DIR (to add static), and just add 'static/' + path_details_dir.prefix + dir.rel_path + - path has overall file count, so we dont fudge the file count for parent dir in the import, etc. + -- do we even need/care about per Dir counts? (I dont think so) + - ignore *thumb* -- but consider how we do this and don't screw up 'dir/job counts' * storage_path viewing needs to be by folder / not a big grab bag of files (by default) -- mostly done. Need to toggle the view if I want, and when viewing storage area, change single-click to be view file again, and right-click to be my context menu - * when we put files in recycle bin, they need to stay in the DB and just have their root/base path moved (so they can be view as per above/below) - - do I need a 'import/storage/recycle' path/dir 'type'? 
* doing actual file deletes needed again [DONE] - decided a recycle bin would be good [DONE] + - when we put files in recycle bin, they need to stay in the DB and just have their root/base path moved (so they can be view as per above/below) - could also allow undelete per file / show content as another Files->View and more like storage (i.e. show folders) * AddJobForLog can absorb DEBUGs, etc. in fact fix up logging in general * comment your code - * do we need to make some funcs/code into OO? * need a way for page to show we are in import_path or storage_path ## DB @@ -20,9 +24,6 @@ (file_refimg_link --> file_refimg_link needs a face_num?) ### BACKEND - scan storage/import dir: - ignore *thumb* -- but consider how we do this and don't screw up 'dir/job counts' - -- started on some basic optimisations (commit logs every 100 logs, not each log) - with debugs: import = 04:11, getfiledetails== 0:35:35 - without debugs: import == 04:03, getfiledetails == 0:35:36 -- not a sig diff diff --git a/files.py b/files.py index 9441089..854bb0e 100644 --- a/files.py +++ b/files.py @@ -55,9 +55,9 @@ class Entry(db.Model): name = db.Column(db.String, unique=False, nullable=False ) type_id = db.Column(db.Integer, db.ForeignKey("file_type.id")) type = db.relationship("FileType") - dir_details = db.relationship( "Dir") - file_details = db.relationship( "File" ) - in_dir = db.relationship ("Dir", secondary="entry_dir_link" ) + dir_details = db.relationship( "Dir", uselist=False ) + file_details = db.relationship( "File", uselist=False ) + in_dir = db.relationship ("Dir", secondary="entry_dir_link", uselist=False ) def __repr__(self): return "".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir) diff --git a/pa_job_manager.py b/pa_job_manager.py index 37f1c60..8446d96 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -95,9 +95,9 @@ class Entry(Base): type_id = Column(Integer, ForeignKey("file_type.id")) exists_on_fs=Column(Boolean) 
type=relationship("FileType") - dir_details = relationship( "Dir") - file_details = relationship( "File" ) - in_dir = relationship ("Dir", secondary="entry_dir_link" ) + dir_details = relationship( "Dir", uselist=False ) + file_details = relationship( "File", uselist=False ) + in_dir = relationship ("Dir", secondary="entry_dir_link", uselist=False ) def __repr__(self): return f"" @@ -444,10 +444,10 @@ def AddDir(job, dirname, path_prefix, in_dir): dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0 ) dtype=session.query(FileType).filter(FileType.name=='Directory').first() e=Entry( name=dirname, type=dtype, exists_on_fs=True ) - e.dir_details.append(dir) + e.dir_details=dir # no in_dir occurs when we Add the actual Dir for the import_path (top of the tree) if in_dir: - e.in_dir.append(in_dir) + e.in_dir=in_dir if DEBUG==1: print(f"AddDir: created d={dirname}, pp={path_prefix}") AddLogForJob(job, f"DEBUG: Process new dir: {dirname}") @@ -463,8 +463,8 @@ def AddFile(job, fname, type_str, fsize, in_dir, year, month, day, woy ): ftype = session.query(FileType).filter(FileType.name==type_str).first() e=Entry( name=fname, type=ftype, exists_on_fs=True ) f=File( size_mb=fsize, last_hash_date=0, faces_created_on=0, year=year, month=month, day=day, woy=woy ) - e.file_details.append(f) - e.in_dir.append(in_dir) + e.file_details = f + e.in_dir=in_dir AddLogForJob(job, "Found new file: {}".format(fname) ) session.add(e) return e @@ -643,23 +643,23 @@ def GenHashAndThumb(job, e): # commit every 100 files to see progress being made but not hammer the database if job.current_file_num % 100 == 0: session.commit() - stat = os.stat( e.in_dir[0].path_prefix + '/' + e.name ) - if stat.st_ctime < e.file_details[0].last_hash_date: + stat = os.stat( e.in_dir.path_prefix + '/' + e.name ) + if stat.st_ctime < e.file_details.last_hash_date: if DEBUG==1: print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this") job.current_file_num+=1 return - 
e.file_details[0].hash = md5( job, e.in_dir[0].path_prefix+'/'+ e.name ) + e.file_details.hash = md5( job, e.in_dir.path_prefix+'/'+ e.name ) if DEBUG==1: - print( f"{e.name} - hash={e.file_details[0].hash}" ) + print( f"{e.name} - hash={e.file_details.hash}" ) if e.type.name == 'Image': - e.file_details[0].thumbnail = GenImageThumbnail( job, e.in_dir[0].path_prefix+'/'+ e.name ) + e.file_details.thumbnail = GenImageThumbnail( job, e.in_dir.path_prefix+'/'+ e.name ) elif e.type.name == 'Video': - e.file_details[0].thumbnail = GenVideoThumbnail( job, e.in_dir[0].path_prefix+'/'+ e.name ) + e.file_details.thumbnail = GenVideoThumbnail( job, e.in_dir.path_prefix+'/'+ e.name ) elif e.type.name == 'Unknown': job.current_file_num+=1 - e.file_details[0].last_hash_date = time.time() + e.file_details.last_hash_date = time.time() return def ProcessAI(job, e): @@ -667,29 +667,29 @@ def ProcessAI(job, e): job.current_file_num+=1 return - file = e.in_dir[0].path_prefix + '/' + e.name + file = e.in_dir.path_prefix + '/' + e.name stat = os.stat(file) # find if file is newer than when we found faces before (fyi: first time faces_created_on == 0) - if stat.st_ctime > e.file_details[0].faces_created_on: + if stat.st_ctime > e.file_details.faces_created_on: session.add(e) im_orig = Image.open(file) im = ImageOps.exif_transpose(im_orig) faces = generateUnknownEncodings(im) - e.file_details[0].faces_created_on=time.time() + e.file_details.faces_created_on=time.time() if faces: flat_faces = numpy.array(faces) - e.file_details[0].faces = flat_faces.tobytes() + e.file_details.faces = flat_faces.tobytes() else: - e.file_details[0].faces = None + e.file_details.faces = None job.current_file_num+=1 return else: - if not e.file_details[0].faces: + if not e.file_details.faces: print("OPTIM: This image has no faces, skip it") job.current_file_num+=1 return - recover=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64) + recover=numpy.frombuffer(e.file_details.faces,dtype=numpy.float64) 
real_recover=numpy.reshape(recover,(-1,128)) l=[] for el in real_recover: @@ -707,9 +707,9 @@ def lookForPersonInImage(job, person, unknown_encoding, e): # lets see if we have tried this check before frl=session.query(FileRefimgLink).filter(FileRefimgLink.file_id==e.id, FileRefimgLink.refimg_id==refimg.id).first() if not frl: - frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details[0].eid) + frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details.eid) else: - stat=os.stat(e.in_dir[0].path_prefix+'/'+ e.name) + stat=os.stat(e.in_dir.path_prefix+'/'+ e.name) # file & refimg are not newer then we dont need to check if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed: print(f"OPTIM: lookForPersonInImage: file {e.name} has a previous match for: {refimg.fname}, and the file & refimg haven't changed") @@ -757,7 +757,7 @@ def compareAI(known_encoding, unknown_encoding): def ProcessFilesInDir(job, e, file_func): if DEBUG==1: - print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir[0].path_prefix)) + print("DEBUG: files in dir - process: {} {}".format(e.name, e.in_dir.path_prefix)) if e.type.name != 'Directory': file_func(job, e) else: @@ -870,10 +870,10 @@ def CheckForDups(job): def RemoveFileFromFS( del_me ): try: settings = session.query(Settings).first() - m=re.search( r'^static/(.+)', del_me.in_dir[0].path_prefix) + m=re.search( r'^static/(.+)', del_me.in_dir.path_prefix) dst_dir=settings.recycle_bin_path + m[1] + '/' os.makedirs( dst_dir,mode=0o777, exist_ok=True ) - src=del_me.in_dir[0].path_prefix+'/'+del_me.name + src=del_me.in_dir.path_prefix+'/'+del_me.name dst=dst_dir + '/' + del_me.name os.replace( src, dst ) except Exception as e: @@ -903,18 +903,18 @@ def RemoveDups(job): found=None del_me_lst = [] for f in files: - if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False: - AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) 
does not exist? ignorning file") - elif f.file_details[0].eid == int(keeping): + if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False: + AddLogForJob( job, f"ERROR: (per file del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? ignorning file") + elif f.file_details.eid == int(keeping): found = f else: del_me_lst.append(f) if found == None: AddLogForJob( job, f"ERROR: (per file dup) Cannot find file with hash={hash} to process - skipping it)" ) else: - AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" ) + AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" ) for del_me in del_me_lst: - AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" ) + AddLogForJob(job, f"Remove duplicate (per file dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" ) RemoveFileFromFS( del_me ) RemoveFileFromDB(del_me.id) @@ -929,9 +929,9 @@ def RemoveDups(job): found=None del_me=None for f in files: - if os.path.isfile(f.in_dir[0].path_prefix+'/'+f.name) == False: - AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir[0].path_prefix}/{f.name}) does not exist? ignorning file") - if f.in_dir[0].eid == int(keeping): + if os.path.isfile(f.in_dir.path_prefix+'/'+f.name) == False: + AddLogForJob( job, f"ERROR: (per path del) file (DB id: {f.eid} - {f.in_dir.path_prefix}/{f.name}) does not exist? 
ignorning file") + if f.in_dir.eid == int(keeping): found=f else: del_me=f @@ -939,8 +939,8 @@ def RemoveDups(job): if found == None: AddLogForJob( job, f"ERROR: (per path dup - dir id={keeping}) Cannot find file with hash={hash} to process - skipping it)" ) else: - AddLogForJob(job, f"Keep duplicate file: {found.in_dir[0].path_prefix}/{found.name}" ) - AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir[0].path_prefix}/{del_me.name}" ) + AddLogForJob(job, f"Keep duplicate file: {found.in_dir.path_prefix}/{found.name}" ) + AddLogForJob(job, f"Remove duplicate (per path dup) file: {del_me.in_dir.path_prefix}/{del_me.name}" ) RemoveFileFromFS( del_me ) RemoveFileFromDB(del_me.id) dup_cnt += 1 diff --git a/templates/file_list.html b/templates/file_list.html index 55c5b15..7eae553 100644 --- a/templates/file_list.html +++ b/templates/file_list.html @@ -13,16 +13,16 @@
{% if obj.type.name=="Image" %} {% set icon="fa-file-image" %} - + {% elif obj.type.name == "Video" %} {% set icon="fa-film" %} - + {% elif obj.type.name == "Directory" %} {% set icon="fa-folder" %} {% else %} {% set icon="fa-question-circle" %} {% endif %} - + {% if obj.type.name=="Image" or obj.type.name == "Video" %} {% endif %} @@ -35,7 +35,7 @@ {% endif %} {% if obj.type.name != "Directory" %} - {{obj.file_details[0].size_mb}}{{obj.in_dir[0].path_prefix}}{{obj.file_details[0].hash}} + {{obj.file_details.size_mb}}{{obj.in_dir.path_prefix}}{{obj.file_details.hash}} {% else %} {% endif %} diff --git a/templates/files.html b/templates/files.html index caa1b49..f175516 100644 --- a/templates/files.html +++ b/templates/files.html @@ -108,41 +108,41 @@ {% endif %} {% endif %} {% if grouping == "Day" %} - {% if last.printed != obj.file_details[0].day %} + {% if last.printed != obj.file_details.day %} {% if last.printed > 0 %}
{% endif %} -
Day: {{obj.file_details[0].day}} of {{obj.file_details[0].month}}/{{obj.file_details[0].year}}
+
Day: {{obj.file_details.day}} of {{obj.file_details.month}}/{{obj.file_details.year}}
- {% set last.printed = obj.file_details[0].day %} + {% set last.printed = obj.file_details.day %} {% endif %} {% elif grouping == "Week" %} - {% if last.printed != obj.file_details[0].woy %} + {% if last.printed != obj.file_details.woy %} {% if last.printed > 0 %}
{% endif %} -
Week #: {{obj.file_details[0].woy}} of {{obj.file_details[0].year}}
+
Week #: {{obj.file_details.woy}} of {{obj.file_details.year}}
- {% set last.printed = obj.file_details[0].woy %} + {% set last.printed = obj.file_details.woy %} {% endif %} {% elif grouping == "Month" %} - {% if last.printed != obj.file_details[0].month %} + {% if last.printed != obj.file_details.month %} {% if last.printed > 0 %}
{% endif %} -
Month: {{obj.file_details[0].month}} of {{obj.file_details[0].year}}
+
Month: {{obj.file_details.month}} of {{obj.file_details.year}}
- {% set last.printed = obj.file_details[0].month %} + {% set last.printed = obj.file_details.month %} {% endif %} {% endif %} {% if obj.type.name != "Directory" %} - {% if (not folders) or ((obj.in_dir[0].path_prefix+'/'+obj.name) | TopLevelFolderOf(cwd)) %} -
+ {% if (not folders) or ((obj.in_dir.path_prefix+'/'+obj.name) | TopLevelFolderOf(cwd)) %} +
{% if obj.type.name=="Image" %} - + {% elif obj.type.name == "Video" %}
- +
@@ -153,8 +153,8 @@ {% endif %} {% else %} {% if folders %} - {% if (cwd != obj.dir_details[0].path_prefix) and (obj.dir_details[0].path_prefix | TopLevelFolderOf(cwd)) %} -
+ {% if (cwd != obj.dir_details.path_prefix) and (obj.dir_details.path_prefix | TopLevelFolderOf(cwd)) %} +
{{obj.name}}