Fixed BUG-16 - Hash optimisation (last_hash_date is now in File, not Dir); also converted a few .format() calls to f-strings
This commit is contained in:
1
BUGs
1
BUGs
@@ -1,3 +1,2 @@
|
||||
### Next: 19
|
||||
BUG-16: we no longer do dir-level optimising for genfiledetails, so we need to stat each file and check its dates to optimise that way
|
||||
BUG-17: I think it won't handle files being deleted after a scan
|
||||
|
||||
@@ -70,7 +70,7 @@ class EntryDirLink(Base):
|
||||
dir_eid = Column(Integer, ForeignKey("dir.eid"), primary_key=True )
|
||||
|
||||
def __repr__(self):
|
||||
return "<entry_id: {}, dir_eid: {}>".format(self.entry_id, self.dir_eid)
|
||||
return f"<entry_id: {self.entry_id}, dir_eid: {self.dir_eid}>"
|
||||
|
||||
class Dir(Base):
|
||||
__tablename__ = "dir"
|
||||
@@ -78,11 +78,10 @@ class Dir(Base):
|
||||
path_prefix = Column(String, unique=False, nullable=False )
|
||||
num_files = Column(Integer)
|
||||
last_import_date = Column(Float)
|
||||
last_hash_date = Column(Float)
|
||||
files = relationship("Entry", secondary="entry_dir_link")
|
||||
|
||||
def __repr__(self):
|
||||
return "<eid: {}, path_prefix: {}, num_files: {}, last_import_date: {}, last_hash_date: {}>".format(self.eid, self.path_prefix, self.num_files, self.last_import_date, self.last_hash_date)
|
||||
return f"<eid: {self.eid}, path_prefix: {self.path_prefix}, num_files: {self.num_files}, last_import_date: {self.last_import_date}, files: {self.files}>"
|
||||
|
||||
class Entry(Base):
|
||||
__tablename__ = "entry"
|
||||
@@ -95,7 +94,7 @@ class Entry(Base):
|
||||
in_dir = relationship ("Dir", secondary="entry_dir_link" )
|
||||
|
||||
def __repr__(self):
|
||||
return "<id: {}, name: {}, type={}, dir_details={}, file_details={}, in_dir={}>".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir)
|
||||
return f"<id: {self.id}, name: {self.name}, type={self.type}, dir_details={self.dir_details}, file_details={self.file_details}, in_dir={self.in_dir}>"
|
||||
|
||||
class FileRefimgLink(Base):
|
||||
__tablename__ = "file_refimg_link"
|
||||
@@ -113,11 +112,12 @@ class File(Base):
|
||||
size_mb = Column(Integer, unique=False, nullable=False)
|
||||
hash = Column(Integer, unique=True, nullable=True)
|
||||
thumbnail = Column(String, unique=False, nullable=True)
|
||||
last_hash_date = Column(Float)
|
||||
faces = Column( LargeBinary )
|
||||
faces_created_on = Column(Float)
|
||||
|
||||
def __repr__(self):
|
||||
return "<eid: {}, size_mb={}, hash={}>".format(self.eid, self.size_mb, self.hash )
|
||||
return f"<eid: {self.eid}, size_mb={self.size_mb}, hash={self.hash}, last_hash_date: {self.last_hash_date}>"
|
||||
|
||||
class FileType(Base):
|
||||
__tablename__ = "file_type"
|
||||
@@ -125,7 +125,7 @@ class FileType(Base):
|
||||
name = Column(String, unique=True, nullable=False )
|
||||
|
||||
def __repr__(self):
|
||||
return "<id: {}, name={}>".format(self.id, self.name )
|
||||
return f"<id: {self.id}, name={self.name}>"
|
||||
|
||||
class Settings(Base):
|
||||
__tablename__ = "settings"
|
||||
@@ -133,7 +133,7 @@ class Settings(Base):
|
||||
import_path = Column(String)
|
||||
|
||||
def __repr__(self):
|
||||
return "<id: {}, import_path: {}>".format(self.id, self.import_path )
|
||||
return f"<id: {self.id}, import_path: {self.import_path}>"
|
||||
|
||||
class PersonRefimgLink(Base):
|
||||
__tablename__ = "person_refimg_link"
|
||||
@@ -141,7 +141,7 @@ class PersonRefimgLink(Base):
|
||||
refimg_id = Column(Integer, ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
|
||||
|
||||
def __repr__(self):
|
||||
return "<person_id: {}, refimg_id>".format(self.person_id, self.refimg_id)
|
||||
return f"<person_id: {self.person_id}, refimg_id: {self.refimg_id}>"
|
||||
|
||||
class Person(Base):
|
||||
__tablename__ = "person"
|
||||
@@ -152,7 +152,7 @@ class Person(Base):
|
||||
refimg = relationship('Refimg', secondary=PersonRefimgLink.__table__)
|
||||
|
||||
def __repr__(self):
|
||||
return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg)
|
||||
return f"<tag: {self.tag}, firstname: {self.firstname}, surname: {self.surname}, refimg: {self.refimg}>"
|
||||
|
||||
class Refimg(Base):
|
||||
__tablename__ = "refimg"
|
||||
@@ -162,7 +162,7 @@ class Refimg(Base):
|
||||
created_on = Column(Float)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<id: {id}, fname: {fname}, created_on: {created_on}, encodings: {encodings}>"
|
||||
return f"<id: {self.id}, fname: {self.fname}, created_on: {self.created_on}, encodings: {self.encodings}>"
|
||||
|
||||
|
||||
|
||||
@@ -397,7 +397,7 @@ def AddDir(job, dirname, path_prefix, in_dir):
|
||||
dir=session.query(Dir).filter(Dir.path_prefix==path_prefix).first()
|
||||
if dir:
|
||||
return dir
|
||||
dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0, last_hash_date=0 )
|
||||
dir=Dir( path_prefix=path_prefix, num_files=0, last_import_date=0 )
|
||||
dtype=session.query(FileType).filter(FileType.name=='Directory').first()
|
||||
e=Entry( name=dirname, type=dtype )
|
||||
e.dir_details.append(dir)
|
||||
@@ -417,7 +417,7 @@ def AddFile(job, fname, type_str, fsize, in_dir ):
|
||||
return e
|
||||
ftype = session.query(FileType).filter(FileType.name==type_str).first()
|
||||
e=Entry( name=fname, type=ftype )
|
||||
f=File( size_mb=fsize )
|
||||
f=File( size_mb=fsize, last_hash_date=0, faces_created_on=0 )
|
||||
e.file_details.append(f)
|
||||
e.in_dir.append(in_dir)
|
||||
AddLogForJob(job, "Found new file: {}".format(fname) )
|
||||
@@ -501,6 +501,12 @@ def FilesInDir( path ):
|
||||
return d.files
|
||||
|
||||
def GenHashAndThumb(job, e):
|
||||
stat = os.stat( e.in_dir[0].path_prefix + '/' + e.name )
|
||||
if stat.st_ctime < e.file_details[0].last_hash_date:
|
||||
print(f"OPTIM: GenHashAndThumb {e.name} file is older than last hash, skip this")
|
||||
job.current_file_num+=1
|
||||
return
|
||||
|
||||
e.file_details[0].hash = md5( job, e.in_dir[0].path_prefix+'/'+ e.name )
|
||||
if e.type.name == 'Image':
|
||||
e.file_details[0].thumbnail = GenImageThumbnail( job, e.in_dir[0].path_prefix+'/'+ e.name )
|
||||
@@ -508,6 +514,7 @@ def GenHashAndThumb(job, e):
|
||||
e.file_details[0].thumbnail = GenVideoThumbnail( job, e.in_dir[0].path_prefix+'/'+ e.name )
|
||||
elif e.type.name == 'Unknown':
|
||||
job.current_file_num+=1
|
||||
e.file_details[0].last_hash_date = time.time()
|
||||
return
|
||||
|
||||
def ProcessAI(job, e):
|
||||
@@ -518,8 +525,8 @@ def ProcessAI(job, e):
|
||||
|
||||
file = e.in_dir[0].path_prefix + '/' + e.name
|
||||
stat = os.stat(file)
|
||||
# only find faces if we have not already OR file is newer than when we found faces before
|
||||
if not e.file_details[0].faces_created_on or stat.st_ctime > e.file_details[0].faces_created_on:
|
||||
# find if file is newer than when we found faces before (fyi: first time faces_created_on == 0)
|
||||
if stat.st_ctime > e.file_details[0].faces_created_on:
|
||||
session.add(e)
|
||||
im_orig = Image.open(file)
|
||||
im = ImageOps.exif_transpose(im_orig)
|
||||
@@ -561,7 +568,7 @@ def lookForPersonInImage(job, person, unknown_encoding, e):
|
||||
stat=os.stat(e.in_dir[0].path_prefix+'/'+ e.name)
|
||||
# file & refimg are not newer then we dont need to check
|
||||
if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
|
||||
print("OPTIM: lookForPersonInImage: file has a previous match, and the file & refimg haven't changed")
|
||||
print(f"OPTIM: lookForPersonInImage: file {e.name} has a previous match for: {refimg.fname}, and the file & refimg haven't changed")
|
||||
return
|
||||
|
||||
session.add(frl)
|
||||
@@ -622,15 +629,6 @@ def JobGetFileDetails(job):
|
||||
if DEBUG==1:
|
||||
print("DEBUG: JobGetFileDetails for path={}".format( path ) )
|
||||
dir=session.query(Dir).filter(Dir.path_prefix==path).first()
|
||||
stat=os.stat( path )
|
||||
if stat.st_ctime < dir.last_hash_date:
|
||||
session.add(dir)
|
||||
dir.last_hash_date = time.time()
|
||||
FinishJob(job, "{} has not changed since last hashing - finished job".format(dir.path_prefix))
|
||||
if DEBUG==1:
|
||||
print ("DEBUG: skip this dir {} as it has not changed since last hashing".format(dir.path_prefix))
|
||||
return
|
||||
dir.last_hash_date = time.time()
|
||||
job.current_file_num = 0
|
||||
job.num_files = dir.num_files
|
||||
session.commit()
|
||||
|
||||
@@ -8,11 +8,11 @@ create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer,
|
||||
constraint PK_ENTRY_ID primary key(ID),
|
||||
constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) );
|
||||
|
||||
create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float, FACES bytea,
|
||||
create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float, FACES bytea, LAST_HASH_DATE float,
|
||||
constraint PK_FILE_ID primary key(EID),
|
||||
constraint FK_FILE_ENTRY_ID foreign key (EID) references ENTRY(ID) );
|
||||
|
||||
create table DIR ( EID integer, PATH_PREFIX varchar(256), NUM_FILES integer, LAST_IMPORT_DATE float, LAST_HASH_DATE float,
|
||||
create table DIR ( EID integer, PATH_PREFIX varchar(256), NUM_FILES integer, LAST_IMPORT_DATE float,
|
||||
constraint PK_DIR_EID primary key(EID),
|
||||
constraint FK_DIR_ENTRY_ID foreign key (EID) references ENTRY(ID) );
|
||||
|
||||
|
||||
Reference in New Issue
Block a user