Fixed BUG-11: switched from the file_person_link table to the file_refimg_link table. This enables some optimisations and means AI jobs can now be re-run without crashing. Several optimisations could still be done - see TODO

This commit is contained in:
2021-01-25 01:05:30 +11:00
parent 0829a98376
commit 18b8a30140
9 changed files with 108 additions and 89 deletions

6
BUGs
View File

@@ -1,7 +1,3 @@
### Next: 17 ### Next: 17
BUG-11: Ai ref img jobs are not able to be "re-run"
DONE - only need to calc refimgs once (so timestamp in refimg and check it)
- if we re-run a process AI job and no file changes, then don't process (as above)
- if we do see a new file/updated file, should delete all FPLs then insert new
-- probably should insert new into a file.people.append(...), rather than FPL direct
BUG-16: now we dont do dir level optimising for genfiledetails, need to stat the file / check dates that way to optimise BUG-16: now we dont do dir level optimising for genfiledetails, need to stat the file / check dates that way to optimise
BUG-17: I think it won't handle me deleting files after scan

18
TODO
View File

@@ -1,17 +1,11 @@
## DB ## DB
should FPL really be EPL?
FILE -> add, has_unidentified_face Need to think about...
?has_face?, file (image) -> has X faces, Y matches
X == Y (optim: dont scan again)
AI_SCAN: say X-Y == 1, then to optimise, we need to only check the missing
id face... at the moment, the DB structure is not that clever...
date of scan (file_refimg_link --> file_refimg_link needs a face_num?)
version of code?
settings used
AI_SCAN_FILE_LINK
id to link to AI_scan
refimg used/found
### BACKEND ### BACKEND
*** Need to use thread-safe sessions per Thread, half-assed version did not work *** Need to use thread-safe sessions per Thread, half-assed version did not work

15
ai.py
View File

@@ -5,13 +5,22 @@ from main import db, app, ma
from sqlalchemy import Sequence from sqlalchemy import Sequence
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from status import st, Status from status import st, Status
from files import Entry, File from files import Entry, File, FileRefimgLink
from person import File_Person_Link from person import Person, PersonRefimgLink
from refimg import Refimg
################################################################################ ################################################################################
# /aistats -> placholder for some sort of stats # /aistats -> placholder for some sort of stats
################################################################################ ################################################################################
@app.route("/aistats", methods=["GET", "POST"]) @app.route("/aistats", methods=["GET", "POST"])
def aistats(): def aistats():
entries=db.session.query(Entry).join(File).join(File_Person_Link).filter(File_Person_Link.file_id==File.eid).all() tmp=db.session.query(Entry,Person).join(File).join(FileRefimgLink).join(Refimg).join(PersonRefimgLink).join(Person).filter(FileRefimgLink.matched==True).all()
entries=[]
last_fname=""
for e, p in tmp:
if last_fname != e.name:
entry = { 'name': e.name, 'people': [] }
entries.append( entry )
last_fname = e.name
entry['people'].append( { 'tag': p.tag } )
return render_template("aistats.html", page_title='Placeholder', entries=entries) return render_template("aistats.html", page_title='Placeholder', entries=entries)

View File

@@ -19,9 +19,10 @@ import time
################################################################################ ################################################################################
# Local Class imports # Local Class imports
################################################################################ ################################################################################
from settings import Settings
from job import Job, Joblog, NewJob from job import Job, Joblog, NewJob
from person import Person, File_Person_Link from person import Person, PersonRefimgLink
from refimg import Refimg
from settings import Settings
################################################################################ ################################################################################
# Class describing File in the database, and via sqlalchemy, connected to the DB as well # Class describing File in the database, and via sqlalchemy, connected to the DB as well
@@ -56,13 +57,21 @@ class Entry(db.Model):
def __repr__(self): def __repr__(self):
return "<id: {}, name: {}, type={}, dir_details={}, file_details={}, in_dir={}>".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir) return "<id: {}, name: {}, type={}, dir_details={}, file_details={}, in_dir={}>".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir)
class FileRefimgLink(db.Model):
    """Link row between a file and a reference image ("file_refimg_link" table).

    Each row records the outcome of comparing one file against one reference
    image: when the comparison was recorded and whether it matched.
    """
    __tablename__ = "file_refimg_link"
    # The primary key is the (file_id, refimg_id) pair. Neither column is unique
    # on its own: a file is linked to many refimgs and a refimg to many files,
    # so a per-column UNIQUE constraint would reject every row after the first.
    file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), nullable=False, primary_key=True)
    refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), nullable=False, primary_key=True)
    # when the comparison was recorded (float; presumably epoch seconds - TODO confirm against the writer)
    when_processed = db.Column(db.Float)
    # True when the file matched the refimg; queries filter on matched==True
    matched = db.Column(db.Boolean)

    def __repr__(self):
        # closing '>' added so the repr is balanced like the other models' reprs
        return f"<file_id: {self.file_id}, refimg_id: {self.refimg_id} when_processed={self.when_processed}, matched={self.matched}>"
class File(db.Model): class File(db.Model):
__tablename__ = "file" __tablename__ = "file"
eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True ) eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
size_mb = db.Column(db.Integer, unique=False, nullable=False) size_mb = db.Column(db.Integer, unique=False, nullable=False)
hash = db.Column(db.Integer, unique=True, nullable=True) hash = db.Column(db.Integer, unique=True, nullable=True)
thumbnail = db.Column(db.String, unique=False, nullable=True) thumbnail = db.Column(db.String, unique=False, nullable=True)
people = db.relationship("Person", secondary="file_person_link" )
def __repr__(self): def __repr__(self):
return "<eid: {}, size_mb={}, hash={}>".format(self.eid, self.size_mb, self.hash ) return "<eid: {}, size_mb={}, hash={}>".format(self.eid, self.size_mb, self.hash )
@@ -97,7 +106,7 @@ def files():
def search(): def search():
file_data=Entry.query.filter(Entry.name.ilike(f"%{request.form['term']}%")).all() file_data=Entry.query.filter(Entry.name.ilike(f"%{request.form['term']}%")).all()
ai_data=Entry.query.join(File).join(File_Person_Link).filter(File_Person_Link.file_id==File.eid).join(Person).filter(Person.tag.ilike(f"%{request.form['term']}%")).all() ai_data=Entry.query.join(File).join(FileRefimgLink).join(Refimg).join(PersonRefimgLink).join(Person).filter(FileRefimgLink.matched==True).filter(Person.tag.ilike(f"%{request.form['term']}%")).all()
all_entries = file_data + ai_data all_entries = file_data + ai_data
return render_template("files.html", page_title='View Files', entry_data=all_entries) return render_template("files.html", page_title='View Files', entry_data=all_entries)

View File

@@ -14,7 +14,7 @@
### SQLALCHEMY IMPORTS ### ### SQLALCHEMY IMPORTS ###
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary, Boolean
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import relationship from sqlalchemy.orm import relationship
from sqlalchemy import create_engine from sqlalchemy import create_engine
@@ -97,14 +97,23 @@ class Entry(Base):
def __repr__(self): def __repr__(self):
return "<id: {}, name: {}, type={}, dir_details={}, file_details={}, in_dir={}>".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir) return "<id: {}, name: {}, type={}, dir_details={}, file_details={}, in_dir={}>".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir)
class FileRefimgLink(Base):
    """Link row between a file and a reference image ("file_refimg_link" table).

    Each row records the outcome of comparing one file against one reference
    image: when the comparison was recorded and whether it matched.
    """
    __tablename__ = "file_refimg_link"
    # The primary key is the (file_id, refimg_id) pair. Neither column is unique
    # on its own: a file is linked to many refimgs and a refimg to many files,
    # so a per-column UNIQUE constraint would reject every row after the first.
    file_id = Column(Integer, ForeignKey('file.eid'), nullable=False, primary_key=True)
    refimg_id = Column(Integer, ForeignKey('refimg.id'), nullable=False, primary_key=True)
    # when the comparison was recorded (float timestamp)
    when_processed = Column(Float)
    # True when the file matched the refimg, False otherwise
    matched = Column(Boolean)

    def __repr__(self):
        # closing '>' added so the repr is balanced like the other models' reprs
        return f"<file_id: {self.file_id}, refimg_id: {self.refimg_id} when_processed={self.when_processed}, matched={self.matched}>"
class File(Base): class File(Base):
__tablename__ = "file" __tablename__ = "file"
eid = Column(Integer, ForeignKey("entry.id"), primary_key=True ) eid = Column(Integer, ForeignKey("entry.id"), primary_key=True )
size_mb = Column(Integer, unique=False, nullable=False) size_mb = Column(Integer, unique=False, nullable=False)
hash = Column(Integer, unique=True, nullable=True) hash = Column(Integer, unique=True, nullable=True)
thumbnail = Column(String, unique=False, nullable=True) thumbnail = Column(String, unique=False, nullable=True)
# DDP: need bytea? in db (see other DDP comment) faces = Column( LargeBinary )
# faces =
faces_created_on = Column(Float) faces_created_on = Column(Float)
def __repr__(self): def __repr__(self):
@@ -126,7 +135,7 @@ class Settings(Base):
def __repr__(self): def __repr__(self):
return "<id: {}, import_path: {}>".format(self.id, self.import_path ) return "<id: {}, import_path: {}>".format(self.id, self.import_path )
class Person_Refimg_Link(Base): class PersonRefimgLink(Base):
__tablename__ = "person_refimg_link" __tablename__ = "person_refimg_link"
person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True) person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
refimg_id = Column(Integer, ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True) refimg_id = Column(Integer, ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
@@ -140,7 +149,7 @@ class Person(Base):
tag = Column(String(48), unique=False, nullable=False) tag = Column(String(48), unique=False, nullable=False)
surname = Column(String(48), unique=False, nullable=False) surname = Column(String(48), unique=False, nullable=False)
firstname = Column(String(48), unique=False, nullable=False) firstname = Column(String(48), unique=False, nullable=False)
refimg = relationship('Refimg', secondary=Person_Refimg_Link.__table__) refimg = relationship('Refimg', secondary=PersonRefimgLink.__table__)
def __repr__(self): def __repr__(self):
return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg) return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg)
@@ -155,14 +164,6 @@ class Refimg(Base):
def __repr__(self): def __repr__(self):
return f"<id: {id}, fname: {fname}, created_on: {created_on}, encodings: {encodings}>" return f"<id: {id}, fname: {fname}, created_on: {created_on}, encodings: {encodings}>"
class File_Person_Link(Base):
__tablename__ = "file_person_link"
file_id = Column(Integer, ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True)
person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
def __repr__(self):
return "<file_id: {}, person_id: {}>".format(self.file_id, self.person_id)
################################################################################ ################################################################################
@@ -360,7 +361,7 @@ def JobScanNow(job):
def JobForceScan(job): def JobForceScan(job):
JobProgressState( job, "In Progress" ) JobProgressState( job, "In Progress" )
session.query(File_Person_Link).delete() session.query(FileRefimgLink).delete()
session.query(EntryDirLink).delete() session.query(EntryDirLink).delete()
session.query(Dir).delete() session.query(Dir).delete()
session.query(File).delete() session.query(File).delete()
@@ -481,13 +482,8 @@ def JobImportDir(job):
return return
def JobProcessAI(job): def JobProcessAI(job):
print ("DDP: HACK - to allow re-running jobs for now, del FPL");
session.query(File_Person_Link).delete()
#### (delete the above 2 lines)
path=[jex.value for jex in job.extra if jex.name == "path"][0] path=[jex.value for jex in job.extra if jex.name == "path"][0]
path = SymlinkName(path, '/') path = SymlinkName(path, '/')
print('REMOVE AFTER TESTING ON WINDOWS... path=',path)
d=session.query(Dir).filter(Dir.path_prefix==path).first() d=session.query(Dir).filter(Dir.path_prefix==path).first()
job.num_files=d.num_files job.num_files=d.num_files
for e in FilesInDir( path ): for e in FilesInDir( path ):
@@ -518,26 +514,33 @@ def ProcessAI(job, e):
for person in people: for person in people:
generateKnownEncodings(person) generateKnownEncodings(person)
file = e.in_dir[0].path_prefix + '/' + e.name file = e.in_dir[0].path_prefix + '/' + e.name
stat = os.stat(file) stat = os.stat(file)
# only find faces if we have not already OR file is newer than when we found faces before # only find faces if we have not already OR file is newer than when we found faces before
if not e.file_details[0].faces_created_on or stat.st_ctime > e.file_details[0].faces_created_on: if not e.file_details[0].faces_created_on or stat.st_ctime > e.file_details[0].faces_created_on:
session.add(e) session.add(e)
im = Image.open(file) im_orig = Image.open(file)
try: im = ImageOps.exif_transpose(im_orig)
im = ImageOps.exif_transpose(im)
except:
print("DEBUG: looks like image does not have exif")
faces = generateUnknownEncodings(im) faces = generateUnknownEncodings(im)
# DDP: uncomment the below to optimise, but I need to store the faces into the DB, not sure how right now e.file_details[0].faces_created_on=time.time()
##### is this really 0? or will there be many with the many faces? if faces:
# if its many, should we do a faces_file_link??? flat_faces = numpy.array(faces)
# e.file_details[0].faces = faces[0].tobytes() e.file_details[0].faces = flat_faces.tobytes()
# e.file_details[0].faces_created_on=time.time() else:
# else: e.file_details[0].faces = None
# faces=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64) return
else:
if not e.file_details[0].faces:
print("OPTIM: This image has no faces, skip it")
return
recover=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64)
real_recover=numpy.reshape(recover,(-1,128))
l=[]
for el in real_recover:
l.append(numpy.array(el))
faces = l
for unknown_encoding in faces: for unknown_encoding in faces:
for person in people: for person in people:
lookForPersonInImage(job, person, unknown_encoding, e) lookForPersonInImage(job, person, unknown_encoding, e)
@@ -546,25 +549,34 @@ def ProcessAI(job, e):
def lookForPersonInImage(job, person, unknown_encoding, e): def lookForPersonInImage(job, person, unknown_encoding, e):
for refimg in person.refimg: for refimg in person.refimg:
### # lets see if we have tried this check before
# need a date_stamp in refimg_file_link, but we currently have a person_file_link frl=session.query(FileRefimgLink).filter(FileRefimgLink.file_id==e.id, FileRefimgLink.refimg_id==refimg.id).first()
# should consider whether we break this into just a scan ( id, refimg, file, date, threshold, etc.) if not frl:
### frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details[0].eid)
else:
stat=os.stat(e.in_dir[0].path_prefix+'/'+ e.name)
# if there was a previous match and neither the file nor the refimg has changed since it was processed, we don't need to check again
if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
print("OPTIM: lookForPersonInImage: file has a previous match, and the file & refimg haven't changed")
return
session.add(frl)
frl.matched=False
frl.when_processed=time.time()
deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64) deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64)
results = compareAI(deserialized_bytes, unknown_encoding) results = compareAI(deserialized_bytes, unknown_encoding)
if results[0]: if results[0]:
print(f'Found a match between: {person.tag} and {e.name}') print(f'Found a match between: {person.tag} and {e.name}')
AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}') AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}')
fpl = File_Person_Link(person_id=person.id, file_id=e.file_details[0].eid) frl.matched=True
session.add(fpl)
return return
def generateUnknownEncodings(im): def generateUnknownEncodings(im):
unknown_image = numpy.array(im) unknown_image = numpy.array(im)
face_locations = face_recognition.face_locations(unknown_image) face_locations = face_recognition.face_locations(unknown_image)
if not face_locations:
return None
unknown_encodings = face_recognition.face_encodings(unknown_image, known_face_locations=face_locations) unknown_encodings = face_recognition.face_encodings(unknown_image, known_face_locations=face_locations)
# should save these to the db
# file.locations = face_locations
return unknown_encodings return unknown_encodings
@@ -573,7 +585,7 @@ def generateKnownEncodings(person):
file = 'reference_images/'+refimg.fname file = 'reference_images/'+refimg.fname
stat = os.stat(file) stat = os.stat(file)
if refimg.created_on and stat.st_ctime < refimg.created_on: if refimg.created_on and stat.st_ctime < refimg.created_on:
print("DEBUG: skipping re-creating encoding for refimg because file has changed since we did this before") print("OPTIM: skipping re-creating encoding for refimg because file has not changed")
continue continue
img = face_recognition.load_image_file(file) img = face_recognition.load_image_file(file)
location = face_recognition.face_locations(img) location = face_recognition.face_locations(img)

View File

@@ -5,31 +5,30 @@ from main import db, app, ma
from sqlalchemy import Sequence from sqlalchemy import Sequence
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from status import st, Status from status import st, Status
from refimg import Refimg from refimg import Refimg
from refimg import Person_Refimg_Link
################################################################################ ################################################################################
# Class describing Person in the database, and via sqlalchemy, connected to the DB as well # Class describing Person in the database, and via sqlalchemy, connected to the DB as well
################################################################################ ################################################################################
class PersonRefimgLink(db.Model):
    """Link row between a person and a reference image ("person_refimg_link" table).

    Used as the `secondary` table of the Person.refimg relationship.
    """
    __tablename__ = "person_refimg_link"
    # The primary key is the (person_id, refimg_id) pair. Neither column is
    # unique on its own, otherwise a person could never have more than one refimg.
    person_id = db.Column(db.Integer, db.ForeignKey('person.id'), nullable=False, primary_key=True)
    refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), nullable=False, primary_key=True)

    def __repr__(self):
        # the format string previously had one placeholder for two arguments,
        # so refimg_id was silently dropped from the output
        return "<person_id: {}, refimg_id: {}>".format(self.person_id, self.refimg_id)
class Person(db.Model): class Person(db.Model):
id = db.Column(db.Integer, db.Sequence('person_id_seq'), primary_key=True ) id = db.Column(db.Integer, db.Sequence('person_id_seq'), primary_key=True )
tag = db.Column(db.String(48), unique=False, nullable=False) tag = db.Column(db.String(48), unique=False, nullable=False)
surname = db.Column(db.String(48), unique=False, nullable=False) surname = db.Column(db.String(48), unique=False, nullable=False)
firstname = db.Column(db.String(48), unique=False, nullable=False) firstname = db.Column(db.String(48), unique=False, nullable=False)
refimg = db.relationship('Refimg', secondary=Person_Refimg_Link.__table__) refimg = db.relationship('Refimg', secondary=PersonRefimgLink.__table__)
def __repr__(self): def __repr__(self):
return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg) return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg)
class File_Person_Link(db.Model):
__tablename__ = "file_person_link"
file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True)
person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
def __repr__(self):
return "<file_id: {}, person_id: {}>".format(self.file_id, self.person_id)
################################################################################ ################################################################################
# Helper class that inherits a .dump() method to turn class Person into json / useful in jinja2 # Helper class that inherits a .dump() method to turn class Person into json / useful in jinja2
################################################################################ ################################################################################

View File

@@ -17,13 +17,13 @@ class Refimg(db.Model):
def __repr__(self): def __repr__(self):
return "<id: {}, fname: {}>".format(self.id, self.fname ) return "<id: {}, fname: {}>".format(self.id, self.fname )
class Person_Refimg_Link(db.Model): #class Person_Refimg_Link(db.Model):
__tablename__ = "person_refimg_link" # __tablename__ = "person_refimg_link"
person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True) # person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True) # refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
#
def __repr__(self): # def __repr__(self):
return "<person_id: {}, refimg_id>".format(self.person_id, self.refimg_id) # return "<person_id: {}, refimg_id>".format(self.person_id, self.refimg_id)
################################################################################ ################################################################################
# Helper class that inherits a .dump() method to turn class Refimg into json / useful in jinja2 # Helper class that inherits a .dump() method to turn class Refimg into json / useful in jinja2

View File

@@ -8,7 +8,7 @@ create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer,
constraint PK_ENTRY_ID primary key(ID), constraint PK_ENTRY_ID primary key(ID),
constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) ); constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) );
create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float, create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float, FACES bytea,
constraint PK_FILE_ID primary key(EID), constraint PK_FILE_ID primary key(EID),
constraint FK_FILE_ENTRY_ID foreign key (EID) references ENTRY(ID) ); constraint FK_FILE_ENTRY_ID foreign key (EID) references ENTRY(ID) );
@@ -28,10 +28,10 @@ create table REFIMG ( ID integer, FNAME varchar(256), ENCODINGS bytea,
CREATED_ON fLOAT, CREATED_ON fLOAT,
constraint PK_REFIMG_ID primary key(ID) ); constraint PK_REFIMG_ID primary key(ID) );
create table FILE_PERSON_LINK ( FILE_ID integer, PERSON_ID integer, create table FILE_REFIMG_LINK ( FILE_ID integer, REFIMG_ID integer, WHEN_PROCESSED float, MATCHED boolean,
constraint PK_FPL primary key(FILE_ID, PERSON_ID), constraint PK_FRL primary key(FILE_ID, REFIMG_ID),
constraint FK_FPL_FILE_ID foreign key (FILE_ID) references FILE(EID), constraint FK_FRL_FILE_ID foreign key (FILE_ID) references FILE(EID),
constraint FK_FPL_PERSON_ID foreign key (PERSON_ID) references PERSON(ID) ); constraint FK_FRL_REFIMG_ID foreign key (REFIMG_ID) references REFIMG(ID) );
create table PERSON_REFIMG_LINK ( PERSON_ID integer, REFIMG_ID integer, create table PERSON_REFIMG_LINK ( PERSON_ID integer, REFIMG_ID integer,
constraint PK_PRL primary key(PERSON_ID, REFIMG_ID), constraint PK_PRL primary key(PERSON_ID, REFIMG_ID),

View File

@@ -6,7 +6,7 @@
<tbody><thead class="thead-light"><tr><th>File</th><th>AI Matched people</th></thead> <tbody><thead class="thead-light"><tr><th>File</th><th>AI Matched people</th></thead>
{% for e in entries %} {% for e in entries %}
<tr><td>{{e.name}}</td><td> <tr><td>{{e.name}}</td><td>
{% for p in e.file_details[0].people %} {% for p in e.people %}
{{p.tag}} {{p.tag}}
{% endfor %} {% endfor %}
</td></tr> </td></tr>