Fixed BUG-11: we have switched from the file_person_link table to the file_refimg_link table. This enables some optimisations, and AI jobs can now be re-run without crashing. Several further optimisations could still be made - see TODO

This commit is contained in:
2021-01-25 01:05:30 +11:00
parent 0829a98376
commit 18b8a30140
9 changed files with 108 additions and 89 deletions

6
BUGs
View File

@@ -1,7 +1,3 @@
### Next: 17
BUG-11: Ai ref img jobs are not able to be "re-run"
DONE - only need to calc refimgs once (so timestamp in refimg and check it)
- if we re-run a process AI job and no file changes, then don't process (as above)
- if we do see a new file/updated file, should delete all FPLs then insert new
-- probably should insert new into a file.people.append(...), rather than FPL direct
BUG-16: now that we don't do dir-level optimising for genfiledetails, we need to stat the file / check its dates to optimise that way
BUG-17: I think it won't handle me deleting files after scan

18
TODO
View File

@@ -1,17 +1,11 @@
## DB
should FPL really be EPL?
FILE -> add, has_unidentified_face
?has_face?,
AI_SCAN:
id
date of scan
version of code?
settings used
AI_SCAN_FILE_LINK
id to link to AI_scan
refimg used/found
Need to think about...
file (image) -> has X faces, Y matches
X == Y (optim: dont scan again)
say X-Y == 1, then to optimise, we need to only check the missing
face... at the moment, the DB structure is not that clever...
(file_refimg_link --> file_refimg_link needs a face_num?)
### BACKEND
*** Need to use thread-safe sessions per Thread, half-assed version did not work

15
ai.py
View File

@@ -5,13 +5,22 @@ from main import db, app, ma
from sqlalchemy import Sequence
from sqlalchemy.exc import SQLAlchemyError
from status import st, Status
from files import Entry, File
from person import File_Person_Link
from files import Entry, File, FileRefimgLink
from person import Person, PersonRefimgLink
from refimg import Refimg
################################################################################
# /aistats -> placeholder for some sort of stats
################################################################################
@app.route("/aistats", methods=["GET", "POST"])
def aistats():
    """Render aistats.html with one row per file that has an AI match.

    Each element of ``entries`` is a dict ``{'name': <entry name>,
    'people': [{'tag': <person tag>}, ...]}``.

    Rows are grouped by entry id rather than by comparing adjacent file names:
    the query has no ORDER BY, so rows for one file are not guaranteed to be
    adjacent, and two different files may share a name.
    """
    rows = (
        db.session.query(Entry, Person)
        .join(File)
        .join(FileRefimgLink)
        .join(Refimg)
        .join(PersonRefimgLink)
        .join(Person)
        .filter(FileRefimgLink.matched==True)
        .all()
    )

    # dicts keep insertion order, so files appear in the order first seen
    by_entry = {}
    for e, p in rows:
        entry = by_entry.setdefault(e.id, { 'name': e.name, 'people': [] })
        person = { 'tag': p.tag }
        # a person matched through several reference images is listed once
        if person not in entry['people']:
            entry['people'].append(person)

    entries = list(by_entry.values())
    return render_template("aistats.html", page_title='Placeholder', entries=entries)

View File

@@ -19,9 +19,10 @@ import time
################################################################################
# Local Class imports
################################################################################
from settings import Settings
from job import Job, Joblog, NewJob
from person import Person, File_Person_Link
from person import Person, PersonRefimgLink
from refimg import Refimg
from settings import Settings
################################################################################
# Class describing File in the database, and via sqlalchemy, connected to the DB as well
@@ -56,13 +57,21 @@ class Entry(db.Model):
def __repr__(self):
return "<id: {}, name: {}, type={}, dir_details={}, file_details={}, in_dir={}>".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir)
class FileRefimgLink(db.Model):
    """Link row between a file and a reference image (table file_refimg_link).

    Alongside the two foreign keys it stores when the pair was last processed
    and whether that comparison matched.
    """
    __tablename__ = "file_refimg_link"

    # (file_id, refimg_id) together form the primary key. Neither column is
    # unique on its own: the table definition only declares the composite key,
    # and a per-column unique constraint would limit every file to a single
    # reference image (and vice versa).
    file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), nullable=False, primary_key=True)
    refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), nullable=False, primary_key=True)
    # float timestamp of the last comparison for this pair
    when_processed = db.Column(db.Float)
    # True when the last comparison found the reference image in the file
    matched = db.Column(db.Boolean)

    def __repr__(self):
        # closing '>' added so the repr is balanced like the other models
        return f"<file_id: {self.file_id}, refimg_id: {self.refimg_id} when_processed={self.when_processed}, matched={self.matched}>"
class File(db.Model):
__tablename__ = "file"
eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
size_mb = db.Column(db.Integer, unique=False, nullable=False)
hash = db.Column(db.Integer, unique=True, nullable=True)
thumbnail = db.Column(db.String, unique=False, nullable=True)
people = db.relationship("Person", secondary="file_person_link" )
def __repr__(self):
return "<eid: {}, size_mb={}, hash={}>".format(self.eid, self.size_mb, self.hash )
@@ -97,7 +106,7 @@ def files():
def search():
    """Render files.html with entries whose name, or an AI-matched person's tag,
    contains the submitted form field 'term' (case-insensitive).

    Name matches come first, then person-tag matches; an entry that qualifies
    both ways is listed once.
    """
    term = request.form['term']
    file_data=Entry.query.filter(Entry.name.ilike(f"%{term}%")).all()
    # only links flagged as matched count as "this person is in this file"
    ai_data=Entry.query.join(File).join(FileRefimgLink).join(Refimg).join(PersonRefimgLink).join(Person).filter(FileRefimgLink.matched==True).filter(Person.tag.ilike(f"%{term}%")).all()

    # merge the two result sets, keeping first-seen order and dropping repeats
    seen_ids = set()
    all_entries = []
    for entry in file_data + ai_data:
        if entry.id not in seen_ids:
            seen_ids.add(entry.id)
            all_entries.append(entry)
    return render_template("files.html", page_title='View Files', entry_data=all_entries)

View File

@@ -14,7 +14,7 @@
### SQLALCHEMY IMPORTS ###
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary
from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary, Boolean
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine
@@ -97,14 +97,23 @@ class Entry(Base):
def __repr__(self):
return "<id: {}, name: {}, type={}, dir_details={}, file_details={}, in_dir={}>".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir)
class FileRefimgLink(Base):
    """Link row between a file and a reference image (table file_refimg_link).

    Alongside the two foreign keys it stores when the pair was last processed
    and whether that comparison matched.
    """
    __tablename__ = "file_refimg_link"

    # (file_id, refimg_id) together form the primary key. Neither column is
    # unique on its own: the table definition only declares the composite key,
    # and a per-column unique constraint would limit every file to a single
    # reference image (and vice versa).
    file_id = Column(Integer, ForeignKey('file.eid'), nullable=False, primary_key=True)
    refimg_id = Column(Integer, ForeignKey('refimg.id'), nullable=False, primary_key=True)
    # float timestamp of the last comparison for this pair
    when_processed = Column(Float)
    # True when the last comparison found the reference image in the file
    matched = Column(Boolean)

    def __repr__(self):
        # closing '>' added so the repr is balanced like the other models
        return f"<file_id: {self.file_id}, refimg_id: {self.refimg_id} when_processed={self.when_processed}, matched={self.matched}>"
class File(Base):
__tablename__ = "file"
eid = Column(Integer, ForeignKey("entry.id"), primary_key=True )
size_mb = Column(Integer, unique=False, nullable=False)
hash = Column(Integer, unique=True, nullable=True)
thumbnail = Column(String, unique=False, nullable=True)
# DDP: need bytea? in db (see other DDP comment)
# faces =
faces = Column( LargeBinary )
faces_created_on = Column(Float)
def __repr__(self):
@@ -126,7 +135,7 @@ class Settings(Base):
def __repr__(self):
return "<id: {}, import_path: {}>".format(self.id, self.import_path )
class Person_Refimg_Link(Base):
class PersonRefimgLink(Base):
__tablename__ = "person_refimg_link"
person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
refimg_id = Column(Integer, ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
@@ -140,7 +149,7 @@ class Person(Base):
tag = Column(String(48), unique=False, nullable=False)
surname = Column(String(48), unique=False, nullable=False)
firstname = Column(String(48), unique=False, nullable=False)
refimg = relationship('Refimg', secondary=Person_Refimg_Link.__table__)
refimg = relationship('Refimg', secondary=PersonRefimgLink.__table__)
def __repr__(self):
return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg)
@@ -155,14 +164,6 @@ class Refimg(Base):
def __repr__(self):
return f"<id: {id}, fname: {fname}, created_on: {created_on}, encodings: {encodings}>"
class File_Person_Link(Base):
__tablename__ = "file_person_link"
file_id = Column(Integer, ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True)
person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
def __repr__(self):
return "<file_id: {}, person_id: {}>".format(self.file_id, self.person_id)
################################################################################
@@ -360,7 +361,7 @@ def JobScanNow(job):
def JobForceScan(job):
JobProgressState( job, "In Progress" )
session.query(File_Person_Link).delete()
session.query(FileRefimgLink).delete()
session.query(EntryDirLink).delete()
session.query(Dir).delete()
session.query(File).delete()
@@ -481,13 +482,8 @@ def JobImportDir(job):
return
def JobProcessAI(job):
print ("DDP: HACK - to allow re-running jobs for now, del FPL");
session.query(File_Person_Link).delete()
#### (delete the above 2 lines)
path=[jex.value for jex in job.extra if jex.name == "path"][0]
path = SymlinkName(path, '/')
print('REMOVE AFTER TESTING ON WINDOWS... path=',path)
d=session.query(Dir).filter(Dir.path_prefix==path).first()
job.num_files=d.num_files
for e in FilesInDir( path ):
@@ -518,26 +514,33 @@ def ProcessAI(job, e):
for person in people:
generateKnownEncodings(person)
file = e.in_dir[0].path_prefix + '/' + e.name
stat = os.stat(file)
# only find faces if we have not already OR file is newer than when we found faces before
if not e.file_details[0].faces_created_on or stat.st_ctime > e.file_details[0].faces_created_on:
session.add(e)
im = Image.open(file)
try:
im = ImageOps.exif_transpose(im)
except:
print("DEBUG: looks like image does not have exif")
im_orig = Image.open(file)
im = ImageOps.exif_transpose(im_orig)
faces = generateUnknownEncodings(im)
# DDP: uncomment the below to optimise, but I need to store the faces into the DB, not sure how right now
##### is this really 0? or will there be many with the many faces?
# if its many, should we do a faces_file_link???
# e.file_details[0].faces = faces[0].tobytes()
# e.file_details[0].faces_created_on=time.time()
# else:
# faces=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64)
e.file_details[0].faces_created_on=time.time()
if faces:
flat_faces = numpy.array(faces)
e.file_details[0].faces = flat_faces.tobytes()
else:
e.file_details[0].faces = None
return
else:
if not e.file_details[0].faces:
print("OPTIM: This image has no faces, skip it")
return
recover=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64)
real_recover=numpy.reshape(recover,(-1,128))
l=[]
for el in real_recover:
l.append(numpy.array(el))
faces = l
for unknown_encoding in faces:
for person in people:
lookForPersonInImage(job, person, unknown_encoding, e)
@@ -546,25 +549,34 @@ def ProcessAI(job, e):
def lookForPersonInImage(job, person, unknown_encoding, e):
for refimg in person.refimg:
###
# need a date_stamp in refimg_file_link, but we currently have a person_file_link
# should consider whether we break this into just a scan ( id, refimg, file, date, threshold, etc.)
###
# lets see if we have tried this check before
frl=session.query(FileRefimgLink).filter(FileRefimgLink.file_id==e.id, FileRefimgLink.refimg_id==refimg.id).first()
if not frl:
frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details[0].eid)
else:
stat=os.stat(e.in_dir[0].path_prefix+'/'+ e.name)
# if neither the file nor the refimg is newer than the last check, we don't need to check again
if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
print("OPTIM: lookForPersonInImage: file has a previous match, and the file & refimg haven't changed")
return
session.add(frl)
frl.matched=False
frl.when_processed=time.time()
deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64)
results = compareAI(deserialized_bytes, unknown_encoding)
if results[0]:
print(f'Found a match between: {person.tag} and {e.name}')
AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}')
fpl = File_Person_Link(person_id=person.id, file_id=e.file_details[0].eid)
session.add(fpl)
frl.matched=True
return
def generateUnknownEncodings(im):
    """Locate the faces in image ``im`` and return their encodings.

    ``im`` is converted with numpy.array() before being handed to
    face_recognition. Returns None when no face locations are found, otherwise
    the result of face_recognition.face_encodings() for those locations.
    """
    pixels = numpy.array(im)
    locations = face_recognition.face_locations(pixels)
    if locations:
        # should save these to the db
        # file.locations = face_locations
        return face_recognition.face_encodings(pixels, known_face_locations=locations)
    return None
@@ -573,7 +585,7 @@ def generateKnownEncodings(person):
file = 'reference_images/'+refimg.fname
stat = os.stat(file)
if refimg.created_on and stat.st_ctime < refimg.created_on:
print("DEBUG: skipping re-creating encoding for refimg because file has changed since we did this before")
print("OPTIM: skipping re-creating encoding for refimg because file has not changed")
continue
img = face_recognition.load_image_file(file)
location = face_recognition.face_locations(img)

View File

@@ -5,31 +5,30 @@ from main import db, app, ma
from sqlalchemy import Sequence
from sqlalchemy.exc import SQLAlchemyError
from status import st, Status
from refimg import Refimg
from refimg import Person_Refimg_Link
################################################################################
# Class describing Person in the database, and via sqlalchemy, connected to the DB as well
################################################################################
class PersonRefimgLink(db.Model):
    """Link row between a person and a reference image (table person_refimg_link)."""
    __tablename__ = "person_refimg_link"

    # person_id is not unique on its own: Person.refimg is a collection, so one
    # person can own several link rows. The (person_id, refimg_id) pair is the key.
    person_id = db.Column(db.Integer, db.ForeignKey('person.id'), nullable=False, primary_key=True)
    # NOTE(review): unique=True kept here, i.e. a reference image belongs to at
    # most one person - confirm this is intended and matches the table definition
    refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)

    def __repr__(self):
        # the second placeholder was missing, so refimg_id was never printed
        return "<person_id: {}, refimg_id: {}>".format(self.person_id, self.refimg_id)
class Person(db.Model):
id = db.Column(db.Integer, db.Sequence('person_id_seq'), primary_key=True )
tag = db.Column(db.String(48), unique=False, nullable=False)
surname = db.Column(db.String(48), unique=False, nullable=False)
firstname = db.Column(db.String(48), unique=False, nullable=False)
refimg = db.relationship('Refimg', secondary=Person_Refimg_Link.__table__)
refimg = db.relationship('Refimg', secondary=PersonRefimgLink.__table__)
def __repr__(self):
return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg)
class File_Person_Link(db.Model):
__tablename__ = "file_person_link"
file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True)
person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
def __repr__(self):
return "<file_id: {}, person_id: {}>".format(self.file_id, self.person_id)
################################################################################
# Helper class that inherits a .dump() method to turn class Person into json / useful in jinja2
################################################################################

View File

@@ -17,13 +17,13 @@ class Refimg(db.Model):
def __repr__(self):
return "<id: {}, fname: {}>".format(self.id, self.fname )
class Person_Refimg_Link(db.Model):
__tablename__ = "person_refimg_link"
person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
def __repr__(self):
return "<person_id: {}, refimg_id>".format(self.person_id, self.refimg_id)
#class Person_Refimg_Link(db.Model):
# __tablename__ = "person_refimg_link"
# person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
# refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
#
# def __repr__(self):
# return "<person_id: {}, refimg_id>".format(self.person_id, self.refimg_id)
################################################################################
# Helper class that inherits a .dump() method to turn class Refimg into json / useful in jinja2

View File

@@ -8,7 +8,7 @@ create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer,
constraint PK_ENTRY_ID primary key(ID),
constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) );
create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float,
create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float, FACES bytea,
constraint PK_FILE_ID primary key(EID),
constraint FK_FILE_ENTRY_ID foreign key (EID) references ENTRY(ID) );
@@ -28,10 +28,10 @@ create table REFIMG ( ID integer, FNAME varchar(256), ENCODINGS bytea,
CREATED_ON fLOAT,
constraint PK_REFIMG_ID primary key(ID) );
create table FILE_PERSON_LINK ( FILE_ID integer, PERSON_ID integer,
constraint PK_FPL primary key(FILE_ID, PERSON_ID),
constraint FK_FPL_FILE_ID foreign key (FILE_ID) references FILE(EID),
constraint FK_FPL_PERSON_ID foreign key (PERSON_ID) references PERSON(ID) );
-- one row per (file, reference image) pair, recording when the pair was
-- processed and whether it matched
create table FILE_REFIMG_LINK (
    FILE_ID integer,
    REFIMG_ID integer,
    WHEN_PROCESSED float,
    MATCHED boolean,
    constraint PK_FRL primary key (FILE_ID, REFIMG_ID),
    constraint FK_FRL_FILE_ID foreign key (FILE_ID) references FILE (EID),
    constraint FK_FRL_REFIMG_ID foreign key (REFIMG_ID) references REFIMG (ID)
);
create table PERSON_REFIMG_LINK ( PERSON_ID integer, REFIMG_ID integer,
constraint PK_PRL primary key(PERSON_ID, REFIMG_ID),

View File

@@ -6,7 +6,7 @@
<tbody><thead class="thead-light"><tr><th>File</th><th>AI Matched people</th></thead>
{% for e in entries %}
<tr><td>{{e.name}}</td><td>
{% for p in e.file_details[0].people %}
{% for p in e.people %}
{{p.tag}}
{% endfor %}
</td></tr>