Fixed BUG-11: we have switched from the file_person_link table to the file_refimg_link table. This gives us some optimisations, and AI jobs can now be re-run without crashing. Several optimisations could still be done - see TODO
This commit is contained in:
6
BUGs
6
BUGs
@@ -1,7 +1,3 @@
|
||||
### Next: 17
|
||||
BUG-11: Ai ref img jobs are not able to be "re-run"
|
||||
DONE - only need to calc refimgs once (so timestamp in refimg and check it)
|
||||
- if we re-run a process AI job and no file changes, then don't process (as above)
|
||||
- if we do see a new file/updated file, should delete all FPLs then insert new
|
||||
-- probably should insert new into a file.people.append(...), rather than FPL direct
|
||||
BUG-16: now that we don't do dir-level optimising for genfiledetails, we need to stat each file / check its dates to optimise instead
|
||||
BUG-17: I think it won't handle me deleting files after scan
|
||||
|
||||
18
TODO
18
TODO
@@ -1,17 +1,11 @@
|
||||
## DB
|
||||
should FPL really be EPL?
|
||||
|
||||
FILE -> add, has_unidentified_face
|
||||
?has_face?,
|
||||
|
||||
AI_SCAN:
|
||||
id
|
||||
date of scan
|
||||
version of code?
|
||||
settings used
|
||||
AI_SCAN_FILE_LINK
|
||||
id to link to AI_scan
|
||||
refimg used/found
|
||||
Need to think about...
|
||||
file (image) -> has X faces, Y matches
|
||||
X == Y (optim: dont scan again)
|
||||
say X-Y == 1, then to optimise, we need to only check the missing
|
||||
face... at the moment, the DB structure is not that clever...
|
||||
(file_refimg_link --> file_refimg_link needs a face_num?)
|
||||
|
||||
### BACKEND
|
||||
*** Need to use thread-safe sessions per Thread, half-assed version did not work
|
||||
|
||||
15
ai.py
15
ai.py
@@ -5,13 +5,22 @@ from main import db, app, ma
|
||||
from sqlalchemy import Sequence
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from status import st, Status
|
||||
from files import Entry, File
|
||||
from person import File_Person_Link
|
||||
from files import Entry, File, FileRefimgLink
|
||||
from person import Person, PersonRefimgLink
|
||||
from refimg import Refimg
|
||||
|
||||
################################################################################
|
||||
# /aistats -> placeholder for some sort of stats
|
||||
################################################################################
|
||||
@app.route("/aistats", methods=["GET", "POST"])
def aistats():
    """Render the placeholder AI stats page: one row per file with an AI match.

    Selects every (Entry, Person) pair that is connected through a
    FileRefimgLink row flagged ``matched`` and groups the people under the
    file they were found in.

    Returns:
        The rendered ``aistats.html`` template. ``entries`` is a list of
        ``{'name': <file name>, 'people': [{'tag': <person tag>}, ...]}``.
    """
    rows = (
        db.session.query(Entry, Person)
        .join(File)
        .join(FileRefimgLink)
        .join(Refimg)
        .join(PersonRefimgLink)
        .join(Person)
        .filter(FileRefimgLink.matched == True)
        .all()
    )

    # Group on the entry's primary key rather than comparing each row's file
    # name with the previous row's: the query has no ORDER BY, so rows of one
    # file are not guaranteed to be adjacent, and two different files may
    # share a name.
    by_entry = {}
    for e, p in rows:
        entry = by_entry.get(e.id)
        if entry is None:
            entry = by_entry[e.id] = {'name': e.name, 'people': []}
        tag = {'tag': p.tag}
        # the same person can match through more than one refimg; list once
        if tag not in entry['people']:
            entry['people'].append(tag)

    entries = list(by_entry.values())
    return render_template("aistats.html", page_title='Placeholder', entries=entries)
|
||||
|
||||
17
files.py
17
files.py
@@ -19,9 +19,10 @@ import time
|
||||
################################################################################
|
||||
# Local Class imports
|
||||
################################################################################
|
||||
from settings import Settings
|
||||
from job import Job, Joblog, NewJob
|
||||
from person import Person, File_Person_Link
|
||||
from person import Person, PersonRefimgLink
|
||||
from refimg import Refimg
|
||||
from settings import Settings
|
||||
|
||||
################################################################################
|
||||
# Class describing File in the database, and via sqlalchemy, connected to the DB as well
|
||||
@@ -56,13 +57,21 @@ class Entry(db.Model):
|
||||
def __repr__(self):
    """Return a debug string listing the entry's id, name, type and its dir/file links."""
    return (
        f"<id: {self.id}, name: {self.name}, type={self.type}, "
        f"dir_details={self.dir_details}, file_details={self.file_details}, "
        f"in_dir={self.in_dir}>"
    )
|
||||
|
||||
class FileRefimgLink(db.Model):
    """Link row between a file and a reference image it has been checked against.

    Maps the ``file_refimg_link`` table. ``matched`` records the outcome of
    the comparison and ``when_processed`` when it was made (a float;
    presumably epoch seconds - confirm against the code that writes it).
    """
    __tablename__ = "file_refimg_link"
    # The primary key is the (file_id, refimg_id) pair. Neither column may be
    # unique on its own, otherwise a file could be linked to only one refimg
    # and a refimg to only one file.
    file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), nullable=False, primary_key=True)
    refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), nullable=False, primary_key=True)
    when_processed = db.Column(db.Float)
    matched = db.Column(db.Boolean)

    def __repr__(self):
        """Return a debug string with both key columns and the processing state."""
        return f"<file_id: {self.file_id}, refimg_id: {self.refimg_id} when_processed={self.when_processed}, matched={self.matched}>"
|
||||
|
||||
class File(db.Model):
|
||||
__tablename__ = "file"
|
||||
eid = db.Column(db.Integer, db.ForeignKey("entry.id"), primary_key=True )
|
||||
size_mb = db.Column(db.Integer, unique=False, nullable=False)
|
||||
hash = db.Column(db.Integer, unique=True, nullable=True)
|
||||
thumbnail = db.Column(db.String, unique=False, nullable=True)
|
||||
people = db.relationship("Person", secondary="file_person_link" )
|
||||
|
||||
def __repr__(self):
|
||||
return "<eid: {}, size_mb={}, hash={}>".format(self.eid, self.size_mb, self.hash )
|
||||
@@ -97,7 +106,7 @@ def files():
|
||||
def search():
    """Search entries by file name and by the tag of an AI-matched person.

    Reads the search term from ``request.form['term']`` and does a
    case-insensitive substring match against ``Entry.name`` and against
    ``Person.tag`` for people linked to a file through a matched
    FileRefimgLink row.

    Returns:
        The rendered ``files.html`` template with ``entry_data`` holding the
        name matches followed by the AI matches, each entry listed once.
    """
    pattern = f"%{request.form['term']}%"

    file_data = Entry.query.filter(Entry.name.ilike(pattern)).all()
    ai_data = (
        Entry.query
        .join(File)
        .join(FileRefimgLink)
        .join(Refimg)
        .join(PersonRefimgLink)
        .join(Person)
        .filter(FileRefimgLink.matched == True)
        .filter(Person.tag.ilike(pattern))
        .all()
    )

    # An entry can match on its name and on a person's tag at the same time;
    # keep only its first occurrence so it is not shown twice.
    seen = set()
    all_entries = []
    for entry in file_data + ai_data:
        if entry.id not in seen:
            seen.add(entry.id)
            all_entries.append(entry)
    return render_template("files.html", page_title='View Files', entry_data=all_entries)
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
### SQLALCHEMY IMPORTS ###
|
||||
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary
|
||||
from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary, Boolean
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy import create_engine
|
||||
@@ -97,14 +97,23 @@ class Entry(Base):
|
||||
def __repr__(self):
    """Return a debug string listing the entry's id, name, type and its dir/file links."""
    return (
        f"<id: {self.id}, name: {self.name}, type={self.type}, "
        f"dir_details={self.dir_details}, file_details={self.file_details}, "
        f"in_dir={self.in_dir}>"
    )
|
||||
|
||||
class FileRefimgLink(Base):
    """Link row between a file and a reference image it has been checked against.

    Maps the ``file_refimg_link`` table. ``matched`` records the outcome of
    the comparison and ``when_processed`` when it was made (a float;
    presumably epoch seconds - confirm against the code that writes it).
    """
    __tablename__ = "file_refimg_link"
    # The primary key is the (file_id, refimg_id) pair. Neither column may be
    # unique on its own, otherwise a file could be linked to only one refimg
    # and a refimg to only one file.
    file_id = Column(Integer, ForeignKey('file.eid'), nullable=False, primary_key=True)
    refimg_id = Column(Integer, ForeignKey('refimg.id'), nullable=False, primary_key=True)
    when_processed = Column(Float)
    matched = Column(Boolean)

    def __repr__(self):
        """Return a debug string with both key columns and the processing state."""
        return f"<file_id: {self.file_id}, refimg_id: {self.refimg_id} when_processed={self.when_processed}, matched={self.matched}>"
|
||||
|
||||
class File(Base):
|
||||
__tablename__ = "file"
|
||||
eid = Column(Integer, ForeignKey("entry.id"), primary_key=True )
|
||||
size_mb = Column(Integer, unique=False, nullable=False)
|
||||
hash = Column(Integer, unique=True, nullable=True)
|
||||
thumbnail = Column(String, unique=False, nullable=True)
|
||||
# DDP: need bytea? in db (see other DDP comment)
|
||||
# faces =
|
||||
faces = Column( LargeBinary )
|
||||
faces_created_on = Column(Float)
|
||||
|
||||
def __repr__(self):
|
||||
@@ -126,7 +135,7 @@ class Settings(Base):
|
||||
def __repr__(self):
|
||||
return "<id: {}, import_path: {}>".format(self.id, self.import_path )
|
||||
|
||||
class Person_Refimg_Link(Base):
|
||||
class PersonRefimgLink(Base):
|
||||
__tablename__ = "person_refimg_link"
|
||||
person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
|
||||
refimg_id = Column(Integer, ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
|
||||
@@ -140,7 +149,7 @@ class Person(Base):
|
||||
tag = Column(String(48), unique=False, nullable=False)
|
||||
surname = Column(String(48), unique=False, nullable=False)
|
||||
firstname = Column(String(48), unique=False, nullable=False)
|
||||
refimg = relationship('Refimg', secondary=Person_Refimg_Link.__table__)
|
||||
refimg = relationship('Refimg', secondary=PersonRefimgLink.__table__)
|
||||
|
||||
def __repr__(self):
|
||||
return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg)
|
||||
@@ -155,14 +164,6 @@ class Refimg(Base):
|
||||
def __repr__(self):
    """Return a debug string with this Refimg's id, fname, created_on and encodings."""
    # Read every field from self: the bare names would resolve to the builtin
    # id() and to undefined globals, raising NameError on `fname`.
    return f"<id: {self.id}, fname: {self.fname}, created_on: {self.created_on}, encodings: {self.encodings}>"
|
||||
|
||||
class File_Person_Link(Base):
|
||||
__tablename__ = "file_person_link"
|
||||
file_id = Column(Integer, ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True)
|
||||
person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
|
||||
|
||||
def __repr__(self):
|
||||
return "<file_id: {}, person_id: {}>".format(self.file_id, self.person_id)
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
@@ -360,7 +361,7 @@ def JobScanNow(job):
|
||||
|
||||
def JobForceScan(job):
|
||||
JobProgressState( job, "In Progress" )
|
||||
session.query(File_Person_Link).delete()
|
||||
session.query(FileRefimgLink).delete()
|
||||
session.query(EntryDirLink).delete()
|
||||
session.query(Dir).delete()
|
||||
session.query(File).delete()
|
||||
@@ -481,13 +482,8 @@ def JobImportDir(job):
|
||||
return
|
||||
|
||||
def JobProcessAI(job):
|
||||
print ("DDP: HACK - to allow re-running jobs for now, del FPL");
|
||||
session.query(File_Person_Link).delete()
|
||||
#### (delete the above 2 lines)
|
||||
|
||||
path=[jex.value for jex in job.extra if jex.name == "path"][0]
|
||||
path = SymlinkName(path, '/')
|
||||
print('REMOVE AFTER TESTING ON WINDOWS... path=',path)
|
||||
d=session.query(Dir).filter(Dir.path_prefix==path).first()
|
||||
job.num_files=d.num_files
|
||||
for e in FilesInDir( path ):
|
||||
@@ -518,26 +514,33 @@ def ProcessAI(job, e):
|
||||
for person in people:
|
||||
generateKnownEncodings(person)
|
||||
|
||||
|
||||
file = e.in_dir[0].path_prefix + '/' + e.name
|
||||
stat = os.stat(file)
|
||||
# only find faces if we have not already OR file is newer than when we found faces before
|
||||
if not e.file_details[0].faces_created_on or stat.st_ctime > e.file_details[0].faces_created_on:
|
||||
session.add(e)
|
||||
im = Image.open(file)
|
||||
try:
|
||||
im = ImageOps.exif_transpose(im)
|
||||
except:
|
||||
print("DEBUG: looks like image does not have exif")
|
||||
im_orig = Image.open(file)
|
||||
im = ImageOps.exif_transpose(im_orig)
|
||||
|
||||
faces = generateUnknownEncodings(im)
|
||||
# DDP: uncomment the below to optimise, but I need to store the faces into the DB, not sure how right now
|
||||
##### is this really 0? or will there be many with the many faces?
|
||||
# if its many, should we do a faces_file_link???
|
||||
# e.file_details[0].faces = faces[0].tobytes()
|
||||
# e.file_details[0].faces_created_on=time.time()
|
||||
# else:
|
||||
# faces=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64)
|
||||
|
||||
e.file_details[0].faces_created_on=time.time()
|
||||
if faces:
|
||||
flat_faces = numpy.array(faces)
|
||||
e.file_details[0].faces = flat_faces.tobytes()
|
||||
else:
|
||||
e.file_details[0].faces = None
|
||||
return
|
||||
else:
|
||||
if not e.file_details[0].faces:
|
||||
print("OPTIM: This image has no faces, skip it")
|
||||
return
|
||||
recover=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64)
|
||||
real_recover=numpy.reshape(recover,(-1,128))
|
||||
l=[]
|
||||
for el in real_recover:
|
||||
l.append(numpy.array(el))
|
||||
faces = l
|
||||
for unknown_encoding in faces:
|
||||
for person in people:
|
||||
lookForPersonInImage(job, person, unknown_encoding, e)
|
||||
@@ -546,25 +549,34 @@ def ProcessAI(job, e):
|
||||
|
||||
def lookForPersonInImage(job, person, unknown_encoding, e):
|
||||
for refimg in person.refimg:
|
||||
###
|
||||
# need a date_stamp in refimg_file_link, but we currently have a person_file_link
|
||||
# should consider whether we break this into just a scan ( id, refimg, file, date, threshold, etc.)
|
||||
###
|
||||
# lets see if we have tried this check before
|
||||
frl=session.query(FileRefimgLink).filter(FileRefimgLink.file_id==e.id, FileRefimgLink.refimg_id==refimg.id).first()
|
||||
if not frl:
|
||||
frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details[0].eid)
|
||||
else:
|
||||
stat=os.stat(e.in_dir[0].path_prefix+'/'+ e.name)
|
||||
# if neither the file nor the refimg is newer than the last check, we don't need to check again
|
||||
if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed:
|
||||
print("OPTIM: lookForPersonInImage: file has a previous match, and the file & refimg haven't changed")
|
||||
return
|
||||
|
||||
session.add(frl)
|
||||
frl.matched=False
|
||||
frl.when_processed=time.time()
|
||||
deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64)
|
||||
results = compareAI(deserialized_bytes, unknown_encoding)
|
||||
if results[0]:
|
||||
print(f'Found a match between: {person.tag} and {e.name}')
|
||||
AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}')
|
||||
fpl = File_Person_Link(person_id=person.id, file_id=e.file_details[0].eid)
|
||||
session.add(fpl)
|
||||
frl.matched=True
|
||||
return
|
||||
|
||||
def generateUnknownEncodings(im):
    """Locate the faces in image `im` and return their encodings.

    Converts `im` to a numpy array, asks face_recognition for the face
    locations and, when any are found, for the encodings at those locations.

    Returns:
        Whatever face_recognition.face_encodings returns for the located
        faces, or None when no face location was found.
    """
    pixels = numpy.array(im)
    locations = face_recognition.face_locations(pixels)
    if locations:
        # TODO: should save these to the db (file.locations = face_locations)
        return face_recognition.face_encodings(pixels, known_face_locations=locations)
    return None
|
||||
|
||||
|
||||
@@ -573,7 +585,7 @@ def generateKnownEncodings(person):
|
||||
file = 'reference_images/'+refimg.fname
|
||||
stat = os.stat(file)
|
||||
if refimg.created_on and stat.st_ctime < refimg.created_on:
|
||||
print("DEBUG: skipping re-creating encoding for refimg because file has changed since we did this before")
|
||||
print("OPTIM: skipping re-creating encoding for refimg because file has not changed")
|
||||
continue
|
||||
img = face_recognition.load_image_file(file)
|
||||
location = face_recognition.face_locations(img)
|
||||
|
||||
21
person.py
21
person.py
@@ -5,31 +5,30 @@ from main import db, app, ma
|
||||
from sqlalchemy import Sequence
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from status import st, Status
|
||||
|
||||
from refimg import Refimg
|
||||
from refimg import Person_Refimg_Link
|
||||
|
||||
################################################################################
|
||||
# Class describing Person in the database, and via sqlalchemy, connected to the DB as well
|
||||
################################################################################
|
||||
class PersonRefimgLink(db.Model):
    """Link row between a person and one of their reference images.

    Maps the ``person_refimg_link`` table.
    """
    __tablename__ = "person_refimg_link"
    # The primary key is the (person_id, refimg_id) pair. Neither column may
    # be unique on its own, otherwise a person could own only one refimg.
    person_id = db.Column(db.Integer, db.ForeignKey('person.id'), nullable=False, primary_key=True)
    refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), nullable=False, primary_key=True)

    def __repr__(self):
        """Return a debug string with both key columns."""
        # the format string previously had no placeholder for refimg_id, so
        # that value was silently dropped from the output
        return "<person_id: {}, refimg_id: {}>".format(self.person_id, self.refimg_id)
|
||||
|
||||
|
||||
class Person(db.Model):
|
||||
id = db.Column(db.Integer, db.Sequence('person_id_seq'), primary_key=True )
|
||||
tag = db.Column(db.String(48), unique=False, nullable=False)
|
||||
surname = db.Column(db.String(48), unique=False, nullable=False)
|
||||
firstname = db.Column(db.String(48), unique=False, nullable=False)
|
||||
refimg = db.relationship('Refimg', secondary=Person_Refimg_Link.__table__)
|
||||
refimg = db.relationship('Refimg', secondary=PersonRefimgLink.__table__)
|
||||
|
||||
def __repr__(self):
|
||||
return "<tag: {}, firstname: {}, surname: {}, refimg: {}>".format(self.tag,self.firstname, self.surname, self.refimg)
|
||||
|
||||
class File_Person_Link(db.Model):
|
||||
__tablename__ = "file_person_link"
|
||||
file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True)
|
||||
person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
|
||||
|
||||
def __repr__(self):
|
||||
return "<file_id: {}, person_id: {}>".format(self.file_id, self.person_id)
|
||||
|
||||
################################################################################
|
||||
# Helper class that inherits a .dump() method to turn class Person into json / useful in jinja2
|
||||
################################################################################
|
||||
|
||||
14
refimg.py
14
refimg.py
@@ -17,13 +17,13 @@ class Refimg(db.Model):
|
||||
def __repr__(self):
|
||||
return "<id: {}, fname: {}>".format(self.id, self.fname )
|
||||
|
||||
class Person_Refimg_Link(db.Model):
|
||||
__tablename__ = "person_refimg_link"
|
||||
person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
|
||||
refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
|
||||
|
||||
def __repr__(self):
|
||||
return "<person_id: {}, refimg_id>".format(self.person_id, self.refimg_id)
|
||||
#class Person_Refimg_Link(db.Model):
|
||||
# __tablename__ = "person_refimg_link"
|
||||
# person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True)
|
||||
# refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True)
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return "<person_id: {}, refimg_id>".format(self.person_id, self.refimg_id)
|
||||
|
||||
################################################################################
|
||||
# Helper class that inherits a .dump() method to turn class Refimg into json / useful in jinja2
|
||||
|
||||
10
tables.sql
10
tables.sql
@@ -8,7 +8,7 @@ create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer,
|
||||
constraint PK_ENTRY_ID primary key(ID),
|
||||
constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) );
|
||||
|
||||
create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float,
|
||||
create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float, FACES bytea,
|
||||
constraint PK_FILE_ID primary key(EID),
|
||||
constraint FK_FILE_ENTRY_ID foreign key (EID) references ENTRY(ID) );
|
||||
|
||||
@@ -28,10 +28,10 @@ create table REFIMG ( ID integer, FNAME varchar(256), ENCODINGS bytea,
|
||||
CREATED_ON fLOAT,
|
||||
constraint PK_REFIMG_ID primary key(ID) );
|
||||
|
||||
create table FILE_PERSON_LINK ( FILE_ID integer, PERSON_ID integer,
|
||||
constraint PK_FPL primary key(FILE_ID, PERSON_ID),
|
||||
constraint FK_FPL_FILE_ID foreign key (FILE_ID) references FILE(EID),
|
||||
constraint FK_FPL_PERSON_ID foreign key (PERSON_ID) references PERSON(ID) );
|
||||
create table FILE_REFIMG_LINK ( FILE_ID integer, REFIMG_ID integer, WHEN_PROCESSED float, MATCHED boolean,
|
||||
constraint PK_FRL primary key(FILE_ID, REFIMG_ID),
|
||||
constraint FK_FRL_FILE_ID foreign key (FILE_ID) references FILE(EID),
|
||||
constraint FK_FRL_REFIMG_ID foreign key (REFIMG_ID) references REFIMG(ID) );
|
||||
|
||||
create table PERSON_REFIMG_LINK ( PERSON_ID integer, REFIMG_ID integer,
|
||||
constraint PK_PRL primary key(PERSON_ID, REFIMG_ID),
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<tbody><thead class="thead-light"><tr><th>File</th><th>AI Matched people</th></thead>
|
||||
{% for e in entries %}
|
||||
<tr><td>{{e.name}}</td><td>
|
||||
{% for p in e.file_details[0].people %}
|
||||
{% for p in e.people %}
|
||||
{{p.tag}}
|
||||
{% endfor %}
|
||||
</td></tr>
|
||||
|
||||
Reference in New Issue
Block a user