diff --git a/BUGs b/BUGs index 79dab94..e02f245 100644 --- a/BUGs +++ b/BUGs @@ -1,7 +1,3 @@ ### Next: 17 -BUG-11: Ai ref img jobs are not able to be "re-run" - DONE - only need to calc refimgs once (so timestamp in refimg and check it) - - if we re-run a process AI job and no file changes, then don't process (as above) - - if we do see a new file/updated file, should delete all FPLs then insert new - -- probably should insert new into a file.people.append(...), rather than FPL direct BUG-16: now we dont do dir level optimising for genfiledetails, need to stat the file / check dates that way to optimise +BUG-17: I think it won't handle me deleting files after scan diff --git a/TODO b/TODO index ce1384c..b888f71 100644 --- a/TODO +++ b/TODO @@ -1,17 +1,11 @@ ## DB - should FPL really be EPL? - FILE -> add, has_unidentified_face - ?has_face?, - - AI_SCAN: - id - date of scan - version of code? - settings used - AI_SCAN_FILE_LINK - id to link to AI_scan - refimg used/found + Need to think about... + file (image) -> has X faces, Y matches + X == Y (optim: dont scan again) + say X-Y == 1, then to optimise, we need to only check the missing + face... at the moment, the DB structure is not that clever... + (file_refimg_link --> file_refimg_link needs a face_num?) ### BACKEND *** Need to use thread-safe sessions per Thread, half-assed version did not work diff --git a/ai.py b/ai.py index 82938af..b07bb88 100644 --- a/ai.py +++ b/ai.py @@ -5,13 +5,22 @@ from main import db, app, ma from sqlalchemy import Sequence from sqlalchemy.exc import SQLAlchemyError from status import st, Status -from files import Entry, File -from person import File_Person_Link +from files import Entry, File, FileRefimgLink +from person import Person, PersonRefimgLink +from refimg import Refimg ################################################################################ # /aistats -> placholder for some sort of stats ################################################################################ @app.route("/aistats", methods=["GET", "POST"]) def aistats(): - entries=db.session.query(Entry).join(File).join(File_Person_Link).filter(File_Person_Link.file_id==File.eid).all() + tmp=db.session.query(Entry,Person).join(File).join(FileRefimgLink).join(Refimg).join(PersonRefimgLink).join(Person).filter(FileRefimgLink.matched==True).all() + entries=[] + last_fname="" + for e, p in tmp: + if last_fname != e.name: + entry = { 'name': e.name, 'people': [] } + entries.append( entry ) + last_fname = e.name + entry['people'].append( { 'tag': p.tag } ) return render_template("aistats.html", page_title='Placeholder', entries=entries) diff --git a/files.py b/files.py index 8b57d8b..6e3c099 100644 --- a/files.py +++ b/files.py @@ -19,9 +19,10 @@ import time ################################################################################ # Local Class imports ################################################################################ -from settings import Settings from job import Job, Joblog, NewJob -from person import Person, File_Person_Link +from person import Person, PersonRefimgLink +from refimg import Refimg +from settings import Settings ################################################################################ # Class describing File in the database, and via sqlalchemy, connected to the DB as well @@ -56,13 +57,21 @@ class Entry(db.Model): def __repr__(self): return "".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir) +class FileRefimgLink(db.Model): + __tablename__ = "file_refimg_link" + file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True) + refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True) + when_processed = db.Column(db.Float) + matched = db.Column(db.Boolean) + def __repr__(self): + return f"".format(self.eid, self.size_mb, self.hash ) @@ -97,7 +106,7 @@ def files(): def search(): file_data=Entry.query.filter(Entry.name.ilike(f"%{request.form['term']}%")).all() - ai_data=Entry.query.join(File).join(File_Person_Link).filter(File_Person_Link.file_id==File.eid).join(Person).filter(Person.tag.ilike(f"%{request.form['term']}%")).all() + ai_data=Entry.query.join(File).join(FileRefimgLink).join(Refimg).join(PersonRefimgLink).join(Person).filter(FileRefimgLink.matched==True).filter(Person.tag.ilike(f"%{request.form['term']}%")).all() all_entries = file_data + ai_data return render_template("files.html", page_title='View Files', entry_data=all_entries) diff --git a/pa_job_manager.py b/pa_job_manager.py index 87ddd4e..fc6d66c 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -14,7 +14,7 @@ ### SQLALCHEMY IMPORTS ### from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary +from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime, LargeBinary, Boolean from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import relationship from sqlalchemy import create_engine @@ -97,14 +97,23 @@ class Entry(Base): def __repr__(self): return "".format(self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir) +class FileRefimgLink(Base): + __tablename__ = "file_refimg_link" + file_id = Column(Integer, ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True) + refimg_id = Column(Integer, ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True) + when_processed = Column(Float) + matched = Column(Boolean) + + def __repr__(self): + return f"".format(self.id, self.import_path ) -class Person_Refimg_Link(Base): +class PersonRefimgLink(Base): __tablename__ = "person_refimg_link" person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True) refimg_id = Column(Integer, ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True) @@ -140,7 +149,7 @@ class Person(Base): tag = Column(String(48), unique=False, nullable=False) surname = Column(String(48), unique=False, nullable=False) firstname = Column(String(48), unique=False, nullable=False) - refimg = relationship('Refimg', secondary=Person_Refimg_Link.__table__) + refimg = relationship('Refimg', secondary=PersonRefimgLink.__table__) def __repr__(self): return "".format(self.tag,self.firstname, self.surname, self.refimg) @@ -155,14 +164,6 @@ class Refimg(Base): def __repr__(self): return f"" -class File_Person_Link(Base): - __tablename__ = "file_person_link" - file_id = Column(Integer, ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True) - person_id = Column(Integer, ForeignKey('person.id'), unique=True, nullable=False, primary_key=True) - - def __repr__(self): - return "".format(self.file_id, self.person_id) - ################################################################################ @@ -360,7 +361,7 @@ def JobScanNow(job): def JobForceScan(job): JobProgressState( job, "In Progress" ) - session.query(File_Person_Link).delete() + session.query(FileRefimgLink).delete() session.query(EntryDirLink).delete() session.query(Dir).delete() session.query(File).delete() @@ -481,13 +482,8 @@ def JobImportDir(job): return def JobProcessAI(job): - print ("DDP: HACK - to allow re-running jobs for now, del FPL"); - session.query(File_Person_Link).delete() - #### (delete the above 2 lines) - path=[jex.value for jex in job.extra if jex.name == "path"][0] path = SymlinkName(path, '/') - print('REMOVE AFTER TESTING ON WINDOWS... path=',path) d=session.query(Dir).filter(Dir.path_prefix==path).first() job.num_files=d.num_files for e in FilesInDir( path ): @@ -518,26 +514,33 @@ def ProcessAI(job, e): for person in people: generateKnownEncodings(person) + file = e.in_dir[0].path_prefix + '/' + e.name stat = os.stat(file) # only find faces if we have not already OR file is newer than when we found faces before if not e.file_details[0].faces_created_on or stat.st_ctime > e.file_details[0].faces_created_on: session.add(e) - im = Image.open(file) - try: - im = ImageOps.exif_transpose(im) - except: - print("DEBUG: looks like image does not have exif") + im_orig = Image.open(file) + im = ImageOps.exif_transpose(im_orig) faces = generateUnknownEncodings(im) -# DDP: uncomment the below to optimise, but I need to store the faces into the DB, not sure how right now -##### is this really 0? or will there be many with the many faces? -# if its many, should we do a faces_file_link??? -# e.file_details[0].faces = faces[0].tobytes() -# e.file_details[0].faces_created_on=time.time() -# else: -# faces=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64) - + e.file_details[0].faces_created_on=time.time() + if faces: + flat_faces = numpy.array(faces) + e.file_details[0].faces = flat_faces.tobytes() + else: + e.file_details[0].faces = None + return + else: + if not e.file_details[0].faces: + print("OPTIM: This image has no faces, skip it") + return + recover=numpy.frombuffer(e.file_details[0].faces,dtype=numpy.float64) + real_recover=numpy.reshape(recover,(-1,128)) + l=[] + for el in real_recover: + l.append(numpy.array(el)) + faces = l for unknown_encoding in faces: for person in people: lookForPersonInImage(job, person, unknown_encoding, e) @@ -546,25 +549,34 @@ def ProcessAI(job, e): def lookForPersonInImage(job, person, unknown_encoding, e): for refimg in person.refimg: - ### - # need a date_stamp in refimg_file_link, but we currently have a person_file_link - # should consider whether we break this into just a scan ( id, refimg, file, date, threshold, etc.) - ### + # lets see if we have tried this check before + frl=session.query(FileRefimgLink).filter(FileRefimgLink.file_id==e.id, FileRefimgLink.refimg_id==refimg.id).first() + if not frl: + frl = FileRefimgLink(refimg_id=refimg.id, file_id=e.file_details[0].eid) + else: + stat=os.stat(e.in_dir[0].path_prefix+'/'+ e.name) + # file & refimg are not newer then we dont need to check + if frl.matched and stat.st_ctime < frl.when_processed and refimg.created_on < frl.when_processed: + print("OPTIM: lookForPersonInImage: file has a previous match, and the file & refimg haven't changed") + return + + session.add(frl) + frl.matched=False + frl.when_processed=time.time() deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64) results = compareAI(deserialized_bytes, unknown_encoding) if results[0]: print(f'Found a match between: {person.tag} and {e.name}') AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}') - fpl = File_Person_Link(person_id=person.id, file_id=e.file_details[0].eid) - session.add(fpl) + frl.matched=True return def generateUnknownEncodings(im): unknown_image = numpy.array(im) face_locations = face_recognition.face_locations(unknown_image) + if not face_locations: + return None unknown_encodings = face_recognition.face_encodings(unknown_image, known_face_locations=face_locations) - # should save these to the db - # file.locations = face_locations return unknown_encodings @@ -573,7 +585,7 @@ def generateKnownEncodings(person): file = 'reference_images/'+refimg.fname stat = os.stat(file) if refimg.created_on and stat.st_ctime < refimg.created_on: - print("DEBUG: skipping re-creating encoding for refimg because file has changed since we did this before") + print("OPTIM: skipping re-creating encoding for refimg because file has not changed") continue img = face_recognition.load_image_file(file) location = face_recognition.face_locations(img) diff --git a/person.py b/person.py index cad27e0..90f8b28 100644 --- a/person.py +++ b/person.py @@ -5,31 +5,30 @@ from main import db, app, ma from sqlalchemy import Sequence from sqlalchemy.exc import SQLAlchemyError from status import st, Status - from refimg import Refimg -from refimg import Person_Refimg_Link ################################################################################ # Class describing Person in the database, and via sqlalchemy, connected to the DB as well ################################################################################ +class PersonRefimgLink(db.Model): + __tablename__ = "person_refimg_link" + person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True) + refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True) + + def __repr__(self): + return "".format(self.person_id, self.refimg_id) + + class Person(db.Model): id = db.Column(db.Integer, db.Sequence('person_id_seq'), primary_key=True ) tag = db.Column(db.String(48), unique=False, nullable=False) surname = db.Column(db.String(48), unique=False, nullable=False) firstname = db.Column(db.String(48), unique=False, nullable=False) - refimg = db.relationship('Refimg', secondary=Person_Refimg_Link.__table__) + refimg = db.relationship('Refimg', secondary=PersonRefimgLink.__table__) def __repr__(self): return "".format(self.tag,self.firstname, self.surname, self.refimg) -class File_Person_Link(db.Model): - __tablename__ = "file_person_link" - file_id = db.Column(db.Integer, db.ForeignKey('file.eid'), unique=True, nullable=False, primary_key=True) - person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True) - - def __repr__(self): - return "".format(self.file_id, self.person_id) - ################################################################################ # Helper class that inherits a .dump() method to turn class Person into json / useful in jinja2 ################################################################################ diff --git a/refimg.py b/refimg.py index 6841436..41b7d8b 100644 --- a/refimg.py +++ b/refimg.py @@ -17,13 +17,13 @@ class Refimg(db.Model): def __repr__(self): return "".format(self.id, self.fname ) -class Person_Refimg_Link(db.Model): - __tablename__ = "person_refimg_link" - person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True) - refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True) - - def __repr__(self): - return "".format(self.person_id, self.refimg_id) +#class Person_Refimg_Link(db.Model): +# __tablename__ = "person_refimg_link" +# person_id = db.Column(db.Integer, db.ForeignKey('person.id'), unique=True, nullable=False, primary_key=True) +# refimg_id = db.Column(db.Integer, db.ForeignKey('refimg.id'), unique=True, nullable=False, primary_key=True) +# +# def __repr__(self): +# return "".format(self.person_id, self.refimg_id) ################################################################################ # Helper class that inherits a .dump() method to turn class Refimg into json / useful in jinja2 diff --git a/tables.sql b/tables.sql index 9121b9e..a394ef1 100644 --- a/tables.sql +++ b/tables.sql @@ -8,7 +8,7 @@ create table ENTRY( ID integer, NAME varchar(128), TYPE_ID integer, constraint PK_ENTRY_ID primary key(ID), constraint FK_FILE_TYPE_TYPE_ID foreign key (TYPE_ID) references FILE_TYPE(ID) ); -create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float, +create table FILE ( EID integer, SIZE_MB integer, HASH varchar(34), THUMBNAIL varchar, FACES_CREATED_ON float, FACES bytea, constraint PK_FILE_ID primary key(EID), constraint FK_FILE_ENTRY_ID foreign key (EID) references ENTRY(ID) ); @@ -28,10 +28,10 @@ create table REFIMG ( ID integer, FNAME varchar(256), ENCODINGS bytea, CREATED_ON fLOAT, constraint PK_REFIMG_ID primary key(ID) ); -create table FILE_PERSON_LINK ( FILE_ID integer, PERSON_ID integer, - constraint PK_FPL primary key(FILE_ID, PERSON_ID), - constraint FK_FPL_FILE_ID foreign key (FILE_ID) references FILE(EID), - constraint FK_FPL_PERSON_ID foreign key (PERSON_ID) references PERSON(ID) ); +create table FILE_REFIMG_LINK ( FILE_ID integer, REFIMG_ID integer, WHEN_PROCESSED float, MATCHED boolean, + constraint PK_FRL primary key(FILE_ID, REFIMG_ID), + constraint FK_FRL_FILE_ID foreign key (FILE_ID) references FILE(EID), + constraint FK_FRL_REFIMG_ID foreign key (REFIMG_ID) references REFIMG(ID) ); create table PERSON_REFIMG_LINK ( PERSON_ID integer, REFIMG_ID integer, constraint PK_PRL primary key(PERSON_ID, REFIMG_ID), diff --git a/templates/aistats.html b/templates/aistats.html index 4a2e503..0797cb6 100644 --- a/templates/aistats.html +++ b/templates/aistats.html @@ -6,7 +6,7 @@ FileAI Matched people {% for e in entries %} {{e.name}} - {% for p in e.file_details[0].people %} + {% for p in e.people %} {{p.tag}} {% endfor %}