From 685c626fd7e1949603922051e82589d1a1ec9c57 Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Fri, 22 Jan 2021 22:27:35 +1100 Subject: [PATCH] updated AI job, to show progress in joblog, and fixed BUG-12 (can now forcescan again) --- pa_job_manager.py | 113 ++++++---------------------------------------- 1 file changed, 13 insertions(+), 100 deletions(-) diff --git a/pa_job_manager.py b/pa_job_manager.py index 91c82e4..4ce06a6 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -275,7 +275,7 @@ def RunJob(job): elif job.name =="forcescan": JobForceScan(job) elif job.name =="importdir": - JobNewImportDir(job) + JobImportDir(job) elif job.name =="getfiledetails": JobGetFileDetails(job) elif job.name == "processai": @@ -342,6 +342,7 @@ def JobScanNow(job): def JobForceScan(job): JobProgressState( job, "In Progress" ) + session.query(File_Person_Link).delete() session.query(EntryDirLink).delete() session.query(Dir).delete() session.query(File).delete() @@ -403,7 +404,7 @@ def AddFile(job, fname, type_str, fsize, in_dir ): session.add(e) return e -def JobNewImportDir(job): +def JobImportDir(job): JobProgressState( job, "In Progress" ) settings = session.query(Settings).first() if settings == None: @@ -464,7 +465,6 @@ def JobNewImportDir(job): job.num_files=overall_file_cnt job.current_file_num=overall_file_cnt FinishJob(job, "Finished Importing: {} - Found {} new files".format( path, overall_file_cnt ) ) - ####### NEED TO FIX THIS BASED ON os.walk contents import_dir=session.query(Dir).filter(Dir.path_prefix==symlink).first() import_dir.num_files=overall_file_cnt session.commit() @@ -473,97 +473,12 @@ def JobNewImportDir(job): def JobProcessAI(job): path=[jex.value for jex in job.extra if jex.name == "path"][0] path = SymlinkName(path, '/') - print('meeeee',path) + print('REMOVE AFTER TESTING ON WINDOWS... path=',path) + d=session.query(Dir).filter(Dir.path_prefix==path).first() + job.num_files=d.num_files for e in FilesInDir( path ): ProcessFilesInDir(job, e, ProcessAI, lambda a: True) FinishJob(job, "Finished Processesing AI") - r = session.query(Refimg).get(1) - print(r) - -def JobImportDir(job): - JobProgressState( job, "In Progress" ) - settings = session.query(Settings).first() - if settings == None: - raise Exception("Cannot create file data with no settings / import path is missing") - overall_file_cnt=0 - fcnt={} - keep_dirs={} - path=[jex.value for jex in job.extra if jex.name == "path"][0] - AddLogForJob(job, "Checking Import Directory: {}".format( path ) ) - if DEBUG==1: - print("DEBUG: Checking Import Directory: {}".format( path ) ) - if os.path.exists( path ): - symlink=CreateSymlink(job,path) - # dont want to do add a Dir, if this already exists - dir=session.query(Dir).filter(Dir.path_prefix==symlink).first() - if dir != None: - stat = os.stat( symlink ) - # check any modificaiton on fs, since last import, if none we are done - if dir.last_import_date > 0 and stat.st_ctime < dir.last_import_date: - if DEBUG==1: - print( "DEBUG: Directory has not been altered since the last import, just return" ) - job.current_file_num=dir.num_files - job.num_files=dir.num_files - FinishJob( job, "No new files in directory since the last import") - return - else: - dir=AddDir(job, os.path.basename(path[0:-1]), symlink, None ) - session.commit() - keep_dirs[dir.path_prefix]=dir - import_dir=dir - fcnt[symlink]=0 - files = sorted(glob.glob(path + '**', recursive=True)) - job.current_file_num=0 - # reduce this by 1, becasuse we skip file == path below - job.num_files=len(files)-1 - session.commit() - for file in sorted(glob.glob(path + '**', recursive=True)): - if file == path: - continue - fname=file.replace(path, "") - stat = os.stat(file) - dirname=SymlinkName(path, file) - if not keep_dirs[dirname]: - print("ERROR: dirname={}, keep_dir={}, fname={}, path={}, symlink=symlink", dirname, keep_dir, fname, path, symlink ) - if stat.st_ctime > keep_dirs[dirname].last_import_date: - if DEBUG==1: - AddLogForJob(job, "DEBUG: {} - {} is newer than {}".format( file, stat.st_ctime, keep_dirs[dirname].last_import_date ), file ) - print("DEBUG: {} - {} is newer than {}".format( file, stat.st_ctime, keep_dirs[dirname].last_import_date ) ) - if os.path.isdir(file): - path_prefix=symlink+'/'+fname - dir=AddDir( job, fname, path_prefix, dir ) - fcnt[path_prefix]=0 - keep_dirs[dir.path_prefix]=dir - else: - overall_file_cnt=overall_file_cnt+1 - dirname=SymlinkName(path, file) - fcnt[dirname]=fcnt[dirname]+1 - if isImage(file): - type_str = 'Image' - elif isVideo(file): - type_str = 'Video' - else: - type_str = 'Unknown' - fsize = round(os.stat(file).st_size/(1024*1024)) - dir=keep_dirs[dirname] - e=AddFile( job, os.path.basename(fname), type_str, fsize, dir ) - else: - if DEBUG==1: - AddLogForJob(job, "DEBUG: {} - {} is OLDER than {}".format( file, stat.st_ctime, keep_dirs[dirname].last_import_date ), file ) - print("DEBUG: {} - {} is OLDER than {}".format( file, stat.st_ctime, keep_dirs[dirname].last_import_date ), file ) - FinishJob(job, "Finished Importing: {} - Found {} new files".format( path, overall_file_cnt ) ) - for d in keep_dirs: - keep_dirs[d].num_files = fcnt[d] - keep_dirs[d].last_import_date = time.time() - # override this to be all the files in dir & its sub dirs... (used to know how many files in jobs for this import dir) - import_dir.num_files=overall_file_cnt - else: - FinishJob( job, "Finished Importing: {} -- Path does not exist".format( path), "Failed" ) - for j in session.query(Job).filter(Job.wait_for==job.id).all(): - if DEBUG==1: - print("DEBUG: cancelling job: {} as it was waiting for this failed job: {}".format(job.id, j.id) ) - FinishJob(j, "Job has been withdrawn as the job being waited for failed", "Withdrawn" ) - session.commit() return def FilesInDir( path ): @@ -594,9 +509,8 @@ def HashAndThumbDirHasNew(dir): return 1 def ProcessAI(job, e): - print('AING',e) - if e.type.name != 'Image': + job.current_file_num+=1 return people = session.query(Person).all() @@ -606,15 +520,15 @@ def ProcessAI(job, e): file = e.in_dir[0].path_prefix + '/' + e.name im_orig = Image.open(file) im = ImageOps.exif_transpose(im_orig) - unknown_encodings = generateUnknownEncodings(im) for unknown_encoding in unknown_encodings: for person in people: - lookForPersonInImage(person, unknown_encoding, e) + lookForPersonInImage(job, person, unknown_encoding, e) + AddLogForJob(job, f"Finished processing {e.name}", e.name ) return -def lookForPersonInImage(person, unknown_encoding, e): +def lookForPersonInImage(job, person, unknown_encoding, e): for refimg in person.refimg: deserialized_bytes = numpy.frombuffer(refimg.encodings, dtype=numpy.float64) #deserialized_x = numpy.reshape(deserialized_bytes, newshape=(2,2)) @@ -622,6 +536,7 @@ def lookForPersonInImage(person, unknown_encoding, e): results = compareAI(deserialized_bytes, unknown_encoding) if results[0]: print(f'Found a match between: {person.tag} and {e.name}') + AddLogForJob(job, f'Found a match between: {person.tag} and {e.name}') fpl = File_Person_Link(person_id=person.id, file_id=e.file_details[0].eid) session.add(fpl) return @@ -632,9 +547,6 @@ def generateUnknownEncodings(im): unknown_encodings = face_recognition.face_encodings(unknown_image, known_face_locations=face_locations) # should save these to the db # file.locations = face_locations - - - return unknown_encodings @@ -750,7 +662,8 @@ def GenVideoThumbnail(job, file): res, im_ar = vcap.read() im_ar = cv2.resize(im_ar, (160, 90), 0, 0, cv2.INTER_LINEAR) res, thumb_buf = cv2.imencode('.jpeg', im_ar) - bt = thumb_buf.tostring() +# bt = thumb_buf.tostring() + bt = thumb_buf.tobytes() thumbnail = base64.b64encode(bt) thumbnail = str(thumbnail)[2:-1] return thumbnail