###
#
# This file controls the 'external' job control manager, which periodically
# looks for (or is somehow pushed an event about) new jobs, picks them up,
# and processes them.
#
# It stores the progress/status, etc. in the job and joblog tables as needed,
# via wrapper functions.
#
# The whole pa_job_manager is multi-threaded, and uses the database tables for
# state management and communication back to the pa web site.
#
###
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from shared import DB_URL, PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT

from datetime import datetime, timedelta
import pytz
import time
import os
import glob
from PIL import Image
from pymediainfo import MediaInfo
import hashlib
import exifread
import base64
import numpy
import cv2
import socket
import threading

DEBUG = 1

# an Engine, which the Session will use for connection resources
some_engine = create_engine(DB_URL)

# create a configured "Session" class
Session = sessionmaker(bind=some_engine)

# create a Session
session = Session()

Base = declarative_base()

# global for us to keep state / let the front-end know our state
pa_eng = None
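# The tables mapped below are assumed to already exist (presumably the pa web
# site owns the schema). Purely as a hedged sketch: if this file were run
# standalone against an empty database, the schema could be created once the
# model classes further down have been declared. Not called anywhere.
def _example_create_schema():
    Base.metadata.create_all(some_engine)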
################################################################################
# FileData class...
################################################################################
class FileData():

    def getExif(self, file):
        f = open(file, 'rb')
        try:
            tags = exifread.process_file(f)
        except:
            print('WARNING: no EXIF tags found for: {}'.format(file))
            f.close()
            raise
        f.close()
        fthumbnail = base64.b64encode(tags['JPEGThumbnail'])
        fthumbnail = str(fthumbnail)[2:-1]
        return fthumbnail

    def isVideo(self, file):
        try:
            fileInfo = MediaInfo.parse(file)
            for track in fileInfo.tracks:
                if track.track_type == "Video":
                    return True
            return False
        except Exception:
            return False

    # Converts linux paths into windows paths
    # HACK: assumes c:, might be best to just look for [a-z]: ?
    def FixPath(self, p):
        if p.startswith('c:'):
            p = p.replace('/', '\\')
        return p

    # Returns an md5 hash of the fname's contents
    def md5(self, fname):
        hash_md5 = hashlib.md5()
        with open(fname, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def isImage(self, file):
        try:
            Image.open(file)
            return True
        except:
            return False

    def generateVideoThumbnail(self, file):
        # overall wrapper function for generating video thumbnails
        vcap = cv2.VideoCapture(file)
        res, im_ar = vcap.read()
        # skip over frames that are almost entirely black
        while res and im_ar.mean() < 15:
            res, im_ar = vcap.read()
        im_ar = cv2.resize(im_ar, (160, 90), 0, 0, cv2.INTER_LINEAR)
        # save on a buffer for direct transmission
        res, thumb_buf = cv2.imencode('.jpeg', im_ar)   # '.jpeg' etc. are permitted
        # get the bytes content
        bt = thumb_buf.tobytes()
        fthumbnail = base64.b64encode(bt)
        fthumbnail = str(fthumbnail)[2:-1]
        return fthumbnail

    ##############################################################################
    def ProcessImportDirs(self):
        settings = session.query(Settings).first()
        if settings is None:
            raise Exception("Cannot create file data with no settings / import path is missing")
        last_import_date = settings.last_import_date
        paths = settings.import_path.split("#")
        for path in paths:
            # make a new Job; HandleJobs will make them run later
            jex = JobExtra(name="path", value=path)
            job = Job(start_time=datetime.now(pytz.utc),
                      last_update=datetime.now(pytz.utc),
                      name="importdir",
                      state="New",
                      wait_for=None)
            job.extra.append(jex)
            session.add(job)
        return

    ##############################################################################
    def GenerateFileData(self, job):
        settings = session.query(Settings).first()
        if settings is None:
            return
        last_import_date = settings.last_import_date
        paths = settings.import_path.split("#")
        for path in paths:
            AddLogForJob(job, "Checking Import Directory: {}".format(path))
            path = self.FixPath(path)
            if os.path.exists(path):
                # to serve static content of the images, we create a symlink
                # from inside the static subdir to each import_path that exists
                symlink = self.FixPath('static/{}'.format(os.path.basename(path[0:-1])))
                if not os.path.exists(symlink):
                    os.symlink(path, symlink)
                file_list = []
                file_list.append(glob.glob(path + '**', recursive=True))
                for file in file_list[0]:
                    if file == path:
                        continue
                    fname = file.replace(path, "")
                    stat = os.stat(file)
                    if last_import_date == 0 or stat.st_ctime > last_import_date:
                        AddLogForJob(job, "DEBUG: {} - {} is newer than {}".format(file, stat.st_ctime, last_import_date))
                        fthumbnail = None
                        if os.path.isdir(file):
                            ftype = 'Directory'
                        elif self.isImage(file):
                            ftype = 'Image'
                            fthumbnail = self.getExif(file)
                        elif self.isVideo(file):
                            ftype = 'Video'
                            fthumbnail = self.generateVideoThumbnail(file)
                        else:
                            ftype = 'File'
                        if ftype != "Directory":
                            fhash = self.md5(file)
                        else:
                            fhash = None
                        fsize = round(os.stat(file).st_size / (1024 * 1024))
                        path_prefix = symlink.replace(path, "")
                        file_obj = File(name=fname,
                                        type=ftype,
                                        size_mb=fsize,
                                        hash=fhash,
                                        path_prefix=path_prefix,
                                        thumbnail=fthumbnail)
                        session.add(file_obj)
                        AddLogForJob(job, "Found new file: {}".format(fname))
                    else:
                        AddLogForJob(job, "DEBUG: {} - {} is OLDER than {}".format(file, stat.st_ctime, last_import_date), file)
                    # include this for making the job a bit slower, allowing
                    # testing of active jobs / the job refresh page, etc.
                    #time.sleep(0.4)
        settings.last_import_date = time.time()
        session.commit()
        return self
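# ProcessImportDirs() above only queues work: one Job row per import path, in
# state "New", with the path itself carried as a JobExtra name/value pair.
# As a rough, illustrative sketch (not called anywhere; the query shape is an
# assumption), this is how the pa web site could read progress back out of the
# job and joblog tables defined further down:
def _example_report_job_progress():
    for job in session.query(Job).order_by(Job.id):
        print(job.id, job.name, job.state, job.current_file)
        for entry in job.logs:
            print("  ", entry.log_date, entry.log)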
################################################################################
# Classes describing Files in the database and, via sqlalchemy, connected to
# the DB as well. These have to match the DB tables one-for-one.
################################################################################
class EntryDirLink(Base):
    __tablename__ = "entry_dir_link"
    entry_id = Column(Integer, ForeignKey("entry.id"), primary_key=True)
    dir_eid = Column(Integer, ForeignKey("dir.eid"), primary_key=True)

    def __repr__(self):
        return "<EntryDirLink(entry_id={}, dir_eid={})>".format(self.entry_id, self.dir_eid)


class Dir(Base):
    __tablename__ = "dir"
    eid = Column(Integer, ForeignKey("entry.id"), primary_key=True)
    path_prefix = Column(String, unique=False, nullable=False)

    def __repr__(self):
        return "<Dir(eid={}, path_prefix={})>".format(self.eid, self.path_prefix)


class Entry(Base):
    __tablename__ = "entry"
    id = Column(Integer, Sequence('file_id_seq'), primary_key=True)
    name = Column(String, unique=True, nullable=False)
    type = Column(String, unique=False, nullable=False)
    dir_details = relationship("Dir")
    file_details = relationship("New_File")
    in_dir = relationship("Dir", secondary="entry_dir_link")

    def __repr__(self):
        return "<Entry(id={}, name={}, type={}, dir_details={}, file_details={}, in_dir={})>".format(
            self.id, self.name, self.type, self.dir_details, self.file_details, self.in_dir)


class New_File(Base):
    __tablename__ = "new_file"
    eid = Column(Integer, ForeignKey("entry.id"), primary_key=True)
    size_mb = Column(Integer, unique=False, nullable=False)
    hash = Column(Integer, unique=True, nullable=True)
    thumbnail = Column(String, unique=False, nullable=True)

    def __repr__(self):
        return "<New_File(eid={}, size_mb={}, hash={})>".format(self.eid, self.size_mb, self.hash)


class FileType(Base):
    __tablename__ = "file_type"
    id = Column(Integer, Sequence('file_type_id_seq'), primary_key=True)
    name = Column(String, unique=True, nullable=False)


class File(Base):
    __tablename__ = "file"
    id = Column(Integer, Sequence('file_id_seq'), primary_key=True)
    name = Column(String, unique=True, nullable=False)
    type = Column(String, unique=False, nullable=False)
    path_prefix = Column(String, unique=False, nullable=False)
    size_mb = Column(Integer, unique=False, nullable=False)
    # hash might not be unique; this could be the source of dupe problems
    hash = Column(Integer, unique=True, nullable=True)
    thumbnail = Column(String, unique=False, nullable=True)

    def __repr__(self):
        return "<File(id={}, name={})>".format(self.id, self.name)


class Settings(Base):
    __tablename__ = "settings"
    id = Column(Integer, Sequence('settings_id_seq'), primary_key=True)
    import_path = Column(String)
    last_import_date = Column(Float)

    def __repr__(self):
        return "<Settings(id={}, import_path={}, last_import_date={})>".format(
            self.id, self.import_path, self.last_import_date)


### Initialise the file data set
filedata = FileData()
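# A quick illustrative sketch (not called anywhere; names and values are
# assumptions) of how the Entry / Dir / New_File / entry_dir_link tables above
# are intended to hang together: a directory gets an Entry plus Dir details, a
# file gets an Entry plus New_File details, and in_dir links the file's Entry
# to the directory via the entry_dir_link association table.
def _example_new_schema_usage():
    d_entry = Entry(name="holiday_photos", type="Directory")
    d_entry.dir_details.append(Dir(path_prefix="static/holiday_photos"))
    f_entry = Entry(name="IMG_0001.jpg", type="Image")
    f_entry.file_details.append(New_File(size_mb=4, hash=None, thumbnail=None))
    f_entry.in_dir.append(d_entry.dir_details[0])   # creates the entry_dir_link row
    session.add_all([d_entry, f_entry])
    # session.commit()   # illustrative only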
################################################################################
# Classes for the job manager:
#   PA_JobManager (overall status tracking),
#   Job (and Joblog) for each job, and
#   PA_JobManager_FE_Message (to pass messages to the front-end web)
################################################################################
class PA_JobManager(Base):
    __tablename__ = "pa_job_manager"
    id = Column(Integer, Sequence('pa_job_manager_id_seq'), primary_key=True)
    state = Column(String)
    num_active_jobs = Column(Integer)
    num_completed_jobs = Column(Integer)

    def __repr__(self):
        return "<PA_JobManager(id={}, state={}, num_active_jobs={}, num_completed_jobs={})>".format(
            self.id, self.state, self.num_active_jobs, self.num_completed_jobs)


class Joblog(Base):
    __tablename__ = "joblog"
    id = Column(Integer, Sequence('joblog_id_seq'), primary_key=True)
    job_id = Column(Integer, ForeignKey('job.id'))
    log_date = Column(DateTime(timezone=True))
    log = Column(String)

    def __repr__(self):
        return "<Joblog(id={}, job_id={}, log_date={}, log={})>".format(
            self.id, self.job_id, self.log_date, self.log)


class JobExtra(Base):
    # NOTE: the original definition of this class was lost from the source;
    # the columns below are inferred from how JobExtra is used elsewhere in
    # this file (JobExtra(name=..., value=...) appended to job.extra).
    __tablename__ = "job_extra"
    id = Column(Integer, Sequence('job_extra_id_seq'), primary_key=True)
    job_id = Column(Integer, ForeignKey('job.id'))
    name = Column(String)
    value = Column(String)

    def __repr__(self):
        return "<JobExtra(id={}, job_id={}, name={}, value={})>".format(
            self.id, self.job_id, self.name, self.value)


class Job(Base):
    __tablename__ = "job"
    id = Column(Integer, Sequence('job_id_seq'), primary_key=True)
    start_time = Column(DateTime(timezone=True))
    last_update = Column(DateTime(timezone=True))
    name = Column(String)
    state = Column(String)
    num_passes = Column(Integer)
    current_pass = Column(Integer)
    num_files = Column(Integer)
    current_file_num = Column(Integer)
    current_file = Column(String)
    wait_for = Column(Integer)
    pa_job_state = Column(String)
    logs = relationship("Joblog")
    extra = relationship("JobExtra")

    def __repr__(self):
        return ("<Job(id={}, start_time={}, last_update={}, name={}, state={}, "
                "num_passes={}, current_pass={}, num_files={}, current_file_num={}, "
                "current_file={}, extra={}, logs={})>").format(
            self.id, self.start_time, self.last_update, self.name, self.state,
            self.num_passes, self.current_pass, self.num_files,
            self.current_file_num, self.current_file, self.extra, self.logs)


class PA_JobManager_FE_Message(Base):
    __tablename__ = "pa_job_manager_fe_message"
    id = Column(Integer, Sequence('pa_job_manager_fe_message_id_seq'), primary_key=True)
    job_id = Column(Integer, ForeignKey('job.id'), primary_key=True)
    alert = Column(String)
    message = Column(String)

    def __repr__(self):
        return "<PA_JobManager_FE_Message(id={}, job_id={}, alert={}, message={})>".format(
            self.id, self.job_id, self.alert, self.message)


################################################################################
# NOTE: the original definitions of the wrapper/driver functions below were
# lost from the source; these are minimal reconstructions, inferred only from
# how they are called elsewhere in this file.
################################################################################
def AddLogForJob(job, log, current_file=None):
    # store a log line against the job, and keep the job row's progress fresh
    job.last_update = datetime.now(pytz.utc)
    if current_file is not None:
        job.current_file = current_file
    job.logs.append(Joblog(log_date=datetime.now(pytz.utc), log=log))
    session.commit()


def InitialiseManager():
    # track our own state in the pa_job_manager table (global pa_eng)
    global pa_eng
    pa_eng = session.query(PA_JobManager).first()
    if pa_eng is None:
        pa_eng = PA_JobManager(state="Idle", num_active_jobs=0, num_completed_jobs=0)
        session.add(pa_eng)
    session.commit()


def HandleJobs():
    # pick up any jobs still in state "New" and run them
    for job in session.query(Job).filter(Job.state == "New"):
        job.state = "Running"
        session.commit()
        if job.name == "importdir":
            # dispatch by job name; the importdir job appears to be handled by
            # the Entry/Dir based import below (inferred, see NOTE above)
            GenerateEntryData(job)
        job.state = "Finished"
        if pa_eng is not None:
            pa_eng.num_completed_jobs = (pa_eng.num_completed_jobs or 0) + 1
        session.commit()


def GenerateEntryData(job):
    # NOTE: the name and the opening lines of this function were lost from the
    # source; they are reconstructed here from the parallel
    # FileData.GenerateFileData() above. The body from the date comparison
    # onwards is the original (Entry/Dir/New_File based) import code.
    settings = session.query(Settings).first()
    if settings is None:
        return
    last_import_date = settings.last_import_date
    paths = settings.import_path.split("#")
    dtype = FileType(name='Directory')
    for path in paths:
        AddLogForJob(job, "Checking Import Directory: {}".format(path))
        path = FixPath(path)
        if not os.path.exists(path):
            continue
        symlink = FixPath('static/{}'.format(os.path.basename(path[0:-1])))
        if not os.path.exists(symlink):
            os.symlink(path, symlink)
        for file in glob.glob(path + '**', recursive=True):
            if file == path:
                continue
            fname = file.replace(path, "")
            stat = os.stat(file)
            if last_import_date == 0 or stat.st_ctime > last_import_date:
                AddLogForJob(job, "DEBUG: {} - {} is newer than {}".format(file, stat.st_ctime, last_import_date))
                print("DEBUG: {} - {} is newer than {}".format(file, stat.st_ctime, last_import_date))
                if os.path.isdir(file):
                    path_prefix = os.path.join(symlink, fname)
                    e = Entry(name=fname, type=dtype.id)
                    dir = Dir(path_prefix=path_prefix)
                    e.dir_details.append(dir)
                    print("DEBUG: DIR- path={}, pp={}, sl={}".format(path, path_prefix, symlink))
                    # DEBUG: DIR- path=/home/ddp/src/photoassistant/images_to_process/, pp=static/images_to_process, sl=static/images_to_process
                else:
                    if isImage(file):
                        ftype = FileType(name='Image')
                    elif isVideo(file):
                        ftype = FileType(name='Video')
                    else:
                        ftype = FileType(name='File')
                    fsize = round(os.stat(file).st_size / (1024 * 1024))
                    e = Entry(name=os.path.basename(fname), type=ftype.id)
                    f = New_File(size_mb=fsize)
                    e.file_details.append(f)
                    e.in_dir.append(dir)
                    session.add(e)
                    print(session.new)
                AddLogForJob(job, "Found new file: {}".format(fname))
                print("Found new file: {}".format(fname))
            else:
                AddLogForJob(job, "DEBUG: {} - {} is OLDER than {}".format(file, stat.st_ctime, last_import_date), file)
                print("DEBUG: {} - {} is OLDER than {}".format(file, stat.st_ctime, last_import_date), file)
    #settings.last_import_date = time.time()
    session.commit()
    print("Ending, list session new objects")
    print("fake finished import dir")
    return


def isVideo(file):
    try:
        fileInfo = MediaInfo.parse(file)
        for track in fileInfo.tracks:
            if track.track_type == "Video":
                return True
        return False
    except Exception:
        return False


# Converts linux paths into windows paths
# HACK: assumes c:, might be best to just look for [a-z]: ?
def FixPath(p):
    if p.startswith('c:'):
        p = p.replace('/', '\\')
    return p
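# Illustrative only: the generalisation hinted at in the HACK note above would
# look for any single drive letter rather than hard-coding 'c:'. Hypothetical
# helper, not used anywhere in this file.
def FixPathAnyDrive(p):
    import re
    # convert forward slashes whenever the path starts with a drive letter, e.g. 'd:'
    if re.match(r'^[A-Za-z]:', p):
        p = p.replace('/', '\\')
    return p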
# Returns an md5 hash of the fname's contents
def md5(fname):
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


def isImage(file):
    try:
        Image.open(file)
        return True
    except:
        return False


if __name__ == "__main__":
    print("PA job manager starting")
    try:
        InitialiseManager()
        filedata.ProcessImportDirs()
        session.commit()
    except Exception as e:
        print("Failed to initialise PA Job Manager: {}".format(e))
        session.rollback()

    # run once at startup, then again every time the front-end pokes our socket
    HandleJobs()
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind((PA_JOB_MANAGER_HOST, PA_JOB_MANAGER_PORT))
        s.listen()
        while True:
            conn, addr = s.accept()
            print("Connection from: {} - running HandleJobs".format(addr))
            conn.close()
            HandleJobs()
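# For completeness, a minimal sketch (assumed, not part of the original source)
# of how the pa web site could nudge this manager after inserting a new Job
# row: simply connecting to the socket above is enough to trigger a
# HandleJobs() pass; nothing needs to be sent over the connection.
def _example_poke_job_manager(host=PA_JOB_MANAGER_HOST, port=PA_JOB_MANAGER_PORT):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, port))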