diff --git a/pa_job_manager.py b/pa_job_manager.py index 46681e4..4eab369 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -11,22 +11,30 @@ # ### -#### DDP: work out the import line for just sqlalchemy from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy import Column, Integer, String, Sequence, ForeignKey, DateTime +from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime from sqlalchemy.exc import SQLAlchemyError from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from shared import DB_URL from datetime import datetime, timedelta import pytz - +import time +import os +import glob +from PIL import Image +from pymediainfo import MediaInfo +import hashlib +import exifread +import base64 +import numpy +import cv2 # an Manager, which the Session will use for connection resources -some_manager = create_engine(DB_URL) +some_engine = create_engine(DB_URL) # create a configured "Session" class -Session = sessionmaker(bind=some_manager) +Session = sessionmaker(bind=some_engine) # create a Session session = Session() @@ -38,6 +46,162 @@ Base = declarative_base() pa_eng=None +################################################################################ +# FileData class... +################################################################################ + +class FileData(): + def getExif(self, file): + f = open(file, 'rb') + try: + tags = exifread.process_file(f) + except: + print('NO EXIF TAGS?!?!?!?') + f.close() + raise + f.close() + + fthumbnail = base64.b64encode(tags['JPEGThumbnail']) + fthumbnail = str(fthumbnail)[2:-1] + return fthumbnail + + def isVideo(self, file): + try: + fileInfo = MediaInfo.parse(file) + for track in fileInfo.tracks: + if track.track_type == "Video": + return True + return False + except Exception as e: + return False + + # Converts linux paths into windows paths + # HACK: assumes c:, might be best to just look for [a-z]: ? 
+ def FixPath(self, p): + if p.startswith('c:'): + p = p.replace('/', '\\') + return p + + # Returns an md5 hash of the fnames' contents + def md5(self, fname): + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + + def isImage(self, file): + try: + img = Image.open(file) + return True + except: + return False + + def generateVideoThumbnail(self, file): + #overall wrapper function for generating video thumbnails + vcap = cv2.VideoCapture(file) + res, im_ar = vcap.read() + while im_ar.mean() < 15 and res: + res, im_ar = vcap.read() + im_ar = cv2.resize(im_ar, (160, 90), 0, 0, cv2.INTER_LINEAR) + #save on a buffer for direct transmission + res, thumb_buf = cv2.imencode('.jpeg', im_ar) + # '.jpeg' etc are permitted + #get the bytes content + bt = thumb_buf.tostring() + fthumbnail = base64.b64encode(bt) + fthumbnail = str(fthumbnail)[2:-1] + return fthumbnail + + + ############################################################################## + # HACK: At present this only handles one path (need to re-factor if we have # + # multiple valid paths in import_path) # + ############################################################################## + def GenerateFileData(self): + settings = session.query(Settings).first() + if settings == None: + return + last_import_date = settings.last_import_date + paths = settings.import_path.split("#") + + for path in paths: + print( "GenerateFileData: Checking {}".format( path ) ) + path = self.FixPath(path) + if os.path.exists( path ): + # to serve static content of the images, we create a symlink + # from inside the static subdir of each import_path that exists + symlink = self.FixPath('static/{}'.format( os.path.basename(path[0:-1]))) + if not os.path.exists(symlink): + os.symlink(path, symlink) + + file_list=[] + file_list.append(glob.glob(path + '**', recursive=True)) + for file in file_list[0]: + if file == path: + 
continue + stat = os.stat(file) + if last_import_date == 0 or stat.st_ctime > last_import_date: + print( "{} - {} is newer than {}".format( file, stat.st_ctime, last_import_date ) ) + fthumbnail = None + if os.path.isdir(file): + ftype = 'Directory' + elif self.isImage(file): + ftype = 'Image' + fthumbnail = self.getExif(file) + elif self.isVideo(file): + ftype = 'Video' + fthumbnail = self.generateVideoThumbnail(file) + else: + ftype = 'File' + + if ftype != "Directory": + fhash=self.md5(file) + else: + fhash=None + + fsize = round(os.stat(file).st_size/(1024*1024)) + fname=file.replace(path, "") + path_prefix=symlink.replace(path,"") + file_obj = File( name=fname, type=ftype, size_mb=fsize, hash=fhash, path_prefix=path_prefix, thumbnail=fthumbnail ) + session.add(file_obj) + else: + print( "{} - {} is OLDER than {}".format( file, stat.st_ctime, last_import_date ) ) + settings.last_import_date = time.time() + session.commit() + return self + +################################################################################ +# Class describing File in the database, and via sqlalchemy, connected to the DB as well +# This has to match one-for-one the DB table +################################################################################ +class File(Base): + __tablename__ = "file" + id = Column(Integer, Sequence('file_id_seq'), primary_key=True ) + name = Column(String, unique=True, nullable=False ) + type = Column(String, unique=False, nullable=False) + path_prefix = Column(String, unique=False, nullable=False) + size_mb = Column(Integer, unique=False, nullable=False) + # hash might not be unique, this could be the source of dupe problems + hash = Column(Integer, unique=True, nullable=True) + thumbnail = Column(String, unique=False, nullable=True) + + def __repr__(self): + return "".format(self.id, self.name ) + +class Settings(Base): + __tablename__ = "settings" + id = Column(Integer, Sequence('settings_id_seq'), primary_key=True ) + import_path = Column(String) + 
last_import_date = Column(Float) + + def __repr__(self): + return "".format(self.id, self.import_path, self.last_import_date) + + +### Initialise the file data set +filedata = FileData() + #### DDP: work out the class creation line for just sqlalchemy class PA_JobManager(Base): __tablename__ = "pa_job_manager" @@ -86,17 +250,32 @@ def GetJobs(): return session.query(Job).all() def InitialiseManager(): + global pa_eng + pa_eng=session.query(PA_JobManager).first() print(pa_eng) if( pa_eng == None ): pa_eng = PA_JobManager(state='Initialising', num_active_jobs=0, num_completed_jobs=0 ) session.add(pa_eng) + return +def RunJob(job): + print("Run job: {}, pa_eng state: {}, internal job state: {}".format( job.name, job.pa_job_state, job.state) ) + if job.name =="scannow": + print("scannow not being handled yet") + JobScanNow(job) + elif job.name =="forcescan": + print("force scan not being handled yet") + else: + print("Requested to process unknown job type: {}".format(job.name)) + return + +def HandleJobs(): pa_eng.state = 'Scanning Jobs' jobs=GetJobs() for job in jobs: if job.pa_job_state != 'complete': - print("We have a job we need to handle, its current pa_eng state is {}, internal job state is: {}".format( job.pa_job_state, job.state) ) + RunJob(job) pa_eng.num_active_jobs = pa_eng.num_active_jobs + 1 else: pa_eng.num_completed_jobs = pa_eng.num_completed_jobs +1 @@ -104,8 +283,15 @@ def InitialiseManager(): pa_eng.state = 'Waiting for new Jobs' return -def CreateJob(): - return +def JobScanNow(job): + print("About to call GenerateFileData()") + filedata.GenerateFileData() + print( "need to update job"); + job.state="Completed" + job.pa_job_state="Completed" + job.last_update=datetime.now(pytz.utc) + session.commit() + return def UpdateJob(): return @@ -118,4 +304,10 @@ if __name__ == "__main__": except Exception as e: print( "Failed to initialise PA Job Manager: {}".format(e) ) session.rollback() - print("Exiting for now") + print("Exiting for now: {}".format( 
pa_eng )) + cnt=0 + while cnt < 3: + HandleJobs() + time.sleep(60) + cnt=cnt+1 +