Had to make pa_eng global in the Init* func, but that worked. There is now a basic harness to run a job, and the scannow job ran by pulling in much of the files.py code; that code will be removed from files.py when this work is finished. Will have to move to IPC (via sockets?) to allow better coordination of new-job processing and updating.
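
A rough sketch of the socket IPC idea, in case it helps later: the job creator (the web app) could poke the manager over a Unix domain socket after it commits a new Job row, so the manager wakes immediately instead of polling on a timer. The socket path, both function names, and the handle_jobs hook are placeholders; none of this is in the commit below.

    import os
    import socket

    SOCK_PATH = "/tmp/pa_job_manager.sock"  # hypothetical rendezvous point

    def serve_notifications(handle_jobs):
        # Manager side: block until a peer connects, then rescan the job table.
        if os.path.exists(SOCK_PATH):
            os.unlink(SOCK_PATH)
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as srv:
            srv.bind(SOCK_PATH)
            srv.listen(1)
            while True:
                conn, _ = srv.accept()
                with conn:
                    conn.recv(64)  # payload is just a nudge; contents ignored
                handle_jobs()      # e.g. HandleJobs() from this module

    def notify_new_job():
        # Creator side (the web app): poke the manager after session.commit().
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as cli:
            cli.connect(SOCK_PATH)
            cli.sendall(b"wake")

This would replace the fixed time.sleep(60) poll at the bottom of the file; the same shape works over TCP if the manager and the web app end up on separate hosts.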
@@ -11,22 +11,30 @@
 #
 ###
 #### DDP: work out the import line for just sqlalchemy
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy import Column, Integer, String, Sequence, ForeignKey, DateTime
+from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 from shared import DB_URL
 from datetime import datetime, timedelta
 import pytz
 
+import time
+import os
+import glob
+from PIL import Image
+from pymediainfo import MediaInfo
+import hashlib
+import exifread
+import base64
+import numpy
+import cv2
+
 # an Manager, which the Session will use for connection resources
-some_manager = create_engine(DB_URL)
+some_engine = create_engine(DB_URL)
 
 # create a configured "Session" class
-Session = sessionmaker(bind=some_manager)
+Session = sessionmaker(bind=some_engine)
 
 # create a Session
 session = Session()
@@ -38,6 +46,162 @@ Base = declarative_base()
 pa_eng=None
 
+
+################################################################################
+# FileData class...
+################################################################################
+
+class FileData():
+    def getExif(self, file):
+        f = open(file, 'rb')
+        try:
+            tags = exifread.process_file(f)
+        except:
+            print('NO EXIF TAGS?!?!?!?')
+            f.close()
+            raise
+        f.close()
+
+        fthumbnail = base64.b64encode(tags['JPEGThumbnail'])
+        fthumbnail = str(fthumbnail)[2:-1]
+        return fthumbnail
+
+    def isVideo(self, file):
+        try:
+            fileInfo = MediaInfo.parse(file)
+            for track in fileInfo.tracks:
+                if track.track_type == "Video":
+                    return True
+            return False
+        except Exception as e:
+            return False
+
+    # Converts linux paths into windows paths
+    # HACK: assumes c:, might be best to just look for [a-z]: ?
+    def FixPath(self, p):
+        if p.startswith('c:'):
+            p = p.replace('/', '\\')
+        return p
+
+    # Returns an md5 hash of the fnames' contents
+    def md5(self, fname):
+        hash_md5 = hashlib.md5()
+        with open(fname, "rb") as f:
+            for chunk in iter(lambda: f.read(4096), b""):
+                hash_md5.update(chunk)
+        return hash_md5.hexdigest()
+
+    def isImage(self, file):
+        try:
+            img = Image.open(file)
+            return True
+        except:
+            return False
+
+    def generateVideoThumbnail(self, file):
+        #overall wrapper function for generating video thumbnails
+        vcap = cv2.VideoCapture(file)
+        res, im_ar = vcap.read()
+        while im_ar.mean() < 15 and res:
+            res, im_ar = vcap.read()
+        im_ar = cv2.resize(im_ar, (160, 90), 0, 0, cv2.INTER_LINEAR)
+        #save on a buffer for direct transmission
+        res, thumb_buf = cv2.imencode('.jpeg', im_ar)
+        # '.jpeg' etc are permitted
+        #get the bytes content
+        bt = thumb_buf.tostring()
+        fthumbnail = base64.b64encode(bt)
+        fthumbnail = str(fthumbnail)[2:-1]
+        return fthumbnail
+
+
+    ##############################################################################
+    # HACK: At present this only handles one path (need to re-factor if we have #
+    # multiple valid paths in import_path)                                      #
+    ##############################################################################
+    def GenerateFileData(self):
+        settings = session.query(Settings).first()
+        if settings == None:
+            return
+        last_import_date = settings.last_import_date
+        paths = settings.import_path.split("#")
+
+        for path in paths:
+            print( "GenerateFileData: Checking {}".format( path ) )
+            path = self.FixPath(path)
+            if os.path.exists( path ):
+                # to serve static content of the images, we create a symlink
+                # from inside the static subdir of each import_path that exists
+                symlink = self.FixPath('static/{}'.format( os.path.basename(path[0:-1])))
+                if not os.path.exists(symlink):
+                    os.symlink(path, symlink)
+
+                file_list=[]
+                file_list.append(glob.glob(path + '**', recursive=True))
+                for file in file_list[0]:
+                    if file == path:
+                        continue
+                    stat = os.stat(file)
+                    if last_import_date == 0 or stat.st_ctime > last_import_date:
+                        print( "{} - {} is newer than {}".format( file, stat.st_ctime, last_import_date ) )
+                        fthumbnail = None
+                        if os.path.isdir(file):
+                            ftype = 'Directory'
+                        elif self.isImage(file):
+                            ftype = 'Image'
+                            fthumbnail = self.getExif(file)
+                        elif self.isVideo(file):
+                            ftype = 'Video'
+                            fthumbnail = self.generateVideoThumbnail(file)
+                        else:
+                            ftype = 'File'
+
+                        if ftype != "Directory":
+                            fhash=self.md5(file)
+                        else:
+                            fhash=None
+
+                        fsize = round(os.stat(file).st_size/(1024*1024))
+                        fname=file.replace(path, "")
+                        path_prefix=symlink.replace(path,"")
+                        file_obj = File( name=fname, type=ftype, size_mb=fsize, hash=fhash, path_prefix=path_prefix, thumbnail=fthumbnail )
+                        session.add(file_obj)
+                    else:
+                        print( "{} - {} is OLDER than {}".format( file, stat.st_ctime, last_import_date ) )
+        settings.last_import_date = time.time()
+        session.commit()
+        return self
+
+################################################################################
+# Class describing File in the database, and via sqlalchemy, connected to the DB as well
+# This has to match one-for-one the DB table
+################################################################################
+class File(Base):
+    __tablename__ = "file"
+    id = Column(Integer, Sequence('file_id_seq'), primary_key=True )
+    name = Column(String, unique=True, nullable=False )
+    type = Column(String, unique=False, nullable=False)
+    path_prefix = Column(String, unique=False, nullable=False)
+    size_mb = Column(Integer, unique=False, nullable=False)
+    # hash might not be unique, this could be the source of dupe problems
+    hash = Column(Integer, unique=True, nullable=True)
+    thumbnail = Column(String, unique=False, nullable=True)
+
+    def __repr__(self):
+        return "<id: {}, name: {}>".format(self.id, self.name )
+
+class Settings(Base):
+    __tablename__ = "settings"
+    id = Column(Integer, Sequence('settings_id_seq'), primary_key=True )
+    import_path = Column(String)
+    last_import_date = Column(Float)
+
+    def __repr__(self):
+        return "<id: {}, import_path: {}, last_import_date: {}>".format(self.id, self.import_path, self.last_import_date)
+
+
+### Initiatlise the file data set
+filedata = FileData()
 
 #### DDP: work out the class creation line for just sqlalchemy
 class PA_JobManager(Base):
     __tablename__ = "pa_job_manager"
@@ -86,17 +250,32 @@ def GetJobs():
     return session.query(Job).all()
 
 def InitialiseManager():
+    global pa_eng
+
     pa_eng=session.query(PA_JobManager).first()
+    print(pa_eng)
     if( pa_eng == None ):
         pa_eng = PA_JobManager(state='Initialising', num_active_jobs=0, num_completed_jobs=0 )
         session.add(pa_eng)
     return
 
 def RunJob(job):
+    print("Run job: {}, pa_eng state: {}, internal job state: {}".format( job.name, job.pa_job_state, job.state) )
     if job.name =="scannow":
-        print("scannow not being handled yet")
+        JobScanNow(job)
     elif job.name =="forcescan":
         print("force scan not being handled yet")
     else:
         print("Requested to process unknown job type: {}".format(job.name))
     return
+
+def HandleJobs():
+    pa_eng.state = 'Scanning Jobs'
+    jobs=GetJobs()
+    for job in jobs:
+        if job.pa_job_state != 'complete':
+            print("We have a job we need to handle, its current pa_eng state is {}, internal job state is: {}".format( job.pa_job_state, job.state) )
+            RunJob(job)
+            pa_eng.num_active_jobs = pa_eng.num_active_jobs + 1
+        else:
+            pa_eng.num_completed_jobs = pa_eng.num_completed_jobs +1
@@ -104,7 +283,14 @@ def InitialiseManager():
     pa_eng.state = 'Waiting for new Jobs'
     return
 
-def CreateJob():
+def JobScanNow(job):
+    print("About to call GenerateFileData()")
+    filedata.GenerateFileData()
+    print( "need to update job");
+    job.state="Completed"
+    job.pa_job_state="Completed"
+    job.last_update=datetime.now(pytz.utc)
+    session.commit()
     return
 
 def UpdateJob():
@@ -118,4 +304,10 @@ if __name__ == "__main__":
     except Exception as e:
         print( "Failed to initialise PA Job Manager: {}".format(e) )
         session.rollback()
-    print("Exiting for now")
+    print("Exiting for now: {}".format( pa_eng ))
+
+    cnt=0
+    while cnt < 3:
+        HandleJobs()
+        time.sleep(60)
+        cnt=cnt+1