Had to make pa_eng global in the Init* function, which worked. There is now a basic harness to run a job; the scannow job was run by pulling in much of the files.py code, which will be removed from files.py once this is finished. Will have to move to IPC (via sockets?) to allow better coordination of new-job processing and updating.

This commit is contained in:
2021-01-16 23:54:31 +11:00
parent e138ab22aa
commit b01bdea505

View File

@@ -11,22 +11,30 @@
#
###
#### DDP: work out the import line for just sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Sequence, ForeignKey, DateTime
from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, DateTime
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from shared import DB_URL
from datetime import datetime, timedelta
import pytz
import time
import os
import glob
from PIL import Image
from pymediainfo import MediaInfo
import hashlib
import exifread
import base64
import numpy
import cv2
# an Manager, which the Session will use for connection resources
some_manager = create_engine(DB_URL)
some_engine = create_engine(DB_URL)
# create a configured "Session" class
Session = sessionmaker(bind=some_manager)
Session = sessionmaker(bind=some_engine)
# create a Session
session = Session()
@@ -38,6 +46,162 @@ Base = declarative_base()
# Module-global handle to the singleton PA_JobManager row. It is populated by
# InitialiseManager() (which declares it global) and read/updated by
# HandleJobs() and the __main__ harness.
pa_eng=None
################################################################################
# FileData class...
################################################################################
class FileData():
    """Scans the configured import paths and records file metadata in the DB.

    Helper methods classify filesystem entries (directory / image / video /
    other), hash their contents, and build base64 JPEG thumbnails for the UI.
    """

    def getExif(self, file):
        """Return the EXIF-embedded JPEG thumbnail of *file*, base64-encoded.

        Raises whatever exifread raises, or KeyError when the file carries no
        'JPEGThumbnail' tag.
        """
        # 'with' guarantees the handle is closed on success AND failure
        # (original closed it manually on both paths).
        with open(file, 'rb') as f:
            try:
                tags = exifread.process_file(f)
            except Exception:
                print('NO EXIF TAGS?!?!?!?')
                raise
        fthumbnail = base64.b64encode(tags['JPEGThumbnail'])
        # b64encode returns bytes; decode to the plain text stored in the DB
        # (equivalent to the original str(...)[2:-1] trick).
        return fthumbnail.decode('ascii')

    def isVideo(self, file):
        """Return True when MediaInfo reports a video track in *file*."""
        try:
            fileInfo = MediaInfo.parse(file)
        except Exception:
            # Unparseable file: best-effort answer is "not a video".
            return False
        return any(track.track_type == "Video" for track in fileInfo.tracks)

    # Converts linux paths into windows paths
    # HACK: assumes c:, might be best to just look for [a-z]: ?
    def FixPath(self, p):
        """Rewrite forward slashes to backslashes when *p* is a c: path."""
        if p.startswith('c:'):
            p = p.replace('/', '\\')
        return p

    # Returns an md5 hash of the fnames' contents
    def md5(self, fname):
        """Return the hex MD5 digest of the contents of *fname*."""
        hash_md5 = hashlib.md5()
        with open(fname, "rb") as f:
            # Stream in 4 KiB chunks so large media files never load into RAM.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def isImage(self, file):
        """Return True when PIL can open *file* as an image."""
        try:
            # Use a context manager so the handle is closed immediately
            # (the original leaked it until garbage collection).
            with Image.open(file):
                return True
        except Exception:
            return False

    def generateVideoThumbnail(self, file):
        """Return a 160x90 base64 JPEG thumbnail for video *file*.

        Skips near-black leading frames (mean brightness < 15).
        Raises ValueError when no usable frame can be read.
        """
        # overall wrapper function for generating video thumbnails
        vcap = cv2.VideoCapture(file)
        try:
            res, im_ar = vcap.read()
            # Bug fix: the original evaluated im_ar.mean() before checking
            # res, crashing on a failed first read (im_ar is None).
            while res and im_ar.mean() < 15:
                res, im_ar = vcap.read()
            if not res or im_ar is None:
                raise ValueError("could not read a frame from {}".format(file))
        finally:
            # Release the capture device/file handle (original leaked it).
            vcap.release()
        im_ar = cv2.resize(im_ar, (160, 90), 0, 0, cv2.INTER_LINEAR)
        # save on a buffer for direct transmission; '.jpeg' etc are permitted
        res, thumb_buf = cv2.imencode('.jpeg', im_ar)
        # tostring() is deprecated/removed in modern numpy; tobytes() is the
        # byte-for-byte identical replacement.
        bt = thumb_buf.tobytes()
        fthumbnail = base64.b64encode(bt)
        return fthumbnail.decode('ascii')

    ##############################################################################
    # HACK: At present this only handles one path (need to re-factor if we have #
    # multiple valid paths in import_path)                                      #
    ##############################################################################
    def GenerateFileData(self):
        """Scan every import path for entries newer than the last import.

        For each new entry a File row is added with its type, size, md5 hash
        and (for images/videos) a base64 thumbnail; afterwards
        settings.last_import_date is advanced and the session committed.
        """
        settings = session.query(Settings).first()
        if settings is None:
            return
        last_import_date = settings.last_import_date
        # import_path holds one or more '#'-separated paths.
        paths = settings.import_path.split("#")
        for path in paths:
            print("GenerateFileData: Checking {}".format(path))
            path = self.FixPath(path)
            if not os.path.exists(path):
                continue
            # to serve static content of the images, we create a symlink
            # from inside the static subdir of each import_path that exists
            symlink = self.FixPath('static/{}'.format(os.path.basename(path[0:-1])))
            if not os.path.exists(symlink):
                os.symlink(path, symlink)
            for file in glob.glob(path + '**', recursive=True):
                if file == path:
                    continue
                stat = os.stat(file)
                if last_import_date != 0 and stat.st_ctime <= last_import_date:
                    print("{} - {} is OLDER than {}".format(file, stat.st_ctime, last_import_date))
                    continue
                print("{} - {} is newer than {}".format(file, stat.st_ctime, last_import_date))
                fthumbnail = None
                fhash = None
                if os.path.isdir(file):
                    ftype = 'Directory'
                elif self.isImage(file):
                    ftype = 'Image'
                    fthumbnail = self.getExif(file)
                elif self.isVideo(file):
                    ftype = 'Video'
                    fthumbnail = self.generateVideoThumbnail(file)
                else:
                    ftype = 'File'
                if ftype != "Directory":
                    fhash = self.md5(file)
                # Reuse the stat taken above instead of stat-ing a second time.
                fsize = round(stat.st_size / (1024 * 1024))
                fname = file.replace(path, "")
                path_prefix = symlink.replace(path, "")
                file_obj = File(name=fname, type=ftype, size_mb=fsize,
                                hash=fhash, path_prefix=path_prefix,
                                thumbnail=fthumbnail)
                session.add(file_obj)
        settings.last_import_date = time.time()
        session.commit()
        return self
################################################################################
# Class describing File in the database, and via sqlalchemy, connected to the DB as well
# This has to match one-for-one the DB table
################################################################################
class File(Base):
    """ORM row describing one scanned filesystem entry."""
    __tablename__ = "file"
    id = Column(Integer, Sequence('file_id_seq'), primary_key=True)
    # Path relative to the import root (import prefix stripped by the scanner).
    name = Column(String, unique=True, nullable=False)
    # 'Directory', 'Image', 'Video' or 'File'.
    type = Column(String, unique=False, nullable=False)
    path_prefix = Column(String, unique=False, nullable=False)
    size_mb = Column(Integer, unique=False, nullable=False)
    # hash might not be unique, this could be the source of dupe problems
    # Bug fix: FileData.md5() stores hexdigest(), a hex *string*, so the
    # column type must be String, not Integer.
    hash = Column(String, unique=True, nullable=True)
    # Base64-encoded JPEG thumbnail, NULL for directories and plain files.
    thumbnail = Column(String, unique=False, nullable=True)

    def __repr__(self):
        return "<id: {}, name: {}>".format(self.id, self.name)
class Settings(Base):
    """ORM row holding scanner configuration: what to import and when the
    last import completed."""
    __tablename__ = "settings"
    id = Column(Integer, Sequence('settings_id_seq'), primary_key=True)
    # One or more '#'-separated filesystem paths to scan.
    import_path = Column(String)
    # Unix timestamp (float seconds) of the most recent completed import.
    last_import_date = Column(Float)

    def __repr__(self):
        return f"<id: {self.id}, import_path: {self.import_path}, last_import_date: {self.last_import_date}>"
### Initialise the file data set
filedata = FileData()
#### DDP: work out the class creation line for just sqlalchemy
class PA_JobManager(Base):
__tablename__ = "pa_job_manager"
@@ -86,17 +250,32 @@ def GetJobs():
return session.query(Job).all()
def InitialiseManager():
    """Load the singleton PA_JobManager row into the module-global pa_eng,
    creating a fresh one (state 'Initialising') on first run."""
    global pa_eng
    pa_eng = session.query(PA_JobManager).first()
    print(pa_eng)
    # Fix: compare to None with `is`, not `==` (PEP 8; `==` can be hijacked
    # by operator overloads).
    if pa_eng is None:
        pa_eng = PA_JobManager(state='Initialising', num_active_jobs=0, num_completed_jobs=0)
        # NOTE(review): the new row is added but not committed here —
        # presumably a later session.commit() persists it; confirm.
        session.add(pa_eng)
    return
def RunJob(job):
    """Dispatch *job* to its handler based on job.name.

    'scannow' is handled by JobScanNow; 'forcescan' is not implemented yet;
    any other name is reported as unknown.
    """
    print("Run job: {}, pa_eng state: {}, internal job state: {}".format(job.name, job.pa_job_state, job.state))
    if job.name == "scannow":
        # Fix: removed the stale "scannow not being handled yet" print —
        # scannow IS handled, right here.
        JobScanNow(job)
    elif job.name == "forcescan":
        print("force scan not being handled yet")
    else:
        print("Requested to process unknown job type: {}".format(job.name))
    return
def HandleJobs():
pa_eng.state = 'Scanning Jobs'
jobs=GetJobs()
for job in jobs:
if job.pa_job_state != 'complete':
print("We have a job we need to handle, its current pa_eng state is {}, internal job state is: {}".format( job.pa_job_state, job.state) )
RunJob(job)
pa_eng.num_active_jobs = pa_eng.num_active_jobs + 1
else:
pa_eng.num_completed_jobs = pa_eng.num_completed_jobs +1
@@ -104,8 +283,15 @@ def InitialiseManager():
pa_eng.state = 'Waiting for new Jobs'
return
def CreateJob():
    """Stub: creating jobs is not implemented yet; returns None."""
    return
def JobScanNow(job):
    """Handle a 'scannow' job: run a full file scan, then mark *job* complete.

    Sets both the job-internal state and the manager-visible pa_job_state to
    'Completed', stamps last_update with an aware UTC time, and commits.
    """
    print("About to call GenerateFileData()")
    filedata.GenerateFileData()
    # Fix: dropped the stray trailing semicolon and the stale
    # "need to update job" print — the job is updated right below.
    job.state = "Completed"
    job.pa_job_state = "Completed"
    job.last_update = datetime.now(pytz.utc)
    session.commit()
    return
def UpdateJob():
    """Stub: updating jobs is not implemented yet; returns None."""
    return
@@ -118,4 +304,10 @@ if __name__ == "__main__":
except Exception as e:
print( "Failed to initialise PA Job Manager: {}".format(e) )
session.rollback()
print("Exiting for now")
print("Exiting for now: {}".format( pa_eng ))
cnt=0
while cnt < 3:
HandleJobs()
time.sleep(60)
cnt=cnt+1