Merge pull request 'Merge dev/remoteFetch' (#2) from dev/remoteFetch into master

Reviewed-on: #2
giuliof 2021-09-14 19:41:11 +00:00
commit 9819fbdead
2 changed files with 98 additions and 51 deletions


@@ -9,7 +9,9 @@
[default]
# Path to Tellico .tc database to use
path = /path/to/some/tellico.tc
path = http://path/to/some/tellico.tc
user = user
pswd = pswd
# Path to output directory for images and temporary data
# Must be accessible by webserver
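
The change above replaces the local filesystem path with an HTTP URL plus basic-auth credentials, so the Tellico database can live on a remote server. A minimal sketch of how such a [default] section could be read and used for an authenticated request; the file name conf/tpdf.conf is an assumption and is not shown in this diff:

import configparser
import requests

# Load the [default] section of the configuration (file name is hypothetical)
conf = configparser.ConfigParser()
conf.read('conf/tpdf.conf')
section = conf['default']

# HTTP basic-auth HEAD request against the remote .tc file, mirroring what main.py does below
resp = requests.head(section['path'], auth=(section['user'], section['pswd']))
print(resp.status_code, resp.headers.get('Last-Modified'))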

main.py

@@ -20,65 +20,110 @@ import json
import sys
import cgitb, cgi
import zipfile
from io import BytesIO
import shutil as sh
import os
import time
import logging
# Connection to remote library file
import requests
# Parsing of HTTP RFC 1123 datetime format
from email.utils import parsedate_to_datetime
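# For reference, the Last-Modified value fetched below is an RFC 1123 date such as
# 'Tue, 14 Sep 2021 19:41:11 GMT'; parsedate_to_datetime(value).timestamp() turns it
# into a POSIX timestamp that can be compared against the cached lastupdate value.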
# Our custom library (again no pun intended)
import tcparser
# Global variables and configurations
import glob
# Start CGI handling for webserver
cgitb.enable()
inputvars = cgi.FieldStorage()
print('Content-Type: text/json; charset=utf-8')
print('Access-Control-Allow-Origin: *')
print()
### End of HTTP headers: it is now safe to output things
##########################################################
# Create output directory and temporary files if they do not exist
if not os.path.exists(glob.conf['default']['outdir']):
    os.mkdir(glob.conf['default']['outdir'])

if not os.path.exists(glob.conf['default']['outdir'] + '/lastupdate.txt'):
    luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w')
    luh.write('0')
    luh.close()
# Retrieve last database update timestamp
luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'r')
lu = int(float(luh.read()))
luh.close()
mtime = os.path.getmtime(glob.conf['default']['path'])
if int(lu) < int(mtime):
    # Unzip Tellico .tc database
    zipHandler = zipfile.ZipFile(glob.conf['default']['path'], 'r')
    zipHandler.extractall(glob.conf['default']['outdir'])
    zipHandler.close()

    luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w')
    luh.write(str(time.time()))
    luh.close()
# Get a Python-friendly library struct from XML file
library = tcparser.getLibrary(glob.conf['default']['outdir'] + "/tellico.xml", lu)
### Get filters to search for books ###
try:
    title = inputvars['title'].value
except KeyError:
    title = ''

try:
    author = inputvars['author'].value
except KeyError:
    author = ''
# Start the logging library (to avoid printing on stdout)
# TODO
logging.basicConfig(filename='conf/tpdf.log', level=logging.DEBUG)
try:
    # Start CGI handling for webserver
    cgitb.enable()
    inputvars = cgi.FieldStorage()

    logging.debug("Started CGI")

    print('Content-Type: text/json; charset=utf-8')
    print('Access-Control-Allow-Origin: *')
    print()
    ### End of HTTP headers: it is now safe to output things
    ##########################################################
    # Create output directory and temporary files if they do not exist
    if not os.path.exists(glob.conf['default']['outdir']):
        logging.debug("missing outdir, creating...")
        os.mkdir(glob.conf['default']['outdir'])

    if not os.path.exists(glob.conf['default']['outdir'] + '/lastupdate.txt'):
        logging.debug("missing lastupdate.txt, creating...")
        luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w')
        luh.write('0')
        luh.close()
    # Retrieve last database update timestamp
    luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'r')
    try:
        lu = int(float(luh.read()))
    except ValueError:
        lu = 0
    luh.close()

    logging.info("last database update timestamp is %d" % lu)
    # Fetch last modified from HTTP header
    path = glob.conf['default']['path']
    user = glob.conf['default']['user']
    pswd = glob.conf['default']['pswd']
    req = requests.head(path, auth=(user, pswd))
logging.debug("fetched header from %s, returned code %d" % (path, req.status_code))
cachefile = glob.conf['default']['outdir'] + "/tellico.xml"
# If header fetch fails I can't update cache.
# Try with current one, if exists
if req.status_code == 200 and 'Last-modified' in req.headers:
mtime = int(parsedate_to_datetime(req.headers['Last-modified']).timestamp())
logging.info("Tellico last modified timestamp is %d" % mtime)
# If local xml is out-of-date or missing, try download it
if int(lu) < int(mtime) or not os.path.isfile(cachefile):
logging.info("Out-of-date, updating")
# Download Tellico .tc database
req = requests.get(path, auth=(user, pswd))
if req.status_code == 200 and req.content != None:
# Unzip Tellico .tc database and "cache it" locally
zipHandler = zipfile.ZipFile(BytesIO(req.content), 'r')
zipHandler.extractall(glob.conf['default']['outdir'])
zipHandler.close()
luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w')
luh.write(str(mtime))
luh.close()
    else:
        logging.error("Update failed")
    # Get a Python-friendly library struct from XML file
    library = tcparser.getLibrary(cachefile, lu)

    ### Get filters to search for books ###
    try:
        title = inputvars['title'].value
    except KeyError:
        title = ''

    try:
        author = inputvars['author'].value
    except KeyError:
        author = ''
    result = tcparser.filter(library, title=title, author=author)

    # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-)
    print(json.dumps(result, indent=4))
# Avoid printing on std{out,err} the unexpected exception traces. Log it instead.
except Exception as e:
    logging.exception(e)
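
Putting the new main.py flow together: configure logging, emit the CGI/HTTP headers, refresh the local cache only when the remote file's Last-Modified timestamp is newer than the recorded one, then filter the parsed library and print it as JSON. A condensed, self-contained sketch of the fetch-and-cache step; the helper name refresh_cache and its signature are illustrative, not code from this repository:

import os
import zipfile
from io import BytesIO
from email.utils import parsedate_to_datetime
import requests

def refresh_cache(url, auth, outdir, last_update):
    # Ask only for the headers first, so an up-to-date cache costs a single HEAD request
    head = requests.head(url, auth=auth)
    if head.status_code != 200 or 'Last-Modified' not in head.headers:
        return last_update  # cannot check the remote copy: keep using the current cache
    mtime = int(parsedate_to_datetime(head.headers['Last-Modified']).timestamp())
    cachefile = os.path.join(outdir, 'tellico.xml')
    if last_update >= mtime and os.path.isfile(cachefile):
        return last_update  # cache is still fresh
    resp = requests.get(url, auth=auth)
    if resp.status_code == 200 and resp.content:
        # A Tellico .tc file is a zip archive holding tellico.xml plus cover images
        with zipfile.ZipFile(BytesIO(resp.content)) as archive:
            archive.extractall(outdir)
        return mtime  # record the server's timestamp as the new lastupdate value
    return last_update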