From e5143a86ad615c66c7531924796b19831365f22c Mon Sep 17 00:00:00 2001 From: giuliof Date: Sat, 4 Sep 2021 00:19:06 +0200 Subject: [PATCH 1/7] Tentativo di implementazione fetching remoto .tc. Da testare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Richiesta HTTP (o via webdav, è equivalente). - Uso del Last-modified nell'header HTTP per il rinnovo della cache. --- conf/conf.ini | 4 +++- main.py | 41 +++++++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/conf/conf.ini b/conf/conf.ini index 67763ad..ca856c4 100644 --- a/conf/conf.ini +++ b/conf/conf.ini @@ -9,7 +9,9 @@ [default] # Path to Tellico .tc database to use - path = /path/to/some/tellico.tc + path = http://path/to/some/tellico.tc + user = user + pswd = pswd # Path to output directory for images and temporary data # Must be accessible by webserver diff --git a/main.py b/main.py index 9659f4d..5cbbc0a 100755 --- a/main.py +++ b/main.py @@ -20,9 +20,14 @@ import json import sys import cgitb, cgi import zipfile +from io import BytesIO import shutil as sh import os import time +# Connection to remote library file +import requests +# Parsing of HTTP RFC 1123 datetime format +from email.utils import parsedate_to_datetime # Our custom library (again no pun intended) import tcparser @@ -52,19 +57,35 @@ luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'r') lu = int(float(luh.read())) luh.close() -mtime = os.path.getmtime(glob.conf['default']['path']) +# Fetch last modified from HTTP header +path = glob.conf['default']['path'] +user = glob.conf['default']['user'] +pswd = glob.conf['default']['pswd'] +req = requests.head(path, auth=(user, pswd)) -if int(lu) < int(mtime): - # Unzip Tellico .tc database - zipHandler = zipfile.ZipFile(glob.conf['default']['path'], 'r') - zipHandler.extractall(glob.conf['default']['outdir']) - zipHandler.close() - luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 
'w') - luh.write(str(time.time())) - luh.close() +cachefile = glob.conf['default']['outdir'] + "/tellico.xml" + +# If header fetch fails I can't update cache. +# Try with current one, if exists +if req.status_code == 200 and 'Last-modified' in req.headers: + mtime = parsedate_to_datetime(req.headers['Last-modified']).timestamp() + + # If local xml is out-of-date or missing, try download it + if int(lu) < int(mtime) or not os.path.isfile(cachefile): + # Download Tellico .tc database + req = requests.get(path, auth=(user, pswd)) + if req.status_code == 200 and req.content != None: + # Unzip Tellico .tc database and "cache it" locally + zipHandler = zipfile.ZipFile(BytesIO(req.content), 'r') + zipHandler.extractall(glob.conf['default']['outdir']) + zipHandler.close() + luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w') + luh.write(int(mtime)) + luh.close() # Get a Python-friendly library struct from XML file -library = tcparser.getLibrary(glob.conf['default']['outdir'] + "/tellico.xml", lu) +library = tcparser.getLibrary(cachefile, lu) +# TODO: properly handle missing file errors or things like this ### Get filters to search for books ### try: From 45752d29038fc95fd5b13a87c6de85ac105f2097 Mon Sep 17 00:00:00 2001 From: giuliof Date: Sat, 4 Sep 2021 10:45:51 +0200 Subject: [PATCH 2/7] Test con la libreria di logging --- main.py | 151 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 65 deletions(-) diff --git a/main.py b/main.py index 5cbbc0a..ae986da 100755 --- a/main.py +++ b/main.py @@ -24,82 +24,103 @@ from io import BytesIO import shutil as sh import os import time +import logging # Connection to remote library file import requests # Parsing of HTTP RFC 1123 datetime format from email.utils import parsedate_to_datetime - # Our custom library (again no pun intended) import tcparser - +# Global variables and configurations import glob -# Start CGI handling for webserver -cgitb.enable() -inputvars = 
cgi.FieldStorage() +try: -print('Content-Type: text/json; charset=utf-8') -print('Access-Control-Allow-Origin: *') -print() -### End of HTTP headers: it is now safe to output things -########################################################## + # Start the logging library (to avoid printing on stdout) + # TODO + logging.basicConfig(filename='conf/tpdf.log', encoding='utf-8', level=logging.DEBUG) -# Create output directory and temporary files if they do not exist -if not os.path.exists(glob.conf['default']['outdir']): - os.mkdir(glob.conf['default']['outdir']) -if not os.path.exists(glob.conf['default']['outdir'] + '/lastupdate.txt'): - luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w') - luh.write('0') + # Start CGI handling for webserver + cgitb.enable() + inputvars = cgi.FieldStorage() + + logging.debug("Started CGI") + + print('Content-Type: text/json; charset=utf-8') + print('Access-Control-Allow-Origin: *') + print() + ### End of HTTP headers: it is now safe to output things + ########################################################## + + # Create output directory and temporary files if they do not exist + if not os.path.exists(glob.conf['default']['outdir']): + logging.debug("missing outdir, creating...") + os.mkdir(glob.conf['default']['outdir']) + if not os.path.exists(glob.conf['default']['outdir'] + '/lastupdate.txt'): + logging.debug("missing lastupdate.txt, creating...") + luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w') + luh.write('0') + luh.close() + + # Retrieve last database update timestamp + luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'r') + lu = int(float(luh.read())) luh.close() - -# Retrieve last database update timestamp -luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'r') -lu = int(float(luh.read())) -luh.close() - -# Fetch last modified from HTTP header -path = glob.conf['default']['path'] -user = glob.conf['default']['user'] -pswd = glob.conf['default']['pswd'] -req = 
requests.head(path, auth=(user, pswd)) - -cachefile = glob.conf['default']['outdir'] + "/tellico.xml" - -# If header fetch fails I can't update cache. -# Try with current one, if exists -if req.status_code == 200 and 'Last-modified' in req.headers: - mtime = parsedate_to_datetime(req.headers['Last-modified']).timestamp() - - # If local xml is out-of-date or missing, try download it - if int(lu) < int(mtime) or not os.path.isfile(cachefile): - # Download Tellico .tc database - req = requests.get(path, auth=(user, pswd)) - if req.status_code == 200 and req.content != None: - # Unzip Tellico .tc database and "cache it" locally - zipHandler = zipfile.ZipFile(BytesIO(req.content), 'r') - zipHandler.extractall(glob.conf['default']['outdir']) - zipHandler.close() - luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w') - luh.write(int(mtime)) - luh.close() - -# Get a Python-friendly library struct from XML file -library = tcparser.getLibrary(cachefile, lu) -# TODO: properly handle missing file errors or things like this - -### Get filters to search for books ### -try: - title = inputvars['title'].value -except KeyError: - title = '' - -try: - author = inputvars['author'].value -except KeyError: - author = '' + logging.info("last database update timestamp is %d" % lu) -result = tcparser.filter(library, title=title, author=author) + # Fetch last modified from HTTP header + path = glob.conf['default']['path'] + user = glob.conf['default']['user'] + pswd = glob.conf['default']['pswd'] + req = requests.head(path, auth=(user, pswd)) -# Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-) -print(json.dumps(result, indent=4)) + logging.debug("fetched header from %s, returned code %d" % (path, req.status_code)) + + cachefile = glob.conf['default']['outdir'] + "/tellico.xml" + + # If header fetch fails I can't update cache. 
+ # Try with current one, if exists + if req.status_code == 200 and 'Last-modified' in req.headers: + mtime = int(parsedate_to_datetime(req.headers['Last-modified']).timestamp()) + logging.info("Tellico last modified timestamp is %d" % mtime) + + # If local xml is out-of-date or missing, try download it + if int(lu) < int(mtime) or not os.path.isfile(cachefile): + logging.info("Out-of-date, updating") + # Download Tellico .tc database + req = requests.get(path, auth=(user, pswd)) + if req.status_code == 200 and req.content != None: + # Unzip Tellico .tc database and "cache it" locally + zipHandler = zipfile.ZipFile(BytesIO(req.content), 'r') + zipHandler.extractall(glob.conf['default']['outdir']) + zipHandler.close() + luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w') + luh.write(mtime) + luh.close() + else: + logging.error("Update failed") + + # Get a Python-friendly library struct from XML file + library = tcparser.getLibrary(cachefile, lu) + # TODO: properly handle missing file errors or things like this + + ### Get filters to search for books ### + try: + title = inputvars['title'].value + except KeyError: + title = '' + + try: + author = inputvars['author'].value + except KeyError: + author = '' + + + result = tcparser.filter(library, title=title, author=author) + + # Wanna get a pretty JSON encoded library to do your nasty things offline at home? 
;-) + print(json.dumps(result, indent=4)) + +except: + logging.fatal("Unexpected error:", sys.exc_info()[0]) From cd6543f837c25bda245c2eb4ca53b697d1acea6d Mon Sep 17 00:00:00 2001 From: giuliof Date: Sat, 4 Sep 2021 10:50:11 +0200 Subject: [PATCH 3/7] Rimosso encoding --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index ae986da..b795f1a 100755 --- a/main.py +++ b/main.py @@ -38,7 +38,7 @@ try: # Start the logging library (to avoid printing on stdout) # TODO - logging.basicConfig(filename='conf/tpdf.log', encoding='utf-8', level=logging.DEBUG) + logging.basicConfig(filename='conf/tpdf.log', level=logging.DEBUG) # Start CGI handling for webserver cgitb.enable() From 54bbdb919edebdfeaa6c00bbcfe3f65b05bc57ad Mon Sep 17 00:00:00 2001 From: giuliof Date: Sat, 4 Sep 2021 11:00:37 +0200 Subject: [PATCH 4/7] Write a string --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index b795f1a..a39505d 100755 --- a/main.py +++ b/main.py @@ -96,7 +96,7 @@ try: zipHandler.extractall(glob.conf['default']['outdir']) zipHandler.close() luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'w') - luh.write(mtime) + luh.write(str(mtime)) luh.close() else: logging.error("Update failed") From 34dbffac9f4f688df9839851c88fcd4b1ca6fd4a Mon Sep 17 00:00:00 2001 From: giuliof Date: Sat, 4 Sep 2021 11:04:07 +0200 Subject: [PATCH 5/7] fatal->error --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index a39505d..8f88bc9 100755 --- a/main.py +++ b/main.py @@ -123,4 +123,4 @@ try: print(json.dumps(result, indent=4)) except: - logging.fatal("Unexpected error:", sys.exc_info()[0]) + logging.error("Unexpected error:", sys.exc_info()[0]) From df82337f1f6c7eb5aaa808d555077bbf978d395b Mon Sep 17 00:00:00 2001 From: giuliof Date: Sat, 4 Sep 2021 11:13:41 +0200 Subject: [PATCH 6/7] Proper way to handle logging of exceptions --- main.py | 5 +++-- 1 file changed, 
3 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 8f88bc9..4d4500e 100755 --- a/main.py +++ b/main.py @@ -122,5 +122,6 @@ try: # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-) print(json.dumps(result, indent=4)) -except: - logging.error("Unexpected error:", sys.exc_info()[0]) +# Avoid printing on str{out,err} the unexpected exception traces. Log it instead. +except Exception as e: + logging.exception(e) From 6c20ea6d0f5204dcbc13afd978e6e0b08d3de95e Mon Sep 17 00:00:00 2001 From: giuliof Date: Sat, 4 Sep 2021 11:20:53 +0200 Subject: [PATCH 7/7] Handled value error --- main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 4d4500e..2ed8152 100755 --- a/main.py +++ b/main.py @@ -64,7 +64,10 @@ try: # Retrieve last database update timestamp luh = open(glob.conf['default']['outdir'] + '/lastupdate.txt', 'r') - lu = int(float(luh.read())) + try: + lu = int(float(luh.read())) + except ValueError: + lu = 0 luh.close() logging.info("last database update timestamp is %d" % lu) @@ -103,7 +106,6 @@ try: # Get a Python-friendly library struct from XML file library = tcparser.getLibrary(cachefile, lu) - # TODO: properly handle missing file errors or things like this ### Get filters to search for books ### try: