From 312a4829aa727d5c44c77f178a5fd0b28097d854 Mon Sep 17 00:00:00 2001 From: giomba Date: Sun, 28 Oct 2018 11:03:26 +0100 Subject: [PATCH] Database extraction only when needed Keeps a timestamp of the last database update, and extracts the database only if it has changed since the last extraction (more efficient). Modified library struct: it now contains not only a book list but also some information about the library (specifically, the last update timestamp). The last database update is shown in HTML and JSON output. Added .htaccess for Apache webserver to prevent Tellico file leaks from the output directory. --- .gitignore | 1 + main.py | 32 +++++++++++++++++++------------- output/.gitignore | 2 ++ output/.htaccess | 3 +++ tcparser.py | 35 ++++++++++++++++++++++++----------- 5 files changed, 49 insertions(+), 24 deletions(-) create mode 100644 output/.gitignore create mode 100644 output/.htaccess diff --git a/.gitignore b/.gitignore index 4fb8631..f343551 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__/ test/ input/ images/ +.directory diff --git a/main.py b/main.py index 8e86fd7..c4283a3 100755 --- a/main.py +++ b/main.py @@ -22,8 +22,9 @@ import json import sys import cgitb, cgi import zipfile -import tempfile import shutil as sh +import os +import time # Our custom library (again no pun intended) import tcparser @@ -49,17 +50,25 @@ print() ### End of HTTP headers: it is now safe to output things ########################################################## -# Unzip Tellico .tc database in temporary directory -tmpOutDir = tempfile.mkdtemp() -zipHandler = zipfile.ZipFile(settings.path, 'r') -zipHandler.extractall(tmpOutDir) -zipHandler.close() -# Move images to webserver folder -sh.rmtree('./images') -sh.move(tmpOutDir + '/images', '.') +luh = open('./output/lastupdate.txt', 'r') +lu = int(float(luh.read())) +luh.close() + +mtime = os.path.getmtime(settings.path) + +outdir = './output' + +if int(lu) < int(mtime): + # Unzip Tellico .tc database + zipHandler = 
zipfile.ZipFile(settings.path, 'r') + zipHandler.extractall(outdir) + zipHandler.close() + luh = open('./output/lastupdate.txt', 'w') + luh.write(str(time.time())) + luh.close() # Get a Python-friendly library struct from XML file -library = tcparser.getLibrary(tmpOutDir + "/tellico.xml") +library = tcparser.getLibrary(outdir + "/tellico.xml", lu) ### Get filters to search for books ### try: @@ -81,6 +90,3 @@ if format == 'html': if format == 'json': # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-) print(json.dumps(result, indent=4)) - -# Delete temp files -sh.rmtree(tmpOutDir) diff --git a/output/.gitignore b/output/.gitignore new file mode 100644 index 0000000..651aa3a --- /dev/null +++ b/output/.gitignore @@ -0,0 +1,2 @@ +* +!.htaccess diff --git a/output/.htaccess b/output/.htaccess new file mode 100644 index 0000000..c804eac --- /dev/null +++ b/output/.htaccess @@ -0,0 +1,3 @@ + + Require all denied + diff --git a/tcparser.py b/tcparser.py index de04339..1763f7f 100644 --- a/tcparser.py +++ b/tcparser.py @@ -1,9 +1,10 @@ import xml.etree.ElementTree as ET import json +import datetime # Parse Tellico's XML and get a library struct, # a stripped version of our library in a Python-friendly format -def getLibrary(path): +def getLibrary(path, lastUpdate): # Get XML string from file fh = open(path) xmlstring = fh.read() @@ -17,7 +18,9 @@ def getLibrary(path): collection = root[0] - library = list() + library = dict() + library['lastupdate'] = lastUpdate + library['books'] = list() for i in collection.findall('entry'): newbook = dict() @@ -44,7 +47,7 @@ def getLibrary(path): for k in j.findall('author'): newbook['authors'].append(k.text) - library.append(newbook) + library['books'].append(newbook) return library @@ -77,8 +80,14 @@ def getHTML(library): body.append(main) main.append(h1) + # Last database update string + p = ET.Element('p') + # p.text = 'Last DB update: ' + str(main.lu) + p.text = 'Last database update ' + 
datetime.date.fromtimestamp(library['lastupdate']).strftime('%d %B %Y') + main.append(p) + # Check for empty resultset - if len(library) == 0: + if len(library['books']) == 0: p = ET.Element('p') p.text = "No items" main.append(p) @@ -99,7 +108,7 @@ def getHTML(library): tr.append(th) # Add a row in our table for every book in the library object - for i in library: + for i in library['books']: tr = ET.Element('tr') id = ET.Element('td') @@ -108,7 +117,7 @@ def getHTML(library): cover = ET.Element('td') if i.get('cover'): - img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'images/' + i.get('cover')}) + img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'output/images/' + i.get('cover')}) cover.append(img) tr.append(cover) @@ -150,15 +159,19 @@ def getHTML(library): # Filter results using following filter functions and order by title #################################################################### def filter(library, title='', author=''): - result = list() + resultBookList = list() - for i in library: + for i in library['books']: if filterTitle(i, title) and filterAuthor(i, author): - result.append(i) + resultBookList.append(i) - sortedResult = sorted(result, key=lambda k: k.get('title', '')) + sortedResultBookList = sorted(resultBookList, key=lambda k: k.get('title', '')) - return sortedResult + result = dict() + result['lastupdate'] = library['lastupdate'] + result['books'] = sortedResultBookList + + return result # Filter by title def filterTitle(book, filter):