Database extraction only when needed

Keeps a timestamp of the last database update, and extracts the database
only if it has changed since the last extraction (more efficient).
Modified the library struct: it now contains not only a book list but also
some information about the library (specifically, the last update timestamp).
The last database update is shown in the HTML and JSON output.
Added .htaccess for the Apache web server to prevent Tellico files from
leaking from the output directory.
This commit is contained in:
giomba 2018-10-28 11:03:26 +01:00
parent b6e818d66d
commit 312a4829aa
5 changed files with 49 additions and 24 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@ __pycache__/
test/ test/
input/ input/
images/ images/
.directory

32
main.py
View File

@ -22,8 +22,9 @@ import json
import sys import sys
import cgitb, cgi import cgitb, cgi
import zipfile import zipfile
import tempfile
import shutil as sh import shutil as sh
import os
import time
# Our custom library (again no pun intended) # Our custom library (again no pun intended)
import tcparser import tcparser
@ -49,17 +50,25 @@ print()
### End of HTTP headers: it is now safe to output things ### End of HTTP headers: it is now safe to output things
########################################################## ##########################################################
# Unzip Tellico .tc database in temporary directory luh = open('./output/lastupdate.txt', 'r')
tmpOutDir = tempfile.mkdtemp() lu = int(float(luh.read()))
zipHandler = zipfile.ZipFile(settings.path, 'r') luh.close()
zipHandler.extractall(tmpOutDir)
zipHandler.close() mtime = os.path.getmtime(settings.path)
# Move images to webserver folder
sh.rmtree('./images') outdir = './output'
sh.move(tmpOutDir + '/images', '.')
if int(lu) < int(mtime):
# Unzip Tellico .tc database
zipHandler = zipfile.ZipFile(settings.path, 'r')
zipHandler.extractall(outdir)
zipHandler.close()
luh = open('./output/lastupdate.txt', 'w')
luh.write(str(time.time()))
luh.close()
# Get a Python-friendly library struct from XML file # Get a Python-friendly library struct from XML file
library = tcparser.getLibrary(tmpOutDir + "/tellico.xml") library = tcparser.getLibrary(outdir + "/tellico.xml", lu)
### Get filters to search for books ### ### Get filters to search for books ###
try: try:
@ -81,6 +90,3 @@ if format == 'html':
if format == 'json': if format == 'json':
# Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-) # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-)
print(json.dumps(result, indent=4)) print(json.dumps(result, indent=4))
# Delete temp files
sh.rmtree(tmpOutDir)

2
output/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.htaccess

3
output/.htaccess Normal file
View File

@ -0,0 +1,3 @@
<Files tellico.xml>
Require all denied
</Files>

View File

@ -1,9 +1,10 @@
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import json import json
import datetime
# Parse Tellico's XML and get a library struct, # Parse Tellico's XML and get a library struct,
# a stripped version of our library in a Python-friendly format # a stripped version of our library in a Python-friendly format
def getLibrary(path): def getLibrary(path, lastUpdate):
# Get XML string from file # Get XML string from file
fh = open(path) fh = open(path)
xmlstring = fh.read() xmlstring = fh.read()
@ -17,7 +18,9 @@ def getLibrary(path):
collection = root[0] collection = root[0]
library = list() library = dict()
library['lastupdate'] = lastUpdate
library['books'] = list()
for i in collection.findall('entry'): for i in collection.findall('entry'):
newbook = dict() newbook = dict()
@ -44,7 +47,7 @@ def getLibrary(path):
for k in j.findall('author'): for k in j.findall('author'):
newbook['authors'].append(k.text) newbook['authors'].append(k.text)
library.append(newbook) library['books'].append(newbook)
return library return library
@ -77,8 +80,14 @@ def getHTML(library):
body.append(main) body.append(main)
main.append(h1) main.append(h1)
# Last database update string
p = ET.Element('p')
# p.text = 'Last DB update: ' + str(main.lu)
p.text = 'Last database update ' + datetime.date.fromtimestamp(library['lastupdate']).strftime('%d %B %Y')
main.append(p)
# Check for empty resultset # Check for empty resultset
if len(library) == 0: if len(library['books']) == 0:
p = ET.Element('p') p = ET.Element('p')
p.text = "No items" p.text = "No items"
main.append(p) main.append(p)
@ -99,7 +108,7 @@ def getHTML(library):
tr.append(th) tr.append(th)
# Add a row in our table for every book in the library object # Add a row in our table for every book in the library object
for i in library: for i in library['books']:
tr = ET.Element('tr') tr = ET.Element('tr')
id = ET.Element('td') id = ET.Element('td')
@ -108,7 +117,7 @@ def getHTML(library):
cover = ET.Element('td') cover = ET.Element('td')
if i.get('cover'): if i.get('cover'):
img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'images/' + i.get('cover')}) img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'output/images/' + i.get('cover')})
cover.append(img) cover.append(img)
tr.append(cover) tr.append(cover)
@ -150,15 +159,19 @@ def getHTML(library):
# Filter results using following filter functions and order by title # Filter results using following filter functions and order by title
#################################################################### ####################################################################
def filter(library, title='', author=''): def filter(library, title='', author=''):
result = list() resultBookList = list()
for i in library: for i in library['books']:
if filterTitle(i, title) and filterAuthor(i, author): if filterTitle(i, title) and filterAuthor(i, author):
result.append(i) resultBookList.append(i)
sortedResult = sorted(result, key=lambda k: k.get('title', '')) sortedResultBookList = sorted(resultBookList, key=lambda k: k.get('title', ''))
return sortedResult result = dict()
result['lastupdate'] = library['lastupdate']
result['books'] = sortedResultBookList
return result
# Filter by title # Filter by title
def filterTitle(book, filter): def filterTitle(book, filter):