Database extraction only when needed

Keeps a timestamp of the last database update, and extracts the database
only if it has changed since the last extraction (more efficient).
Modified the library struct: it now contains not only a book list but also
some information about the library (specifically, the last update timestamp).
The last database update is shown in the HTML and JSON output.
Added .htaccess for the Apache web server to prevent Tellico files from
leaking out of the output directory.
This commit is contained in:
giomba 2018-10-28 11:03:26 +01:00
parent b6e818d66d
commit 312a4829aa
5 changed files with 49 additions and 24 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@ __pycache__/
test/
input/
images/
.directory

32
main.py
View File

@ -22,8 +22,9 @@ import json
import sys
import cgitb, cgi
import zipfile
import tempfile
import shutil as sh
import os
import time
# Our custom library (again no pun intended)
import tcparser
@ -49,17 +50,25 @@ print()
### End of HTTP headers: it is now safe to output things
##########################################################
# Unzip Tellico .tc database in temporary directory
tmpOutDir = tempfile.mkdtemp()
zipHandler = zipfile.ZipFile(settings.path, 'r')
zipHandler.extractall(tmpOutDir)
zipHandler.close()
# Move images to webserver folder
sh.rmtree('./images')
sh.move(tmpOutDir + '/images', '.')
luh = open('./output/lastupdate.txt', 'r')
lu = int(float(luh.read()))
luh.close()
mtime = os.path.getmtime(settings.path)
outdir = './output'
if int(lu) < int(mtime):
# Unzip Tellico .tc database
zipHandler = zipfile.ZipFile(settings.path, 'r')
zipHandler.extractall(outdir)
zipHandler.close()
luh = open('./output/lastupdate.txt', 'w')
luh.write(str(time.time()))
luh.close()
# Get a Python-friendly library struct from XML file
library = tcparser.getLibrary(tmpOutDir + "/tellico.xml")
library = tcparser.getLibrary(outdir + "/tellico.xml", lu)
### Get filters to search for books ###
try:
@ -81,6 +90,3 @@ if format == 'html':
if format == 'json':
# Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-)
print(json.dumps(result, indent=4))
# Delete temp files
sh.rmtree(tmpOutDir)

2
output/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.htaccess

3
output/.htaccess Normal file
View File

@ -0,0 +1,3 @@
<Files tellico.xml>
Require all denied
</Files>

View File

@ -1,9 +1,10 @@
import xml.etree.ElementTree as ET
import json
import datetime
# Parse Tellico's XML and get a library struct,
# a stripped version of our library in a Python-friendly format
def getLibrary(path):
def getLibrary(path, lastUpdate):
# Get XML string from file
fh = open(path)
xmlstring = fh.read()
@ -17,7 +18,9 @@ def getLibrary(path):
collection = root[0]
library = list()
library = dict()
library['lastupdate'] = lastUpdate
library['books'] = list()
for i in collection.findall('entry'):
newbook = dict()
@ -44,7 +47,7 @@ def getLibrary(path):
for k in j.findall('author'):
newbook['authors'].append(k.text)
library.append(newbook)
library['books'].append(newbook)
return library
@ -77,8 +80,14 @@ def getHTML(library):
body.append(main)
main.append(h1)
# Last database update string
p = ET.Element('p')
# p.text = 'Last DB update: ' + str(main.lu)
p.text = 'Last database update ' + datetime.date.fromtimestamp(library['lastupdate']).strftime('%d %B %Y')
main.append(p)
# Check for empty resultset
if len(library) == 0:
if len(library['books']) == 0:
p = ET.Element('p')
p.text = "No items"
main.append(p)
@ -99,7 +108,7 @@ def getHTML(library):
tr.append(th)
# Add a row in our table for every book in the library object
for i in library:
for i in library['books']:
tr = ET.Element('tr')
id = ET.Element('td')
@ -108,7 +117,7 @@ def getHTML(library):
cover = ET.Element('td')
if i.get('cover'):
img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'images/' + i.get('cover')})
img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'output/images/' + i.get('cover')})
cover.append(img)
tr.append(cover)
@ -150,15 +159,19 @@ def getHTML(library):
# Filter results using following filter functions and order by title
####################################################################
def filter(library, title='', author=''):
result = list()
resultBookList = list()
for i in library:
for i in library['books']:
if filterTitle(i, title) and filterAuthor(i, author):
result.append(i)
resultBookList.append(i)
sortedResult = sorted(result, key=lambda k: k.get('title', ''))
sortedResultBookList = sorted(resultBookList, key=lambda k: k.get('title', ''))
return sortedResult
result = dict()
result['lastupdate'] = library['lastupdate']
result['books'] = sortedResultBookList
return result
# Filter by title
def filterTitle(book, filter):