Database extraction only when needed

Keeps a timestamp of the last database update and extracts the database only if it has changed since the last extraction (more efficient). Modified the library struct: it now contains not only a book list but also some information about the library (specifically, the last-update timestamp). The last database update is shown in the HTML and JSON output. Added an .htaccess file for the Apache web server to prevent Tellico files from leaking from the output directory.
2018-10-28 11:03:26 +01:00 · 2018-10-28 11:03:26 +01:00 · 312a4829aa
parent b6e818d66d
commit 312a4829aa
5 changed files with 49 additions and 24 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,4 @@ __pycache__/
 test/
 input/
 images/
+.directory
--- a/main.py
+++ b/main.py
@ -22,8 +22,9 @@ import json
 import sys
 import cgitb, cgi
 import zipfile
-import tempfile
 import shutil as sh
+import os
+import time

 # Our custom library (again no pun intended)
 import tcparser
@ -49,17 +50,25 @@ print()
 ### End of HTTP headers:  it is now safe to output things
 ##########################################################

-# Unzip Tellico .tc database in temporary directory
-tmpOutDir = tempfile.mkdtemp()
-zipHandler = zipfile.ZipFile(settings.path, 'r')
-zipHandler.extractall(tmpOutDir)
-zipHandler.close()
-# Move images to webserver folder
-sh.rmtree('./images')
-sh.move(tmpOutDir + '/images', '.')
+luh = open('./output/lastupdate.txt', 'r')
+lu = int(float(luh.read()))
+luh.close()
+
+mtime = os.path.getmtime(settings.path)
+
+outdir = './output'
+
+if int(lu) < int(mtime):
+    # Unzip Tellico .tc database
+    zipHandler = zipfile.ZipFile(settings.path, 'r')
+    zipHandler.extractall(outdir)
+    zipHandler.close()
+    luh = open('./output/lastupdate.txt', 'w')
+    luh.write(str(time.time()))
+    luh.close()

 # Get a Python-friendly library struct from XML file
-library = tcparser.getLibrary(tmpOutDir + "/tellico.xml")
+library = tcparser.getLibrary(outdir + "/tellico.xml", lu)

 ### Get filters to search for books ###
 try:
@ -81,6 +90,3 @@ if format == 'html':
 if format == 'json':
    # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-)
    print(json.dumps(result, indent=4))
-
-# Delete temp files
-sh.rmtree(tmpOutDir)
--- a/output/.gitignore
+++ b/output/.gitignore
@ -0,0 +1,2 @@
+*
+!.htaccess
--- a/output/.htaccess
+++ b/output/.htaccess
@ -0,0 +1,3 @@
+<Files tellico.xml>
+    Require all denied
+</Files>
--- a/tcparser.py
+++ b/tcparser.py
@ -1,9 +1,10 @@
 import xml.etree.ElementTree as ET
 import json
+import datetime

 # Parse Tellico's XML and get a library struct,
 # a stripped version of our library in a Python-friendly format
-def getLibrary(path):
+def getLibrary(path, lastUpdate):
    # Get XML string from file
    fh = open(path)
    xmlstring = fh.read()
@ -17,7 +18,9 @@ def getLibrary(path):

    collection = root[0]

-    library = list()
+    library = dict()
+    library['lastupdate'] = lastUpdate
+    library['books'] = list()

    for i in collection.findall('entry'):
        newbook = dict()
@ -44,7 +47,7 @@ def getLibrary(path):
            for k in j.findall('author'):
                newbook['authors'].append(k.text)

-        library.append(newbook)
+        library['books'].append(newbook)

    return library

@ -77,8 +80,14 @@ def getHTML(library):
    body.append(main)
    main.append(h1)

+    # Last database update string
+    p = ET.Element('p')
+    # p.text = 'Last DB update: ' + str(main.lu)
+    p.text = 'Last database update ' + datetime.date.fromtimestamp(library['lastupdate']).strftime('%d %B %Y')
+    main.append(p)
+
    # Check for empty resultset
-    if len(library) == 0:
+    if len(library['books']) == 0:
        p = ET.Element('p')
        p.text = "No items"
        main.append(p)
@ -99,7 +108,7 @@ def getHTML(library):
        tr.append(th)

    # Add a row in our table for every book in the library object
-    for i in library:
+    for i in library['books']:
        tr = ET.Element('tr')

        id = ET.Element('td')
@ -108,7 +117,7 @@ def getHTML(library):

        cover = ET.Element('td')
        if i.get('cover'):
-            img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'images/' + i.get('cover')})
+            img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'output/images/' + i.get('cover')})
            cover.append(img)

        tr.append(cover)
@ -150,15 +159,19 @@ def getHTML(library):
 # Filter results using following filter functions and order by title
 ####################################################################
 def filter(library, title='', author=''):
-    result = list()
+    resultBookList = list()

-    for i in library:
+    for i in library['books']:
        if filterTitle(i, title) and filterAuthor(i, author):
-            result.append(i)
+            resultBookList.append(i)

-    sortedResult = sorted(result, key=lambda k: k.get('title', ''))
+    sortedResultBookList = sorted(resultBookList, key=lambda k: k.get('title', ''))

-    return sortedResult
+    result = dict()
+    result['lastupdate'] = library['lastupdate']
+    result['books'] = sortedResultBookList
+
+    return result

 # Filter by title
 def filterTitle(book, filter):