From 312a4829aa727d5c44c77f178a5fd0b28097d854 Mon Sep 17 00:00:00 2001
From: giomba <giomba@glgprograms.it>
Date: Sun, 28 Oct 2018 11:03:26 +0100
Subject: [PATCH] Database extraction only when needed

Keeps a timestamp of the last database update, and extracts the database
only if it has changed since the last extraction (more efficient).
Modified library struct: it now contains not only a book list but also
information about the library (specifically, the last update timestamp).
The last database update is shown in the HTML and JSON output.
Added .htaccess for the Apache web server to prevent Tellico files from
leaking out of the output directory.
---
 .gitignore        |  1 +
 main.py           | 32 +++++++++++++++++++-------------
 output/.gitignore |  2 ++
 output/.htaccess  |  3 +++
 tcparser.py       | 35 ++++++++++++++++++++++++-----------
 5 files changed, 49 insertions(+), 24 deletions(-)
 create mode 100644 output/.gitignore
 create mode 100644 output/.htaccess
diff --git a/.gitignore b/.gitignore
index 4fb8631..f343551 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ __pycache__/
 test/
 input/
 images/
+.directory
diff --git a/main.py b/main.py
index 8e86fd7..c4283a3 100755
--- a/main.py
+++ b/main.py
@@ -22,8 +22,9 @@ import json
 import sys
 import cgitb, cgi
 import zipfile
-import tempfile
 import shutil as sh
+import os
+import time
 
 # Our custom library (again no pun intended)
 import tcparser
@@ -49,17 +50,25 @@ print()
 ### End of HTTP headers:  it is now safe to output things
 ##########################################################
 
-# Unzip Tellico .tc database in temporary directory
-tmpOutDir = tempfile.mkdtemp()
-zipHandler = zipfile.ZipFile(settings.path, 'r')
-zipHandler.extractall(tmpOutDir)
-zipHandler.close()
-# Move images to webserver folder
-sh.rmtree('./images')
-sh.move(tmpOutDir + '/images', '.')
+luh = open('./output/lastupdate.txt', 'r')
+lu = int(float(luh.read()))
+luh.close()
+
+mtime = os.path.getmtime(settings.path)
+
+outdir = './output'
+
+if int(lu) < int(mtime):
+    # Unzip Tellico .tc database
+    zipHandler = zipfile.ZipFile(settings.path, 'r')
+    zipHandler.extractall(outdir)
+    zipHandler.close()
+    luh = open('./output/lastupdate.txt', 'w')
+    luh.write(str(time.time()))
+    luh.close()
 
 # Get a Python-friendly library struct from XML file
-library = tcparser.getLibrary(tmpOutDir + "/tellico.xml")
+library = tcparser.getLibrary(outdir + "/tellico.xml", lu)
 
 ### Get filters to search for books ###
 try:
@@ -81,6 +90,3 @@ if format == 'html':
 if format == 'json':
     # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-)
     print(json.dumps(result, indent=4))
-
-# Delete temp files
-sh.rmtree(tmpOutDir)
diff --git a/output/.gitignore b/output/.gitignore
new file mode 100644
index 0000000..651aa3a
--- /dev/null
+++ b/output/.gitignore
@@ -0,0 +1,2 @@
+*
+!.htaccess
diff --git a/output/.htaccess b/output/.htaccess
new file mode 100644
index 0000000..c804eac
--- /dev/null
+++ b/output/.htaccess
@@ -0,0 +1,3 @@
+<Files tellico.xml>
+    Require all denied
+</Files>
diff --git a/tcparser.py b/tcparser.py
index de04339..1763f7f 100644
--- a/tcparser.py
+++ b/tcparser.py
@@ -1,9 +1,10 @@
 import xml.etree.ElementTree as ET
 import json
+import datetime
 
 # Parse Tellico's XML and get a library struct,
 # a stripped version of our library in a Python-friendly format
-def getLibrary(path):
+def getLibrary(path, lastUpdate):
     # Get XML string from file
     fh = open(path)
     xmlstring = fh.read()
@@ -17,7 +18,9 @@ def getLibrary(path):
 
     collection = root[0]
 
-    library = list()
+    library = dict()
+    library['lastupdate'] = lastUpdate
+    library['books'] = list()
 
     for i in collection.findall('entry'):
         newbook = dict()
@@ -44,7 +47,7 @@ def getLibrary(path):
             for k in j.findall('author'):
                 newbook['authors'].append(k.text)
 
-        library.append(newbook)
+        library['books'].append(newbook)
 
     return library
 
@@ -77,8 +80,14 @@ def getHTML(library):
     body.append(main)
     main.append(h1)
 
+    # Last database update string
+    p = ET.Element('p')
+    # p.text = 'Last DB update: ' + str(main.lu)
+    p.text = 'Last database update ' + datetime.date.fromtimestamp(library['lastupdate']).strftime('%d %B %Y')
+    main.append(p)
+
     # Check for empty resultset
-    if len(library) == 0:
+    if len(library['books']) == 0:
         p = ET.Element('p')
         p.text = "No items"
         main.append(p)
@@ -99,7 +108,7 @@ def getHTML(library):
         tr.append(th)
 
     # Add a row in our table for every book in the library object
-    for i in library:
+    for i in library['books']:
         tr = ET.Element('tr')
 
         id = ET.Element('td')
@@ -108,7 +117,7 @@ def getHTML(library):
 
         cover = ET.Element('td')
         if i.get('cover'):
-            img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'images/' + i.get('cover')})
+            img = ET.Element('img', attrib={'alt': 'Book "' + i.get('title') + '" cover', 'src': 'output/images/' + i.get('cover')})
             cover.append(img)
 
         tr.append(cover)
@@ -150,15 +159,19 @@ def getHTML(library):
 # Filter results using following filter functions and order by title
 ####################################################################
 def filter(library, title='', author=''):
-    result = list()
+    resultBookList = list()
 
-    for i in library:
+    for i in library['books']:
         if filterTitle(i, title) and filterAuthor(i, author):
-            result.append(i)
+            resultBookList.append(i)
 
-    sortedResult = sorted(result, key=lambda k: k.get('title', ''))
+    sortedResultBookList = sorted(resultBookList, key=lambda k: k.get('title', ''))
 
-    return sortedResult
+    result = dict()
+    result['lastupdate'] = library['lastupdate']
+    result['books'] = sortedResultBookList
+
+    return result
 
 # Filter by title
 def filterTitle(book, filter):