Debian Apache adaptation

- Proper UTF-8 handling
- .htaccess CGI and environment variables
2018-11-03 12:52:36 +01:00 · 2018-11-03 12:52:36 +01:00 · 7273d3a079
parent 312a4829aa
commit 7273d3a079
4 changed files with 24 additions and 22 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@ test/
 input/
 images/
 .directory
+*.swp
+*~
+*.pyc
--- a/.htaccess
+++ b/.htaccess
@@ -0,0 +1,3 @@
+SetEnv LANG en_US.UTF-8
+Options +ExecCGI
+AddHandler cgi-script .py
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # -*- coding: UTF-8 -*-

 # GET me using a web browser,
@@ -85,8 +85,9 @@ except KeyError:
 result = tcparser.filter(library, title=title, author=author)

 if format == 'html':
-    html = tcparser.getHTML(result)
-    ET.dump(html)
+    htmlTree = tcparser.getHTML(result)
+    htmlString = ET.tostring(htmlTree.getroot(), encoding='unicode', method='html')
+    print(htmlString)
 if format == 'json':
    # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-)
    print(json.dumps(result, indent=4))
--- a/tcparser.py
+++ b/tcparser.py
@@ -1,28 +1,23 @@
 import xml.etree.ElementTree as ET
 import json
 import datetime
+import sys

 # Parse Tellico's XML and get a library struct,
 # a stripped version of our library in a Python-friendly format
 def getLibrary(path, lastUpdate):
-    # Get XML string from file
-    fh = open(path)
-    xmlstring = fh.read()
-    # Get rid of XML namespace
-    xmlstring = xmlstring.replace('xmlns="http://periapsis.org/tellico/"', '')
+    namespace = {'tellico': 'http://periapsis.org/tellico/' }

-    root = ET.fromstring(xmlstring)
+    xml = ET.parse(path)
+    root = xml.getroot()

-    if root[0].tag != 'collection':
-        sys.exit('No collection found')
-
-    collection = root[0]
+    collection = root.find('tellico:collection', namespace)

    library = dict()
    library['lastupdate'] = lastUpdate
    library['books'] = list()

-    for i in collection.findall('entry'):
+    for i in collection.findall('tellico:entry', namespace):
        newbook = dict()
        newbook['id'] = int(i.attrib['id'])
        # these `for` overwrite previous values,
@@ -30,21 +25,21 @@ def getLibrary(path, lastUpdate):
        # actually there is no 1to1 conversion between XML and JSON, so this operation
        # can not be made automagically without doing some preliminary assumptions
        # (Possibly useful module: https://github.com/martinblech/xmltodict)
-        for j in i.iter('title'):
+        for j in i.iterfind('tellico:title', namespace):
            newbook['title'] = j.text
-        for j in i.iter('publisher'):
+        for j in i.iterfind('tellico:publisher', namespace):
            newbook['publisher'] = j.text
-        for j in i.iter('pub_year'):
+        for j in i.iterfind('tellico:pub_year', namespace):
            newbook['year'] = int(j.text)
-        for j in i.iter('isbn'):
+        for j in i.iterfind('tellico:isbn', namespace):
            newbook['isbn'] = j.text.replace('-', '')
-        for j in i.iter('pages'):
+        for j in i.iterfind('tellico:pages', namespace):
            newbook['pages'] = int(j.text)
-        for j in i.iter('cover'):
+        for j in i.iterfind('tellico:cover', namespace):
            newbook['cover'] = j.text
        newbook['authors'] = list()
-        for j in i.iter('authors'):
-            for k in j.findall('author'):
+        for j in i.iterfind('tellico:authors', namespace):
+            for k in j.findall('tellico:author', namespace):
                newbook['authors'].append(k.text)

        library['books'].append(newbook)