diff --git a/.gitignore b/.gitignore index f343551..db237ca 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ test/ input/ images/ .directory +*.swp +*~ +*.pyc diff --git a/.htaccess b/.htaccess new file mode 100644 index 0000000..cc6c746 --- /dev/null +++ b/.htaccess @@ -0,0 +1,3 @@ +SetEnv LANG en_US.UTF-8 +Options +ExecCGI +AddHandler cgi-script .py diff --git a/main.py b/main.py index c4283a3..65e70dc 100755 --- a/main.py +++ b/main.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: UTF-8 -*- # GET me using a web browser, @@ -85,8 +85,9 @@ except KeyError: result = tcparser.filter(library, title=title, author=author) if format == 'html': - html = tcparser.getHTML(result) - ET.dump(html) + htmlTree = tcparser.getHTML(result) + htmlString = ET.tostring(htmlTree.getroot(), encoding='unicode', method='html') + print(htmlString) if format == 'json': # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-) print(json.dumps(result, indent=4)) diff --git a/tcparser.py b/tcparser.py index 1763f7f..09ac145 100644 --- a/tcparser.py +++ b/tcparser.py @@ -1,28 +1,23 @@ import xml.etree.ElementTree as ET import json import datetime +import sys # Parse Tellico's XML and get a library struct, # a stripped version of our library in a Python-friendly format def getLibrary(path, lastUpdate): - # Get XML string from file - fh = open(path) - xmlstring = fh.read() - # Get rid of XML namespace - xmlstring = xmlstring.replace('xmlns="http://periapsis.org/tellico/"', '') + namespace = {'tellico': 'http://periapsis.org/tellico/' } - root = ET.fromstring(xmlstring) + xml = ET.parse(path) + root = xml.getroot() - if root[0].tag != 'collection': - sys.exit('No collection found') - - collection = root[0] + collection = root.find('tellico:collection', namespace) library = dict() library['lastupdate'] = lastUpdate library['books'] = list() - for i in collection.findall('entry'): + for i in collection.findall('tellico:entry', namespace): newbook = dict() newbook['id'] = int(i.attrib['id']) # these `for` overwrite previous values, @@ -30,21 +25,21 @@ def getLibrary(path, lastUpdate): # actually there is no 1to1 conversion between XML and JSON, so this operation # can not be made automagically without doing some preliminary assumptions # (Possibly useful module: https://github.com/martinblech/xmltodict) - for j in i.iter('title'): + for j in i.iterfind('tellico:title', namespace): newbook['title'] = j.text - for j in i.iter('publisher'): + for j in i.iterfind('tellico:publisher', namespace): newbook['publisher'] = j.text - for j in i.iter('pub_year'): + for j in i.iterfind('tellico:pub_year', namespace): newbook['year'] = int(j.text) - for j in i.iter('isbn'): + for j in i.iterfind('tellico:isbn', namespace): newbook['isbn'] = j.text.replace('-', '') - for j in i.iter('pages'): + for j in i.iterfind('tellico:pages', namespace): newbook['pages'] = int(j.text) - for j in i.iter('cover'): + for j in i.iterfind('tellico:cover', namespace): newbook['cover'] = j.text newbook['authors'] = list() - for j in i.iter('authors'): - for k in j.findall('author'): + for j in i.iterfind('tellico:authors', namespace): + for k in j.findall('tellico:author', namespace): newbook['authors'].append(k.text) library['books'].append(newbook)