tpdf/tcparser.py

183 lines
5.6 KiB
Python

import xml.etree.ElementTree as ET
import json
import datetime
import sys
import glob
# Parse Tellico's XML and get a library struct,
# a stripped version of our library in a Python-friendly format
def getLibrary(path, lastUpdate):
    """Parse a Tellico XML file into a plain Python library struct.

    Args:
        path: file name or file object, passed straight to ``ET.parse``.
        lastUpdate: value stored unchanged under the ``'lastupdate'`` key.

    Returns:
        A dict ``{'lastupdate': lastUpdate, 'books': [...]}``. Every book is a
        dict with an integer ``'id'``, a list ``'authors'`` and, when the
        matching element is present and usable, ``'title'``, ``'publisher'``,
        ``'year'`` (int), ``'isbn'`` (dashes removed), ``'pages'`` (int) and
        ``'cover'``.

    Raises:
        ValueError: if the document has no ``tellico:collection`` element.
        KeyError: if an entry has no ``id`` attribute.
    """
    namespace = {'tellico': 'http://periapsis.org/tellico/'}
    root = ET.parse(path).getroot()
    collection = root.find('tellico:collection', namespace)
    if collection is None:
        # Fail with a clear message instead of an AttributeError on None
        raise ValueError('no tellico:collection element found in ' + repr(path))

    books = []
    for entry in collection.findall('tellico:entry', namespace):
        book = {'id': int(entry.attrib['id'])}
        # A later element overwrites an earlier one; this is not an issue
        # because these are one-shot elements. There is no 1to1 conversion
        # between XML and JSON, so the mapping is spelled out explicitly
        # (possibly useful module: https://github.com/martinblech/xmltodict)
        for key, tag, convert in _BOOK_FIELDS:
            for element in entry.iterfind(tag, namespace):
                value = convert(element.text)
                # Empty or unparsable elements are treated as absent
                if value is not None:
                    book[key] = value
        # Empty <author/> elements are dropped so the list only holds strings
        book['authors'] = [
            author.text
            for group in entry.iterfind('tellico:authors', namespace)
            for author in group.findall('tellico:author', namespace)
            if author.text
        ]
        books.append(book)
    return {'lastupdate': lastUpdate, 'books': books}


def _asText(text):
    """Return the element text unchanged (None for an empty element)."""
    return text


def _asInt(text):
    """Return the element text as an int, or None if it is empty or not a number."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return None


def _asIsbn(text):
    """Return the ISBN with its dashes removed, or None for an empty element."""
    return None if text is None else text.replace('-', '')


# (book key, XML tag, converter), in the order the keys are added to a book
_BOOK_FIELDS = (
    ('title', 'tellico:title', _asText),
    ('publisher', 'tellico:publisher', _asText),
    ('year', 'tellico:pub_year', _asInt),
    ('isbn', 'tellico:isbn', _asIsbn),
    ('pages', 'tellico:pages', _asInt),
    ('cover', 'tellico:cover', _asText),
)
# Given a custom Python-friendly library struct, get the HTML version of it
# Very useful for our webserver
def getHTML(library):
    """Render a library struct as an HTML document tree.

    Reads ``glob.version`` for the page title and
    ``glob.conf['default']['outdir']`` for the cover image paths.
    NOTE(review): the stdlib ``glob`` module has neither attribute, so ``glob``
    is presumably a project-local module here; confirm.

    Args:
        library: dict with a ``'lastupdate'`` timestamp (passed to
            ``datetime.date.fromtimestamp``) and a ``'books'`` list of book
            dicts.

    Returns:
        An ``ET.ElementTree`` rooted at the ``<html>`` element. With no books
        the page holds a "No items" paragraph instead of the table.
    """
    heading = 'TPDF - Tellico Parser anD Finder ' + glob.version

    # Headers and other stuff needed for a properly formatted HTML document
    html = ET.Element('html')
    head = ET.SubElement(html, 'head')
    ET.SubElement(head, 'title').text = heading
    ET.SubElement(head, 'meta', attrib={'charset': 'utf-8'})
    ET.SubElement(head, 'link', attrib={
        'rel': 'stylesheet', 'type': 'text/css', 'media': 'all', 'href': 'style.css'})
    body = ET.SubElement(html, 'body')
    main = ET.SubElement(body, 'main')
    ET.SubElement(main, 'h1').text = heading

    # Last database update string
    updated = datetime.date.fromtimestamp(library['lastupdate'])
    ET.SubElement(main, 'p').text = 'Last database update ' + updated.strftime('%d %B %Y')

    # Public constructor instead of the private ElementTree._setroot()
    tree = ET.ElementTree(html)

    # Check for empty resultset
    books = library['books']
    if not books:
        ET.SubElement(main, 'p').text = "No items"
        return tree

    # Build the table header
    table = ET.SubElement(main, 'table')
    headerRow = ET.SubElement(ET.SubElement(table, 'thead'), 'tr')
    for label in ('ID', 'Cover', 'Title', 'Publisher', 'Year', 'ISBN', 'Pages', 'Author'):
        ET.SubElement(headerRow, 'th').text = label

    # Add a row in our table for every book in the library object
    for book in books:
        table.append(_bookRow(book))
    return tree


def _bookRow(book):
    """Build the <tr> element holding the eight cells of a single book."""
    row = ET.Element('tr')
    ET.SubElement(row, 'td').text = str(book.get('id'))

    # The cover cell is always present; the image only when the book has a cover
    coverCell = ET.SubElement(row, 'td')
    cover = book.get('cover')
    if cover:
        title = book.get('title')
        # A book may have a cover but no title; do not concatenate None
        alt = 'Book cover' if title is None else 'Book "' + title + '" cover'
        ET.SubElement(coverCell, 'img', attrib={
            'alt': alt,
            'src': glob.conf['default']['outdir'] + '/images/' + cover})

    ET.SubElement(row, 'td').text = book.get('title')
    ET.SubElement(row, 'td').text = book.get('publisher')
    ET.SubElement(row, 'td').text = str(book.get('year', ''))
    ET.SubElement(row, 'td').text = book.get('isbn')
    ET.SubElement(row, 'td').text = str(book.get('pages', ''))

    # Authors are rendered as a list, one item per author
    authorList = ET.SubElement(ET.SubElement(row, 'td'), 'ul')
    for author in book.get('authors') or ():
        ET.SubElement(authorList, 'li').text = author
    return row
# Filter results using the filter functions below and order them by title
####################################################################
def filter(library, title='', author=''):
    """Return the books matching both filters, ordered by title.

    Args:
        library: dict with ``'lastupdate'`` and a ``'books'`` list.
        title: text passed to ``filterTitle`` for every book.
        author: text passed to ``filterAuthor`` for every book.

    Returns:
        A new dict with the same ``'lastupdate'`` value and a ``'books'`` list
        holding only the matching books, sorted by title.
    """
    matches = [
        book for book in library['books']
        if filterTitle(book, title) and filterAuthor(book, author)
    ]
    # A title can be missing or None; sort those as '' because comparing
    # None with a str raises TypeError
    matches.sort(key=lambda book: book.get('title') or '')
    return {'lastupdate': library['lastupdate'], 'books': matches}
# Filter by title
def filterTitle(book, filter):
    """Tell whether ``filter`` occurs in the book title, ignoring case.

    A book whose ``'title'`` is missing or None is treated as having an empty
    title, so it only matches an empty filter.

    Args:
        book: book dict; its ``'title'`` entry is optional.
        filter: text to look for.

    Returns:
        True if the lowercased filter is a substring of the lowercased title.
    """
    # Not every book has a title, so avoid book['title'].lower()
    title = book.get('title') or ''
    return filter.lower() in title.lower()
# Filter by author
def filterAuthor(book, filter):
    """Tell whether ``filter`` occurs in any author of the book, ignoring case.

    An empty filter matches every book, including books without authors.

    Args:
        book: book dict; ``'authors'`` is expected to be a list of names.
        filter: text to look for.

    Returns:
        True if the filter is empty or is a substring of at least one author.
    """
    needle = filter.lower()
    if not needle:
        # Without this guard a book with no authors never matched, so the
        # default (empty) filter dropped it from the results
        return True
    # Skip None/empty author entries instead of calling .lower() on them
    return any(
        needle in name.lower()
        for name in book.get('authors') or ()
        if name
    )