# tpdf/tcparser.py

import xml.etree.ElementTree as ET
import json
import datetime
import sys

import glob

# XML namespace map used to address every element of a Tellico document
_TELLICO_NS = {'tellico': 'http://periapsis.org/tellico/'}


# Return the text of the last non-empty `tag` child of `entry`, or None
def _entryText(entry, tag):
    text = None
    for node in entry.iterfind('tellico:' + tag, _TELLICO_NS):
        # An empty element (<isbn/>) has text None: never let it overwrite
        # a usable value nor leak into the library as None
        if node.text and node.text.strip():
            text = node.text
    return text


# Return the `tag` child of `entry` converted to int, or None when the
# element is missing, empty or not a valid integer
def _entryInt(entry, tag):
    text = _entryText(entry, tag)
    if text is None:
        return None
    try:
        return int(text)
    except ValueError:
        return None


# Parse Tellico's XML and get a library struct,
# a stripped version of our library in a Python-friendly format
def getLibrary(path, lastUpdate):
    """Parse a Tellico XML document into a plain library dict.

    There is no 1-to-1 conversion between XML and JSON, so only a fixed set
    of fields is extracted from every `entry` of the `collection` element
    (possibly useful module: https://github.com/martinblech/xmltodict).

    Args:
        path: file name or file object, handed as-is to ET.parse().
        lastUpdate: value stored unchanged under the 'lastupdate' key.

    Returns:
        A dict {'lastupdate': lastUpdate, 'books': [book, ...]}. Every book
        always has 'id' (int) and 'authors' (list of str); 'title',
        'publisher', 'year' (int), 'isbn' (dashes removed), 'pages' (int)
        and 'cover' are present only when the matching element holds a
        usable value. Empty elements and non-numeric years/pages are
        skipped instead of aborting the whole import.

    Raises:
        xml.etree.ElementTree.ParseError: the document is not well-formed.
        AttributeError: the document has no `collection` element.
        KeyError, ValueError: an entry has no (or a non-integer) `id`.
    """
    root = ET.parse(path).getroot()
    collection = root.find('tellico:collection', _TELLICO_NS)

    library = {'lastupdate': lastUpdate, 'books': []}

    for entry in collection.findall('tellico:entry', _TELLICO_NS):
        book = {'id': int(entry.attrib['id'])}

        # These are one-shot elements: if one is repeated, the last
        # non-empty occurrence wins. Key order is kept stable on purpose.
        fields = (
            ('title', _entryText(entry, 'title')),
            ('publisher', _entryText(entry, 'publisher')),
            ('year', _entryInt(entry, 'pub_year')),
            ('isbn', _entryText(entry, 'isbn')),
            ('pages', _entryInt(entry, 'pages')),
            ('cover', _entryText(entry, 'cover')),
        )
        for key, value in fields:
            if value is not None:
                book[key] = value

        if 'isbn' in book:
            book['isbn'] = book['isbn'].replace('-', '')

        authors = []
        for group in entry.iterfind('tellico:authors', _TELLICO_NS):
            for author in group.findall('tellico:author', _TELLICO_NS):
                # Skip empty <author/> elements, their text is None
                if author.text and author.text.strip():
                    authors.append(author.text)
        book['authors'] = authors

        library['books'].append(book)

    return library

# Build the <tr> element describing a single book of the library
def _bookRow(book):
    row = ET.Element('tr')

    ET.SubElement(row, 'td').text = str(book.get('id'))

    # The cover cell is always emitted, the image only when a cover is known
    coverCell = ET.SubElement(row, 'td')
    cover = book.get('cover')
    if cover:
        bookTitle = book.get('title')
        # A book may have a cover but no title: do not concatenate None
        if bookTitle is None:
            alt = 'Book cover'
        else:
            alt = 'Book "' + bookTitle + '" cover'
        src = glob.conf['default']['outdir'] + '/images/' + cover
        ET.SubElement(coverCell, 'img', attrib={'alt': alt, 'src': src})

    ET.SubElement(row, 'td').text = book.get('title')
    ET.SubElement(row, 'td').text = book.get('publisher')
    ET.SubElement(row, 'td').text = str(book.get('year', ''))
    ET.SubElement(row, 'td').text = book.get('isbn')
    ET.SubElement(row, 'td').text = str(book.get('pages', ''))

    # Authors are rendered as a (possibly empty) bullet list
    authorList = ET.SubElement(ET.SubElement(row, 'td'), 'ul')
    for author in book.get('authors', ()):
        ET.SubElement(authorList, 'li').text = author

    return row


# Given a custom Python-friendly library struct, get the HTML version of it
# Very useful for our webserver
def getHTML(library):
    """Render a library struct as an HTML document tree.

    Args:
        library: dict with a 'lastupdate' timestamp (as accepted by
            datetime.date.fromtimestamp) and a 'books' list of book dicts.
            The keys read from every book are 'id', 'cover', 'title',
            'publisher', 'year', 'isbn', 'pages' and 'authors'; all of
            them are optional.

    Returns:
        An xml.etree.ElementTree.ElementTree rooted at the <html> element.
        With an empty book list the page holds a "No items" paragraph
        instead of the table.
    """
    pageTitle = 'TPDF - Tellico Parser anD Finder ' + glob.version

    # Headers and other stuff needed for properly formatted HTML documents
    html = ET.Element('html')
    head = ET.SubElement(html, 'head')
    ET.SubElement(head, 'title').text = pageTitle
    ET.SubElement(head, 'meta', attrib={'charset': 'utf-8'})
    ET.SubElement(head, 'link', attrib={'rel': 'stylesheet', 'type': 'text/css', 'media': 'all', 'href': 'style.css'})
    body = ET.SubElement(html, 'body')
    main = ET.SubElement(body, 'main')
    ET.SubElement(main, 'h1').text = pageTitle

    tree = ET.ElementTree(html)

    # Last database update string
    lastUpdate = datetime.date.fromtimestamp(library['lastupdate'])
    ET.SubElement(main, 'p').text = 'Last database update ' + lastUpdate.strftime('%d %B %Y')

    # Check for empty resultset
    books = library['books']
    if not books:
        ET.SubElement(main, 'p').text = "No items"
        return tree

    # Build a beautiful table header
    table = ET.SubElement(main, 'table')
    headerRow = ET.SubElement(ET.SubElement(table, 'thead'), 'tr')
    for label in ('ID', 'Cover', 'Title', 'Publisher', 'Year', 'ISBN', 'Pages', 'Author'):
        ET.SubElement(headerRow, 'th').text = label

    # Add a row in our table for every book in the library object
    for book in books:
        table.append(_bookRow(book))

    # Our nice XML/HTML tree
    return tree

# Filter results using following filter functions and order by title
####################################################################
def filter(library, title='', author=''):
    """Return a new library struct holding only the matching books.

    A book is kept when both filterTitle() and filterAuthor() accept it;
    the kept books are ordered by their 'title' (books without that key
    sort as an empty title). 'lastupdate' is copied from the input.
    """
    matches = [
        book
        for book in library['books']
        if filterTitle(book, title) and filterAuthor(book, author)
    ]
    matches.sort(key=lambda book: book.get('title', ''))

    return {'lastupdate': library['lastupdate'], 'books': matches}

# Filter by title
def filterTitle(book, filter):
    """Tell whether `filter` occurs, ignoring case, in the book title.

    A book whose 'title' is missing or None is treated as having an empty
    title: it matches only the empty filter instead of raising.
    """
    title = book.get('title') or ''
    return filter.lower() in title.lower()

# Filter by author
def filterAuthor(book, filter):
    """Tell whether `filter` occurs, ignoring case, in any author name.

    An empty filter accepts every book, including those without authors,
    so that an unfiltered search does not hide them. Missing 'authors'
    and None entries in the list are ignored instead of raising.
    """
    needle = filter.lower()
    if not needle:
        return True
    return any(
        needle in author.lower()
        for author in book.get('authors', ())
        if author
    )