# tpdf/tcparser.py

import json
import sys
import xml.etree.ElementTree as ET

# Parse Tellico's XML and get a library struct,
# a stripped version of our library in a Python-friendly format
def getLibrary(path):
    """Parse a Tellico XML file into a list of plain book dicts.

    Every ``entry`` element directly under the first ``collection`` becomes
    one dict holding an integer ``id``, a list of ``authors`` and, for each
    element that is present with non-blank text, ``title``, ``publisher``,
    ``year`` (int), ``isbn`` (dashes removed) and ``pages`` (int).

    Args:
        path: Location of the Tellico XML file.

    Returns:
        A list of book dicts, in document order.

    Raises:
        SystemExit: If the first child of the root is not a ``collection``
            (or the root has no children at all).
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        KeyError: If an entry has no ``id`` attribute.
        ValueError: If ``id``, ``pub_year`` or ``pages`` is not an integer.
    """
    # Read bytes rather than text: the XML parser then decodes according to
    # the document's own encoding declaration instead of the locale default,
    # and the context manager guarantees the handle is closed.
    with open(path, 'rb') as fh:
        raw = fh.read()

    # Get rid of the default XML namespace so tags can be matched by bare name
    raw = raw.replace(b'xmlns="http://periapsis.org/tellico/"', b'')

    root = ET.fromstring(raw)

    # len(root) guards against a root element without any children
    if len(root) == 0 or root[0].tag != 'collection':
        sys.exit('No collection found')

    collection = root[0]

    return [_parseEntry(entry) for entry in collection.findall('entry')]


# (XML tag, key in the book dict, converter applied to the element text)
_BOOK_FIELDS = (
    ('title', 'title', str),
    ('publisher', 'publisher', str),
    ('pub_year', 'year', int),
    ('isbn', 'isbn', lambda text: text.replace('-', '')),
    ('pages', 'pages', int),
)


def _hasText(node):
    """Tell whether *node* carries text that is not just whitespace."""
    return node.text is not None and node.text.strip() != ''


def _lastText(entry, tag):
    """Return the text of the last non-blank *tag* element in *entry*, or None."""
    found = None
    # Later occurrences overwrite earlier ones; these elements are expected
    # to appear at most once per entry, so this is not an issue in practice.
    for node in entry.iter(tag):
        if _hasText(node):
            found = node.text
    return found


def _parseEntry(entry):
    """Convert one ``entry`` element into a book dict."""
    book = {'id': int(entry.attrib['id'])}

    for tag, key, convert in _BOOK_FIELDS:
        text = _lastText(entry, tag)
        # Empty elements such as <pages/> are treated as "not provided"
        if text is not None:
            book[key] = convert(text)

    book['authors'] = [
        author.text
        for group in entry.iter('authors')
        for author in group.findall('author')
        if _hasText(author)
    ]

    return book

# Given a custom Python-friendly library struct, get the HTML version of it
# Very useful for our webserver
def getHTML(library):
    """Build an HTML document tree showing *library* as a table.

    The document has a ``head`` (title, charset meta, link to ``style.css``)
    and a ``body`` containing ``main > table``. Each book becomes one ``tr``
    with seven ``td`` cells: id, title, publisher, year, isbn, pages and an
    ``ul`` holding one ``li`` per author.

    Args:
        library: Iterable of book dicts. Any of the keys ``id``, ``title``,
            ``publisher``, ``year``, ``isbn``, ``pages`` and ``authors`` may
            be missing; the matching cell is then left empty.

    Returns:
        An ``xml.etree.ElementTree.ElementTree`` rooted at the ``html`` element.
    """
    # Headers and other stuff needed for properly formatted HTML documents
    html = ET.Element('html')
    head = ET.SubElement(html, 'head')
    ET.SubElement(head, 'title').text = 'Tellico parsed Library HTML'
    ET.SubElement(head, 'meta', attrib={'charset': 'utf-8'})
    ET.SubElement(head, 'link', attrib={'rel': 'stylesheet', 'type': 'text/css', 'media': 'all', 'href': 'style.css'})
    body = ET.SubElement(html, 'body')
    main = ET.SubElement(body, 'main')
    table = ET.SubElement(main, 'table')

    # Add a row in our table for every book in the library object
    for book in library:
        row = ET.SubElement(table, 'tr')

        for key in ('id', 'title', 'publisher', 'year', 'isbn', 'pages'):
            cell = ET.SubElement(row, 'td')
            value = book.get(key)
            # Leave the cell empty for a missing value instead of printing
            # the literal text "None"
            if value is not None:
                cell.text = str(value)

        author_list = ET.SubElement(ET.SubElement(row, 'td'), 'ul')
        for author in book.get('authors') or ():
            ET.SubElement(author_list, 'li').text = author

    # Our nice XML/HTML tree
    return ET.ElementTree(html)

# Filter results using following filter functions
##################################################
def filter(library, title='', author=''):
    """Return the books of *library* accepted by both filter predicates.

    A book is kept when ``filterTitle(book, title)`` and
    ``filterAuthor(book, author)`` are both true. The input is not
    modified; a new list is returned, preserving the original order.
    """
    return [
        book
        for book in library
        if filterTitle(book, title) and filterAuthor(book, author)
    ]

# Filter by title
def filterTitle(book, filter):
    """Tell whether *filter* occurs in the book's title, ignoring case.

    A book whose ``title`` is missing or ``None`` is treated as having an
    empty title, so it matches only an empty *filter* instead of raising.

    Args:
        book: Book dict; its ``title`` entry is optional.
        filter: Substring to look for. An empty string matches every book.

    Returns:
        True if the lowercased filter is contained in the lowercased title.
    """
    title = book.get('title') or ''
    return filter.lower() in title.lower()

# Filter by author
def filterAuthor(book, filter):
    """Tell whether *filter* occurs in any of the book's authors, ignoring case.

    Args:
        book: Book dict; its ``authors`` entry (a list of names) is optional.
        filter: Substring to look for. An empty string matches every book,
            including books that list no authors at all.

    Returns:
        True if the filter is empty or at least one author name contains it.
    """
    needle = filter.lower()
    # An empty filter means "no restriction"; without this guard a book with
    # an empty author list would be rejected even though nothing was asked for.
    if not needle:
        return True
    # Skip missing names (None) rather than failing on them
    return any(
        needle in name.lower()
        for name in (book.get('authors') or ())
        if name is not None
    )