tpdf/tcparser.py

80 lines
2.7 KiB
Python

import xml.etree.ElementTree as ET
import json
import datetime
import sys
import re
import glob
# Parse Tellico's XML and get a library struct,
# a stripped version of our library in a Python-friendly format
def getLibrary(path, lastUpdate):
    """Parse a Tellico XML file into a plain, JSON-friendly library dict.

    Args:
        path: File name or file object handed to ``ElementTree.parse``.
        lastUpdate: Value stored unchanged under the ``'lastupdate'`` key.

    Returns:
        A dict with two keys, ``'lastupdate'`` and ``'books'``. ``'books'``
        is a list holding one dict per ``<entry>``. Each book dict always
        has ``'id'`` (int) and ``'authors'`` (list of str) and, when the
        matching element exists in the XML, ``'title'``, ``'publisher'``,
        ``'year'`` (int), ``'isbn'`` (dashes removed), ``'pages'`` (int)
        and ``'cover'``.

    Empty or malformed ``pub_year``, ``isbn``, ``pages`` and ``author``
    elements are skipped instead of aborting the whole import.
    """
    namespace = {'tellico': 'http://periapsis.org/tellico/'}
    # Compiled once: the same pattern is applied to every entry.
    year_pattern = re.compile(r'[0-9]{4}')

    root = ET.parse(path).getroot()
    collection = root.find('tellico:collection', namespace)

    library = {'lastupdate': lastUpdate, 'books': []}
    for entry in collection.findall('tellico:entry', namespace):
        book = {'id': int(entry.attrib['id'])}

        # There is no 1-to-1 mapping between XML and JSON, so each field is
        # converted by hand. The loops below let a later duplicate element
        # overwrite an earlier one; these elements are expected to occur at
        # most once per entry, so that is harmless.
        # (Possibly useful module: https://github.com/martinblech/xmltodict)
        for node in entry.iterfind('tellico:title', namespace):
            book['title'] = node.text
        for node in entry.iterfind('tellico:publisher', namespace):
            book['publisher'] = node.text
        for node in entry.iterfind('tellico:pub_year', namespace):
            # An empty element has text None; a year without four
            # consecutive digits yields no match. Skip both cases.
            match = year_pattern.search(node.text or '')
            if match:
                book['year'] = int(match.group())
        for node in entry.iterfind('tellico:isbn', namespace):
            if node.text:
                book['isbn'] = node.text.replace('-', '')
        for node in entry.iterfind('tellico:pages', namespace):
            try:
                book['pages'] = int(node.text)
            except (TypeError, ValueError):
                # Empty (None) or non-numeric page count: leave it unset.
                pass
        for node in entry.iterfind('tellico:cover', namespace):
            book['cover'] = node.text

        book['authors'] = [
            author.text
            for group in entry.iterfind('tellico:authors', namespace)
            for author in group.findall('tellico:author', namespace)
            if author.text  # drop empty <author/> elements
        ]
        library['books'].append(book)
    return library
# Filter results using the filter functions below and order them by title
####################################################################
def filter(library, title='', author=''):
    """Return the books of *library* that pass both filters, sorted by title.

    Args:
        library: Dict with a ``'books'`` list and a ``'lastupdate'`` value.
        title: Text handed to ``filterTitle`` for every book.
        author: Text handed to ``filterAuthor`` for every book.

    Returns:
        A new dict carrying the original ``'lastupdate'`` value and, under
        ``'books'``, the matching books ordered by their ``'title'`` (books
        without that key sort as an empty string).
    """
    matching = [
        book
        for book in library['books']
        if filterTitle(book, title) and filterAuthor(book, author)
    ]
    matching.sort(key=lambda book: book.get('title', ''))
    return {
        'lastupdate': library['lastupdate'],
        'books': matching,
    }
# Filter by title
def filterTitle(book, filter):
    """Tell whether *filter* occurs in the book's title, ignoring case.

    Args:
        book: Book dict; its ``'title'`` entry may be missing or ``None``.
        filter: Text to look for. An empty string matches every book.

    Returns:
        True when the lower-cased filter is a substring of the lower-cased
        title. A missing or ``None`` title is treated as an empty string, so
        such a book only matches an empty filter.
    """
    title = book.get('title') or ''
    return filter.lower() in title.lower()
# Filter by author
def filterAuthor(book, filter):
    """Tell whether *filter* occurs in any of the book's authors, ignoring case.

    Args:
        book: Book dict; ``'authors'`` is expected to be a list of names.
        filter: Text to look for. An empty string matches every book.

    Returns:
        True when the filter is empty, or when the lower-cased filter is a
        substring of at least one lower-cased author name; False otherwise.
    """
    needle = filter.lower()
    if not needle:
        # An empty filter means "no author constraint". Without this guard a
        # book with an empty author list would never match, even when the
        # caller did not ask for any author.
        return True
    return any(
        needle in author.lower()
        for author in book.get('authors', ())
        if author  # ignore None/empty names coming from empty XML elements
    )