import xml.etree.ElementTree as ET
import json
import datetime
import sys
import re
import glob

# XML namespace used by every element of a Tellico document.
_TELLICO_NS = {'tellico': 'http://periapsis.org/tellico/'}

# A publication year is the first run of four digits found in <pub_year>.
_YEAR_RE = re.compile(r'[0-9]{4}')


def _parseEntry(entry):
    """Convert one `tellico:entry` element into a plain book dict.

    There is no 1-to-1 conversion between XML and JSON, so this makes some
    assumptions: every scalar field is expected to appear at most once per
    entry, and if it appears several times the last usable occurrence wins.
    (Possibly useful module: https://github.com/martinblech/xmltodict)

    The result always has 'id' and 'authors'. The keys 'title', 'publisher',
    'year', 'isbn', 'pages' and 'cover' are present only when the matching
    element exists; 'year', 'isbn' and 'pages' are additionally left out when
    the element's text is empty or cannot be converted.
    """
    book = {'id': int(entry.attrib['id'])}

    # Plain text fields, copied verbatim (text is None for an empty element).
    for tag in ('title', 'publisher'):
        for node in entry.iterfind('tellico:' + tag, _TELLICO_NS):
            book[tag] = node.text

    for node in entry.iterfind('tellico:pub_year', _TELLICO_NS):
        # Skip the year instead of crashing when no 4-digit run is present.
        found = _YEAR_RE.search(node.text or '')
        if found:
            book['year'] = int(found.group())

    for node in entry.iterfind('tellico:isbn', _TELLICO_NS):
        if node.text is not None:
            book['isbn'] = node.text.replace('-', '')

    for node in entry.iterfind('tellico:pages', _TELLICO_NS):
        try:
            book['pages'] = int(node.text)
        except (TypeError, ValueError):
            # Empty or non-numeric page count: leave the key unset.
            continue

    for node in entry.iterfind('tellico:cover', _TELLICO_NS):
        book['cover'] = node.text

    book['authors'] = [
        author.text
        for group in entry.iterfind('tellico:authors', _TELLICO_NS)
        for author in group.findall('tellico:author', _TELLICO_NS)
    ]
    return book


# Parse Tellico's XML and get a library struct,
# a stripped version of our library in a Python-friendly format
def getLibrary(path, lastUpdate):
    """Parse a Tellico XML document into a library dict.

    Args:
        path: file name or file object accepted by `ET.parse`.
        lastUpdate: value stored unchanged under the 'lastupdate' key.

    Returns:
        A dict with 'lastupdate' and 'books', where 'books' is a list of
        book dicts (see `_parseEntry`) in document order.

    Raises:
        xml.etree.ElementTree.ParseError: if the document is not well-formed.
        AttributeError: if the root has no `tellico:collection` child.
        KeyError, ValueError: if an entry has no integer `id` attribute.
    """
    root = ET.parse(path).getroot()
    collection = root.find('tellico:collection', _TELLICO_NS)

    library = {'lastupdate': lastUpdate, 'books': []}
    for entry in collection.findall('tellico:entry', _TELLICO_NS):
        library['books'].append(_parseEntry(entry))
    return library


# Filter results using following filter functions and order by title
####################################################################
def filter(library, title='', author=''):
    """Return the books matching both filters, ordered by title.

    Args:
        library: dict with 'lastupdate' and 'books', as built by getLibrary.
        title: case-insensitive substring to look for in the title.
        author: case-insensitive substring to look for in any author name.

    Returns:
        A new dict with the same 'lastupdate' and a 'books' list holding the
        matching book dicts. Books with a missing or empty title sort as ''.
    """
    matching = [
        book for book in library['books']
        if filterTitle(book, title) and filterAuthor(book, author)
    ]
    # `or ''` also covers a title stored as None, which cannot be compared
    # with str while sorting.
    matching.sort(key=lambda book: book.get('title') or '')
    return {'lastupdate': library['lastupdate'], 'books': matching}


# Filter by title
def filterTitle(book, filter):
    """Return True if `filter` occurs, ignoring case, in the book's title.

    A missing or None title is treated as the empty string, so such a book
    only matches the empty filter.
    """
    return filter.lower() in (book.get('title') or '').lower()


# Filter by author
def filterAuthor(book, filter):
    """Return True if `filter` occurs, ignoring case, in any author name.

    An empty filter matches every book, including books without authors.
    Author names stored as None are treated as empty strings.
    """
    needle = filter.lower()
    if not needle:
        return True
    return any(
        needle in (name or '').lower()
        for name in book.get('authors', ())
    )