tpdf/tcparser.py

80 lines
2.7 KiB
Python
Raw Normal View History

2018-10-20 16:46:05 +00:00
import xml.etree.ElementTree as ET
import json
import datetime
import sys
import re
2018-10-20 16:46:05 +00:00
2018-11-03 15:27:40 +00:00
import glob
2018-10-20 16:46:05 +00:00
# Parse Tellico's XML and get a library struct,
# a stripped version of our library in a Python-friendly format
def getLibrary(path, lastUpdate):
    """Parse a Tellico XML file into a plain-Python library structure.

    Args:
        path: File name or file object, passed straight to
            ``ElementTree.parse``.
        lastUpdate: Value stored unchanged under the ``'lastupdate'`` key.

    Returns:
        A dict with two keys: ``'lastupdate'`` and ``'books'``. Every book
        is a dict that always carries ``'id'`` (int) and ``'authors'``
        (list of str). ``'title'``, ``'publisher'`` and ``'cover'`` are set
        whenever the matching element exists (the value is None for an
        empty element). ``'year'`` (int), ``'isbn'`` (dashes removed) and
        ``'pages'`` (int) are set only when the element holds a usable
        value; otherwise the key is left out instead of aborting the parse.
    """
    namespace = {'tellico': 'http://periapsis.org/tellico/'}
    # Compiled once here because it is reused for every entry.
    year_pattern = re.compile(r'[0-9]{4}')

    root = ET.parse(path).getroot()
    collection = root.find('tellico:collection', namespace)

    library = {'lastupdate': lastUpdate, 'books': []}

    for entry in collection.findall('tellico:entry', namespace):
        book = {'id': int(entry.attrib['id'])}
        # These `for` loops overwrite previous values, which is fine because
        # the elements are expected to appear at most once per entry.
        # There is no 1:1 conversion between XML and JSON, so this mapping
        # cannot be automated without making some preliminary assumptions.
        # (Possibly useful module: https://github.com/martinblech/xmltodict)
        for node in entry.iterfind('tellico:title', namespace):
            book['title'] = node.text
        for node in entry.iterfind('tellico:publisher', namespace):
            book['publisher'] = node.text
        for node in entry.iterfind('tellico:pub_year', namespace):
            # `node.text` is None for an empty element, and the text may
            # hold no four-digit year at all; skip the field in both cases.
            match = year_pattern.search(node.text or '')
            if match:
                book['year'] = int(match.group())
        for node in entry.iterfind('tellico:isbn', namespace):
            if node.text is not None:
                book['isbn'] = node.text.replace('-', '')
        for node in entry.iterfind('tellico:pages', namespace):
            try:
                book['pages'] = int(node.text)
            except (TypeError, ValueError):
                # Empty or non-numeric page count: leave 'pages' unset
                # rather than failing the whole library.
                continue
        for node in entry.iterfind('tellico:cover', namespace):
            book['cover'] = node.text

        book['authors'] = []
        for group in entry.iterfind('tellico:authors', namespace):
            for author in group.findall('tellico:author', namespace):
                # An empty <author/> has no text; storing None would break
                # string-based author filtering later on.
                if author.text is not None:
                    book['authors'].append(author.text)

        library['books'].append(book)

    return library
2018-10-21 10:26:28 +00:00
# Filter results using the filter functions below and sort them by title
####################################################################
2018-10-20 16:46:05 +00:00
def filter(library, title='', author=''):
    """Return the books matching both filters, sorted by title.

    Args:
        library: Dict with a ``'lastupdate'`` value and a ``'books'`` list
            of book dicts.
        title: Text passed to ``filterTitle`` for every book.
        author: Text passed to ``filterAuthor`` for every book.

    Returns:
        A new dict with the same ``'lastupdate'`` value and a ``'books'``
        list holding only the books accepted by both filter functions,
        ordered by title.
    """
    matching = [
        book for book in library['books']
        if filterTitle(book, title) and filterAuthor(book, author)
    ]
    # `or ''` covers both a missing title and one stored as None, so the
    # sort never has to compare None with a string.
    matching.sort(key=lambda book: book.get('title') or '')

    return {
        'lastupdate': library['lastupdate'],
        'books': matching,
    }
2018-10-20 16:46:05 +00:00
# Filter by title
def filterTitle(book, filter):
    """Tell whether `filter` occurs in the book's title, ignoring case.

    Args:
        book: Book dict; the ``'title'`` key may be absent or None.
        filter: Text to look for. An empty string matches every book.

    Returns:
        True if the lower-cased filter is a substring of the lower-cased
        title. A missing or None title is treated as an empty string.
    """
    title = book.get('title') or ''
    return filter.lower() in title.lower()
# Filter by author
def filterAuthor(book, filter):
    """Tell whether `filter` occurs in any of the book's authors, ignoring case.

    Args:
        book: Book dict with an ``'authors'`` list.
        filter: Text to look for. An empty string matches every book,
            including books that have no authors at all.

    Returns:
        True if the filter is empty or is a substring of at least one
        author name (compared in lower case), otherwise False.
    """
    needle = filter.lower()
    if not needle:
        # An empty filter means "no author restriction"; without this guard
        # a book with an empty author list would be rejected.
        return True
    # `if author` skips None or empty entries, which cannot match anyway.
    return any(needle in author.lower() for author in book['authors'] if author)