commit adc23057135662dc0c8c1d11b82cb54d1399a975 Author: giomba Date: Sat Oct 20 18:46:05 2018 +0200 First commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c9e0fd --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +test/ +input/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c8d3bc2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + A copy of the GNU General Public License can be found + here + diff --git a/README b/README new file mode 100644 index 0000000..3841c0f --- /dev/null +++ b/README @@ -0,0 +1,3 @@ +TPDF - Tellico Parser anD Finder + +A simple Tellico XML parser and finder for your book library, written in Python3 diff --git a/TODO b/TODO new file mode 100644 index 0000000..8ec2c84 --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +- Properly automagically read interesting parts of XML into JSON diff --git a/index.html b/index.html new file mode 100644 index 0000000..8bb842f --- /dev/null +++ b/index.html @@ -0,0 +1,24 @@ + + + + + + TPDF - Tellico Parser anD Finder + + + +

TPDF - Tellico Parser anD Finder

+ +
+ + + + + + + + +
#!/usr/bin/python
# -*- coding: UTF-8 -*-

# GET me using a web browser: a CGI-compliant webserver runs this script
# with a Python interpreter.
# Example URI:
#   http://www.example.org/path/main.py?format=format&title=title&author=author
# where:
#   format:
#     output format. Either `json` or `html` (default). Any other value
#     falls back to `html`, so the body always matches the Content-Type.
#   title:
#     text the book title must contain, compared case-insensitively (optional)
#   author:
#     text one of the book authors must contain, compared
#     case-insensitively (optional)
#
# Every parameter is optional.
# Please note that not providing filters results in all books in the library.

# Useful libraries (no pun intended)
import cgi
import cgitb
import json
import sys
import xml.etree.ElementTree as ET

# Our custom library (again no pun intended)
import tcparser

# Start CGI handling for webserver
cgitb.enable()
inputvars = cgi.FieldStorage()

# Detect desired format. getfirst() copes with a missing parameter and with
# a parameter that is repeated in the query string.
output_format = inputvars.getfirst('format', 'html')
if output_format != 'json':
    output_format = 'html'

# The headers below promise UTF-8, so make stdout honour that whatever
# locale the webserver started us with.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

if output_format == 'html':
    print('Content-Type: text/html; charset=utf-8')
else:
    # application/json is the registered media type for JSON
    print('Content-Type: application/json; charset=utf-8')

print()

### End of HTTP headers: it is now safe to output things
##########################################################

# Get a Python-friendly library struct
library = tcparser.getLibrary('input/tellico.xml')

### Get filters to search for books ###
title = inputvars.getfirst('title', '')
author = inputvars.getfirst('author', '')

result = tcparser.filter(library, title=title, author=author)

if output_format == 'html':
    # Serialise with HTML rules: the XML serialiser would emit empty
    # elements such as <ul /> self-closed, which browsers read as an
    # unclosed opening tag.
    print('<!DOCTYPE html>')
    tcparser.getHTML(result).write(sys.stdout, encoding='unicode', method='html')
    print()
else:
    # Wanna get a pretty JSON encoded library to do your nasty things
    # offline at home? ;-)
    print(json.dumps(result, indent=4))
import json
import sys
import xml.etree.ElementTree as ET

# Default-namespace declaration carried by the root element of Tellico files.
# It is removed before parsing so elements can be looked up by bare tag name.
_TELLICO_XMLNS = b'xmlns="http://periapsis.org/tellico/"'

# Scalar book fields rendered by getHTML, in column order.
_HTML_COLUMNS = ('id', 'title', 'publisher', 'year', 'isbn', 'pages')


def _lastText(entry, tag):
    """Return the text of the last `tag` descendant of `entry`, or None.

    None is returned both when no such element exists and when the element
    is empty or whitespace-only, so callers can simply skip the field.
    """
    text = None
    for node in entry.iter(tag):
        text = node.text
    if text is None or not text.strip():
        return None
    return text


def _toInt(text):
    """Convert `text` to int, returning None when it is missing or not numeric."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return None


def _cellText(value):
    """Return the text to show in a table cell; missing values become ''."""
    return '' if value is None else str(value)


def getLibrary(path):
    """Parse a Tellico XML file into a list of plain book dicts.

    Each book dict always has `id` (int) and `authors` (list of str). The
    keys `title`, `publisher`, `isbn` (dashes removed), `year` (int) and
    `pages` (int) are present only when the entry has a usable value.

    path: location of the Tellico XML file.
    Exits through sys.exit('No collection found') when the document has no
    `collection` element under its root.
    """
    # Read bytes and let the XML parser apply the encoding declared in the
    # document instead of whatever the process locale happens to be.
    with open(path, 'rb') as fh:
        raw = fh.read()

    # Get rid of XML namespace
    raw = raw.replace(_TELLICO_XMLNS, b'')
    root = ET.fromstring(raw)

    collection = root.find('collection')
    if collection is None:
        sys.exit('No collection found')

    library = []
    for entry in collection.findall('entry'):
        book = {'id': int(entry.attrib['id'])}

        # These are one-shot elements; should one repeat, the last wins.
        title = _lastText(entry, 'title')
        if title is not None:
            book['title'] = title

        publisher = _lastText(entry, 'publisher')
        if publisher is not None:
            book['publisher'] = publisher

        year = _toInt(_lastText(entry, 'pub_year'))
        if year is not None:
            book['year'] = year

        isbn = _lastText(entry, 'isbn')
        if isbn is not None:
            book['isbn'] = isbn.replace('-', '')

        pages = _toInt(_lastText(entry, 'pages'))
        if pages is not None:
            book['pages'] = pages

        # Empty <author/> elements are skipped so every name is a real string.
        book['authors'] = [
            author.text
            for group in entry.iter('authors')
            for author in group.findall('author')
            if author.text
        ]

        library.append(book)

    return library


def getHTML(library):
    """Build an HTML document listing `library` as a table.

    library: iterable of book dicts as produced by getLibrary.
    Returns an xml.etree.ElementTree.ElementTree rooted at <html>, holding
    one <tr> per book with the cells id, title, publisher, year, isbn,
    pages and a <ul> of authors. Missing values give empty cells.
    """
    # Headers and other stuff needed for properly formatted HTML documents
    html = ET.Element('html')
    head = ET.SubElement(html, 'head')
    ET.SubElement(head, 'title').text = 'Tellico parsed Library HTML'
    ET.SubElement(head, 'meta', attrib={'charset': 'utf-8'})
    ET.SubElement(head, 'link', attrib={
        'rel': 'stylesheet',
        'type': 'text/css',
        'media': 'all',
        'href': 'style.css',
    })
    body = ET.SubElement(html, 'body')
    main = ET.SubElement(body, 'main')
    table = ET.SubElement(main, 'table')

    # Add a row in our table for every book in the library object
    for book in library:
        row = ET.SubElement(table, 'tr')
        for key in _HTML_COLUMNS:
            ET.SubElement(row, 'td').text = _cellText(book.get(key))

        authorList = ET.SubElement(ET.SubElement(row, 'td'), 'ul')
        for name in book.get('authors') or ():
            ET.SubElement(authorList, 'li').text = _cellText(name)

    # Our nice XML/HTML tree
    return ET.ElementTree(html)


# Filter results using following filter functions
##################################################
def filter(library, title='', author=''):
    """Return the books of `library` matching both the title and author filters.

    An empty filter string matches every book, so calling this with the
    defaults returns all books.
    """
    return [
        book
        for book in library
        if filterTitle(book, title) and filterAuthor(book, author)
    ]


def filterTitle(book, filter):
    """Tell whether `filter` occurs, case-insensitively, in the book title.

    A book without a title is treated as having an empty one.
    """
    needle = filter.lower()
    return needle in (book.get('title') or '').lower()


def filterAuthor(book, filter):
    """Tell whether `filter` occurs, case-insensitively, in any author name.

    An empty filter matches every book, including books without authors.
    """
    needle = filter.lower()
    if not needle:
        return True
    return any(
        needle in name.lower()
        for name in book.get('authors') or ()
        if name
    )