From adc23057135662dc0c8c1d11b82cb54d1399a975 Mon Sep 17 00:00:00 2001 From: giomba Date: Sat, 20 Oct 2018 18:46:05 +0200 Subject: [PATCH] First commit --- .gitignore | 3 ++ LICENSE | 13 +++++ README | 3 ++ TODO | 1 + index.html | 24 ++++++++++ main.py | 70 +++++++++++++++++++++++++++ style.css | 14 ++++++ tcparser.py | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 264 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README create mode 100644 TODO create mode 100644 index.html create mode 100755 main.py create mode 100644 style.css create mode 100644 tcparser.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c9e0fd --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +test/ +input/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c8d3bc2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + A copy of the GNU General Public License can be found + here + diff --git a/README b/README new file mode 100644 index 0000000..3841c0f --- /dev/null +++ b/README @@ -0,0 +1,3 @@ +TPDF - Tellico Parser anD Finder + +A simple Tellico XML parser and finder for your book library, written in Python3 diff --git a/TODO b/TODO new file mode 100644 index 0000000..8ec2c84 --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +- Properly automagically read interesting parts of XML into JSON diff --git a/index.html b/index.html new file mode 100644 index 0000000..8bb842f --- /dev/null +++ b/index.html @@ -0,0 +1,24 @@ + + + + + + TPDF - Tellico Parser anD Finder + + + +

TPDF - Tellico Parser anD Finder

+ +
+ + + + + + + + +
+ + + diff --git a/main.py b/main.py new file mode 100755 index 0000000..f81f88f --- /dev/null +++ b/main.py @@ -0,0 +1,70 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- + +# GET me using a web browser, +# executing my code with a Python interpreter called by a CGI-compliant webserver! +# Example URI: +# http://www.example.org/path/main.py?format=format&title=title&author=author +# where: +# format: +# choose output format. Can be either `json` or `html` (default) +# title: +# title of the book to filter (optional) +# author: +# author of the book to filter (optional) +# +# Every parameter is optional. +# Please note that not providing filters results in all books in the library. + +# Useful libraries (no pun intended) +import xml.etree.ElementTree as ET +import json +import sys +import cgitb, cgi + +# Our custom library (again no pun intended) +import tcparser + +# Start CGI handling for webserver +cgitb.enable() +inputvars = cgi.FieldStorage() + +# Detect desired format +try: + format = inputvars['format'].value +except KeyError: + format = 'html' + +if format == 'html': + print('Content-Type: text/html; charset=utf-8') +else: + print('Content-Type: text/json; charset=utf-8') + +print() + +### End of HTTP headers: it is now safe to output things +########################################################## + +# Get a Python-friendly library struct +library = tcparser.getLibrary('input/tellico.xml') + +### Get filters to search for books ### +try: + title = inputvars['title'].value +except KeyError: + title = '' + +try: + author = inputvars['author'].value +except KeyError: + author = '' + + +result = tcparser.filter(library, title=title, author=author) + +if format == 'html': + html = tcparser.getHTML(result) + ET.dump(html) +if format == 'json': + # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-) + print(json.dumps(result, indent=4)) diff --git a/style.css b/style.css new file mode 100644 index 0000000..8048b67 --- /dev/null +++ b/style.css @@ -0,0 +1,14 @@ +@charset "utf-8"; + +body { + background-color: #FFCECE; +} + +table { + border: 1px solid black; + border-collapse: collapse; +} + +td { + border: 1px solid black; +} diff --git a/tcparser.py b/tcparser.py new file mode 100644 index 0000000..9e06a34 --- /dev/null +++ b/tcparser.py @@ -0,0 +1,136 @@ +import xml.etree.ElementTree as ET +import json + +# Parse Tellico's XML and get a library struct, +# a stripped version of our library in a Python-friendly format +def getLibrary(path): + # Get XML string from file + fh = open(path) + xmlstring = fh.read() + # Get rid of XML namespace + xmlstring = xmlstring.replace('xmlns="http://periapsis.org/tellico/"', '') + + root = ET.fromstring(xmlstring) + + if root[0].tag != 'collection': + sys.exit('No collection found') + + collection = root[0] + + library = list() + + for i in collection.findall('entry'): + newbook = dict() + newbook['id'] = int(i.attrib['id']) + # these `for` overwrite previous values, + # but it is not an issue since actually these are one shot only elements + for j in i.iter('title'): + newbook['title'] = j.text + for j in i.iter('publisher'): + newbook['publisher'] = j.text + for j in i.iter('pub_year'): + newbook['year'] = int(j.text) + for j in i.iter('isbn'): + newbook['isbn'] = j.text.replace('-', '') + for j in i.iter('pages'): + newbook['pages'] = int(j.text) + newbook['authors'] = list() + for j in i.iter('authors'): + for k in j.findall('author'): + newbook['authors'].append(k.text) + + library.append(newbook) + + return library + +# Given a custom Python-friendly library struct, get the HTML version of it +# Very useful for our webserver +def getHTML(library): + # Build the XML/HTML tree + tree = ET.ElementTree() + + # Headers and other stuff needed for properly formatted HTML documents + html = ET.Element('html') + head = ET.Element('head') + title = ET.Element('title') + linkstyle = ET.Element('link', attrib={'rel': 'stylesheet', 'type': 'text/css', 'media':'all', 'href': 'style.css'}) + metacharset = ET.Element('meta', attrib={'charset': 'utf-8'}) + body = ET.Element('body') + main = ET.Element('main') + table = ET.Element('table') + + title.text = 'Tellico parsed Library HTML' + + tree._setroot(html) + html.append(head) + head.append(title) + head.append(metacharset) + head.append(linkstyle) + html.append(body) + body.append(main) + main.append(table) + + # Add a row in our table for every book in the library object + for i in library: + tr = ET.Element('tr') + + id = ET.Element('td') + id.text = str(i.get('id')) + tr.append(id) + + title = ET.Element('td') + title.text = i.get('title') + tr.append(title) + + publisher = ET.Element('td'); + publisher.text = i.get('publisher') + tr.append(publisher) + + year = ET.Element('td') + year.text = str(i.get('year')) + tr.append(year) + + isbn = ET.Element('td') + isbn.text = i.get('isbn') + tr.append(isbn) + + pages = ET.Element('td') + pages.text = str(i.get('pages')) + tr.append(pages) + + authors = ET.Element('td') + ul = ET.Element('ul') + authors.append(ul) + for j in i['authors']: + li = ET.Element('li') + li.text = j + ul.append(li) + tr.append(authors) + + table.append(tr) + + # Our nice XML/HTML tree + return tree + +# Filter results using following filter functions +################################################## +def filter(library, title='', author=''): + # print ("
" + json.dumps(library, indent=4) + "
") + result = list() + + for i in library: + if filterTitle(i, title) and filterAuthor(i, author): + result.append(i) + + return result + +# Filter by title +def filterTitle(book, filter): + return filter.lower() in book['title'].lower() + +# Filter by author +def filterAuthor(book, filter): + for i in book['authors']: + if filter.lower() in i.lower(): + return True + return False