First commit

2018-10-20 18:46:05 +02:00 · 2018-10-20 18:46:05 +02:00 · adc2305713
commit adc2305713
8 changed files with 264 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
+__pycache__/
+test/
+input/
--- a/13
+++ b/13
@ -0,0 +1,13 @@
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    A copy of the GNU General Public License can be found
+    here <http://www.gnu.org/licenses/>
+
--- a/3
+++ b/3
@ -0,0 +1,3 @@
+TPDF - Tellico Parser anD Finder
+
+A simple Tellico XML parser and finder for your book library, written in Python 3.
--- a/1
+++ b/1
@ -0,0 +1 @@
+-   Properly automagically read interesting parts of XML into JSON
--- a/index.html
+++ b/index.html
@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta charset="utf-8" />
+        <link rel="stylesheet" media="all" type="text/css" href="style.css" />
+        <title>TPDF - Tellico Parser anD Finder</title>
+    </head>
+
+    <body>
+        <h1>TPDF - Tellico Parser anD Finder</h1>
+
+        <!-- Search form: submits the optional filters to main.py as GET parameters.
+             Each label's `for` must match the `id` of its input to be associated with it. -->
+        <form action="main.py" method="get">
+            <label for="title">Title</label>
+            <input type="text" id="title" name="title" />
+
+            <label for="author">Author</label>
+            <input type="text" id="author" name="author" />
+
+            <input type="submit" />
+            <input type="reset" />
+        </form>
+
+    </body>
+</html>
--- a/main.py
+++ b/main.py
@ -0,0 +1,70 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+
+# GET me using a web browser,
+# executing my code with a Python interpreter called by a CGI-compliant webserver!
+# Example URI:
+#   http://www.example.org/path/main.py?format=format&title=title&author=author
+# where:
+#   format:
+#       choose output format. Can be either `json` or `html` (default)
+#   title:
+#       title of the book to filter (optional)
+#   author:
+#       author of the book to filter (optional)
+#
+#   Every parameter is optional.
+#   Please note that not providing filters results in all books in the library.
+
+# Useful libraries (no pun intended)
+import xml.etree.ElementTree as ET
+import json
+import sys
+import cgitb, cgi
+
+# Our custom library (again no pun intended)
+import tcparser
+
+# Start CGI handling for webserver
+cgitb.enable()
+inputvars = cgi.FieldStorage()
+
+# Detect desired format.
+# getfirst() yields the default when the parameter is absent and the first
+# value when it is repeated (indexing a repeated field returns a list, which
+# has no `.value` attribute).
+output_format = inputvars.getfirst('format', 'html').lower()
+if output_format not in ('html', 'json'):
+    # Unrecognised value: fall back to the documented default rather than
+    # announcing JSON and then sending an empty body
+    output_format = 'html'
+
+if output_format == 'html':
+    print('Content-Type: text/html; charset=utf-8')
+else:
+    # application/json is the registered media type for JSON
+    print('Content-Type: application/json; charset=utf-8')
+
+print()
+
+### End of HTTP headers:  it is now safe to output things
+##########################################################
+
+# Get a Python-friendly library struct
+library = tcparser.getLibrary('input/tellico.xml')
+
+### Get filters to search for books ###
+# An absent filter is passed on as the empty string
+title = inputvars.getfirst('title', '')
+author = inputvars.getfirst('author', '')
+
+
+result = tcparser.filter(library, title=title, author=author)
+
+if output_format == 'html':
+    html = tcparser.getHTML(result)
+    ET.dump(html)
+else:
+    # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-)
+    print(json.dumps(result, indent=4))
--- a/style.css
+++ b/style.css
@ -0,0 +1,14 @@
+@charset "utf-8";
+
+/* Page background */
+body {
+  background-color: #FFCECE;
+}
+
+/* Table: single outer border, adjacent cell borders merged */
+table {
+  border: 1px solid black;
+  border-collapse: collapse;
+}
+
+/* Table cells: thin black grid lines */
+td {
+  border: 1px solid black;
+}
--- a/tcparser.py
+++ b/tcparser.py
@ -0,0 +1,136 @@
+import json
+import sys
+import xml.etree.ElementTree as ET
+
+# Parse Tellico's XML and get a library struct,
+# a stripped version of our library in a Python-friendly format
+def getLibrary(path):
+    """Read the Tellico XML file at `path` and return a list of book dicts.
+
+    Every book dict always carries `id` (int) and `authors` (list of str).
+    `title`, `publisher`, `isbn` (dashes removed), `year` (int) and `pages`
+    (int) are present only when the matching element exists and is not empty.
+
+    Exits through sys.exit() when the first child of the document root is not
+    a `collection` element.
+    """
+    # Read raw bytes so that the XML parser, not the process locale,
+    # decides how the document is decoded; `with` closes the file for us
+    with open(path, 'rb') as fh:
+        xmlbytes = fh.read()
+    # Get rid of XML namespace
+    xmlbytes = xmlbytes.replace(b'xmlns="http://periapsis.org/tellico/"', b'')
+
+    root = ET.fromstring(xmlbytes)
+
+    # An empty root has no first child to inspect
+    if len(root) == 0 or root[0].tag != 'collection':
+        sys.exit('No collection found')
+
+    collection = root[0]
+
+    # (XML tag, key in the book dict, conversion applied to the element text)
+    fields = (
+        ('title', 'title', str),
+        ('publisher', 'publisher', str),
+        ('pub_year', 'year', int),
+        ('isbn', 'isbn', lambda text: text.replace('-', '')),
+        ('pages', 'pages', int),
+    )
+
+    library = []
+
+    for entry in collection.findall('entry'):
+        newbook = {'id': int(entry.attrib['id'])}
+        for tag, key, convert in fields:
+            # a later occurrence overwrites a previous one,
+            # but it is not an issue since these are one shot only elements
+            for element in entry.iter(tag):
+                text = element.text
+                # Empty elements (<pages/>) have no text to convert: skip them
+                if text is not None and text.strip():
+                    newbook[key] = convert(text)
+        newbook['authors'] = [
+            author.text
+            for authors in entry.iter('authors')
+            for author in authors.findall('author')
+            if author.text
+        ]
+
+        library.append(newbook)
+
+    return library
+
+# Given a custom Python-friendly library struct, get the HTML version of it
+# Very useful for our webserver
+def getHTML(library):
+    """Build an HTML document with one table row per book in `library`.
+
+    `library` is an iterable of book dicts. Each row holds, in order, the
+    cells id, title, publisher, year, isbn, pages and a bulleted list of
+    authors. A value missing from a book gives an empty cell.
+
+    Returns an xml.etree.ElementTree.ElementTree rooted at the `html` element.
+    """
+    # Headers and other stuff needed for properly formatted HTML documents
+    html = ET.Element('html')
+    head = ET.SubElement(html, 'head')
+    ET.SubElement(head, 'title').text = 'Tellico parsed Library HTML'
+    ET.SubElement(head, 'meta', attrib={'charset': 'utf-8'})
+    ET.SubElement(head, 'link', attrib={'rel': 'stylesheet', 'type': 'text/css', 'media': 'all', 'href': 'style.css'})
+    body = ET.SubElement(html, 'body')
+    main = ET.SubElement(body, 'main')
+    table = ET.SubElement(main, 'table')
+
+    # Add a row in our table for every book in the library object
+    for book in library:
+        tr = ET.SubElement(table, 'tr')
+
+        for key in ('id', 'title', 'publisher', 'year', 'isbn', 'pages'):
+            value = book.get(key)
+            cell = ET.SubElement(tr, 'td')
+            # Leave the cell empty instead of printing the word "None"
+            cell.text = None if value is None else str(value)
+
+        authors = ET.SubElement(tr, 'td')
+        ul = ET.SubElement(authors, 'ul')
+        for name in book.get('authors', ()):
+            ET.SubElement(ul, 'li').text = name
+
+    # Our nice XML/HTML tree, built through the public constructor
+    return ET.ElementTree(html)
+
+# Filter results using following filter functions
+##################################################
+def filter(library, title='', author=''):
+    """Return the books of `library` accepted by both filters.
+
+    A book is kept when filterTitle(book, title) and filterAuthor(book, author)
+    are both true. The order of `library` is preserved in the returned list.
+    """
+    return [
+        book
+        for book in library
+        if filterTitle(book, title) and filterAuthor(book, author)
+    ]
+
+# Filter by title
+def filterTitle(book, filter):
+    """Return True when `filter` occurs in the book title, ignoring case.
+
+    A book without a title (key missing or value None) is treated as having
+    an empty title, so only the empty filter matches it.
+    """
+    return filter.lower() in (book.get('title') or '').lower()
+
+# Filter by author
+def filterAuthor(book, filter):
+    """Return True when `filter` occurs in any author name, ignoring case.
+
+    An empty filter accepts every book, including books without authors.
+    Author entries that are None are ignored.
+    """
+    if not filter:
+        # No author filter requested: books with no authors must pass too
+        return True
+    needle = filter.lower()
+    return any(needle in (name or '').lower() for name in book.get('authors', ()))
				`@ -0,0 +1 @@`
				`- Properly automagically read interesting parts of XML into JSON`