First commit

2018-10-20 18:46:05 +02:00 · 2018-10-20 18:46:05 +02:00 · adc2305713
commit adc2305713
8 changed files with 264 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
 __pycache__/
 test/
 input/
--- a/13
+++ b/13
@ -0,0 +1,13 @@
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    A copy of the GNU General Public License can be found
    here <http://www.gnu.org/licenses/>
--- a/3
+++ b/3
@ -0,0 +1,3 @@
 TPDF - Tellico Parser anD Finder
 A simple Tellico XML parser and finder for your book library, written in Python 3
--- a/1
+++ b/1
@ -0,0 +1 @@
 -   Properly automagically read interesting parts of XML into JSON
--- a/index.html
+++ b/index.html
@ -0,0 +1,24 @@
 <!DOCTYPE html>
 <html>
    <head>
        <meta charset="utf-8" />
        <link rel="stylesheet" media="all" type="text/css" href="style.css" />
        <title>TPDF - Tellico Parser anD Finder</title>
    </head>
    <body>
        <h1>TPDF - Tellico Parser anD Finder</h1>
        <!-- Search form: submits the filters to main.py as GET parameters -->
        <form action="main.py" method="get">
            <!-- Each label's `for` must match the `id` of its input -->
            <label for="title">Title</label>
            <input type="text" id="title" name="title" />
            <label for="author">Author</label>
            <input type="text" id="author" name="author" />
            <input type="submit" />
            <input type="reset" />
        </form>
    </body>
 </html>
--- a/main.py
+++ b/main.py
@ -0,0 +1,70 @@
 #!/usr/bin/python
 # -*- coding: UTF-8 -*-
 # GET me using a web browser,
 # executing my code with a Python interpreter called by a CGI-compliant webserver!
 # Example URI:
 #   http://www.example.org/path/main.py?format=format&title=title&author=author
 # where:
 #   format:
 #       choose output format. Can be either `json` or `html` (default)
 #   title:
 #       title of the book to filter (optional)
 #   author:
 #       author of the book to filter (optional)
 #
 #   Every parameter is optional.
 #   Please note that omitting both filters returns every book in the library.
 # Useful libraries (no pun intended)
 import xml.etree.ElementTree as ET
 import json
 import sys
 import cgitb, cgi
 # Our custom library (again no pun intended)
 import tcparser
 # Start CGI handling for webserver
 # NOTE: the cgi and cgitb modules were removed from the standard library
 # in Python 3.13, so this script needs an older interpreter
 cgitb.enable()
 inputvars = cgi.FieldStorage()
 # Detect desired format.
 # getfirst() copes with absent and with repeated parameters alike;
 # anything that is not a known format falls back to the HTML default
 format = inputvars.getfirst('format', 'html').lower()
 if format not in ('html', 'json'):
    format = 'html'
 if format == 'html':
    print('Content-Type: text/html; charset=utf-8')
 else:
    # application/json is the registered media type for JSON
    print('Content-Type: application/json; charset=utf-8')
 # The empty line terminates the HTTP header section
 print()
 ### End of HTTP headers:  it is now safe to output things
 ##########################################################
 # Get a Python-friendly library struct
 library = tcparser.getLibrary('input/tellico.xml')
 ### Get filters to search for books ###
 # A missing filter becomes the empty string
 title = inputvars.getfirst('title', '')
 author = inputvars.getfirst('author', '')
 result = tcparser.filter(library, title=title, author=author)
 if format == 'html':
    html = tcparser.getHTML(result)
    # ET.dump writes the serialized tree to standard output
    ET.dump(html)
 else:
    # Wanna get a pretty JSON encoded library to do your nasty things offline at home? ;-)
    print(json.dumps(result, indent=4))
--- a/style.css
+++ b/style.css
@ -0,0 +1,14 @@
@charset "utf-8";
 /* Page background */
 body {
  background-color: #FFCECE;
 }
 /* The table and each of its cells share the same thin black border */
 table,
 td {
  border: 1px solid black;
 }
 /* Merge adjacent borders into single lines */
 table {
  border-collapse: collapse;
 }
--- a/tcparser.py
+++ b/tcparser.py
@ -0,0 +1,136 @@
 import xml.etree.ElementTree as ET
 import json
 # Parse Tellico's XML and get a library struct,
 # a stripped version of our library in a Python-friendly format
 def getLibrary(path):
    """Parse a Tellico XML file into a list of plain book dicts.

    path: filesystem path of the Tellico XML file to read.

    Returns a list with one dict per <entry> of the collection. Every dict
    has 'id' (int) and 'authors' (list of author texts); 'title',
    'publisher', 'year' (int), 'isbn' (dashes removed) and 'pages' (int)
    are present only when the matching element exists in the entry.

    Exits through sys.exit() when the first child of the document root is
    not a <collection> element, or when the root has no children at all.
    """
    # Imported locally so the error path below works on its own
    import sys

    # Read raw bytes so the XML parser decodes the text according to the
    # document's own encoding declaration instead of the locale default;
    # the `with` block closes the file even when reading fails
    with open(path, 'rb') as fh:
        xmlstring = fh.read()
    # Get rid of the XML namespace so plain tag names can be used below
    xmlstring = xmlstring.replace(b'xmlns="http://periapsis.org/tellico/"', b'')
    root = ET.fromstring(xmlstring)
    # An empty root has no first child to inspect, so check its length first
    if len(root) == 0 or root[0].tag != 'collection':
        sys.exit('No collection found')
    collection = root[0]
    library = list()
    for entry in collection.findall('entry'):
        newbook = dict()
        newbook['id'] = int(entry.attrib['id'])
        # These loops overwrite previous values, which is fine because each
        # of these elements is expected to appear at most once per entry
        for node in entry.iter('title'):
            newbook['title'] = node.text
        for node in entry.iter('publisher'):
            newbook['publisher'] = node.text
        for node in entry.iter('pub_year'):
            newbook['year'] = int(node.text)
        for node in entry.iter('isbn'):
            newbook['isbn'] = node.text.replace('-', '')
        for node in entry.iter('pages'):
            newbook['pages'] = int(node.text)
        # Authors are collected in document order
        newbook['authors'] = list()
        for group in entry.iter('authors'):
            for person in group.findall('author'):
                newbook['authors'].append(person.text)
        library.append(newbook)
    return library
 # Given a custom Python-friendly library struct, get the HTML version of it
 # Very useful for our webserver
 def getHTML(library):
    """Build an HTML document listing every book of `library` in a table.

    library: iterable of book dicts. The keys read are 'id', 'title',
    'publisher', 'year', 'isbn', 'pages' and 'authors' (a list of names).

    Returns an xml.etree.ElementTree.ElementTree whose root is the <html>
    element. Each book becomes one <tr> with seven <td> cells in the order
    above; the last cell holds a <ul> with one <li> per author. A field
    that is missing from a book produces an empty cell.
    """
    # Headers and other stuff needed for properly formatted HTML documents
    html = ET.Element('html')
    head = ET.SubElement(html, 'head')
    ET.SubElement(head, 'title').text = 'Tellico parsed Library HTML'
    ET.SubElement(head, 'meta', attrib={'charset': 'utf-8'})
    ET.SubElement(head, 'link', attrib={'rel': 'stylesheet', 'type': 'text/css', 'media':'all', 'href': 'style.css'})
    body = ET.SubElement(html, 'body')
    main = ET.SubElement(body, 'main')
    table = ET.SubElement(main, 'table')
    # Add a row in our table for every book in the library object
    for book in library:
        tr = ET.SubElement(table, 'tr')
        for key in ('id', 'title', 'publisher', 'year', 'isbn', 'pages'):
            value = book.get(key)
            # An absent field gives an empty cell, not the text "None"
            ET.SubElement(tr, 'td').text = '' if value is None else str(value)
        ul = ET.SubElement(ET.SubElement(tr, 'td'), 'ul')
        for name in book.get('authors', ()):
            ET.SubElement(ul, 'li').text = name
    # Our nice XML/HTML tree, built through the public constructor
    return ET.ElementTree(html)
 # Filter results using following filter functions
 ##################################################
 def filter(library, title='', author=''):
    """Return the books of `library` that pass both search filters.

    library: iterable of book dicts.
    title:   text handed to filterTitle() for every book.
    author:  text handed to filterAuthor() for every book.

    Returns a new list holding, in their original order, the books for
    which both filterTitle() and filterAuthor() answer true.
    """
    return [
        book
        for book in library
        if filterTitle(book, title) and filterAuthor(book, author)
    ]
 # Filter by title
 def filterTitle(book, filter):
    """Tell whether `filter` occurs in the title of `book`, ignoring case.

    book:   book dict; its 'title' entry may be absent or None.
    filter: text to look for; the empty string matches every book.

    Returns True when the lower-cased filter is a substring of the
    lower-cased title, False otherwise.
    """
    # A book without a usable title is compared as if its title were empty,
    # so it cannot abort the whole search with an exception
    title = book.get('title') or ''
    return filter.lower() in title.lower()
 # Filter by author
 def filterAuthor(book, filter):
    """Tell whether `filter` occurs in any author of `book`, ignoring case.

    book:   book dict; its 'authors' entry is a list of author names.
    filter: text to look for; the empty string matches every book.

    Returns True when the filter is empty or when the lower-cased filter is
    a substring of at least one lower-cased author name, False otherwise.
    """
    # An empty filter means "no author restriction": without this check a
    # book with an empty author list would never match anything at all
    if not filter:
        return True
    needle = filter.lower()
    # An author entry without text is compared as the empty string
    return any(needle in (name or '').lower() for name in book.get('authors') or ())
		`@ -0,0 +1,3 @@`
							`TPDF - Tellico Parser anD Finder`

							`A simple Tellico XML parser and finder for your book library, written in Python3`
		`@ -0,0 +1 @@`
							`- Properly automagically read interesting parts of XML into JSON`