Source code for egaia.utils

# -*- coding: utf-8 -*-
"""Multipurpose utilities for egaia."""
import re
import os
import readline
import subprocess

import markdown
import mammoth

# Matches a canonical 8-4-4-4-12 hexadecimal UUID, case-insensitively.
# The digit class is spelled out as [0-9A-F]: the range "0-F" would also
# admit the ASCII punctuation between "9" and "A" (: ; < = > ? @).
re_uuid = re.compile(
    r"[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}", re.I)

def rlinput(prompt, prefill=''):
    """Retrieve user input via interactive prompt.

    The input line is pre-populated with ``prefill`` so the user can edit it
    instead of typing from scratch. Returns the line entered by the user.
    """
    # ``raw_input`` only exists on Python 2; on Python 3 the equivalent
    # builtin is ``input``. Resolve it once so the function works on both.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    def pre_input_hook():
        # Called by readline just before it starts reading: inject the
        # prefill text into the edit buffer and repaint the line.
        readline.insert_text(prefill)
        readline.redisplay()

    readline.set_pre_input_hook(pre_input_hook)
    try:
        return read_line(prompt)
    finally:
        # Always uninstall the hook so later prompts are not prefilled.
        readline.set_pre_input_hook(None)
def run(command):
    """Run a system command.

    ``command`` is passed unchanged to ``subprocess.call``.

    Returns ``False`` when the command could not be started or exited with a
    non-zero status (a message is printed in both cases). On success the exit
    status ``0`` is returned; note that ``0`` is itself falsy, so callers
    should test ``run(...) is False`` rather than relying on truthiness.
    """
    try:
        result = subprocess.call(command)
    except Exception:
        # Deliberately best-effort: report and carry on. ``Exception`` (not a
        # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        # The 1-tuple keeps the formatting safe when ``command`` is a tuple.
        print("Execution failed: %s" % (command,))
        return False
    if result != 0:
        print("process ended with error: %s" % result)
        return False
    return result
def isotime(timestamp):
    """Format a POSIX timestamp as 'YYYY-MM-DD HH:MM:SS'.

    ``timestamp`` is coerced with ``int()`` (so numeric strings are accepted
    and fractions are dropped) and rendered in the local timezone.
    """
    from datetime import datetime as _datetime

    moment = _datetime.fromtimestamp(int(timestamp))
    return moment.strftime('%Y-%m-%d %H:%M:%S')
def current_time():
    """Return today's local date as a 'YYYY-MM-DD' string."""
    from time import strftime

    return strftime("%Y-%m-%d")
def byteSize(num, suffix='B'):
    """Give a human-readable representation of a filesize.

    ``num`` is scaled down by factors of 1024 until its magnitude is below
    1024, then rendered with one decimal place, the matching binary prefix
    (Ki, Mi, ... Zi) and ``suffix``. Anything larger is reported in 'Yi'.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        if abs(value) < 1024.0:
            return "%3.1f %s%s" % (value, prefix, suffix)
        value = value / 1024.0
    # Ran out of prefixes: whatever remains is expressed in yobi-units.
    return "%.1f %s%s" % (value, 'Yi', suffix)
def truncate(description, length=200):
    """Make a truncated description for index pages.

    The description truncates on the last full stop under the length limit,
    so very long descriptions should ideally be broken into several
    sentences. When there is no sentence boundary ('. ') under the limit, the
    text is cut at the last space instead, or hard-cut at ``length`` if it
    contains no space at all, rather than collapsing to an empty string.

    ``length=-1`` disables truncation; descriptions that already fit are
    returned unchanged.
    """
    if length == -1 or len(description) <= length:
        return description

    window = description[:length]
    head, stop, _ = window.rpartition('. ')
    if stop:
        # Keep everything up to and including the final full stop.
        return head + '.'

    # No complete sentence fits: fall back to the last whole word.
    return window.rsplit(' ', 1)[0] or window
def md2html(text, meta=False):
    """Convert Markdown to HTML.

    Returns the HTML string. When ``meta`` is true, returns a tuple
    ``(html, metadata)`` where ``metadata`` is the dictionary parsed from the
    document's leading meta-data block.
    """
    extensions = ['markdown.extensions.smarty']
    if meta:
        # ``Markdown.Meta`` is only populated by the meta-data extension;
        # without it the attribute does not exist and reading it fails.
        extensions.append('markdown.extensions.meta')

    md = markdown.Markdown(extensions=extensions)
    html = md.convert(text)
    if meta:
        return (html, getattr(md, 'Meta', {}))
    return html
def makeDocument(uuid):
    """Return a python-docx Document instance.

    The document is created from the ``static/template.docx`` resource of the
    ``egaia`` package, its body content is cleared, and its core properties
    are stamped with the current UTC time (modified) and ``uuid`` (title).
    """
    import datetime
    import pkg_resources
    from docx import Document

    template = pkg_resources.resource_filename('egaia', 'static/template.docx')
    document = Document(template)
    # NOTE(review): ``_body`` is a private python-docx attribute; presumably
    # this empties the template text while keeping its styles - confirm.
    document._body.clear_content()
    document.core_properties.modified = datetime.datetime.utcnow()
    document.core_properties.title = uuid
    return document
def readDocument(docx_file):
    """Return paragraph and table instances from a docx document.

    Returns a tuple ``(paragraphs, tables)`` taken from the opened document,
    or ``None`` when ``docx_file`` does not exist or cannot be opened (a
    message is printed in the latter case).
    """
    if not os.path.exists(docx_file):
        return None

    # Imported only once there is a file to open, so a missing path is
    # reported as ``None`` without requiring python-docx.
    from docx import Document

    try:
        document = Document(docx_file)
    except Exception:
        # Best-effort: any failure to parse the file is reported, not raised.
        # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
        print("Could not open %s" % (docx_file,))
        return None
    return (document.paragraphs, document.tables)
[docs]def docx2str(docx_file): """Convert a docx document to a fragment string.""" style_map = """ p[style-name='Header'] => header > p.header-content p[style-name='Footer'] => footer > div.wrap > p.footer-content:fresh p[style-name='Title'] => div.wrap > h1.document-title:fresh p[style-name='Subtitle'] => div.wrap > h1.document-subtitle:fresh p[style-name='Author'] => div.wrap > p.author:fresh p[style-name='Date'] => div.wrap > p.date:fresh p[style-name='Lead'] => div.wrap > p.lead:fresh p[style-name='Figure'] => div.wrap > figure:fresh > p.figure:fresh p[style-name='Caption'] => div.wrap > figure > figcaption > p:fresh p[style-name='Aside'] => div.wrap > aside > p:fresh p[style-name='Quote'] => div.wrap > blockquote > p:fresh p[style-name='Address'] => div.wrap > address > p:fresh table => div.wrap > table.pure-table.pure-table-horizontal.pure-table-striped:fresh # DEFAULTS # https://github.com/mwilliamson/python-mammoth/blob/d61528737eaee72823258bf3460e2ac88e94a6a6/mammoth/options.py p.Heading1 => div.wrap > h1:fresh p.Heading2 => div.wrap > h2:fresh p.Heading3 => div.wrap > h3:fresh p.Heading4 => div.wrap > h4:fresh p.Heading5 => div.wrap > h5:fresh p.Heading6 => div.wrap > h6:fresh p[style-name='Heading 1'] => div.wrap > h1:fresh p[style-name='Heading 2'] => div.wrap > h2:fresh p[style-name='Heading 3'] => div.wrap > h3:fresh p[style-name='Heading 4'] => div.wrap > h4:fresh p[style-name='Heading 5'] => div.wrap > h5:fresh p[style-name='Heading 6'] => div.wrap > h6:fresh p[style-name='heading 1'] => div.wrap > h1:fresh p[style-name='heading 2'] => div.wrap > h2:fresh p[style-name='heading 3'] => div.wrap > h3:fresh p[style-name='heading 4'] => div.wrap > h4:fresh p[style-name='heading 5'] => div.wrap > h5:fresh p[style-name='heading 6'] => div.wrap > h6:fresh p[style-name='footnote text'] => p > span.footnote-text p[style-name='endnote text'] => p > span.endnote-text p[style-name='annotation text'] => div.wrap > p # LibreOffice p[style-name='Footnote'] => 
div.wrap > p.note.footnote p[style-name='Endnote'] => div.wrap > p.note.endnote p[style-name='First Paragraph'] => div.wrap > p.first-paragraph:fresh p:unordered-list(1) => div.wrap > ul > li:fresh p:unordered-list(2) => div.wrap > ul|ol > li > ul > li:fresh p:unordered-list(3) => div.wrap > ul|ol > li > ul|ol > li > ul > li:fresh p:unordered-list(4) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul > li:fresh p:unordered-list(5) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul|ol > li > ul > li:fresh p:ordered-list(1) => div.wrap > ol > li:fresh p:ordered-list(2) => div.wrap > ul|ol > li > ol > li:fresh p:ordered-list(3) => div.wrap > ul|ol > li > ul|ol > li > ol > li:fresh p:ordered-list(4) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ol > li:fresh p:ordered-list(5) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul|ol > li > ol > li:fresh p[style-name='Normal'] => div.wrap > p:fresh p[style-name='Text'] => div.wrap > p:fresh p.TextBody => div.wrap > p:fresh p.TableContents => p.table-contents:fresh p.TableHeading => p.table-heading:fresh # catchall p => div.wrap > p:fresh """ try: with open(docx_file, "rb") as doc: result = mammoth.convert_to_html(doc, style_map=style_map) html = result.value # The generated HTML #print result.messages except: print "Error processing docx file %s" % docx_file return None return html
def docx2html(document):
    """Convert a docx document to a standalone file.

    Wraps the fragment produced by ``docx2str`` in a complete HTML page whose
    title combines the configured archive name and the document's UUID.
    Returns the page as a single string joined with CRLF line endings, or
    ``None`` when the fragment conversion yields nothing.
    """
    import egaia_config
    import egaia_parsefn
    import strings

    fragment = docx2str(document)
    if not fragment:
        return None

    meta_str = {
        'archive_name': egaia_config.getConfig('archive', 'archive_name').decode('utf-8'),
        # Only referenced by the disabled footer below; still looked up.
        'archive_url': egaia_config.getConfig('archive', 'archive_url').decode('utf-8'),
        'uuid': egaia_parsefn.getUuid(document),
    }

    page = [
        '<!doctype html>',
        '<html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1">',
        '<title>{archive_name} | {uuid}</title>'.format(**meta_str),
        strings.font_link,
        strings.pure_css,
        strings.local_item_css,
        '</head><body>',
        fragment,
        '</body></html>',
    ]
    # Disabled footer variant, kept for reference:
    #out.append("""<footer><div class="wrap">UUID: <a href="{archive_url}item/{uuid}/">{uuid}</a></div></footer></body></html>""".format(**meta_str) )
    return u'\r\n'.join(page)
def rm(path):
    """Remove a file.

    Uses ``os.unlink``, so only files (and symlinks) are removed; passing a
    directory fails like any other error. Returns ``True`` on success, or
    prints a message and returns ``False`` if the path could not be deleted.
    """
    try:
        os.unlink(path)
    except Exception:
        # Best-effort delete: report and signal failure instead of raising.
        # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
        print("Error deleting %s" % (path,))
        return False
    return True
def fmtTime(secs):
    """Convert seconds to hh:mm:ss

    Hours are not zero-padded and are not wrapped at 24; minutes and seconds
    are always two digits. Returns a unicode string.
    """
    minutes, seconds = divmod(secs, 60)
    hours, minutes = divmod(minutes, 60)
    return u"%d:%02d:%02d" % (hours, minutes, seconds)