Source code for egaia.utils

# -*- coding: utf-8 -*-
"""Multipurpose utilities for egaia."""
import re
import os
import readline
import subprocess

import markdown
import mammoth

# Matches a canonical 8-4-4-4-12 hexadecimal UUID, case-insensitively.
# The digit class is spelled 0-9A-F: the shorter range 0-F would also span the
# ASCII punctuation that sits between "9" and "A" (: ; < = > ? @).
re_uuid = re.compile(
    r"[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}", re.I)

def rlinput(prompt, prefill=''):
    """Ask the user for a line of input, with an editable pre-filled value.

    ``prefill`` is inserted into the readline buffer just before the prompt
    is displayed. The pre-input hook is always removed again afterwards,
    including when reading the input raises.
    """
    def _seed_buffer():
        # Runs right before readline starts reading: put the default text in
        # the edit buffer and repaint so that it is visible.
        readline.insert_text(prefill)
        readline.redisplay()

    readline.set_pre_input_hook(_seed_buffer)
    try:
        answer = raw_input(prompt)
    finally:
        readline.set_pre_input_hook(None)
    return answer

def run(command):
    """Run a system command and report whether it succeeded.

    ``command`` is passed unchanged to ``subprocess.call``.

    Returns the exit status (``0``) when the command succeeds, and ``False``
    when it exits with a non-zero status or cannot be started. A diagnostic
    line is printed in both failure cases.

    NOTE(review): the success value ``0`` is itself falsy, so callers need to
    test ``run(...) is False`` instead of relying on truthiness.
    """
    try:
        result = subprocess.call(command)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a failed command.
        # The one-element tuple keeps the formatting safe when ``command`` is
        # itself a tuple.
        print("Execution failed: %s" % (command,))
        return False

    if result != 0:
        print("process ended with error: %s" % result)
        return False
    return result

def isotime(timestamp):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    ``timestamp`` is coerced with ``int()``, so numeric strings are accepted
    and any fractional part is discarded.
    """
    from datetime import datetime

    moment = datetime.fromtimestamp(int(timestamp))
    return moment.strftime('%Y-%m-%d %H:%M:%S')

def current_time():
    """Return today's local date as a ``YYYY-MM-DD`` string.

    Despite the name, the result carries no time-of-day component.
    """
    from time import strftime

    today = strftime("%Y-%m-%d")
    return today

def byteSize(num, suffix='B'):
    """Render a byte count using binary (1024-based) unit prefixes.

    The value is divided by 1024 until its magnitude drops below 1024, then
    formatted with one decimal place, e.g. ``1536`` -> ``"1.5 KiB"``.
    Anything too large for the ``Zi`` prefix is expressed in ``Yi``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')

    for prefix in prefixes:
        if not abs(num) < 1024.0:
            # Still too large for this prefix: scale down and try the next.
            num /= 1024.0
            continue
        return "%3.1f %s%s" % (num, prefix, suffix)

    return "%.1f %s%s" % (num, 'Yi', suffix)

def truncate(description, length=200):
    """Make a truncated description for index pages.

    The description is cut at the last full stop that falls within the first
    ``length`` characters, so very long descriptions should ideally be broken
    into several sentences. When no sentence break is found in that span the
    text is cut at the last word boundary instead, and as a last resort at
    exactly ``length`` characters, rather than collapsing to an empty string.

    ``length == -1`` disables truncation. Descriptions that already fit are
    returned unchanged.
    """
    if length == -1 or len(description) <= length:
        return description

    window = description[:length]

    head, stop, _ = window.rpartition('. ')
    if stop:
        # Keep the full stop itself, drop the space that followed it.
        return head + '.'

    # No complete sentence fits in the window: fall back to whole words.
    head, space, _ = window.rpartition(' ')
    if space:
        return head

    # A single unbroken run of characters: hard cut.
    return window
    

def md2html(text, meta=False):
    """Convert Markdown to HTML.

    The ``smarty`` extension is always enabled. When ``meta`` is true the
    ``meta`` extension is enabled as well and the function returns a
    ``(html, metadata)`` tuple; otherwise it returns only the HTML string.
    """
    extensions = ['markdown.extensions.smarty']
    if meta:
        # ``md.Meta`` is created by the meta extension; without it the
        # attribute does not exist and reading it raises AttributeError.
        extensions.append('markdown.extensions.meta')

    md = markdown.Markdown(extensions=extensions)
    html = md.convert(text)

    if meta:
        return (html, md.Meta)
    return html

def makeDocument(uuid):
    """Return a python-docx Document instance.

    The document is created from the ``static/template.docx`` resource of the
    ``egaia`` package, with its body content cleared. Its core properties are
    set so that ``modified`` is the current UTC time and ``title`` is
    ``uuid``.
    """
    from docx import Document
    import pkg_resources
    import datetime

    template = pkg_resources.resource_filename('egaia', 'static/template.docx')
    document = Document(template)
    # NOTE(review): ``_body`` is a private python-docx attribute; it is used
    # here to empty the template body before the document is handed back.
    document._body.clear_content()
    document.core_properties.modified = datetime.datetime.utcnow()
    document.core_properties.title = uuid

    return document


def readDocument(docx_file):
    """Return paragraph and table instances from a docx document.

    Returns a ``(paragraphs, tables)`` tuple taken from the python-docx
    ``Document``. Returns ``None`` when ``docx_file`` does not exist, or when
    the document cannot be opened (a message is printed in that case).
    """
    if not os.path.exists(docx_file):
        return None

    # Imported only once there is actually a file to open.
    from docx import Document

    try:
        document = Document(docx_file)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not mistaken for an unreadable document.
        print("Could not open %s" % docx_file)
        return None

    return (document.paragraphs, document.tables)



def docx2str(docx_file):
    """Convert a docx document to an HTML fragment string.

    The file at ``docx_file`` is opened in binary mode and converted with
    ``mammoth.convert_to_html`` using the style map below, which maps
    paragraph styles onto elements, most of them nested in ``div.wrap``.

    Returns the generated HTML, or ``None`` when the file cannot be read or
    converted (a message is printed in that case).
    """

    style_map = """
p[style-name='Header'] => header > p.header-content
p[style-name='Footer'] => footer > div.wrap > p.footer-content:fresh
p[style-name='Title'] => div.wrap > h1.document-title:fresh
p[style-name='Subtitle'] => div.wrap > h1.document-subtitle:fresh
p[style-name='Author'] => div.wrap > p.author:fresh
p[style-name='Date'] => div.wrap > p.date:fresh
p[style-name='Lead'] => div.wrap > p.lead:fresh

p[style-name='Figure'] => div.wrap > figure:fresh > p.figure:fresh
p[style-name='Caption'] => div.wrap > figure > figcaption > p:fresh
p[style-name='Aside'] => div.wrap > aside > p:fresh
p[style-name='Quote'] => div.wrap > blockquote > p:fresh
p[style-name='Address'] => div.wrap > address > p:fresh

table => div.wrap > table.pure-table.pure-table-horizontal.pure-table-striped:fresh

# DEFAULTS
# https://github.com/mwilliamson/python-mammoth/blob/d61528737eaee72823258bf3460e2ac88e94a6a6/mammoth/options.py
p.Heading1 => div.wrap > h1:fresh
p.Heading2 => div.wrap > h2:fresh
p.Heading3 => div.wrap > h3:fresh
p.Heading4 => div.wrap > h4:fresh
p.Heading5 => div.wrap > h5:fresh
p.Heading6 => div.wrap > h6:fresh
p[style-name='Heading 1'] => div.wrap > h1:fresh
p[style-name='Heading 2'] => div.wrap > h2:fresh
p[style-name='Heading 3'] => div.wrap > h3:fresh
p[style-name='Heading 4'] => div.wrap > h4:fresh
p[style-name='Heading 5'] => div.wrap > h5:fresh
p[style-name='Heading 6'] => div.wrap > h6:fresh
p[style-name='heading 1'] => div.wrap > h1:fresh
p[style-name='heading 2'] => div.wrap > h2:fresh
p[style-name='heading 3'] => div.wrap > h3:fresh
p[style-name='heading 4'] => div.wrap > h4:fresh
p[style-name='heading 5'] => div.wrap > h5:fresh
p[style-name='heading 6'] => div.wrap > h6:fresh
p[style-name='footnote text'] => p > span.footnote-text
p[style-name='endnote text'] => p > span.endnote-text
p[style-name='annotation text'] => div.wrap > p
        
# LibreOffice
p[style-name='Footnote'] => div.wrap > p.note.footnote
p[style-name='Endnote'] => div.wrap > p.note.endnote
p[style-name='First Paragraph'] => div.wrap > p.first-paragraph:fresh
p:unordered-list(1) => div.wrap > ul > li:fresh
p:unordered-list(2) => div.wrap > ul|ol > li > ul > li:fresh
p:unordered-list(3) => div.wrap > ul|ol > li > ul|ol > li > ul > li:fresh
p:unordered-list(4) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul > li:fresh
p:unordered-list(5) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul|ol > li > ul > li:fresh
p:ordered-list(1) => div.wrap > ol > li:fresh
p:ordered-list(2) => div.wrap > ul|ol > li > ol > li:fresh
p:ordered-list(3) => div.wrap > ul|ol > li > ul|ol > li > ol > li:fresh
p:ordered-list(4) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ol > li:fresh
p:ordered-list(5) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul|ol > li > ol > li:fresh
p[style-name='Normal'] => div.wrap > p:fresh
p[style-name='Text'] => div.wrap > p:fresh
p.TextBody => div.wrap > p:fresh
p.TableContents => p.table-contents:fresh
p.TableHeading => p.table-heading:fresh

# catchall
p => div.wrap > p:fresh
"""

    try:
        with open(docx_file, "rb") as doc:
            result = mammoth.convert_to_html(doc, style_map=style_map)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not reported as a conversion failure.
        print("Error processing docx file %s" % docx_file)
        return None

    # Only the generated HTML is used; ``result.messages`` is not reported.
    return result.value


def docx2html(document):
    """Convert a docx document to a standalone HTML page.

    The fragment produced by ``docx2str`` is wrapped in a complete page whose
    title combines the configured archive name with the document's UUID, and
    whose head carries the font link and stylesheets from ``strings``.

    Returns the page as one string with lines joined by CRLF, or ``None``
    when ``docx2str`` produced nothing.
    """
    import egaia_config
    import egaia_parsefn
    import strings

    body = docx2str(document)
    if not body:
        return None

    meta_str = {
        'archive_name': egaia_config.getConfig('archive', 'archive_name').decode('utf-8'),
        # Only referenced by the footer markup, which is currently disabled.
        'archive_url': egaia_config.getConfig('archive', 'archive_url').decode('utf-8'),
        'uuid': egaia_parsefn.getUuid(document),
    }

    page = [
        '<!doctype html>',
        '<html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1">',
        '<title>{archive_name} | {uuid}</title>'.format(**meta_str),
        strings.font_link,
        strings.pure_css,
        strings.local_item_css,
        '</head><body>',
        body,
        '</body></html>',
    ]
    # Disabled footer, kept for reference:
    #out.append("""<footer><div class="wrap">UUID: <a href="{archive_url}item/{uuid}/">{uuid}</a></div></footer></body></html>""".format(**meta_str) )

    return u'\r\n'.join(page)


def rm(path):
    """Remove a file.

    Uses ``os.unlink``, so directories are not removed: passing one is
    reported as an error like any other failure.

    Returns ``True`` when the file was deleted. Otherwise prints a message
    and returns ``False``.
    """
    try:
        os.unlink(path)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not swallowed as a failed delete.
        print("Error deleting %s" % path)
        return False
    return True

def fmtTime(secs):
    """Format a duration given in seconds as ``h:mm:ss``.

    Hours are not wrapped at 24 and are not zero-padded; minutes and seconds
    are padded to two digits.
    """
    whole_minutes, seconds = divmod(secs, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return u"%d:%02d:%02d" % (hours, minutes, seconds)