# -*- coding: utf-8 -*-
"""Multipurpose utilities for egaia."""
import re
import os
import readline
import subprocess
import markdown
import mammoth
# Matches a UUID in its canonical 8-4-4-4-12 hexadecimal form, in either case.
# The class is spelled out as [0-9A-F]: the range "0-F" would also span the
# ASCII characters between "9" and "A" (: ; < = > ? @).
re_uuid = re.compile(
    r"[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}", re.I)
def run(command):
    """Run a system command.

    ``command`` is passed unchanged to ``subprocess.call``.

    Returns the exit status (``0``) when the command succeeds, and ``False``
    when it exits with a non-zero status or cannot be started at all. A
    message is printed in both failure cases. Note that ``0`` is itself
    falsy: callers that need to tell success from failure must test
    ``run(...) is False`` rather than rely on truthiness.
    """
    try:
        result = subprocess.call(command)
    except Exception:
        # Typically OSError for a missing executable. KeyboardInterrupt and
        # SystemExit are deliberately not caught so the user can still abort.
        # The 1-tuple keeps the formatting safe when command is a tuple.
        print("Execution failed: %s" % (command,))
        return False
    if result != 0:
        print("process ended with error: %s" % result)
        return False
    return result
def isotime(timestamp):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS``.

    ``timestamp`` is coerced with ``int()`` first, so numeric strings and
    floats are accepted (fractions are dropped). The conversion goes through
    ``datetime.fromtimestamp`` without a timezone argument.
    """
    from datetime import datetime as _datetime
    seconds = int(timestamp)
    moment = _datetime.fromtimestamp(seconds)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
def current_time():
    """Return today's date as a ``YYYY-MM-DD`` string.

    Despite the name, only the date is returned, not the time of day.
    """
    from time import strftime
    today = strftime("%Y-%m-%d")
    return today
def byteSize(num, suffix='B'):
    """Render a byte count as a human-readable size with binary prefixes.

    The value is divided by 1024 until its magnitude drops below 1024, then
    formatted with one decimal place followed by the prefix and ``suffix``
    (for example ``"1.5 KiB"``). Anything still too large after ``Zi`` is
    reported in ``Yi``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    position = 0
    while position < len(prefixes):
        if abs(value) < 1024.0:
            return "%3.1f %s%s" % (value, prefixes[position], suffix)
        value = value / 1024.0
        position += 1
    # Ran out of prefixes: whatever remains is expressed in yobi-units.
    return "%.1f %s%s" % (value, 'Yi', suffix)
def truncate(description, length=200):
    """Make a truncated description for index pages.

    The description truncates on the last full stop under the length limit,
    so very long descriptions should ideally be broken into several
    sentences. When there is no full stop within the limit, the text is cut
    at the last word boundary instead of being dropped entirely.

    ``length=-1`` disables truncation; descriptions that already fit are
    returned unchanged.
    """
    if length == -1 or len(description) <= length:
        return description
    window = description[:length]
    head, stop, _ = window.rpartition('. ')
    if stop:
        # Keep everything up to and including the last sentence-ending stop.
        return head + '.'
    # No sentence break inside the window: previously this produced an empty
    # string. Cut at the last space, or hard-cut if there is no usable space.
    return window.rsplit(' ', 1)[0] or window
def md2html(text, meta=False):
    """Convert Markdown to HTML.

    The ``smarty`` extension is always enabled. When ``meta`` is true the
    ``meta`` extension is enabled as well and the result is a tuple
    ``(html, metadata)``; otherwise only the HTML string is returned.
    """
    extensions = ['markdown.extensions.smarty']
    if meta:
        # md.Meta is only populated by the meta extension; without it the
        # attribute lookup below fails. It is enabled on request only, so
        # the default conversion keeps treating leading "key: value" lines
        # as ordinary text.
        extensions.append('markdown.extensions.meta')
    md = markdown.Markdown(extensions=extensions)
    html = md.convert(text)
    if meta:
        return (html, md.Meta)
    return html
def makeDocument(uuid):
    """Return a python-docx Document instance.

    The document is created from the ``static/template.docx`` resource of
    the ``egaia`` package. Its body content is cleared, its ``modified``
    core property is set to the current UTC time and its ``title`` core
    property is set to ``uuid``.
    """
    import datetime
    import pkg_resources
    from docx import Document
    template = pkg_resources.resource_filename('egaia', 'static/template.docx')
    document = Document(template)
    # NOTE(review): _body is a private python-docx attribute; it is used to
    # empty the template's body before the document is handed out.
    document._body.clear_content()
    document.core_properties.modified = datetime.datetime.utcnow()
    document.core_properties.title = uuid
    return document
def readDocument(docx_file):
    """Return paragraph and table instances from a docx document.

    Returns a tuple ``(paragraphs, tables)`` taken from the opened document,
    or ``None`` when ``docx_file`` does not exist or cannot be opened (a
    message is printed in the latter case).
    """
    if not os.path.exists(docx_file):
        return None
    # Imported only once there is a file to read.
    from docx import Document
    try:
        document = Document(docx_file)
    except Exception:
        # Any failure to open or parse is reported as "no document";
        # KeyboardInterrupt and SystemExit are left to propagate.
        print("Could not open %s" % (docx_file,))
        return None
    return (document.paragraphs, document.tables)
def docx2str(docx_file):
    """Convert a docx document to a fragment string.

    The file is opened in binary mode and converted with
    ``mammoth.convert_to_html`` using the style map below. Returns the
    generated HTML, or ``None`` (after printing a message) when the file
    cannot be opened or converted.
    """
    # Style map handed to mammoth: project-specific paragraph styles first,
    # then heading/note/list defaults, then a catch-all for plain paragraphs.
    style_map = """
p[style-name='Header'] => header > p.header-content
p[style-name='Footer'] => footer > div.wrap > p.footer-content:fresh
p[style-name='Title'] => div.wrap > h1.document-title:fresh
p[style-name='Subtitle'] => div.wrap > h1.document-subtitle:fresh
p[style-name='Author'] => div.wrap > p.author:fresh
p[style-name='Date'] => div.wrap > p.date:fresh
p[style-name='Lead'] => div.wrap > p.lead:fresh
p[style-name='Figure'] => div.wrap > figure:fresh > p.figure:fresh
p[style-name='Caption'] => div.wrap > figure > figcaption > p:fresh
p[style-name='Aside'] => div.wrap > aside > p:fresh
p[style-name='Quote'] => div.wrap > blockquote > p:fresh
p[style-name='Address'] => div.wrap > address > p:fresh
table => div.wrap > table.pure-table.pure-table-horizontal.pure-table-striped:fresh
# DEFAULTS
# https://github.com/mwilliamson/python-mammoth/blob/d61528737eaee72823258bf3460e2ac88e94a6a6/mammoth/options.py
p.Heading1 => div.wrap > h1:fresh
p.Heading2 => div.wrap > h2:fresh
p.Heading3 => div.wrap > h3:fresh
p.Heading4 => div.wrap > h4:fresh
p.Heading5 => div.wrap > h5:fresh
p.Heading6 => div.wrap > h6:fresh
p[style-name='Heading 1'] => div.wrap > h1:fresh
p[style-name='Heading 2'] => div.wrap > h2:fresh
p[style-name='Heading 3'] => div.wrap > h3:fresh
p[style-name='Heading 4'] => div.wrap > h4:fresh
p[style-name='Heading 5'] => div.wrap > h5:fresh
p[style-name='Heading 6'] => div.wrap > h6:fresh
p[style-name='heading 1'] => div.wrap > h1:fresh
p[style-name='heading 2'] => div.wrap > h2:fresh
p[style-name='heading 3'] => div.wrap > h3:fresh
p[style-name='heading 4'] => div.wrap > h4:fresh
p[style-name='heading 5'] => div.wrap > h5:fresh
p[style-name='heading 6'] => div.wrap > h6:fresh
p[style-name='footnote text'] => p > span.footnote-text
p[style-name='endnote text'] => p > span.endnote-text
p[style-name='annotation text'] => div.wrap > p
# LibreOffice
p[style-name='Footnote'] => div.wrap > p.note.footnote
p[style-name='Endnote'] => div.wrap > p.note.endnote
p[style-name='First Paragraph'] => div.wrap > p.first-paragraph:fresh
p:unordered-list(1) => div.wrap > ul > li:fresh
p:unordered-list(2) => div.wrap > ul|ol > li > ul > li:fresh
p:unordered-list(3) => div.wrap > ul|ol > li > ul|ol > li > ul > li:fresh
p:unordered-list(4) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul > li:fresh
p:unordered-list(5) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul|ol > li > ul > li:fresh
p:ordered-list(1) => div.wrap > ol > li:fresh
p:ordered-list(2) => div.wrap > ul|ol > li > ol > li:fresh
p:ordered-list(3) => div.wrap > ul|ol > li > ul|ol > li > ol > li:fresh
p:ordered-list(4) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ol > li:fresh
p:ordered-list(5) => div.wrap > ul|ol > li > ul|ol > li > ul|ol > li > ul|ol > li > ol > li:fresh
p[style-name='Normal'] => div.wrap > p:fresh
p[style-name='Text'] => div.wrap > p:fresh
p.TextBody => div.wrap > p:fresh
p.TableContents => p.table-contents:fresh
p.TableHeading => p.table-heading:fresh
# catchall
p => div.wrap > p:fresh
"""
    try:
        with open(docx_file, "rb") as doc:
            result = mammoth.convert_to_html(doc, style_map=style_map)
    except Exception:
        # Unreadable file or failed conversion: report it and signal "no
        # output". KeyboardInterrupt and SystemExit are left to propagate.
        print("Error processing docx file %s" % (docx_file,))
        return None
    # result.value is the generated HTML; result.messages is currently ignored.
    return result.value
def docx2html(document):
    """Convert a docx document to a standalone file.

    The HTML fragment from ``docx2str`` is wrapped in a complete page whose
    title combines the configured archive name with the UUID obtained from
    ``egaia_parsefn.getUuid``. Font and stylesheet markup comes from the
    ``strings`` module.

    Returns the page as one string with CRLF line separators, or ``None``
    when ``docx2str`` produced no HTML.
    """
    import egaia_config
    import egaia_parsefn
    import strings
    html = docx2str(document)
    if not html:
        return None
    meta_str = {
        'archive_name': egaia_config.getConfig('archive', 'archive_name').decode('utf-8'),
        # Only referenced by the disabled footer below; still looked up so
        # the configuration access stays the same.
        'archive_url': egaia_config.getConfig('archive', 'archive_url').decode('utf-8'),
        'uuid': egaia_parsefn.getUuid(document),
    }
    out = [
        '<!doctype html>',
        '<html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1">',
        # Unicode template: formatting decoded (unicode) values into a byte
        # string fails on Python 2 as soon as the archive name is non-ASCII.
        u'<title>{archive_name} | {uuid}</title>'.format(**meta_str),
        strings.font_link,
        strings.pure_css,
        strings.local_item_css,
        '</head><body>',
        html,
        '</body></html>',
    ]
    # Disabled footer, kept for reference:
    # out.append("""<footer><div class="wrap">UUID: <a href="{archive_url}item/{uuid}/">{uuid}</a></div></footer></body></html>""".format(**meta_str) )
    return u'\r\n'.join(out)
def rm(path):
    """Remove a file.

    Only ``os.unlink`` is used, so directories are not removed: passing a
    directory fails like any other error.

    Returns ``True`` when the file was deleted, ``False`` (after printing a
    message) when deletion failed for any reason.
    """
    try:
        os.unlink(path)
    except Exception:
        # Best-effort delete: report and carry on. KeyboardInterrupt and
        # SystemExit are left to propagate.
        print("Error deleting %s" % (path,))
        return False
    return True
def fmtTime(secs):
    """Convert seconds to hh:mm:ss

    Hours are not zero-padded and are not capped at 24; minutes and seconds
    are always two digits.
    """
    total_minutes, seconds = divmod(secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return u"%d:%02d:%02d" % (hours, minutes, seconds)