# Source code for egaia.egaia_make

# -*- coding: utf-8 -*-

import os
import re
import shutil
import json
import gettext
from string import Template
import codecs
import markdown
import collections

import egaia_config
import egaia_parsefn
import egaia_list
import egaia_bag
import pkg_resources
import utils
from egaia_sanitize import makeSlug
import egaia_root
import egaia_collage
import version
import egaia_docx
import egaia_derive

language = egaia_config.getConfig('archive', 'language')

pub = os.path.expanduser(egaia_config.getConfig('archive', 'pub_path'))

indexed_fields = ['DCTERMS.subject', 'DCTERMS.creator', 'DCTERMS.coverage', 
                    'DCTERMS.type', 'DCTERMS.language']

static_dir = os.path.join(pub, 'static')
default_thumb = os.path.join(static_dir, 'default_thumb.png')

archive = egaia_config.getConfig('archive', 'archive_name').decode('utf-8')
short_name = egaia_config.getConfig('archive', 'archive_prefix').decode('utf-8').upper()
remote_embeds = egaia_config.getConfig('archive', 'remote_embeds', boolean=True)

export_stills = egaia_config.getConfig('archive', 'export_stills', boolean=True)
date = utils.current_time()

cmd_convert = egaia_config.getConfig('system', 'cmd_convert')

core_metadata = egaia_docx.getCoreFields(filtered=False)
fields_rev = dict(egaia_config.printConfig(section='terms'))
fields = dict((value, key) for key, value in fields_rev.iteritems())
template_fields = dict(egaia_config.printConfig(section='template_fields'))

# Ignore these metadata fields in html output. Use lowercase keys.
# We skip the title fields because they are separately printed at the top
# Skip the organizational info because this will normally be included in the
# footer or elsewhere in HTML output
skip_fields = ['bag-software-agent', 'payload-oxum', 'title', 'DCTERMS.title',
    'source-organization', 'organization-address', 'contact-name',
    'contact-phone', 'contact-email', 'DCTERMS.format']

item_preview_template = u"""<div id="${identifier}" class="index-entry">
    <a href="/item/${identifier}/index-${language}.html">
      <img src="${thumb}" class="contain thumbnail thumb-img-cover">
    </a>
     <div class="index-description caption">
     <h4><a href="/item/${identifier}/index-${language}.html">${title}</a><br> <small>$creator</small></h4>
     $short_description
     <p>$subject_tags</p>
    </div><!--/index-description-->
</div> <!-- index-entry -->
"""

collection_preview_template = u"""<div class="index-entry" id="${identifier}">
    <a href="/collection/${identifier}/index-${language}.html">
      <img src="/collection/${identifier}/collection-thumb.jpg" class="thumbnail">
    </a>
     <div class="index-description caption">
     <h4><a href="/collection/${identifier}/index-${language}.html">$title</a></h4>
     $short_description
    </div><!--/index-description-->
</div> <!-- index-entry -->
"""

error_page_template = u"""<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="icon" type="image/png" href="/static/favicon.png">

    <title>$title</title>

    <link rel="stylesheet" href="/static/css/bootstrap.min.css">
    <link rel="stylesheet" href="/static/css/egaia.css">

    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
    <!--[if lt IE 9]>
      <script src="/static/js/html5shiv.min.js"></script>
      <script src="/static/js/respond.min.js"></script>
    <![endif]-->
  </head>

  <body>

    <div class="main-content container">
    <div class="jumbotron">
    <div class="alert alert-danger">
        <h1>$title</h1>
        <p>${message}</p>
        <hr>
        <p><span class="glyphicon glyphicon-home"></span>&nbsp;<a href="/" class="alert-link">${Home}</a></p>
    </div>

    </div> <!-- /container -->
    </div> <!-- /main-content -->

  </body>
</html>

"""

html_template = u"""<!DOCTYPE html>
<html lang="${language}">
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <meta name="generator" content="egaia $version">
    
    <link rel="icon" type="image/png" href="/static/favicon.png">

    <title>$title - $archive</title>

    <link rel="stylesheet" href="/static/css/bootstrap.min.css">
    <link rel="stylesheet" href="/static/css/egaia.css">

    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
    <!--[if lt IE 9]>
      <script src="/static/js/html5shiv.min.js"></script>
      <script src="/static/js/respond.min.js"></script>
    <![endif]-->
  </head>

  <body>

    <!-- Fixed navbar -->
    <nav class="navbar navbar-default navbar-static-top">
      <div class="container">
        <div class="navbar-header">
          <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
            <span class="sr-only">$toggle</span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>
          <a class="navbar-brand" href="/index-${language}.html"><span class="visible-xs">$short_name</span><span class="hidden-xs">$archive</span></a>
        </div>
        <div id="navbar" class="navbar-collapse collapse navbar-right">
          <ul class="nav navbar-nav">
            <li><a href="/index-${language}.html">${About}</a></li>
            <li><a href="/collection/index-${language}.html">${Collections}</a></li>
            <li class="dropdown">
              <a href="/" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">${Items} <span class="caret"></span></a>
              <ul class="dropdown-menu">
                <li><a href="/item/index-${language}.html">${All_items}</a></li>
                <li role="separator" class="divider"></li>
                <li class="dropdown-header">${Keywords}</li>
                <li><a href="/DCTERMS.subject/index-${language}.html">${Subject}</a></li>
                <li><a href="/DCTERMS.coverage/index-${language}.html">${Coverage_area}</a></li>
                <li><a href="/DCTERMS.type/index-${language}.html">${Media_type}</a></li>
                <li><a href="/DCTERMS.creator/index-${language}.html">${Creator}</a></li>
                <li><a href="/DCTERMS.language/index-${language}.html">${Language}</a></li>
              </ul>
            </li>
          </ul>
        </div><!--/.nav-collapse -->
      </div>
    </nav>
    <div class="main-content">
    <div class="container">
    
    <div class="page-header">
    <h1>$title <small>$subtitle</small></h1>
    </div>
    
    $body

    </div> <!-- /container -->
    </div> <!-- /main-content -->

    <footer class="footer text-muted">
      <div class="container">
        <p id="generator">$last_updated $date. $generator</p>
      </div> <!-- /container -->
    </footer>
    
    <script src="/static/js/jquery.min.js"></script>
    <script src="/static/js/bootstrap.min.js"></script>
  </body>
</html>

"""

alias_template = """<!DOCTYPE html>
<html>
  <head>
    <title>${target}</title>
    <meta name="robots" content="noindex">
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
    <meta http-equiv="refresh" content="0; url=/item/${target}/${filename}"/>
  </head>
</html>"""

[docs]def init():
    """Initialize"""
    if not os.path.exists(static_dir):
        makeStaticDir()

# TEMPLATE: Deprecate here. Move the template strings into the
# config file.


#### DIRECTORY AND FILE COPY/WRITE FUNCTIONS

[docs]def makeStaticDir(force=False):
    if force and os.path.exists(static_dir):
        try:
            shutil.rmtree(static_dir)
        except:
            print "Error removing existing static dir!"
    default_static_dir = pkg_resources.resource_filename('egaia', 'static')
    print "copying %s to %s" % (default_static_dir, pub)
    try:
        shutil.copytree(default_static_dir, static_dir)
        return True
    except:
        # copy should fail silently if the file already exists
        print "Unable to write static dir!"
        return False

[docs]def getDestDir(uuid, type='item'):
    """create destination dir for items"""
    if uuid is None:
        return False
    destdir = os.path.join(pub, type, uuid)
    if not os.path.exists(destdir):
        os.makedirs(destdir)
    return destdir

[docs]def copyDerivatives(derivs, destdir, force=False):
    """Copy derivatives to the "pub" directory for distribution."""
    for fn in derivs:
        filename = os.path.basename(fn)
        dest = os.path.join(destdir, filename)
        if os.path.exists(dest) and not force:
            # check the modification times of the source and destination
            src_mtime = os.stat(fn).st_mtime
            dest_mtime = os.stat(dest).st_mtime

            # don't copy/link if the destination is newer or the same
            if src_mtime <= dest_mtime:
                continue
            os.unlink(dest)
        print "Copying %s..." % filename
        try:
            os.link(fn, dest)
        except:
            print "Unable to make a hardlink; copying instead"
            shutil.copy2(fn, dest)
    return

[docs]def writeHtml(html_content, destdir, filename=u'index-%s.html' % language):
    """Write html file"""
    html_file = os.path.join(destdir, filename)
    
    if not os.path.exists(destdir):
        os.makedirs(destdir)
    
    with codecs.open(html_file, mode='w', encoding='utf-8') as out:
        out.write(html_content)
    return


#### ELEMENTS FOR HTML OUTPUT

[docs]def getThumb(uuid, size='thumb'):
    """Identify and return the base filename of a thumb image from a list of
    derivatives.
    """
    # We don't return a full tag here because we need to manipulate the path
    # depending on whether we are in an index or elsewhere
    # This function identifies thumbnail images that are already in the
    # outpub (pub) directory.
    destdir = getDestDir(uuid)
    exclude = None
    thumbfile = egaia_list.listFiles(filepath=destdir, uuid=uuid, filter_type='df-%s' % size, exclude=exclude)
    
    if not thumbfile:
        return None
    return os.path.basename(thumbfile[0])

[docs]def makeEmbedTag(derivs, page):
    """Prepare an embeddable media tag from a list of derivatives. Return a
    full embed object tag, with url reference to the media file(s).
    This is usable in an item description page. The "page" parameter should
    be the uuid of the item.
    """

    if not derivs:
        return None
    for fn in derivs:
        src = '/'.join(['item', page, os.path.basename(fn)])
        if fn.endswith('.mp3'):
            return makeLocalEmbed(embed_type='audio',
                            src = src,
                            mtype = 'audio/mpeg')
        elif fn.endswith('.webm'): # we might also have mp4
            return makeLocalEmbed(embed_type='video',
                            src = src,
                            mtype = 'video/webm')
                            
        # Disable text embed; it is cleaner to separate document and metadata
        #~ elif fn.endswith('.html'):
            #~ return makeTextEmbed(fn)
    return None

[docs]def makeTextEmbed(fn):
    """Embed a text or html file."""
    html = ''
    with codecs.open(fn, mode='r', encoding='utf-8') as raw:
        html = raw.read()
    return u'<div class="text_embed">%s</div>' % html

[docs]def makeLocalEmbed(embed_type=None, src=None, mtype=None):
    """Assemble an html5 audio or video embed tag."""
    
    css_class = 'embed'
    if 'video' in embed_type:
        css_class = 'embed-responsive embed-responsive-16by9'
    return """<div class="%s">
               <%s controls>
                <source src="/%s" type="%s" class="embed-responsive-item">
                <a href="/%s">Download</a>
                </%s></div>""" % (css_class, embed_type, src, mtype, src, 
                                    embed_type)

[docs]def makeRemoteEmbed(url):
    """Prepare an embeddable video tag from a URL. Return an iframe.
    This is usable in an item description page.
    """
    
    return """<div class="embed-responsive embed-responsive-16by9">
        <iframe src="%s" scrolling="no" class="embed-responsive-item" 
        allowfullscreen></iframe></div>""" % url

[docs]def makeDownloadsList(derivs):
    """Prepare an html list of download links for derivatives"""

    if not derivs:
        return None
        
    downloads_list = list()
    
    for fn in derivs:
        filename = os.path.basename(fn)
        basename, uuid, extension = egaia_parsefn.parseFilename(filename)
        size = utils.byteSize(os.stat(fn).st_size)
        mtime = utils.isotime(os.stat(fn).st_mtime)
        if '.' in basename:
            fmt = basename.rpartition('.')[2]
        else:
            fmt = template_fields['source']
        downloads_list.append(
            u"""<tr><td><a href="/item/{uuid}/{filename}">{fmt}</a></td>
            <td>{ext}</td>
            <td>{size}</td>
            <td>{mtime}</td>
            </tr>""".format(
                ext=extension,
                uuid=uuid, 
                filename=filename,
                basename=basename,
                fmt=fmt,
                size=size,
                mtime=mtime
                )
            )

    return u'<table class="table table-condensed">' + u'\r\n'.join(sorted(downloads_list)) + u'</table>'
	


[docs]def makePaginationLinks(item_count, page, url_path, items_per_page):
    """Return pagination links"""

    pages = range(1, item_count + 1)
    radius = 3
    # https://github.com/Pylons/paginate/blob/master/paginate/__init__.py
    
    leftmost_page = max(1, (page - radius))
    rightmost_page = min((item_count + 1), (page + radius + 1))
    page_range = range(leftmost_page, rightmost_page)
    nav_links = []
    
    # the first page
    nav_links.append(u'<li><a href="/%s/index-%s.html">&laquo;</a></li>' % (url_path, language))
    
    for p in page_range:
        if p == page:
            nav_links.append(u'<li class="active"><a href="/%s">%s</a></li>' % (url_path, p))
        else:
            if p == 1:
                filename = u'index-%s.html' % language
            else:
                filename = u'index_%s-%s.html' % (p, language)
            nav_links.append(u'<li><a href="/%s/%s">%s</a></li>' % (url_path, filename, p))
    nav_links.append(u'<li><a href="/%s/index_%s-%s.html">&raquo;</a></li>' % (url_path, item_count, language))

    return u'<div class="clearfix"></div><nav aria-label="Page navigation"><ul class="pagination">%s</ul></nav>' % ' '.join(nav_links)


[docs]def generateList(items):
    """Create a responsive grid of items with thumbnails."""
    if len(items) == 0:
        return ' '
    out = list()
    for n in range(len(items)):
        # add offsets for fewer than three items
        offset = ''
        if len(items) == 2:
            offset = ' col-md-offset-1'
        #elif len(items) == 1:
        #    offset = ' col-sm-offset-3 col-md-offset-4'
        item_content = list()
        item_content.append(u'<div class="col-xs-12 col-sm-6 col-md-4%s">%s</div>' % (offset, items[n]))

        # add clearfixes. sm:2, md:3
        if (n+1) % 2 == 0:
            item_content.append(u'<div class="clearfix visible-sm-block"></div>')
        if (n+1) % 3 == 0:
            item_content.append(u'<div class="clearfix visible-md-block visible-lg-block"></div>')
        out.append(u'\r\n'.join(item_content))

    return out

[docs]def generatePlainList(items):
    """Generate a list of items, not as a grid."""
    if len(items) == 0:
        return ' '

    out = list()
    for n in range(len(items)):
        item_content = list()
        item_content.append('<div class="col-xs-12 col-md-6">%s</div>' % items[n])
        # add clearfixes. md:2
        if (n+1) % 2 == 0:
            item_content.append(u'<div class="clearfix"></div>')
        out.append('\r\n'.join(item_content))

    return out

[docs]def makeTags(tags, tagtype='tags'):
    """Generate an HTML list of keyword tags, from an input list."""
    if not tags:
        return []
    taglinks = list()
    if not isinstance(tags, list):
        tags = [tags]
    for tag in tags:
        link = '%s/%s/index-%s.html' % (tagtype, makeSlug(tag), language)
        taglinks.append('<a href="/%s">%s</a>' % (link, tag))
    return taglinks

[docs]def makeMetadataTable(data, nolinks=False):
    """Create an HTML definition list from a dict containing item metadata 
    (normally via json), or the values in bag-info.txt. 
    """
    
    meta_table = list()

    # convert everything to lowercase. The config file forces lowercase but
    # BagIt has request-header style title case keys.
    #row = {k.lower():v for k,v in data.iteritems()}
    row = data
    
    # go through the fields in order
    for label in core_metadata:
        #for element, label in fields.iteritems():
        element = fields[label]
        if element in skip_fields:
            continue
        l = row.get(label)
        if not l or l[0] == '':
            # the field does not exist in the row
            # empty list or string; ignore in html output
            continue
        meta_table.append(u'<h3 id="%s" class="element">%s</h3>' % ( element,
                label))
        
        # process the fields for which we have indexes
        
        if element in indexed_fields and not nolinks:
            tags = makeTags(row[label], tagtype=element)
            for tag in tags:
                meta_table.append(u'<p class="element-data">%s</p>' % tag)
        else:
            if not isinstance(row[label], list):
                row[label] = [row[label]]
            
            if all(isinstance(part, list) for part in row[label]):
                # add a table
                meta_table.append(u'<table class="element-data table table-responsive">')
                for part in row[label]:
                    row_content = u'<tr><td>' + u'</td><td>'.join(part) + u'</td></tr>'
                    meta_table.append(row_content.replace('\n', '<br>'))
                meta_table.append(u'</table>')

            else:
                # Convert the paragraphs individually to markdown
                # This is ALREADY wrapped in "p" elements, so use a div here
                meta_table.append(u'<div class="element-data">')
                txt = u'\n\n'.join(row.get(label))
                meta_table.append(utils.md2html(txt))
                #~ for part in row[label]:
                    #~ # for text that is NOT linked as keywords, process as markdown
                    #~ # this is mainly useful for getting bare urls to work as links
                    #~ part = utils.md2html(part)
                    #~ meta_table.append(part)
                meta_table.append(u'</div>')


    return u' '.join(meta_table)


#### HTML output

[docs]def makeCollectionJson(delete=False, outdir=None):
    """Create json representations for all items in the current collection."""
    
    collection_items = list()
    
    # FIXME: Use localized labels from config
    bag_info = egaia_bag.loadBag()
    collection_uuid = bag_info['External-Identifier'].decode('utf-8')
    
    collection_info = dict()
    # See if we have a metadata file in the bag root
    readme_file = os.path.join(egaia_root.get_root(), 'metadata-%s.docx' % language)
    if os.path.exists(readme_file):
        collection_info = egaia_docx.parseDocx(readme_file)
    else:
        print "could not read %s" % readme_file
    
    collection_name = u' '.join(collection_info.get('title', [bag_info['Title'].decode('utf-8')]))
    docx_files = egaia_list.listFiles(filter_type="metadata")

    # sort by input filename; otherwise they will be ordered by uuid
    for docx_file in sorted(docx_files):
        print "processing %s..." % docx_file

        uuid = egaia_parsefn.getUuid(docx_file)
        out_path = getDestDir(uuid, type='item')
        json_filepath = os.path.join(out_path, '%s-%s.json' % (uuid, language))
        if delete:
            utils.rm(json_filepath)
            continue

        # make the json file
        json_str = json.loads(egaia_docx.docx2json([docx_file]))[0]

        # we still need: collection_name, collection_uuid, thumb, med_img
        json_str['collection_name'] = collection_name
        json_str['collection_uuid'] = collection_uuid
        
        with codecs.open(json_filepath, mode='w', encoding='utf-8') as json_file:
            json_file.write(json.dumps(json_str, sort_keys=True, indent=4, 
                                ensure_ascii=False))
        
        collection_items.append(uuid)
    
    if outdir:
        # Don't export the collection json; we just want items
        # This is for the --json --outdir=XXX option
        return
    
    # Now process the collection metadata
    json_str = dict()
    

    collection_info['collection_items'] = collection_items
    collection_info['title'] = collection_name
    uuid = bag_info['External-Identifier']
    collection_info['identifier'] = uuid
    json_filepath = os.path.join(getDestDir(uuid, type='collection'), 
                                    '%s-%s.json' % (uuid, language))

    if delete:
        utils.rm(json_filepath)

    else:
        with codecs.open(json_filepath, mode='w', encoding='utf-8') as json_file:
            json_file.write(json.dumps(collection_info, sort_keys=True, indent=4))
        
    return


[docs]def loadJson(uuid, type):
    """Load json metadata for an item."""

    json_filepath = os.path.join(getDestDir(uuid, type=type), '%s-%s.json' % (uuid, language))
    try:
        with codecs.open(json_filepath, mode='r', encoding='utf-8') as json_file:
            meta_str = json.loads(json_file.read())
    except:
        print "Error opening %s!" % json_filepath
        return None
    return meta_str


[docs]def updateItemIndex(uuid, delete=False):
    """Update the keyword indexes based on json metadata for a given item."""
    
    # This function reads the entire index for each keyword into memory.
    # We could easily do this by writing directly to disk, for scalability.
    
    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        print "could not open json for %s" % uuid
        return
        
    for index in indexed_fields:
        label = fields_rev[index]

        index_data = list()

        # load the existing index if available
        index_filepath = os.path.join(pub, '%s-%s.idx' % (index, language))
        if os.path.exists(index_filepath):
            with codecs.open(index_filepath, mode='r', encoding='utf-8') as index_file:
                for line in index_file:
                    # remove references to the item in existing index
                    if line.strip() == '':
                        continue
                    if not uuid in line:
                        # strip newlines
                        index_data.append(line.strip())

        if not delete:
            # if present, add the key and uuid to the index
            if meta_str.get(label):
                if isinstance(meta_str[label], list):
                    for value in meta_str[label]:
                        kw = value
                        if kw:
                            index_data.append(u'%s\t%s' % (kw, uuid))
                else:
                    kw = meta_str[label]
                    if kw:
                        index_data.append(u'%s\t%s' % (kw, uuid))
        # write the index to file
        with codecs.open(index_filepath, mode='w', encoding='utf-8') as index_file:
            out = u'\n'.join(index_data)
            index_file.write(out)

    return


[docs]def makeItemPreview(uuid):
    """Generate an html preview for an item, to be included in indexes
    or lists.
    """
    
    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        return
    
    subjects = meta_str.get(fields_rev['DCTERMS.subject'], [])
    
    if len(subjects) == 0 or subjects[0] == '':
        meta_str['subject_tags'] = ''
    else:
        subject_tag_list = makeTags(subjects, tagtype="DCTERMS.subject")
        meta_str['subject_tags'] =  fields_rev['DCTERMS.subject'] + u': ' + u', '.join(subject_tag_list)
    
    dc_description = meta_str.get(fields_rev['DCTERMS.description'], [''])
    desc = ' '.join(dc_description)
    meta_str['short_description'] = utils.md2html(utils.truncate(desc, length=200))
    meta_str['title'] = meta_str.get(fields_rev['DCTERMS.title'], '')
    meta_str['identifier'] = meta_str.get(fields_rev['DCTERMS.identifier'], [''])
    meta_str['language'] = language

    #thumb = meta_str.get('thumb', None)
    thumb = getThumb(uuid, size='thumb')
    
    if not thumb:
        meta_str['thumb'] = '/static/null.png'
    else:
        meta_str['thumb'] = '/item/%s/%s' % (uuid, thumb)
    
    meta_str['creator'] = '; '.join(meta_str.get(fields_rev['DCTERMS.creator'], list()))
    
    for k, v in meta_str.iteritems():
        if all(isinstance(i, list) for i in v) and len(v) > 0:
            # ignore tabular data?
            continue
            
        elif isinstance(v, list):
            meta_str[k] = ' '.join(v)

    meta_str.update(template_fields)
    html_content = Template(item_preview_template)
    index_entry = html_content.safe_substitute(meta_str)

    return index_entry


[docs]def makePreview(uuid, preview_type='item', delete=False):
    """Create an html preview for an item or collection, to be included in 
    indexes or lists.
    """

    previews_dir = os.path.join(pub, '.previews-%s' % language)
    index_path = os.path.join(previews_dir, uuid)
    
    if delete:
        if os.path.exists(index_path):
            os.unlink(index_path)
        return

    if preview_type == 'collection':
        preview_text = makeCollectionPreview(uuid)
    else:
        preview_text = makeItemPreview(uuid)
    if not preview_text:
        return None
    
    if not os.path.exists(previews_dir):
        os.makedirs(previews_dir)
    with codecs.open(index_path, mode='w', encoding='utf-8') as index_file:
        index_file.write(preview_text)
    return


[docs]def getPreview(uuid, preview_type='item'):
    """Retrieve an html preview for an item or collection."""
    
    preview_text = None
    index_path = os.path.join(pub, '.previews-%s' % language, uuid)
    
    if not os.path.exists(index_path):
        print index_path
        print 'generating preview for %s' % uuid
        return makePreview(uuid, preview_type=preview_type)
    
    with codecs.open(index_path, mode='r', encoding='utf-8') as index_file:
        preview_text = index_file.read()
    
    return preview_text


[docs]def makeItemPage(uuid, force=False, delete=False, nolinks=False):
    """Generate an html page for an item."""
    
    index_filepath = os.path.join(getDestDir(uuid, type='item'), 'index-%s.html' % language)
    
    if delete:
        utils.rm(index_filepath)
        return

    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        return
    
    exclude = []
    
    if export_stills is False:
        # exclude the video stills directory
        exclude += ['.df-still-', '.df-stills-index']
    
    if meta_str.get('public', [''])[0].lower() == 'false':
            # FIXME: This should be a whitelist, not a blacklist.
            exclude += ['.df-pdf', '.df-360p-vp9-400k', '.df-mp3', '.df-h264']

    destdir = getDestDir(uuid, type='item')
    
    # get a base list of all the derivatives to copy
    df = egaia_list.listFiles(uuid=uuid, filter_type='df', exclude=exclude)
    
    # we also want to copy the docx metadata file, so people can edit it
    metadata_file = egaia_list.listFiles(uuid=uuid, filter_type='metadata')
    
    # and we want to copy "mutable" (editable) files that collaborators might
    # download, edit, and send back to us
    of = egaia_list.listFiles(uuid=uuid, filter_type='orig', exclude=exclude)
    mutable = [x for x in of if x.endswith('.docx') or x.endswith('.svg')]
    
    all_derivs = df + metadata_file + mutable
    
    # Copy everything to the public directory by default
    if all_derivs:
        copyDerivatives(all_derivs, destdir, force=force)
    
    # Create a link to the files for download.
    # Ignore "medium" images, even if they are the primary distribution
    # format, since they are already embedded on the page
    dl_excludes = exclude + ['df-med', 'df-thumb']

    # Don't link to local downloads if remote embeds are available
    remote = meta_str.get(fields_rev['remote_embed_url'])
    
    if remote and remote_embeds is True:
        # Exclude the video download links, but copy them to the "pub"
        # directory anyway. This allows us to have all the derivative files
        # in place for offline distribution, while keeping them unlisted;
        # we can publish the catalogue telling rsync to exclude these files.
        # FIXME: Make this look at the filenames themselves, not just type.
        dl_excludes += ['.df-h264', '.df-360p-vp9-400k'] 

    derivs = egaia_list.listFiles(uuid=uuid, filter_type='df',
                        exclude=dl_excludes) + metadata_file + mutable

    # manage the embed tag
    embed = None

    if remote and remote_embeds is True:
        print "using REMOTE url: %s" % remote[0]
        embed = makeRemoteEmbed(remote[0])
    else:
        embed = makeEmbedTag(derivs, page=uuid)
    thumb = getThumb(uuid, size='med')
    metadata = makeMetadataTable(meta_str, nolinks=nolinks)
    downloads = makeDownloadsList(derivs)

    body = list()
    if thumb and not embed:
        # disable the link from the thumbnail image, because this can be
        # confusing. For instance, we may have the thumb for a pdf but it
        # links to an html version of the same document that looks completely
        # different from the thumb.
        #~ if main_link:
            #~ img = u'<p><a href="/item/%s/%s"><img src="/item/%s/%s" class="img-responsive"></a></p>' % (uuid, main_link, uuid, thumb)
        #~ else:

        img = u'<p class="embed-img-responsive"><img src="/item/%s/%s" class="img-responsive"></p>' % (uuid, thumb)
        body.append(img)
    if embed:
        body.append(embed)
    
    # add the metadata panel
    
    body.append(  u'<div class="element-set panel panel-default">'
                + u'<div class="panel-heading">'
                + u'<h3 class="panel-title">%s</h3>' % template_fields['Metadata']
                + u'</div>'
                + u'<div class="panel-body">'
                )

    if meta_str['collection_name'] and meta_str['collection_uuid']:
        body.append(u'<h3 id="parent-collection">%s</h3>' % fields['collection'])
        if nolinks:
            body.append(u'<p>%s</p>' % meta_str['collection_name'] )
        else:
            body.append(u"""<p><a href="/collection/%s/index-%s.html">
                        %s
                        </a></p>""" % ( meta_str['collection_uuid'], 
                            language, meta_str['collection_name'] ))

    if metadata:
        body.append(metadata)
    if downloads:
        body.append(u'<h3 id="item-files">%s</h3>' % template_fields['files'])
        body.append(downloads)
    body.append(u'</div><!-- /panel-body --></div><!-- /element-set -->')
    
    # The title string returned by the docx parser SHOULD be a list
    meta_str['title'] = ' '.join(meta_str.get('title', ['']))
    
    meta_str['subtitle'] = ''
    meta_str['date'] = date
    meta_str['version'] = version.get_version(pep440=True)
    meta_str['archive'] = archive
    meta_str['short_name'] = short_name
    meta_str['body'] = u' '.join(body)
    meta_str['language'] = language
    

    html_content = Template(html_template)
    meta_str.update(template_fields)
    index_entry = html_content.safe_substitute(meta_str)

    with codecs.open(index_filepath, mode='w', encoding='utf-8') as out_file:
        out_file.write(index_entry)

    # FIXME - main_link doesn't give a full path?
    #~ aliases = meta_str.get(fields_rev['alias'])
    #~ if aliases:
        #~ for alias in aliases:
            #~ if alias.strip() != '':
                #~ makeAlias(alias, main_link)
    
    return    


def makeCollectionPreview(uuid):
    """Render the short preview entry used in lists and indexes of collections.

    The collection metadata is loaded with ``loadJson``; when that yields
    ``None`` an empty string is returned.  Otherwise the description values
    are joined, truncated to 800 characters and converted from markdown to
    html, and the result is substituted into ``collection_preview_template``
    together with the other metadata and the ``template_fields`` labels.
    """
    collection = loadJson(uuid, type='collection')
    if collection is None:
        return ''

    # The description is stored as a list of strings; flatten it first.
    description = ' '.join(collection.get('description', ['']))
    teaser = utils.truncate(description, length=800)

    collection['language'] = language
    collection['short_description'] = utils.md2html(teaser)
    # Template labels are merged last, so they win over metadata keys.
    collection.update(template_fields)

    return Template(collection_preview_template).safe_substitute(collection)


def makeCollectionPage(uuid, delete=False):
    """Generate an html description page for the current collection.

    The page is written to ``index-<language>.html`` inside the directory
    returned by ``getDestDir(uuid, type='collection')``.  It contains the
    collection cover image, the metadata table (when there is one) and the
    previews of the items listed under ``collection_items``.

    Arguments:
        uuid -- identifier of the collection, handed to ``getDestDir`` and
                ``loadJson``.
        delete -- when true, remove the page instead of generating it.

    Returns None.  Nothing is written when ``loadJson`` returns None.
    """
    destdir = getDestDir(uuid, type='collection')
    index_filepath = os.path.join(destdir, 'index-%s.html' % language)

    if delete:
        utils.rm(index_filepath)
        return

    meta_str = loadJson(uuid, type='collection')
    if meta_str is None:
        return

    # copy the collection thumbnail images
    for image_name in ('collection-thumb.jpg', 'collection-cover.jpg'):
        fn = os.path.join(egaia_root.get_root(), image_name)

        if not os.path.exists(fn):
            egaia_collage.mkcollage()
        try:
            shutil.copy2(fn, destdir)
        except EnvironmentError:
            # Best effort: a missing or unreadable image must not abort page
            # generation.  Only I/O failures (IOError, OSError, shutil.Error)
            # are tolerated; anything else is a real bug and propagates.
            print("Error copying collection thumb!")

    # pass "nolinks" because the collection fields are not indexed, so we
    # will get some 404 errors
    metadata = makeMetadataTable(meta_str, nolinks=True)
    collection_items = [getPreview(item, preview_type='item')
                        for item in meta_str.get('collection_items') or []]

    body = list()
    body.append(u'<p><img src="/collection/%s/collection-cover.jpg" class="img-rounded img-responsive"></p>' % uuid)
    if metadata:

        body.append(  u'<div class="element-set panel panel-default">'
                    + u'<div class="panel-heading">'
                    + u'<h3 class="panel-title">%s</h3>' % template_fields['Metadata']
                    + u'</div>'
                    + u'<div class="panel-body">'
                    + u'<div>'
                    + metadata
                    + u'</div></div><!-- /panel-body --></div><!-- /element-set -->'
                    )

    if collection_items:
        body.append(u'<h2>Items</h2>')
        body.append(u'<div class="items-list container row">')
        body.append(' '.join(generateList(collection_items)))
        body.append(u'</div>') # bootstrap

    meta_str['subtitle'] = template_fields['collection']
    meta_str['date'] = date
    meta_str['version'] = version.get_version(pep440=True)
    meta_str['archive'] = archive
    meta_str['short_name'] = short_name
    meta_str['body'] = u' '.join(body)
    meta_str['language'] = language

    html_content = Template(html_template)
    # Template labels are merged last, so they win over metadata keys.
    meta_str.update(template_fields)
    index_entry = html_content.safe_substitute(meta_str)

    with codecs.open(index_filepath, mode='w', encoding='utf-8') as out_file:
        out_file.write(index_entry)
    return

def _readTagIndex(idx_filepath):
    """Parse a ``tag<TAB>uuid`` index file into a dict of tag -> list of uuids.

    Lines that do not consist of exactly two tab-separated fields once the
    surrounding whitespace is stripped (blank lines, a tag without uuid,
    stray extra tabs) are skipped instead of aborting the whole run.
    """
    tags = dict()
    with codecs.open(idx_filepath, mode='r', encoding='utf-8') as idx_file:
        for line in idx_file:
            parts = line.strip().split('\t')
            if len(parts) != 2:
                continue
            tag_name, uuid = parts
            tags.setdefault(tag_name, []).append(uuid)
    return tags


def makeIndexes():
    """Regenerate the indexes for the entire archive.

    Two kinds of indexes are written through ``writeIndex``:

    * one listing for all items and one for all collections found as
      sub-directories of the publication directory;
    * for every field in ``indexed_fields`` that has a
      ``<field>-<language>.idx`` file, one page per tag plus a page listing
      all tags with their item counts.
    """

    # items index. List the items available in the exported collection.
    for i in ('item', 'collection'):
        print("Generating %s index..." % i)
        basedir = os.path.join(pub, i)
        items = os.listdir(basedir)
        # sort the items by date added to the archive
        # NOTE(review): os.path.getctime is the metadata-change time on Unix,
        # not the creation time -- confirm this ordering is what is wanted.
        items.sort(key=lambda x: os.path.getctime(os.path.join(basedir, x)),
            reverse=True)

        index = list()
        for item in items:
            # only directories are items/collections; skip stray files
            if os.path.isfile(os.path.join(basedir, item)):
                continue

            preview = getPreview(item, preview_type=i)
            if preview:
                index.append(preview)

        if i == 'item':
            writeIndex(generateList(index), [i], 'items', 'index')
        else:
            writeIndex(generatePlainList(index), [i], 'collections', 'index')

    for kw in indexed_fields:
        label = fields_rev[kw]
        print("Generating %s index..." % label)

        idx_filepath = os.path.join(pub, '%s-%s.idx' % (kw, language))
        if not os.path.exists(idx_filepath):
            print("No metadata index found: %s" % kw)
            continue

        # The index file is read completely (and closed) before any page is
        # written.
        tags = _readTagIndex(idx_filepath)

        # one index page (set) per tag
        for tag_name, uuids in tags.items():
            index = [getPreview(item, preview_type='item') for item in uuids]
            target = [kw, makeSlug(tag_name)]

            writeIndex(generateList(index),
                            target,
                            tag_name,
                            template_fields[u'%s_index' % label])

        # make a list of tags, for {TYPE}/index-${language}.html
        # NOTE(review): the links below use ``kw`` as the directory while the
        # list itself is written under ``makeSlug(kw)`` -- confirm both
        # resolve to the same path.
        out = list()
        for tag in sorted(tags):
            link = u'%s/%s/index-%s.html' % (kw, makeSlug(tag), language)
            tag_item_count = len(tags[tag])
            out.append(u'<p><a href="/%s">%s</a> <span class="text-muted">(%s)</span></p>' % (link,
                tag, tag_item_count))

        writeIndex(out, [makeSlug(kw)], label, template_fields['keyword_index'], paginate_size=500)

def writeIndex(out, target, title, subtitle, paginate_size=24):
    """Write an index as one or more html pages.

    Arguments:
        out -- list of html fragments, one per index entry.
        target -- sequence of path components below the publication
                  directory; also joined with "/" for the pagination links.
        title, subtitle -- values for the corresponding template variables.
        paginate_size -- maximum number of entries per page.  The default
                  of 24 is a multiple of 12, to work with clearfixes.

    The first page is named ``index-<language>.html``, later pages
    ``index_<n>-<language>.html``.  An empty ``out`` still produces a single
    page with an empty body, so that a stale or missing index is never left
    behind.
    """

    dest_dir = os.path.join(pub, *target)
    dest_url_path = '/'.join(target)

    # ``or [[]]``: always render at least one page, even without entries
    paginated = [out[i:i+paginate_size]
                 for i in range(0, len(out), paginate_size)] or [[]]
    total_pages = len(paginated)

    # identical for every page, so build them once
    html_content = Template(html_template)
    current_version = version.get_version(pep440=True)

    for cur_page, entries in enumerate(paginated, 1):
        if total_pages > 1:
            page_links = makePaginationLinks(total_pages, cur_page, dest_url_path,
                                                paginate_size)
        else:
            page_links = ''

        page_vars = {   'title': title,
                        'subtitle': subtitle,
                        'archive': archive,
                        'short_name': short_name,
                        'body': u'\r\n'.join(entries) + page_links,
                        'date': date,
                        'version': current_version,
                        'language': language,
                    }
        page_vars.update(template_fields)

        if cur_page == 1:
            filename = u'index-%s.html' % language
        else:
            filename = u'index_%s-%s.html' % (cur_page, language)

        # dest_dir already starts with ``pub``; joining ``pub`` onto it again
        # would double the prefix whenever ``pub`` is a relative path.
        writeHtml(html_content.safe_substitute(page_vars), dest_dir, filename)

    return

def makeAlias(alias, target):
    """Generate an alias (redirect page) for an item. Target should be a
    UUID; alias should be a filename.

    The page is rendered from ``alias_template`` and written to the
    publication directory under the slugified alias name.
    """

    html_content = Template(alias_template)
    out = html_content.safe_substitute({'target': target, 'language': language})
    fn = os.path.join(pub, makeSlug(alias))
    print("making alias from %s to %s" % (fn, target))

    # Write utf-8 explicitly, like every other page writer in this module;
    # a plain open() fails on non-ASCII unicode text under Python 2.
    with codecs.open(fn, mode='w', encoding='utf-8') as outfile:
        outfile.write(out)
    return

def makeHomePage():
    """Generate a home page for the archive.

    The page body comes from the docx file configured as ``home_page`` in
    the ``archive`` section, converted with ``utils.docx2str``.  The result
    is written to ``index-<language>.html`` at the top of the publication
    directory.

    Returns True when the page was written, and False when the docx file
    does not exist or could not be converted.
    """

    docx_file = os.path.expanduser(egaia_config.getConfig('archive', 'home_page'))

    if not os.path.exists(docx_file):
        print("File %s not found!" % docx_file)
        return False

    html = utils.docx2str(docx_file)

    if not html:
        print("Error converting docx to html")
        return False

    meta_str = {
        'title': archive,
        'body': u'<div class="static-page col-sm-12 col-md-9">' + html + u'</div>',
        'subtitle': '',
        'date': date,
        'version': version.get_version(pep440=True),
        'archive': archive,
        'short_name': short_name,
        'language': language,
    }
    # Template labels are merged last, so they win over the keys above.
    meta_str.update(template_fields)
    page = Template(html_template).safe_substitute(meta_str)

    fn = os.path.join(pub, u'index-%s.html' % language)
    with codecs.open(fn, mode='w', encoding='utf-8') as out:
        out.write(page)

    # Report success explicitly: the failure paths return False, and an
    # implicit None here would be indistinguishable from them in a
    # truthiness test.
    return True
        
def makeErrorPages():
    """Write the static error pages (403, 404, 500) for the archive.

    Each page is rendered from ``error_page_template`` and stored in the
    publication directory as a hidden file named ``.<code>.html``.
    """

    messages = {
            '404': 'Sorry, the resource you requested could not be found.',
            '403': 'Sorry, you do not have permission to access this page.',
            '500': 'Server error.'
        }

    page_template = Template(error_page_template)
    home_label = template_fields['Home']

    for code, message in messages.items():
        page = page_template.safe_substitute(title=code,
                                             message=message,
                                             Home=home_label)

        target_path = os.path.join(pub, u'.%s.html' % code)
        with codecs.open(target_path, mode='w', encoding='utf-8') as page_file:
            page_file.write(page)
  


def _cli(args):
    """egaia make
    
    Generate indexes and html pages for items in a collection.
    
    Usage:
        egaia make --help
        egaia make [ --force | --delete ] [ --nolinks ] TARGET...
    
    Targets:
        all
        json
        database
        item-pages
        collection-page
        indexes
        item-previews
        collection-preview
        home-page
        static
     """
    # NOTE: the docstring above looks like a docopt usage text that is read
    # at runtime -- presumably by the command dispatcher; keep it unchanged.

    items = egaia_list.listItems()
    init()

    # Work on a copy so that expanding "all" does not modify the caller's
    # argument dict.  "all" does not include "home-page" and "static".
    targets = list(args['TARGET'])
    if 'all' in targets:
        targets.extend((
                     'json',
                     'database',
                     'item-pages',
                     'collection-page',
                     'indexes',
                     'item-previews',
                     'collection-preview'
                     ))

    delete = args['--delete']

    if 'json' in targets and not delete:
        # don't delete the json yet as we may still need it
        # but we must create it before item pages, etc.
        makeCollectionJson()

    for item in items:

        if item is None:
            continue

        if 'item-pages' in targets:
            makeItemPage(item, force=args['--force'], delete=delete,
                nolinks=args['--nolinks'])

        if 'item-previews' in targets:
            makePreview(item, preview_type='item', delete=delete)

        if 'database' in targets:
            updateItemIndex(item, delete=delete)

    bag_info = egaia_bag.loadBag()
    collection_uuid = bag_info['External-Identifier']

    if 'collection-preview' in targets:
        makePreview(collection_uuid, preview_type='collection', delete=delete)

    if 'collection-page' in targets:
        makeCollectionPage(collection_uuid, delete=delete)

    # it is now safe to delete the json -- but only when the json target was
    # actually requested, mirroring the condition used for creating it.
    if 'json' in targets and delete:
        makeCollectionJson(delete=delete)

    if 'indexes' in targets:
        # FIXME: support delete flag
        makeIndexes()

    if 'home-page' in targets:
        makeHomePage()

    if 'static' in targets:
        makeStaticDir(force=True)
        makeErrorPages()