Source code for egaia.egaia_make

# -*- coding: utf-8 -*-

import os
import re
import shutil
import json
import gettext
from string import Template
import codecs
import markdown
import collections

import egaia_config
import egaia_parsefn
import egaia_list
import egaia_bag
import pkg_resources
import utils
from egaia_sanitize import makeSlug
import egaia_root
import egaia_collage
import version
import egaia_docx
import egaia_derive

# Language code of the generated site; it is embedded in every output file
# name (index-<language>.html, <uuid>-<language>.json, ...).
language = egaia_config.getConfig('archive', 'language')

# Root of the published output tree ("pub" directory).
pub = os.path.expanduser(egaia_config.getConfig('archive', 'pub_path'))

# Metadata elements for which keyword indexes (.idx files and tag pages)
# are maintained.
indexed_fields = ['DCTERMS.subject', 'DCTERMS.creator', 'DCTERMS.coverage', 
                    'DCTERMS.type', 'DCTERMS.language']

# Location of the static assets inside the pub directory.
static_dir = os.path.join(pub, 'static')
# NOTE(review): default_thumb is not referenced in the visible part of this
# module.
default_thumb = os.path.join(static_dir, 'default_thumb.png')

# Archive display names substituted into the page templates ($archive and
# $short_name).
archive = egaia_config.getConfig('archive', 'archive_name').decode('utf-8')
short_name = egaia_config.getConfig('archive', 'archive_prefix').decode('utf-8').upper()
# When True, makeItemPage embeds the remote URL of an item instead of the
# local media files.
remote_embeds = egaia_config.getConfig('archive', 'remote_embeds', boolean=True)

# When False, makeItemPage excludes the video stills from the exported files.
export_stills = egaia_config.getConfig('archive', 'export_stills', boolean=True)
# Timestamp computed once at import time; substituted as $date in the footer.
date = utils.current_time()

# NOTE(review): cmd_convert is not referenced in the visible part of this
# module.
cmd_convert = egaia_config.getConfig('system', 'cmd_convert')

# Ordered list of metadata labels; makeMetadataTable walks it to decide the
# order of the fields in the output.
core_metadata = egaia_docx.getCoreFields(filtered=False)
# fields_rev holds the 'terms' config section (looked up by element name,
# e.g. fields_rev['DCTERMS.subject']); fields is the inverse mapping.
fields_rev = dict(egaia_config.printConfig(section='terms'))
fields = dict((value, key) for key, value in fields_rev.iteritems())
# Values of the 'template_fields' config section; merged into the
# substitution variables of every template.
template_fields = dict(egaia_config.printConfig(section='template_fields'))

# Ignore these metadata fields in html output. Use lowercase keys.
# We skip the title fields because they are separately printed at the top
# Skip the organizational info because this will normally be included in the
# footer or elsewhere in HTML output
skip_fields = ['bag-software-agent', 'payload-oxum', 'title', 'DCTERMS.title',
    'source-organization', 'organization-address', 'contact-name',
    'contact-phone', 'contact-email', 'DCTERMS.format']

# string.Template source for one item entry in an index or list; filled in
# by makeItemPreview. Placeholders: identifier, language, thumb, title,
# creator, short_description, subject_tags.
item_preview_template = u"""<div id="${identifier}" class="index-entry">
    <a href="/item/${identifier}/index-${language}.html">
      <img src="${thumb}" class="contain thumbnail thumb-img-cover">
    </a>
     <div class="index-description caption">
     <h4><a href="/item/${identifier}/index-${language}.html">${title}</a><br> <small>$creator</small></h4>
     $short_description
     <p>$subject_tags</p>
    </div><!--/index-description-->
</div> <!-- index-entry -->
"""

# string.Template source for one collection entry in the collections list;
# filled in by makeCollectionPreview. Placeholders: identifier, language,
# title, short_description.
collection_preview_template = u"""<div class="index-entry" id="${identifier}">
    <a href="/collection/${identifier}/index-${language}.html">
      <img src="/collection/${identifier}/collection-thumb.jpg" class="thumbnail">
    </a>
     <div class="index-description caption">
     <h4><a href="/collection/${identifier}/index-${language}.html">$title</a></h4>
     $short_description
    </div><!--/index-description-->
</div> <!-- index-entry -->
"""

# string.Template source for the standalone error pages written by
# makeErrorPages. Placeholders: title, message, Home.
error_page_template = u"""<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="icon" type="image/png" href="/static/favicon.png">

    <title>$title</title>

    <link rel="stylesheet" href="/static/css/bootstrap.min.css">
    <link rel="stylesheet" href="/static/css/egaia.css">

    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
    <!--[if lt IE 9]>
      <script src="/static/js/html5shiv.min.js"></script>
      <script src="/static/js/respond.min.js"></script>
    <![endif]-->
  </head>

  <body>

    <div class="main-content container">
    <div class="jumbotron">
    <div class="alert alert-danger">
        <h1>$title</h1>
        <p>${message}</p>
        <hr>
        <p><span class="glyphicon glyphicon-home"></span>&nbsp;<a href="/" class="alert-link">${Home}</a></p>
    </div>

    </div> <!-- /container -->
    </div> <!-- /main-content -->

  </body>
</html>

"""

# string.Template source for every full page (item, collection, index and
# home pages). Callers supply title, subtitle, body, date, version, archive,
# short_name and language; the remaining placeholders (About, Collections,
# Items, ...) are expected to come from template_fields, which every caller
# merges into the substitution variables.
html_template = u"""<!DOCTYPE html>
<html lang="${language}">
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <meta name="generator" content="egaia $version">
    
    <link rel="icon" type="image/png" href="/static/favicon.png">

    <title>$title - $archive</title>

    <link rel="stylesheet" href="/static/css/bootstrap.min.css">
    <link rel="stylesheet" href="/static/css/egaia.css">

    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
    <!--[if lt IE 9]>
      <script src="/static/js/html5shiv.min.js"></script>
      <script src="/static/js/respond.min.js"></script>
    <![endif]-->
  </head>

  <body>

    <!-- Fixed navbar -->
    <nav class="navbar navbar-default navbar-static-top">
      <div class="container">
        <div class="navbar-header">
          <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
            <span class="sr-only">$toggle</span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>
          <a class="navbar-brand" href="/index-${language}.html"><span class="visible-xs">$short_name</span><span class="hidden-xs">$archive</span></a>
        </div>
        <div id="navbar" class="navbar-collapse collapse navbar-right">
          <ul class="nav navbar-nav">
            <li><a href="/index-${language}.html">${About}</a></li>
            <li><a href="/collection/index-${language}.html">${Collections}</a></li>
            <li class="dropdown">
              <a href="/" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">${Items} <span class="caret"></span></a>
              <ul class="dropdown-menu">
                <li><a href="/item/index-${language}.html">${All_items}</a></li>
                <li role="separator" class="divider"></li>
                <li class="dropdown-header">${Keywords}</li>
                <li><a href="/DCTERMS.subject/index-${language}.html">${Subject}</a></li>
                <li><a href="/DCTERMS.coverage/index-${language}.html">${Coverage_area}</a></li>
                <li><a href="/DCTERMS.type/index-${language}.html">${Media_type}</a></li>
                <li><a href="/DCTERMS.creator/index-${language}.html">${Creator}</a></li>
                <li><a href="/DCTERMS.language/index-${language}.html">${Language}</a></li>
              </ul>
            </li>
          </ul>
        </div><!--/.nav-collapse -->
      </div>
    </nav>
    <div class="main-content">
    <div class="container">
    
    <div class="page-header">
    <h1>$title <small>$subtitle</small></h1>
    </div>
    
    $body

    </div> <!-- /container -->
    </div> <!-- /main-content -->

    <footer class="footer text-muted">
      <div class="container">
        <p id="generator">$last_updated $date. $generator</p>
      </div> <!-- /container -->
    </footer>
    
    <script src="/static/js/jquery.min.js"></script>
    <script src="/static/js/bootstrap.min.js"></script>
  </body>
</html>

"""

# string.Template source for a redirect ("alias") page; used by makeAlias.
# Placeholders: target (item identifier) and filename (page inside the
# item directory that the browser is redirected to).
alias_template = """<!DOCTYPE html>
<html>
  <head>
    <title>${target}</title>
    <meta name="robots" content="noindex">
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
    <meta http-equiv="refresh" content="0; url=/item/${target}/${filename}"/>
  </head>
</html>"""

def init():
    """Make sure the static assets are present in the pub directory.

    Calls makeStaticDir() when ``static_dir`` does not exist yet; does
    nothing otherwise.
    """
    if os.path.exists(static_dir):
        return
    makeStaticDir()
# TEMPLATE: Deprecate here. Move the template strings into the
# config file.

#### DIRECTORY AND FILE COPY/WRITE FUNCTIONS
def makeStaticDir(force=False):
    """Copy the static assets shipped with the package into the pub tree.

    The ``static`` resource directory of the ``egaia`` package is copied to
    ``static_dir``.

    :param force: when true and ``static_dir`` exists, remove it first so
        that the assets are replaced by a fresh copy.
    :returns: True when the directory was copied, False when the copy
        failed.
    """
    if force and os.path.exists(static_dir):
        try:
            shutil.rmtree(static_dir)
        except (IOError, OSError):
            # Best effort: if the old tree is still in the way, the copy
            # below fails and reports it.
            print("Error removing existing static dir!")

    default_static_dir = pkg_resources.resource_filename('egaia', 'static')
    print("copying %s to %s" % (default_static_dir, pub))
    try:
        shutil.copytree(default_static_dir, static_dir)
    except (IOError, OSError, shutil.Error):
        # Only filesystem errors are handled here (instead of a bare
        # ``except``) so that Ctrl-C and programming errors still surface.
        # The copy is allowed to fail if the destination already exists.
        print("Unable to write static dir!")
        return False
    return True
def getDestDir(uuid, type='item'):
    """Return the output directory of an item or collection.

    The directory is ``<pub>/<type>/<uuid>``; it is created when it does
    not exist yet. Returns False when ``uuid`` is None.
    """
    if uuid is None:
        return False

    target = os.path.join(pub, type, uuid)
    if os.path.exists(target):
        return target
    os.makedirs(target)
    return target
def copyDerivatives(derivs, destdir, force=False):
    """Copy derivatives to the "pub" directory for distribution.

    Each file is hard-linked into ``destdir`` under its base name; when a
    hard link cannot be created the file is copied instead.

    :param derivs: iterable of paths of the files to publish.
    :param destdir: existing directory that receives the files.
    :param force: when false, a destination that is as new as (or newer
        than) its source is left untouched; when true every file is
        published again.
    :returns: None
    """
    for fn in derivs:
        filename = os.path.basename(fn)
        dest = os.path.join(destdir, filename)

        if os.path.exists(dest):
            # don't copy/link if the destination is newer or the same
            if not force and os.stat(fn).st_mtime <= os.stat(dest).st_mtime:
                continue
            # Always clear the old destination before publishing again.
            # Leaving it in place (as happened with force=True) makes
            # os.link fail, and the fallback copy then targets a file that
            # may be a hard link to the source itself.
            os.unlink(dest)

        print("Copying %s..." % filename)
        try:
            os.link(fn, dest)
        except (OSError, AttributeError):
            # OSError: linking failed (e.g. across filesystems).
            # AttributeError: os.link is not available on this platform.
            print("Unable to make a hardlink; copying instead")
            shutil.copy2(fn, dest)
    return
def writeHtml(html_content, destdir, filename=u'index-%s.html' % language):
    """Write ``html_content`` to ``destdir/filename`` as UTF-8.

    ``destdir`` is created when it does not exist. The default file name is
    the index page for the configured language.
    """
    if not os.path.exists(destdir):
        os.makedirs(destdir)
    target = os.path.join(destdir, filename)
    with codecs.open(target, mode='w', encoding='utf-8') as handle:
        handle.write(html_content)
#### ELEMENTS FOR HTML OUTPUT
def getThumb(uuid, size='thumb'):
    """Return the base file name of an item's thumbnail image, or None.

    Only images that are already present in the item's output (pub)
    directory are considered. A bare file name is returned rather than a
    full tag because callers build different paths depending on where the
    image is used.
    """
    candidates = egaia_list.listFiles(
        filepath=getDestDir(uuid),
        uuid=uuid,
        filter_type='df-%s' % size,
        exclude=None,
    )
    if candidates:
        return os.path.basename(candidates[0])
    return None
def makeEmbedTag(derivs, page):
    """Build an embeddable media tag from a list of derivatives.

    The first derivative that is an ``.mp3`` or ``.webm`` file decides the
    result: a complete audio or video embed whose URL points below
    ``item/<page>/``. ``page`` should be the uuid of the item. Returns None
    when ``derivs`` is empty or contains no embeddable file.
    """
    if not derivs:
        return None

    # (file suffix, embed type, mime type); we might also have mp4
    embeddable = (
        ('.mp3', 'audio', 'audio/mpeg'),
        ('.webm', 'video', 'video/webm'),
    )
    for path in derivs:
        src = '/'.join(['item', page, os.path.basename(path)])
        for suffix, embed_type, mtype in embeddable:
            if path.endswith(suffix):
                return makeLocalEmbed(embed_type=embed_type, src=src,
                                      mtype=mtype)
        # Text embeds are disabled; it is cleaner to separate document and
        # metadata.
    return None
def makeTextEmbed(fn):
    """Return the UTF-8 content of file ``fn`` wrapped in a
    ``text_embed`` div.
    """
    with codecs.open(fn, mode='r', encoding='utf-8') as source:
        content = source.read()
    return u'<div class="text_embed">%s</div>' % content
def makeLocalEmbed(embed_type=None, src=None, mtype=None):
    """Assemble an html5 audio or video embed tag.

    ``embed_type`` is used as the tag name, ``src`` is the site-relative
    path of the media file (without leading slash) and ``mtype`` its mime
    type. Video embeds get the responsive 16:9 wrapper class.
    """
    if 'video' in embed_type:
        wrapper_class = 'embed-responsive embed-responsive-16by9'
    else:
        wrapper_class = 'embed'
    markup = """<div class="%s"> <%s controls> <source src="/%s" type="%s" class="embed-responsive-item"> <a href="/%s">Download</a> </%s></div>"""
    return markup % (wrapper_class, embed_type, src, mtype, src, embed_type)
def makeRemoteEmbed(url):
    """Return a responsive iframe that embeds the video at ``url``.

    The result is usable in an item description page.
    """
    iframe = """<div class="embed-responsive embed-responsive-16by9"> <iframe src="%s" scrolling="no" class="embed-responsive-item" allowfullscreen></iframe></div>"""
    return iframe % url
def makeDownloadsList(derivs):
    """Build an html table of download links for derivatives.

    Each row shows the format (the part of the parsed base name after its
    last dot, or the configured "source" label when there is none), the
    extension, the file size and the modification time. Rows are sorted
    as strings. Returns None when ``derivs`` is empty.
    """
    if not derivs:
        return None

    row_template = u"""<tr><td><a href="/item/{uuid}/{filename}">{fmt}</a></td> <td>{ext}</td> <td>{size}</td> <td>{mtime}</td> </tr>"""
    rows = []
    for path in derivs:
        filename = os.path.basename(path)
        basename, uuid, extension = egaia_parsefn.parseFilename(filename)
        info = os.stat(path)
        if '.' in basename:
            fmt = basename.rpartition('.')[2]
        else:
            fmt = template_fields['source']
        rows.append(row_template.format(
            ext=extension,
            uuid=uuid,
            filename=filename,
            basename=basename,
            fmt=fmt,
            size=utils.byteSize(info.st_size),
            mtime=utils.isotime(info.st_mtime),
        ))
    rows.sort()
    return (u'<table class="table table-condensed">'
            + u'\r\n'.join(rows)
            + u'</table>')
def generateList(items):
    """Wrap html snippets in the cells of a responsive grid.

    Returns one string per item (a list), or the string ``' '`` when
    ``items`` is empty. A clearfix is added after every second cell (small
    screens) and after every third cell (medium and large screens). When
    there are exactly two items the cells get a column offset.
    """
    if len(items) == 0:
        return ' '

    # add offsets for fewer than three items
    offset = ' col-md-offset-1' if len(items) == 2 else ''

    cells = []
    for position, item in enumerate(items, 1):
        parts = [u'<div class="col-xs-12 col-sm-6 col-md-4%s">%s</div>'
                 % (offset, item)]
        # add clearfixes. sm:2, md:3
        if position % 2 == 0:
            parts.append(u'<div class="clearfix visible-sm-block"></div>')
        if position % 3 == 0:
            parts.append(u'<div class="clearfix visible-md-block visible-lg-block"></div>')
        cells.append(u'\r\n'.join(parts))
    return cells
def generatePlainList(items):
    """Wrap html snippets in two-column cells (not a thumbnail grid).

    Returns one string per item (a list), or the string ``' '`` when
    ``items`` is empty. A clearfix follows every second cell.
    """
    if len(items) == 0:
        return ' '

    cells = []
    for position, item in enumerate(items, 1):
        parts = ['<div class="col-xs-12 col-md-6">%s</div>' % item]
        # add clearfixes. md:2
        if position % 2 == 0:
            parts.append(u'<div class="clearfix"></div>')
        cells.append('\r\n'.join(parts))
    return cells
def makeTags(tags, tagtype='tags'):
    """Turn keywords into html links to their index pages.

    ``tags`` may be a single value or a list. Each link points to
    ``/<tagtype>/<slug>/index-<language>.html``. Returns a list of anchor
    strings; an empty list when ``tags`` is empty.
    """
    if not tags:
        return []

    tag_list = tags if isinstance(tags, list) else [tags]
    return [
        '<a href="/%s">%s</a>' % (
            '%s/%s/index-%s.html' % (tagtype, makeSlug(tag), language),
            tag,
        )
        for tag in tag_list
    ]
def makeMetadataTable(data, nolinks=False):
    """Render item metadata as html.

    ``data`` is a dict of metadata keyed by label (normally loaded from
    json, or the values in bag-info.txt). Fields are emitted in the order
    of ``core_metadata``; fields listed in ``skip_fields`` and empty fields
    are left out. Indexed fields are rendered as keyword links unless
    ``nolinks`` is true, lists of lists as a table, and anything else as
    markdown text.

    Note: a non-list value that gets rendered is replaced in ``data`` by a
    one-element list.
    """
    html = []

    # go through the fields in order
    for label in core_metadata:
        element = fields[label]
        if element in skip_fields:
            continue

        value = data.get(label)
        if not value or value[0] == '':
            # the field does not exist, or is an empty list or string;
            # ignore in html output
            continue

        html.append(u'<h3 id="%s" class="element">%s</h3>' % (element, label))

        # fields for which we have indexes become keyword links
        if element in indexed_fields and not nolinks:
            for tag in makeTags(data[label], tagtype=element):
                html.append(u'<p class="element-data">%s</p>' % tag)
            continue

        if not isinstance(data[label], list):
            data[label] = [data[label]]
        parts = data[label]

        if all(isinstance(part, list) for part in parts):
            # tabular data: one table row per inner list
            html.append(u'<table class="element-data table table-responsive">')
            for part in parts:
                cells = u'<tr><td>' + u'</td><td>'.join(part) + u'</td></tr>'
                html.append(cells.replace('\n', '<br>'))
            html.append(u'</table>')
        else:
            # The markdown output is ALREADY wrapped in "p" elements, so
            # use a div here
            html.append(u'<div class="element-data">')
            html.append(utils.md2html(u'\n\n'.join(parts)))
            html.append(u'</div>')

    return u' '.join(html)
#### HTML output
def makeCollectionJson(delete=False, outdir=None):
    """Create json representations for all items in the current collection.

    One ``<uuid>-<language>.json`` file is written per metadata docx file
    into the item's output directory, followed by one json file for the
    collection itself.

    :param delete: when true, remove the json files instead of writing
        them.
    :param outdir: when set, only the item json is processed and the
        collection json is skipped (this is for the --json --outdir=XXX
        option).
    """
    # FIXME: Use localized labels from config
    bag_info = egaia_bag.loadBag()
    collection_uuid = bag_info['External-Identifier'].decode('utf-8')

    # See if we have a metadata file in the bag root
    readme_file = os.path.join(egaia_root.get_root(),
                               'metadata-%s.docx' % language)
    if os.path.exists(readme_file):
        collection_info = egaia_docx.parseDocx(readme_file)
    else:
        collection_info = dict()
        print("could not read %s" % readme_file)

    fallback_title = [bag_info['Title'].decode('utf-8')]
    collection_name = u' '.join(collection_info.get('title', fallback_title))

    collection_items = []
    # sort by input filename; otherwise they will be ordered by uuid
    for docx_file in sorted(egaia_list.listFiles(filter_type="metadata")):
        print("processing %s..." % docx_file)
        uuid = egaia_parsefn.getUuid(docx_file)
        json_filepath = os.path.join(getDestDir(uuid, type='item'),
                                     '%s-%s.json' % (uuid, language))
        if delete:
            utils.rm(json_filepath)
            continue

        # make the json file
        item_info = json.loads(egaia_docx.docx2json([docx_file]))[0]
        item_info['collection_name'] = collection_name
        item_info['collection_uuid'] = collection_uuid
        with codecs.open(json_filepath, mode='w', encoding='utf-8') as json_file:
            json_file.write(json.dumps(item_info, sort_keys=True, indent=4,
                                       ensure_ascii=False))
        collection_items.append(uuid)

    if outdir:
        # Don't export the collection json; we just want items
        return

    # Now process the collection metadata
    collection_info['collection_items'] = collection_items
    collection_info['title'] = collection_name
    uuid = bag_info['External-Identifier']
    collection_info['identifier'] = uuid
    json_filepath = os.path.join(getDestDir(uuid, type='collection'),
                                 '%s-%s.json' % (uuid, language))
    if delete:
        utils.rm(json_filepath)
        return
    with codecs.open(json_filepath, mode='w', encoding='utf-8') as json_file:
        json_file.write(json.dumps(collection_info, sort_keys=True, indent=4))
def loadJson(uuid, type):
    """Load the json metadata of an item or collection.

    :param uuid: identifier of the item or collection.
    :param type: ``'item'`` or ``'collection'``; selects the output
        directory that holds ``<uuid>-<language>.json``.
    :returns: the decoded json data, or None when the file cannot be read
        or parsed (a message is printed in that case).
    """
    json_filepath = os.path.join(getDestDir(uuid, type=type),
                                 '%s-%s.json' % (uuid, language))
    try:
        with codecs.open(json_filepath, mode='r', encoding='utf-8') as json_file:
            return json.load(json_file)
    except (IOError, OSError, ValueError):
        # IOError/OSError: missing or unreadable file. ValueError: invalid
        # json or invalid UTF-8. Anything else (Ctrl-C, programming errors)
        # is no longer swallowed.
        print("Error opening %s!" % json_filepath)
        return None
def updateItemIndex(uuid, delete=False):
    """Update the keyword indexes based on json metadata for a given item.

    For every indexed field the index file ``<pub>/<field>-<language>.idx``
    is rewritten: the rows of this item are dropped and, unless ``delete``
    is true, one ``keyword<TAB>uuid`` row is added per keyword of the item.

    :param uuid: identifier of the item.
    :param delete: when true, only remove the item from the indexes.
    """
    # This function reads the entire index for each keyword into memory.
    # We could easily do this by writing directly to disk, for scalability.
    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        print("could not open json for %s" % uuid)
        return

    for index in indexed_fields:
        label = fields_rev[index]
        index_filepath = os.path.join(pub, '%s-%s.idx' % (index, language))

        # load the existing index if available, minus this item's rows
        index_data = []
        if os.path.exists(index_filepath):
            with codecs.open(index_filepath, mode='r', encoding='utf-8') as index_file:
                for line in index_file:
                    entry = line.strip()
                    if entry == '':
                        continue
                    # Compare the identifier column only. A substring test
                    # on the whole row would also drop rows of other items
                    # whose keyword or identifier contains this identifier.
                    if entry.rpartition('\t')[2] == uuid:
                        continue
                    index_data.append(entry)

        if not delete and meta_str.get(label):
            # if present, add the key and uuid to the index
            keywords = meta_str[label]
            if not isinstance(keywords, list):
                keywords = [keywords]
            index_data.extend(u'%s\t%s' % (kw, uuid) for kw in keywords if kw)

        # write the index to file
        with codecs.open(index_filepath, mode='w', encoding='utf-8') as index_file:
            index_file.write(u'\n'.join(index_data))
    return
def makeItemPreview(uuid):
    """Render the html preview of an item for use in indexes or lists.

    Loads the item's json metadata, derives the template variables
    (subject tags, short description, title, identifier, thumbnail,
    creator) and fills ``item_preview_template``. Returns None when the
    metadata cannot be loaded.
    """
    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        return

    subject_label = fields_rev['DCTERMS.subject']
    subjects = meta_str.get(subject_label, [])
    if len(subjects) == 0 or subjects[0] == '':
        meta_str['subject_tags'] = ''
    else:
        links = makeTags(subjects, tagtype="DCTERMS.subject")
        meta_str['subject_tags'] = (fields_rev['DCTERMS.subject'] + u': '
                                    + u', '.join(links))

    description = ' '.join(
        meta_str.get(fields_rev['DCTERMS.description'], ['']))
    meta_str['short_description'] = utils.md2html(
        utils.truncate(description, length=200))

    meta_str['title'] = meta_str.get(fields_rev['DCTERMS.title'], '')
    meta_str['identifier'] = meta_str.get(
        fields_rev['DCTERMS.identifier'], [''])
    meta_str['language'] = language

    thumb = getThumb(uuid, size='thumb')
    meta_str['thumb'] = ('/item/%s/%s' % (uuid, thumb) if thumb
                         else '/static/null.png')

    meta_str['creator'] = '; '.join(
        meta_str.get(fields_rev['DCTERMS.creator'], list()))

    # Flatten list values to plain strings for the template.
    for key, value in meta_str.items():
        if all(isinstance(i, list) for i in value) and len(value) > 0:
            # ignore tabular data?
            continue
        if isinstance(value, list):
            meta_str[key] = ' '.join(value)

    meta_str.update(template_fields)
    return Template(item_preview_template).safe_substitute(meta_str)
def makePreview(uuid, preview_type='item', delete=False):
    """Write (or remove) the cached html preview of an item or collection.

    The preview is stored in ``<pub>/.previews-<language>/<uuid>``. With
    ``delete`` the cached file is removed if present. Nothing is written
    when no preview text could be generated. Always returns None.
    """
    previews_dir = os.path.join(pub, '.previews-%s' % language)
    index_path = os.path.join(previews_dir, uuid)

    if delete:
        if os.path.exists(index_path):
            os.unlink(index_path)
        return

    builder = (makeCollectionPreview if preview_type == 'collection'
               else makeItemPreview)
    preview_text = builder(uuid)
    if not preview_text:
        return None

    if not os.path.exists(previews_dir):
        os.makedirs(previews_dir)
    with codecs.open(index_path, mode='w', encoding='utf-8') as index_file:
        index_file.write(preview_text)
def getPreview(uuid, preview_type='item'):
    """Retrieve an html preview for an item or collection.

    The preview is read from ``<pub>/.previews-<language>/<uuid>``. When
    that file does not exist yet it is generated first.

    :param uuid: identifier of the item or collection.
    :param preview_type: ``'item'`` or ``'collection'``.
    :returns: the preview html, or None when no preview could be
        generated.
    """
    index_path = os.path.join(pub, '.previews-%s' % language, uuid)

    if not os.path.exists(index_path):
        print(index_path)
        print('generating preview for %s' % uuid)
        # makePreview only writes the file and returns None, so its return
        # value must not be handed back to the caller; read the file that
        # it produced instead.
        makePreview(uuid, preview_type=preview_type)
        if not os.path.exists(index_path):
            # nothing was written, i.e. no preview could be generated
            return None

    with codecs.open(index_path, mode='r', encoding='utf-8') as index_file:
        return index_file.read()
def makeItemPage(uuid, force=False, delete=False, nolinks=False):
    """Generate the html page of an item.

    Copies the item's derivatives, metadata file and editable originals to
    its output directory and writes ``index-<language>.html`` there.

    :param uuid: identifier of the item.
    :param force: passed on to copyDerivatives.
    :param delete: when true, remove the page instead of generating it.
    :param nolinks: when true, keywords and the parent collection are
        rendered without links.
    """
    index_filepath = os.path.join(getDestDir(uuid, type='item'),
                                  'index-%s.html' % language)
    if delete:
        utils.rm(index_filepath)
        return

    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        return

    exclude = []
    if export_stills is False:
        # exclude the video stills directory
        exclude += ['.df-still-', '.df-stills-index']
    if meta_str.get('public', [''])[0].lower() == 'false':
        # FIXME: This should be a whitelist, not a blacklist.
        exclude += ['.df-pdf', '.df-360p-vp9-400k', '.df-mp3', '.df-h264']

    destdir = getDestDir(uuid, type='item')

    # get a base list of all the derivatives to copy
    df = egaia_list.listFiles(uuid=uuid, filter_type='df', exclude=exclude)
    # we also want to copy the docx metadata file, so people can edit it
    metadata_file = egaia_list.listFiles(uuid=uuid, filter_type='metadata')
    # and we want to copy "mutable" (editable) files that collaborators might
    # download, edit, and send back to us
    originals = egaia_list.listFiles(uuid=uuid, filter_type='orig',
                                     exclude=exclude)
    mutable = [x for x in originals if x.endswith(('.docx', '.svg'))]

    # Copy everything to the public directory by default
    all_derivs = df + metadata_file + mutable
    if all_derivs:
        copyDerivatives(all_derivs, destdir, force=force)

    # Create a link to the files for download.
    # Ignore "medium" images, even if they are the primary distribution
    # format, since they are already embedded on the page
    dl_excludes = exclude + ['df-med', 'df-thumb']

    # Don't link to local downloads if remote embeds are available
    remote = meta_str.get(fields_rev['remote_embed_url'])
    use_remote = bool(remote) and remote_embeds is True
    if use_remote:
        # Exclude the video download links, but copy them to the "pub"
        # directory anyway. This allows us to have all the derivative files
        # in place for offline distribution, while keeping them unlisted;
        # we can publish the catalogue telling rsync to exclude these files.
        # FIXME: Make this look at the filenames themselves, not just type.
        dl_excludes += ['.df-h264', '.df-360p-vp9-400k']

    derivs = (egaia_list.listFiles(uuid=uuid, filter_type='df',
                                   exclude=dl_excludes)
              + metadata_file + mutable)

    # manage the embed tag
    if use_remote:
        print("using REMOTE url: %s" % remote[0])
        embed = makeRemoteEmbed(remote[0])
    else:
        embed = makeEmbedTag(derivs, page=uuid)

    thumb = getThumb(uuid, size='med')
    metadata = makeMetadataTable(meta_str, nolinks=nolinks)
    downloads = makeDownloadsList(derivs)

    body = []
    if thumb and not embed:
        # The thumbnail is deliberately not a link: it may show e.g. a pdf
        # while the linked document would be an html version that looks
        # completely different.
        body.append(
            u'<p class="embed-img-responsive"><img src="/item/%s/%s" class="img-responsive"></p>'
            % (uuid, thumb))
    if embed:
        body.append(embed)

    # add the metadata panel
    body.append(
        u'<div class="element-set panel panel-default">'
        + u'<div class="panel-heading">'
        + u'<h3 class="panel-title">%s</h3>' % template_fields['Metadata']
        + u'</div>'
        + u'<div class="panel-body">'
    )
    if meta_str['collection_name'] and meta_str['collection_uuid']:
        body.append(u'<h3 id="parent-collection">%s</h3>' % fields['collection'])
        if nolinks:
            body.append(u'<p>%s</p>' % meta_str['collection_name'])
        else:
            body.append(u"""<p><a href="/collection/%s/index-%s.html"> %s </a></p>""" % (
                meta_str['collection_uuid'],
                language,
                meta_str['collection_name'],
            ))
    if metadata:
        body.append(metadata)
    if downloads:
        body.append(u'<h3 id="item-files">%s</h3>' % template_fields['files'])
        body.append(downloads)
    body.append(u'</div><!-- /panel-body --></div><!-- /element-set -->')

    # The title string returned by the docx parser SHOULD be a list
    meta_str['title'] = ' '.join(meta_str.get('title', ['']))
    meta_str['subtitle'] = ''
    meta_str['date'] = date
    meta_str['version'] = version.get_version(pep440=True)
    meta_str['archive'] = archive
    meta_str['short_name'] = short_name
    meta_str['body'] = u' '.join(body)
    meta_str['language'] = language
    meta_str.update(template_fields)

    page = Template(html_template).safe_substitute(meta_str)
    with codecs.open(index_filepath, mode='w', encoding='utf-8') as out_file:
        out_file.write(page)
    # FIXME: aliases are not generated; main_link doesn't give a full path?
def makeCollectionPreview(uuid):
    """Render the html preview of a collection.

    The preview (thumbnail plus brief description) is what gets included
    in the list or indexes of collections. Returns an empty string when
    the collection's json metadata cannot be loaded.
    """
    meta_str = loadJson(uuid, type='collection')
    if meta_str is None:
        return ''

    meta_str['language'] = language
    description = ' '.join(meta_str.get('description', ['']))
    meta_str['short_description'] = utils.md2html(
        utils.truncate(description, length=800))
    meta_str.update(template_fields)
    return Template(collection_preview_template).safe_substitute(meta_str)
def makeCollectionPage(uuid, delete=False):
    """Generate an html description page for the current collection.

    Copies the collection thumbnail and cover images into the collection's
    output directory and writes ``index-<language>.html`` there, containing
    the cover image, the collection metadata and a grid of item previews.

    :param uuid: identifier of the collection.
    :param delete: when true, remove the page instead of generating it.
    """
    index_filepath = os.path.join(getDestDir(uuid, type='collection'),
                                  'index-%s.html' % language)
    if delete:
        utils.rm(index_filepath)
        return

    meta_str = loadJson(uuid, type='collection')
    if meta_str is None:
        return

    destdir = getDestDir(uuid, type='collection')

    # copy the collection thumbnail images
    for image in ('collection-thumb.jpg', 'collection-cover.jpg'):
        fn = os.path.join(egaia_root.get_root(), image)
        if not os.path.exists(fn):
            egaia_collage.mkcollage()
        try:
            shutil.copy2(fn, destdir)
        except (IOError, OSError, shutil.Error):
            # Best effort, but only for filesystem errors; a bare except
            # would also hide Ctrl-C and programming errors.
            print("Error copying collection thumb!")

    # pass "nolinks" because the collection fields are not indexed, so we
    # will get some 404 errors
    metadata = makeMetadataTable(meta_str, nolinks=True)

    # Items without a preview are skipped; otherwise the missing preview
    # (None) would be rendered literally as "None" in the grid.
    collection_items = []
    for item in meta_str.get('collection_items') or []:
        preview = getPreview(item, preview_type='item')
        if preview:
            collection_items.append(preview)

    body = [
        u'<p><img src="/collection/%s/collection-cover.jpg" class="img-rounded img-responsive"></p>' % uuid
    ]
    if metadata:
        body.append(
            u'<div class="element-set panel panel-default">'
            + u'<div class="panel-heading">'
            + u'<h3 class="panel-title">%s</h3>' % template_fields['Metadata']
            + u'</div>'
            + u'<div class="panel-body">'
            + u'<div>' + metadata
            + u'</div></div><!-- /panel-body --></div><!-- /element-set -->'
        )
    if collection_items:
        body.append(u'<h2>Items</h2>')
        body.append(u'<div class="items-list container row">')
        body.append(' '.join(generateList(collection_items)))
        body.append(u'</div>')  # bootstrap

    meta_str['subtitle'] = template_fields['collection']
    meta_str['date'] = date
    meta_str['version'] = version.get_version(pep440=True)
    meta_str['archive'] = archive
    meta_str['short_name'] = short_name
    meta_str['body'] = u' '.join(body)
    meta_str['language'] = language
    meta_str.update(template_fields)

    page = Template(html_template).safe_substitute(meta_str)
    with codecs.open(index_filepath, mode='w', encoding='utf-8') as out_file:
        out_file.write(page)
    return
def makeIndexes():
    """Regenerate the indexes for the entire archive.

    Writes the list of all items, the list of all collections and, for
    every indexed field that has an ``.idx`` file, one page per keyword
    plus a page listing all keywords of that field.
    """
    # items index. List the items available in the exported collection.
    for kind in ('item', 'collection'):
        print("Generating %s index..." % kind)
        basedir = os.path.join(pub, kind)
        entries = os.listdir(basedir)
        # sort the items by date added to the archive
        entries.sort(key=lambda x: os.path.getctime(os.path.join(basedir, x)),
                     reverse=True)

        index = []
        for entry in entries:
            if os.path.isfile(os.path.join(basedir, entry)):
                continue
            preview = getPreview(entry, preview_type=kind)
            if preview:
                index.append(preview)

        if kind == 'item':
            writeIndex(generateList(index), [kind], 'items', 'index')
        else:
            writeIndex(generatePlainList(index), [kind], 'collections', 'index')

    for kw in indexed_fields:
        label = fields_rev[kw]
        print("Generating %s index..." % label)
        idx_filepath = os.path.join(pub, '%s-%s.idx' % (kw, language))
        if not os.path.exists(idx_filepath):
            print("No metadata index found: %s" % kw)
            continue

        # read and parcel out: the file contains tag<TAB>uuid rows
        tags = dict()
        with codecs.open(idx_filepath, mode='r', encoding='utf-8') as idx_file:
            for line in idx_file:
                # Split on the last tab and skip rows with an empty side,
                # so that a malformed row cannot abort the whole run.
                tag_name, _, uuid = line.strip().rpartition('\t')
                if not tag_name or not uuid:
                    continue
                tags.setdefault(tag_name, []).append(uuid)

        for tag_name, uuids in tags.items():
            # Skip items without a preview, as the item index above does;
            # otherwise None would be rendered as text.
            index = []
            for item in uuids:
                preview = getPreview(item, preview_type='item')
                if preview:
                    index.append(preview)
            target = [kw, makeSlug(tag_name)]
            writeIndex(generateList(index), target, tag_name,
                       template_fields[u'%s_index' % label])

        # make a list of tags, for {TYPE}/index-${language}.html
        out = []
        for tag in sorted(tags):
            link = u'%s/%s/index-%s.html' % (kw, makeSlug(tag), language)
            out.append(
                u'<p><a href="/%s">%s</a> <span class="text-muted">(%s)</span></p>'
                % (link, tag, len(tags[tag])))
        writeIndex(out, [makeSlug(kw)], label, template_fields['keyword_index'],
                   paginate_size=500)
def writeIndex(out, target, title, subtitle, paginate_size=24):
    """Prepare an index page.

    ``out`` is split into pages of ``paginate_size`` entries. The first
    page is written as ``index-<language>.html``, page N (N > 1) as
    ``index_<N>-<language>.html``. Nothing is written when ``out`` is
    empty.

    :param out: sequence of html snippets, one per entry.
    :param target: list of path components below ``pub`` that name the
        directory of the index.
    :param title: page title.
    :param subtitle: page subtitle.
    :param paginate_size: maximum number of entries per page; should be a
        multiple of 12, to work with clearfixes.
    """
    # The output directory already starts with pub. Joining pub in front of
    # it once more (as was done before) duplicates the prefix whenever pub
    # is a relative path.
    dest_dir = os.path.join(*[pub] + target)
    dest_url_path = '/'.join(target)

    paginated = [out[i:i + paginate_size]
                 for i in range(0, len(out), paginate_size)]
    total_pages = len(paginated)

    # Everything except the body is the same for all pages.
    html_content = Template(html_template)
    page_vars = {
        'title': title,
        'subtitle': subtitle,
        'archive': archive,
        'short_name': short_name,
        'date': date,
        'version': version.get_version(pep440=True),
        'language': language,
    }
    page_vars.update(template_fields)

    for cur_page, entries in enumerate(paginated, 1):
        if total_pages > 1:
            # NOTE(review): makePaginationLinks is not defined in the
            # visible part of this module - confirm it exists.
            page_links = makePaginationLinks(total_pages, cur_page,
                                             dest_url_path, paginate_size)
        else:
            page_links = ''
        # Set last so that template_fields cannot override the body.
        page_vars['body'] = u'\r\n'.join(entries) + page_links

        if cur_page == 1:
            filename = u'index-%s.html' % language
        else:
            filename = u'index_%s-%s.html' % (cur_page, language)
        writeHtml(html_content.safe_substitute(page_vars), dest_dir, filename)
    return
def makeAlias(alias, target):
    """Generate an alias (redirect page) for an item.

    The page is written to ``<pub>/<slug of alias>`` and redirects to the
    item's index page for the configured language.

    :param alias: file name of the alias.
    :param target: UUID of the item that the alias points to.
    """
    html_content = Template(alias_template)
    out = html_content.safe_substitute({
        'target': target,
        'language': language,
        # The template's redirect URL needs the page inside the item
        # directory; without this value the literal text "${filename}"
        # ended up in the URL.
        'filename': u'index-%s.html' % language,
    })
    fn = os.path.join(pub, makeSlug(alias))
    print("making alias from %s to %s" % (fn, target))
    # Written as UTF-8, the charset that the template itself declares.
    with codecs.open(fn, mode='w', encoding='utf-8') as outfile:
        outfile.write(out)
    return
def makeHomePage():
    """Generate the home page of the archive.

    The docx file configured as ``archive/home_page`` is converted to html
    and written to ``<pub>/index-<language>.html``. Returns False when the
    file is missing or cannot be converted.
    """
    docx_file = os.path.expanduser(
        egaia_config.getConfig('archive', 'home_page'))
    if not os.path.exists(docx_file):
        print("File %s not found!" % docx_file)
        return False

    html = utils.docx2str(docx_file)
    if not html:
        print("Error converting docx to html")
        return False

    meta_str = {
        'title': archive,
        'body': u'<div class="static-page col-sm-12 col-md-9">' + html + u'</div>',
        'subtitle': '',
        'date': date,
        'version': version.get_version(pep440=True),
        'archive': archive,
        'short_name': short_name,
        'language': language,
    }
    meta_str.update(template_fields)
    page = Template(html_template).safe_substitute(meta_str)

    fn = os.path.join(pub, u'index-%s.html' % language)
    with codecs.open(fn, mode='w', encoding='utf-8') as out:
        out.write(page)
def makeErrorPages():
    """Generate 404 and other error pages for the archive.

    One hidden file ``.<code>.html`` is written to the pub directory for
    each of the codes 404, 403 and 500.
    """
    messages = {
        '404': 'Sorry, the resource you requested could not be found.',
        '403': 'Sorry, you do not have permission to access this page.',
        '500': 'Server error.'
    }
    for code, message in messages.items():
        page = Template(error_page_template).safe_substitute(
            title=code,
            message=message,
            Home=template_fields['Home'],
        )
        fn = os.path.join(pub, u'.%s.html' % code)
        with codecs.open(fn, mode='w', encoding='utf-8') as out:
            out.write(page)
def _cli(args):
    """egaia make

    Generate indexes and html pages for items in a collection.

    Usage:
        egaia make --help
        egaia make [ --force | --delete ] [ --nolinks ] TARGET...

    Targets:
        all
        json
        database
        item-pages
        collection-page
        indexes
        item-previews
        collection-preview
        home-page
        static
    """
    # ``args`` is the parsed command line: a dict holding the TARGET list
    # and the --force, --delete and --nolinks flags.
    targets = args['TARGET']
    delete = args['--delete']

    items = egaia_list.listItems()
    init()

    if 'all' in targets:
        targets.extend((
            'json', 'database', 'item-pages', 'collection-page', 'indexes',
            'item-previews', 'collection-preview'
        ))

    if 'json' in targets and not delete:
        # don't delete the json yet as we may still need it
        # but we must create it before item pages, etc.
        makeCollectionJson()

    for item in items:
        if item is None:
            continue
        if 'item-pages' in targets:
            makeItemPage(item, force=args['--force'], delete=delete,
                         nolinks=args['--nolinks'])
        if 'item-previews' in targets:
            makePreview(item, preview_type='item', delete=delete)
        if 'database' in targets:
            updateItemIndex(item, delete=delete)

    collection_uuid = egaia_bag.loadBag()['External-Identifier']
    if 'collection-preview' in targets:
        makePreview(collection_uuid, preview_type='collection', delete=delete)
    if 'collection-page' in targets:
        makeCollectionPage(collection_uuid, delete=delete)

    # it is now safe to delete the json
    # NOTE(review): the json is deleted whenever --delete is given, even if
    # the 'json' target was not requested, whereas creating it above
    # requires that target - confirm this asymmetry is intended.
    if delete:
        makeCollectionJson(delete=delete)

    if 'indexes' in targets:
        # FIXME: support delete flag
        makeIndexes()
    if 'home-page' in targets:
        makeHomePage()
    if 'static' in targets:
        makeStaticDir(force=True)
        makeErrorPages()