# -*- coding: utf-8 -*-
import os
import re
import shutil
import json
import gettext
from string import Template
import codecs
import markdown
import collections
import egaia_config
import egaia_parsefn
import egaia_list
import egaia_bag
import pkg_resources
import utils
from egaia_sanitize import makeSlug
import egaia_root
import egaia_collage
import version
import egaia_docx
import egaia_derive
# ---- Module configuration, read once at import time ----
# Language code used in generated file names (index-<language>.html).
language = egaia_config.getConfig('archive', 'language')
# Root of the published output tree ("~" is expanded).
pub = os.path.expanduser(egaia_config.getConfig('archive', 'pub_path'))
# Metadata terms for which keyword indexes (<term>-<language>.idx) are kept.
indexed_fields = ['DCTERMS.subject', 'DCTERMS.creator', 'DCTERMS.coverage',
'DCTERMS.type', 'DCTERMS.language']
# Location of the static assets inside the published tree.
static_dir = os.path.join(pub, 'static')
default_thumb = os.path.join(static_dir, 'default_thumb.png')
# Archive display name and upper-cased short prefix, decoded to unicode.
archive = egaia_config.getConfig('archive', 'archive_name').decode('utf-8')
short_name = egaia_config.getConfig('archive', 'archive_prefix').decode('utf-8').upper()
# Boolean switches: prefer remote embeds; export video stills.
remote_embeds = egaia_config.getConfig('archive', 'remote_embeds', boolean=True)
export_stills = egaia_config.getConfig('archive', 'export_stills', boolean=True)
# Timestamp captured at import; substituted for $date in generated pages.
date = utils.current_time()
cmd_convert = egaia_config.getConfig('system', 'cmd_convert')
core_metadata = egaia_docx.getCoreFields(filtered=False)
# fields_rev: the 'terms' config section as a dict; fields: its inverse
# (value -> key).
fields_rev = dict(egaia_config.printConfig(section='terms'))
fields = dict((value, key) for key, value in fields_rev.iteritems())
# Strings from the 'template_fields' config section; merged into the
# substitution dict of every page template.
template_fields = dict(egaia_config.printConfig(section='template_fields'))
# Ignore these metadata fields in html output. Use lowercase keys.
# We skip the title fields because they are separately printed at the top
# Skip the organizational info because this will normally be included in the
# footer or elsewhere in HTML output
skip_fields = ['bag-software-agent', 'payload-oxum', 'title', 'DCTERMS.title',
'source-organization', 'organization-address', 'contact-name',
'contact-phone', 'contact-email', 'DCTERMS.format']
# string.Template source for one item entry in an index or list page.
# Filled by makeItemPreview() via safe_substitute; placeholders used:
# identifier, language, thumb, title, creator, short_description,
# subject_tags.
item_preview_template = u"""<div id="${identifier}" class="index-entry">
<a href="/item/${identifier}/index-${language}.html">
<img src="${thumb}" class="contain thumbnail thumb-img-cover">
</a>
<div class="index-description caption">
<h4><a href="/item/${identifier}/index-${language}.html">${title}</a><br> <small>$creator</small></h4>
$short_description
<p>$subject_tags</p>
</div><!--/index-description-->
</div> <!-- index-entry -->
"""
# string.Template source for one collection entry in the collections index.
# Filled by makeCollectionPreview() via safe_substitute; placeholders used:
# identifier, language, title, short_description.
collection_preview_template = u"""<div class="index-entry" id="${identifier}">
<a href="/collection/${identifier}/index-${language}.html">
<img src="/collection/${identifier}/collection-thumb.jpg" class="thumbnail">
</a>
<div class="index-description caption">
<h4><a href="/collection/${identifier}/index-${language}.html">$title</a></h4>
$short_description
</div><!--/index-description-->
</div> <!-- index-entry -->
"""
# string.Template source for the standalone HTTP error pages.
# Filled by makeErrorPages(); placeholders used: title, message, Home.
error_page_template = u"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="icon" type="image/png" href="/static/favicon.png">
<title>$title</title>
<link rel="stylesheet" href="/static/css/bootstrap.min.css">
<link rel="stylesheet" href="/static/css/egaia.css">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!--[if lt IE 9]>
<script src="/static/js/html5shiv.min.js"></script>
<script src="/static/js/respond.min.js"></script>
<![endif]-->
</head>
<body>
<div class="main-content container">
<div class="jumbotron">
<div class="alert alert-danger">
<h1>$title</h1>
<p>${message}</p>
<hr>
<p><span class="glyphicon glyphicon-home"></span> <a href="/" class="alert-link">${Home}</a></p>
</div>
</div> <!-- /container -->
</div> <!-- /main-content -->
</body>
</html>
"""
# string.Template source for every full page (item, collection, index and
# home pages). Filled via safe_substitute by makeItemPage(),
# makeCollectionPage(), writeIndex() and makeHomePage().
# Placeholders set by those functions: language, version, title, subtitle,
# archive, short_name, body, date. The remaining placeholders (toggle,
# About, Collections, Items, All_items, Keywords, Subject, Coverage_area,
# Media_type, Creator, Language, last_updated, generator) are expected to
# come from template_fields; safe_substitute leaves any missing one as-is.
html_template = u"""<!DOCTYPE html>
<html lang="${language}">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="generator" content="egaia $version">
<link rel="icon" type="image/png" href="/static/favicon.png">
<title>$title - $archive</title>
<link rel="stylesheet" href="/static/css/bootstrap.min.css">
<link rel="stylesheet" href="/static/css/egaia.css">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!--[if lt IE 9]>
<script src="/static/js/html5shiv.min.js"></script>
<script src="/static/js/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Fixed navbar -->
<nav class="navbar navbar-default navbar-static-top">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
<span class="sr-only">$toggle</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="/index-${language}.html"><span class="visible-xs">$short_name</span><span class="hidden-xs">$archive</span></a>
</div>
<div id="navbar" class="navbar-collapse collapse navbar-right">
<ul class="nav navbar-nav">
<li><a href="/index-${language}.html">${About}</a></li>
<li><a href="/collection/index-${language}.html">${Collections}</a></li>
<li class="dropdown">
<a href="/" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">${Items} <span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="/item/index-${language}.html">${All_items}</a></li>
<li role="separator" class="divider"></li>
<li class="dropdown-header">${Keywords}</li>
<li><a href="/DCTERMS.subject/index-${language}.html">${Subject}</a></li>
<li><a href="/DCTERMS.coverage/index-${language}.html">${Coverage_area}</a></li>
<li><a href="/DCTERMS.type/index-${language}.html">${Media_type}</a></li>
<li><a href="/DCTERMS.creator/index-${language}.html">${Creator}</a></li>
<li><a href="/DCTERMS.language/index-${language}.html">${Language}</a></li>
</ul>
</li>
</ul>
</div><!--/.nav-collapse -->
</div>
</nav>
<div class="main-content">
<div class="container">
<div class="page-header">
<h1>$title <small>$subtitle</small></h1>
</div>
$body
</div> <!-- /container -->
</div> <!-- /main-content -->
<footer class="footer text-muted">
<div class="container">
<p id="generator">$last_updated $date. $generator</p>
</div> <!-- /container -->
</footer>
<script src="/static/js/jquery.min.js"></script>
<script src="/static/js/bootstrap.min.js"></script>
</body>
</html>
"""
# string.Template source for a redirect ("alias") page, used by makeAlias().
# Placeholders: target (used in the title and the redirect URL) and
# filename (last path component of the redirect URL).
alias_template = """<!DOCTYPE html>
<html>
<head>
<title>${target}</title>
<meta name="robots" content="noindex">
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta http-equiv="refresh" content="0; url=/item/${target}/${filename}"/>
</head>
</html>"""
def init():
    """Make sure the static assets directory exists in the published tree."""
    if os.path.exists(static_dir):
        return
    makeStaticDir()
# TEMPLATE: Deprecate here. Move the template strings into the
# config file.
#### DIRECTORY AND FILE COPY/WRITE FUNCTIONS
def makeStaticDir(force=False):
    """Copy the packaged ``static`` directory into the published tree.

    With ``force`` set, an existing static directory is removed first.
    Returns True when the copy succeeded, False otherwise (including the
    case where the destination already exists and ``force`` is not set).
    """
    if force and os.path.exists(static_dir):
        try:
            shutil.rmtree(static_dir)
        except EnvironmentError:
            # Best effort: the copy below reports the failure if the old
            # directory is still in the way.
            print("Error removing existing static dir!")
    default_static_dir = pkg_resources.resource_filename('egaia', 'static')
    print("copying %s to %s" % (default_static_dir, pub))
    try:
        shutil.copytree(default_static_dir, static_dir)
    except EnvironmentError:
        # Covers IOError/OSError and shutil.Error; deliberately not a bare
        # except, so KeyboardInterrupt and SystemExit still propagate.
        print("Unable to write static dir!")
        return False
    return True
def getDestDir(uuid, type='item'):
    """Return (creating it if needed) the output directory for ``uuid``.

    The directory is ``<pub>/<type>/<uuid>``. Returns False when ``uuid``
    is None.
    """
    if uuid is None:
        return False
    destdir = os.path.join(pub, type, uuid)
    if os.path.exists(destdir):
        return destdir
    os.makedirs(destdir)
    return destdir
def copyDerivatives(derivs, destdir, force=False):
    """Copy derivatives to the "pub" directory for distribution.

    Each file in ``derivs`` is hard-linked into ``destdir`` under its base
    name, falling back to a regular copy when linking is not possible.
    An existing destination is left alone unless it is older than the
    source or ``force`` is set.
    """
    for fn in derivs:
        filename = os.path.basename(fn)
        dest = os.path.join(destdir, filename)
        if os.path.exists(dest):
            # don't copy/link if the destination is newer or the same
            if not force and os.stat(fn).st_mtime <= os.stat(dest).st_mtime:
                continue
            # Always remove the stale destination before re-linking. If it
            # is already a hard link to the source, os.link() fails and the
            # copy fallback then refuses to copy a file onto itself.
            os.unlink(dest)
        print("Copying %s..." % filename)
        try:
            os.link(fn, dest)
        except (OSError, AttributeError):
            # OSError: cross-device link, unsupported filesystem, ...
            # AttributeError: os.link is unavailable on this platform.
            print("Unable to make a hardlink; copying instead")
            shutil.copy2(fn, dest)
    return
def writeHtml(html_content, destdir, filename=u'index-%s.html' % language):
    """Write ``html_content`` as UTF-8 to ``destdir/filename``.

    ``destdir`` is created when it does not exist yet.
    """
    if not os.path.exists(destdir):
        os.makedirs(destdir)
    target = os.path.join(destdir, filename)
    with codecs.open(target, mode='w', encoding='utf-8') as handle:
        handle.write(html_content)
    return
#### ELEMENTS FOR HTML OUTPUT
def getThumb(uuid, size='thumb'):
    """Return the base filename of an item's thumbnail image, or None.

    Only the file name is returned (not a full tag) because callers need
    to build the path themselves, depending on whether they are in an
    index or elsewhere. The lookup is done in the item's output (pub)
    directory, filtering on the ``df-<size>`` derivative type.
    """
    matches = egaia_list.listFiles(filepath=getDestDir(uuid), uuid=uuid,
                                   filter_type='df-%s' % size, exclude=None)
    if matches:
        return os.path.basename(matches[0])
    return None
def makeEmbedTag(derivs, page):
    """Build an embed tag for the first playable file in ``derivs``.

    ``page`` is the uuid of the item; it is used to build the media URL
    ``item/<page>/<basename>``. The first ``.mp3`` (audio) or ``.webm``
    (video) derivative wins. Returns None when ``derivs`` is empty or
    holds no such file.
    """
    if not derivs:
        return None
    # (file suffix, html5 element, mime type); we might also have mp4
    playable = (('.mp3', 'audio', 'audio/mpeg'),
                ('.webm', 'video', 'video/webm'))
    for fn in derivs:
        src = '/'.join(['item', page, os.path.basename(fn)])
        for suffix, element, mime in playable:
            if fn.endswith(suffix):
                return makeLocalEmbed(embed_type=element, src=src, mtype=mime)
        # Text/html embeds are deliberately not produced; it is cleaner to
        # separate document and metadata.
    return None
def makeTextEmbed(fn):
    """Read the UTF-8 file ``fn`` and wrap its content in a text_embed div."""
    with codecs.open(fn, mode='r', encoding='utf-8') as source:
        content = source.read()
    return u'<div class="text_embed">%s</div>' % content
def makeLocalEmbed(embed_type=None, src=None, mtype=None):
    """Assemble an html5 audio or video embed tag.

    ``embed_type`` is the element name ('audio' or 'video'), ``src`` the
    media path without a leading slash and ``mtype`` its mime type. Video
    embeds get the responsive 16:9 wrapper class.
    """
    if 'video' in embed_type:
        wrapper_class = 'embed-responsive embed-responsive-16by9'
    else:
        wrapper_class = 'embed'
    markup = """<div class="%s">
<%s controls>
<source src="/%s" type="%s" class="embed-responsive-item">
<a href="/%s">Download</a>
</%s></div>"""
    return markup % (wrapper_class, embed_type, src, mtype, src, embed_type)
def makeRemoteEmbed(url):
    """Return a responsive 16:9 iframe embed pointing at ``url``.

    This is usable in an item description page.
    """
    iframe = """<div class="embed-responsive embed-responsive-16by9">
<iframe src="%s" scrolling="no" class="embed-responsive-item"
allowfullscreen></iframe></div>"""
    return iframe % url
def makeDownloadsList(derivs):
    """Return an html table of download links for ``derivs``.

    One row per file: link (labelled with the format), extension, size and
    modification time. Rows are sorted as strings. Returns None when
    ``derivs`` is empty.
    """
    if not derivs:
        return None
    row_template = u"""<tr><td><a href="/item/{uuid}/{filename}">{fmt}</a></td>
<td>{ext}</td>
<td>{size}</td>
<td>{mtime}</td>
</tr>"""
    rows = []
    for path in derivs:
        filename = os.path.basename(path)
        basename, uuid, extension = egaia_parsefn.parseFilename(filename)
        size = utils.byteSize(os.stat(path).st_size)
        mtime = utils.isotime(os.stat(path).st_mtime)
        # The format label is whatever follows the last dot of the parsed
        # base name; without a dot the configured "source" label is used.
        if '.' in basename:
            fmt = basename.rpartition('.')[2]
        else:
            fmt = template_fields['source']
        rows.append(row_template.format(uuid=uuid, filename=filename,
                                        fmt=fmt, ext=extension,
                                        size=size, mtime=mtime))
    rows.sort()
    return (u'<table class="table table-condensed">'
            + u'\r\n'.join(rows)
            + u'</table>')
def generateList(items):
    """Wrap each item in a responsive grid cell (2 per row sm, 3 per row md).

    Returns a list with one html string per item, or the string ' ' when
    ``items`` is empty.
    """
    if len(items) == 0:
        return ' '
    # add an offset when there are exactly two items
    offset = ' col-md-offset-1' if len(items) == 2 else ''
    cells = []
    for position, item in enumerate(items, 1):
        parts = [u'<div class="col-xs-12 col-sm-6 col-md-4%s">%s</div>'
                 % (offset, item)]
        # add clearfixes. sm:2, md:3
        if position % 2 == 0:
            parts.append(u'<div class="clearfix visible-sm-block"></div>')
        if position % 3 == 0:
            parts.append(u'<div class="clearfix visible-md-block visible-lg-block"></div>')
        cells.append(u'\r\n'.join(parts))
    return cells
def generatePlainList(items):
    """Wrap each item in a two-per-row cell, not as a thumbnail grid.

    Returns a list with one html string per item, or the string ' ' when
    ``items`` is empty.
    """
    if len(items) == 0:
        return ' '
    cells = []
    for position, item in enumerate(items, 1):
        parts = ['<div class="col-xs-12 col-md-6">%s</div>' % item]
        # add clearfixes. md:2
        if position % 2 == 0:
            parts.append(u'<div class="clearfix"></div>')
        cells.append('\r\n'.join(parts))
    return cells
#### HTML output
def makeCollectionJson(delete=False, outdir=None):
    """Write (or delete) the json metadata of the current collection.

    One ``<uuid>-<language>.json`` file is written per metadata docx file,
    into the item's output directory, followed by one for the collection
    itself. With ``delete`` set the files are removed instead. A truthy
    ``outdir`` skips the collection-level file (items only).
    """
    # FIXME: Use localized labels from config
    bag_info = egaia_bag.loadBag()
    collection_uuid = bag_info['External-Identifier'].decode('utf-8')
    # See if we have a metadata file in the bag root
    readme_file = os.path.join(egaia_root.get_root(),
                               'metadata-%s.docx' % language)
    if os.path.exists(readme_file):
        collection_info = egaia_docx.parseDocx(readme_file)
    else:
        collection_info = dict()
        print("could not read %s" % readme_file)
    # Fall back to the bag title when the docx carries no title
    collection_name = u' '.join(
        collection_info.get('title', [bag_info['Title'].decode('utf-8')]))

    collection_items = []
    # sort by input filename; otherwise they will be ordered by uuid
    for docx_file in sorted(egaia_list.listFiles(filter_type="metadata")):
        print("processing %s..." % docx_file)
        uuid = egaia_parsefn.getUuid(docx_file)
        json_filepath = os.path.join(getDestDir(uuid, type='item'),
                                     '%s-%s.json' % (uuid, language))
        if delete:
            utils.rm(json_filepath)
            continue
        # make the json file, adding a reference to the parent collection
        item_meta = json.loads(egaia_docx.docx2json([docx_file]))[0]
        item_meta['collection_name'] = collection_name
        item_meta['collection_uuid'] = collection_uuid
        with codecs.open(json_filepath, mode='w', encoding='utf-8') as json_file:
            json_file.write(json.dumps(item_meta, sort_keys=True, indent=4,
                                       ensure_ascii=False))
        collection_items.append(uuid)

    if outdir:
        # Don't export the collection json; we just want items
        # This is for the --json --outdir=XXX option
        return

    # Now process the collection metadata
    uuid = bag_info['External-Identifier']
    collection_info['collection_items'] = collection_items
    collection_info['title'] = collection_name
    collection_info['identifier'] = uuid
    json_filepath = os.path.join(getDestDir(uuid, type='collection'),
                                 '%s-%s.json' % (uuid, language))
    if delete:
        utils.rm(json_filepath)
    else:
        with codecs.open(json_filepath, mode='w', encoding='utf-8') as json_file:
            json_file.write(json.dumps(collection_info, sort_keys=True, indent=4))
    return
def loadJson(uuid, type):
    """Load the json metadata of an item or collection.

    Reads ``<uuid>-<language>.json`` from the output directory for
    ``uuid``; ``type`` is passed to getDestDir() ('item' or 'collection'
    at the call sites in this module). Returns the decoded object, or None
    (after printing a message) when the file cannot be read or parsed.
    """
    json_filepath = os.path.join(getDestDir(uuid, type=type),
                                 '%s-%s.json' % (uuid, language))
    try:
        with codecs.open(json_filepath, mode='r', encoding='utf-8') as json_file:
            return json.loads(json_file.read())
    except (IOError, OSError, ValueError):
        # IOError/OSError: missing or unreadable file. ValueError: invalid
        # json or invalid UTF-8. Anything else (e.g. KeyboardInterrupt) is
        # deliberately allowed to propagate.
        print("Error opening %s!" % json_filepath)
        return None
def updateItemIndex(uuid, delete=False):
    """Update the keyword indexes based on json metadata for a given item.

    For every indexed field, ``<field>-<language>.idx`` is rewritten: lines
    mentioning ``uuid`` are dropped and, unless ``delete`` is set, one
    ``keyword<TAB>uuid`` line is added per non-empty value of the field.
    """
    # This function reads the entire index for each keyword into memory.
    # We could easily do this by writing directly to disk, for scalability.
    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        print("could not open json for %s" % uuid)
        return
    for index in indexed_fields:
        label = fields_rev[index]
        index_filepath = os.path.join(pub, '%s-%s.idx' % (index, language))
        index_data = []
        # load the existing index if available, without blank lines and
        # without any reference to this item
        if os.path.exists(index_filepath):
            with codecs.open(index_filepath, mode='r', encoding='utf-8') as index_file:
                index_data = [line.strip() for line in index_file
                              if line.strip() != '' and uuid not in line]
        if not delete and meta_str.get(label):
            # the field may hold a single value or a list of values
            values = meta_str[label]
            if not isinstance(values, list):
                values = [values]
            index_data.extend(u'%s\t%s' % (kw, uuid) for kw in values if kw)
        # write the index to file
        with codecs.open(index_filepath, mode='w', encoding='utf-8') as index_file:
            index_file.write(u'\n'.join(index_data))
    return
def makeItemPreview(uuid):
    """Generate an html preview for an item, to be included in indexes
    or lists.

    Returns the rendered ``item_preview_template``, or None when the
    item's json metadata cannot be loaded.
    """
    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        return
    subject_label = fields_rev['DCTERMS.subject']
    subjects = meta_str.get(subject_label, [])
    if len(subjects) != 0 and subjects[0] != '':
        tag_links = makeTags(subjects, tagtype="DCTERMS.subject")
        meta_str['subject_tags'] = subject_label + u': ' + u', '.join(tag_links)
    else:
        meta_str['subject_tags'] = ''
    desc = ' '.join(meta_str.get(fields_rev['DCTERMS.description'], ['']))
    meta_str['short_description'] = utils.md2html(utils.truncate(desc, length=200))
    meta_str['title'] = meta_str.get(fields_rev['DCTERMS.title'], '')
    meta_str['identifier'] = meta_str.get(fields_rev['DCTERMS.identifier'], [''])
    meta_str['language'] = language
    # Only thumbnails already present in the output directory are used
    thumb = getThumb(uuid, size='thumb')
    if thumb:
        meta_str['thumb'] = '/item/%s/%s' % (uuid, thumb)
    else:
        meta_str['thumb'] = '/static/null.png'
    meta_str['creator'] = '; '.join(
        meta_str.get(fields_rev['DCTERMS.creator'], list()))
    # Flatten list values to plain strings for substitution; non-empty
    # lists of lists (tabular data?) are left untouched.
    for k, v in meta_str.iteritems():
        is_table = all(isinstance(i, list) for i in v) and len(v) > 0
        if not is_table and isinstance(v, list):
            meta_str[k] = ' '.join(v)
    meta_str.update(template_fields)
    return Template(item_preview_template).safe_substitute(meta_str)
def makePreview(uuid, preview_type='item', delete=False):
    """Create (or delete) the cached html preview of an item or collection.

    The preview is stored as ``<pub>/.previews-<language>/<uuid>``. Nothing
    is written when the preview text comes back empty. Always returns None.
    """
    previews_dir = os.path.join(pub, '.previews-%s' % language)
    index_path = os.path.join(previews_dir, uuid)
    if delete:
        if os.path.exists(index_path):
            os.unlink(index_path)
        return
    if preview_type == 'collection':
        render = makeCollectionPreview
    else:
        render = makeItemPreview
    preview_text = render(uuid)
    if not preview_text:
        return None
    if not os.path.exists(previews_dir):
        os.makedirs(previews_dir)
    with codecs.open(index_path, mode='w', encoding='utf-8') as index_file:
        index_file.write(preview_text)
    return
def getPreview(uuid, preview_type='item'):
    """Retrieve an html preview for an item or collection.

    The cached preview is read from ``<pub>/.previews-<language>/<uuid>``.
    When it does not exist yet it is generated first with makePreview().
    Returns the preview text, or None when no preview could be generated.
    """
    index_path = os.path.join(pub, '.previews-%s' % language, uuid)
    if not os.path.exists(index_path):
        print(index_path)
        print('generating preview for %s' % uuid)
        # makePreview() writes the cache file but returns nothing, so the
        # freshly generated text has to be read back from disk below.
        makePreview(uuid, preview_type=preview_type)
        if not os.path.exists(index_path):
            return None
    with codecs.open(index_path, mode='r', encoding='utf-8') as index_file:
        return index_file.read()
def makeItemPage(uuid, force=False, delete=False, nolinks=False):
    """Generate (or delete) the html page of an item.

    Copies the item's derivatives, metadata file and editable originals to
    its output directory, then writes ``index-<language>.html`` there with
    an embed or image, the metadata panel and the downloads table.
    ``force`` is passed to copyDerivatives(); ``nolinks`` is passed to
    makeMetadataTable() and also drops the link to the parent collection.
    """
    index_filepath = os.path.join(getDestDir(uuid, type='item'),
                                  'index-%s.html' % language)
    if delete:
        utils.rm(index_filepath)
        return
    meta_str = loadJson(uuid, type='item')
    if meta_str is None:
        return

    exclude = []
    if export_stills is False:
        # exclude the video stills directory
        exclude.extend(['.df-still-', '.df-stills-index'])
    if meta_str.get('public', [''])[0].lower() == 'false':
        # FIXME: This should be a whitelist, not a blacklist.
        exclude.extend(['.df-pdf', '.df-360p-vp9-400k', '.df-mp3', '.df-h264'])
    destdir = getDestDir(uuid, type='item')

    # get a base list of all the derivatives to copy
    df = egaia_list.listFiles(uuid=uuid, filter_type='df', exclude=exclude)
    # we also want to copy the docx metadata file, so people can edit it
    metadata_file = egaia_list.listFiles(uuid=uuid, filter_type='metadata')
    # and we want to copy "mutable" (editable) files that collaborators might
    # download, edit, and send back to us
    of = egaia_list.listFiles(uuid=uuid, filter_type='orig', exclude=exclude)
    mutable = [x for x in of if x.endswith(('.docx', '.svg'))]
    all_derivs = df + metadata_file + mutable
    # Copy everything to the public directory by default
    if all_derivs:
        copyDerivatives(all_derivs, destdir, force=force)

    # Create a link to the files for download.
    # Ignore "medium" images, even if they are the primary distribution
    # format, since they are already embedded on the page
    dl_excludes = exclude + ['df-med', 'df-thumb']
    # Don't link to local downloads if remote embeds are available
    remote = meta_str.get(fields_rev['remote_embed_url'])
    use_remote = remote and remote_embeds is True
    if use_remote:
        # Exclude the video download links, but copy them to the "pub"
        # directory anyway. This allows us to have all the derivative files
        # in place for offline distribution, while keeping them unlisted;
        # we can publish the catalogue telling rsync to exclude these files.
        # FIXME: Make this look at the filenames themselves, not just type.
        dl_excludes += ['.df-h264', '.df-360p-vp9-400k']
    derivs = egaia_list.listFiles(uuid=uuid, filter_type='df',
                                  exclude=dl_excludes) + metadata_file + mutable

    # manage the embed tag
    if use_remote:
        print("using REMOTE url: %s" % remote[0])
        embed = makeRemoteEmbed(remote[0])
    else:
        embed = makeEmbedTag(derivs, page=uuid)
    thumb = getThumb(uuid, size='med')
    metadata = makeMetadataTable(meta_str, nolinks=nolinks)
    downloads = makeDownloadsList(derivs)

    body = []
    if thumb and not embed:
        # The image is intentionally not a link, because that can be
        # confusing. For instance, we may have the thumb for a pdf but it
        # links to an html version of the same document that looks
        # completely different from the thumb.
        body.append(u'<p class="embed-img-responsive"><img src="/item/%s/%s" class="img-responsive"></p>' % (uuid, thumb))
    if embed:
        body.append(embed)
    # add the metadata panel
    body.append(u'<div class="element-set panel panel-default">'
                + u'<div class="panel-heading">'
                + u'<h3 class="panel-title">%s</h3>' % template_fields['Metadata']
                + u'</div>'
                + u'<div class="panel-body">')
    if meta_str['collection_name'] and meta_str['collection_uuid']:
        body.append(u'<h3 id="parent-collection">%s</h3>' % fields['collection'])
        if nolinks:
            body.append(u'<p>%s</p>' % meta_str['collection_name'])
        else:
            body.append(u"""<p><a href="/collection/%s/index-%s.html">
%s
</a></p>""" % (meta_str['collection_uuid'], language,
               meta_str['collection_name']))
    if metadata:
        body.append(metadata)
    if downloads:
        body.append(u'<h3 id="item-files">%s</h3>' % template_fields['files'])
        body.append(downloads)
    body.append(u'</div><!-- /panel-body --></div><!-- /element-set -->')

    # The title string returned by the docx parser SHOULD be a list
    meta_str['title'] = ' '.join(meta_str.get('title', ['']))
    meta_str.update({
        'subtitle': '',
        'date': date,
        'version': version.get_version(pep440=True),
        'archive': archive,
        'short_name': short_name,
        'body': u' '.join(body),
        'language': language,
    })
    # template_fields is applied last, so it wins on any key clash
    meta_str.update(template_fields)
    page = Template(html_template).safe_substitute(meta_str)
    with codecs.open(index_filepath, mode='w', encoding='utf-8') as out_file:
        out_file.write(page)
    # FIXME: alias (redirect) pages are not generated yet; see makeAlias().
    return
def makeCollectionPreview(uuid):
    """Generate the html preview (image and brief description) of a
    collection, for inclusion in the list or indexes of collections.

    Returns '' when the collection's json metadata cannot be loaded.
    """
    meta_str = loadJson(uuid, type='collection')
    if meta_str is None:
        return ''
    meta_str['language'] = language
    desc = ' '.join(meta_str.get('description', ['']))
    meta_str['short_description'] = utils.md2html(utils.truncate(desc, length=800))
    template = Template(collection_preview_template)
    meta_str.update(template_fields)
    return template.safe_substitute(meta_str)
def makeCollectionPage(uuid, delete=False):
    """Generate (or delete) the html description page of the collection.

    Copies the collection thumbnail and cover images into the collection's
    output directory, then writes ``index-<language>.html`` there with the
    cover image, the metadata panel and a grid of item previews.
    """
    destdir = getDestDir(uuid, type='collection')
    index_filepath = os.path.join(destdir, 'index-%s.html' % language)
    if delete:
        utils.rm(index_filepath)
        return
    meta_str = loadJson(uuid, type='collection')
    if meta_str is None:
        return
    # copy the collection thumbnail images, generating them when missing
    for image_name in ('collection-thumb.jpg', 'collection-cover.jpg'):
        fn = os.path.join(egaia_root.get_root(), image_name)
        if not os.path.exists(fn):
            egaia_collage.mkcollage()
        try:
            shutil.copy2(fn, destdir)
        except EnvironmentError:
            # Best effort: a missing image must not abort page generation.
            # Covers IOError/OSError and shutil.Error only.
            print("Error copying collection thumb!")
    # pass "nolinks" because the collection fields are not indexed, so we
    # will get some 404 errors
    metadata = makeMetadataTable(meta_str, nolinks=True)
    collection_items = []
    for item in meta_str.get('collection_items') or []:
        preview = getPreview(item, preview_type='item')
        # Skip items without a preview; otherwise the text "None" would be
        # rendered in the grid.
        if preview:
            collection_items.append(preview)
    body = []
    body.append(u'<p><img src="/collection/%s/collection-cover.jpg" class="img-rounded img-responsive"></p>' % uuid)
    if metadata:
        body.append(u'<div class="element-set panel panel-default">'
                    + u'<div class="panel-heading">'
                    + u'<h3 class="panel-title">%s</h3>' % template_fields['Metadata']
                    + u'</div>'
                    + u'<div class="panel-body">'
                    + u'<div>'
                    + metadata
                    + u'</div></div><!-- /panel-body --></div><!-- /element-set -->')
    if collection_items:
        body.append(u'<h2>Items</h2>')
        body.append(u'<div class="items-list container row">')
        body.append(' '.join(generateList(collection_items)))
        body.append(u'</div>')  # bootstrap
    meta_str['subtitle'] = template_fields['collection']
    meta_str['date'] = date
    meta_str['version'] = version.get_version(pep440=True)
    meta_str['archive'] = archive
    meta_str['short_name'] = short_name
    meta_str['body'] = u' '.join(body)
    meta_str['language'] = language
    html_content = Template(html_template)
    # template_fields is applied last, so it wins on any key clash
    meta_str.update(template_fields)
    index_entry = html_content.safe_substitute(meta_str)
    with codecs.open(index_filepath, mode='w', encoding='utf-8') as out_file:
        out_file.write(index_entry)
    return
def makeIndexes():
    """Regenerate the indexes for the entire archive.

    Writes the item and collection listings, then, for every indexed
    metadata field that has a ``<field>-<language>.idx`` file, one page per
    keyword plus a page listing all keywords of that field.
    """
    # items index. List the items available in the exported collection.
    for i in ('item', 'collection'):
        print("Generating %s index..." % i)
        basedir = os.path.join(pub, i)
        items = os.listdir(basedir)
        # sort the items by date added to the archive
        items.sort(key=lambda x: os.path.getctime(os.path.join(basedir, x)),
                   reverse=True)
        index = []
        for item in items:
            if os.path.isfile(os.path.join(basedir, item)):
                continue
            preview = getPreview(item, preview_type=i)
            if preview:
                index.append(preview)
        if i == 'item':
            writeIndex(generateList(index), [i], 'items', 'index')
        else:
            writeIndex(generatePlainList(index), [i], 'collections', 'index')

    for kw in indexed_fields:
        label = fields_rev[kw]
        print("Generating %s index..." % label)
        idx_filepath = os.path.join(pub, '%s-%s.idx' % (kw, language))
        if not os.path.exists(idx_filepath):
            print("No metadata index found: %s" % kw)
            continue
        # The index file contains one tag<TAB>uuid pair per line
        tags = collections.defaultdict(list)
        with codecs.open(idx_filepath, mode='r', encoding='utf-8') as idx_file:
            for line in idx_file:
                entry = line.strip()
                if '\t' not in entry:
                    continue
                # Split on the last tab only, so a malformed line cannot
                # break the two-value unpacking.
                tag_name, item_uuid = entry.rsplit('\t', 1)
                tags[tag_name].append(item_uuid)
        for tag_name, uuids in tags.iteritems():
            previews = [getPreview(item, preview_type='item') for item in uuids]
            # Drop items without a preview; otherwise the text "None" would
            # be rendered in the grid.
            index = [preview for preview in previews if preview]
            writeIndex(generateList(index),
                       [kw, makeSlug(tag_name)],
                       tag_name,
                       template_fields[u'%s_index' % label])
        # make a list of tags, for {TYPE}/index-${language}.html
        out = []
        for tag in sorted(tags):
            link = u'%s/%s/index-%s.html' % (kw, makeSlug(tag), language)
            out.append(u'<p><a href="/%s">%s</a> <span class="text-muted">(%s)</span></p>'
                       % (link, tag, len(tags[tag])))
        # NOTE(review): this page is written under makeSlug(kw) while the
        # tag pages above use kw unchanged -- confirm makeSlug() keeps names
        # such as "DCTERMS.subject" intact.
        writeIndex(out, [makeSlug(kw)], label, template_fields['keyword_index'],
                   paginate_size=500)
def writeIndex(out, target, title, subtitle, paginate_size=24):
    """Write a (possibly paginated) index page.

    ``out`` is the sequence of html fragments to list and ``target`` the
    list of path components below the publication root. The first page is
    written as ``index-<language>.html``, later pages as
    ``index_<n>-<language>.html``. ``title`` and ``subtitle`` fill the page
    header. Nothing is written when ``out`` is empty.
    """
    # dest_dir already starts with the publication root; it must not be
    # joined with it a second time (that breaks when the root is relative).
    dest_dir = os.path.join(*[pub] + target)
    dest_url_path = '/'.join(target)
    # maximum 24 items per page by default
    # should be a multiple of 12, to work with clearfixes
    paginated = [out[i:i + paginate_size]
                 for i in range(0, len(out), paginate_size)]
    total_pages = len(paginated)
    html_content = Template(html_template)
    for cur_page, page_items in enumerate(paginated, 1):
        if total_pages > 1:
            page_links = makePaginationLinks(total_pages, cur_page,
                                             dest_url_path, paginate_size)
        else:
            page_links = ''
        page_vars = {
            'title': title,
            'subtitle': subtitle,
            'archive': archive,
            'short_name': short_name,
            'body': u'\r\n'.join(page_items) + page_links,
            'date': date,
            'version': version.get_version(pep440=True),
            'language': language,
        }
        # template_fields is applied last, so it wins on any key clash
        page_vars.update(template_fields)
        if cur_page == 1:
            filename = u'index-%s.html' % language
        else:
            filename = u'index_%s-%s.html' % (cur_page, language)
        writeHtml(html_content.safe_substitute(page_vars), dest_dir, filename)
    return
def makeAlias(alias, target, filename=None):
    """Generate an alias (redirect page) for an item.

    ``target`` should be a UUID; ``alias`` should be a filename. The page
    is written to ``<pub>/<makeSlug(alias)>`` and redirects to
    ``/item/<target>/<filename>``. ``filename`` defaults to the item's
    index page for the configured language, which is where makeItemPage()
    writes it.
    """
    if filename is None:
        filename = u'index-%s.html' % language
    html_content = Template(alias_template)
    # The template's ${filename} placeholder must be filled as well, or the
    # redirect URL would contain the literal "${filename}".
    out = html_content.safe_substitute({'target': target,
                                        'filename': filename,
                                        'language': language})
    fn = os.path.join(pub, makeSlug(alias))
    print("making alias from %s to %s" % (fn, target))
    # Written as UTF-8, matching the charset the page declares.
    with codecs.open(fn, mode='w', encoding='utf-8') as outfile:
        outfile.write(out)
    return
def makeHomePage():
    """Generate a home page for the archive.

    Converts the docx file configured as ``archive/home_page`` to html and
    writes it, wrapped in the site template, to
    ``<pub>/index-<language>.html``. Returns True on success and False
    when the source file is missing or cannot be converted.
    """
    docx_file = os.path.expanduser(egaia_config.getConfig('archive', 'home_page'))
    if not os.path.exists(docx_file):
        print("File %s not found!" % docx_file)
        return False
    html = utils.docx2str(docx_file)
    if not html:
        print("Error converting docx to html")
        return False
    meta_str = {
        'title': archive,
        'body': u'<div class="static-page col-sm-12 col-md-9">' + html + u'</div>',
        'subtitle': '',
        'date': date,
        'version': version.get_version(pep440=True),
        'archive': archive,
        'short_name': short_name,
        'language': language,
    }
    # template_fields is applied last, so it wins on any key clash
    meta_str.update(template_fields)
    page = Template(html_template).safe_substitute(meta_str)
    fn = os.path.join(pub, u'index-%s.html' % language)
    with codecs.open(fn, mode='w', encoding='utf-8') as out:
        out.write(page)
    # Report success explicitly; the failure paths above return False.
    return True
def makeErrorPages():
    """Generate 404 and other error pages for the archive.

    One hidden file ``.<code>.html`` is written to the publication root
    for each of the codes 404, 403 and 500.
    """
    codes = {
        '404': 'Sorry, the resource you requested could not be found.',
        '403': 'Sorry, you do not have permission to access this page.',
        '500': 'Server error.'
    }
    template = Template(error_page_template)
    for code, message in codes.iteritems():
        page = template.safe_substitute(title=code,
                                        message=message,
                                        Home=template_fields['Home'])
        fn = os.path.join(pub, u'.%s.html' % code)
        with codecs.open(fn, mode='w', encoding='utf-8') as out:
            out.write(page)
def _cli(args):
    """egaia make

    Generate indexes and html pages for items in a collection.

    Usage:
        egaia make --help
        egaia make [ --force | --delete ] [ --nolinks ] TARGET...

    Targets:
        all
        json
        database
        item-pages
        collection-page
        indexes
        item-previews
        collection-preview
        home-page
        static
    """
    # NOTE(review): the docstring above is presumably the docopt usage text;
    # keep the pattern lines indented under "Usage:" and keep the blank
    # lines between sections.
    #
    # ``args`` is the parsed option dict: 'TARGET' (list of target names),
    # '--force', '--delete' and '--nolinks'.
    items = egaia_list.listItems()
    init()
    # Work on a private copy so the caller's argument list is not modified.
    targets = set(args['TARGET'])
    if 'all' in targets:
        # "all" does not include the home-page and static targets
        targets.update((
            'json',
            'database',
            'item-pages',
            'collection-page',
            'indexes',
            'item-previews',
            'collection-preview',
        ))
    delete = args['--delete']
    if 'json' in targets and not delete:
        # don't delete the json yet as we may still need it
        # but we must create it before item pages, etc.
        makeCollectionJson()
    for item in items:
        if item is None:
            continue
        if 'item-pages' in targets:
            makeItemPage(item, force=args['--force'], delete=delete,
                         nolinks=args['--nolinks'])
        if 'item-previews' in targets:
            makePreview(item, preview_type='item', delete=delete)
        if 'database' in targets:
            updateItemIndex(item, delete=delete)
    bag_info = egaia_bag.loadBag()
    collection_uuid = bag_info['External-Identifier']
    if 'collection-preview' in targets:
        makePreview(collection_uuid, preview_type='collection', delete=delete)
    if 'collection-page' in targets:
        makeCollectionPage(collection_uuid, delete=delete)
    # it is now safe to delete the json
    if delete:
        makeCollectionJson(delete=delete)
    if 'indexes' in targets:
        # FIXME: support delete flag
        makeIndexes()
    if 'home-page' in targets:
        makeHomePage()
    if 'static' in targets:
        makeStaticDir(force=True)
        makeErrorPages()