# -*- coding: utf-8 -*-
import os
import internetarchive
from qtfaststart import processor as qt
import json
import egaia_root
import egaia_list
import egaia_docx
import egaia_config
import egaia_parsefn
import utils
def listVideos(uuid=None, new=True):
    """Return the collection's video metadata items, ready for post().

    Every entry of the collection metadata is examined and kept only if its
    DCTERMS.type mentions "moving image" and its upload state matches `new`.
    When a local 'df-h264' file whose path contains the item identifier is
    found, two keys are added to the item:

    * 'filename': the local path of that file;
    * 'remote_filename': the identifier plus the file extension, with
      '.mp4' rewritten to '.mpeg4'.

    Items for which no local file is found are still returned, without
    those two keys.

    Args:
        uuid: if given, keep only items whose identifier contains it.
        new: if true, keep only items that have no remote_embed_url yet;
            if false, keep only items whose remote_embed_url points at
            archive.org.

    Returns:
        A list of metadata dicts (possibly empty).
    """
    # list original filenames in the current collection, matching uuid if given
    filenames = egaia_list.listFiles(filter_type='df-h264', uuid=uuid)
    # parse the metadata; UTF-8 is already the json default, and the explicit
    # "encoding" keyword is rejected by recent Python versions
    items = json.loads(egaia_docx.collectionMetadata())
    # get the metadata labels dict
    fields = dict(egaia_config.printConfig(section='terms'))

    filtered_items = []
    for item in items:
        identifiers = item.get(fields['DCTERMS.identifier'])
        if not identifiers:
            # without an identifier the item can be neither matched to a
            # file nor published, so skip it instead of crashing
            continue
        item_uuid = identifiers[0]
        if uuid and uuid not in item_uuid:
            continue

        itemtype = item.get(fields['DCTERMS.type'])
        if not itemtype or 'moving image' not in itemtype[0].lower():
            continue

        remote = item.get(fields['remote_embed_url'])
        # we want: remote is set AND new is false
        #      or: remote is unset AND new is true
        if bool(remote) == bool(new):
            continue
        if remote and not any('archive.org' in i for i in remote):
            # embedded from somewhere else than the Internet Archive
            continue

        # retrieve the file path from the list generated above
        for filename in filenames:
            if item_uuid and item_uuid in filename:
                item['filename'] = filename
                ext = os.path.splitext(filename)[1]
                # Force IA to generate an "mp4" derivative for the embed player
                if ext.lower() == '.mp4':
                    ext = '.mpeg4'
                # Don't use the "df-xxx" tags in the remote name, as this
                # confuses IA into thinking these are different videos.
                # See https://blog.archive.org/2013/02/09/new-video-and-audio-player-video-multiple-qualities-related-videos-and-more/
                item['remote_filename'] = item_uuid + ext
                break
        # NOTE(review): the item is kept even when no local file matched;
        # this looks intentional for metadata-only updates -- confirm.
        filtered_items.append(item)
    return filtered_items
def post(items, upload=True, faststart=False, collection='opensource_movies',
         dry_run=False):
    """Upload items to the Internet Archive, or update their metadata.

    Args:
        items: list of metadata dictionaries as provided by listVideos().
        upload: if true, upload each item's local file together with its
            metadata and then record the embed url via updateDocx(); if
            false, only the remote metadata is modified.
        faststart: accepted for backward compatibility; not used here.
        collection: Internet Archive collection to publish into.
        dry_run: if true, only print what would be sent.

    A failure on one item is reported and the remaining items are still
    processed.
    """
    archive = egaia_config.getConfig('archive', 'archive_name')
    ia_noindex = egaia_config.getConfig('archive', 'ia_noindex', boolean=True)
    archive_url = egaia_config.getConfig('archive', 'archive_url').strip('/')
    # get the metadata labels dict
    fields = dict(egaia_config.printConfig(section='terms'))

    for item in items:
        item_uuid = item.get(fields['DCTERMS.identifier'])[0]
        print("processing %s..." % item_uuid)

        url = '/'.join([archive_url, 'item', item_uuid])
        link = u'Details: <a href="{url}">{url}</a>'.format(url=url)
        desc = item.get(fields['DCTERMS.description'], [])
        # Currently CRLF sequences get converted to "<BR>" by IA
        description = u'<br>\r\n'.join(desc + [link])

        meta_dict = dict(
            title=item.get(fields['DCTERMS.title'])[0],
            collection=collection,
            mediatype='movies',
            contributor=archive,
            creator=item.get(fields['DCTERMS.creator']),
            description=description,
            subject=item.get(fields['DCTERMS.subject']),
        )
        if ia_noindex is True:
            meta_dict['noindex'] = 'true'

        if dry_run:
            print('item: ' + item_uuid)
            # .get(): items without a matching local file carry no filenames
            print('files: %s <- %s' % (item.get('remote_filename'),
                                       item.get('filename')))
            print('metadata: ')
            print(meta_dict)
            print('')
            continue

        if upload:
            if 'filename' not in item or 'remote_filename' not in item:
                print("Upload failed! No local file found for %s" % item_uuid)
                continue
            try:
                r = internetarchive.upload(
                    item_uuid,
                    files={item['remote_filename']: item['filename']},
                    metadata=meta_dict,
                    verbose=True,
                )
            except Exception as err:
                # keep going with the next item, but say what went wrong
                print("Upload failed! (%r)" % (err,))
                continue
            updateDocx(item_uuid)
        else:
            try:
                r = internetarchive.modify_metadata(item_uuid,
                                                    metadata=meta_dict)
            except Exception as err:
                print("Update failed! (%r)" % (err,))
                continue
        print("Server response: %s" % (r,))
def updateDocx(uuid):
    """Populate the "remote_embed_url" metadata field for an item.

    The first file returned by egaia_docx.filter_files() for the pattern
    '*.<uuid>.*' gets a JSON object appended that maps the configured
    remote_embed_url label to 'https://archive.org/embed/<uuid>'.

    Args:
        uuid: identifier of the item, as used on the Internet Archive.

    Raises:
        IndexError: if no file matches the item.
    """
    fields = dict(egaia_config.printConfig(section='terms'))
    url = u'https://archive.org/embed/%s' % uuid
    # let the json module do the quoting/escaping rather than interpolating
    # raw values into a JSON template
    json_str = json.dumps({fields['remote_embed_url']: url})
    matches = egaia_docx.filter_files(u'*.%s.*' % uuid)
    if not matches:
        raise IndexError("no file matching '*.%s.*'" % uuid)
    egaia_docx.writeDocx(filename=matches[0], append=json_str)
def _cli(args):
    """egaia archivedotorg
    Publish videos, along with their Dublin Core metadata, to the Internet
    Archive and embed the published versions in html pages generated by
    egaia. This command also allows you to update remote item metadata.
    Usage:
      egaia archivedotorg --help
      egaia archivedotorg ( --upload | --update ) [ --collection=COLLECTION ]
                          [ --item=ITEM ] [ --dry-run ]
      egaia archivedotorg --update-docx --item=ITEM
    """
    # NOTE(review): the docstring above looks like a docopt usage spec that
    # produces the `args` dict -- confirm before rewording it.
    # sys.exit rather than the interactive-only exit() helper, which is not
    # guaranteed to exist outside of an interactive session.
    import sys

    if not egaia_root.get_root():
        sys.exit('Please run "egaia init" to create a new collection here.')

    collection = args['--collection'] or 'opensource_movies'

    # normalise the item argument to a uuid without touching the caller's dict
    item = args['--item']
    if item is not None:
        item = egaia_parsefn.getUuid(item)

    if args['--upload']:
        # publish videos that have not been uploaded yet
        post(listVideos(uuid=item),
             upload=True,
             collection=collection,
             dry_run=args['--dry-run'])
    elif args['--update']:
        # refresh the metadata of videos that are already published
        post(listVideos(uuid=item, new=False),
             upload=False,
             collection=collection,
             dry_run=args['--dry-run'])
    elif args['--update-docx']:
        # FIXME: Just update everything...
        updateDocx(item)