# Source code for egaia.egaia_parsefn

import os
import mimetypes
import magic
import re

def parseFilename(path):
    """Parse a tagged file and return a triple: (basename, uuid, extension).

    The name that gets parsed is the last component of ``path``.  When that
    name contains no dot, an extension is synthesised before splitting:

    * ``.dir`` if ``path`` is an existing directory, so directories can be
      identified from tree listings alone;
    * the extension guessed from the file signature if ``path`` exists;
    * ``.null`` otherwise (nothing on disk, or an unknown MIME type).

    :param path: path or bare filename of the tagged file.
    :returns: the ``(basename, uuid, extension)`` tuple built by
        ``separate``.
    """
    filename = os.path.basename(path)
    if '.' not in filename:
        # Add an extension to extensionless files and directories.
        ext = None
        # Probe the filesystem with the full path: the bare name would only
        # resolve when the file happens to sit in the working directory.
        if os.path.isdir(path):
            ext = '.dir'
        elif os.path.exists(path):
            # A real file (not just a filename): derive the type from the
            # file signature.
            mtype = magic.from_file(path, mime=True)
            ext = mimetypes.guess_extension(mtype, strict=True)
        if not ext:
            # Unknown mimetype, or nothing on disk to inspect.
            ext = '.null'
        filename = filename + ext
    return separate(filename)
# 8-4-4-4-12 groups of hexadecimal digits, matched case-insensitively.
_UUID_RE = re.compile(
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.I)


def separate(filepath):
    """Search for the UUID in a filepath string and split around it.

    If the string holds several UUIDs, the last one is used and everything
    before its final occurrence becomes the base part.

    :param filepath: string representing a (tagged) file path or name.
    :returns: ``(base, uuid, extension)`` with surrounding dots stripped
        from base and extension.  When no UUID is found, ``uuid`` is
        ``None`` and base/extension come from ``os.path.splitext``.
    """
    found = _UUID_RE.findall(filepath)
    if not found:
        base, ext = os.path.splitext(filepath)
        return (base, None, ext.strip('.'))
    uuid = found[-1]
    # rpartition splits at the last occurrence, so a UUID repeated earlier
    # in the path stays in the base instead of swallowing the extension.
    head, _, tail = filepath.rpartition(uuid)
    return (head.strip('.'), uuid, tail.strip('.'))
def getUuid(filepath):
    """Look up the uuid of a tagged filename.

    :param filepath: path or filename handed to ``parseFilename``.
    :returns: the second element of the parsed triple, or None when the
        name carries no uuid.
    """
    return parseFilename(filepath)[1]
def getBasename(filepath):
    """Look up the base part of a tagged filename.

    :param filepath: path or filename handed to ``parseFilename``.
    :returns: the first element of the parsed triple.
    """
    return parseFilename(filepath)[0]
def getExtension(filepath):
    """Look up the extension of a filename.

    :param filepath: path or filename handed to ``parseFilename``.
    :returns: the third element of the parsed triple.
    """
    return parseFilename(filepath)[2]
def getFormat(filepath):
    """Return the format of a derivative file.

    The format is the (possibly empty) text between the first ``df-``
    marker in ``filepath`` and the next dot that follows it.

    :param filepath: string to search; the whole string is scanned, not
        only its last path component.
    :returns: the captured format string, or None when no ``df-`` marker
        followed by a dot is present.
    """
    # Raw string so that ``\.`` reaches the regex engine as an escaped dot
    # rather than relying on an invalid string escape.
    match = re.search(r"df-(.*?)\.", filepath)
    if match is None:
        return None
    return match.group(1)
def _cli(args):
    """egaia parsefn

    Parse a given filename and return the basename, uuid, and extension.
    This is intended primarily as a utility function for other tools.

    If only the filename is given, a triple is returned containing the
    base, uuid, and extension of the filename.

    Usage:
        egaia parsefn --help
        egaia parsefn [ --base | --uuid | --ext ] FILENAME

    """
    # NOTE(review): the docstring looks like docopt-style usage text that a
    # caller may parse -- keep its wording and "Usage:" section intact.
    # ``args`` is indexed by option name ('--base', '--uuid', '--ext') and
    # by 'FILENAME'; the first truthy flag selects which part is printed.
    if args['--base']:
        result = getBasename(args['FILENAME'])
    elif args['--uuid']:
        result = getUuid(args['FILENAME'])
    elif args['--ext']:
        result = getExtension(args['FILENAME'])
    else:
        # No selector given: print the whole (base, uuid, ext) triple.
        result = parseFilename(args['FILENAME'])
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(result)