Source code for surround.data.metadata

from collections.abc import Mapping

import os
import json
import yaml

from .util import get_formats_from_directory, get_formats_from_files, get_types_from_formats

class Metadata(Mapping):
    """
    Represents metadata of a Data Container.

    Responsibilities:

    - Create metadata, exporting to YAML string and/or file
    - Generate default metadata as per schema
    - Automatically generate values to fields based on files given
    - Get/set properties

    The object is a read-only :class:`Mapping` view over the top-level keys of
    the underlying metadata dictionary.
    """

    # 'key_name': (TYPE, REQUIRED, SUB_SCHEMA)
    SCHEMA = {
        'v0.1': {
            'version': (str, True, None),
            'summary': (dict, True, {
                'title': (str, True, None),
                'creator': (str, True, None),
                'subject': (list, True, None),
                'description': (str, True, None),
                'publisher': (str, True, None),
                'contributor': (str, True, None),
                'date': (str, True, None),
                'types': (list, True, None),
                'formats': (list, True, None),
                'identifier': (str, True, None),
                'source': (str, False, None),
                'language': (str, True, None),
                'rights': (str, True, None),
                'under-ethics': (bool, True, None),
            }),
            'manifests': (list, False, {
                'path': (str, True, None),
                'description': (str, True, None),
                'types': (list, True, None),
                'formats': (list, True, None),
                'language': (str, True, None),
            })
        }
    }

    def __init__(self, version='v0.1'):
        """
        :param version: the version of the schema to use (default: v0.1)
        :type version: str
        """

        self.version = version
        self.__storage = self.generate_default(version)

    def generate_default(self, version):
        """
        Generate a dictionary with all required fields created as per the schema.

        Required ``dict`` fields are built recursively from their sub-schema,
        any field named ``version`` receives the given version string, and every
        other required field receives the empty value of its type (``str()``,
        ``list()``, ``bool()``...). Optional fields are omitted.

        :param version: which version of the schema to use
        :type version: str
        :returns: the dictionary with default values
        :rtype: dict
        :raises KeyError: if the version is not present in ``SCHEMA``
        """

        def gen_dict(schema):
            result = {}
            for key, (typ, required, sub_schema) in schema.items():
                if required and typ is dict:
                    result[key] = gen_dict(sub_schema)
                elif key == 'version':
                    result[key] = version
                elif required:
                    result[key] = typ()
            return result

        return gen_dict(self.SCHEMA[version])

    def generate_from_files(self, files, root, root_level_dirs):
        """
        Automatically generate metadata from a list of files such as:

        - Formats (mime types)
        - Types (types from vocab)
        - Group manifests (each root level directory is considered a group)

        Any existing ``manifests`` entry is replaced when ``root_level_dirs``
        is non-empty and left untouched otherwise.

        :param files: list of files to generate from
        :type files: list
        :param root: path to the root of the folder containing the files
        :type root: str
        :param root_level_dirs: list of directories in the root
        :type root_level_dirs: list
        """

        formats = get_formats_from_files(files)
        types = get_types_from_formats(formats)

        # Guard against a duplicate entry, same as for the manifests below.
        if root_level_dirs and 'Collection' not in types:
            types.append('Collection')

        self.__storage['summary']['formats'] = formats
        self.__storage['summary']['types'] = types

        if root_level_dirs:
            manifests = []
            for root_dir in root_level_dirs:
                dir_formats = get_formats_from_directory(os.path.join(root, root_dir))
                dir_types = get_types_from_formats(dir_formats)

                if 'Collection' not in dir_types:
                    dir_types.append('Collection')

                manifests.append({
                    'path': root_dir,
                    'description': None,
                    'formats': dir_formats,
                    'types': dir_types,
                    'language': None,
                })
            self.__storage['manifests'] = manifests

    def generate_from_directory(self, directory):
        """
        Automatically generate metadata from a directory, such as:

        - Formats (mime types)
        - Types (types from vocab)
        - Group manifests (each root level directory is considered a group)

        :param directory: path to the directory to generate from
        :type directory: str
        """

        # Resolved once; every walked path is compared against it.
        base = os.path.abspath(directory)
        root_level_dirs = []
        all_files = []

        for root, dirs, files in os.walk(directory):
            all_files.extend(os.path.join(root, name) for name in files)

            # Only directories directly under the given directory are groups.
            if os.path.abspath(root) == base:
                root_level_dirs.extend(dirs)

        self.generate_from_files(all_files, directory, root_level_dirs)

    def generate_from_file(self, filepath):
        """
        Automatically generate metadata (summary formats and types) from a single file

        :param filepath: path to the file
        :type filepath: str
        """

        formats = get_formats_from_files([filepath])
        types = get_types_from_formats(formats)

        self.__storage['summary']['formats'] = formats
        self.__storage['summary']['types'] = types

    def generate_manifest_for_group(self, group_name, files, formats=None):
        """
        Generate a manifest for a group of files where the manifest contains:

        - path
        - description
        - language
        - formats (mime types)
        - types (from vocab)

        Store the manifest in the metadata storage plus return it.
        The list passed as ``formats`` is not modified.

        :param group_name: name of the group
        :type group_name: str
        :param files: list of files in the group
        :type files: list
        :param formats: list of formats in the group
        :type formats: list
        :returns: the manifest created
        :rtype: dict
        """

        # Work on a copy so the caller's list is never extended in place.
        formats = list(formats) if formats else []
        formats.extend(get_formats_from_files(files))
        types = get_types_from_formats(formats)

        # Add collection to types if more than one file or format
        if 'Collection' not in types and (len(files) > 1 or len(formats) > 1):
            types.append('Collection')

        manifest = {
            'path': group_name,
            'description': None,
            'language': None,
            'formats': formats,
            'types': types
        }

        self.__storage.setdefault('manifests', []).append(manifest)

        return manifest

    def load_from_path(self, path):
        """
        Load metadata from file (YAML), replacing the current contents.

        The file is read as UTF-8.

        :param path: path to the YAML file
        :type path: str
        """

        with open(path, "r", encoding="utf-8") as yaml_file:
            self.load_from_data(yaml_file.read())

    def load_from_data(self, data):
        """
        Load metadata from a YAML string, replacing the current contents.

        An empty YAML document results in empty metadata.

        :param data: YAML string
        :type data: str
        """

        loaded = yaml.safe_load(data)

        # safe_load gives None for an empty document; keep the storage a
        # dictionary so the Mapping interface keeps working.
        self.__storage = {} if loaded is None else loaded

    def save_to_path(self, path):
        """
        Save metadata to YAML file (written as UTF-8)

        :param path: path to save file to
        :type path: str
        """

        with open(path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(self.__storage, yaml_file)

    def save_to_data(self):
        """
        Returns metadata as string formatted in YAML

        :returns: the data in YAML string
        :rtype: str
        """

        return yaml.dump(self.__storage)

    def save_to_json(self, indent=4):
        """
        Returns metadata as string formatted in JSON

        :param indent: number of spaces in indentations
        :type indent: int
        :returns: the data in JSON format
        :rtype: str
        """

        return json.dumps(self.__storage, indent=indent)

    def save_to_json_file(self, path, indent=4):
        """
        Saves metadata to JSON file (written as UTF-8)

        :param path: path to file to export to
        :type path: str
        :param indent: number of spaces in indentations
        :type indent: int
        """

        with open(path, "w", encoding="utf-8") as json_file:
            json.dump(self.__storage, json_file, indent=indent)

    def get_property(self, path):
        """
        Get the value of a property given a path in dot notation e.g. summary.title

        ``metadata.get_property('summary.title')`` would retrieve ``Test name``
        from the following::

            summary:
                title: Test name

        ``None`` is returned when any key of the path is missing, or when the
        path continues past a value that is not a dictionary.

        :param path: path to the property using dot notation
        :type path: str
        :returns: the value of the property, none otherwise
        :rtype: any
        """

        node = self.__storage

        for key in path.split("."):
            # The path cannot be followed any further from here.
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]

        return node

    def set_property(self, path, value):
        """
        Set the value of a property given a path in dot notation e.g. summary.title

        ``metadata.set_property('summary.title', 'Test name')`` would set the
        title of the data container.

        Intermediate keys that are missing are created as dictionaries. An
        intermediate value that exists but is not a dictionary is replaced by
        one so that the full path can be stored.

        :param path: path to the property in dot notation
        :type path: str
        :param value: value to set to the property
        :type value: any
        """

        *parents, leaf = path.split(".")
        node = self.__storage

        for key in parents:
            child = node.get(key)
            if not isinstance(child, dict):
                child = {}
                node[key] = child
            node = child

        node[leaf] = value

    def __getitem__(self, key):
        return self.__storage[key]

    def __iter__(self):
        return iter(self.__storage)

    def __len__(self):
        return len(self.__storage)