# Source code for lsst.verify.metricset

#
# LSST Data Management System
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# See COPYRIGHT file at the top of the source tree.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program.  If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
from __future__ import print_function, division

__all__ = ['MetricSet']

import os
import glob

from astropy.table import Table

import lsst.pex.exceptions
from lsst.utils import getPackageDir
from .jsonmixin import JsonSerializationMixin
from .metric import Metric
from .naming import Name
from .yamlutils import load_ordered_yaml


class MetricSet(JsonSerializationMixin):
    r"""A collection of `Metric`\ s.

    A ``MetricSet`` exposes a mapping-style API (item access, ``len``,
    ``in``, iteration) over an internal `dict` keyed by each metric's
    ``name`` attribute.

    Parameters
    ----------
    metrics : sequence of `Metric` instances, optional
        `Metric`\ s to be contained within the ``MetricSet``.

    Raises
    ------
    TypeError
        Raised if an item in ``metrics`` is not a `Metric` instance.
    """

    def __init__(self, metrics=None):
        # Internal dict of Metrics, keyed by ``metric.name``. The MetricSet
        # manages access through its own mapping API.
        self._metrics = {}

        if metrics is not None:
            for metric in metrics:
                if not isinstance(metric, Metric):
                    message = '{0!r} is not a Metric-type'.format(metric)
                    raise TypeError(message)
                self._metrics[metric.name] = metric

    @classmethod
    def load_metrics_package(cls, package_name_or_path='verify_metrics',
                             subset=None):
        """Create a MetricSet from a Verification Framework metrics package.

        Parameters
        ----------
        package_name_or_path : `str`, optional
            Name of an EUPS package that hosts metric and specification
            definition YAML files **or** the file path to a metrics package.
            ``'verify_metrics'`` is the default package, and is where metrics
            and specifications are defined for most packages.
        subset : `str`, optional
            If set, only metrics for this package are loaded. For example, if
            ``subset='validate_drp'``, only ``validate_drp`` metrics are
            included in the `MetricSet`. This argument is equivalent to the
            `MetricSet.subset` method. Default is `None`.

        Returns
        -------
        metric_set : `MetricSet`
            A `MetricSet` containing `Metric` instances.

        Raises
        ------
        OSError
            Raised if the package does not contain a ``metrics`` directory.

        See also
        --------
        lsst.verify.MetricSet.load_single_package

        Notes
        -----
        EUPS packages that host metrics and specification definitions for the
        Verification Framework have top-level directories named ``'metrics'``
        and ``'specs'``. The metrics package is chosen with the
        ``package_name_or_path`` argument. The default metric package for
        LSST Science Pipelines is ``verify_metrics``.

        To make a `MetricSet` from a single package's YAML metric definition
        file that **is not** contained in a metrics package,
        use `load_single_package` instead.
        """
        try:
            # Try an EUPS package name
            package_dir = getPackageDir(package_name_or_path)
        except lsst.pex.exceptions.NotFoundError:
            # Try as a filesystem path instead
            package_dir = package_name_or_path

        # Normalize after (not in a ``finally`` clause of) the lookup: if an
        # unexpected exception escapes the lookup, ``package_dir`` is unbound
        # and touching it would mask the real error.
        package_dir = os.path.abspath(package_dir)

        metrics_dirname = os.path.join(package_dir, 'metrics')
        if not os.path.isdir(metrics_dirname):
            message = 'Metrics directory {0} not found'
            raise OSError(message.format(metrics_dirname))

        if subset is not None:
            # Load only a single package's YAML file
            metrics_yaml_paths = [os.path.join(metrics_dirname,
                                               '{0}.yaml'.format(subset))]
        else:
            # Load all packages' YAML files. glob returns paths in arbitrary
            # order, so sort them to make the load order reproducible.
            metrics_yaml_paths = sorted(
                glob.glob(os.path.join(metrics_dirname, '*.yaml')))

        metrics = []
        for metrics_yaml_path in metrics_yaml_paths:
            metrics.extend(cls._load_metrics_yaml(metrics_yaml_path))

        return cls(metrics)

    @classmethod
    def load_single_package(cls, metrics_yaml_path):
        """Create a MetricSet from a single YAML file containing metric
        definitions for a single package.

        Parameters
        ----------
        metrics_yaml_path : `str`
            File path of the YAML file containing metric definitions.

        Returns
        -------
        metric_set : `MetricSet`
            A `MetricSet` containing `Metric` instances found in the YAML
            file.

        See also
        --------
        lsst.verify.MetricSet.load_metrics_package

        Notes
        -----
        The YAML file's name, without extension, is taken as the package
        name for all metrics.

        For example, ``validate_drp.yaml`` contains metrics that are
        identified as belonging to the ``validate_drp`` package.
        """
        metrics = cls._load_metrics_yaml(metrics_yaml_path)
        return cls(metrics)

    @staticmethod
    def _load_metrics_yaml(metrics_yaml_path):
        """Build `Metric` instances from one package's YAML definition file.

        Parameters
        ----------
        metrics_yaml_path : `str`
            File path of the YAML file. Its base name, without extension,
            is used as the package name of every metric in the file.

        Returns
        -------
        metrics : `list` of `Metric`
            Metrics created from the top-level entries of the YAML document.
        """
        # package name is inferred from YAML file name (by definition)
        metrics_yaml_path = os.path.abspath(metrics_yaml_path)
        package_name = os.path.splitext(os.path.basename(metrics_yaml_path))[0]

        metrics = []
        with open(metrics_yaml_path) as f:
            yaml_doc = load_ordered_yaml(f)
            # NOTE(review): an empty YAML file presumably loads as `None`
            # (or an empty mapping); either way there is nothing to build.
            if not yaml_doc:
                return metrics
            for metric_name, metric_doc in yaml_doc.items():
                name = Name(package=package_name, metric=metric_name)
                # throw away a 'name' field if there happens to be one;
                # the name is passed explicitly below
                metric_doc.pop('name', None)
                # Create metric instance
                metric = Metric.deserialize(name=name, **metric_doc)
                metrics.append(metric)
        return metrics

    @classmethod
    def deserialize(cls, metrics=None):
        """Deserialize metric JSON objects into a MetricSet instance.

        Parameters
        ----------
        metrics : `list`, optional
            List of metric JSON serializations (typically created by
            `MetricSet.json`). If `None` (default), an empty set is
            returned.

        Returns
        -------
        metric_set : `MetricSet`
            `MetricSet` instance.
        """
        instance = cls()
        if metrics is None:
            # Honour the declared default instead of failing on iteration
            return instance
        for metric_doc in metrics:
            metric = Metric.deserialize(**metric_doc)
            instance.insert(metric)
        return instance

    @property
    def json(self):
        """A JSON-serializable object (`list`)."""
        doc = JsonSerializationMixin._jsonify_list(
            [metric for _, metric in self.items()]
        )
        return doc

    @staticmethod
    def _coerce_key(key):
        """Return ``key`` as a `Name`, wrapping it with ``Name(metric=key)``
        if it is not already a `Name` instance.
        """
        if isinstance(key, Name):
            return key
        return Name(metric=key)

    def __getitem__(self, key):
        return self._metrics[self._coerce_key(key)]

    def __setitem__(self, key, value):
        key = self._coerce_key(key)

        # Key name must be for a metric
        if not key.is_metric:
            message = 'Key {0!r} is not a metric name'.format(key)
            raise KeyError(message)

        # value must be a metric type
        if not isinstance(value, Metric):
            message = 'Expected {0!s}={1!r} to be a Metric-type'.format(
                key, value)
            raise TypeError(message)

        # Metric name and key name must be consistent
        if value.name != key:
            message = 'Key {0!s} inconsistent with Metric {1!s}'
            raise KeyError(message.format(key, value))

        self._metrics[key] = value

    def __delitem__(self, key):
        del self._metrics[self._coerce_key(key)]

    def __len__(self):
        return len(self._metrics)

    def __contains__(self, key):
        return self._coerce_key(key) in self._metrics

    def __iter__(self):
        for key in self._metrics:
            yield key

    def __str__(self):
        count = len(self)
        if count == 0:
            count_str = 'empty'
        elif count == 1:
            count_str = '1 Metric'
        else:
            count_str = '{count:d} Metrics'.format(count=count)
        return '<MetricSet: {0}>'.format(count_str)

    def __eq__(self, other):
        # Defer to the other operand (and ultimately to identity comparison)
        # rather than raising when compared with an unrelated object.
        if not isinstance(other, MetricSet):
            return NotImplemented

        if len(self) != len(other):
            return False

        for name, metric in self.items():
            try:
                if metric != other[name]:
                    return False
            except KeyError:
                # ``other`` lacks a metric of this name
                return False

        return True

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            # Must be propagated as-is; negating it would yield False
            return result
        return not result

    def __iadd__(self, other):
        """Merge another `MetricSet` into this one.

        Parameters
        ----------
        other : `MetricSet`
            Another `MetricSet`. Metrics in ``other`` that do not exist in
            this set are added to this one. Metrics in ``other`` replace
            metrics of the same name in this one.

        Returns
        -------
        self : `MetricSet`
            This `MetricSet`.

        Notes
        -----
        Equivalent to `update`.
        """
        self.update(other)
        return self

    def insert(self, metric):
        """Insert a `Metric` into the set.

        Any pre-existing metric with the same name is replaced.

        Parameters
        ----------
        metric : `Metric`
            A metric. It is stored under its own ``name`` attribute.
        """
        self[metric.name] = metric

    def keys(self):
        r"""Get the metric names included in the set.

        Returns
        -------
        keys : collection of `Name`
            `Name`\ s included in the set, as returned by the ``keys``
            method of the internal `dict` (a `list` on Python 2, a key view
            on Python 3).
        """
        return self._metrics.keys()

    def items(self):
        """Iterate over ``(name, metric)`` pairs in the set.

        Yields
        ------
        item : tuple
            Tuple containing:

            - `Name` of the `Metric`
            - `Metric` instance
        """
        for item in self._metrics.items():
            yield item

    def subset(self, package=None, tags=None):
        """Create a new `MetricSet` with metrics belonging to a single
        package and/or tag.

        Parameters
        ----------
        package : `str` or `lsst.verify.Name`, optional
            Name of the package to subset metrics by. If the package name
            is ``'pkg_a'``, then metric ``'pkg_a.metric_1'`` would be
            **included** in the subset, while ``'pkg_b.metric_2'`` would be
            **excluded**.
        tags : sequence of `str`, optional
            Tags to select metrics by. These tags must be a subset (``<=``)
            of the `Metric.tags` for the metric to be selected.

        Returns
        -------
        metric_subset : `MetricSet`
            Subset of this metric set containing only metrics belonging
            to the specified package and/or tags.

        Notes
        -----
        If both ``package`` and ``tags`` are provided then the resulting
        `MetricSet` contains the **intersection** of the package-based and
        tag-based selections. That is, metrics will belong to ``package``
        and possess all of the tags in ``tags``.

        If neither ``package`` nor ``tags`` is provided, the returned
        `MetricSet` is empty.
        """
        if package is None and tags is None:
            # No selection criteria: nothing is selected
            return MetricSet([])

        if package is not None and not isinstance(package, Name):
            package = Name(package=package)

        if tags is not None:
            tags = set(tags)

        # A criterion left as None does not constrain the selection.
        metrics = [metric for metric_name, metric in self._metrics.items()
                   if (package is None or metric_name in package)
                   and (tags is None or tags <= metric.tags)]

        return MetricSet(metrics)

    def update(self, other):
        """Merge another `MetricSet` into this one.

        Parameters
        ----------
        other : `MetricSet`
            Another `MetricSet`. Metrics in ``other`` that do not exist in
            this set are added to this one. Metrics in ``other`` replace
            metrics of the same name in this one.
        """
        for _, metric in other.items():
            self.insert(metric)

    def _repr_html_(self):
        """Make an HTML representation of metrics for Jupyter notebooks.

        Returns
        -------
        html : `str`
            HTML produced by the ``_repr_html_`` method of an
            `astropy.table.Table` with one row per metric, sorted by name.
        """
        name_col = []
        tags_col = []
        units_col = []
        description_col = []
        reference_col = []

        for metric_name in sorted(self.keys()):
            metric = self[metric_name]

            name_col.append(str(metric_name))

            # Sort tags so the rendered cell is stable between calls
            tags_col.append(', '.join(sorted(metric.tags)))

            units_col.append("{0:latex}".format(metric.unit))

            description_col.append(metric.description)

            reference_col.append(metric.reference)

        table = Table([name_col, description_col, units_col, reference_col,
                       tags_col],
                      names=['Name', 'Description', 'Units', 'Reference',
                             'Tags'])
        return table._repr_html_()
# Navigation

# Source code for lsst.verify.metricset