Source code for skcriteria.core.data

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
# Copyright (c) 2022, 2023, 2024 QuatroPe
# All rights reserved.

# =============================================================================
# DOCS
# =============================================================================

"""Data abstraction layer.

This module defines the DecisionMatrix object, which internally encompasses
the alternative matrix, weights and objectives (MIN, MAX) of the criteria.

"""

# =============================================================================
# IMPORTS
# =============================================================================


import functools
from collections import abc

import methodtools

import numpy as np

import pandas as pd
from pandas.io.formats import format as pd_fmt

from .dominance import DecisionMatrixDominanceAccessor
from .objectives import Objective
from .plot import DecisionMatrixPlotter
from .stats import DecisionMatrixStatsAccessor
from ..utils import (
    DiffEqualityMixin,
    deprecated,
    df_temporal_header,
    diff,
    doc_inherit,
)


# =============================================================================
# SLICERS ARRAY
# =============================================================================
class _ACArray(np.ndarray, abc.Mapping):
    """Read-only array of alternative or criteria names.

    It behaves like a ``numpy.ndarray``, but when the key given to
    ``__getitem__`` is one of the values stored in the array, the lookup is
    delegated to an external callable (the *slicer*) and a copy of whatever
    the slicer returns is handed back.

    It also offers the usual mapping helpers (``keys``, ``values`` and
    ``items``).

    Parameters
    ----------
    input_array : array-like
        The labels to expose.
    skc_slicer : callable
        Callable invoked with a label; its result must provide ``copy()``.

    """

    def __new__(cls, input_array, skc_slicer):
        obj = np.asarray(input_array).view(cls)
        obj._skc_slicer = skc_slicer
        return obj

    def __array_finalize__(self, obj):
        """Propagate the slicer to arrays that numpy derives from this one.

        Views and slices are created by numpy without calling ``__new__``,
        so without this hook they would not have a ``_skc_slicer`` and a
        label lookup on them would fail with ``AttributeError``.
        """
        if obj is None:
            return
        self._skc_slicer = getattr(obj, "_skc_slicer", None)

    @doc_inherit(np.ndarray.__getitem__)
    def __getitem__(self, k):
        try:
            # a key that is stored in the array is treated as a label
            if k in self:
                return self._skc_slicer(k).copy()
            return super().__getitem__(k)
        except IndexError as err:
            # normalize the message to the offending key, keeping the cause
            raise IndexError(k) from err

    def __setitem__(self, k, v):
        """Raise an AttributeError, these objects are read-only."""
        raise AttributeError("_ACArray are read-only")

    @doc_inherit(abc.Mapping.items)
    def items(self):
        return ((e, self[e]) for e in self)

    @doc_inherit(abc.Mapping.keys)
    def keys(self):
        return iter(self)

    @doc_inherit(abc.Mapping.values)
    def values(self):
        return (self[e] for e in self)


class _Loc:
    """Locator abstraction.

    Wraps a pandas indexer (``.loc`` / ``.iloc``) and makes sure that the
    objectives and weights handed to the sliced ``DecisionMatrix`` match the
    selected criteria, in the same order as the resulting columns.

    Parameters
    ----------
    name : str
        Name of the locator.
    real_loc :
        The pandas indexer that performs the actual selection.
    objectives : pandas.Series
        Objectives indexed by criteria.
    weights : pandas.Series
        Weights indexed by criteria.

    """

    def __init__(self, name, real_loc, objectives, weights):
        self._name = name
        self._real_loc = real_loc
        self._objectives = objectives
        self._weights = weights

    @property
    def name(self):
        """The name of the locator."""
        return self._name

    @staticmethod
    def _select(series, columns):
        """Return the values of ``series`` that belong to ``columns``.

        With unique labels the values follow the order of ``columns``, so a
        reordering selection keeps data, objectives and weights aligned.
        With repeated labels a positional alignment is ambiguous, so the
        membership filter (original order) is used instead.
        """
        if series.index.is_unique:
            return series.reindex(columns).to_numpy()
        return series[series.index.isin(columns)].to_numpy()

    def __getitem__(self, slc):
        """dm[slc] <==> dm.__getitem__(slc)."""
        df = self._real_loc.__getitem__(slc)
        if isinstance(df, pd.Series):
            # NOTE(review): a Series is assumed to be a single alternative
            # (one row); it is turned back into a one-row frame.
            df = df.to_frame().T

            # the transposition loses the per-criterion dtypes; restore them
            dtypes = self._real_loc.obj.dtypes
            dtypes = dtypes[dtypes.index.isin(df.columns)]

            df = df.astype(dtypes)

        objectives = self._select(self._objectives, df.columns)
        weights = self._select(self._weights, df.columns)

        return DecisionMatrix(df, objectives, weights)


# =============================================================================
# DECISION MATRIX
# =============================================================================



[docs]
class DecisionMatrix(DiffEqualityMixin):
    """Representation of all data needed in the MCDA analysis.

    This object gathers everything necessary to represent a data set used
    in MCDA:

     - An alternative matrix where each row is an alternative and each
       column is a different criterion.
     - An optimization objective (Minimize, Maximize) for each criterion.
     - A weight for each criterion.
     - An independent type of data for each criterion.

     DecisionMatrix has two main forms of construction:

     1. Use the default constructor of the DecisionMatrix class with a
        :py:class:`pandas.DataFrame` where the index is the alternatives
        and the columns are the criteria; an iterable with the objectives
        with the same number of elements as the dataframe has
        columns/criteria; and an iterable with the weights, also with the
        same number of elements as criteria.

        .. code-block:: pycon

        >>> import pandas as pd
        >>> from skcriteria import DecisionMatrix, mkdm

        >>> data_df = pd.DataFrame(
        ...     [[1, 2, 3], [4, 5, 6]],
        ...     index=["A0", "A1"],
        ...     columns=["C0", "C1", "C2"]
        ... )
        >>> objectives = [min, max, min]
        >>> weights = [1, 1, 1]

        >>> dm = DecisionMatrix(data_df, objectives, weights)
        >>> dm
           C0[▼ 1.0] C1[▲ 1.0] C2[▼ 1.0]
        A0         1         2         3
        A1         4         5         6
        [2 Alternatives x 3 Criteria]

    2. Use the classmethod `DecisionMatrix.from_mcda_data` which requests the
       data in a more natural way for this type of analysis
       (the weights, the criteria / alternative names, and the data types
       are optional keyword-only arguments)

       >>> DecisionMatrix.from_mcda_data(
       ...     [[1, 2, 3], [4, 5, 6]],
       ...     [min, max, min],
       ...     weights=[1, 1, 1])
          C0[▼ 1.0] C1[▲ 1.0] C2[▼ 1.0]
       A0         1         2         3
       A1         4         5         6
       [2 Alternatives x 3 Criteria]

        For simplicity a function is offered at the module level analogous to
        ``from_mcda_data`` called ``mkdm`` (make decision matrix).

    Parameters
    ----------
    data_df: :py:class:`pandas.DataFrame`
        Dataframe where the index is the alternatives and the columns
        are the criteria. Any other input is converted with
        ``pandas.DataFrame(data_df)``.
    objectives: :py:class:`numpy.ndarray`
        An iterable with the sense of optimality of every criterion
        (you can use any alias defined in Objective), with the same length
        as the number of columns/criteria of data_df.
    weights: :py:class:`numpy.ndarray`
        An iterable with the weights, also with the same number of elements
        as criteria.

    Raises
    ------
    ValueError
        If the number of weights or objectives differs from the number of
        columns of data_df.

    """

    def __init__(self, data_df, objectives, weights):
        # Private copies are always stored, so the instance never shares
        # the objects received from the caller.
        if isinstance(data_df, pd.DataFrame):
            self._data_df = data_df.copy(deep=True)
        else:
            self._data_df = pd.DataFrame(data_df, copy=True)

        self._objectives = np.array(objectives, dtype=object, copy=True)
        self._weights = np.array(weights, dtype=float, copy=True)

        # one weight and one objective are required per criterion (column)
        n_criteria = len(self._data_df.columns)
        sizes_agree = n_criteria == len(self._weights) and len(
            self._weights
        ) == len(self._objectives)

        if not sizes_agree:
            raise ValueError(
                "The number of weights, and objectives must be equal to the "
                "number of criteria (number of columns in data_df)"
            )

    # CUSTOM CONSTRUCTORS =====================================================


[docs]
    @classmethod
    def from_mcda_data(
        cls,
        matrix,
        objectives,
        *,
        weights=None,
        alternatives=None,
        criteria=None,
        dtypes=None,
    ):
        """Create a new DecisionMatrix object.

        This method receives the parts into which the matrix of
        alternatives is conceptually divided.

        Parameters
        ----------
        matrix: Iterable
            The matrix of alternatives, where every row is an alternative
            and every column is a criterion. It must have 2 dimensions.

        objectives: Iterable
            The array with the sense of optimality of every
            criterion. You can use any alias provided by the objective class.

        weights: Iterable or None (default ``None``)
            Optional weights of the criteria. If it is ``None`` all the
            criteria are weighted with 1.

        alternatives: Iterable or None (default ``None``)
            Optional names of the alternatives. If it is ``None``,
            all the alternatives are named "A[n]" where n is the number of
            the row of `matrix` starting at 0.

        criteria: Iterable or None (default ``None``)
            Optional names of the criteria. If it is ``None``,
            all the criteria are named "C[m]" where m is the number of
            the column of `matrix` starting at 0.

        dtypes: Iterable or None (default ``None``)
            Optional types of the criteria. If it is ``None``, the type is
            inferred automatically by pandas.

        Returns
        -------
        :py:class:`DecisionMatrix`
            A new decision matrix.

        Raises
        ------
        ValueError
            If `matrix` does not have 2 dimensions, or if `alternatives`,
            `criteria` or `dtypes` do not have as many elements as the
            corresponding dimension of `matrix`.

        Example
        -------

        >>> DecisionMatrix.from_mcda_data(
        ...     [[1, 2, 3], [4, 5, 6]],
        ...     [min, max, min],
        ...     weights=[1, 1, 1])
           C0[▼ 1.0] C1[▲ 1.0] C2[▼ 1.0]
        A0         1         2         3
        A1         4         5         6
        [2 Alternatives x 3 Criteria]

        For simplicity a function is offered at the module level analogous to
        ``from_mcda_data`` called ``mkdm`` (make decision matrix).

        Notes
        -----
        This functionality generates more sensible defaults than using the
        constructor of the DecisionMatrix class but is slower.

        """
        # first we need the number of alternatives and criteria
        try:
            a_number, c_number = np.shape(matrix)
        except ValueError as err:
            # the unpacking fails when the shape is not exactly (rows, cols)
            matrix_ndim = np.ndim(matrix)
            raise ValueError(
                f"'matrix' must have 2 dimensions, found {matrix_ndim} instead"
            ) from err

        alternatives = np.asarray(
            [f"A{idx}" for idx in range(a_number)]
            if alternatives is None
            else alternatives
        )
        if len(alternatives) != a_number:
            raise ValueError(f"'alternatives' must have {a_number} elements")

        criteria = np.asarray(
            [f"C{idx}" for idx in range(c_number)]
            if criteria is None
            else criteria
        )
        if len(criteria) != c_number:
            raise ValueError(f"'criteria' must have {c_number} elements")

        # the length of the weights is validated by the class constructor
        weights = np.asarray(np.ones(c_number) if weights is None else weights)

        data_df = pd.DataFrame(matrix, index=alternatives, columns=criteria)

        if dtypes is not None:
            if len(dtypes) != c_number:
                raise ValueError(f"'dtypes' must have {c_number} elements")
            data_df = data_df.astype(dict(zip(criteria, dtypes)))

        return cls(data_df=data_df, objectives=objectives, weights=weights)


    # MCDA ====================================================================
    #     These properties are useful for interactive access to the
    #     underlying data. Except for alternatives and criteria, all other
    #     properties expose the data as dataframes or series.

    @property
    def alternatives(self):
        """Names of the alternatives.

        The returned array can also be indexed with the name of an
        alternative to retrieve its values as a ``pandas.Series``.

        """
        frame = self._data_df
        names = frame.index.to_numpy(copy=True)
        return _ACArray(names, frame.loc.__getitem__)

    @property
    def criteria(self):
        """Names of the criteria.

        The returned array can also be indexed with the name of a
        criterion to retrieve its values as a ``pandas.Series``.

        """
        frame = self._data_df
        names = frame.columns.to_numpy(copy=True)
        return _ACArray(names, frame.__getitem__)

    @property
    def weights(self):
        """Weights of the criteria as a float ``pandas.Series``."""
        criteria_index = self._data_df.columns.copy(deep=True)
        series = pd.Series(
            self._weights,
            index=criteria_index,
            dtype=float,
            name="Weights",
            copy=True,
        )
        return series

    @property
    def objectives(self):
        """Objectives of the criteria as ``Objective`` instances.

        Every stored alias is converted with ``Objective.from_alias``; the
        result is a ``pandas.Series`` indexed by the criteria.

        """
        # Like ``weights`` and ``iobjectives``, hand out a copy of the
        # criteria index so the returned series does not share the Index
        # object of the internal dataframe.
        criteria_index = self._data_df.columns.copy(deep=True)
        return pd.Series(
            [Objective.from_alias(a) for a in self._objectives],
            index=criteria_index,
            name="Objectives",
            copy=True,
        )

    @property
    def minwhere(self):
        """Boolean mask, True where the criterion must be minimized."""
        is_min = self.objectives == Objective.MIN
        is_min.name = "minwhere"
        return is_min

    @property
    def maxwhere(self):
        """Boolean mask, True where the criterion must be maximized."""
        is_max = self.objectives == Objective.MAX
        is_max.name = "maxwhere"
        return is_max

    # READ ONLY PROPERTIES ====================================================

    @property
    def iobjectives(self):
        """Objectives of the criteria as ``int``.

        Each objective is replaced by its ``value`` attribute:

        - Minimize = Objective.MIN.value
        - Maximize = Objective.MAX.value

        """
        int_values = [objective.value for objective in self.objectives]
        criteria_index = self._data_df.columns.copy(deep=True)
        return pd.Series(
            int_values, dtype=np.int8, index=criteria_index, copy=True
        )

    @property
    def matrix(self):
        """Alternatives matrix as pandas DataFrame.

        The result is a deep copy of the internal data whose index is named
        "Alternatives" and whose columns are named "Criteria". Weights and
        objectives are not included.

        To obtain a DataFrame that also contains objectives and weights,
        use ``DecisionMatrix.to_dataframe()``.

        """
        source = self._data_df
        mtx = source.copy(deep=True)

        # fresh axis objects: naming them must not touch the internal frame
        alternatives_axis = source.index.copy(deep=True)
        mtx.index = alternatives_axis
        mtx.index.name = "Alternatives"

        criteria_axis = source.columns.copy(deep=True)
        mtx.columns = criteria_axis
        mtx.columns.name = "Criteria"

        return mtx

    @property
    def dtypes(self):
        """Data type of every criterion, as a ``pandas.Series``."""
        dtypes_copy = self._data_df.dtypes.copy(deep=True)
        dtypes_copy.index = self._data_df.dtypes.index.copy(deep=True)
        return dtypes_copy

    # ACCESSORS (YES, WE USE CACHED PROPERTIES IS THE EASIEST WAY) ============

    @methodtools.lru_cache(maxsize=None)
    @property
    def plot(self):
        """Plot accessor (a ``DecisionMatrixPlotter`` bound to this object)."""
        plotter = DecisionMatrixPlotter(self)
        return plotter

    @methodtools.lru_cache(maxsize=None)
    @property
    def stats(self):
        """Descriptive statistics accessor bound to this object."""
        accessor = DecisionMatrixStatsAccessor(self)
        return accessor

    @methodtools.lru_cache(maxsize=None)
    @property
    def dominance(self):
        """Dominance information accessor bound to this object."""
        accessor = DecisionMatrixDominanceAccessor(self)
        return accessor

    # UTILITIES ===============================================================


[docs]
    def copy(self, **kwargs):
        """Return a deep copy of the current DecisionMatrix.

        The copy is rebuilt from ``to_dict()`` through ``from_mcda_data()``,
        which also makes this method a convenient way of creating a modified
        version of the object.

        Parameters
        ----------
        kwargs :
            The same parameters supported by ``from_mcda_data()``. The values
            provided replace the existing ones in the object to be copied.

        Returns
        -------
        :py:class:`DecisionMatrix`
            A new decision matrix.

        """
        # values given by the caller take precedence over the current ones
        params = {**self.to_dict(), **kwargs}
        return self.from_mcda_data(**params)



[docs]
    def to_dataframe(self):
        """Convert the entire DecisionMatrix into a dataframe.

        The objectives and weights are added as rows before the alternatives.

        Returns
        -------
        :py:class:`pd.DataFrame`
            A Decision matrix as pandas DataFrame.

        Example
        -------
        .. code-block:: pycon

           >>> dm = DecisionMatrix.from_mcda_data(
           ...     [[1, 2, 3], [4, 5, 6]],
           ...     [min, max, min],
           ...     weights=[1, 1, 1])
           >>> dm
               C0[▼ 1.0] C1[▲ 1.0] C2[▼ 1.0]
           A0         1         2         3
           A1         4         5         6
           [2 Alternatives x 3 Criteria]

           >>> dm.to_dataframe()
                       C0   C1   C2
           objectives  MIN  MAX  MIN
           weights     1.0  1.0  1.0
           A0            1    2    3
           A1            4    5    6

        """
        # two header rows (objectives, weights) stacked on top of the data
        stacked = np.vstack((self.objectives, self.weights, self.matrix))
        row_labels = np.hstack((["objectives", "weights"], self.alternatives))
        return pd.DataFrame(
            stacked, index=row_labels, columns=self.criteria, copy=True
        )



[docs]
    def to_dict(self):
        """Return a dict representation of the data.

        The keys match the parameters of ``from_mcda_data()`` and every
        value is a numpy array holding a copy of the data.
        """
        matrix = self.matrix.to_numpy(copy=True)
        objectives = self.iobjectives.to_numpy(copy=True)
        weights = self.weights.to_numpy(copy=True)
        dtypes = self.dtypes.to_numpy(copy=True)
        alternatives = np.array(self.alternatives, copy=True)
        criteria = np.array(self.criteria, copy=True)

        return {
            "matrix": matrix,
            "objectives": objectives,
            "weights": weights,
            "dtypes": dtypes,
            "alternatives": alternatives,
            "criteria": criteria,
        }



[docs]
    @deprecated(
        reason=(
            "Use ``DecisionMatrix.stats()``, "
            "``DecisionMatrix.stats('describe')`` or "
            "``DecisionMatrix.stats.describe()`` instead."
        ),
        version="0.6",
    )
    def describe(self, **kwargs):
        """Generate descriptive statistics.

        Descriptive statistics include those that summarize the central
        tendency, dispersion and shape of a dataset's distribution,
        excluding ``NaN`` values.

        The call is forwarded to ``pandas.DataFrame.describe()`` on the
        alternatives matrix.

        Parameters
        ----------
        Same parameters as ``pandas.DataFrame.describe()``.

        Returns
        -------
        ``pandas.DataFrame``
            Summary statistics of DecisionMatrix provided.

        """
        return self._data_df.describe(**kwargs)


    # CMP =====================================================================

    @property
    def shape(self):
        """Return a tuple with (number_of_alternatives, number_of_criteria).

        dm.shape <==> np.shape(dm)

        """
        dimensions = self._data_df.shape
        return dimensions

    def __len__(self):
        """Return the number of alternatives.

        dm.__len__() <==> len(dm).

        """
        n_alternatives = len(self._data_df.index)
        return n_alternatives


[docs]
    @doc_inherit(DiffEqualityMixin.diff)
    def diff(
        self, other, rtol=1e-05, atol=1e-08, equal_nan=True, check_dtypes=False
    ):
        # Element-wise comparisons are only attempted between decision
        # matrices of identical shape; otherwise they count as different.
        shapes_match = isinstance(other, DecisionMatrix) and (
            np.shape(self) == np.shape(other)
        )

        # exact comparison (labels, objectives and, optionally, dtypes)
        def exactly_equal(left_value, right_value):
            if not shapes_match:
                return False
            return np.array_equal(left_value, right_value, equal_nan=False)

        # tolerant comparison (numeric members)
        def approximately_equal(left_value, right_value):
            if not shapes_match:
                return False
            return np.allclose(
                left_value,
                right_value,
                rtol=rtol,
                atol=atol,
                equal_nan=equal_nan,
            )

        comparators = {
            "shape": np.array_equal,  # the shape must be equal
            "criteria": exactly_equal,
            "alternatives": exactly_equal,
            "objectives": exactly_equal,
            "weights": approximately_equal,
            "matrix": approximately_equal,
        }

        # dtypes are compared only on request
        if check_dtypes:
            comparators["dtypes"] = exactly_equal

        return diff(self, other, **comparators)


    # SLICES ==================================================================

    def __getitem__(self, slc):
        """dm[slc] <==> dm.__getitem__(slc).

        The selection is delegated to the underlying ``pandas.DataFrame``
        and the result is always wrapped in a new ``DecisionMatrix`` whose
        objectives and weights follow the order of the selected criteria.

        """
        df = self._data_df.__getitem__(slc)
        if isinstance(df, pd.Series):
            # a single criterion was selected: go back to a one-column frame
            df = df.to_frame()

            dtypes = self._data_df.dtypes
            dtypes = dtypes[dtypes.index.isin(df.columns)]

            df = df.astype(dtypes)

        objectives = self.objectives
        weights = self.weights

        if objectives.index.is_unique:
            # Align by label so a reordering selection such as
            # ``dm[["C2", "C0"]]`` keeps each objective/weight attached to
            # its own criterion.
            objectives = objectives.reindex(df.columns)
            weights = weights.reindex(df.columns)
        else:
            # With repeated criteria names a label alignment is ambiguous;
            # fall back to the membership filter (original column order).
            selected = objectives.index.isin(df.columns)
            objectives = objectives[selected]
            weights = weights[selected]

        return DecisionMatrix(
            df, objectives.to_numpy(copy=True), weights.to_numpy(copy=True)
        )

    @property
    def loc(self):
        """Access alternatives and criteria by label(s) or a boolean array.

        ``.loc[]`` is primarily alternative label based, but may also be used
        with a boolean array.

        Unlike DataFrames, ``loc`` of ``DecisionMatrix`` always returns an
        instance of ``DecisionMatrix``.

        """
        label_indexer = self._data_df.loc
        return _Loc("loc", label_indexer, self.objectives, self.weights)

    @property
    def iloc(self):
        """Purely integer-location based indexing for selection by position.

        ``.iloc[]`` is primarily integer position based (from ``0`` to
        ``length-1`` of the axis), but may also be used with a boolean
        array.

        Unlike DataFrames, ``iloc`` of ``DecisionMatrix`` always returns an
        instance of ``DecisionMatrix``.

        """
        position_indexer = self._data_df.iloc
        return _Loc("iloc", position_indexer, self.objectives, self.weights)

    # REPR ====================================================================

    def _get_cow_headers(
        self, only=None, fmt="{criteria}[{objective}{weight}]"
    ):
        """Build the column names with COW (Criteria, Objective, Weight).

        When ``only`` is given, headers are produced just for those
        criteria, in the order in which they appear in ``only``.
        """
        frame_columns = self._data_df.columns
        criteria = frame_columns.to_series()
        objectives = self.objectives
        weights = self.weights

        if only:
            keep = frame_columns.isin(only)
            criteria, objectives, weights = (
                series[keep][only]
                for series in (criteria, objectives, weights)
            )

        # let pandas render the weights as it would inside a dataframe
        weight_labels = pd_fmt.format_array(weights, None)

        return np.array(
            [
                fmt.format(
                    criteria=crit, objective=obj.to_symbol(), weight=weight
                )
                for crit, obj, weight in zip(
                    criteria, objectives, weight_labels
                )
            ]
        )

    def _get_axc_dimensions(self):
        """Build the dimension footnote with AxC (Alternatives x Criteria)."""
        n_alternatives, n_criteria = self.shape
        return f"{n_alternatives} Alternatives x {n_criteria} Criteria"

    def __repr__(self):
        """dm.__repr__() <==> repr(dm)."""
        cow_headers = self._get_cow_headers()
        footnote = self._get_axc_dimensions()

        # render the data with the COW headers and without pandas' own
        # dimension line
        with df_temporal_header(
            self._data_df, cow_headers
        ) as df, pd.option_context("display.show_dimensions", False):
            table = repr(df)

        # the dimensions are appended as a bracketed last line
        return table + "\n[" + footnote + "]"

    def _repr_html_(self):
        """Return the HTML representation of the DecisionMatrix.

        Mainly for IPython notebook.
        """
        cow_headers = self._get_cow_headers()
        footnote = self._get_axc_dimensions()

        # HTML of the data with the COW headers and without pandas' own
        # dimension line
        with df_temporal_header(
            self._data_df, cow_headers
        ) as df, pd.option_context("display.show_dimensions", False):
            table_html = df._repr_html_()

        # wrap the table and append the dimensions
        pieces = [
            "<div class='decisionmatrix'>\n",
            f"{table_html}",
            f"<em class='decisionmatrix-dim'>{footnote}</em>\n",
            "</div>",
        ]
        return "".join(pieces)



# =============================================================================
# factory
# =============================================================================



[docs]
@functools.wraps(DecisionMatrix.from_mcda_data)
def mkdm(*args, **kwargs):
    """Alias for DecisionMatrix.from_mcda_data."""
    # every argument is forwarded untouched to the class-level constructor
    factory = DecisionMatrix.from_mcda_data
    return factory(*args, **kwargs)