Source code for skcriteria.core.data

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
# Copyright (c) 2022, 2023, 2024 QuatroPe
# All rights reserved.

# =============================================================================
# DOCS
# =============================================================================

"""Data abstraction layer.

This module defines the DecisionMatrix object, which internally encompasses
the alternative matrix, weights and objectives (MIN, MAX) of the criteria.

"""

# =============================================================================
# IMPORTS
# =============================================================================


import functools
from collections import abc

import methodtools

import numpy as np

import pandas as pd
from pandas.io.formats import format as pd_fmt

from .dominance import DecisionMatrixDominanceAccessor
from .objectives import Objective
from .plot import DecisionMatrixPlotter
from .stats import DecisionMatrixStatsAccessor
from ..utils import (
    DiffEqualityMixin,
    deprecated,
    df_temporal_header,
    diff,
    doc_inherit,
)


# =============================================================================
# SLICERS ARRAY
# =============================================================================
class _ACArray(np.ndarray, abc.Mapping):
    """Immutable Array to provide access to the alternative and criteria \
    values.

    The behavior is the same as a numpy.ndarray but if the slice it receives
    is a value contained in the array it uses an external function
    to access the series with that criteria/alternative.

    Besides this it has the typical methods of a dictionary.

    """

    def __new__(cls, input_array, skc_slicer):
        obj = np.asarray(input_array).view(cls)
        obj._skc_slicer = skc_slicer
        return obj

    @doc_inherit(np.ndarray.__getitem__)
    def __getitem__(self, k):
        try:
            if k in self:
                return self._skc_slicer(k).copy()
            return super().__getitem__(k)
        except IndexError:
            raise IndexError(k)

    def __setitem__(self, k, v):
        """Raise an AttributeError, this object are read-only."""
        raise AttributeError("_SlicerArray are read-only")

    @doc_inherit(abc.Mapping.items)
    def items(self):
        return ((e, self[e]) for e in self)

    @doc_inherit(abc.Mapping.keys)
    def keys(self):
        return iter(self)

    @doc_inherit(abc.Mapping.values)
    def values(self):
        return (self[e] for e in self)


class _Loc:
    """Locator abstraction.

    this class ensures that the correct objectives and weights are applied to
    the sliced ``DecisionMatrix``.

    """

    def __init__(self, name, real_loc, objectives, weights):
        self._name = name
        self._real_loc = real_loc
        self._objectives = objectives
        self._weights = weights

    @property
    def name(self):
        """The name of the locator."""
        return self._name

    def __getitem__(self, slc):
        """dm[slc] <==> dm.__getitem__(slc)."""
        df = self._real_loc.__getitem__(slc)
        if isinstance(df, pd.Series):
            df = df.to_frame().T

            dtypes = self._real_loc.obj.dtypes
            dtypes = dtypes[dtypes.index.isin(df.columns)]

            df = df.astype(dtypes)

        objectives = self._objectives
        objectives = objectives[objectives.index.isin(df.columns)].to_numpy()

        weights = self._weights
        weights = weights[weights.index.isin(df.columns)].to_numpy()

        return DecisionMatrix(df, objectives, weights)


# =============================================================================
# DECISION MATRIX
# =============================================================================


[docs] class DecisionMatrix(DiffEqualityMixin): """Representation of all data needed in the MCDA analysis. This object gathers everything necessary to represent a data set used in MCDA: - An alternative matrix where each row is an alternative and each column is of a different criteria. - An optimization objective (Minimize, Maximize) for each criterion. - A weight for each criterion. - An independent type of data for each criterion DecisionMatrix has two main forms of construction: 1. Use the default constructor of the DecisionMatrix class :py:class:`pandas.DataFrame` where the index is the alternatives and the columns are the criteria; an iterable with the objectives with the same amount of elements that columns/criteria has the dataframe; and an iterable with the weights also with the same amount of elements as criteria. .. code-block:: pycon >>> import pandas as pd >>> from skcriteria import DecisionMatrix, mkdm >>> data_df = pd.DataFrame( ... [[1, 2, 3], [4, 5, 6]], ... index=["A0", "A1"], ... columns=["C0", "C1", "C2"] ... ) >>> objectives = [min, max, min] >>> weights = [1, 1, 1] >>> dm = DecisionMatrix(data_df, objectives, weights) >>> dm C0[▼ 1.0] C1[▲ 1.0] C2[▲ 1.0] A0 1 2 3 A1 4 5 6 [2 Alternatives x 3 Criteria] 2. Use the classmethod `DecisionMatrix.from_mcda_data` which requests the data in a more natural way for this type of analysis (the weights, the criteria / alternative names, and the data types are optional) >>> DecisionMatrix.from_mcda_data( ... [[1, 2, 3], [4, 5, 6]], ... [min, max, min], ... [1, 1, 1]) C0[▼ 1.0] C1[▲ 1.0] C2[▲ 1.0] A0 1 2 3 A1 4 5 6 [2 Alternatives x 3 Criteria] For simplicity a function is offered at the module level analogous to ``from_mcda_data`` called ``mkdm`` (make decision matrix). Parameters ---------- data_df: :py:class:`pandas.DatFrame` Dataframe where the index is the alternatives and the columns are the criteria. objectives: :py:class:`numpy.ndarray` Aan iterable with the targets with sense of optimality of every criteria (You can use any alias defined in Objective) the same length as columns/criteria has the data_df. weights: :py:class:`numpy.ndarray` An iterable with the weights also with the same amount of elements as criteria. """ def __init__(self, data_df, objectives, weights): self._data_df = ( data_df.copy(deep=True) if isinstance(data_df, pd.DataFrame) else pd.DataFrame(data_df, copy=True) ) self._objectives = np.array(objectives, dtype=object, copy=True) self._weights = np.array(weights, dtype=float, copy=True) if not ( len(self._data_df.columns) == len(self._weights) == len(self._objectives) ): raise ValueError( "The number of weights, and objectives must be equal to the " "number of criteria (number of columns in data_df)" ) # CUSTOM CONSTRUCTORS =====================================================
[docs] @classmethod def from_mcda_data( cls, matrix, objectives, *, weights=None, alternatives=None, criteria=None, dtypes=None, ): """Create a new DecisionMatrix object. This method receives the parts of the matrix, in what conceptually the matrix of alternatives is usually divided Parameters ---------- matrix: Iterable The matrix of alternatives. Where every row is an alternative and every column is a criteria. objectives: Iterable The array with the sense of optimality of every criteria. You can use any alias provided by the objective class. weights: Iterable o None (default ``None``) Optional weights of the criteria. If is ``None`` all the criteria are weighted with 1. alternatives: Iterable o None (default ``None``) Optional names of the alternatives. If is ``None``, al the alternatives are names "A[n]" where n is the number of the row of `matrix` statring at 0. criteria: Iterable o None (default ``None``) Optional names of the criteria. If is ``None``, al the alternatives are names "C[m]" where m is the number of the columns of `matrix` statring at 0. dtypes: Iterable o None (default ``None``) Optional types of the criteria. If is None, the type is inferred automatically by pandas. Returns ------- :py:class:`DecisionMatrix` A new decision matrix. Example ------- >>> DecisionMatrix.from_mcda_data( ... [[1, 2, 3], [4, 5, 6]], ... [min, max, min], ... [1, 1, 1]) C0[▼ 1.0] C1[▲ 1.0] C2[▲ 1.0] A0 1 2 3 A1 4 5 6 [2 Alternatives x 3 Criteria] For simplicity a function is offered at the module level analogous to ``from_mcda_data`` called ``mkdm`` (make decision matrix). Notes ----- This functionality generates more sensitive defaults than using the constructor of the DecisionMatrix class but is slower. """ # first we need the number of alternatives and criteria try: a_number, c_number = np.shape(matrix) except ValueError: matrix_ndim = np.ndim(matrix) raise ValueError( f"'matrix' must have 2 dimensions, found {matrix_ndim} instead" ) alternatives = np.asarray( [f"A{idx}" for idx in range(a_number)] if alternatives is None else alternatives ) if len(alternatives) != a_number: raise ValueError(f"'alternatives' must have {a_number} elements") criteria = np.asarray( [f"C{idx}" for idx in range(c_number)] if criteria is None else criteria ) if len(criteria) != c_number: raise ValueError(f"'criteria' must have {c_number} elements") weights = np.asarray(np.ones(c_number) if weights is None else weights) data_df = pd.DataFrame(matrix, index=alternatives, columns=criteria) if dtypes is not None and len(dtypes) != c_number: raise ValueError(f"'dtypes' must have {c_number} elements") elif dtypes is not None: dtypes = {c: dt for c, dt in zip(criteria, dtypes)} data_df = data_df.astype(dtypes) return cls(data_df=data_df, objectives=objectives, weights=weights)
# MCDA ==================================================================== # This properties are useful to access interactively to the # underlying data a. Except for alternatives and criteria all other # properties expose the data as dataframes or series @property def alternatives(self): """Names of the alternatives. From this array you can also access the values of the alternatives as ``pandas.Series``. """ arr = self._data_df.index.to_numpy(copy=True) slicer = self._data_df.loc.__getitem__ return _ACArray(arr, slicer) @property def criteria(self): """Names of the criteria. From this array you can also access the values of the criteria as ``pandas.Series``. """ arr = self._data_df.columns.to_numpy(copy=True) slicer = self._data_df.__getitem__ return _ACArray(arr, slicer) @property def weights(self): """Weights of the criteria.""" return pd.Series( self._weights, dtype=float, index=self._data_df.columns.copy(deep=True), name="Weights", copy=True, ) @property def objectives(self): """Objectives of the criteria as ``Objective`` instances.""" return pd.Series( [Objective.from_alias(a) for a in self._objectives], index=self._data_df.columns, name="Objectives", copy=True, ) @property def minwhere(self): """Mask with value True if the criterion is to be minimized.""" mask = self.objectives == Objective.MIN mask.name = "minwhere" return mask @property def maxwhere(self): """Mask with value True if the criterion is to be maximized.""" mask = self.objectives == Objective.MAX mask.name = "maxwhere" return mask # READ ONLY PROPERTIES ==================================================== @property def iobjectives(self): """Objectives of the criteria as ``int``. - Minimize = Objective.MIN.value - Maximize = Objective.MAX.value """ return pd.Series( [o.value for o in self.objectives], dtype=np.int8, index=self._data_df.columns.copy(deep=True), copy=True, ) @property def matrix(self): """Alternatives matrix as pandas DataFrame. The matrix excludes weights and objectives. If you want to create a DataFrame with objectives and weights, use ``DecisionMatrix.to_dataframe()`` """ mtx = self._data_df.copy(deep=True) mtx.index = self._data_df.index.copy(deep=True) mtx.index.name = "Alternatives" mtx.columns = self._data_df.columns.copy(deep=True) mtx.columns.name = "Criteria" return mtx @property def dtypes(self): """Dtypes of the criteria.""" series = self._data_df.dtypes.copy(deep=True) series.index = self._data_df.dtypes.index.copy(deep=True) return series # ACCESSORS (YES, WE USE CACHED PROPERTIES IS THE EASIEST WAY) ============ @methodtools.lru_cache(maxsize=None) @property def plot(self): """Plot accessor.""" return DecisionMatrixPlotter(self) @methodtools.lru_cache(maxsize=None) @property def stats(self): """Descriptive statistics accessor.""" return DecisionMatrixStatsAccessor(self) @methodtools.lru_cache(maxsize=None) @property def dominance(self): """Dominance information accessor.""" return DecisionMatrixDominanceAccessor(self) # UTILITIES ===============================================================
[docs] def copy(self, **kwargs): """Return a deep copy of the current DecisionMatrix. This method is also useful for manually modifying the values of the DecisionMatrix object. Parameters ---------- kwargs : The same parameters supported by ``from_mcda_data()``. The values provided replace the existing ones in the object to be copied. Returns ------- :py:class:`DecisionMatrix` A new decision matrix. """ dmdict = self.to_dict() dmdict.update(kwargs) return self.from_mcda_data(**dmdict)
[docs] def to_dataframe(self): """Convert the entire DecisionMatrix into a dataframe. The objectives and weights ara added as rows before the alternatives. Returns ------- :py:class:`pd.DataFrame` A Decision matrix as pandas DataFrame. Example ------- .. code-block:: pycon >>> dm = DecisionMatrix.from_mcda_data( >>> dm ... [[1, 2, 3], [4, 5, 6]], ... [min, max, min], ... [1, 1, 1]) C0[▼ 1.0] C1[▲ 1.0] C2[▲ 1.0] A0 1 2 3 A1 4 5 6 >>> dm.to_dataframe() C0 C1 C2 objectives MIN MAX MIN weights 1.0 1.0 1.0 A0 1 2 3 A1 4 5 6 """ data = np.vstack((self.objectives, self.weights, self.matrix)) index = np.hstack((["objectives", "weights"], self.alternatives)) df = pd.DataFrame(data, index=index, columns=self.criteria, copy=True) return df
[docs] def to_dict(self): """Return a dict representation of the data. All the values are represented as numpy array. """ return { "matrix": self.matrix.to_numpy(copy=True), "objectives": self.iobjectives.to_numpy(copy=True), "weights": self.weights.to_numpy(copy=True), "dtypes": self.dtypes.to_numpy(copy=True), "alternatives": np.array(self.alternatives, copy=True), "criteria": np.array(self.criteria, copy=True), }
[docs] @deprecated( reason=( "Use ``DecisionMatrix.stats()``, " "``DecisionMatrix.stats('describe)`` or " "``DecisionMatrix.stats.describe()`` instead." ), version="0.6", ) def describe(self, **kwargs): """Generate descriptive statistics. Descriptive statistics include those that summarize the central tendency, dispersion and shape of a dataset's distribution, excluding ``NaN`` values. Parameters ---------- Same parameters as ``pandas.DataFrame.describe()``. Returns ------- ``pandas.DataFrame`` Summary statistics of DecisionMatrix provided. """ return self._data_df.describe(**kwargs)
# CMP ===================================================================== @property def shape(self): """Return a tuple with (number_of_alternatives, number_of_criteria). dm.shape <==> np.shape(dm) """ return np.shape(self._data_df) def __len__(self): """Return the number ot alternatives. dm.__len__() <==> len(dm). """ return len(self._data_df)
[docs] @doc_inherit(DiffEqualityMixin.diff) def diff( self, other, rtol=1e-05, atol=1e-08, equal_nan=True, check_dtypes=False ): # all the validations only works if we have the same shape same_shape = ( (np.shape(self) == np.shape(other)) if isinstance(other, DecisionMatrix) else False ) # Check if have the same shape and if all elements are equal. def same_shape_array_equal(left_value, right_value): return same_shape and np.array_equal( left_value, right_value, equal_nan=False ) # Check if have the same shape and if all elements are close. def same_shape_array_allclose(left_value, right_value): return same_shape and np.allclose( left_value, right_value, rtol=rtol, atol=atol, equal_nan=equal_nan, ) members = { "shape": np.array_equal, # the shape must be equal "criteria": same_shape_array_equal, "alternatives": same_shape_array_equal, "objectives": same_shape_array_equal, "weights": same_shape_array_allclose, "matrix": same_shape_array_allclose, } if check_dtypes: members["dtypes"] = same_shape_array_equal the_diff = diff(self, other, **members) return the_diff
# SLICES ================================================================== def __getitem__(self, slc): """dm[slc] <==> dm.__getitem__(slc).""" df = self._data_df.__getitem__(slc) if isinstance(df, pd.Series): df = df.to_frame() dtypes = self._data_df.dtypes dtypes = dtypes[dtypes.index.isin(df.columns)] df = df.astype(dtypes) objectives = self.objectives objectives = objectives[objectives.index.isin(df.columns)].to_numpy( copy=True ) weights = self.weights weights = weights[weights.index.isin(df.columns)].to_numpy(copy=True) return DecisionMatrix(df, objectives, weights) @property def loc(self): """Access a group of alternatives and criteria by label(s) or a \ boolean array. ``.loc[]`` is primarily alternative label based, but may also be used with a boolean array. Unlike DataFrames, `ìloc`` of ``DecisionMatrix`` always returns an instance of ``DecisionMatrix``. """ return _Loc("loc", self._data_df.loc, self.objectives, self.weights) @property def iloc(self): """Purely integer-location based indexing for selection by position. ``.iloc[]`` is primarily integer position based (from ``0`` to ``length-1`` of the axis), but may also be used with a boolean array. Unlike DataFrames, `ìloc`` of ``DecisionMatrix`` always returns an instance of ``DecisionMatrix``. """ return _Loc("iloc", self._data_df.iloc, self.objectives, self.weights) # REPR ==================================================================== def _get_cow_headers( self, only=None, fmt="{criteria}[{objective}{weight}]" ): """Columns names with COW (Criteria, Objective, Weight).""" criteria = self._data_df.columns.to_series() objectives = self.objectives weights = self.weights if only: mask = self._data_df.columns.isin(only) criteria = criteria[mask][only] objectives = objectives[mask][only] weights = weights[mask][only] weights = pd_fmt.format_array(weights, None) headers = [] for crit, obj, weight in zip(criteria, objectives, weights): header = fmt.format( criteria=crit, objective=obj.to_symbol(), weight=weight ) headers.append(header) return np.array(headers) def _get_axc_dimensions(self): """Dimension footnote with AxC (Alternatives x Criteria).""" a_number, c_number = self.shape dimensions = f"{a_number} Alternatives x {c_number} Criteria" return dimensions def __repr__(self): """dm.__repr__() <==> repr(dm).""" header = self._get_cow_headers() dimensions = self._get_axc_dimensions() with df_temporal_header(self._data_df, header) as df: with pd.option_context("display.show_dimensions", False): original_string = repr(df) # add dimension string = f"{original_string}\n[{dimensions}]" return string def _repr_html_(self): """Return a html representation for a particular DecisionMatrix. Mainly for IPython notebook. """ header = self._get_cow_headers() dimensions = self._get_axc_dimensions() # retrieve the original string with df_temporal_header(self._data_df, header) as df: with pd.option_context("display.show_dimensions", False): original_html = df._repr_html_() # add dimension html = ( "<div class='decisionmatrix'>\n" f"{original_html}" f"<em class='decisionmatrix-dim'>{dimensions}</em>\n" "</div>" ) return html
# ============================================================================= # factory # =============================================================================
[docs] @functools.wraps(DecisionMatrix.from_mcda_data) def mkdm(*args, **kwargs): """Alias for DecisionMatrix.from_mcda_data.""" return DecisionMatrix.from_mcda_data(*args, **kwargs)