Source code for skcriteria.core.data

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
# Copyright (c) 2022, QuatroPe
# All rights reserved.

# =============================================================================
# DOCS
# =============================================================================

"""Data abstraction layer.

This module defines the DecisionMatrix object, which internally encompasses
the alternative matrix, weights and objectives (MIN, MAX) of the criteria.

"""

# =============================================================================
# IMPORTS
# =============================================================================


import enum
import functools
from collections import abc

import numpy as np

import pandas as pd
from pandas.io.formats import format as pd_fmt

import pyquery as pq


from .dominance import DecisionMatrixDominanceAccessor
from .plot import DecisionMatrixPlotter
from .stats import DecisionMatrixStatsAccessor
from ..utils import deprecated, doc_inherit


# =============================================================================
# CONSTANTS
# =============================================================================
class Objective(enum.Enum):
    """Representation of criteria objectives (Minimize, Maximize).

    Besides the two public members (``MIN`` and ``MAX``) the enumeration
    stores, as underscore-prefixed members, the printable symbols and the
    sets of aliases accepted by :py:meth:`construct_from_alias`.

    """

    #: Internal representation of minimize criteria
    MIN = -1

    #: Internal representation of maximize criteria
    MAX = 1

    # INTERNALS ===============================================================

    _MIN_STR = "\u25bc"
    _MAX_STR = "\u25b2"

    #: Another way to name the maximization criteria.
    _MAX_ALIASES = frozenset(
        [
            MAX,
            _MAX_STR,
            max,
            np.max,
            np.nanmax,
            np.amax,
            "max",
            "maximize",
            "+",
            ">",
        ]
    )

    #: Another ways to name the minimization criteria.
    _MIN_ALIASES = frozenset(
        [
            MIN,
            _MIN_STR,
            min,
            np.min,
            np.nanmin,
            np.amin,
            "min",
            "minimize",
            "<",
            "-",
        ]
    )

    # CUSTOM CONSTRUCTOR ======================================================

    @classmethod
    def construct_from_alias(cls, alias):
        """Return the alias internal representation of the objective.

        Parameters
        ----------
        alias : object
            An ``Objective`` member (returned unchanged) or any value listed
            in the alias sets of this class. Strings are compared in lower
            case.

        Returns
        -------
        Objective
            ``Objective.MAX`` or ``Objective.MIN``.

        Raises
        ------
        ValueError
            If ``alias`` is not a known alias. This includes unhashable
            values (lists, arrays, dicts, ...), which can never be an alias.

        """
        if isinstance(alias, cls):
            return alias
        if isinstance(alias, str):
            alias = alias.lower()
        try:
            if alias in cls._MAX_ALIASES.value:
                return cls.MAX
            if alias in cls._MIN_ALIASES.value:
                return cls.MIN
        except TypeError:
            # The alias sets are frozensets, so the membership test hashes
            # the alias; an unhashable value is reported as an invalid
            # objective below instead of leaking a TypeError.
            pass
        raise ValueError(f"Invalid criteria objective {alias}")

    # METHODS =================================================================

    def __str__(self):
        """Convert the objective to an string."""
        return self.name

    def to_string(self):
        """Return the printable representation of the objective.

        Returns
        -------
        str or None
            The symbol stored in ``_MIN_STR`` or ``_MAX_STR`` when the value
            of the member belongs to the matching alias set; ``None`` for
            members whose value is in neither set.

        """
        if self.value in Objective._MIN_ALIASES.value:
            return Objective._MIN_STR.value
        if self.value in Objective._MAX_ALIASES.value:
            return Objective._MAX_STR.value
        return None
# =============================================================================
# _SLICER ARRAY
# =============================================================================


class _ACArray(np.ndarray, abc.Mapping):
    """Array that gives access to the alternative and criteria values.

    The behavior is the same as a numpy.ndarray but if the slice it
    receives is a value contained in the array it uses an external
    function to access the series with that criteria/alternative.

    Item assignment is disabled, and besides this it has the typical
    methods of a dictionary (``items``, ``keys`` and ``values``).

    Parameters
    ----------
    input_array : array-like
        Values exposed by the array; converted with ``numpy.asarray``.
    skc_slicer : callable
        Function called with a key when the key is contained in the array.

    """

    def __new__(cls, input_array, skc_slicer):
        obj = np.asarray(input_array).view(cls)
        obj._skc_slicer = skc_slicer
        return obj

    def __array_finalize__(self, obj):
        """Propagate the slicer to arrays derived from an ``_ACArray``.

        numpy does not run ``__new__`` when it creates an instance by
        slicing or view casting; without this hook those derived arrays
        would have no ``_skc_slicer`` and key access on them would fail
        with ``AttributeError``.
        """
        slicer = getattr(obj, "_skc_slicer", None)
        if slicer is not None:
            self._skc_slicer = slicer

    @doc_inherit(np.ndarray.__getitem__)
    def __getitem__(self, k):
        try:
            # a key that is one of the stored values is resolved by the
            # external slicer, anything else is regular ndarray indexing
            if k in self:
                return self._skc_slicer(k)
            return super().__getitem__(k)
        except IndexError:
            raise IndexError(k)

    def __setitem__(self, k, v):
        """Raise an AttributeError, this object are read-only."""
        raise AttributeError("_SlicerArray are read-only")

    @doc_inherit(abc.Mapping.items)
    def items(self):
        return ((e, self[e]) for e in self)

    @doc_inherit(abc.Mapping.keys)
    def keys(self):
        return iter(self)

    @doc_inherit(abc.Mapping.values)
    def values(self):
        return (self[e] for e in self)


# =============================================================================
# DECISION MATRIX
# =============================================================================
class DecisionMatrix:
    """Representation of all data needed in the MCDA analysis.

    This object gathers everything necessary to represent a data set used
    in MCDA:

        - An alternative matrix where each row is an alternative and each
          column is of a different criteria.
        - An optimization objective (Minimize, Maximize) for each criterion.
        - A weight for each criterion.
        - An independent type of data for each criterion

    DecisionMatrix has two main forms of construction:

    1. Use the default constructor of the DecisionMatrix class with a
       :py:class:`pandas.DataFrame` where the index is the alternatives
       and the columns are the criteria; an iterable with the objectives
       with the same amount of elements that columns/criteria has the
       dataframe; and an iterable with the weights also with the same
       amount of elements as criteria.

       .. code-block:: pycon

           >>> import pandas as pd
           >>> from skcriteria import DecisionMatrix, mkdm

           >>> data_df = pd.DataFrame(
           ...     [[1, 2, 3], [4, 5, 6]],
           ...     index=["A0", "A1"],
           ...     columns=["C0", "C1", "C2"]
           ... )
           >>> objectives = [min, max, min]
           >>> weights = [1, 1, 1]

           >>> dm = DecisionMatrix(data_df, objectives, weights)
           >>> dm
              C0[▼ 1.0] C1[▲ 1.0] C2[▼ 1.0]
           A0         1         2         3
           A1         4         5         6
           [2 Alternatives x 3 Criteria]

    2. Use the classmethod `DecisionMatrix.from_mcda_data` which requests
       the data in a more natural way for this type of analysis
       (the weights, the criteria / alternative names, and the data types
       are optional)

       .. code-block:: pycon

           >>> DecisionMatrix.from_mcda_data(
           ...     [[1, 2, 3], [4, 5, 6]],
           ...     [min, max, min],
           ...     [1, 1, 1])
              C0[▼ 1.0] C1[▲ 1.0] C2[▼ 1.0]
           A0         1         2         3
           A1         4         5         6
           [2 Alternatives x 3 Criteria]

       For simplicity a function is offered at the module level analogous to
       ``from_mcda_data`` called ``mkdm`` (make decision matrix).

    Parameters
    ----------
    data_df: :py:class:`pandas.DataFrame`
        Dataframe where the index is the alternatives and the columns
        are the criteria. A DataFrame is copied; any other value is passed
        to the ``pandas.DataFrame`` constructor.
    objectives: :py:class:`numpy.ndarray`
        An iterable with the targets with sense of optimality of every
        criteria (You can use any alias defined in Objective)
        the same length as columns/criteria has the data_df.
    weights: :py:class:`numpy.ndarray`
        An iterable with the weights also with the same amount of elements
        as criteria.

    Raises
    ------
    ValueError
        If the number of weights, objectives and columns of ``data_df``
        are not all the same.

    """

    def __init__(self, data_df, objectives, weights):
        self._data_df = (
            data_df.copy()
            if isinstance(data_df, pd.DataFrame)
            else pd.DataFrame(data_df)
        )

        # the aliases are stored as received; they are converted to
        # ``Objective`` members every time the ``objectives`` property is read
        self._objectives = np.asarray(objectives, dtype=object)
        self._weights = np.asanyarray(weights, dtype=float)

        if not (
            len(self._data_df.columns)
            == len(self._weights)
            == len(self._objectives)
        ):
            raise ValueError(
                "The number of weights, and objectives must be equal to the "
                "number of criteria (number of columns in data_df)"
            )

    # CUSTOM CONSTRUCTORS =====================================================

    @classmethod
    def from_mcda_data(
        cls,
        matrix,
        objectives,
        weights=None,
        alternatives=None,
        criteria=None,
        dtypes=None,
    ):
        """Create a new DecisionMatrix object.

        This method receives the parts of the matrix, in what conceptually
        the matrix of alternatives is usually divided

        Parameters
        ----------
        matrix: Iterable
            The matrix of alternatives. Where every row is an alternative
            and every column is a criteria.

        objectives: Iterable
            The array with the sense of optimality of every criteria.
            You can use any alias provided by the objective class.

        weights: Iterable or None (default ``None``)
            Optional weights of the criteria. If is ``None`` all the criteria
            are weighted with 1.

        alternatives: Iterable or None (default ``None``)
            Optional names of the alternatives. If is ``None``,
            all the alternatives are named "A[n]" where n is the number of
            the row of `matrix` starting at 0.

        criteria: Iterable or None (default ``None``)
            Optional names of the criteria. If is ``None``,
            all the criteria are named "C[m]" where m is the number of
            the column of `matrix` starting at 0.

        dtypes: Iterable or None (default ``None``)
            Optional types of the criteria. If is None, the type is inferred
            automatically by pandas.

        Returns
        -------
        :py:class:`DecisionMatrix`
            A new decision matrix.

        Raises
        ------
        ValueError
            If ``matrix`` does not have 2 dimensions, or if ``alternatives``,
            ``criteria`` or ``dtypes`` do not have as many elements as the
            matching dimension of ``matrix``.

        Example
        -------

        .. code-block:: pycon

            >>> DecisionMatrix.from_mcda_data(
            ...     [[1, 2, 3], [4, 5, 6]],
            ...     [min, max, min],
            ...     [1, 1, 1])
               C0[▼ 1.0] C1[▲ 1.0] C2[▼ 1.0]
            A0         1         2         3
            A1         4         5         6
            [2 Alternatives x 3 Criteria]

        For simplicity a function is offered at the module level analogous to
        ``from_mcda_data`` called ``mkdm`` (make decision matrix).

        Notes
        -----
        This functionality generates more sensible defaults than using the
        constructor of the DecisionMatrix class but is slower.

        """
        # first we need the number of alternatives and criteria
        try:
            a_number, c_number = np.shape(matrix)
        except ValueError:
            # the unpacking fails when the shape does not have 2 elements
            matrix_ndim = np.ndim(matrix)
            raise ValueError(
                f"'matrix' must have 2 dimensions, found {matrix_ndim} instead"
            )

        alternatives = np.asarray(
            [f"A{idx}" for idx in range(a_number)]
            if alternatives is None
            else alternatives
        )
        if len(alternatives) != a_number:
            raise ValueError(f"'alternatives' must have {a_number} elements")

        criteria = np.asarray(
            [f"C{idx}" for idx in range(c_number)]
            if criteria is None
            else criteria
        )
        if len(criteria) != c_number:
            raise ValueError(f"'criteria' must have {c_number} elements")

        weights = np.asarray(np.ones(c_number) if weights is None else weights)

        data_df = pd.DataFrame(matrix, index=alternatives, columns=criteria)

        if dtypes is not None and len(dtypes) != c_number:
            raise ValueError(f"'dtypes' must have {c_number} elements")
        elif dtypes is not None:
            dtypes = dict(zip(criteria, dtypes))
            data_df = data_df.astype(dtypes)

        return cls(data_df=data_df, objectives=objectives, weights=weights)

    # MCDA ====================================================================
    # This properties are useful to access interactively to the
    # underlying data. Except for alternatives and criteria all other
    # properties expose the data as dataframes or series

    @property
    def alternatives(self):
        """Names of the alternatives."""
        arr = self._data_df.index.to_numpy()
        slicer = self._data_df.loc.__getitem__
        return _ACArray(arr, slicer)

    @property
    def criteria(self):
        """Names of the criteria."""
        arr = self._data_df.columns.to_numpy()
        slicer = self._data_df.__getitem__
        return _ACArray(arr, slicer)

    @property
    def weights(self):
        """Weights of the criteria."""
        return pd.Series(
            self._weights,
            dtype=float,
            index=self._data_df.columns,
            name="Weights",
        )

    @property
    def objectives(self):
        """Objectives of the criteria as ``Objective`` instances."""
        return pd.Series(
            [Objective.construct_from_alias(a) for a in self._objectives],
            index=self._data_df.columns,
            name="Objectives",
        )

    @property
    def minwhere(self):
        """Mask with value True if the criterion is to be minimized."""
        mask = self.objectives == Objective.MIN
        mask.name = "minwhere"
        return mask

    @property
    def maxwhere(self):
        """Mask with value True if the criterion is to be maximized."""
        mask = self.objectives == Objective.MAX
        mask.name = "maxwhere"
        return mask

    # READ ONLY PROPERTIES ====================================================

    @property
    def iobjectives(self):
        """Objectives of the criteria as ``int``.

        - Minimize = Objective.MIN.value
        - Maximize = Objective.MAX.value

        """
        return pd.Series(
            [o.value for o in self.objectives],
            dtype=np.int8,
            index=self._data_df.columns,
        )

    @property
    def matrix(self):
        """Alternatives matrix as pandas DataFrame.

        The matrix excludes weights and objectives.

        If you want to create a DataFrame with objectives and weights, use
        ``DecisionMatrix.to_dataframe()``

        """
        return self._data_df.copy()

    @property
    def dtypes(self):
        """Dtypes of the criteria."""
        return self._data_df.dtypes.copy()

    # ACCESSORS ===============================================================
    # Every accessor is created on first use and then stored on the instance
    # itself. A ``functools.lru_cache`` stacked on the property would key a
    # module-wide cache on ``self`` and keep every DecisionMatrix alive for
    # the lifetime of the process.

    def _cached_accessor(self, cache_key, accessor_cls):
        """Return the accessor stored under ``cache_key``, creating it once.

        Parameters
        ----------
        cache_key : str
            Name under which the accessor is kept in the instance
            ``__dict__``.
        accessor_cls : callable
            Called with this decision matrix to build the accessor the first
            time it is requested.

        """
        state = vars(self)
        try:
            return state[cache_key]
        except KeyError:
            accessor = state[cache_key] = accessor_cls(self)
            return accessor

    @property
    def plot(self):
        """Plot accessor."""
        return self._cached_accessor("_plot_accessor", DecisionMatrixPlotter)

    @property
    def stats(self):
        """Descriptive statistics accessor."""
        return self._cached_accessor(
            "_stats_accessor", DecisionMatrixStatsAccessor
        )

    @property
    def dominance(self):
        """Dominance information accessor."""
        return self._cached_accessor(
            "_dominance_accessor", DecisionMatrixDominanceAccessor
        )

    # UTILITIES ===============================================================

    def copy(self, **kwargs):
        """Return a deep copy of the current DecisionMatrix.

        This method is also useful for manually modifying the values of the
        DecisionMatrix object.

        Parameters
        ----------
        kwargs :
            The same parameters supported by ``from_mcda_data()``. The values
            provided replace the existing ones in the object to be copied.

        Returns
        -------
        :py:class:`DecisionMatrix`
            A new decision matrix.

        """
        dmdict = self.to_dict()
        dmdict.update(kwargs)

        return self.from_mcda_data(**dmdict)

    def to_dataframe(self):
        """Convert the entire DecisionMatrix into a dataframe.

        The objectives and weights are added as rows before the alternatives.

        Returns
        -------
        :py:class:`pd.DataFrame`
            A Decision matrix as pandas DataFrame.

        Example
        -------

        .. code-block:: pycon

           >>> dm = DecisionMatrix.from_mcda_data(
           ...     [[1, 2, 3], [4, 5, 6]],
           ...     [min, max, min],
           ...     [1, 1, 1])
           >>> dm
              C0[▼ 1.0] C1[▲ 1.0] C2[▼ 1.0]
           A0         1         2         3
           A1         4         5         6

           >>> dm.to_dataframe()
                        C0   C1   C2
           objectives  MIN  MAX  MIN
           weights     1.0  1.0  1.0
           A0            1    2    3
           A1            4    5    6

        """
        data = np.vstack((self.objectives, self.weights, self.matrix))
        index = np.hstack((["objectives", "weights"], self.alternatives))
        df = pd.DataFrame(data, index=index, columns=self.criteria, copy=True)
        return df

    def to_dict(self):
        """Return a dict representation of the data.

        All the values are represented as numpy array. The keys are the
        parameter names of ``from_mcda_data()``.

        """
        return {
            "matrix": self.matrix.to_numpy(),
            "objectives": self.iobjectives.to_numpy(),
            "weights": self.weights.to_numpy(),
            "dtypes": self.dtypes.to_numpy(),
            "alternatives": np.asarray(self.alternatives),
            "criteria": np.asarray(self.criteria),
        }

    @deprecated(
        reason=(
            "Use 'DecisionMatrix.stats()', "
            "'DecisionMatrix.stats(\"describe\")' or "
            "'DecisionMatrix.stats.describe()' instead."
        ),
        version=0.6,
    )
    def describe(self, **kwargs):
        """Generate descriptive statistics.

        Descriptive statistics include those that summarize the central
        tendency, dispersion and shape of a dataset's distribution,
        excluding ``NaN`` values.

        Parameters
        ----------
        Same parameters as ``pandas.DataFrame.describe()``.

        Returns
        -------
        ``pandas.DataFrame``
            Summary statistics of DecisionMatrix provided.

        """
        return self._data_df.describe(**kwargs)

    # CMP =====================================================================

    @property
    def shape(self):
        """Return a tuple with (number_of_alternatives, number_of_criteria).

        dm.shape <==> np.shape(dm)

        """
        return np.shape(self._data_df)

    def __len__(self):
        """Return the number of alternatives.

        dm.__len__() <==> len(dm).

        """
        return len(self._data_df)

    def equals(self, other):
        """Return True if the decision matrix are equal.

        This method calls `DecisionMatrix.aequals` without tolerance
        (``rtol=0``, ``atol=0``, ``equal_nan=False``).

        Parameters
        ----------
        other : :py:class:`skcriteria.DecisionMatrix`
            Other instance to compare.

        Returns
        -------
        equals : :py:class:`bool`
            Returns True if the two dm are equals.

        See Also
        --------
        aequals, :py:func:`numpy.isclose`, :py:func:`numpy.all`,
        :py:func:`numpy.any`, :py:func:`numpy.equal`,
        :py:func:`numpy.allclose`.

        """
        return self.aequals(other, 0, 0, False)

    def aequals(self, other, rtol=1e-05, atol=1e-08, equal_nan=False):
        """Return True if the decision matrix are equal within a tolerance.

        The tolerance values are positive, typically very small numbers. The
        relative difference (`rtol` * abs(`b`)) and the absolute difference
        `atol` are added together to compare against the absolute difference
        between `a` and `b`.

        NaNs are treated as equal if they are in the same place and if
        ``equal_nan=True``. Infs are treated as equal if they are in the same
        place and of the same sign in both arrays.

        The comparison proceeds as follows:

        - If ``other`` is the same object return ``True``.
        - If ``other`` is not instance of 'DecisionMatrix', has different
          shape, 'criteria', 'alternatives' or 'objectives' returns
          ``False``.
        - Next check the 'weights' and the matrix itself using the provided
          tolerance.

        Parameters
        ----------
        other : :py:class:`skcriteria.DecisionMatrix`
            Other instance to compare.
        rtol : float
            The relative tolerance parameter
            (see Notes in :py:func:`numpy.allclose`).
        atol : float
            The absolute tolerance parameter
            (see Notes in :py:func:`numpy.allclose`).
        equal_nan : bool
            Whether to compare NaN's as equal. If True, NaN's in dm will be
            considered equal to NaN's in `other` in the output array.

        Returns
        -------
        aequals : :py:class:`bool`
            Returns True if the two dm are equal within the given
            tolerance; False otherwise.

        See Also
        --------
        equals, :py:func:`numpy.isclose`, :py:func:`numpy.all`,
        :py:func:`numpy.any`, :py:func:`numpy.equal`,
        :py:func:`numpy.allclose`.

        """
        # the checks short-circuit, so the attributes of ``other`` are only
        # read after it is known to be a DecisionMatrix
        return (self is other) or (
            isinstance(other, DecisionMatrix)
            and np.shape(self) == np.shape(other)
            and np.array_equal(self.criteria, other.criteria)
            and np.array_equal(self.alternatives, other.alternatives)
            and np.array_equal(self.objectives, other.objectives)
            and np.allclose(
                self.weights,
                other.weights,
                rtol=rtol,
                atol=atol,
                equal_nan=equal_nan,
            )
            and np.allclose(
                self.matrix,
                other.matrix,
                rtol=rtol,
                atol=atol,
                equal_nan=equal_nan,
            )
        )

    # repr ====================================================================

    def _get_cow_headers(self):
        """Columns names with COW (Criteria, Objective, Weight)."""
        headers = []
        fmt_weights = pd_fmt.format_array(self.weights, None)
        for c, o, w in zip(self.criteria, self.objectives, fmt_weights):
            header = f"{c}[{o.to_string()}{w}]"
            headers.append(header)
        return headers

    def _get_axc_dimensions(self):
        """Dimension footer with AxC (Alternatives x Criteria)."""
        a_number, c_number = self.shape
        dimensions = f"{a_number} Alternatives x {c_number} Criteria"
        return dimensions

    def __repr__(self):
        """dm.__repr__() <==> repr(dm)."""
        header = self._get_cow_headers()
        dimensions = self._get_axc_dimensions()

        # the layout follows the pandas display options
        max_rows = pd.get_option("display.max_rows")
        min_rows = pd.get_option("display.min_rows")
        max_cols = pd.get_option("display.max_columns")
        max_colwidth = pd.get_option("display.max_colwidth")

        width = (
            pd.io.formats.console.get_console_size()[0]
            if pd.get_option("display.expand_frame_repr")
            else None
        )

        original_string = self._data_df.to_string(
            max_rows=max_rows,
            min_rows=min_rows,
            max_cols=max_cols,
            line_width=width,
            max_colwidth=max_colwidth,
            show_dimensions=False,
            header=header,
        )

        # add dimension
        string = f"{original_string}\n[{dimensions}]"

        return string

    def _repr_html_(self):
        """Return a html representation for a particular DecisionMatrix.

        Mainly for IPython notebook.

        """
        header = dict(zip(self.criteria, self._get_cow_headers()))
        dimensions = self._get_axc_dimensions()

        # retrieve the original string
        with pd.option_context("display.show_dimensions", False):
            original_html = self._data_df._repr_html_()

        # add dimension
        html = (
            "<div class='decisionmatrix'>\n"
            f"{original_html}"
            f"<em class='decisionmatrix-dim'>{dimensions}</em>\n"
            "</div>"
        )

        # now we need to change the table header: every header cell whose
        # text is a criterion gets the COW header, the others stay the same
        d = pq.PyQuery(html)
        for th in d("div.decisionmatrix table.dataframe > thead > tr > th"):
            crit = th.text
            th.text = header.get(crit, crit)

        return str(d)
# ============================================================================= # factory # =============================================================================
@functools.wraps(DecisionMatrix.from_mcda_data)
def mkdm(*args, **kwargs):
    """Alias for DecisionMatrix.from_mcda_data."""
    # functools.wraps copies the metadata (name, docstring, ...) of
    # ``from_mcda_data`` onto this function; every positional and keyword
    # argument is forwarded untouched.
    decision_matrix = DecisionMatrix.from_mcda_data(*args, **kwargs)
    return decision_matrix