Source code for skcriteria.preprocessing.weighters

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
# Copyright (c) 2022-2025 QuatroPe
# All rights reserved.

# =============================================================================
# DOCS
# =============================================================================

"""Functionalities for weight the criteria.

In addition to the main functionality, an MCDA agnostic function is offered
to calculate weights to a matrix along an arbitrary axis.


"""

# =============================================================================
# IMPORTS
# =============================================================================

from ..utils import hidden

with hidden():
    import abc
    import warnings

    import numpy as np

    import pandas as pd

    import scipy.stats

    from ._preprocessing_base import SKCTransformerABC
    from .scalers import matrix_scale_by_cenit_distance
    from ..core import Objective
    from ..utils import deprecated, doc_inherit

# =============================================================================
# BASE CLASS
# =============================================================================


class SKCWeighterABC(SKCTransformerABC):
    """Abstract base for transformers that compute new criteria weights.

    Concrete subclasses must implement ``_weight_matrix``; they are not
    expected to override ``_transform_data``, which is implemented here on
    top of ``_weight_matrix``.

    """

    _skcriteria_abstract_class = True

    @abc.abstractmethod
    def _weight_matrix(self, matrix, objectives, weights):
        """Compute a new array of weights.

        Parameters
        ----------
        matrix: :py:class:`numpy.ndarray`
            The decision matrix to be weighted.
        objectives: :py:class:`numpy.ndarray`
            The objectives in numeric format.
        weights: :py:class:`numpy.ndarray`
            The original weights.

        Returns
        -------
        :py:class:`numpy.ndarray`
            An array of weights.

        """
        raise NotImplementedError()

    @doc_inherit(SKCTransformerABC._transform_data)
    def _transform_data(self, matrix, objectives, weights, **kwargs):
        computed = self._weight_matrix(
            matrix=matrix, objectives=objectives, weights=weights
        )
        # The matrix and the objectives are forwarded untouched; only the
        # weights are replaced by the freshly computed ones.
        return {
            **kwargs,
            "matrix": matrix,
            "objectives": objectives,
            "weights": computed,
        }
# =============================================================================
# SAME WEIGHT
# =============================================================================
def equal_weights(matrix, base_value=1):
    r"""Assign the same weight to every criterion.

    Every weight is ``base_value`` divided by the number of columns.

    .. math::

        w_j = \frac{base\_value}{m}

    Where $m$ is the number of columns/criteria in matrix.

    Parameters
    ----------
    matrix: :py:class:`numpy.ndarray` like.
        The matrix of alternatives on which to calculate weights.
    base_value: int or float.
        Value to be divided by the number of criteria to create the weights.

    Returns
    -------
    :py:class:`numpy.ndarray`
        array of weights

    Examples
    --------
    .. code-block:: pycon

        >>> from skcriteria.preprocessing.weighters import equal_weights
        >>> mtx = [[1, 2], [3, 4]]
        >>> equal_weights(mtx)
        array([0.5, 0.5])

    """
    n_columns = np.shape(matrix)[1]
    return np.full(n_columns, base_value / n_columns, dtype=float)
class EqualWeighter(SKCWeighterABC):
    """Give every criterion the same weight.

    Each weight is ``base_value`` divided by the total number of criteria
    (see ``equal_weights``).

    """

    _skcriteria_parameters = ["base_value"]

    def __init__(self, base_value=1.0):
        # Stored as float regardless of the numeric type received.
        self._base_value = float(base_value)

    @property
    def base_value(self):
        """Value to be normalized by the number of criteria."""
        return self._base_value

    @doc_inherit(SKCWeighterABC._weight_matrix)
    def _weight_matrix(self, matrix, **kwargs):
        base = self.base_value
        return equal_weights(matrix, base)
# =============================================================================
# STD
# =============================================================================
def std_weights(matrix):
    r"""Calculate weights from the standard deviation of each criterion.

    The standard deviation of every column (computed with ``ddof=1``) is
    divided by the sum of the standard deviations of all the columns, so
    the weights add up to 1.

    .. math::

        w_j = \frac{s_j}{\sum_{k=1}^{m} s_k}

    Where $m$ is the number of columns/criteria in matrix.

    Parameters
    ----------
    matrix: :py:class:`numpy.ndarray` like.
        The matrix of alternatives on which to calculate weights.

    Returns
    -------
    :py:class:`numpy.ndarray`
        array of weights

    Examples
    --------
    .. code-block:: pycon

        >>> from skcriteria.preprocessing.weighters import std_weights
        >>> mtx = [[1, 2], [3, 4]]
        >>> std_weights(mtx)
        array([0.5, 0.5])

    """
    deviations = np.std(matrix, axis=0, ddof=1)
    total = np.sum(deviations)
    return deviations / total
class StdWeighter(SKCWeighterABC):
    """Use the normalized standard deviation of each criterion as weight.

    The computation is delegated to ``std_weights``.

    """

    _skcriteria_parameters = []

    @doc_inherit(SKCWeighterABC._weight_matrix)
    def _weight_matrix(self, matrix, **kwargs):
        # Only the matrix takes part in the computation.
        new_weights = std_weights(matrix)
        return new_weights
# =============================================================================
# ENTROPY
# =============================================================================
def entropy_weights(matrix):
    """Calculate weights as the complement of the entropy of each criterion.

    It uses the underlying ``scipy.stats.entropy`` function which assumes
    that the values of the criteria are probabilities of a distribution.

    The logarithmic base to use is the number of rows/alternatives in the
    matrix.

    This routine will normalize the sum of the weights to 1.

    See Also
    --------
    scipy.stats.entropy :
        Calculate the entropy of a distribution for given probability values.

    """
    n_alternatives = len(matrix)
    column_entropy = scipy.stats.entropy(matrix, base=n_alternatives, axis=0)
    complement = 1 - column_entropy
    total = np.sum(complement)
    return complement / total
class EntropyWeighter(SKCWeighterABC):
    """Use the complement of the entropy of each criterion as its weight.

    It uses the underlying ``scipy.stats.entropy`` function which assumes
    that the values of the criteria are probabilities of a distribution.

    The logarithmic base to use is the number of rows/alternatives in the
    matrix.

    This transformer will normalize the sum of the weights to 1.

    See Also
    --------
    scipy.stats.entropy :
        Calculate the entropy of a distribution for given probability values.

    """

    _skcriteria_parameters = []

    @doc_inherit(SKCWeighterABC._weight_matrix)
    def _weight_matrix(self, matrix, **kwargs):
        # Only the matrix takes part in the computation.
        new_weights = entropy_weights(matrix)
        return new_weights
# =============================================================================
# CRITIC
# =============================================================================
@deprecated(
    reason="Please use ``pd.DataFrame(arr.T).corr('pearson')``",
    version="0.8",
)
def pearson_correlation(arr):
    """Return Pearson product-moment correlation coefficients.

    This function is a thin wrapper of ``numpy.corrcoef``.

    Parameters
    ----------
    arr: array like
        A 1-D or 2-D array containing multiple variables and observations.
        Each row of arr represents a variable, and each column a single
        observation of all those variables.

    Returns
    -------
    R: numpy.ndarray
        The correlation coefficient matrix of the variables.

    See Also
    --------
    numpy.corrcoef :
        Return Pearson product-moment correlation coefficients.

    """
    # NOTE: the suggested replacement in the deprecation message uses
    # ``DataFrame.corr``; pandas has no ``DataFrame.correlation`` method.
    return np.corrcoef(arr)
@deprecated(
    reason="Please use ``pd.DataFrame(arr.T).corr('spearman')``",
    version="0.8",
)
def spearman_correlation(arr):
    """Calculate a Spearman correlation coefficient.

    This function is a thin wrapper of ``scipy.stats.spearmanr``.

    Parameters
    ----------
    arr: array like
        A 1-D or 2-D array containing multiple variables and observations.
        Each row of arr represents a variable, and each column a single
        observation of all those variables.

    Returns
    -------
    R: numpy.ndarray
        The correlation coefficient matrix of the variables.

    See Also
    --------
    scipy.stats.spearmanr :
        Calculate a Spearman correlation coefficient with associated p-value.

    """
    # Coerce first so that plain sequences (which have no ``.T``) are
    # accepted, as the "array like" contract states.
    observations = np.asarray(arr).T
    return scipy.stats.spearmanr(observations, axis=0).correlation
def critic_weights(matrix, objectives, correlation="pearson", scale=True):
    """Execute the CRITIC method without any validation.

    Parameters
    ----------
    matrix: array like
        The decision matrix; it is converted to a float array.
    objectives: array like
        The objectives, forwarded to ``matrix_scale_by_cenit_distance``
        when ``scale`` is true.
    correlation: str or callable
        Forwarded as ``method`` to ``pandas.DataFrame.corr``.
    scale: bool
        If true, the matrix is scaled with
        ``matrix_scale_by_cenit_distance`` before anything else.

    Returns
    -------
    :py:class:`numpy.ndarray`
        The weights, normalized to add up to 1.

    """
    # The paper:
    #   Diakoulaki, D., Mavrotas, G., & Papayannakis, L. (1995).
    #   Determining objective weights in multiple criteria problems:
    #   The critic method. Computers & Operations Research, 22(7), 763-770.

    # equation 1
    data = np.asarray(matrix, dtype=float)

    # equations 2 and 3
    if scale:
        data = matrix_scale_by_cenit_distance(data, objectives=objectives)

    # equation 4: conflict between criteria
    corr_matrix = pd.DataFrame(data).corr(method=correlation)
    conflict = np.sum(1 - corr_matrix.to_numpy(copy=True), axis=0)

    # equation 5: contrast intensity times conflict
    contrast = np.std(data, axis=0)
    information = contrast * conflict

    # equation 6
    return information / np.sum(information)
class CRITIC(SKCWeighterABC):
    """CRITIC (CRiteria Importance Through Intercriteria Correlation).

    The method aims at the determination of objective weights of relative
    importance in MCDM problems. The weights derived incorporate both contrast
    intensity and conflict which are contained in the structure of the
    decision problem.

    Parameters
    ----------
    correlation: str ["pearson", "spearman", "kendall"] or callable.
        This is the correlation function used to evaluate the discordance
        between two criteria. In other words, how much one criterion
        conflicts with the decision made by the other criteria.
        By default the ``pearson`` correlation is used; the ``spearman``
        and ``kendall`` correlations are also available.
        It is also possible to provide a callable that takes two 1d arrays
        and returns a float. Note that the returned matrix from corr will
        have 1 along the diagonals and will be symmetric regardless of the
        callable's behavior.

    scale: bool (default ``True``)
        True if it is necessary to scale the data with
        ``skcriteria.preprocessing.matrix_scale_by_cenit_distance`` prior
        to calculating the correlation.

    Warnings
    --------
    UserWarning:
        If some objective is to minimize. The original paper only suggests
        using it against maximization criteria, but there is no real
        mathematical constraint to use it for minimization.

    References
    ----------
    :cite:p:`diakoulaki1995determining`

    """

    CORRELATION = ("pearson", "spearman", "kendall")

    _skcriteria_parameters = ["correlation", "scale"]

    def __init__(self, correlation="pearson", scale=True):
        is_named_method = correlation in self.CORRELATION
        if not is_named_method and not callable(correlation):
            corr_keys = ", ".join(f"'{c}'" for c in self.CORRELATION)
            raise ValueError(f"Correlation must be {corr_keys} or a callable")

        self._correlation = correlation
        self._scale = bool(scale)

    @property
    def scale(self):
        """Return if it is necessary to scale the data."""
        return self._scale

    @property
    def correlation(self):
        """Correlation function."""
        return self._correlation

    @doc_inherit(SKCWeighterABC._weight_matrix)
    def _weight_matrix(self, matrix, objectives, **kwargs):
        has_minimization = Objective.MIN.value in objectives
        if has_minimization:
            warnings.warn(
                "Although CRITIC can operate with minimization objectives, "
                "this is not recommended. Consider reversing the weights "
                "for these cases."
            )

        settings = {"correlation": self.correlation, "scale": self.scale}
        return critic_weights(matrix, objectives, **settings)
# Deprecated spelling of ``CRITIC``, kept as an empty subclass.
# No docstring is written here on purpose: the class is decorated with
# ``doc_inherit(CRITIC, ...)``, which presumably supplies it -- confirm
# against ``skcriteria.utils.doc_inherit`` before adding one.
@deprecated(
    reason="Use ``skcriteria.preprocessing.weighters.CRITIC`` instead",
    version="0.8",
)
@doc_inherit(CRITIC, warn_class=False)
class Critic(CRITIC):
    pass
# =============================================================================
# MEREC
# =============================================================================


def _merec_norm(matrix, objectives):
    """Normalize the decision matrix following the MEREC convention.

    Columns whose objective equals ``Objective.MAX.value`` are replaced by
    the column minimum divided by each value; every other column is
    replaced by each value divided by the column maximum.

    Parameters
    ----------
    matrix: :py:class:`numpy.ndarray`
        The decision matrix.
    objectives: array like
        The objective of each column.

    Returns
    -------
    :py:class:`numpy.ndarray`
        The normalized matrix.

    """
    is_maximize = np.equal(objectives, Objective.MAX.value)
    column_max = matrix.max(axis=0)
    column_min = matrix.min(axis=0)

    min_over_value = column_min / matrix
    value_over_max = matrix / column_max
    return np.where(is_maximize, min_over_value, value_over_max)
def merec_weights(matrix, objectives):
    """Execute the MEREC method without any validation.

    Parameters
    ----------
    matrix: array like
        The decision matrix; it is converted to a float array.
    objectives: array like
        The objective of each criterion, forwarded to ``_merec_norm``.

    Returns
    -------
    :py:class:`numpy.ndarray`
        The weights, normalized to add up to 1.

    """
    values = np.asarray(matrix, dtype=float)
    criteria_count = values.shape[1]

    # MEREC normalization, driven by the objective of each criterion.
    scaled = _merec_norm(values, objectives=objectives)
    abs_log = np.abs(np.log(scaled))

    # Overall performance of each alternative using all the criteria
    # (one value per row, kept as a column for broadcasting).
    overall = np.log(1 + np.mean(abs_log, axis=1, keepdims=True))

    # Performance of each alternative when one criterion is left out:
    # column j of the mask is zero at row j, so the product sums every
    # criterion except j.
    leave_one_out = np.ones((criteria_count, criteria_count)) - np.eye(
        criteria_count
    )
    reduced = np.log(1 + (abs_log @ leave_one_out) / criteria_count)

    # Total absolute deviation caused by removing each criterion.
    removal_effect = np.sum(np.abs(reduced - overall), axis=0)

    return removal_effect / np.sum(removal_effect)
class MEREC(SKCWeighterABC):
    """MEREC: Method based on the Removal Effects of Criteria.

    The MEREC method computes objective weights for each criterion based on
    its impact on the overall performance of alternatives when removed. The
    idea is that the more a criterion affects the total evaluation when
    excluded, the more important it is.

    This implementation includes a simple linear normalization.

    References
    ----------
    :cite:p:`keshavarz2021determination`

    """

    _skcriteria_parameters = []

    @doc_inherit(SKCWeighterABC._weight_matrix)
    def _weight_matrix(self, matrix, objectives, **kwargs):
        # The original weights are not used by the computation.
        new_weights = merec_weights(matrix, objectives=objectives)
        return new_weights
# =============================================================================
# GINI
# =============================================================================
def gini_weights(matrix):
    r"""Calculate weights using the Gini coefficient.

    Computes the weights for each criterion (column) of the input matrix by
    calculating the Gini coefficient of each column, then normalizing those
    values to sum to 1.

    The columns are sorted to use the more efficient formula for the Gini
    coefficient:

    .. math::

        G = \frac{1}{n} \left( n + 1 - 2 \cdot \frac{
            \sum_{i=1}^n \left( \sum_{j=1}^i x_j \right)
        }{
            \sum_{i=1}^n x_i
        } \right)

    Parameters
    ----------
    matrix: :py:class:`numpy.ndarray` like.
        The matrix of alternatives on which to calculate weights.

    Returns
    -------
    :py:class:`numpy.ndarray`
        array of weights

    """
    # Accept any array-like (e.g. nested lists), as the other weight
    # functions of this module do; ``.shape`` below needs an ndarray.
    matrix = np.asarray(matrix, dtype=float)
    n = matrix.shape[0]

    sorted_columns = np.sort(matrix, axis=0)
    column_sums = np.sum(sorted_columns, axis=0)

    # sum_of_cumulatives is the nested sum described in the formula above:
    # sum from i = 1 to n of (sum from j = 1 to i of x_j)
    cumulative_sums = np.cumsum(sorted_columns, axis=0)
    sum_of_cumulatives = np.sum(cumulative_sums, axis=0)

    gini = (n + 1 - 2 * sum_of_cumulatives / column_sums) / n

    # weights are the normalized ginis of each column
    return gini / np.sum(gini)
class GiniWeighter(SKCWeighterABC):
    """Calculate the weights with the Gini coefficient.

    The method aims at the determination of objective weights of relative
    importance in MCDM problems. It uses the Gini coefficient of the data of
    each criterion to assign the weights, giving a higher weight to a more
    unequal distribution.

    Only the decision matrix takes part in the computation.

    References
    ----------
    :cite:p:`li2009new`

    """

    _skcriteria_parameters = []

    @doc_inherit(SKCWeighterABC._weight_matrix)
    def _weight_matrix(self, matrix, **kwargs):
        new_weights = gini_weights(matrix)
        return new_weights
# =============================================================================
# RANCOM
# =============================================================================
def rancom_weights(weights):
    """RANCOM (RANking COMparison) weighting method.

    The RANCOM method is designed to handle expert inaccuracies in
    multi-criteria decision making by transforming initial weight values
    through ranking comparison. The method builds a Matrix of Ranking
    Comparison (MAC) where all weights are compared pairwise, then calculates
    Summed Criteria Weights (SWC) to derive final normalized weights.

    The method operates under the following conventions:

    - Input weights that do not add up to 1 are rescaled by their sum
    - Higher weight values correspond to higher importance (rank 1 is the
      most important criterion)
    - Ties between criteria are allowed

    Algorithm Steps:

    1. Convert weights to dense rankings (higher weight = lower rank number
       = higher importance)
    2. Build MAC (Matrix of Ranking Comparison): An nxn matrix where rankings
       are compared pairwise with values:

       - aij = 1 if rank_i < rank_j (criteria i is more important than j)
       - aij = 0.5 if rank_i = rank_j (criteria i and j have equal
         importance)
       - aij = 0 if rank_i > rank_j (criteria i is less important than j)

    3. Calculate SWC (Summed Criteria Weights): Sum each row of the MAC
       matrix
    4. Normalize final weights: wi = SWCi / sum(SWC)

    Parameters
    ----------
    weights: array-like
        Input weights. Higher values correspond to higher importance.
        The input is never modified.

    Returns
    -------
    :py:class:`numpy.ndarray`
        The transformed weights, adding up to 1.

    Notes
    -----
    - RANCOM is particularly useful when dealing with subjective weight
      assignments from experts where small inaccuracies in weight
      specification can significantly impact results.
    - The method provides a systematic way to handle ranking inconsistencies.
    - Unlike other weighting methods, RANCOM transforms existing weights
      rather than deriving weights from the decision matrix.

    Examples
    --------
    .. code-block:: pycon

        >>> from skcriteria.preprocessing.weighters import rancom_weights
        >>> weights = [0.4, 0.2, 0.25, 0.05]
        >>> rancom_weights(weights)
        array([0.4375, 0.1875, 0.3125, 0.0625])

    """
    # Work on a float array so that lists and integer arrays are accepted.
    weights = np.asarray(weights, dtype=float)

    # Normalize weights if necessary. The division builds a new array on
    # purpose: an in-place ``/=`` would modify the caller's data.
    weights_sum = np.sum(weights)
    if weights_sum != 1:
        weights = weights / weights_sum

    # Convert weights to rankings: negating the weights makes the highest
    # weight receive rank 1 (the most important criterion).
    rankings = scipy.stats.rankdata(-weights, method="dense")

    # Build MAC matrix based on rankings
    rank_i = rankings.reshape(-1, 1)
    rank_j = rankings.reshape(1, -1)

    rancom_matrix = np.where(
        rank_i < rank_j, 1, np.where(rank_i == rank_j, 0.5, 0)
    )

    summed_criteria_weights = np.sum(rancom_matrix, axis=1)
    return summed_criteria_weights / np.sum(summed_criteria_weights)
class RANCOM(SKCWeighterABC):
    """Ranking Comparison (RANCOM) method.

    The RANCOM method is designed to handle expert inaccuracies in
    multi-criteria decision making by transforming initial weight values
    through ranking comparison. The method builds a Matrix of Ranking
    Comparison (MAC) where all weights are compared pairwise, then calculates
    Summed Criteria Weights (SWC) to derive final normalized weights.

    RANCOM uses predefined weights provided through the weighting process
    and does not require additional configuration parameters.

    Warnings
    --------
    UserWarning
        If there are fewer than five weights. The original paper suggests
        that RANCOM works better with five or more criteria, though nothing
        prevents its use with four or fewer criteria.

    References
    ----------
    :cite:p:`WIECKOWSKI2023106114`

    """

    _skcriteria_parameters = []

    @doc_inherit(SKCWeighterABC._weight_matrix)
    def _weight_matrix(self, matrix, objectives, weights):
        # Only the existing weights take part in the computation.
        criteria_count = len(weights)
        if criteria_count < 5:
            warnings.warn(
                "RANCOM method proves to be a more suitable solution to "
                "handle the expert inaccuracies for the problems with 5 or "
                "more criteria. Despite this, nothing prevents its use with "
                "four or fewer."
            )

        new_weights = rancom_weights(weights)
        return new_weights