Source code for fairdo.metrics.group

import numpy as np
import warnings
from itertools import product

from fairdo.utils.helper import generate_pairs


def statistical_parity_abs_diff_multi(y: np.array, z: np.array,
                                      agg_attribute=np.max,
                                      agg_group=np.max,
                                      positive_label=1,
                                      **kwargs) -> float:
    """
    Aggregate statistical parity disparities over several protected attributes.

    For every protected attribute (column of `z`) the rate of positive labels
    is computed per group. The absolute rate differences between groups are
    reduced with `agg_group`, and the resulting per-attribute values are
    reduced with `agg_attribute`. Attributes may be binary or non-binary.

    Parameters
    ----------
    y: np.array
        Flattened binary array of shape (n_samples,), can be the prediction
        or the truth label.
    z: np.array
        Array of shape (n_samples, n_protected_attributes). A 1D array is
        treated as a single protected attribute.
    agg_attribute: callable, optional
        Reduction applied to the list of per-attribute disparities.
        Default is np.max.
    agg_group: callable, optional
        Reduction applied to the absolute pairwise differences inside one
        attribute. Default is np.max, which is evaluated directly as
        (largest rate - smallest rate).
    positive_label: int, optional
        Label considered as positive. Default is 1.
    **kwargs: dict
        Accepted and ignored.

    Returns
    -------
    float
        Aggregated attribute disparity.
    """
    # A flat protected attribute is handled as a one-column matrix.
    if len(z.shape) < 2:
        z = z.reshape(-1, 1)

    # Flip the labels when 0 is the favourable outcome.
    if positive_label == 0:
        y = 1 - y
    y = y.astype(int)

    # Distinct values (groups) of every protected attribute.
    levels_per_attribute = [np.unique(z[:, col]) for col in range(z.shape[1])]

    per_attribute = []
    for attr_idx, levels in enumerate(levels_per_attribute):
        column = z[:, attr_idx]

        # Positive rate of each group: positives inside the group / group size.
        rate_of = {}
        for level in levels:
            members = column == level
            rate_of[level] = np.sum(y & members) / np.sum(members)

        if agg_group == np.max:
            # The largest pairwise gap is simply max rate - min rate,
            # so no pairs need to be enumerated.
            rates = list(rate_of.values())
            per_attribute.append(np.max(rates) - np.min(rates))
            continue

        group_pairs = generate_pairs(levels)
        gaps = [np.abs(rate_of[a] - rate_of[b]) for a, b in group_pairs]
        try:
            per_attribute.append(agg_group(gaps))
        except ValueError:
            warnings.warn(f"Could not aggregate disparity for attribute {attr_idx} with aggregation function {agg_group}. \n"
                          f"The disparity for this attribute is {gaps}. "
                          f"Returning disparity of 0.")
            per_attribute.append(0)

    return agg_attribute(per_attribute)
def statistical_parity_abs_diff_intersectionality(y: np.array, z: np.array,
                                                  agg_group=np.max,
                                                  **kwargs) -> float:
    """
    Calculate the absolute difference in statistical parity across the
    intersections of all protected attributes.

    Every distinct combination of protected attribute values (row of `z`)
    forms one subgroup. The rate of positive labels is computed per subgroup
    and the absolute differences between subgroups are reduced with
    `agg_group`. Protected attributes can be binary or non-binary.

    Parameters
    ----------
    y: np.array
        Flattened binary array of shape (n_samples,), can be the prediction
        or the truth label. It is cast to int before use.
    z: np.array
        Array of shape (n_samples, n_protected_attributes) representing the
        protected attributes. A 1D array is treated as a single attribute.
    agg_group: callable, optional
        Aggregation function for the subgroup differences. Default is np.max,
        which is evaluated directly as (largest rate - smallest rate).
    **kwargs: dict
        Additional keyword arguments. Accepted and ignored.

    Returns
    -------
    float
        Aggregated disparity between the intersectional subgroups.
    """
    y = np.asarray(y).astype(int)
    z = np.asarray(z)
    # Same convenience as the per-attribute variant: a flat `z` is one column.
    if z.ndim < 2:
        z = z.reshape(-1, 1)

    # Identify each subgroup by the tuple of its stringified attribute values.
    # Tuple keys keep the attribute boundaries, so (1, 11) and (11, 1) stay
    # distinct, and nothing is stored in a fixed-width string array that
    # could truncate longer keys.
    subgroup_index = {}
    membership = np.fromiter(
        (subgroup_index.setdefault(tuple(map(str, row)), len(subgroup_index))
         for row in z),
        dtype=np.intp,
        count=z.shape[0],
    )
    n_subgroups = len(subgroup_index)

    # Positive rate per subgroup: positives inside the subgroup / subgroup size.
    sizes = np.bincount(membership, minlength=n_subgroups)
    positives = np.bincount(membership, weights=y, minlength=n_subgroups)
    parities = positives / sizes

    if agg_group == np.max:
        # The largest pairwise gap equals max rate - min rate.
        group_disparity = np.max(parities) - np.min(parities)
    else:
        pairs = generate_pairs(list(range(n_subgroups)))
        group_disparity = [np.abs(parities[i] - parities[j]) for i, j in pairs]
    return agg_group(group_disparity)
def statistical_parity_abs_diff(y: np.array, z: np.array,
                                agg_group=np.sum,
                                **kwargs) -> float:
    """
    Absolute statistical parity difference between the groups of a single
    protected attribute, aggregated with `agg_group`.

    The work is delegated to `statistical_parity_abs_diff_multi`; this
    function only enforces that `z` describes exactly one attribute.
    The protected attribute `z` can be binary or non-binary.

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened array of shape y, represents the protected attribute.
        Can represent non-binary protected attribute.
    agg_group: callable, optional
        Aggregation function for the group. Default is np.sum.
    **kwargs: dict
        Forwarded unchanged to `statistical_parity_abs_diff_multi`
        (e.g. `positive_label`).

    Returns
    -------
    float
        The aggregated absolute statistical parity difference.

    Raises
    ------
    ValueError
        If `z` has more than one dimension.
    """
    if z.ndim <= 1:
        return statistical_parity_abs_diff_multi(y=y, z=z,
                                                 agg_group=agg_group,
                                                 **kwargs)
    raise ValueError("z must be a 1D array")
def statistical_parity_abs_diff_sum(y: np.array, z: np.array, **kwargs) -> float:
    """
    Sum of the absolute statistical parity differences between the groups
    of a protected attribute.

    Thin wrapper around `statistical_parity_abs_diff` with `agg_group=np.sum`.
    The protected attribute `z` can be binary or non-binary.

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened array of shape y, represents the protected attribute.
        Can represent non-binary protected attribute.
    **kwargs: dict
        Forwarded unchanged to `statistical_parity_abs_diff`
        (e.g. `positive_label`).

    Returns
    -------
    float
        Sum of the absolute statistical parity differences between groups.
    """
    summed_disparity = statistical_parity_abs_diff(y=y, z=z, agg_group=np.sum, **kwargs)
    return summed_disparity
def statistical_parity_abs_diff_mean(y: np.array, z: np.array, **kwargs) -> float:
    """
    Mean of the absolute statistical parity differences between the groups
    of a protected attribute.

    Thin wrapper around `statistical_parity_abs_diff` with `agg_group=np.mean`.
    The protected attribute `z` can be binary or non-binary.

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened array of shape y, represents the protected attribute.
        Can represent non-binary protected attribute.
    **kwargs: dict
        Forwarded unchanged to `statistical_parity_abs_diff`
        (e.g. `positive_label`).

    Returns
    -------
    float
        Average of the absolute statistical parity differences between groups.
    """
    mean_disparity = statistical_parity_abs_diff(y=y, z=z, agg_group=np.mean, **kwargs)
    return mean_disparity
def statistical_parity_abs_diff_max(y: np.array, z: np.array, **kwargs) -> float:
    """
    Maximum of the absolute statistical parity differences between the
    groups of a protected attribute.

    Thin wrapper around `statistical_parity_abs_diff` with `agg_group=np.max`.
    The protected attribute `z` can be binary or non-binary.

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened array of shape y, represents the protected attribute.
        Can represent non-binary protected attribute.
    **kwargs: dict
        Forwarded unchanged to `statistical_parity_abs_diff`
        (e.g. `positive_label`).

    Returns
    -------
    float
        Largest absolute statistical parity difference between groups.
    """
    max_disparity = statistical_parity_abs_diff(y=y, z=z, agg_group=np.max, **kwargs)
    return max_disparity
def statistical_parity_difference(y: np.array, z: np.array,
                                  positive_label=1,
                                  privileged_group=1,
                                  **kwargs) -> float:
    """
    Signed statistical parity difference according to [1].

    Computes (positive rate of the unprivileged group) minus (positive rate
    of the privileged group). The protected attribute `z` must be binary.
    The returned value can be negative.

    [1] A Maximal Correlation Framework for Fair Machine Learning
    (Lee et al. 2022) (https://arxiv.org/abs/2106.00051)

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened binary array of shape y, represents the protected attribute.
    positive_label: int, optional
        Label considered as positive. Default is 1.
    privileged_group: int, optional
        Label considered as privileged. Default is 1.
    **kwargs: dict
        Accepted and ignored.

    Returns
    -------
    float
        The difference in statistical parity between unprivileged and
        privileged groups.

    Raises
    ------
    ValueError
        If `z` has more than one dimension.
    """
    if z.ndim > 1:
        raise ValueError("z must be a 1D array")

    # Normalise both encodings so that 1 means "privileged" / "positive".
    in_priv = (1 - z if privileged_group == 0 else z).astype(int)
    favourable = (1 - y if positive_label == 0 else y).astype(int)
    in_unpriv = 1 - in_priv

    rate_priv = np.sum(favourable & in_priv) / np.sum(in_priv)
    rate_unpriv = np.sum(favourable & in_unpriv) / np.sum(in_unpriv)
    return rate_unpriv - rate_priv
def mean_difference(*args, **kwargs) -> float:
    """
    Alias for the `statistical_parity_difference` function.

    All positional and keyword arguments are passed through unchanged.

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened binary array of shape y, represents the protected attribute.
    positive_label: int, optional
        Label considered as positive. Default is 1.
    privileged_group: int, optional
        Label considered as privileged. Default is 1.

    Returns
    -------
    float
        The difference in statistical parity between unprivileged and
        privileged groups.
    """
    difference = statistical_parity_difference(*args, **kwargs)
    return difference
def disparate_impact_ratio(y: np.array, z: np.array,
                           positive_label=1,
                           privileged_group=1,
                           **kwargs) -> float:
    """
    Calculate the Disparate Impact ratio.

    The ratio is (positive rate of the unprivileged group) divided by
    (positive rate of the privileged group). The protected attribute `z`
    must be binary. A value of 1 indicates fairness, a value < 1 indicates
    discrimination towards the unprivileged group and a value > 1 indicates
    discrimination towards the privileged group.

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened binary array of shape y, represents the protected attribute.
    positive_label: int, optional
        Label considered as positive. Default is 1.
    privileged_group: int, optional
        Label considered as privileged. Default is 1.
    **kwargs: dict
        Accepted and ignored.

    Returns
    -------
    float
        The Disparate Impact ratio. When the privileged positive rate is 0
        the ratio is undefined; two warnings are issued and 1 is returned.
    """
    # Normalise both encodings so that 1 means "privileged" / "positive".
    in_priv = (1 - z if privileged_group == 0 else z).astype(int)
    favourable = (1 - y if positive_label == 0 else y).astype(int)
    in_unpriv = 1 - in_priv

    rate_priv = np.sum(favourable & in_priv) / np.sum(in_priv)
    rate_unpriv = np.sum(favourable & in_unpriv) / np.sum(in_unpriv)

    if rate_priv != 0:
        return rate_unpriv / rate_priv

    # Division by a zero privileged rate is undefined; fall back to "fair".
    warnings.warn("Disparate impact cannot be calculated. y=1 and z=1 are not apparent in the dataset.")
    warnings.warn("Return 1 (fair).")
    return 1
def disparate_impact_ratio_objective(y: np.array, z: np.array,
                                     positive_label=1,
                                     privileged_group=1,
                                     **kwargs) -> float:
    """
    Calculate the objective Disparate Impact ratio.

    Returns the absolute distance between 1 and the value of
    `disparate_impact_ratio`, so it can be minimised as an objective:
    lower values indicate less discrimination towards either group.
    The protected attribute `z` must be binary.

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened binary array of shape y, represents the protected attribute.
    positive_label: int, optional
        Label considered as positive. Default is 1.
    privileged_group: int, optional
        Label considered as privileged. Default is 1.
    **kwargs: dict
        Forwarded unchanged to `disparate_impact_ratio`.

    Returns
    -------
    float
        The objective Disparate Impact ratio.
    """
    ratio = disparate_impact_ratio(y, z, positive_label, privileged_group, **kwargs)
    return np.abs(1 - ratio)
def disparate_impact_ratio_deviation(y: np.array, z: np.array,
                                     positive_label=1,
                                     privileged_group=1,
                                     **kwargs) -> float:
    """
    Calculate the signed deviation of the Disparate Impact ratio from 1.

    Returns 1 minus the value of `disparate_impact_ratio`. A value of 0
    indicates fairness, a positive value indicates discrimination towards
    the unprivileged group and a negative value indicates discrimination
    towards the privileged group. The protected attribute `z` must be binary.

    Parameters
    ----------
    y: np.array
        Flattened binary array, can be the prediction or the truth label.
    z: np.array
        Flattened binary array of shape y, represents the protected attribute.
    positive_label: int, optional
        Label considered as positive. Default is 1.
    privileged_group: int, optional
        Label considered as privileged. Default is 1.
    **kwargs: dict
        Forwarded unchanged to `disparate_impact_ratio`.

    Returns
    -------
    float
        The difference between 1 and the Disparate Impact ratio.
    """
    ratio = disparate_impact_ratio(y, z, positive_label, privileged_group, **kwargs)
    return 1 - ratio
def equal_opportunity_difference(y_true: np.array, y_pred: np.array, z: np.array,
                                 positive_label=1,
                                 privileged_group=1,
                                 **kwargs) -> float:
    """
    Compute the difference in Equality of Opportunity [1] between the
    privileged group and the unprivileged group.

    The value is (true positive rate of the privileged group) minus
    (true positive rate of the unprivileged group). A value of 0 indicates
    perfect fairness, positive values indicate bias against the unprivileged
    group, while negative values indicate bias against the privileged group.

    [1] Equality of Opportunity (Hardt, Price, Srebro, 2016)
    (https://arxiv.org/abs/1610.02413)

    Parameters
    ----------
    y_true: numpy.array
        The true binary labels as a flattened array.
    y_pred: numpy.array
        The predicted binary labels from the model. Should be of the same
        shape as y_true.
    z: numpy.array
        The protected attribute as a binary array indicating the group
        (privileged or unprivileged) of each instance. Should be of the same
        shape as y_true.
    positive_label: int, optional (default=1)
        The label considered as positive in the dataset.
    privileged_group: int, optional (default=1)
        The label that denotes the privileged group. If 0, the group
        encoding of `z` is inverted before the rates are computed.
    **kwargs: dict
        Accepted and ignored.

    Returns
    -------
    float
        The difference in Equality of Opportunity between the privileged and
        unprivileged groups.
    """
    # Normalise both encodings so that 1 means "privileged" / "positive".
    in_priv = 1 - z if privileged_group == 0 else z
    if positive_label == 0:
        y_true, y_pred = 1 - y_true, 1 - y_pred

    actual = y_true.astype(int)
    predicted = y_pred.astype(int)
    in_priv = in_priv.astype(int)
    in_unpriv = 1 - in_priv

    # Actual positives of each group form the denominators of the TPRs.
    pos_priv = actual & in_priv
    pos_unpriv = actual & in_unpriv

    tpr_priv = np.sum(predicted & pos_priv) / np.sum(pos_priv)
    tpr_unpriv = np.sum(predicted & pos_unpriv) / np.sum(pos_unpriv)
    return tpr_priv - tpr_unpriv
def equal_opportunity_abs_diff(*args, **kwargs):
    """
    Compute the absolute difference in Equality of Opportunity [1].

    [1] Equality of Opportunity (Hardt, Price, Srebro, 2016)
    (https://arxiv.org/abs/1610.02413)

    Parameters
    ----------
    *args: arguments
        Positional arguments passed unchanged to
        `equal_opportunity_difference`.
    **kwargs: keyword arguments
        Keyword arguments passed unchanged to
        `equal_opportunity_difference`.

    Returns
    -------
    float
        The absolute difference in Equality of Opportunity between
        privileged and unprivileged groups.
    """
    signed_difference = equal_opportunity_difference(*args, **kwargs)
    return np.abs(signed_difference)
def predictive_equality_difference(y_true: np.array, y_pred: np.array, z: np.array,
                                   positive_label=1,
                                   privileged_group=1,
                                   **kwargs) -> float:
    """
    Calculate the difference in Predictive Equality.

    The value is (false positive rate of the privileged group) minus
    (false positive rate of the unprivileged group).

    Parameters
    ----------
    y_true: numpy.array
        True binary labels as a flattened array.
    y_pred: numpy.array
        Predicted binary labels as a flattened array. Must have same shape
        as y_true.
    z: numpy.array
        Binary array denoting privileged (1) or unprivileged (0) group.
        Same shape as y_true.
    positive_label: int, optional
        Label considered as positive, default is 1.
    privileged_group: int, optional
        Label representing the privileged group, default is 1.
    **kwargs: dict
        Accepted and ignored.

    Returns
    -------
    float
        The difference in Predictive Equality between privileged and
        unprivileged groups.
    """
    # Normalise both encodings so that 1 means "privileged" / "positive".
    in_priv = 1 - z if privileged_group == 0 else z
    if positive_label == 0:
        y_true, y_pred = 1 - y_true, 1 - y_pred

    actual = y_true.astype(int)
    predicted = y_pred.astype(int)
    in_priv = in_priv.astype(int)
    in_unpriv = 1 - in_priv

    # Actual negatives of each group form the denominators of the FPRs.
    actual_negative = 1 - actual
    neg_priv = actual_negative & in_priv
    neg_unpriv = actual_negative & in_unpriv

    fpr_priv = np.sum(predicted & neg_priv) / np.sum(neg_priv)
    fpr_unpriv = np.sum(predicted & neg_unpriv) / np.sum(neg_unpriv)
    return fpr_priv - fpr_unpriv
def predictive_equality_abs_diff(*args, **kwargs):
    """
    Compute the absolute difference in Predictive Equality.

    Parameters
    ----------
    *args: arguments
        Positional arguments passed unchanged to
        `predictive_equality_difference`.
    **kwargs: keyword arguments
        Keyword arguments passed unchanged to
        `predictive_equality_difference`.

    Returns
    -------
    float
        The absolute difference in Predictive Equality between privileged
        and unprivileged groups.
    """
    signed_difference = predictive_equality_difference(*args, **kwargs)
    return np.abs(signed_difference)
def average_odds_difference(y_true: np.array, y_pred: np.array, z: np.array,
                            positive_label=1,
                            privileged_group=1,
                            **kwargs) -> float:
    """
    Calculate the difference in Average Odds between privileged and
    unprivileged groups.

    The value is the mean of `equal_opportunity_difference` and
    `predictive_equality_difference` evaluated on the same inputs.

    [1] Equality of Opportunity in Supervised Learning
    (Hardt, Price, Srebro, 2016) (https://arxiv.org/abs/1610.02413)

    Parameters
    ----------
    y_true: numpy.array
        Flattened array of true binary labels.
    y_pred: numpy.array
        Flattened array of predicted binary labels. Must have same shape
        as y_true.
    z: numpy.array
        Binary array indicating privileged (1) or unprivileged (0) group.
        Same shape as y_true.
    positive_label: int, optional
        Label considered as positive, default is 1.
    privileged_group: int, optional
        Label denoting the privileged group, default is 1.
    **kwargs: dict
        Accepted and ignored.

    Returns
    -------
    float
        The difference in Average Odds between privileged and unprivileged
        groups.
    """
    # Both delegates already re-encode the inputs according to
    # `positive_label` and `privileged_group`. Inverting the arrays here as
    # well would cancel that re-encoding and silently ignore both options,
    # so the raw inputs are handed over untouched.
    eod = equal_opportunity_difference(y_true, y_pred, z, positive_label, privileged_group)
    ped = predictive_equality_difference(y_true, y_pred, z, positive_label, privileged_group)
    return (eod + ped) / 2
def average_odds_error(y_true: np.array, y_pred: np.array, z: np.array,
                       positive_label=1,
                       privileged_group=1,
                       **kwargs) -> float:
    """
    Compute the Average Odds Error.

    The value is the mean of `equal_opportunity_abs_diff` and
    `predictive_equality_abs_diff` evaluated on the same inputs.
    Can be used as an objective function to minimize.

    Parameters
    ----------
    y_true: numpy.array
        Flattened array of true binary labels.
    y_pred: numpy.array
        Flattened array of predicted binary labels. Must have same shape
        as y_true.
    z: numpy.array
        Binary array indicating privileged (1) or unprivileged (0) group.
        Same shape as y_true.
    positive_label: int, optional
        Label considered as positive, default is 1.
    privileged_group: int, optional
        Label denoting the privileged group, default is 1.
    **kwargs: dict
        Accepted and ignored.

    Returns
    -------
    float
        The Average Odds Error between privileged and unprivileged groups.
    """
    # The delegates re-encode the inputs according to `positive_label` and
    # `privileged_group` themselves; inverting the arrays here as well would
    # cancel that re-encoding, so the raw inputs are handed over untouched.
    eod = equal_opportunity_abs_diff(y_true, y_pred, z, positive_label, privileged_group)
    ped = predictive_equality_abs_diff(y_true, y_pred, z, positive_label, privileged_group)
    return (eod + ped) / 2