Adding methods from prior work

Alejandro Moreo Fernandez 2024-05-09 16:24:20 +02:00
parent 5284e04c90
commit 67ed6e4c6c
1 changed file with 88 additions and 0 deletions

Retrieval/methods.py Normal file

@@ -0,0 +1,88 @@
"""
This file implements some of the methods presented in the FAccT'22 paper by
Ghazimatin, Kleindessner, Russell, Abedjan, and Golebiowski,
Measuring Fairness of Rankings under Noisy Sensitive Information.
In particular, it implements two variants of a method relying on M3=rND:
one in which the assumed graphical model is P(Â,A,S) = P(Â|A)*P(S|A) (called "b")
and another in which the assumed graphical model is P(Â,A,S) = P(Â|A)*P(S|Â) (called "d")
"""
import numpy as np
from abc import ABC, abstractmethod
from sklearn.metrics import confusion_matrix
from quapy.method.aggregative import CC


class AbstractM3rND(ABC):

    def __init__(self, classifier):
        # a Classify & Count quantifier wrapping the proxy classifier
        self.quantifier = CC(classifier)

    def proxy_labels(self, instances):
        # returns the proxy (predicted) values of the sensitive attribute
        return self.quantifier.classify(instances)

    def quantify(self, instances):
        # returns the estimated prevalence of the sensitive attribute
        return self.quantifier.quantify(instances)

    @abstractmethod
    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        ...
    def get_confusion_matrix(self, X, y, additive_smoothing=0.5):
        """
        Computes the confusion matrix of the proxy labels against the true labels.
        Some confusion matrices may contain 0 counts for certain classes, and this
        causes instabilities in the correction. If requested, additive smoothing is
        applied; the default adds half a count to every entry.

        :param X: array-like with the covariates
        :param y: array-like with the true labels
        :param additive_smoothing: float, default 0.5
        :return: the confusion matrix C, normalized so that its entries are the
            joint probabilities Cij=P(Y=i,Ŷ=j)
        """
        proxy_labels = self.proxy_labels(X)
        true_labels = y
        labels = self.quantifier.classes_
        conf_matrix = confusion_matrix(true_labels, proxy_labels, labels=labels).astype(float)
        if additive_smoothing > 0:
            conf_matrix += additive_smoothing
        # normalize the counts into joint probabilities, as the corrections expect
        conf_matrix /= conf_matrix.sum()
        return conf_matrix

class M3rND_ModelB(AbstractM3rND):

    def __init__(self, classifier):
        super().__init__(classifier)

    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        # conf_matrix contains the joint probabilities Cij=P(Y=i,Ŷ=j);
        # truecond_matrix contains Cij=P(Ŷ=j|Y=i), i.e., conditioned on the true labels
        truecond_matrix = conf_matrix / conf_matrix.sum(axis=1, keepdims=True)
        p = truecond_matrix[0, 1]  # P(Â=1|A=0)
        q = truecond_matrix[1, 0]  # P(Â=0|A=1)
        den = (1 - p - q)
        if den != 0:
            corr = 1. / den
            rND_estim = rND_estim * corr
        return rND_estim

class M3rND_ModelD(AbstractM3rND):

    def __init__(self, classifier):
        super().__init__(classifier)

    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        # conf_matrix contains the joint probabilities Cij=P(Y=i,Ŷ=j);
        # truecond_matrix contains Cij=P(Ŷ=j|Y=i), i.e., conditioned on the true labels
        truecond_matrix = conf_matrix / conf_matrix.sum(axis=1, keepdims=True)
        prev_A = conf_matrix.sum(axis=1)  # marginal distribution P(A)
        beta = prev_A[1]  # P(A=1)
        p = truecond_matrix[0, 1]  # P(Â=1|A=0)
        q = truecond_matrix[1, 0]  # P(Â=0|A=1)
        x = (1 - q) * beta + p * (1 - beta)  # P(Â=1)
        y = q * beta + (1 - p) * (1 - beta)  # P(Â=0)
        if x != 0 and y != 0:
            # corr = P(A=1|Â=1) - P(A=1|Â=0), by Bayes' rule
            corr = (((1 - q) * beta) / x) - ((q * beta) / y)
            rND_estim = rND_estim * corr
        return rND_estim
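
For context, a minimal usage sketch (not part of the commit): it assumes a binary sensitive attribute, scikit-learn's LogisticRegression as the proxy classifier, and quapy's LabelledCollection for fitting the underlying CC quantifier. The synthetic data, the rND value rnd_hat, and all variable names are illustrative placeholders.

import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy.data import LabelledCollection
from methods import M3rND_ModelB  # adjust the import path to the package layout

# synthetic data: the sensitive attribute correlates with the first covariate
rng = np.random.default_rng(0)
Xtr = rng.normal(size=(1000, 5))
Atr = (Xtr[:, 0] + rng.normal(scale=0.5, size=1000) > 0).astype(int)
Xte = rng.normal(size=(500, 5))
Ate = (Xte[:, 0] + rng.normal(scale=0.5, size=500) > 0).astype(int)

method = M3rND_ModelB(LogisticRegression())
method.quantifier.fit(LabelledCollection(Xtr, Atr))  # train the proxy classifier
conf = method.get_confusion_matrix(Xte, Ate)

rnd_hat = 0.12  # hypothetical rND estimate computed from proxy labels
print(method.fair_measure_correction(rnd_hat, conf))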