From 5f26bc7059d9b0c5a43bc9233d1a348a49850853 Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi
Date: Sun, 5 Nov 2023 14:15:43 +0100
Subject: [PATCH] negative entropy confidence added

---
 quacc/evaluation/method.py | 32 ++++++++++++++++++++++++++++++--
 quacc/method/base.py       | 15 +++++++++++++--
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/quacc/evaluation/method.py b/quacc/evaluation/method.py
index a66f60a..6f1fd62 100644
--- a/quacc/evaluation/method.py
+++ b/quacc/evaluation/method.py
@@ -91,6 +91,32 @@ def mulmc_sld(c_model, validation, protocol) -> EvaluationReport:
     )
 
 
+@method
+def binne_sld(c_model, validation, protocol) -> EvaluationReport:
+    est = BQAE(
+        c_model,
+        SLD(LogisticRegression()),
+        confidence="entropy",
+    ).fit(validation)
+    return evaluation_report(
+        estimator=est,
+        protocol=protocol,
+    )
+
+
+@method
+def mulne_sld(c_model, validation, protocol) -> EvaluationReport:
+    est = MCAE(
+        c_model,
+        SLD(LogisticRegression()),
+        confidence="entropy",
+    ).fit(validation)
+    return evaluation_report(
+        estimator=est,
+        protocol=protocol,
+    )
+
+
 @method
 def bin_sld_gs(c_model, validation, protocol) -> EvaluationReport:
     v_train, v_val = validation.split_stratified(0.6, random_state=0)
@@ -101,7 +127,7 @@ def bin_sld_gs(c_model, validation, protocol) -> EvaluationReport:
             "q__classifier__C": np.logspace(-3, 3, 7),
             "q__classifier__class_weight": [None, "balanced"],
             "q__recalib": [None, "bcts", "vs"],
-            "confidence": [None, "max_conf"],
+            "confidence": [None, "max_conf", "entropy"],
         },
         refit=False,
         protocol=UPP(v_val, repeats=100),
@@ -123,7 +149,7 @@ def mul_sld_gs(c_model, validation, protocol) -> EvaluationReport:
             "q__classifier__C": np.logspace(-3, 3, 7),
             "q__classifier__class_weight": [None, "balanced"],
             "q__recalib": [None, "bcts", "vs"],
-            "confidence": [None, "max_conf"],
+            "confidence": [None, "max_conf", "entropy"],
         },
         refit=False,
         protocol=UPP(v_val, repeats=100),
@@ -200,6 +226,7 @@ def bin_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
         param_grid={
             "q__classifier__C": np.logspace(-3, 3, 7),
             "q__classifier__class_weight": [None, "balanced"],
+            "confidence": [None, "max_conf", "entropy"],
         },
         refit=False,
         protocol=UPP(v_val, repeats=100),
@@ -220,6 +247,7 @@ def mul_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
         param_grid={
             "q__classifier__C": np.logspace(-3, 3, 7),
             "q__classifier__class_weight": [None, "balanced"],
+            "confidence": [None, "max_conf", "entropy"],
         },
         refit=False,
         protocol=UPP(v_val, repeats=100),
diff --git a/quacc/method/base.py b/quacc/method/base.py
index a57509f..a7389f4 100644
--- a/quacc/method/base.py
+++ b/quacc/method/base.py
@@ -31,11 +31,22 @@ class BaseAccuracyEstimator(BaseQuantifier):
         self.classifier = classifier
 
     def __get_confidence(self):
+        def max_conf(probas):
+            _mc = np.max(probas, axis=-1)
+            _min = 1.0 / probas.shape[1]
+            _norm_mc = (_mc - _min) / (1.0 - _min)
+            return _norm_mc
+
+        def entropy(probas):
+            _ent = np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1)
+            return _ent
+
         if self.confidence is None:
             return None
 
         __confs = {
-            "max_conf": lambda probas: np.max(probas, axis=-1).reshape((len(probas), 1))
+            "max_conf": max_conf,
+            "entropy": entropy,
         }
         return __confs.get(self.confidence, None)
 
@@ -43,7 +54,7 @@ class BaseAccuracyEstimator(BaseQuantifier):
         _ext = pred_proba
         _f_conf = self.__get_confidence()
         if _f_conf is not None:
-            _confs = _f_conf(pred_proba)
+            _confs = _f_conf(pred_proba).reshape((len(pred_proba), 1))
             _ext = np.concatenate((_confs, pred_proba), axis=1)
         return _ext
 
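
Note: as a quick sanity check of the new "entropy" confidence option, the two measures can be reproduced outside the estimator as in the minimal sketch below. The helper name neg_entropy and the toy probas array are illustrative only, not part of the repository; the sketch mirrors what BaseAccuracyEstimator.__get_confidence computes after this patch and how the patched code prepends the confidence as an extra feature column.

import numpy as np

# Illustrative posteriors for three instances of a binary problem.
probas = np.array([[0.5, 0.5], [0.9, 0.1], [0.99, 0.01]])

def max_conf(probas):
    # Maximum posterior, rescaled so the uniform distribution maps to 0 and full certainty to 1.
    _mc = np.max(probas, axis=-1)
    _min = 1.0 / probas.shape[1]
    return (_mc - _min) / (1.0 - _min)

def neg_entropy(probas):
    # Negative Shannon entropy (hence "negative entropy confidence"):
    # values closer to 0 indicate more confident predictions.
    return np.sum(probas * np.log(probas + 1e-20), axis=1)

# Prepend the chosen per-instance confidence as an extra feature column,
# as the patched code does via reshape + concatenate.
confs = neg_entropy(probas).reshape((len(probas), 1))
extended = np.concatenate((confs, probas), axis=1)
print(extended)  # shape (3, 3): [confidence, P(y=0|x), P(y=1|x)]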