ATC baseline added, rcv1 dataset added

2023-09-14 01:52:19 +02:00 · 2023-09-14 01:52:19 +02:00 · 37392c6545
parent b47b229ba7
commit 37392c6545
7 changed files with 138 additions and 13 deletions
--- a/.coverage
+++ b/.coverage
--- a/garg22_ATC/ATC_helper.py
+++ b/garg22_ATC/ATC_helper.py
@ -0,0 +1,34 @@
+import numpy as np 
+
+def get_entropy(probs):
+    """Return the negative entropy of each row of ``probs``.
+
+    Despite the name, the value is ``sum_k p_k * log(p_k)`` (no leading minus
+    sign), so it is <= 0 for valid probabilities and grows towards 0 as a row
+    becomes more peaked.
+
+    ``probs`` is reduced along axis 1, so it must have at least two dimensions.
+    """
+    # The 1e-20 offset keeps log() finite when a probability is exactly 0.
+    return np.sum(np.multiply(probs, np.log(probs + 1e-20)), axis=1)
+
+def get_max_conf(probs):
+    """Return the largest value along the last axis of ``probs``."""
+    return np.max(probs, axis=-1)
+	
+def find_ATC_threshold(scores, labels):
+    """Find the score threshold that best balances two error counts.
+
+    ``scores`` holds one score per example and ``labels`` marks each example
+    with a value that is 0/False or non-zero/True. The examples are visited
+    in ascending score order. ``fp`` starts as the number of examples whose
+    label equals 0 and drops by one for each such example visited; ``fn``
+    starts at 0 and rises by one for every other example visited.
+
+    Returns ``(min_fp_fn, thres)``: the smallest ``|fp - fn|`` seen and the
+    score of the example at which it was first reached. ``thres`` stays 0.0
+    when no step improves on the initial gap.
+    """
+    # Accept plain sequences as well as arrays; fancy indexing needs ndarrays.
+    scores = np.asarray(scores)
+    labels = np.asarray(labels)
+
+    order = np.argsort(scores)
+    sorted_scores = scores[order]
+    sorted_labels = labels[order]
+
+    fp = np.sum(labels == 0)
+    fn = 0.0
+
+    min_fp_fn = np.abs(fp - fn)
+    thres = 0.0
+    for score, label in zip(sorted_scores, sorted_labels):
+        if label == 0:
+            fp -= 1
+        else:
+            fn += 1
+
+        gap = np.abs(fp - fn)
+        # Strict comparison: on ties the earliest (lowest) score is kept.
+        if gap < min_fp_fn:
+            min_fp_fn = gap
+            thres = score
+
+    return min_fp_fn, thres
+
+
+def get_ATC_acc(thres, scores):
+    """Return the percentage of ``scores`` that are at least ``thres``."""
+    above_threshold = scores >= thres
+    return 100.0 * np.mean(above_threshold)
--- a/garg22_ATC/pycache/ATC_helper.cpython-311.pyc
+++ b/garg22_ATC/pycache/ATC_helper.cpython-311.pyc
--- a/quacc/baseline.py
+++ b/quacc/baseline.py
@ -1,14 +1,77 @@
-
 from statistics import mean
 from typing import Dict
 from sklearn.base import BaseEstimator
 from sklearn.model_selection import cross_validate
 from quapy.data import LabelledCollection
+from garg22_ATC.ATC_helper import (
+    find_ATC_threshold,
+    get_ATC_acc,
+    get_entropy,
+    get_max_conf,
+)
+import numpy as np


-def kfcv(c_model: BaseEstimator, train: LabelledCollection) -> Dict:
+
+def kfcv(c_model: BaseEstimator, validation: LabelledCollection) -> Dict:
+    """Cross-validate ``c_model`` on ``validation`` and report its macro-F1.
+
+    Runs scikit-learn's ``cross_validate`` on ``validation.X`` /
+    ``validation.y`` with the ``f1_macro`` scorer and returns
+    ``{"f1_score": <mean of the per-fold scores>}``.
+    """
    scoring = ["f1_macro"]
-    scores = cross_validate(c_model, train.X, train.y, scoring=scoring)
+    scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)
+    return {"f1_score": mean(scores["test_f1_macro"])}
+
+
+def ATC_MC(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    test: LabelledCollection,
+    predict_method="predict_proba",
+):
+    """Estimate test accuracy with ATC, scoring by maximum confidence.
+
+    ``c_model`` must already be fitted. Its ``predict_method`` output is
+    computed on ``validation`` and ``test``; the ATC threshold is found on the
+    validation scores (using which validation predictions were correct) and
+    then applied to the test scores.
+
+    Returns a dict with ``"true_acc"``, the percentage of test items whose
+    predicted label equals ``test.y``, and ``"pred_acc"``, the percentage of
+    test scores at or above the threshold.
+    """
+    c_model_predict = getattr(c_model, predict_method)
+
+    # scikit-learn orders output columns like ``classes_``, so an argmax index
+    # only equals the label when the labels happen to be 0..n-1. Map indices
+    # back to labels whenever the model exposes ``classes_``.
+    classes = getattr(c_model, "classes_", None)
+
+    def to_labels(probs):
+        idx = np.argmax(probs, axis=-1)
+        return idx if classes is None else np.asarray(classes)[idx]
+
+    ## Load ID validation data probs and labels
+    val_probs, val_labels = c_model_predict(validation.X), validation.y
+
+    ## Load OOD test data probs
+    test_probs = c_model_predict(test.X)
+
+    ## score function: maximum confidence
+    val_scores = get_max_conf(val_probs)
+    val_preds = to_labels(val_probs)
+
+    test_scores = get_max_conf(test_probs)
+
+    _, ATC_thres = find_ATC_threshold(val_scores, val_labels == val_preds)
+    ATC_accuracy = get_ATC_acc(ATC_thres, test_scores)
+
    return {
-        "f1_score": mean(scores["test_f1_macro"])
+        "true_acc": 100 * np.mean(to_labels(test_probs) == test.y),
+        "pred_acc": ATC_accuracy,
    }
+
+def ATC_NE(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    test: LabelledCollection,
+    predict_method="predict_proba",
+):
+    """Estimate test accuracy with ATC, scoring by negative entropy.
+
+    ``c_model`` must already be fitted. Its ``predict_method`` output is
+    computed on ``validation`` and ``test``; the ATC threshold is found on the
+    validation scores (using which validation predictions were correct) and
+    then applied to the test scores.
+
+    Returns a dict with ``"true_acc"``, the percentage of test items whose
+    predicted label equals ``test.y``, and ``"pred_acc"``, the percentage of
+    test scores at or above the threshold.
+    """
+    c_model_predict = getattr(c_model, predict_method)
+
+    # scikit-learn orders output columns like ``classes_``, so an argmax index
+    # only equals the label when the labels happen to be 0..n-1. Map indices
+    # back to labels whenever the model exposes ``classes_``.
+    classes = getattr(c_model, "classes_", None)
+
+    def to_labels(probs):
+        idx = np.argmax(probs, axis=-1)
+        return idx if classes is None else np.asarray(classes)[idx]
+
+    ## Load ID validation data probs and labels
+    val_probs, val_labels = c_model_predict(validation.X), validation.y
+
+    ## Load OOD test data probs
+    test_probs = c_model_predict(test.X)
+
+    ## score function: negative entropy (get_entropy returns sum p*log p)
+    val_scores = get_entropy(val_probs)
+    val_preds = to_labels(val_probs)
+
+    test_scores = get_entropy(test_probs)
+
+    _, ATC_thres = find_ATC_threshold(val_scores, val_labels == val_preds)
+    ATC_accuracy = get_ATC_acc(ATC_thres, test_scores)
+
+    return {
+        "true_acc": 100 * np.mean(to_labels(test_probs) == test.y),
+        "pred_acc": ATC_accuracy,
+    }
+
--- a/quacc/dataset.py
+++ b/quacc/dataset.py
@ -1,7 +1,35 @@
+from typing import Tuple
+import numpy as np
+from quapy.data.base import LabelledCollection
 import quapy as qp
+from sklearn.datasets import fetch_rcv1

-def get_imdb_traintest():
-    return qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
+TRAIN_VAL_PROP = 0.5

-def get_spambase_traintest():
-    return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test
+
+def get_imdb() -> Tuple[LabelledCollection, LabelledCollection, LabelledCollection]:
+    """Return the IMDB reviews dataset as ``(train, validation, test)``.
+
+    The tf-idf ``train_test`` split comes from ``qp.datasets.fetch_reviews``;
+    the training part is then split again with ``split_stratified``, keeping
+    a ``TRAIN_VAL_PROP`` share for training and the rest for validation.
+    """
+    train, test = qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
+    train, validation = train.split_stratified(train_prop=TRAIN_VAL_PROP)
+    return train, validation, test
+
+
+def get_spambase():
+    """Return the UCI spambase dataset as ``(train, validation, test)``.
+
+    The ``train_test`` split comes from ``qp.datasets.fetch_UCIDataset``; the
+    training part is then split again with ``split_stratified``, keeping a
+    ``TRAIN_VAL_PROP`` share for training and the rest for validation.
+    """
+    train, test = qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test
+    train, validation = train.split_stratified(train_prop=TRAIN_VAL_PROP)
+    return train, validation, test
+
+
+def get_rcv1(sample_size=100):
+    """Build one binary ``LabelledCollection`` per sufficiently large RCV1 target.
+
+    For every name in ``dataset.target_names`` the matching column of
+    ``dataset.target`` is used as 0/1 labels over the full ``dataset.data``
+    matrix. Targets whose labels sum to fewer than ``sample_size`` are dropped.
+
+    Returns a dict mapping target name to its ``LabelledCollection``.
+    """
+    dataset = fetch_rcv1()
+
+    collections = {}
+    for ind, target in enumerate(dataset.target_names):
+        # Densify one column at a time; ``dataset.target`` is indexed as sparse.
+        labels = dataset.target[:, ind].toarray().flatten()
+        # The earlier ``filter(lambda _, labels: ...)`` passed a two-argument
+        # lambda to filter(), which calls it with one tuple and so raised
+        # TypeError; filter inline instead.
+        if np.sum(labels) >= sample_size:
+            collections[target] = LabelledCollection(
+                dataset.data, labels, classes=[0, 1]
+            )
+    return collections
--- a/quacc/main.py
+++ b/quacc/main.py
@ -9,7 +9,7 @@ from quacc.estimator import (
    MulticlassAccuracyEstimator,
 )

-from quacc.dataset import get_imdb_traintest
+from quacc.dataset import get_imdb

 qp.environ["SAMPLE_SIZE"] = 100

@ -20,7 +20,7 @@ dataset_name = "imdb"

 def estimate_multiclass():
    print(dataset_name)
-    train, test = get_imdb_traintest(dataset_name)
+    train, validation, test = get_imdb(dataset_name)

    model = LogisticRegression()

@ -59,7 +59,7 @@ def estimate_multiclass():

 def estimate_binary():
    print(dataset_name)
-    train, test = get_imdb_traintest(dataset_name)
+    train, validation, test = get_imdb(dataset_name)

    model = LogisticRegression()

--- a/tests/test_baseline.py
+++ b/tests/test_baseline.py
@ -1,12 +1,12 @@

 from sklearn.linear_model import LogisticRegression
 from quacc.baseline import kfcv
-from quacc.dataset import get_spambase_traintest
+from quacc.dataset import get_spambase


 class TestBaseline:
+    # Tests for the baseline estimators in quacc.baseline.

    def test_kfcv(self):
+        # kfcv must report its result under the "f1_score" key.
-        train, _ = get_spambase_traintest()
+        train, _, _ = get_spambase()
        c_model = LogisticRegression()
        assert "f1_score" in kfcv(c_model, train)