# QuAcc/quacc/evaluation/baseline.py

from statistics import mean
from typing import Dict

import numpy as np
from quapy.data import LabelledCollection
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_validate
import sklearn.metrics as metrics
from quapy.protocol import (
    AbstractStochasticSeededProtocol,
    OnLabelledCollectionProtocol,
)

from .report import EvaluationReport

import elsahar19_rca.rca as rca
import garg22_ATC.ATC_helper as atc
import guillory21_doc.doc as doc
import jiang18_trustscore.trustscore as trustscore


def kfcv(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """Baseline that uses cross-validation scores as the performance estimate.

    Accuracy and macro-F1 are obtained with ``sklearn``'s ``cross_validate``
    on ``validation`` (default CV splitting) and averaged over the folds.
    The same two averages are then compared against the scores ``c_model``
    actually achieves on every sample generated by ``protocol``.

    Args:
        c_model: classifier under evaluation; must expose ``predict_method``.
        validation: labelled data used for the cross-validation.
        protocol: sample generator; its ``collator`` attribute is overwritten
            so that each iteration yields a ``LabelledCollection``.
        predict_method: name of the ``c_model`` method producing label
            predictions.

    Returns:
        An ``EvaluationReport`` (prefix ``"kfcv"``) with one ``append_row``
        call per sample, carrying:
            acc_score: ``1 - mean CV accuracy``
            f1_score: mean CV macro-F1
            acc: ``|mean CV accuracy - sample accuracy|``
            f1: ``|mean CV macro-F1 - sample F1|``
    """
    c_model_predict = getattr(c_model, predict_method)

    # sklearn's f1_score defaults to average="binary", which raises a
    # ValueError on multiclass targets. Fall back to macro averaging there
    # (matching the "f1_macro" CV scorer); for at most two classes the
    # original binary averaging is kept unchanged.
    # NOTE(review): in the binary case the estimate is macro-F1 while the
    # observed score is binary-F1 -- confirm this asymmetry is intended.
    f1_average = "macro" if validation.n_classes > 2 else "binary"

    scoring = ["accuracy", "f1_macro"]
    scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)
    acc_score = mean(scores["test_accuracy"])
    f1_score = mean(scores["test_f1_macro"])

    # ensure that the protocol returns a LabelledCollection for each iteration
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    report = EvaluationReport(prefix="kfcv")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
        meta_f1 = abs(
            f1_score - metrics.f1_score(test.y, test_preds, average=f1_average)
        )
        report.append_row(
            test.prevalence(),
            acc_score=(1.0 - acc_score),
            f1_score=f1_score,
            acc=meta_acc,
            f1=meta_f1,
        )

    return report


def reference(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
):
    """Compute the true error and F1 of ``c_model`` on each protocol sample.

    Predictions are the argmax over ``c_model.predict_proba``; the resulting
    column indices are compared directly against ``test.y``.

    Args:
        c_model: classifier under evaluation; must expose ``predict_proba``.
        validation: not used by this function; kept so the signature matches
            the other baselines in this module.
        protocol: sample generator; its ``collator`` attribute is overwritten
            so that each iteration yields a ``LabelledCollection``.

    Returns:
        An ``EvaluationReport`` (prefix ``"ref"``) with one ``append_row``
        call per sample, carrying:
            acc_score: ``1 - accuracy`` on the sample
            f1_score: F1 on the sample
    """
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")
    c_model_predict = getattr(c_model, "predict_proba")
    report = EvaluationReport(prefix="ref")
    for test in protocol():
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        # sklearn's f1_score defaults to average="binary", which raises a
        # ValueError on multiclass targets; use macro averaging there and
        # keep the original binary averaging for at most two classes.
        f1_average = "macro" if test.n_classes > 2 else "binary"
        report.append_row(
            test.prevalence(),
            acc_score=(1 - metrics.accuracy_score(test.y, test_preds)),
            f1_score=metrics.f1_score(test.y, test_preds, average=f1_average),
        )

    return report


def atc_mc(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC baseline using the maximum-confidence score (``atc.get_max_conf``).

    A threshold is fitted with ``atc.find_ATC_threshold`` from the validation
    scores and the mask of correctly classified validation items. For every
    sample generated by ``protocol`` the threshold is then turned into an
    accuracy and an F1 estimate, which are compared with the observed values.

    Args:
        c_model: classifier under evaluation; must expose ``predict_method``.
        validation: labelled data used to fit the threshold.
        protocol: sample generator; its ``collator`` attribute is overwritten
            so that each iteration yields a ``LabelledCollection``.
        predict_method: name of the ``c_model`` method whose output is passed
            to ``np.argmax(..., axis=-1)`` and to the ATC helpers.

    Returns:
        An ``EvaluationReport`` (prefix ``"atc_mc"``) with, per sample,
        ``acc`` (absolute accuracy-estimate error), ``acc_score``
        (``1 - estimated accuracy``), ``f1_score`` (estimated F1) and ``f1``
        (absolute F1-estimate error).
    """
    posterior_fn = getattr(c_model, predict_method)

    # Fit the ATC threshold on the validation data.
    in_probs = posterior_fn(validation.X)
    in_labels = validation.y
    in_scores = atc.get_max_conf(in_probs)
    in_preds = np.argmax(in_probs, axis=-1)
    threshold = atc.find_ATC_threshold(in_scores, in_labels == in_preds)[1]

    # Make the protocol yield LabelledCollection objects.
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    report = EvaluationReport(prefix="atc_mc")
    for sample in protocol():
        out_probs = posterior_fn(sample.X)
        out_preds = np.argmax(out_probs, axis=-1)
        out_scores = atc.get_max_conf(out_probs)

        est_acc = atc.get_ATC_acc(threshold, out_scores)
        true_acc = metrics.accuracy_score(sample.y, out_preds)

        est_f1 = atc.get_ATC_f1(threshold, out_scores, out_probs)
        # No `average` argument is given, so sklearn's default applies.
        true_f1 = metrics.f1_score(sample.y, out_preds)

        report.append_row(
            sample.prevalence(),
            acc=abs(est_acc - true_acc),
            acc_score=1.0 - est_acc,
            f1_score=est_f1,
            f1=abs(est_f1 - true_f1),
        )

    return report


def atc_ne(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC baseline using the entropy-based score (``atc.get_entropy``).

    A threshold is fitted with ``atc.find_ATC_threshold`` from the validation
    scores and the mask of correctly classified validation items. For every
    sample generated by ``protocol`` the threshold is then turned into an
    accuracy and an F1 estimate, which are compared with the observed values.

    Args:
        c_model: classifier under evaluation; must expose ``predict_method``.
        validation: labelled data used to fit the threshold.
        protocol: sample generator; its ``collator`` attribute is overwritten
            so that each iteration yields a ``LabelledCollection``.
        predict_method: name of the ``c_model`` method whose output is passed
            to ``np.argmax(..., axis=-1)`` and to the ATC helpers.

    Returns:
        An ``EvaluationReport`` (prefix ``"atc_ne"``) with, per sample,
        ``acc`` (absolute accuracy-estimate error), ``acc_score``
        (``1 - estimated accuracy``), ``f1_score`` (estimated F1) and ``f1``
        (absolute F1-estimate error).
    """
    posterior_fn = getattr(c_model, predict_method)

    # Fit the ATC threshold on the validation data.
    in_probs = posterior_fn(validation.X)
    in_labels = validation.y
    in_scores = atc.get_entropy(in_probs)
    in_preds = np.argmax(in_probs, axis=-1)
    threshold = atc.find_ATC_threshold(in_scores, in_labels == in_preds)[1]

    # Make the protocol yield LabelledCollection objects.
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    report = EvaluationReport(prefix="atc_ne")
    for sample in protocol():
        out_probs = posterior_fn(sample.X)
        out_preds = np.argmax(out_probs, axis=-1)
        out_scores = atc.get_entropy(out_probs)

        est_acc = atc.get_ATC_acc(threshold, out_scores)
        true_acc = metrics.accuracy_score(sample.y, out_preds)

        est_f1 = atc.get_ATC_f1(threshold, out_scores, out_probs)
        # No `average` argument is given, so sklearn's default applies.
        true_f1 = metrics.f1_score(sample.y, out_preds)

        report.append_row(
            sample.prevalence(),
            acc=abs(est_acc - true_acc),
            acc_score=(1.0 - est_acc),
            f1_score=est_f1,
            f1=abs(est_f1 - true_f1),
        )

    return report


def trust_score(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    test: LabelledCollection,
    predict_method="predict",
):
    """Score the predictions of ``c_model`` on ``test`` with a ``TrustScore``.

    Args:
        c_model: classifier under evaluation; must expose ``predict_method``.
        validation: labelled data the ``TrustScore`` model is fitted on.
        test: collection whose instances are predicted and scored.
        predict_method: name of the ``c_model`` method producing predictions.

    Returns:
        Whatever ``TrustScore.get_score`` returns for ``test.X`` and the
        predictions of ``c_model`` on it.
    """
    predictions = getattr(c_model, predict_method)(test.X)

    scorer = trustscore.TrustScore()
    scorer.fit(validation.X, validation.y)
    return scorer.get_score(test.X, predictions)


def doc_feat(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """DoC baseline built on maximum-confidence scores.

    The accuracy estimate for a sample is the validation accuracy (as a
    percentage) plus ``doc.get_doc(validation_scores, sample_scores)``,
    divided by 100.

    Args:
        c_model: classifier under evaluation; must expose ``predict_method``.
        validation: labelled data providing the reference accuracy and
            confidence scores.
        protocol: sample generator; its ``collator`` attribute is overwritten
            so that each iteration yields a ``LabelledCollection``.
        predict_method: name of the ``c_model`` method whose output is reduced
            with ``np.max`` / ``np.argmax`` along the last axis.

    Returns:
        An ``EvaluationReport`` (prefix ``"doc_feat"``) with, per sample,
        ``acc`` (absolute accuracy-estimate error) and ``acc_score``
        (``1 - estimated accuracy``).
    """
    posterior_fn = getattr(c_model, predict_method)

    # Reference statistics from the validation set.
    in_probs = posterior_fn(validation.X)
    in_labels = validation.y
    in_conf = np.max(in_probs, axis=-1)
    in_preds = np.argmax(in_probs, axis=-1)
    in_acc_pct = np.mean(in_preds == in_labels) * 100

    # Make the protocol yield LabelledCollection objects.
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    report = EvaluationReport(prefix="doc_feat")
    for sample in protocol():
        out_probs = posterior_fn(sample.X)
        out_preds = np.argmax(out_probs, axis=-1)
        out_conf = np.max(out_probs, axis=-1)

        est_acc = (in_acc_pct + doc.get_doc(in_conf, out_conf)) / 100.0
        true_acc = metrics.accuracy_score(sample.y, out_preds)
        report.append_row(
            sample.prevalence(),
            acc=abs(est_acc - true_acc),
            acc_score=(1.0 - est_acc),
        )

    return report


def rca_score(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA baseline.

    For each sample a copy of the classifier is fitted (``rca.clone_fit``) on
    the sample's instances labelled with ``c_model``'s own predictions. The
    predictions of that copy and of ``c_model`` on the validation set are
    passed, together with the validation labels, to ``rca.get_score``.

    Args:
        c_model: classifier under evaluation; must expose ``predict_method``.
        validation: labelled data both classifiers are compared on.
        protocol: sample generator; its ``collator`` attribute is overwritten
            so that each iteration yields a ``LabelledCollection``.
        predict_method: name of the method producing label predictions.

    Returns:
        An ``EvaluationReport`` (prefix ``"rca"``) with, per sample,
        ``acc_score`` (the value of ``rca.get_score``) and ``acc``
        (``|score - (1 - sample accuracy)|``). If a ``ValueError`` is raised
        while processing a sample, both values are NaN for that row.
    """
    predict_fn = getattr(c_model, predict_method)
    base_val_preds = predict_fn(validation.X)

    # Make the protocol yield LabelledCollection objects.
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    nan = float("nan")
    report = EvaluationReport(prefix="rca")
    for sample in protocol():
        try:
            sample_preds = predict_fn(sample.X)
            refit_model = rca.clone_fit(c_model, sample.X, sample_preds)
            refit_val_preds = getattr(refit_model, predict_method)(validation.X)
            estimate = rca.get_score(base_val_preds, refit_val_preds, validation.y)
            true_error = 1 - metrics.accuracy_score(sample.y, sample_preds)
            report.append_row(
                sample.prevalence(),
                acc=abs(estimate - true_error),
                acc_score=estimate,
            )
        except ValueError:
            report.append_row(sample.prevalence(), acc=nan, acc_score=nan)

    return report


def rca_star_score(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA* baseline.

    The validation set is split in two stratified halves (``random_state=0``).
    A first copy of the classifier is fitted (``rca.clone_fit``) on the first
    half labelled with ``c_model``'s predictions. For each sample a second
    copy is fitted the same way on the sample. The predictions of both copies
    on the second half are passed, together with its labels, to
    ``rca.get_score``.

    Args:
        c_model: classifier under evaluation; must expose ``predict_method``.
        validation: labelled data that is split into the two halves.
        protocol: sample generator; its ``collator`` attribute is overwritten
            so that each iteration yields a ``LabelledCollection``.
        predict_method: name of the method producing label predictions.

    Returns:
        An ``EvaluationReport`` (prefix ``"rca_star"``) with, per sample,
        ``acc_score`` (the value of ``rca.get_score``) and ``acc``
        (``|score - (1 - sample accuracy)|``). If a ``ValueError`` is raised
        while processing a sample, both values are NaN for that row.
    """
    predict_fn = getattr(c_model, predict_method)

    first_half, second_half = validation.split_stratified(
        train_prop=0.5, random_state=0
    )
    first_half_preds = predict_fn(first_half.X)
    val_model = rca.clone_fit(c_model, first_half.X, first_half_preds)
    val_model_preds = getattr(val_model, predict_method)(second_half.X)

    # Make the protocol yield LabelledCollection objects.
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    nan = float("nan")
    report = EvaluationReport(prefix="rca_star")
    for sample in protocol():
        try:
            sample_preds = predict_fn(sample.X)
            sample_model = rca.clone_fit(c_model, sample.X, sample_preds)
            sample_model_preds = getattr(sample_model, predict_method)(second_half.X)
            estimate = rca.get_score(
                val_model_preds, sample_model_preds, second_half.y
            )
            true_error = 1 - metrics.accuracy_score(sample.y, sample_preds)
            report.append_row(
                sample.prevalence(),
                acc=abs(estimate - true_error),
                acc_score=estimate,
            )
        except ValueError:
            report.append_row(sample.prevalence(), acc=nan, acc_score=nan)

    return report
kfcv baseline implemented 2023-09-13 00:11:20 +02:00			`from statistics import mean`
lipton bbse imported 2023-09-22 01:40:36 +02:00			`from typing import Dict`

			`import numpy as np`
			`from quapy.data import LabelledCollection`
kfcv baseline implemented 2023-09-13 00:11:20 +02:00			`from sklearn.base import BaseEstimator`
			`from sklearn.model_selection import cross_validate`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`import sklearn.metrics as metrics`
baseline testing 2023-09-24 02:21:18 +02:00			`from quapy.protocol import (`
			`AbstractStochasticSeededProtocol,`
			`OnLabelledCollectionProtocol,`
			`)`
lipton bbse imported 2023-09-22 01:40:36 +02:00
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`from .report import EvaluationReport`

lipton bbse imported 2023-09-22 01:40:36 +02:00			`import elsahar19_rca.rca as rca`
guillory21 imported as baseline 2023-09-17 21:47:34 +02:00			`import garg22_ATC.ATC_helper as atc`
			`import guillory21_doc.doc as doc`
lipton bbse imported 2023-09-22 01:40:36 +02:00			`import jiang18_trustscore.trustscore as trustscore`
kfcv baseline implemented 2023-09-13 00:11:20 +02:00
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`def kfcv(`
			`c_model: BaseEstimator,`
			`validation: LabelledCollection,`
			`protocol: AbstractStochasticSeededProtocol,`
			`predict_method="predict"`
			`):`
			`c_model_predict = getattr(c_model, predict_method)`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`scoring = ["accuracy", "f1_macro"]`
			`scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)`
			`acc_score = mean(scores["test_accuracy"])`
			`f1_score = mean(scores["test_f1_macro"])`
baseline testing 2023-09-24 02:21:18 +02:00
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`# ensure that the protocol returns a LabelledCollection for each iteration`
			`protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")`
baseline testing 2023-09-24 02:21:18 +02:00
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`report = EvaluationReport(prefix="kfcv")`
			`for test in protocol():`
			`test_preds = c_model_predict(test.X)`
			`meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))`
			`meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))`
			`report.append_row(`
			`test.prevalence(),`
			`acc_score=(1. - acc_score),`
			`f1_score=f1_score,`
			`acc=meta_acc,`
			`f1=meta_f1,`
			`)`

			`return report`


			`def reference(`
			`c_model: BaseEstimator,`
			`validation: LabelledCollection,`
			`protocol: AbstractStochasticSeededProtocol,`
			`):`
			`protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")`
			`c_model_predict = getattr(c_model, "predict_proba")`
			`report = EvaluationReport(prefix="ref")`
			`for test in protocol():`
			`test_probs = c_model_predict(test.X)`
			`test_preds = np.argmax(test_probs, axis=-1)`
			`report.append_row(`
			`test.prevalence(),`
			`acc_score=(1 - metrics.accuracy_score(test.y, test_preds)),`
			`f1_score=metrics.f1_score(test.y, test_preds),`
			`)`
baseline testing 2023-09-24 02:21:18 +02:00
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`return report`
baseline testing 2023-09-24 02:21:18 +02:00

guillory21 imported as baseline 2023-09-17 21:47:34 +02:00			`def atc_mc(`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00			`c_model: BaseEstimator,`
			`validation: LabelledCollection,`
baseline testing 2023-09-24 02:21:18 +02:00			`protocol: AbstractStochasticSeededProtocol,`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00			`predict_method="predict_proba",`
			`):`
			`c_model_predict = getattr(c_model, predict_method)`

			`## Load ID validation data probs and labels`
			`val_probs, val_labels = c_model_predict(validation.X), validation.y`

			`## score function, e.g., negative entropy or argmax confidence`
guillory21 imported as baseline 2023-09-17 21:47:34 +02:00			`val_scores = atc.get_max_conf(val_probs)`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00			`val_preds = np.argmax(val_probs, axis=-1)`
Comments fixed 2023-09-18 09:24:20 +02:00			`_, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00
baseline testing 2023-09-24 02:21:18 +02:00			`# ensure that the protocol returns a LabelledCollection for each iteration`
			`protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")`

baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`report = EvaluationReport(prefix="atc_mc")`
baseline testing 2023-09-24 02:21:18 +02:00			`for test in protocol():`
			`## Load OOD test data probs`
			`test_probs = c_model_predict(test.X)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`test_preds = np.argmax(test_probs, axis=-1)`
baseline testing 2023-09-24 02:21:18 +02:00			`test_scores = atc.get_max_conf(test_probs)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)`
			`meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))`
			`f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)`
			`meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))`
			`report.append_row(`
			`test.prevalence(),`
			`acc=meta_acc,`
			`acc_score=1.0 - atc_accuracy,`
			`f1_score=f1_score,`
			`f1=meta_f1,`
			`)`

			`return report`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00
trust score imported 2023-09-16 01:59:49 +02:00
guillory21 imported as baseline 2023-09-17 21:47:34 +02:00			`def atc_ne(`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00			`c_model: BaseEstimator,`
			`validation: LabelledCollection,`
baseline testing 2023-09-24 02:21:18 +02:00			`protocol: AbstractStochasticSeededProtocol,`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00			`predict_method="predict_proba",`
			`):`
			`c_model_predict = getattr(c_model, predict_method)`

			`## Load ID validation data probs and labels`
			`val_probs, val_labels = c_model_predict(validation.X), validation.y`

			`## score function, e.g., negative entropy or argmax confidence`
guillory21 imported as baseline 2023-09-17 21:47:34 +02:00			`val_scores = atc.get_entropy(val_probs)`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00			`val_preds = np.argmax(val_probs, axis=-1)`
guillory21 imported as baseline 2023-09-17 21:47:34 +02:00			`_, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00
baseline testing 2023-09-24 02:21:18 +02:00			`# ensure that the protocol returns a LabelledCollection for each iteration`
			`protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")`

baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`report = EvaluationReport(prefix="atc_ne")`
baseline testing 2023-09-24 02:21:18 +02:00			`for test in protocol():`
			`## Load OOD test data probs`
			`test_probs = c_model_predict(test.X)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`test_preds = np.argmax(test_probs, axis=-1)`
baseline testing 2023-09-24 02:21:18 +02:00			`test_scores = atc.get_entropy(test_probs)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)`
			`meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))`
			`f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)`
			`meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))`
			`report.append_row(`
			`test.prevalence(),`
			`acc=meta_acc,`
			`acc_score=(1.0 - atc_accuracy),`
			`f1_score=f1_score,`
			`f1=meta_f1,`
			`)`

			`return report`
ATC baseline added, rcv1 dataset added 2023-09-14 01:52:19 +02:00
trust score imported 2023-09-16 01:59:49 +02:00
			`def trust_score(`
			`c_model: BaseEstimator,`
			`validation: LabelledCollection,`
			`test: LabelledCollection,`
			`predict_method="predict",`
			`):`
			`c_model_predict = getattr(c_model, predict_method)`

			`test_pred = c_model_predict(test.X)`

guillory21 imported as baseline 2023-09-17 21:47:34 +02:00			`trust_model = trustscore.TrustScore()`
trust score imported 2023-09-16 01:59:49 +02:00			`trust_model.fit(validation.X, validation.y)`

			`return trust_model.get_score(test.X, test_pred)`

guillory21 imported as baseline 2023-09-17 21:47:34 +02:00
			`def doc_feat(`
			`c_model: BaseEstimator,`
			`validation: LabelledCollection,`
baseline testing 2023-09-24 02:21:18 +02:00			`protocol: AbstractStochasticSeededProtocol,`
guillory21 imported as baseline 2023-09-17 21:47:34 +02:00			`predict_method="predict_proba",`
			`):`
			`c_model_predict = getattr(c_model, predict_method)`

			`val_probs, val_labels = c_model_predict(validation.X), validation.y`
			`val_scores = np.max(val_probs, axis=-1)`
			`val_preds = np.argmax(val_probs, axis=-1)`
Comments fixed 2023-09-18 09:24:20 +02:00			`v1acc = np.mean(val_preds == val_labels) * 100`
baseline testing 2023-09-24 02:21:18 +02:00
			`# ensure that the protocol returns a LabelledCollection for each iteration`
			`protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")`

baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`report = EvaluationReport(prefix="doc_feat")`
baseline testing 2023-09-24 02:21:18 +02:00			`for test in protocol():`
			`test_probs = c_model_predict(test.X)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`test_preds = np.argmax(test_probs, axis=-1)`
baseline testing 2023-09-24 02:21:18 +02:00			`test_scores = np.max(test_probs, axis=-1)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`score = (v1acc + doc.get_doc(val_scores, test_scores)) / 100.0`
			`meta_acc = abs(score - metrics.accuracy_score(test.y, test_preds))`
			`report.append_row(test.prevalence(), acc=meta_acc, acc_score=(1.0 - score))`

			`return report`
elsahar baseline imported 2023-09-18 18:19:13 +02:00

			`def rca_score(`
			`c_model: BaseEstimator,`
			`validation: LabelledCollection,`
baseline testing 2023-09-24 02:21:18 +02:00			`protocol: AbstractStochasticSeededProtocol,`
elsahar baseline imported 2023-09-18 18:19:13 +02:00			`predict_method="predict",`
			`):`
			`c_model_predict = getattr(c_model, predict_method)`
			`val_pred1 = c_model_predict(validation.X)`

baseline testing 2023-09-24 02:21:18 +02:00			`# ensure that the protocol returns a LabelledCollection for each iteration`
			`protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")`

baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`report = EvaluationReport(prefix="rca")`
baseline testing 2023-09-24 02:21:18 +02:00			`for test in protocol():`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`try:`
baseline testing 2023-09-24 02:21:18 +02:00			`test_pred = c_model_predict(test.X)`
			`c_model2 = rca.clone_fit(c_model, test.X, test_pred)`
			`c_model2_predict = getattr(c_model2, predict_method)`
			`val_pred2 = c_model2_predict(validation.X)`
baseline performance test updated 2023-09-26 07:58:40 +02:00			`rca_score = rca.get_score(val_pred1, val_pred2, validation.y)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`meta_score = abs(`
			`rca_score - (1 - metrics.accuracy_score(test.y, test_pred))`
			`)`
			`report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)`
baseline testing 2023-09-24 02:21:18 +02:00			`except ValueError:`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`report.append_row(`
			`test.prevalence(), acc=float("nan"), acc_score=float("nan")`
			`)`
baseline testing 2023-09-24 02:21:18 +02:00
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`return report`
baseline testing 2023-09-24 02:21:18 +02:00
elsahar baseline imported 2023-09-18 18:19:13 +02:00
			`def rca_star_score(`
			`c_model: BaseEstimator,`
			`validation: LabelledCollection,`
baseline testing 2023-09-24 02:21:18 +02:00			`protocol: AbstractStochasticSeededProtocol,`
elsahar baseline imported 2023-09-18 18:19:13 +02:00			`predict_method="predict",`
			`):`
			`c_model_predict = getattr(c_model, predict_method)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`validation1, validation2 = validation.split_stratified(`
			`train_prop=0.5, random_state=0`
			`)`
elsahar baseline imported 2023-09-18 18:19:13 +02:00			`val1_pred = c_model_predict(validation1.X)`
baseline testing 2023-09-24 02:21:18 +02:00			`c_model1 = rca.clone_fit(c_model, validation1.X, val1_pred)`
elsahar baseline imported 2023-09-18 18:19:13 +02:00			`c_model1_predict = getattr(c_model1, predict_method)`
			`val2_pred1 = c_model1_predict(validation2.X)`

baseline testing 2023-09-24 02:21:18 +02:00			`# ensure that the protocol returns a LabelledCollection for each iteration`
			`protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")`

baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`report = EvaluationReport(prefix="rca_star")`
baseline testing 2023-09-24 02:21:18 +02:00			`for test in protocol():`
			`try:`
			`test_pred = c_model_predict(test.X)`
			`c_model2 = rca.clone_fit(c_model, test.X, test_pred)`
			`c_model2_predict = getattr(c_model2, predict_method)`
			`val2_pred2 = c_model2_predict(validation2.X)`
baseline performance test updated 2023-09-26 07:58:40 +02:00			`rca_star_score = rca.get_score(val2_pred1, val2_pred2, validation2.y)`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`meta_score = abs(`
			`rca_star_score - (1 - metrics.accuracy_score(test.y, test_pred))`
			`)`
			`report.append_row(`
			`test.prevalence(), acc=meta_score, acc_score=rca_star_score`
baseline testing 2023-09-24 02:21:18 +02:00			`)`
			`except ValueError:`
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`report.append_row(`
			`test.prevalence(), acc=float("nan"), acc_score=float("nan")`
			`)`
baseline testing 2023-09-24 02:21:18 +02:00
baselines refactored and updated, report updated 2023-10-19 02:36:53 +02:00			`return report`