2023-06-02 19:36:54 +02:00
|
|
|
import pandas as pd
|
2023-05-11 21:43:59 +02:00
|
|
|
import quapy as qp
|
2023-06-02 19:36:54 +02:00
|
|
|
from quapy.protocol import APP
|
2023-07-26 00:38:23 +02:00
|
|
|
from sklearn.linear_model import LogisticRegression
|
2023-05-11 21:43:59 +02:00
|
|
|
|
2023-05-20 20:23:17 +02:00
|
|
|
import quacc.evaluation as eval
|
2023-07-27 03:16:41 +02:00
|
|
|
from quacc.estimator import (
|
|
|
|
BinaryQuantifierAccuracyEstimator,
|
|
|
|
MulticlassAccuracyEstimator,
|
|
|
|
)
|
2023-05-20 20:23:17 +02:00
|
|
|
|
2023-09-13 00:11:20 +02:00
|
|
|
from quacc.dataset import get_imdb_traintest
|
2023-05-20 20:23:17 +02:00
|
|
|
|
2023-06-02 19:36:54 +02:00
|
|
|
# Set quapy's global SAMPLE_SIZE entry to 100. Presumably this is the sample
# size the APP protocol below falls back to, since no explicit size is passed
# to it in this file -- confirm against the quapy documentation.
qp.environ["SAMPLE_SIZE"] = 100
|
2023-05-11 21:43:59 +02:00
|
|
|
|
2023-06-02 19:36:54 +02:00
|
|
|
# Render floats in pandas output with four decimal places; this affects the
# reports printed via df.to_string() in the estimate_* functions.
pd.set_option("display.float_format", "{:.4f}".format)
|
2023-05-11 21:43:59 +02:00
|
|
|
|
2023-07-27 03:16:41 +02:00
|
|
|
# Name of the dataset under test: printed by, and passed to
# get_imdb_traintest() in, both estimate_multiclass() and estimate_binary().
dataset_name = "imdb"
|
2023-05-11 21:43:59 +02:00
|
|
|
|
2023-07-27 03:16:41 +02:00
|
|
|
|
|
|
|
def estimate_multiclass():
    """Run the multiclass accuracy-estimation experiment and print its report.

    Steps, all reported on stdout:

    1. Load the train/test split for the module-level ``dataset_name`` via
       ``get_imdb_traintest``.
    2. Fit a ``LogisticRegression`` on the training data (``train.Xy``).
    3. Wrap the classifier in a ``MulticlassAccuracyEstimator`` and fit the
       estimator on the same training set.
    4. Build an ``APP`` protocol over the test set and pass estimator and
       protocol to ``eval.evaluation_report`` with ``aggregate=True``.
    5. Print the resulting report with ``to_string()``.

    Returns:
        None. The function only prints.
    """
    print(dataset_name)
    train, test = get_imdb_traintest(dataset_name)

    model = LogisticRegression()
    # end=" " keeps the trailing "fit" on the same line as the progress text;
    # flush=True makes the text visible before the (possibly slow) fit starts.
    print(f"fitting model {model.__class__.__name__}...", end=" ", flush=True)
    model.fit(*train.Xy)
    print("fit")

    estimator = MulticlassAccuracyEstimator(model)
    print(
        f"fitting qmodel {estimator.q_model.__class__.__name__}...",
        end=" ",
        flush=True,
    )
    estimator.fit(train)
    print("fit")

    n_prevalences = 21
    repeats = 1000
    protocol = APP(test, n_prevalences=n_prevalences, repeats=repeats)
    # Adjacent literals instead of backslash continuation inside one string:
    # the source indentation can never leak into the printed message.
    print(
        "Tests:\n"
        f"protocol={protocol.__class__.__name__}\n"
        f"n_prevalences={n_prevalences}\n"
        f"repeats={repeats}\n"
        "executing...\n"
    )

    df = eval.evaluation_report(
        estimator,
        protocol,
        aggregate=True,
    )
    # df.to_latex() and df.to_html() are alternative renderings of the report.
    print(df.to_string())
    print()
|
|
|
|
|
|
|
|
|
|
|
|
def estimate_binary():
    """Run the binary-quantifier accuracy-estimation experiment and print its report.

    Steps, all reported on stdout:

    1. Load the train/test split for the module-level ``dataset_name`` via
       ``get_imdb_traintest``.
    2. Fit a ``LogisticRegression`` on the training data (``train.Xy``).
    3. Wrap the classifier in a ``BinaryQuantifierAccuracyEstimator`` and fit
       the estimator on the same training set.
    4. Build an ``APP`` protocol over the test set and pass estimator and
       protocol to ``eval.evaluation_report`` with ``aggregate=True``.
    5. Print the resulting report with ``to_string()``.

    Returns:
        None. The function only prints.
    """
    print(dataset_name)
    train, test = get_imdb_traintest(dataset_name)

    model = LogisticRegression()
    # end=" " keeps the trailing "fit" on the same line as the progress text;
    # flush=True makes the text visible before the (possibly slow) fit starts.
    print(f"fitting model {model.__class__.__name__}...", end=" ", flush=True)
    model.fit(*train.Xy)
    print("fit")

    estimator = BinaryQuantifierAccuracyEstimator(model)
    # Only the class of q_model_0 is reported in the progress message.
    print(
        f"fitting qmodel {estimator.q_model_0.__class__.__name__}...",
        end=" ",
        flush=True,
    )
    estimator.fit(train)
    print("fit")

    n_prevalences = 21
    repeats = 1000
    protocol = APP(test, n_prevalences=n_prevalences, repeats=repeats)
    # Adjacent literals instead of backslash continuation inside one string:
    # the source indentation can never leak into the printed message.
    print(
        "Tests:\n"
        f"protocol={protocol.__class__.__name__}\n"
        f"n_prevalences={n_prevalences}\n"
        f"repeats={repeats}\n"
        "executing...\n"
    )

    df = eval.evaluation_report(
        estimator,
        protocol,
        aggregate=True,
    )
    # df.to_latex(float_format="{:.4f}".format) and df.to_html() are
    # alternative renderings of the report.
    print(df.to_string())
    print()
|
2023-05-17 14:02:29 +02:00
|
|
|
|
|
|
|
|
2023-05-11 21:43:59 +02:00
|
|
|
# Script entry point: only the multiclass experiment is run when this file is
# executed directly; estimate_binary() is defined above but not invoked here.
if __name__ == "__main__":
    estimate_multiclass()
|