QuAcc/quacc/main.py

64 lines
1.4 KiB
Python
Raw Normal View History

import pandas as pd
import quapy as qp
2023-05-20 20:23:17 +02:00
from quapy.method.aggregative import SLD
from quapy.protocol import APP
2023-06-08 15:20:11 +02:00
from sklearn.svm import SVC
2023-05-20 20:23:17 +02:00
import quacc.evaluation as eval
from quacc.estimator import AccuracyEstimator
from .data import get_dataset
2023-05-20 20:23:17 +02:00
# Global configuration applied at import time.
# Store 100 under quapy's "SAMPLE_SIZE" environment key
# (presumably the default sample size used by quapy protocols — confirm in quapy docs).
qp.environ["SAMPLE_SIZE"] = 100
# Render floats with four decimal places whenever pandas displays them.
pd.set_option("display.float_format", "{:.4f}".format)
2023-05-20 20:23:17 +02:00
def test_2(dataset_name, *, n_prevalences=21, repeats=1000):
    """Fit an accuracy estimator on a dataset and print its evaluation report.

    The function loads the train/test split for ``dataset_name``, fits an
    ``SVC`` classifier on the training data, wraps it together with an
    ``SLD`` quantifier in an ``AccuracyEstimator``, fits the estimator on the
    training split, and finally evaluates it over an ``APP`` protocol built
    on the test split. Progress messages and the resulting report are
    written to standard output; nothing is returned.

    Args:
        dataset_name: Name passed to ``get_dataset`` to obtain the
            ``(train, test)`` pair.
        n_prevalences: Forwarded to ``APP`` as ``n_prevalences``.
        repeats: Forwarded to ``APP`` as ``repeats``.
    """
    train, test = get_dataset(dataset_name)

    model = SVC(probability=True)
    print(f"fitting model {model.__class__.__name__}...", end=" ", flush=True)
    model.fit(*train.Xy)
    print("fit")

    # The quantifier is given its own fresh SVC instance instead of sharing
    # the already fitted `model`.
    qmodel = SLD(SVC(probability=True))
    estimator = AccuracyEstimator(model, qmodel)
    print(f"fitting qmodel {qmodel.__class__.__name__}...", end=" ", flush=True)
    estimator.fit(train)
    print("fit")

    protocol = APP(test, n_prevalences=n_prevalences, repeats=repeats)

    # Adjacent string literals are concatenated by the parser, so the banner
    # text does not depend on how these source lines are indented (unlike
    # backslash continuations inside a single literal).
    print(
        "Tests:\n"
        f"protocol={protocol.__class__.__name__}\n"
        f"n_prevalences={n_prevalences}\n"
        f"repeats={repeats}\n"
        "executing...\n"
    )

    # NOTE(review): the report is assumed to be a pandas DataFrame, since it
    # is rendered with `.to_string()` — confirm in quacc.evaluation.
    df = eval.evaluation_report(
        estimator,
        protocol,
        aggregate=True,
    )
    print(df.to_string())
2023-05-17 14:02:29 +02:00
def main():
    """Run ``test_2`` for every enabled dataset.

    For each dataset the name is printed first, then the experiment is
    executed, and finally a line of 50 asterisks is printed as a separator.
    """
    # Entries that were listed but commented out in the dataset list:
    # "hp", "spambase".
    enabled_datasets = ("imdb",)
    separator = "*" * 50

    for name in enabled_datasets:
        print(name)
        test_2(name)
        print(separator)
# Script entry point: run the experiments only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()