update
This commit is contained in:
parent
1e7161e681
commit
531d22573b
10
conf.yaml
10
conf.yaml
|
@ -71,14 +71,14 @@ test_conf: &test_conf
|
|||
|
||||
main:
|
||||
confs: &main_confs
|
||||
- DATASET_NAME: imdb
|
||||
- DATASET_NAME: rcv1
|
||||
DATASET_TARGET: CCAT
|
||||
other_confs:
|
||||
- DATASET_NAME: imdb
|
||||
- DATASET_NAME: rcv1
|
||||
DATASET_TARGET: GCAT
|
||||
- DATASET_NAME: rcv1
|
||||
DATASET_TARGET: MCAT
|
||||
other_confs:
|
||||
|
||||
sld_lr_conf: &sld_lr_conf
|
||||
|
||||
|
@ -348,9 +348,7 @@ baselines_conf: &baselines_conf
|
|||
COMP_ESTIMATORS:
|
||||
- doc
|
||||
- atc_mc
|
||||
- mandoline
|
||||
- rca
|
||||
- rca_star
|
||||
- naive
|
||||
N_JOBS: -2
|
||||
|
||||
confs: *main_confs
|
||||
|
@ -406,4 +404,4 @@ timing_conf: &timing_conf
|
|||
|
||||
confs: *main_confs
|
||||
|
||||
exec: *kde_lr_gs_conf
|
||||
exec: *baselines_conf
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
#!/bin/bash
|
||||
|
||||
CMD="cp"
|
||||
DEST="~/tesi_docker/"
|
||||
CMD="scp"
|
||||
DEST="andreaesuli@edge-nd1.isti.cnr.it:~/raid/lorenzo/"
|
||||
# CMD="cp"
|
||||
# DEST="~/tesi_docker/"
|
||||
|
||||
bash -c "${CMD} -r quacc ${DEST}"
|
||||
bash -c "${CMD} -r baselines ${DEST}"
|
||||
|
|
2
log
2
log
|
@ -3,6 +3,8 @@
|
|||
# Pick which quacc.log to follow based on the first argument:
#   r -> copy the log from ilona to ~/tesi/remote.log, then follow it over ssh
#   d -> follow the log on edge-nd1 over ssh
#   anything else (or no argument) -> follow the local log
case "${1}" in
    r)
        # The copy's output is discarded.
        scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
        ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
        ;;
    d)
        ssh andreaesuli@edge-nd1.isti.cnr.it tail -n 500 -f /home/andreaesuli/raid/lorenzo/quacc.log | bat -P --language=log
        ;;
    *)
        tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
        ;;
esac
|
||||
|
|
|
@ -126,7 +126,9 @@ class DatasetProvider:
|
|||
|
||||
# provare min_df=5
|
||||
def __imdb(self, **kwargs):
|
||||
return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test
|
||||
return qp.datasets.fetch_reviews(
|
||||
"imdb", data_home="./quapy_data", tfidf=True, min_df=3
|
||||
).train_test
|
||||
|
||||
def __rcv1(self, target, **kwargs):
|
||||
n_train = 23149
|
||||
|
@ -135,7 +137,7 @@ class DatasetProvider:
|
|||
if target is None or target not in available_targets:
|
||||
raise ValueError(f"Invalid target {target}")
|
||||
|
||||
dataset = fetch_rcv1()
|
||||
dataset = fetch_rcv1(data_home="./scikit_learn_data")
|
||||
target_index = np.where(dataset.target_names == target)[0]
|
||||
all_train_d = dataset.data[:n_train, :]
|
||||
test_d = dataset.data[n_train:, :]
|
||||
|
|
|
@ -68,6 +68,38 @@ def kfcv(
|
|||
return report
|
||||
|
||||
|
||||
@baseline
def naive(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """Compare the classifier's validation scores with its per-sample test scores.

    Accuracy and F1 are computed once on ``validation``. For every sample
    yielded by ``protocol()`` the same two scores are computed, and a row is
    appended to the report holding the sample prevalence, the test scores
    (``acc_score``, ``f1_score``) and the absolute differences from the
    validation scores (``acc``, ``f1``).

    Args:
        c_model: classifier exposing the method named by ``predict_method``.
        validation: labelled data used for the reference scores.
        protocol: callable yielding the test samples to evaluate.
        predict_method: name of the prediction method looked up on ``c_model``.

    Returns:
        The ``EvaluationReport`` named ``"naive"`` with one row per test sample.
    """
    predict = getattr(c_model, predict_method)
    # F1 averaging mode: "binary" for two classes, "macro" otherwise.
    average = "binary" if validation.n_classes == 2 else "macro"

    def _scores(sample):
        # Return (accuracy, F1) of the classifier on one labelled sample.
        predicted = predict(sample.X)
        return (
            metrics.accuracy_score(sample.y, predicted),
            metrics.f1_score(sample.y, predicted, average=average),
        )

    val_acc, val_f1 = _scores(validation)

    report = EvaluationReport(name="naive")
    for test in protocol():
        test_acc, test_f1 = _scores(test)
        report.append_row(
            test.prevalence(),
            acc_score=test_acc,
            f1_score=test_f1,
            acc=abs(val_acc - test_acc),
            f1=abs(val_f1 - test_f1),
        )

    return report
|
||||
|
||||
|
||||
@baseline
|
||||
def ref(
|
||||
c_model: BaseEstimator,
|
||||
|
@ -556,4 +588,3 @@ def kdex2(
|
|||
report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
|
||||
|
||||
return report
|
||||
|
||||
|
|
|
@ -380,9 +380,9 @@ __kde_lr_set = [
|
|||
M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
|
||||
M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
|
||||
# gs kde
|
||||
G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ),
|
||||
G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ),
|
||||
G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True),
|
||||
G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ),
|
||||
G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ),
|
||||
G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True),
|
||||
E("kde_lr_gs"),
|
||||
]
|
||||
|
||||
|
@ -458,6 +458,7 @@ __methods_set = (
|
|||
+ __kde_lr_set
|
||||
+ __dense_kde_lr_set
|
||||
+ __dense_kde_rbf_set
|
||||
+ [E("QuAcc")]
|
||||
)
|
||||
|
||||
_methods = {m.name: m for m in __methods_set}
|
||||
|
|
|
@ -140,6 +140,14 @@ class CompReport:
|
|||
"mul_kde_lr_gs",
|
||||
"m3w_kde_lr_gs",
|
||||
],
|
||||
"QuAcc": [
|
||||
"bin_sld_lr_gs",
|
||||
"mul_sld_lr_gs",
|
||||
"m3w_sld_lr_gs",
|
||||
"bin_kde_lr_gs",
|
||||
"mul_kde_lr_gs",
|
||||
"m3w_kde_lr_gs",
|
||||
],
|
||||
}
|
||||
|
||||
for name, methods in _mapping.items():
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
# Additional covariates percentage
|
||||
|
||||
Rate of usage of additional covariates, recalibration and "balanced" class_weight
|
||||
during grid search:
|
||||
|
||||
| method | av % | recalib % | rebalance % |
|
||||
| --------------: | :----: | :-------: | :---------: |
|
||||
| imdb_sld_lr | 81.49% | 77.78% | 59.26% |
|
||||
| imdb_kde_lr | 71.43% | NA | 88.18% |
|
||||
| rcv1_CCAT_sld_lr| 62.97% | 70.38% | 77.78% |
|
||||
| rcv1_CCAT_kde_lr| 78.06% | NA | 84.82% |
|
||||
| rcv1_GCAT_sld_lr| 76.93% | 61.54% | 65.39% |
|
||||
| rcv1_GCAT_kde_lr| 71.36% | NA | 78.65% |
|
||||
| rcv1_MCAT_sld_lr| 62.97% | 48.15% | 74.08% |
|
||||
| rcv1_MCAT_kde_lr| 71.03% | NA | 68.70% |
|
Loading…
Reference in New Issue