switching to devel

2023-10-30 09:41:52 +01:00 · 2023-10-30 09:41:52 +01:00 · 0f4008e18d
parent 3243fd90f8
commit 0f4008e18d
3 changed files with 45 additions and 42 deletions
--- a/distribution_matching/commons.py
+++ b/distribution_matching/commons.py
@@ -8,7 +8,7 @@ from distribution_matching.method_dirichlety import DIRy
 from sklearn.linear_model import LogisticRegression
 from method_kdey_closed_efficient import KDEyclosed_efficient

-METHODS  = ['EMQ', 'EMQ-C', 'DM', 'DM-T', 'DM-HD', 'KDEy-DMhd3', 'DM-CS', 'KDEy-closed++', 'KDEy-ML'] #['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM', 'KDEy-DMhd3', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #, 'KDEy-DMhd2'] #, 'KDEy-DMhd2', 'DM-HD'] 'KDEy-DMjs', 'KDEy-DM', 'KDEy-ML+', 'KDEy-DMhd3+',
+METHODS  = ['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM-T', 'DM-HD', 'KDEy-DMhd3', 'DM-CS', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM', 'KDEy-DMhd3', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #, 'KDEy-DMhd2'] #, 'KDEy-DMhd2', 'DM-HD'] 'KDEy-DMjs', 'KDEy-DM', 'KDEy-ML+', 'KDEy-DMhd3+', 'EMQ-C',
 BIN_METHODS = [x.replace('-OvA', '') for x in METHODS]


@@ -63,6 +63,9 @@ def new_method(method, **lr_kwargs):
        method_params = {'exact_train_prev': [False], 'recalib': ['bcts']}
        param_grid = {**method_params, **hyper_LR}
        quantifier = EMQ(lr)
+    elif method == 'HDy':
+        param_grid = hyper_LR
+        quantifier = HDy(lr)
    elif method == 'HDy-OvA':
        param_grid = {'binary_quantifier__' + key: val for key, val in hyper_LR.items()}
        quantifier = OneVsAllAggregative(HDy(lr))
--- a/distribution_matching/lequa_experiments.py
+++ b/distribution_matching/lequa_experiments.py
@@ -2,64 +2,64 @@ import pickle
 import numpy as np
 import os
 import pandas as pd
-from distribution_matching.commons import METHODS, new_method, show_results
+from distribution_matching.commons import METHODS, BIN_METHODS, new_method, show_results

 import quapy as qp
 from quapy.model_selection import GridSearchQ


-
 if __name__ == '__main__':

-    qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B']
-    qp.environ['N_JOBS'] = -1
-    for optim in ['mae', 'mrae']:
+    for task in ['T1A', 'T1B']:
+        qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE[task]
+        qp.environ['N_JOBS'] = -1
+        for optim in ['mae', 'mrae']:

-        result_dir = f'results/lequa/{optim}'
+            result_dir = f'results/lequa/{task}/{optim}'

-        os.makedirs(result_dir, exist_ok=True)
+            os.makedirs(result_dir, exist_ok=True)

-        for method in METHODS:
+            for method in (METHODS if task=='T1B' else BIN_METHODS):

-            print('Init method', method)
+                print('Init method', method)

-            result_path = f'{result_dir}/{method}'
+                result_path = f'{result_dir}/{method}'

-            if os.path.exists(result_path+'.csv'):
-                print(f'file {result_path}.csv already exist; skipping')
-                continue
+                if os.path.exists(result_path+'.csv'):
+                    print(f'file {result_path}.csv already exist; skipping')
+                    continue

-            with open(result_path+'.csv', 'wt') as csv:
-                csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
+                with open(result_path+'.csv', 'wt') as csv:
+                    csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')

-                dataset = 'T1B'
-                train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset)
-                print(f'init {dataset} #instances: {len(train)}')
-                param_grid, quantifier = new_method(method)
+                    dataset = task
+                    train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset)
+                    print(f'init {dataset} #instances: {len(train)}')
+                    param_grid, quantifier = new_method(method)

-                if param_grid is not None:
-                    modsel = GridSearchQ(quantifier, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1, error=optim)
+                    if param_grid is not None:
+                        modsel = GridSearchQ(quantifier, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1, error=optim)

-                    modsel.fit(train)
-                    print(f'best params {modsel.best_params_}')
-                    print(f'best score {modsel.best_score_}')
-                    pickle.dump(
-                        (modsel.best_params_, modsel.best_score_,),
-                        open(f'{result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
+                        modsel.fit(train)
+                        print(f'best params {modsel.best_params_}')
+                        print(f'best score {modsel.best_score_}')
+                        pickle.dump(
+                            (modsel.best_params_, modsel.best_score_,),
+                            open(f'{result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)

-                    quantifier = modsel.best_model()
-                else:
-                    print('debug mode... skipping model selection')
-                    quantifier.fit(train)
+                        quantifier = modsel.best_model()
+                    else:
+                        print('debug mode... skipping model selection')
+                        quantifier.fit(train)

-                report = qp.evaluation.evaluation_report(
-                    quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'],
-                    verbose=True, verbose_error=optim[1:], n_jobs=-1
-                )
-                means = report.mean()
-                report.to_csv(result_path+'.dataframe')
-                csv.write(f'{method}\tLeQua-T1B\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
-                csv.flush()
-                print(means)
+                    report = qp.evaluation.evaluation_report(
+                        quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'],
+                        verbose=True, verbose_error=optim[1:], n_jobs=-1
+                    )
+                    means = report.mean()
+                    report.to_csv(result_path+'.dataframe')
+                    csv.write(f'{method}\tLeQua-{task}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
+                    csv.flush()
+                    print(means)

        show_results(result_path)
--- a/laboratory/method_dxs.py
+++ b/laboratory/method_dxs.py
@@ -79,7 +79,7 @@ if __name__ == '__main__':
    repeats = 10
    error = 'mae'

-    div = 'HD'
+    div = 'topsoe'

    # generates tuples (dataset, method, method_name)
    # (the dataset is needed for methods that process the dataset differently)