More UCI datasets, improved plots (larger fonts), and an evaluation script that prints numerical results to the command line
This commit is contained in:
parent
e609c262b4
commit
1d89301089
|
@ -0,0 +1,28 @@
|
||||||
|
import quapy as qp
|
||||||
|
import settings
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
from glob import glob
|
||||||
|
import itertools
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
|
||||||
|
|
||||||
|
resultdir = './results'
|
||||||
|
methods = ['*']
|
||||||
|
|
||||||
|
def evaluate_results(methods, datasets, error_name):
    """Print the error of every stored experiment matching the given patterns.

    For each (method, dataset) pair, result files are looked up with the glob
    pattern ``{resultdir}/{dataset}-{method}-{error_name}.pkl``; entries of
    ``methods`` and ``datasets`` may therefore contain glob wildcards.
    Each matching pickle is expected to hold a 6-tuple whose first two items
    are the true and the estimated prevalences. One line per file,
    ``<file name>: <error with 3 decimals>``, is printed in sorted order.

    :param methods: iterable of method names (or glob patterns)
    :param datasets: iterable of dataset names (or glob patterns)
    :param error_name: name of the error measure, resolved through
        ``qp.error.from_name`` and also used as the file-name suffix
    """
    error = qp.error.from_name(error_name)
    results_str = []
    for method, dataset in itertools.product(methods, datasets):
        pattern = f'{resultdir}/{dataset}-{method}-{error_name}.pkl'
        for experiment in glob(pattern):
            # NOTE: pickle must only be used on trusted files; these are assumed
            # to be results written locally by the experiments script.
            # The context manager guarantees the handle is closed after loading.
            with open(experiment, 'rb') as fin:
                true_prevalences, estim_prevalences, _tr_prev, _te_prev, _te_prev_estim, _best_params = \
                    pickle.load(fin)
            result = error(true_prevalences, estim_prevalences)
            results_str.append(f'{pathlib.Path(experiment).name}: {result:.3f}')

    for line in sorted(results_str):
        print(line)
|
||||||
|
|
||||||
|
evaluate_results(methods=['epacc*mae1k'], datasets=['*'], error_name='mae')
|
|
@ -10,6 +10,7 @@ from os.path import join
|
||||||
|
|
||||||
|
|
||||||
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
|
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
|
||||||
|
plotext='png'
|
||||||
|
|
||||||
resultdir = './results'
|
resultdir = './results'
|
||||||
plotdir = './plots'
|
plotdir = './plots'
|
||||||
|
@ -30,7 +31,7 @@ def gather_results(methods, error_name):
|
||||||
def plot_error_by_drift(methods, error_name, logscale=False, path=None):
|
def plot_error_by_drift(methods, error_name, logscale=False, path=None):
|
||||||
print('plotting error by drift')
|
print('plotting error by drift')
|
||||||
if path is not None:
|
if path is not None:
|
||||||
path = join(path, f'error_by_drift_{error_name}.pdf')
|
path = join(path, f'error_by_drift_{error_name}.{plotext}')
|
||||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||||
qp.plot.error_by_drift(
|
qp.plot.error_by_drift(
|
||||||
method_names,
|
method_names,
|
||||||
|
@ -51,9 +52,9 @@ def diagonal_plot(methods, error_name, path=None):
|
||||||
if path is not None:
|
if path is not None:
|
||||||
path = join(path, f'diag_{error_name}')
|
path = join(path, f'diag_{error_name}')
|
||||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, show_std=False, savepath=path+'_neg.pdf')
|
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, show_std=False, savepath=f'{path}_neg.{plotext}')
|
||||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, show_std=False, savepath=path+'_neu.pdf')
|
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, show_std=False, savepath=f'{path}_neu.{plotext}')
|
||||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, show_std=False, savepath=path+'_pos.pdf')
|
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, show_std=False, savepath=f'{path}_pos.{plotext}')
|
||||||
|
|
||||||
|
|
||||||
def binary_bias_global(methods, error_name, path=None):
|
def binary_bias_global(methods, error_name, path=None):
|
||||||
|
@ -61,9 +62,9 @@ def binary_bias_global(methods, error_name, path=None):
|
||||||
if path is not None:
|
if path is not None:
|
||||||
path = join(path, f'globalbias_{error_name}')
|
path = join(path, f'globalbias_{error_name}')
|
||||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', savepath=path+'_neg.pdf')
|
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', savepath=f'{path}_neg.{plotext}')
|
||||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', savepath=path+'_neu.pdf')
|
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', savepath=f'{path}_neu.{plotext}')
|
||||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', savepath=path+'_pos.pdf')
|
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', savepath=f'{path}_pos.{plotext}')
|
||||||
|
|
||||||
|
|
||||||
def binary_bias_bins(methods, error_name, path=None):
|
def binary_bias_bins(methods, error_name, path=None):
|
||||||
|
@ -71,24 +72,24 @@ def binary_bias_bins(methods, error_name, path=None):
|
||||||
if path is not None:
|
if path is not None:
|
||||||
path = join(path, f'localbias_{error_name}')
|
path = join(path, f'localbias_{error_name}')
|
||||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, savepath=path+'_neg.pdf')
|
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, savepath=f'{path}_neg.{plotext}')
|
||||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, savepath=path+'_neu.pdf')
|
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, savepath=f'{path}_neu.{plotext}')
|
||||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, savepath=path+'_pos.pdf')
|
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, savepath=f'{path}_pos.{plotext}')
|
||||||
|
|
||||||
|
|
||||||
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
|
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
|
||||||
new_methods_ae = ['svmmae' , 'epaccmaeptr', 'epaccmaemae', 'hdy', 'quanet']
|
new_methods_ae = ['svmmae' , 'epaccmaeptr', 'epaccmaemae', 'hdy', 'quanet']
|
||||||
new_methods_rae = ['svmmrae' , 'epaccmraeptr', 'epaccmraemrae', 'hdy', 'quanet']
|
new_methods_rae = ['svmmrae' , 'epaccmraeptr', 'epaccmraemrae', 'hdy', 'quanet']
|
||||||
|
|
||||||
# plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||||
# plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
|
plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
|
||||||
|
|
||||||
# diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||||
# diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
||||||
|
|
||||||
binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||||
binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
||||||
|
|
||||||
# binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
#binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||||
# binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
#binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
nice = {
|
nice = {
|
||||||
'mae':'AE',
|
'mae':'AE',
|
||||||
|
|
|
@ -10,6 +10,8 @@ from . import model_selection
|
||||||
from . import classification
|
from . import classification
|
||||||
from quapy.method.base import isprobabilistic, isaggregative
|
from quapy.method.base import isprobabilistic, isaggregative
|
||||||
|
|
||||||
|
__version__ = '0.1'
|
||||||
|
|
||||||
environ = {
|
environ = {
|
||||||
'SAMPLE_SIZE': None,
|
'SAMPLE_SIZE': None,
|
||||||
'UNK_TOKEN': '[UNK]',
|
'UNK_TOKEN': '[UNK]',
|
||||||
|
@ -18,6 +20,5 @@ environ = {
|
||||||
'PAD_INDEX': 1,
|
'PAD_INDEX': 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def isbinary(x):
|
def isbinary(x):
|
||||||
return x.binary
|
return x.binary
|
|
@ -148,7 +148,11 @@ UCI_DATASETS = ['acute.a', 'acute.b',
|
||||||
'pageblocks.5',
|
'pageblocks.5',
|
||||||
#'phoneme', # <-- I haven't found this one...
|
#'phoneme', # <-- I haven't found this one...
|
||||||
'semeion',
|
'semeion',
|
||||||
'sonar'] # ongoing...
|
'sonar',
|
||||||
|
'spambase',
|
||||||
|
'spectf',
|
||||||
|
'tictactoe',
|
||||||
|
'transfusion'] # ongoing...
|
||||||
|
|
||||||
def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3):
|
def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3):
|
||||||
|
|
||||||
|
@ -180,8 +184,11 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
||||||
'mammographic': 'Mammographic Mass',
|
'mammographic': 'Mammographic Mass',
|
||||||
'pageblocks.5': 'Page Blocks Classification (5)',
|
'pageblocks.5': 'Page Blocks Classification (5)',
|
||||||
'semeion': 'Semeion Handwritten Digit (8)',
|
'semeion': 'Semeion Handwritten Digit (8)',
|
||||||
'sonar': 'Sonar, Mines vs. Rocks'
|
'sonar': 'Sonar, Mines vs. Rocks',
|
||||||
|
'spambase': 'Spambase Data Set',
|
||||||
|
'spectf': 'SPECTF Heart Data',
|
||||||
|
'tictactoe': 'Tic-Tac-Toe Endgame Database',
|
||||||
|
'transfusion': 'Blood Transfusion Service Center Data Set '
|
||||||
}
|
}
|
||||||
|
|
||||||
# the identifier is an alias for the dataset group, it's part of the url data-folder, and is the name we use
|
# the identifier is an alias for the dataset group, it's part of the url data-folder, and is the name we use
|
||||||
|
@ -208,8 +215,11 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
||||||
'mammographic': 'mammographic-masses',
|
'mammographic': 'mammographic-masses',
|
||||||
'pageblocks.5': 'page-blocks',
|
'pageblocks.5': 'page-blocks',
|
||||||
'semeion': 'semeion',
|
'semeion': 'semeion',
|
||||||
'sonar': 'undocumented/connectionist-bench/sonar'
|
'sonar': 'undocumented/connectionist-bench/sonar',
|
||||||
|
'spambase': 'spambase',
|
||||||
|
'spectf': 'spect',
|
||||||
|
'tictactoe': 'tic-tac-toe',
|
||||||
|
'transfusion': 'blood-transfusion'
|
||||||
}
|
}
|
||||||
|
|
||||||
# the filename is the name of the file within the data_folder indexed by the identifier
|
# the filename is the name of the file within the data_folder indexed by the identifier
|
||||||
|
@ -219,7 +229,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
||||||
'statlog/german': 'german.data-numeric',
|
'statlog/german': 'german.data-numeric',
|
||||||
'mammographic-masses': 'mammographic_masses.data',
|
'mammographic-masses': 'mammographic_masses.data',
|
||||||
'page-blocks': 'page-blocks.data.Z',
|
'page-blocks': 'page-blocks.data.Z',
|
||||||
'undocumented/connectionist-bench/sonar': 'sonar.all-data'
|
'undocumented/connectionist-bench/sonar': 'sonar.all-data',
|
||||||
|
'spect': ['SPECTF.train', 'SPECTF.test'],
|
||||||
|
'blood-transfusion': 'transfusion.data'
|
||||||
}
|
}
|
||||||
|
|
||||||
# the filename containing the dataset description (if any)
|
# the filename containing the dataset description (if any)
|
||||||
|
@ -228,7 +240,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
||||||
'00193': None,
|
'00193': None,
|
||||||
'statlog/german': 'german.doc',
|
'statlog/german': 'german.doc',
|
||||||
'mammographic-masses': 'mammographic_masses.names',
|
'mammographic-masses': 'mammographic_masses.names',
|
||||||
'undocumented/connectionist-bench/sonar': 'sonar.names'
|
'undocumented/connectionist-bench/sonar': 'sonar.names',
|
||||||
|
'spect': 'SPECTF.names',
|
||||||
|
'blood-transfusion': 'transfusion.names'
|
||||||
}
|
}
|
||||||
|
|
||||||
identifier = identifier_map[dataset_name]
|
identifier = identifier_map[dataset_name]
|
||||||
|
@ -238,8 +252,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
||||||
|
|
||||||
URL = f'http://archive.ics.uci.edu/ml/machine-learning-databases/{identifier}'
|
URL = f'http://archive.ics.uci.edu/ml/machine-learning-databases/{identifier}'
|
||||||
data_dir = join(data_home, 'uci_datasets', identifier)
|
data_dir = join(data_home, 'uci_datasets', identifier)
|
||||||
data_path = join(data_dir, filename)
|
if isinstance(filename, str): # filename could be a list of files, in which case it will be processed later
|
||||||
download_file_if_not_exists(f'{URL}/{filename}', data_path)
|
data_path = join(data_dir, filename)
|
||||||
|
download_file_if_not_exists(f'{URL}/{filename}', data_path)
|
||||||
|
|
||||||
if descfile:
|
if descfile:
|
||||||
try:
|
try:
|
||||||
|
@ -368,11 +383,38 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
||||||
|
|
||||||
if identifier == 'undocumented/connectionist-bench/sonar':
|
if identifier == 'undocumented/connectionist-bench/sonar':
|
||||||
df = pd.read_csv(data_path, header=None, sep=',')
|
df = pd.read_csv(data_path, header=None, sep=',')
|
||||||
print(df)
|
|
||||||
X = df.iloc[:, 0:60].astype(float).values
|
X = df.iloc[:, 0:60].astype(float).values
|
||||||
y = df[60].values
|
y = df[60].values
|
||||||
y = binarize(y, pos_class='R')
|
y = binarize(y, pos_class='R')
|
||||||
|
|
||||||
|
if identifier == 'spambase':
|
||||||
|
df = pd.read_csv(data_path, header=None, sep=',')
|
||||||
|
X = df.iloc[:, 0:57].astype(float).values
|
||||||
|
y = df[57].values
|
||||||
|
y = binarize(y, pos_class=1)
|
||||||
|
|
||||||
|
if identifier == 'spect':
    # this dataset is distributed as several files (filename is a list here);
    # each one is downloaded and parsed separately, then all are concatenated
    dfs = []
    for file in filename:
        data_path = join(data_dir, file)
        # build the URL from the current file, not from the whole list of files
        download_file_if_not_exists(f'{URL}/{file}', data_path)
        dfs.append(pd.read_csv(data_path, header=None, sep=','))
    df = pd.concat(dfs)
    # column 0 holds the label; columns 1..44 hold the features
    X = df.iloc[:, 1:45].astype(float).values
    y = df[0].values
    y = binarize(y, pos_class=0)
|
||||||
|
|
||||||
|
if identifier == 'tic-tac-toe':
|
||||||
|
df = pd.read_csv(data_path, header=None, sep=',')
|
||||||
|
X = df.iloc[:, 0:9].replace('o',0).replace('b',1).replace('x',2).values
|
||||||
|
y = df[9].values
|
||||||
|
y = binarize(y, pos_class='negative')
|
||||||
|
|
||||||
|
if identifier == 'blood-transfusion':
|
||||||
|
df = pd.read_csv(data_path, sep=',')
|
||||||
|
X = df.iloc[:, 0:4].astype(float).values
|
||||||
|
y = df.iloc[:, 4].values
|
||||||
|
y = binarize(y, pos_class=1)
|
||||||
|
|
||||||
data = LabelledCollection(X, y)
|
data = LabelledCollection(X, y)
|
||||||
data.stats()
|
data.stats()
|
||||||
|
|
|
@ -5,9 +5,11 @@ import numpy as np
|
||||||
from matplotlib import cm
|
from matplotlib import cm
|
||||||
|
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
|
from matplotlib.font_manager import FontProperties
|
||||||
|
|
||||||
plt.rcParams['figure.figsize'] = [12, 8]
|
plt.rcParams['figure.figsize'] = [12, 8]
|
||||||
plt.rcParams['figure.dpi'] = 200
|
plt.rcParams['figure.dpi'] = 200
|
||||||
|
plt.rcParams['font.size'] = 16
|
||||||
|
|
||||||
|
|
||||||
def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True, savepath=None):
|
def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True, savepath=None):
|
||||||
|
@ -44,11 +46,11 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
|
||||||
|
|
||||||
|
|
||||||
def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
|
def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
|
||||||
|
method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
|
||||||
|
|
||||||
fig, ax = plt.subplots()
|
fig, ax = plt.subplots()
|
||||||
ax.grid()
|
ax.grid()
|
||||||
|
|
||||||
method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
|
|
||||||
|
|
||||||
data, labels = [], []
|
data, labels = [], []
|
||||||
for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
|
for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
|
||||||
true_prev = true_prev[:,pos_class]
|
true_prev = true_prev[:,pos_class]
|
||||||
|
|
4
test.py
4
test.py
|
@ -12,8 +12,8 @@ from classification.neural import NeuralClassifierTrainer, CNNnet
|
||||||
from method.meta import EPACC
|
from method.meta import EPACC
|
||||||
from quapy.model_selection import GridSearchQ
|
from quapy.model_selection import GridSearchQ
|
||||||
|
|
||||||
# dataset = qp.datasets.fetch_UCIDataset('sonar', verbose=True)
|
dataset = qp.datasets.fetch_UCIDataset('transfusion', verbose=True)
|
||||||
# sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
qp.environ['SAMPLE_SIZE'] = 500
|
qp.environ['SAMPLE_SIZE'] = 500
|
||||||
|
|
Loading…
Reference in New Issue