From 3c5a53bdecba758f4a56e9ad8004249e927f818e Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Tue, 12 Jan 2021 17:39:00 +0100
Subject: [PATCH] testing quapy via replicating Tweet Quantification experiments

---
 TweetSentQuant/experiments.py | 136 +++++++++++++++++++++++++
 TweetSentQuant/tables.py      | 187 ++++++++++++++++++++++++++++++++++
 quapy/data/datasets.py        |  15 ++-
 quapy/data/preprocessing.py   |   2 +-
 quapy/evaluation.py           |   8 +-
 quapy/model_selection.py      |   4 +-
 tweet_sent_quant.py           | 137 -------------------------
 7 files changed, 343 insertions(+), 146 deletions(-)
 create mode 100644 TweetSentQuant/experiments.py
 create mode 100644 TweetSentQuant/tables.py
 delete mode 100644 tweet_sent_quant.py

diff --git a/TweetSentQuant/experiments.py b/TweetSentQuant/experiments.py
new file mode 100644
index 0000000..511e60e
--- /dev/null
+++ b/TweetSentQuant/experiments.py
@@ -0,0 +1,136 @@
+from sklearn.linear_model import LogisticRegression
+import quapy as qp
+import quapy.functional as F
+import numpy as np
+import os
+import pickle
+import itertools
+from joblib import Parallel, delayed
+import multiprocessing
+
+
+n_jobs = multiprocessing.cpu_count()
+
+
+def quantification_models():
+    def newLR():
+        return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
+    __C_range = np.logspace(-4, 5, 10)
+    lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+    yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
+    yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
+    yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
+    yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
+
+
+def evaluate_experiment(true_prevalences, estim_prevalences):
+    print('\nEvaluation Metrics:\n'+'='*22)
+    for eval_measure in [qp.error.mae, qp.error.mrae]:
+        err = eval_measure(true_prevalences, estim_prevalences)
+        print(f'\t{eval_measure.__name__}={err:.4f}')
+    print()
+
+
+def evaluate_method_point_test(true_prev, estim_prev):
+    print('\nPoint-Test evaluation:\n' + '=' * 22)
+    print(f'true-prev={F.strprev(true_prev)}, estim-prev={F.strprev(estim_prev)}')
+    for eval_measure in [qp.error.mae, qp.error.mrae]:
+        err = eval_measure(true_prev, estim_prev)
+        print(f'\t{eval_measure.__name__}={err:.4f}')
+
+
+def result_path(dataset_name, model_name, optim_loss):
+    return f'./results/{dataset_name}-{model_name}-{optim_loss}.pkl'
+
+
+def is_already_computed(dataset_name, model_name, optim_loss):
+    if dataset_name=='semeval':
+        check_datasets = ['semeval13', 'semeval14', 'semeval15']
+    else:
+        check_datasets = [dataset_name]
+    return all(os.path.exists(result_path(name, model_name, optim_loss)) for name in check_datasets)
+
+
+def save_results(dataset_name, model_name, optim_loss, *results):
+    rpath = result_path(dataset_name, model_name, optim_loss)
+    qp.util.create_parent_dir(rpath)
+    with open(rpath, 'wb') as foo:
+        pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)
+
+
+def run(experiment):
+
+    sample_size = 100
+    qp.environ['SAMPLE_SIZE'] = sample_size
+
+    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
+
+    if is_already_computed(dataset_name, model_name, optim_loss=optim_loss):
+        print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
+        return
+
+    benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
+
+    # model selection (hyperparameter optimization for a quantification-oriented loss)
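+    # GridSearchQ evaluates each hyperparameter combination with the artificial prevalence
+    # protocol (samples of 100 documents drawn at 21 prevalence points, 5 repetitions each)
+    # and keeps the combination that minimizes MAE on the development split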
+    model_selection = qp.model_selection.GridSearchQ(
+        model,
+        param_grid=hyperparams,
+        sample_size=sample_size,
+        n_prevpoints=21,
+        n_repetitions=5,
+        error='mae',
+        refit=False,
+        verbose=True
+    )
+    model_selection.fit(benchmark_devel.training, benchmark_devel.test)
+    model = model_selection.best_model()
+
+    # model evaluation
+    test_names = [dataset_name] if dataset_name != 'semeval' else ['semeval13', 'semeval14', 'semeval15']
+    for test_no, test_name in enumerate(test_names):
+        benchmark_eval = qp.datasets.fetch_twitter(test_name, for_model_selection=False, min_df=5, pickle=True)
+        if test_no == 0:
+            # fits the model only the first time
+            model.fit(benchmark_eval.training)
+
+        true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
+            model,
+            test=benchmark_eval.test,
+            sample_size=sample_size,
+            n_prevpoints=21,
+            n_repetitions=25
+        )
+        test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
+        test_true_prevalence = benchmark_eval.test.prevalence()
+
+        evaluate_experiment(true_prevalences, estim_prevalences)
+        evaluate_method_point_test(test_true_prevalence, test_estim_prevalence)
+        save_results(test_name, model_name, optim_loss,
+                     true_prevalences, estim_prevalences,
+                     benchmark_eval.training.prevalence(), test_true_prevalence, test_estim_prevalence,
+                     model_selection.best_params_)
+
+
+if __name__ == '__main__':
+
+    np.random.seed(0)
+
+    optim_losses = ['mae', 'mrae']
+    datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
+    models = quantification_models()
+
+    results = Parallel(n_jobs=n_jobs)(
+        delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
+    )
+
+
+# QUANTIFIER_ALIASES = {
+#     'emq': lambda learner: ExpectationMaximizationQuantifier(learner),
+#     'svmq': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='q'),
+#     'svmkld': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='kld'),
+#     'svmnkld': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='nkld'),
+#     'svmmae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mae'),
+#     'svmmrae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mrae'),
+#     'mlpe': lambda learner: MaximumLikelihoodPrevalenceEstimation(),
+# }
+#
diff --git a/TweetSentQuant/tables.py b/TweetSentQuant/tables.py
new file mode 100644
index 0000000..12568b6
--- /dev/null
+++ b/TweetSentQuant/tables.py
@@ -0,0 +1,187 @@
+import quapy as qp
+from os import makedirs
+# from evaluate import evaluate_directory, statistical_significance, get_ranks_from_Gao_Sebastiani
+import sys, os
+import pickle
+from experiments import result_path
+
+tables_path = './tables'
+MAXTONE = 50  # sets the intensity of the maximum color reached by the worst (red) and best (green) results
+
+makedirs(tables_path, exist_ok=True)
+
+sample_size = 100
+qp.environ['SAMPLE_SIZE'] = sample_size
+
+
+# results_dict = evaluate_directory('results/*.pkl', evaluation_measures)
+# stats = {
+#     dataset : {
+#         'mae': statistical_significance(f'results/{dataset}-*-mae-run?.pkl', ae),
+#         'mrae': statistical_significance(f'results/{dataset}-*-mrae-run?.pkl', rae),
+#     } for dataset in datasets
+# }
+
+nice = {
+    'mae': 'AE',
+    'mrae': 'RAE',
+    'svmkld': 'SVM(KLD)',
+    'svmnkld': 'SVM(NKLD)',
+    'svmq': 'SVM(Q)',
+    'svmae': 'SVM(AE)',
+    'svmnae': 'SVM(NAE)',
+    'svmmae': 'SVM(AE)',
+    'svmmrae': 'SVM(RAE)',
+    'quanet': 'QuaNet',
+    'hdy': 'HDy',
+    'dys': 'DyS',
+    'svmperf': '',
+    'sanders': 'Sanders',
+    'semeval13': 'SemEval13',
+    'semeval14': 'SemEval14',
+    'semeval15': 'SemEval15',
+    'semeval16': 'SemEval16'
+}
+# }
+# }
+
+
+def nicerm(key):
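+    # wraps the pretty-printed name from the nice dictionary in \mathrm{...} so that it
+    # renders upright when used inside math mode (e.g., in the per-method superscripts)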
+    return '\mathrm{'+nice[key]+'}'
+
+def color_from_rel_rank(rel_rank, maxtone=100):
+    rel_rank = rel_rank*2-1
+    if rel_rank < 0:
+        color = 'red'
+        tone = maxtone*(-rel_rank)
+    else:
+        color = 'green'
+        tone = maxtone*rel_rank
+    return '\cellcolor{' + color + f'!{int(tone)}' + '}'
+
+def color_from_abs_rank(abs_rank, n_methods, maxtone=100):
+    rel_rank = 1.-(abs_rank-1.)/(n_methods-1)
+    return color_from_rel_rank(rel_rank, maxtone)
+
+
+def save_table(path, table):
+    print(f'saving results in {path}')
+    with open(path, 'wt') as foo:
+        foo.write(table)
+
+
+# Tables evaluation scores for AE and RAE (two tables)
+# ----------------------------------------------------
+
+datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
+evaluation_measures = [qp.error.mae, qp.error.mrae]
+gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
+
+results_dict = {}
+stats={}
+def getscore(dataset, method, loss):
+    path = result_path(dataset, method, loss)
+    if os.path.exists(path):
+        true_prevs, estim_prevs, _, _, _, _ = pickle.load(open(path, 'rb'))
+        err = getattr(qp.error, loss)
+        return err(true_prevs, estim_prevs)
+    return None
+
+
+for i, eval_func in enumerate(evaluation_measures):
+    eval_name = eval_func.__name__
+    added_methods = ['svm' + eval_name]  # , 'quanet', 'dys']
+    methods = gao_seb_methods + added_methods
+    nold_methods = len(gao_seb_methods)
+    nnew_methods = len(added_methods)
+
+    tabular = """
+    \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*len(gao_seb_methods))+ '|' + ('Y|'*len(added_methods)) + """} \hline
+      & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c||}{} \\\\ \hline
+    """
+
+    for method in methods:
+        tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
+    tabular += '\\\\\hline\n'
+
+    for dataset in datasets:
+        tabular += nice.get(dataset, dataset.upper()) + ' '
+        for method in methods:
+            #simplify...
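+            # getscore re-computes the error measure from the pickled true/estimated prevalences;
+            # it returns None when the corresponding result file is missing, rendered below as ---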
+            score = getscore(dataset, method, eval_name)
+            if score is not None:
+                tabular += f' & {score:.3f} '
+            else:
+                tabular += ' & --- '
+        tabular += '\\\\\hline\n'
+    tabular += "\end{tabularx}"
+
+    save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
+
+sys.exit(0)
+
+# gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
+
+# Tables ranks for AE and RAE (two tables)
+# ----------------------------------------------------
+# for i, eval_func in enumerate(evaluation_measures):
+#     eval_name = eval_func.__name__
+#     methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
+#     table = """
+#     \\begin{table}[h]
+#     """
+#     if i == 0:
+#         caption = """
+#           \caption{Rank positions of the quantification methods in the AE
+#           experiments, and (between parentheses) the rank positions
+#           obtained in the evaluation of~\cite{Gao:2016uq}.}
+#         """
+#     else:
+#         caption = "\caption{Same as Table~\\ref{tab:maeranks}, but with " + nice[eval_name] + " instead of AE.}"
+#     table += caption + """
+#         \\begin{center}
+#         \\resizebox{\\textwidth}{!}{
+#     """
+#     tabular = """
+#     \\begin{tabularx}{\\textwidth}{|c||Y|Y|Y|Y|Y|Y|Y|Y|} \hline
+#       & \multicolumn{8}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline
+#     """
+#
+#     for method in methods:
+#         tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
+#     tabular += '\\\\\hline\n'
+#
+#     for dataset in datasets:
+#         tabular += nice.get(dataset, dataset.upper()) + ' '
+#         ranks_no_gap = []
+#         for method in methods:
+#             learner = 'lr' if not method.startswith('svm') else 'svmperf'
+#             key = f'{dataset}-{method}-{learner}-{}-{eval_name}'
+#             ranks_no_gap.append(stats[dataset][eval_name].get(key, (None, None, len(methods)))[2])
+#         ranks_no_gap = sorted(ranks_no_gap)
+#         ranks_no_gap = {rank:i+1 for i,rank in enumerate(ranks_no_gap)}
+#         for method in methods:
+#             learner = 'lr' if not method.startswith('svm') else 'svmperf'
+#             key = f'{dataset}-{method}-{learner}-{sample_size}-{eval_name}'
+#             if key in stats[dataset][eval_name]:
+#                 _, _, abs_rank = stats[dataset][eval_name][key]
+#                 real_rank = ranks_no_gap[abs_rank]
+#                 tabular += f' & {real_rank}'
+#                 tabular += color_from_abs_rank(real_rank, len(methods), maxtone=MAXTONE)
+#             else:
+#                 tabular += ' & --- '
+#             old_rank = gao_seb_ranks.get(f'{dataset}-{method}-{eval_name}', 'error')
+#             tabular += f' ({old_rank})'
+#         tabular += '\\\\\hline\n'
+#     tabular += "\end{tabularx}"
+#     table += tabular + """
+#         }
+#     \end{center}
+#     \label{tab:""" + eval_name + """ranks}
+#     \end{table}
+#     """
+#     save_table(f'../tables/tab_rank_{eval_name}.tex', table)
+#
+#
+# print("[Done]")
\ No newline at end of file
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 54bfbfb..2af3de3 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -9,9 +9,12 @@ import pandas as pd
 
 
 REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb']
-TWITTER_SENTIMENT_DATASETS = ['gasp', 'hcr', 'omd', 'sanders',
+TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders',
                               'semeval13', 'semeval14', 'semeval15', 'semeval16',
                               'sst', 'wa', 'wb']
+TWITTER_SENTIMENT_DATASETS_TRAIN = ['gasp', 'hcr', 'omd', 'sanders',
+                                    'semeval', 'semeval16',
+                                    'sst', 'wa', 'wb']
 
 
 def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False):
@@ -63,6 +66,7 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
     Load a Twitter dataset as a Dataset instance, as used in:
     Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
     Social Network Analysis and Mining6(19), 1–22 (2016)
+    The datasets 'semeval13', 'semeval14', 'semeval15' share the same training set.
 
     :param dataset_name: the name of the dataset: valid ones are 'gasp', 'hcr', 'omd', 'sanders',
         'semeval13', 'semeval14', 'semeval15', 'semeval16', 'sst', 'wa', 'wb'
@@ -76,9 +80,11 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
         faster subsequent invokations
     :return: a Dataset instance
     """
-    assert dataset_name in TWITTER_SENTIMENT_DATASETS, \
+    assert dataset_name in TWITTER_SENTIMENT_DATASETS_TRAIN + TWITTER_SENTIMENT_DATASETS_TEST, \
         f'Name {dataset_name} does not match any known dataset for sentiment twitter. ' \
-        f'Valid ones are {TWITTER_SENTIMENT_DATASETS}'
+        f'Valid ones are {TWITTER_SENTIMENT_DATASETS_TRAIN} for model selection and ' \
+        f'{TWITTER_SENTIMENT_DATASETS_TEST} for test (datasets "semeval13", "semeval14", "semeval15" share ' \
+        f'a common training set "semeval")'
 
     if data_home is None:
         data_home = get_quapy_home()
@@ -97,6 +103,9 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
             print(f"the training and development sets for datasets 'semeval13', 'semeval14', 'semeval15' are common "
                   f"(called 'semeval'); returning trainin-set='{trainset_name}' and test-set={testset_name}")
     else:
+        if dataset_name == 'semeval' and for_model_selection==False:
+            raise ValueError('dataset "semeval" can only be used for model selection. '
+                             'Use "semeval13", "semeval14", or "semeval15" for model evaluation.')
         trainset_name = testset_name = dataset_name
 
     if for_model_selection:
diff --git a/quapy/data/preprocessing.py b/quapy/data/preprocessing.py
index 6206be0..972a3db 100644
--- a/quapy/data/preprocessing.py
+++ b/quapy/data/preprocessing.py
@@ -137,7 +137,7 @@ class IndexTransformer:
 
     def index(self, documents):
         vocab = self.vocabulary_.copy()
-        return [[vocab.get(word, self.unk) for word in self.analyzer(doc)] for doc in tqdm(documents, 'indexing')]
+        return [[vocab.getscore(word, self.unk) for word in self.analyzer(doc)] for doc in tqdm(documents, 'indexing')]
 
     def fit_transform(self, X, n_jobs=-1):
         return self.fit(X).transform(X, n_jobs=n_jobs)
diff --git a/quapy/evaluation.py b/quapy/evaluation.py
index a270663..498f284 100644
--- a/quapy/evaluation.py
+++ b/quapy/evaluation.py
@@ -39,17 +39,17 @@ def artificial_sampling_prediction(
     indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
 
     if isinstance(model, qp.method.aggregative.AggregativeQuantifier):
-        print('\tinstance of aggregative-quantifier')
+        # print('\tinstance of aggregative-quantifier')
         quantification_func = model.aggregate
         if isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier):
-            print('\t\tinstance of probabilitstic-aggregative-quantifier')
+            # print('\t\tinstance of probabilitstic-aggregative-quantifier')
            preclassified_instances = model.posterior_probabilities(test.instances)
         else:
-            print('\t\tinstance of hard-aggregative-quantifier')
+            # print('\t\tinstance of hard-aggregative-quantifier')
             preclassified_instances = model.classify(test.instances)
         test = LabelledCollection(preclassified_instances, test.labels)
     else:
-        print('\t\tinstance of base-quantifier')
+        # print('\t\tinstance of base-quantifier')
         quantification_func = model.quantify
 
     def _predict_prevalences(index):
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 039dbb9..be330b5 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -112,7 +112,7 @@ class GridSearchQ(BaseQuantifier):
             raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
                              f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')
 
-    def fit(self, training: LabelledCollection, validation: Union[LabelledCollection, float]=0.3):
+    def fit(self, training: LabelledCollection, validation: Union[LabelledCollection, float]=0.4):
         """
         :param training: the training set on which to optimize the hyperparameters
         :param validation: either a LabelledCollection on which to test the performance of the different settings, or
@@ -121,6 +121,8 @@ class GridSearchQ(BaseQuantifier):
         training, validation = self.__check_training_validation(training, validation)
         self.__check_num_evals(self.n_prevpoints, self.eval_budget, self.n_repetitions, training.n_classes)
 
+        print(f'training size={len(training)}')
+        print(f'validation size={len(validation)}')
         params_keys = list(self.param_grid.keys())
         params_values = list(self.param_grid.values())
diff --git a/tweet_sent_quant.py b/tweet_sent_quant.py
deleted file mode 100644
index ec35010..0000000
--- a/tweet_sent_quant.py
+++ /dev/null
@@ -1,137 +0,0 @@
-from sklearn.linear_model import LogisticRegression
-import quapy as qp
-import quapy.functional as F
-import numpy as np
-import os
-import sys
-import pickle
-
-qp.environ['SAMPLE_SIZE'] = 100
-sample_size = qp.environ['SAMPLE_SIZE']
-
-
-
-def evaluate_experiment(true_prevalences, estim_prevalences, n_repetitions=25):
-    #n_classes = true_prevalences.shape[1]
-    #true_ave = true_prevalences.reshape(-1, n_repetitions, n_classes).mean(axis=1)
-    #estim_ave = estim_prevalences.reshape(-1, n_repetitions, n_classes).mean(axis=1)
-    #estim_std = estim_prevalences.reshape(-1, n_repetitions, n_classes).std(axis=1)
-    #print('\nTrueP->mean(Phat)(std(Phat))\n'+'='*22)
-    #for true, estim, std in zip(true_ave, estim_ave, estim_std):
-    #    str_estim = ', '.join([f'{mean:.3f}+-{std:.4f}' for mean, std in zip(estim, std)])
-    #    print(f'{F.strprev(true)}->[{str_estim}]')
-
-    print('\nEvaluation Metrics:\n'+'='*22)
-    for eval_measure in [qp.error.mae, qp.error.mrae]:
-        err = eval_measure(true_prevalences, estim_prevalences)
-        print(f'\t{eval_measure.__name__}={err:.4f}')
-    print()
-
-
-def evaluate_method_point_test(method, test):
-    estim_prev = method.quantify(test.instances)
-    true_prev = F.prevalence_from_labels(test.labels, test.n_classes)
-    print('\nPoint-Test evaluation:\n' + '=' * 22)
-    print(f'true-prev={F.strprev(true_prev)}, estim-prev={F.strprev(estim_prev)}')
-    for eval_measure in [qp.error.mae, qp.error.mrae]:
-        err = eval_measure(true_prev, estim_prev)
-        print(f'\t{eval_measure.__name__}={err:.4f}')
-
-
-def quantification_models():
-    def newLR():
-        return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
-    __C_range = np.logspace(-4, 5, 10)
-    lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
-    #yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
-    #yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
-    #yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
-    yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
-
-
-def result_path(dataset_name, model_name, optim_metric):
-    return f'{dataset_name}-{model_name}-{optim_metric}.pkl'
-
-
-def check_already_computed(dataset_name, model_name, optim_metric):
-    path = result_path(dataset_name, model_name, optim_metric)
-    return os.path.exists(path)
-
-
-def save_results(dataset_name, model_name, optim_metric, *results):
-    path = result_path(dataset_name, model_name, optim_metric)
-    qp.util.create_parent_dir(path)
-    with open(path, 'wb') as foo:
-        pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)
-
-
-if __name__ == '__main__':
-
-    np.random.seed(0)
-
-    for dataset_name in ['sanders']:  # qp.datasets.TWITTER_SENTIMENT_DATASETS:
-
-        benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
-        benchmark_devel.stats()
-
-        for model_name, model, hyperparams in quantification_models():
-
-            model_selection = qp.model_selection.GridSearchQ(
-                model,
-                param_grid=hyperparams,
-                sample_size=sample_size,
-                n_prevpoints=21,
-                n_repetitions=5,
-                error='mae',
-                refit=False,
-                verbose=True
-            )
-
-            model_selection.fit(benchmark_devel.training, benchmark_devel.test)
-            model = model_selection.best_model()
-
-            benchmark_eval = qp.datasets.fetch_twitter(dataset_name, for_model_selection=False, min_df=5, pickle=True)
-            model.fit(benchmark_eval.training)
-            true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
-                model,
-                test=benchmark_eval.test,
-                sample_size=sample_size,
-                n_prevpoints=21,
-                n_repetitions=25
-            )
-
-            evaluate_experiment(true_prevalences, estim_prevalences, n_repetitions=25)
-            evaluate_method_point_test(model, benchmark_eval.test)
-
-            #save_arrays(FLAGS.results, true_prevalences, estim_prevalences, test_name)
-
-    sys.exit(0)
-
-    # decide the test to be performed (in the case of 'semeval', tests are 'semeval13', 'semeval14', 'semeval15')
-    if FLAGS.dataset == 'semeval':
-        test_sets = ['semeval13', 'semeval14', 'semeval15']
-    else:
-        test_sets = [FLAGS.dataset]
-
-    evaluate_method_point_test(method, benchmark_eval.test, test_name=test_set)
-
-
-
-
-# quantifiers:
-# ----------------------------------------
-# alias for quantifiers and default configurations
-QUANTIFIER_ALIASES = {
-    'cc': lambda learner: ClassifyAndCount(learner),
-    'acc': lambda learner: AdjustedClassifyAndCount(learner),
-    'pcc': lambda learner: ProbabilisticClassifyAndCount(learner),
-    'pacc': lambda learner: ProbabilisticAdjustedClassifyAndCount(learner),
-    'emq': lambda learner: ExpectationMaximizationQuantifier(learner),
-    'svmq': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='q'),
-    'svmkld': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='kld'),
-    'svmnkld': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='nkld'),
-    'svmmae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mae'),
-    'svmmrae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mrae'),
-    'mlpe': lambda learner: MaximumLikelihoodPrevalenceEstimation(),
-}
-
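
Note (not part of the patch): a minimal sketch of how one of the pickles written by save_results() in TweetSentQuant/experiments.py could be inspected once the experiments have run. The path below is only an example; it assumes the experiment for dataset 'sanders', model 'pacc' and optimization loss 'mae' has completed, following the naming scheme of result_path().

    import pickle
    import quapy as qp

    qp.environ['SAMPLE_SIZE'] = 100

    # the tuple layout mirrors the save_results() call in experiments.py
    with open('./results/sanders-pacc-mae.pkl', 'rb') as fin:
        (true_prevs, estim_prevs,
         train_prev, test_true_prev, test_estim_prev, best_params) = pickle.load(fin)

    print('best hyperparameters:', best_params)
    print('MAE over artificial samples:', qp.error.mae(true_prevs, estim_prevs))
    print('RAE over artificial samples:', qp.error.mrae(true_prevs, estim_prevs))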