Experimental method ave-pool; it is not working yet because OneVsAll is an aggregative method and ave-pool is not
This commit is contained in:
parent
99132c8166
commit
1ba0748b59
|
@ -1,7 +1,15 @@
|
||||||
from typing import Union
|
import numpy as np
|
||||||
|
from sklearn.decomposition import PCA
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from quapy.data import LabelledCollection
|
||||||
|
from quapy.method.base import BaseQuantifier, BinaryQuantifier
|
||||||
from quapy.method.aggregative import PACC, EMQ, HDy
|
from quapy.method.aggregative import PACC, EMQ, HDy
|
||||||
import quapy.functional as F
|
import quapy.functional as F
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
class PACCSLD(PACC):
|
class PACCSLD(PACC):
|
||||||
|
@ -35,3 +43,83 @@ class HDySLD(HDy):
|
||||||
def aggregate(self, classif_posteriors):
|
def aggregate(self, classif_posteriors):
|
||||||
priors, posteriors = EMQ.EM(self.train_prevalence, classif_posteriors, epsilon=1e-4)
|
priors, posteriors = EMQ.EM(self.train_prevalence, classif_posteriors, epsilon=1e-4)
|
||||||
return super(HDySLD, self).aggregate(posteriors)
|
return super(HDySLD, self).aggregate(posteriors)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AveragePoolQuantification(BinaryQuantifier):
    """Experimental binary quantifier that regresses prevalence from mean-pooled samples.

    Samples are drawn from a training split, each sample is collapsed into the
    mean of its instances, and ``learner`` is fitted to map that mean vector to
    the sample's prevalence at index 1. At prediction time the regressor output
    is linearly rescaled using its mean outputs on validation samples drawn
    with the prevalence arguments ``0.`` and ``1.``.

    :param learner: regressor exposing ``fit(X, y)``, ``predict(X)``,
        ``set_params`` and ``get_params``
    :param sample_size: number of instances in every generated sample
    :param trials: target number of training samples; the prevalence grid is
        sized with ``F.get_nprevpoints_approximation`` and topped up with
        randomly drawn samples until at least ``trials`` are available
    :param n_components: number of PCA components applied to the pooled
        vectors; PCA is skipped unless the value is positive
    :param zscore: whether the pooled vectors are standardised with
        ``StandardScaler``
    """

    # number of validation samples averaged for each correction term
    _CORRECTION_REPEATS = 100

    def __init__(self, learner, sample_size, trials, n_components=-1, zscore=False):
        self.learner = learner
        self.sample_size = sample_size
        self.trials = trials

        self.do_zscore = zscore
        self.zscore = StandardScaler() if self.do_zscore else None

        self.do_pca = n_components > 0
        self.pca = PCA(n_components) if self.do_pca else None

    def _transform(self, X):
        """Apply the already-fitted PCA and z-score steps (when enabled) to ``X``."""
        if self.do_pca:
            X = self.pca.transform(X)
        if self.do_zscore:
            X = self.zscore.transform(X)
        return X

    def _correction(self, validation, prevalence):
        """Return the mean regressor output over pooled validation samples.

        ``prevalence`` is forwarded unchanged as the single prevalence argument
        of ``validation.sampling``.
        """
        pooled = np.asarray(np.vstack([
            validation.sampling(self.sample_size, prevalence, shuffle=False).instances.mean(axis=0)
            for _ in range(self._CORRECTION_REPEATS)
        ]))
        return self.regressor.predict(self._transform(pooled)).mean()

    def fit(self, data: LabelledCollection):
        """Train the regressor on mean-pooled samples and estimate the corrections.

        :param data: labelled collection; 70% is used to generate the training
            samples and the remaining 30% to estimate the two corrections
        :return: self
        """
        training, validation = data.split_stratified(train_prop=0.7)

        X, y = [], []

        nprevpoints = F.get_nprevpoints_approximation(self.trials, data.n_classes)
        for sample in tqdm(
                training.artificial_sampling_generator(self.sample_size, n_prevalences=nprevpoints, repeats=1),
                desc='generating averages'
        ):
            X.append(sample.instances.mean(axis=0))
            y.append(sample.prevalence()[1])

        # top up with randomly drawn prevalences until `trials` samples exist
        while len(X) < self.trials:
            prevs = np.ravel(F.uniform_simplex_sampling(data.n_classes))
            # Pass every prevalence but the last as separate positional values,
            # mirroring the scalar calls used for the corrections; the original
            # passed the whole array as one argument.
            # NOTE(review): assumes `sampling` derives the last prevalence itself -- confirm.
            sample = training.sampling(self.sample_size, *prevs[:-1])
            X.append(sample.instances.mean(axis=0))
            # keep the target scalar, exactly as in the loop above; appending
            # the whole prevalence vector made `y` ragged
            y.append(sample.prevalence()[1])

        X = np.asarray(np.vstack(X))
        y = np.asarray(y)

        if self.do_pca:
            X = self.pca.fit_transform(X)
            print(X.shape)

        if self.do_zscore:
            X = self.zscore.fit_transform(X)

        print('training regressor...')
        self.regressor = self.learner.fit(X, y)

        # corrections: regressor output at the two extreme prevalence arguments
        # NOTE(review): the regression target is prevalence()[1]; verify that the
        # scalar given to `sampling` refers to the same class (it may be
        # interpreted as the prevalence of the first class, which would swap
        # the two corrections).
        print('getting corrections...')
        self.correction_0 = self._correction(validation, 0.)
        self.correction_1 = self._correction(validation, 1.)

        print('correction-0', self.correction_0)
        print('correction-1', self.correction_1)
        print('done')

        return self

    def quantify(self, instances):
        """Estimate the class prevalences of ``instances``.

        :param instances: matrix of instances, one per row
        :return: array ``[1 - p, p]`` where ``p`` is the rescaled regressor
            output clipped to ``[0, 1]``
        """
        # force a single-row 2-D array: the mean of a dense array is 1-D, while
        # the transformers and the regressor are fed one sample per row
        ave = np.asarray(instances.mean(axis=0)).reshape(1, -1)
        ave = self._transform(ave)

        phat = self.regressor.predict(ave).item()
        span = self.correction_1 - self.correction_0
        if np.isclose(span, 0):
            # degenerate corrections: rescaling would divide by zero, so fall
            # back to the clipped raw prediction
            phat = np.clip(phat, 0, 1)
        else:
            phat = np.clip((phat - self.correction_0) / span, 0, 1)
        return np.asarray([1 - phat, phat])

    def set_params(self, **parameters):
        """Forward hyper-parameters to the wrapped learner and return self."""
        self.learner.set_params(**parameters)
        return self

    def get_params(self, deep=True):
        """Return the hyper-parameters of the wrapped learner."""
        return self.learner.get_params(deep=deep)
|
||||||
|
|
|
@ -32,6 +32,7 @@ nice = {
|
||||||
'quanet': 'QuaNet',
|
'quanet': 'QuaNet',
|
||||||
'hdy': 'HDy',
|
'hdy': 'HDy',
|
||||||
'dys': 'DyS',
|
'dys': 'DyS',
|
||||||
|
'epaccmaeptr': 'E(PACC)$_\mathrm{Ptr}$',
|
||||||
'svmperf':'',
|
'svmperf':'',
|
||||||
'sanders': 'Sanders',
|
'sanders': 'Sanders',
|
||||||
'semeval13': 'SemEval13',
|
'semeval13': 'SemEval13',
|
||||||
|
@ -116,7 +117,7 @@ if __name__ == '__main__':
|
||||||
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
|
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
|
||||||
evaluation_measures = [qp.error.ae, qp.error.rae]
|
evaluation_measures = [qp.error.ae, qp.error.rae]
|
||||||
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
|
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
|
||||||
new_methods = ['hdy']
|
new_methods = ['hdy', 'quanet', 'epaccptr']
|
||||||
|
|
||||||
gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
|
gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
|
||||||
|
|
||||||
|
|
|
@ -547,8 +547,6 @@ class OneVsAll(AggregativeQuantifier):
|
||||||
else:
|
else:
|
||||||
predictions = self.classify(X)
|
predictions = self.classify(X)
|
||||||
return self.aggregate(predictions)
|
return self.aggregate(predictions)
|
||||||
#prevalences = self.__parallel(self._delayed_binary_quantify, X)
|
|
||||||
#return F.normalize_prevalence(prevalences)
|
|
||||||
|
|
||||||
def __parallel(self, func, *args, **kwargs):
|
def __parallel(self, func, *args, **kwargs):
|
||||||
return np.asarray(
|
return np.asarray(
|
||||||
|
|
25
test.py
25
test.py
|
@ -1,10 +1,12 @@
|
||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
from sklearn.model_selection import GridSearchCV
|
from sklearn.model_selection import GridSearchCV
|
||||||
from sklearn.svm import LinearSVC
|
from sklearn.svm import LinearSVC, LinearSVR
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
import quapy.functional as F
|
import quapy.functional as F
|
||||||
import sys
|
import sys
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from NewMethods.methods import AveragePoolQuantification
|
||||||
from classification.methods import PCALR
|
from classification.methods import PCALR
|
||||||
from classification.neural import NeuralClassifierTrainer, CNNnet
|
from classification.neural import NeuralClassifierTrainer, CNNnet
|
||||||
from quapy.model_selection import GridSearchQ
|
from quapy.model_selection import GridSearchQ
|
||||||
|
@ -29,7 +31,7 @@ if binary:
|
||||||
|
|
||||||
else:
|
else:
|
||||||
dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)
|
dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)
|
||||||
dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)
|
#dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)
|
||||||
|
|
||||||
print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.test)}')
|
print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.test)}')
|
||||||
|
|
||||||
|
@ -51,14 +53,17 @@ print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.tes
|
||||||
#model = qp.method.meta.QuaNet(learner, sample_size, device='cpu')
|
#model = qp.method.meta.QuaNet(learner, sample_size, device='cpu')
|
||||||
|
|
||||||
#learner = GridSearchCV(LogisticRegression(max_iter=1000), param_grid=param_grid, n_jobs=-1, verbose=1)
|
#learner = GridSearchCV(LogisticRegression(max_iter=1000), param_grid=param_grid, n_jobs=-1, verbose=1)
|
||||||
learner = LogisticRegression(max_iter=1000)
|
#learner = LogisticRegression(max_iter=1000)
|
||||||
# model = qp.method.aggregative.ClassifyAndCount(learner)
|
# model = qp.method.aggregative.ClassifyAndCount(learner)
|
||||||
|
|
||||||
|
|
||||||
model = qp.method.meta.EPACC(learner, size=10, red_size=5,
|
#model = qp.method.meta.EPACC(learner, size=10, red_size=5,
|
||||||
param_grid={'C':[1,10,100]},
|
# param_grid={'C':[1,10,100]},
|
||||||
optim='mae', param_mod_sel={'sample_size':100, 'n_prevpoints':21, 'n_repetitions':5},
|
# optim='mae', param_mod_sel={'sample_size':100, 'n_prevpoints':21, 'n_repetitions':5},
|
||||||
policy='ptr', n_jobs=1)
|
# policy='ptr', n_jobs=1)
|
||||||
|
regressor = LinearSVR(max_iter=10000)
|
||||||
|
param_grid = {'C': np.logspace(-1,3,5)}
|
||||||
|
model = AveragePoolQuantification(regressor, sample_size, trials=5000, n_components=500, zscore=False)
|
||||||
|
|
||||||
#model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae',
|
#model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae',
|
||||||
# sample_size=sample_size, eval_budget=max_evaluations//10, n_jobs=-1)
|
# sample_size=sample_size, eval_budget=max_evaluations//10, n_jobs=-1)
|
||||||
|
@ -75,7 +80,7 @@ if qp.isbinary(model) and not qp.isbinary(dataset):
|
||||||
print(f'fitting model {model.__class__.__name__}')
|
print(f'fitting model {model.__class__.__name__}')
|
||||||
#train, val = dataset.training.split_stratified(0.6)
|
#train, val = dataset.training.split_stratified(0.6)
|
||||||
#model.fit(train, val_split=val)
|
#model.fit(train, val_split=val)
|
||||||
model.fit(dataset.training, val_split=dataset.test)
|
model.fit(dataset.training)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -112,7 +117,7 @@ for error in qp.error.QUANTIFICATION_ERROR:
|
||||||
score = error(true_prev, estim_prev)
|
score = error(true_prev, estim_prev)
|
||||||
print(f'{error.__name__}={score:.5f}')
|
print(f'{error.__name__}={score:.5f}')
|
||||||
|
|
||||||
sys.exit(0)
|
#sys.exit(0)
|
||||||
# Model selection and Evaluation according to the artificial sampling protocol
|
# Model selection and Evaluation according to the artificial sampling protocol
|
||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -123,7 +128,7 @@ model_selection = GridSearchQ(model,
|
||||||
error='mae',
|
error='mae',
|
||||||
refit=True,
|
refit=True,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
timeout=4)
|
timeout=60*60)
|
||||||
|
|
||||||
model = model_selection.fit(dataset.training, val_split=0.3)
|
model = model_selection.fit(dataset.training, val_split=0.3)
|
||||||
#model = model_selection.fit(train, validation=val)
|
#model = model_selection.fit(train, validation=val)
|
||||||
|
|
Loading…
Reference in New Issue