import fixes

Alejandro Moreo Fernandez 2021-01-15 18:32:32 +01:00
parent 9c7c017acd
commit 5e64d2588a
21 changed files with 102 additions and 92 deletions

View File

@@ -8,22 +8,28 @@ import pickle
 import itertools
 from joblib import Parallel, delayed
 import settings
+import argparse
+
+parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
+parser.add_argument('results', metavar='RESULT_PATH', type=str, help='path to the directory where to store the results')
+args = parser.parse_args()
 
 def quantification_models():
     def newLR():
         return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
     __C_range = np.logspace(-4, 5, 10)
-    lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+    #lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
     svmperf_params = {'C': __C_range}
+    lr_params = {'C': [1,10]}
     yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
-    yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
-    yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
-    yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
-    yield 'sld', qp.method.aggregative.EMQ(newLR()), lr_params
-    yield 'svmq', OneVsAll(qp.method.aggregative.SVMQ(settings.SVMPERF_HOME)), svmperf_params
-    yield 'svmkld', OneVsAll(qp.method.aggregative.SVMKLD(settings.SVMPERF_HOME)), svmperf_params
-    yield 'svmnkld', OneVsAll(qp.method.aggregative.SVMNKLD(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
+    #yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
+    #yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
+    #yield 'sld', qp.method.aggregative.EMQ(newLR()), lr_params
+    #yield 'svmq', OneVsAll(qp.method.aggregative.SVMQ(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'svmkld', OneVsAll(qp.method.aggregative.SVMKLD(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'svmnkld', OneVsAll(qp.method.aggregative.SVMNKLD(settings.SVMPERF_HOME)), svmperf_params
     # 'svmmae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mae'),
     # 'svmmrae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mrae'),

@@ -47,7 +53,7 @@ def evaluate_method_point_test(true_prev, estim_prev):
 def result_path(dataset_name, model_name, optim_loss):
-    return f'./results/{dataset_name}-{model_name}-{optim_loss}.pkl'
+    return os.path.join(args.results, f'{dataset_name}-{model_name}-{optim_loss}.pkl')
 
 def is_already_computed(dataset_name, model_name, optim_loss):

@@ -77,7 +83,6 @@ def run(experiment):
         return
     else:
         print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
-        return
 
     benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
     benchmark_devel.stats()

@@ -125,6 +130,7 @@ def run(experiment):
 if __name__ == '__main__':
+    print(f'Result folder: {args.results}')
     np.random.seed(0)
     optim_losses = ['mae', 'mrae']
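
Note: the net effect of the argparse change above is that the results directory is no longer hard-coded as ./results but supplied on the command line. A minimal, self-contained sketch of the pattern (the script name below is hypothetical; it is not visible in this diff):

import argparse
import os

parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
parser.add_argument('results', metavar='RESULT_PATH', type=str,
                    help='path to the directory where to store the results')
args = parser.parse_args()

def result_path(dataset_name, model_name, optim_loss):
    # results now land under the user-supplied directory
    return os.path.join(args.results, f'{dataset_name}-{model_name}-{optim_loss}.pkl')

# e.g.:  python experiments.py ./my-results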

View File

@@ -1,13 +1,13 @@
 from . import error
-from .data import datasets
+from . import data
+from quapy.data import datasets
 from . import functional
 from . import method
-from . import data
 from . import evaluation
 from . import plot
 from . import util
 from . import model_selection
-from method.aggregative import isaggregative, isprobabilistic
+from quapy.method.aggregative import isaggregative, isprobabilistic
 
 environ = {
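
Note: this file illustrates the pattern applied across the whole commit: implicit top-level imports such as from method.aggregative import ... only resolve when the interpreter runs from inside the quapy/ source folder, whereas the package-qualified form works anywhere. A short sketch of the failure mode being fixed (comments are explanatory, not part of the commit):

# works from anywhere once quapy is importable, e.g. after installation
from quapy.method.aggregative import isaggregative, isprobabilistic

# the old spelling relied on 'method' being found as a top-level module on
# sys.path, which only holds when the CWD is the quapy/ source tree:
#   from method.aggregative import isaggregative, isprobabilistic  # ImportError elsewhere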

View File

@@ -1,6 +1,7 @@
 import os
 from abc import ABCMeta, abstractmethod
 from pathlib import Path
+
 import numpy as np
 import torch
 import torch.nn as nn

@@ -8,10 +9,10 @@ import torch.nn.functional as F
 from sklearn.metrics import accuracy_score, f1_score
 from torch.nn.utils.rnn import pad_sequence
 from tqdm import tqdm
+import quapy as qp
 from data import LabelledCollection
 from util import EarlyStop
-import quapy as qp
 
 class NeuralClassifierTrainer:

View File

@@ -1,9 +1,10 @@
 import random
 import subprocess
 import tempfile
-from os.path import join, exists
 from os import remove
+from os.path import join, exists
 from subprocess import PIPE, STDOUT
+
 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.datasets import dump_svmlight_file

View File

@@ -1,6 +1,6 @@
-from . import datasets
-from . import preprocessing
 from .base import *
 from .reader import *
+from . import preprocessing
+from . import datasets

View File

@@ -1,8 +1,8 @@
 import numpy as np
 from scipy.sparse import issparse
+from scipy.sparse import vstack
 from sklearn.model_selection import train_test_split
 from quapy.functional import artificial_prevalence_sampling, strprev
-from scipy.sparse import vstack
 
 class LabelledCollection:

View File

@@ -1,12 +1,13 @@
-import zipfile
-from util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource
 import os
+import zipfile
 from os.path import join
-from data.base import Dataset, LabelledCollection
-from data.reader import *
-from data.preprocessing import text2tfidf, reduce_columns
 import pandas as pd
+
+from data.base import Dataset, LabelledCollection
+from quapy.data.preprocessing import text2tfidf, reduce_columns
+from quapy.data.reader import *
+from quapy.util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource
 
 REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb']
 TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders',

View File

@@ -1,11 +1,12 @@
 import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from data.base import Dataset
 from scipy.sparse import spmatrix
-from util import parallelize
-from .base import LabelledCollection
+from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
 from tqdm import tqdm
+
 import quapy as qp
+from quapy.data.base import Dataset
+from quapy.util import parallelize
+from .base import LabelledCollection
 
 def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs):

View File

@@ -1,7 +1,6 @@
 import numpy as np
 from scipy.sparse import dok_matrix
 from tqdm import tqdm
-import pandas as pd
 
 def from_text(path):

View File

@@ -1,7 +1,7 @@
-from sklearn.metrics import f1_score
 import numpy as np
-import quapy as qp
+from sklearn.metrics import f1_score
+import quapy as qp
 
 def f1e(y_true, y_pred):

View File

@@ -1,12 +1,13 @@
-import quapy as qp
 from typing import Union, Callable, Iterable
-from data import LabelledCollection
-from method.base import BaseQuantifier
-from util import temp_seed
 import numpy as np
 from joblib import Parallel, delayed
 from tqdm import tqdm
-import error
+
+import quapy as qp
+from quapy.data import LabelledCollection
+from quapy.method.base import BaseQuantifier
+from quapy.util import temp_seed
 
 def artificial_sampling_prediction(

@@ -72,8 +73,8 @@ def artificial_sampling_prediction(
 def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], err:Union[str, Callable], n_jobs:int=-1):
     if isinstance(err, str):
-        err = getattr(error, err)
-    assert err.__name__ in error.QUANTIFICATION_ERROR_NAMES, \
+        err = getattr(qp.error, err)
+    assert err.__name__ in qp.error.QUANTIFICATION_ERROR_NAMES, \
         f'error={err} does not seem to be a quantification error'
     scores = Parallel(n_jobs=n_jobs)(
         delayed(_delayed_eval)(model, Ti, err) for Ti in test_samples
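
Note: after this change, evaluate resolves error measures given by name against quapy's own error module rather than a bare top-level error import. A hedged usage sketch ('mae' is assumed to be one of quapy's registered quantification errors, as the experiment script above suggests):

import quapy as qp

err = getattr(qp.error, 'mae')   # the same lookup the patched code performs
assert err.__name__ in qp.error.QUANTIFICATION_ERROR_NAMES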

View File

@@ -1,6 +1,7 @@
-from collections import defaultdict
-import numpy as np
 import itertools
+from collections import defaultdict
+
+import numpy as np
 
 def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, return_constrained_dim=False):

@@ -61,13 +62,6 @@ def HellingerDistance(P, Q):
     return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))
 
-#def uniform_simplex_sampling(n_classes):
-#    # from https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex
-#    r = [0.] + sorted(np.random.rand(n_classes-1)) + [1.]
-#    return np.asarray([b-a for a,b in zip(r[:-1],r[1:])])
-
 def uniform_prevalence_sampling(n_classes, size=1):
     if n_classes == 2:
         u = np.random.rand(size)
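
Note: the deleted comment block preserved a standard recipe for uniform sampling from the probability simplex (sort n-1 uniform draws and take consecutive gaps, per the linked cs.stackexchange answer). For reference, a runnable sketch of that same recipe, not part of the commit:

import numpy as np

def uniform_simplex_sampling(n_classes):
    # gaps between sorted uniform draws are uniformly distributed on the simplex
    r = np.concatenate(([0.], np.sort(np.random.rand(n_classes - 1)), [1.]))
    return np.diff(r)   # n_classes non-negative values summing to 1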

View File

@ -1,8 +1,7 @@
from . import base
from . import aggregative from . import aggregative
from . import non_aggregative from . import base
from . import meta from . import meta
from . import non_aggregative
AGGREGATIVE_METHODS = { AGGREGATIVE_METHODS = {
aggregative.CC, aggregative.CC,

View File

@@ -1,19 +1,20 @@
-import numpy as np
-from copy import deepcopy
-from sklearn.base import BaseEstimator, clone
-import functional as F
-import error
-from method.base import BaseQuantifier, BinaryQuantifier
-from classification.svmperf import SVMperf
-from data import LabelledCollection
-from sklearn.metrics import confusion_matrix
-from sklearn.calibration import CalibratedClassifierCV
-from joblib import Parallel, delayed
 from abc import abstractmethod
+from copy import deepcopy
 from typing import Union
+
+import numpy as np
+from joblib import Parallel, delayed
+from sklearn.base import BaseEstimator
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.metrics import confusion_matrix
 from sklearn.model_selection import StratifiedKFold
 from tqdm import tqdm
+
+import quapy.functional as F
+from quapy.classification.svmperf import SVMperf
+from quapy.data import LabelledCollection
+from quapy.method.base import BaseQuantifier, BinaryQuantifier
 
 # Abstract classes
 # ------------------------------------

View File

@@ -1,5 +1,6 @@
 from abc import ABCMeta, abstractmethod
-from data import LabelledCollection
+
+from quapy.data import LabelledCollection
 
 # Base Quantifier abstract class

View File

@@ -1,16 +1,17 @@
-from copy import deepcopy
 import numpy as np
-from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
+from joblib import Parallel, delayed
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import GridSearchCV, cross_val_predict
+
 import quapy as qp
-from sklearn.model_selection import GridSearchCV, cross_val_predict
-from model_selection import GridSearchQ
-from .base import BaseQuantifier, BinaryQuantifier
-from joblib import Parallel, delayed
-from copy import deepcopy
-from data import LabelledCollection
 from quapy import functional as F
+from quapy.data import LabelledCollection
+from quapy.evaluation import evaluate
+from quapy.model_selection import GridSearchQ
 from . import neural
-from evaluation import evaluate
+from .base import BaseQuantifier
 
 QuaNet = neural.QuaNetTrainer

View File

@@ -1,11 +1,12 @@
 import os
 from pathlib import Path
 import torch
 from torch.nn import MSELoss
 from torch.nn.functional import relu
+from tqdm import tqdm
-from method.aggregative import *
-from util import EarlyStop
+from quapy.method.aggregative import *
+from quapy.util import EarlyStop
 
 class QuaNetTrainer(BaseQuantifier):

View File

@@ -1,4 +1,4 @@
-from data import LabelledCollection
+from quapy.data import LabelledCollection
 from .base import BaseQuantifier

View File

@@ -1,12 +1,13 @@
 import itertools
-import quapy as qp
-from evaluation import artificial_sampling_prediction
-from data.base import LabelledCollection
-from method.aggregative import BaseQuantifier
-from typing import Union, Callable
-import functional as F
-from copy import deepcopy
 import signal
+from copy import deepcopy
+from typing import Union, Callable
+
+import quapy as qp
+import quapy.functional as F
+from data.base import LabelledCollection
+from quapy.evaluation import artificial_sampling_prediction
+from quapy.method.aggregative import BaseQuantifier
 
 class GridSearchQ(BaseQuantifier):
@@ -80,8 +81,8 @@ class GridSearchQ(BaseQuantifier):
             training, validation = training.split_stratified(train_prop=1-validation)
             return training, validation
         else:
-            raise ValueError('"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
-                             'proportion of training documents to extract')
+            raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the '
+                             f'proportion of training documents to extract (found {type(validation)})')
 
     def __check_num_evals(self, n_prevpoints, eval_budget, n_repetitions, n_classes):
         if n_prevpoints is None and eval_budget is None:
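
Note: the sharpened error message now reports the offending type. The dispatch it guards accepts either a held-out LabelledCollection or a float in (0,1) interpreted as the proportion of training documents to split off; a self-contained sketch of that logic (the function name is illustrative; split_stratified is the method used in the hunk above):

from quapy.data import LabelledCollection

def split_training(training: LabelledCollection, validation):
    # mirrors the branch in GridSearchQ: explicit validation set, or a split proportion
    if isinstance(validation, LabelledCollection):
        return training, validation
    if isinstance(validation, float) and 0. < validation < 1.:
        return training.split_stratified(train_prop=1 - validation)
    raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) '
                     f'(found {type(validation)})')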

View File

@@ -1,9 +1,10 @@
 from collections import defaultdict
-import matplotlib.pyplot as plt
-from matplotlib import cm
-import numpy as np
-import quapy as qp
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import cm
+
+import quapy as qp
 
 plt.rcParams['figure.figsize'] = [12, 8]
 plt.rcParams['figure.dpi'] = 200

View File

@@ -1,12 +1,13 @@
+import contextlib
 import itertools
 import multiprocessing
-from joblib import Parallel, delayed
-import contextlib
-import numpy as np
-import urllib
 import os
-from pathlib import Path
 import pickle
+import urllib
+from pathlib import Path
+
+import numpy as np
+from joblib import Parallel, delayed
 
 def get_parallel_slices(n_tasks, n_jobs=-1):