
import fixes

Alejandro Moreo Fernandez 2021-01-15 18:32:32 +01:00
parent 9c7c017acd
commit 5e64d2588a
21 changed files with 102 additions and 92 deletions

TweetSentQuant/experiments.py · View File

@@ -8,22 +8,28 @@ import pickle
 import itertools
 from joblib import Parallel, delayed
 import settings
+import argparse
+
+parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
+parser.add_argument('results', metavar='RESULT_PATH', type=str, help='path to the directory where to store the results')
+args = parser.parse_args()


 def quantification_models():
     def newLR():
         return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
     __C_range = np.logspace(-4, 5, 10)
-    lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+    #lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
     svmperf_params = {'C': __C_range}
+    lr_params = {'C': [1,10]}
     yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
-    yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
-    yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
-    yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
-    yield 'sld', qp.method.aggregative.EMQ(newLR()), lr_params
-    yield 'svmq', OneVsAll(qp.method.aggregative.SVMQ(settings.SVMPERF_HOME)), svmperf_params
-    yield 'svmkld', OneVsAll(qp.method.aggregative.SVMKLD(settings.SVMPERF_HOME)), svmperf_params
-    yield 'svmnkld', OneVsAll(qp.method.aggregative.SVMNKLD(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
+    #yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
+    #yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
+    #yield 'sld', qp.method.aggregative.EMQ(newLR()), lr_params
+    #yield 'svmq', OneVsAll(qp.method.aggregative.SVMQ(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'svmkld', OneVsAll(qp.method.aggregative.SVMKLD(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'svmnkld', OneVsAll(qp.method.aggregative.SVMNKLD(settings.SVMPERF_HOME)), svmperf_params
     # 'svmmae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mae'),
     # 'svmmrae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mrae'),
@@ -47,7 +53,7 @@ def evaluate_method_point_test(true_prev, estim_prev):
 def result_path(dataset_name, model_name, optim_loss):
-    return f'./results/{dataset_name}-{model_name}-{optim_loss}.pkl'
+    return os.path.join(args.results, f'{dataset_name}-{model_name}-{optim_loss}.pkl')


 def is_already_computed(dataset_name, model_name, optim_loss):
@@ -77,7 +83,6 @@ def run(experiment):
         return
     else:
         print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
-        return

     benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
     benchmark_devel.stats()
@@ -125,6 +130,7 @@ def run(experiment):
 if __name__ == '__main__':
+    print(f'Result folder: {args.results}')
     np.random.seed(0)

     optim_losses = ['mae', 'mrae']
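
Taken together, these hunks replace the hard-coded ./results folder with a directory passed on the command line, joined with the per-experiment file name. A minimal stand-alone sketch of the pattern (the script name experiments.py and the example path are assumptions, not from the commit):

    # hypothetical illustration of the new argparse + result_path pattern
    import argparse
    import os

    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
    parser.add_argument('results', metavar='RESULT_PATH', type=str,
                        help='path to the directory where to store the results')
    args = parser.parse_args(['/tmp/results'])  # as if invoked: python experiments.py /tmp/results

    dataset_name, model_name, optim_loss = 'hcr', 'cc', 'mae'
    print(os.path.join(args.results, f'{dataset_name}-{model_name}-{optim_loss}.pkl'))
    # -> /tmp/results/hcr-cc-mae.pkl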

quapy/__init__.py · View File

@@ -1,13 +1,13 @@
 from . import error
-from .data import datasets
+from . import data
+from quapy.data import datasets
 from . import functional
 from . import method
-from . import data
 from . import evaluation
 from . import plot
 from . import util
 from . import model_selection
-from method.aggregative import isaggregative, isprobabilistic
+from quapy.method.aggregative import isaggregative, isprobabilistic

 environ = {
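
This file shows the commit's recurring fix: implicit top-level imports such as from method.aggregative import ... resolve only when Python is launched from inside the quapy/ source directory, while the package-absolute form works wherever the quapy package itself is importable. A minimal sketch of the difference, assuming quapy is on sys.path:

    # old style: breaks outside the package root
    # from method.aggregative import isaggregative    # ModuleNotFoundError: No module named 'method'

    # fixed style: resolves through the installed package
    from quapy.method.aggregative import isaggregative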

quapy/classification/neural.py · View File

@@ -1,6 +1,7 @@
 import os
 from abc import ABCMeta, abstractmethod
+from pathlib import Path

 import numpy as np
 import torch
 import torch.nn as nn
@@ -8,10 +9,10 @@ import torch.nn.functional as F
 from sklearn.metrics import accuracy_score, f1_score
 from torch.nn.utils.rnn import pad_sequence
 from tqdm import tqdm
-import quapy as qp
-from data import LabelledCollection
-from util import EarlyStop
+import quapy as qp


 class NeuralClassifierTrainer:

quapy/classification/svmperf.py · View File

@@ -1,9 +1,10 @@
 import random
 import subprocess
 import tempfile
-from os.path import join, exists
 from os import remove
+from os.path import join, exists
 from subprocess import PIPE, STDOUT
+
 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.datasets import dump_svmlight_file

quapy/data/__init__.py · View File

@@ -1,6 +1,6 @@
-from . import datasets
-from . import preprocessing
 from .base import *
 from .reader import *
+from . import preprocessing
+from . import datasets

quapy/data/base.py · View File

@@ -1,8 +1,8 @@
 import numpy as np
 from scipy.sparse import issparse
-from scipy.sparse import vstack
 from sklearn.model_selection import train_test_split
 from quapy.functional import artificial_prevalence_sampling, strprev
+from scipy.sparse import vstack

 class LabelledCollection:

quapy/data/datasets.py · View File

@@ -1,12 +1,13 @@
-import zipfile
-from util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource
 import os
+import zipfile
 from os.path import join
-from data.base import Dataset, LabelledCollection
-from data.reader import *
-from data.preprocessing import text2tfidf, reduce_columns
 import pandas as pd
+from data.base import Dataset, LabelledCollection
+from quapy.data.preprocessing import text2tfidf, reduce_columns
+from quapy.data.reader import *
+from quapy.util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource

 REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb']
 TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders',
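
These constants enumerate the corpora that fetch_twitter() accepts; the experiment script above already shows the call. A hedged one-liner using a name from the list, with arguments copied from the diff:

    import quapy as qp
    data = qp.datasets.fetch_twitter('hcr', for_model_selection=True, min_df=5, pickle=True)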

quapy/data/preprocessing.py · View File

@@ -1,11 +1,12 @@
 import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from data.base import Dataset
 from scipy.sparse import spmatrix
-from util import parallelize
-from .base import LabelledCollection
+from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
 from tqdm import tqdm
 import quapy as qp
+from quapy.data.base import Dataset
+from quapy.util import parallelize
+from .base import LabelledCollection

 def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs):

quapy/data/reader.py · View File

@@ -1,7 +1,6 @@
 import numpy as np
 from scipy.sparse import dok_matrix
 from tqdm import tqdm
-import pandas as pd

 def from_text(path):

quapy/error.py · View File

@@ -1,7 +1,7 @@
-from sklearn.metrics import f1_score
 import numpy as np
-import quapy as qp
+from sklearn.metrics import f1_score
+import quapy as qp


 def f1e(y_true, y_pred):

quapy/evaluation.py · View File

@@ -1,12 +1,13 @@
-import quapy as qp
 from typing import Union, Callable, Iterable
-from data import LabelledCollection
-from method.base import BaseQuantifier
-from util import temp_seed
 import numpy as np
 from joblib import Parallel, delayed
 from tqdm import tqdm
-import error
+import quapy as qp
+from quapy.data import LabelledCollection
+from quapy.method.base import BaseQuantifier
+from quapy.util import temp_seed

 def artificial_sampling_prediction(
@@ -72,8 +73,8 @@ def artificial_sampling_prediction(
 def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], err:Union[str, Callable], n_jobs:int=-1):
     if isinstance(err, str):
-        err = getattr(error, err)
-        assert err.__name__ in error.QUANTIFICATION_ERROR_NAMES, \
+        err = getattr(qp.error, err)
+        assert err.__name__ in qp.error.QUANTIFICATION_ERROR_NAMES, \
             f'error={err} does not seem to be a quantification error'
     scores = Parallel(n_jobs=n_jobs)(
         delayed(_delayed_eval)(model, Ti, err) for Ti in test_samples
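
Since evaluate() now resolves string error names through qp.error, the two call styles below are equivalent. A hedged usage sketch, assuming model is a fitted BaseQuantifier and samples is a list of LabelledCollection objects:

    mae_a = evaluate(model, samples, err='mae')         # name resolved via getattr(qp.error, 'mae')
    mae_b = evaluate(model, samples, err=qp.error.mae)  # passing the callable directly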

quapy/functional.py · View File

@@ -1,6 +1,7 @@
-from collections import defaultdict
-import numpy as np
 import itertools
+from collections import defaultdict
+
+import numpy as np

 def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, return_constrained_dim=False):
@@ -61,13 +62,6 @@ def HellingerDistance(P, Q):
     return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))

-#def uniform_simplex_sampling(n_classes):
-    # from https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex
-    # r = [0.] + sorted(np.random.rand(n_classes-1)) + [1.]
-    # return np.asarray([b-a for a,b in zip(r[:-1],r[1:])])

 def uniform_prevalence_sampling(n_classes, size=1):
     if n_classes == 2:
         u = np.random.rand(size)
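
The four commented-out lines removed above are the sorted-spacings recipe for sampling uniformly from the simplex (per the linked cs.stackexchange answer); the surviving function special-cases the binary setting. For reference, a runnable version of that deleted sketch, not part of the commit:

    import numpy as np

    def uniform_simplex_sampling(n_classes):
        # spacings between sorted uniform draws are uniformly distributed on the simplex
        r = np.concatenate(([0.], np.sort(np.random.rand(n_classes - 1)), [1.]))
        return np.diff(r)  # n_classes non-negative entries that sum to 1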

quapy/method/__init__.py · View File

@@ -1,8 +1,7 @@
-from . import base
 from . import aggregative
-from . import non_aggregative
+from . import base
 from . import meta
+from . import non_aggregative

 AGGREGATIVE_METHODS = {
     aggregative.CC,

quapy/method/aggregative.py · View File

@@ -1,19 +1,20 @@
-import numpy as np
-from copy import deepcopy
-from sklearn.base import BaseEstimator, clone
-import functional as F
-import error
-from method.base import BaseQuantifier, BinaryQuantifier
-from classification.svmperf import SVMperf
-from data import LabelledCollection
-from sklearn.metrics import confusion_matrix
-from sklearn.calibration import CalibratedClassifierCV
-from joblib import Parallel, delayed
 from abc import abstractmethod
+from copy import deepcopy
+from typing import Union
+import numpy as np
+from joblib import Parallel, delayed
+from sklearn.base import BaseEstimator
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.metrics import confusion_matrix
+from sklearn.model_selection import StratifiedKFold
+from tqdm import tqdm
+import quapy.functional as F
+from quapy.classification.svmperf import SVMperf
+from quapy.data import LabelledCollection
+from quapy.method.base import BaseQuantifier, BinaryQuantifier

 # Abstract classes
 # ------------------------------------

quapy/method/base.py · View File

@@ -1,5 +1,6 @@
 from abc import ABCMeta, abstractmethod
-from data import LabelledCollection
+from quapy.data import LabelledCollection

 # Base Quantifier abstract class

quapy/method/meta.py · View File

@@ -1,16 +1,17 @@
-from copy import deepcopy
 import numpy as np
-from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
+from joblib import Parallel, delayed
+from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GridSearchCV, cross_val_predict
 import quapy as qp
+from sklearn.model_selection import GridSearchCV, cross_val_predict
-from model_selection import GridSearchQ
-from .base import BaseQuantifier, BinaryQuantifier
-from joblib import Parallel, delayed
-from copy import deepcopy
-from data import LabelledCollection
+from quapy import functional as F
+from quapy.data import LabelledCollection
+from quapy.evaluation import evaluate
+from quapy.model_selection import GridSearchQ
 from . import neural
-from evaluation import evaluate
+from .base import BaseQuantifier

 QuaNet = neural.QuaNetTrainer

quapy/method/neural.py · View File

@@ -1,11 +1,12 @@
 import os
+from pathlib import Path
 import torch
 from torch.nn import MSELoss
 from torch.nn.functional import relu
 from tqdm import tqdm
-from method.aggregative import *
-from util import EarlyStop
+from quapy.method.aggregative import *
+from quapy.util import EarlyStop

 class QuaNetTrainer(BaseQuantifier):

quapy/method/non_aggregative.py · View File

@@ -1,4 +1,4 @@
-from data import LabelledCollection
+from quapy.data import LabelledCollection
 from .base import BaseQuantifier

quapy/model_selection.py · View File

@@ -1,12 +1,13 @@
 import itertools
-import quapy as qp
-from evaluation import artificial_sampling_prediction
-from data.base import LabelledCollection
-from method.aggregative import BaseQuantifier
-from typing import Union, Callable
-import functional as F
-from copy import deepcopy
 import signal
+from copy import deepcopy
+from typing import Union, Callable
+import quapy as qp
+import quapy.functional as F
+from data.base import LabelledCollection
+from quapy.evaluation import artificial_sampling_prediction
+from quapy.method.aggregative import BaseQuantifier

 class GridSearchQ(BaseQuantifier):
@@ -80,8 +81,8 @@ class GridSearchQ(BaseQuantifier):
             training, validation = training.split_stratified(train_prop=1-validation)
             return training, validation
         else:
-            raise ValueError('"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
-                             'proportion of training documents to extract')
+            raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the '
+                             f'proportion of training documents to extract (found {type(validation)})')

     def __check_num_evals(self, n_prevpoints, eval_budget, n_repetitions, n_classes):
         if n_prevpoints is None and eval_budget is None:
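
One detail of the message built here: Python concatenates adjacent string literals with no separator, so the first fragment must end with a space or the text reads 'theproportion'. A minimal illustration:

    msg = ('"validation" must either be a LabelledCollection or a float in (0,1) indicating the '
           'proportion of training documents to extract')
    assert 'the proportion' in msg  # drop the trailing space and this fails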

quapy/plot.py · View File

@@ -1,9 +1,10 @@
 from collections import defaultdict
-import matplotlib.pyplot as plt
-from matplotlib import cm
-import numpy as np
-import quapy as qp
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import cm
+import quapy as qp

 plt.rcParams['figure.figsize'] = [12, 8]
 plt.rcParams['figure.dpi'] = 200

quapy/util.py · View File

@@ -1,12 +1,13 @@
+import contextlib
 import itertools
 import multiprocessing
-from joblib import Parallel, delayed
-import contextlib
-import numpy as np
-import urllib
 import os
-from pathlib import Path
 import pickle
+import urllib
+from pathlib import Path
+import numpy as np
+from joblib import Parallel, delayed

 def get_parallel_slices(n_tasks, n_jobs=-1):