import fixes
This commit is contained in:
parent 9c7c017acd
commit 5e64d2588a
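Note: the pattern throughout this commit is that modules used to import their siblings by bare top-level name (from data import ..., from util import ...), which only resolves when the package's source directory happens to be on sys.path; the fix rewrites them as absolute imports qualified with the package name, and regroups imports in the conventional stdlib / third-party / local order. A minimal sketch of the before/after, with module names taken from the hunks below:

# before: resolves only when running from inside the source tree
#   from data import LabelledCollection
#   from util import EarlyStop

# after: resolves wherever the installed quapy package is importable
from quapy.data import LabelledCollection
from quapy.util import EarlyStop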
@@ -8,22 +8,28 @@ import pickle
 import itertools
 from joblib import Parallel, delayed
 import settings
+import argparse
+
+parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
+parser.add_argument('results', metavar='RESULT_PATH', type=str, help='path to the directory where to store the results')
+args = parser.parse_args()
 
 
 def quantification_models():
     def newLR():
         return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
     __C_range = np.logspace(-4, 5, 10)
-    lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+    #lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
     svmperf_params = {'C': __C_range}
+    lr_params = {'C': [1,10]}
     yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
-    yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
-    yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
-    yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
-    yield 'sld', qp.method.aggregative.EMQ(newLR()), lr_params
-    yield 'svmq', OneVsAll(qp.method.aggregative.SVMQ(settings.SVMPERF_HOME)), svmperf_params
-    yield 'svmkld', OneVsAll(qp.method.aggregative.SVMKLD(settings.SVMPERF_HOME)), svmperf_params
-    yield 'svmnkld', OneVsAll(qp.method.aggregative.SVMNKLD(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
+    #yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
+    #yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
+    #yield 'sld', qp.method.aggregative.EMQ(newLR()), lr_params
+    #yield 'svmq', OneVsAll(qp.method.aggregative.SVMQ(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'svmkld', OneVsAll(qp.method.aggregative.SVMKLD(settings.SVMPERF_HOME)), svmperf_params
+    #yield 'svmnkld', OneVsAll(qp.method.aggregative.SVMNKLD(settings.SVMPERF_HOME)), svmperf_params
 
     # 'svmmae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mae'),
     # 'svmmrae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mrae'),
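Note: quantification_models() is a generator of (name, quantifier, hyperparameter-grid) triples, so the driver can enumerate experiments lazily. A sketch of how such a generator is typically consumed, assuming the generator above is in scope and a hypothetical subset of dataset names; itertools.product materializes its inputs, so the generator can be paired with every dataset:

import itertools

datasets = ['gasp', 'hcr', 'omd']  # hypothetical subset of the Twitter datasets

for dataset_name, (model_name, model, hyperparams) in itertools.product(datasets, quantification_models()):
    # one experiment per (dataset, model) pair: model selection over
    # `hyperparams`, then evaluation with the selected configuration
    print(f'queued dataset={dataset_name} model={model_name}')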
@@ -47,7 +53,7 @@ def evaluate_method_point_test(true_prev, estim_prev):
 
 
 def result_path(dataset_name, model_name, optim_loss):
-    return f'./results/{dataset_name}-{model_name}-{optim_loss}.pkl'
+    return os.path.join(args.results, f'{dataset_name}-{model_name}-{optim_loss}.pkl')
 
 
 def is_already_computed(dataset_name, model_name, optim_loss):
@@ -77,7 +83,6 @@ def run(experiment):
         return
     else:
         print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
-        return
 
     benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
     benchmark_devel.stats()
@@ -125,6 +130,7 @@ def run(experiment):
 
 if __name__ == '__main__':
 
+    print(f'Result folder: {args.results}')
     np.random.seed(0)
 
     optim_losses = ['mae', 'mrae']
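Note: with the hardcoded ./results/ directory replaced by the RESULT_PATH positional, the destination of every result pickle is chosen at invocation time. A sketch of the effect, with the script name and directory assumed for illustration:

import os

# hypothetical invocation:  python experiments.py ./results_tweets
results_dir = './results_tweets'  # stand-in for args.results
dataset_name, model_name, optim_loss = 'hcr', 'cc', 'mae'
print(os.path.join(results_dir, f'{dataset_name}-{model_name}-{optim_loss}.pkl'))
# -> ./results_tweets/hcr-cc-mae.pkl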
@@ -1,13 +1,13 @@
 from . import error
-from .data import datasets
+from . import data
+from quapy.data import datasets
 from . import functional
 from . import method
-from . import data
 from . import evaluation
 from . import plot
 from . import util
 from . import model_selection
-from method.aggregative import isaggregative, isprobabilistic
+from quapy.method.aggregative import isaggregative, isprobabilistic
 
 
 environ = {
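Note: re-exporting datasets at package level is what lets client code, including the experiments script above, reach the loaders as qp.datasets.*. A sketch, assuming quapy is installed; the arguments are the ones used in the run() hunk above:

import quapy as qp

# works because quapy/__init__.py does `from quapy.data import datasets`
benchmark_devel = qp.datasets.fetch_twitter('hcr', for_model_selection=True, min_df=5, pickle=True)
benchmark_devel.stats()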
@@ -1,6 +1,7 @@
 import os
 from abc import ABCMeta, abstractmethod
 from pathlib import Path
 
+import numpy as np
 import torch
 import torch.nn as nn
@@ -8,10 +9,10 @@ import torch.nn.functional as F
 from sklearn.metrics import accuracy_score, f1_score
 from torch.nn.utils.rnn import pad_sequence
 from tqdm import tqdm
 
+import quapy as qp
 from data import LabelledCollection
 from util import EarlyStop
-import quapy as qp
 
 
 class NeuralClassifierTrainer:
@@ -1,9 +1,10 @@
 import random
 import subprocess
 import tempfile
-from os.path import join, exists
 from os import remove
+from os.path import join, exists
 from subprocess import PIPE, STDOUT
+
 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.datasets import dump_svmlight_file
@@ -1,6 +1,6 @@
-from . import datasets
-from . import preprocessing
 from .base import *
 from .reader import *
+from . import preprocessing
+from . import datasets
 
 
@@ -1,8 +1,8 @@
 import numpy as np
 from scipy.sparse import issparse
+from scipy.sparse import vstack
 from sklearn.model_selection import train_test_split
+
 from quapy.functional import artificial_prevalence_sampling, strprev
-from scipy.sparse import vstack
-
 
 class LabelledCollection:
@@ -1,12 +1,13 @@
-import zipfile
-from util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource
 import os
+import zipfile
 from os.path import join
-from data.base import Dataset, LabelledCollection
-from data.reader import *
-from data.preprocessing import text2tfidf, reduce_columns
+
 import pandas as pd
 
+from data.base import Dataset, LabelledCollection
+from quapy.data.preprocessing import text2tfidf, reduce_columns
+from quapy.data.reader import *
+from quapy.util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource
 
 REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb']
 TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders',
@@ -1,11 +1,12 @@
 import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from data.base import Dataset
 from scipy.sparse import spmatrix
-from util import parallelize
-from .base import LabelledCollection
+from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
 from tqdm import tqdm
 
+import quapy as qp
+from quapy.data.base import Dataset
+from quapy.util import parallelize
+from .base import LabelledCollection
 
 
 def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs):
@@ -1,7 +1,6 @@
 import numpy as np
 from scipy.sparse import dok_matrix
 from tqdm import tqdm
-import pandas as pd
 
 
 def from_text(path):
@@ -1,7 +1,7 @@
-from sklearn.metrics import f1_score
 import numpy as np
-import quapy as qp
+from sklearn.metrics import f1_score
 
+import quapy as qp
 
 
 def f1e(y_true, y_pred):
@@ -1,12 +1,13 @@
-import quapy as qp
 from typing import Union, Callable, Iterable
-from data import LabelledCollection
-from method.base import BaseQuantifier
-from util import temp_seed
+
 import numpy as np
 from joblib import Parallel, delayed
 from tqdm import tqdm
-import error
 
+import quapy as qp
+from quapy.data import LabelledCollection
+from quapy.method.base import BaseQuantifier
+from quapy.util import temp_seed
 
 
 def artificial_sampling_prediction(
@@ -72,8 +73,8 @@ def artificial_sampling_prediction(
 
 def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], err:Union[str, Callable], n_jobs:int=-1):
     if isinstance(err, str):
-        err = getattr(error, err)
-        assert err.__name__ in error.QUANTIFICATION_ERROR_NAMES, \
+        err = getattr(qp.error, err)
+        assert err.__name__ in qp.error.QUANTIFICATION_ERROR_NAMES, \
             f'error={err} does not seem to be a quantification error'
     scores = Parallel(n_jobs=n_jobs)(
         delayed(_delayed_eval)(model, Ti, err) for Ti in test_samples
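Note: the fix routes the string-to-function lookup through the package namespace: an error metric given by name is resolved against quapy.error and validated against its registry of quantification errors. A standalone sketch of that resolution step (assumes quapy is installed; QUANTIFICATION_ERROR_NAMES is the registry referenced in the hunk above):

import quapy as qp

err = 'mae'
if isinstance(err, str):
    err = getattr(qp.error, err)  # e.g. 'mae' -> quapy.error.mae
    assert err.__name__ in qp.error.QUANTIFICATION_ERROR_NAMES, \
        f'error={err} does not seem to be a quantification error'
print(err)  # the callable computing mean absolute error between prevalence vectors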
@@ -1,6 +1,7 @@
-from collections import defaultdict
-import numpy as np
 import itertools
+from collections import defaultdict
 
+import numpy as np
 
 def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, return_constrained_dim=False):
@@ -61,13 +62,6 @@ def HellingerDistance(P, Q):
     return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))
 
 
-#def uniform_simplex_sampling(n_classes):
-#    from https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex
-#    r = [0.] + sorted(np.random.rand(n_classes-1)) + [1.]
-#    return np.asarray([b-a for a,b in zip(r[:-1],r[1:])])
-
-
-
 def uniform_prevalence_sampling(n_classes, size=1):
     if n_classes == 2:
         u = np.random.rand(size)
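Note: the deleted comment block sketched the same idea that uniform_prevalence_sampling keeps (per the stackexchange link in the removed comment): sort n-1 uniform draws, pad them with 0 and 1, and take the consecutive gaps, which yields a uniform sample from the probability simplex. A self-contained sketch of that procedure, using only NumPy:

import numpy as np

def sample_uniform_simplex(n_classes):
    # n-1 sorted uniforms padded with 0 and 1; the gaps between consecutive
    # values are non-negative and sum to 1, uniformly over the simplex
    r = np.concatenate(([0.], np.sort(np.random.rand(n_classes - 1)), [1.]))
    return np.diff(r)

p = sample_uniform_simplex(3)
print(p, p.sum())  # e.g. [0.21 0.35 0.44] 1.0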
@@ -1,8 +1,7 @@
-from . import base
 from . import aggregative
-from . import non_aggregative
+from . import base
 from . import meta
-
+from . import non_aggregative
 
 AGGREGATIVE_METHODS = {
     aggregative.CC,
@@ -1,19 +1,20 @@
-import numpy as np
-from copy import deepcopy
-from sklearn.base import BaseEstimator, clone
-import functional as F
-import error
-from method.base import BaseQuantifier, BinaryQuantifier
-from classification.svmperf import SVMperf
-from data import LabelledCollection
-from sklearn.metrics import confusion_matrix
-from sklearn.calibration import CalibratedClassifierCV
-from joblib import Parallel, delayed
 from abc import abstractmethod
+from copy import deepcopy
 from typing import Union
 
+import numpy as np
+from joblib import Parallel, delayed
+from sklearn.base import BaseEstimator
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.metrics import confusion_matrix
+from sklearn.model_selection import StratifiedKFold
+from tqdm import tqdm
+
+import quapy.functional as F
+from quapy.classification.svmperf import SVMperf
+from quapy.data import LabelledCollection
+from quapy.method.base import BaseQuantifier, BinaryQuantifier
 
 
 # Abstract classes
 # ------------------------------------
@@ -1,5 +1,6 @@
 from abc import ABCMeta, abstractmethod
-from data import LabelledCollection
+
+from quapy.data import LabelledCollection
 
 
 # Base Quantifier abstract class
@@ -1,16 +1,17 @@
+from copy import deepcopy
+
 import numpy as np
-from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
+from joblib import Parallel, delayed
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import GridSearchCV, cross_val_predict
 
 import quapy as qp
-from sklearn.model_selection import GridSearchCV, cross_val_predict
-from model_selection import GridSearchQ
-from .base import BaseQuantifier, BinaryQuantifier
-from joblib import Parallel, delayed
-from copy import deepcopy
-from data import LabelledCollection
+from quapy import functional as F
+from quapy.data import LabelledCollection
+from quapy.evaluation import evaluate
+from quapy.model_selection import GridSearchQ
 from . import neural
-from evaluation import evaluate
+from .base import BaseQuantifier
 
 QuaNet = neural.QuaNetTrainer
@@ -1,11 +1,12 @@
 import os
 from pathlib import Path
 
 import torch
 from torch.nn import MSELoss
 from torch.nn.functional import relu
 from tqdm import tqdm
-from method.aggregative import *
-from util import EarlyStop
+
+from quapy.method.aggregative import *
+from quapy.util import EarlyStop
 
 class QuaNetTrainer(BaseQuantifier):
@@ -1,4 +1,4 @@
-from data import LabelledCollection
+from quapy.data import LabelledCollection
 from .base import BaseQuantifier
 
 
@@ -1,12 +1,13 @@
 import itertools
-import quapy as qp
-from evaluation import artificial_sampling_prediction
-from data.base import LabelledCollection
-from method.aggregative import BaseQuantifier
-from typing import Union, Callable
-import functional as F
-from copy import deepcopy
 import signal
+from copy import deepcopy
+from typing import Union, Callable
+
+import quapy as qp
+import quapy.functional as F
+from data.base import LabelledCollection
+from quapy.evaluation import artificial_sampling_prediction
+from quapy.method.aggregative import BaseQuantifier
 
 
 class GridSearchQ(BaseQuantifier):
@@ -80,8 +81,8 @@ class GridSearchQ(BaseQuantifier):
             training, validation = training.split_stratified(train_prop=1-validation)
             return training, validation
         else:
-            raise ValueError('"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
-                             'proportion of training documents to extract')
+            raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
+                             f'proportion of training documents to extract (found) {type(validation)}')
 
     def __check_num_evals(self, n_prevpoints, eval_budget, n_repetitions, n_classes):
         if n_prevpoints is None and eval_budget is None:
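Note: the reworded error spells out the contract of GridSearchQ's validation argument: either a ready-made held-out LabelledCollection, or a float in (0,1) naming the fraction of training documents to split off with stratification. A simplified, standalone mirror of the branch in this hunk (the helper name is hypothetical):

from quapy.data import LabelledCollection

def resolve_validation(training: LabelledCollection, validation):
    if isinstance(validation, LabelledCollection):
        return training, validation          # use the held-out set as given
    elif isinstance(validation, float):
        # split `validation` proportion off the training collection
        return training.split_stratified(train_prop=1 - validation)
    raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
                     f'proportion of training documents to extract (found) {type(validation)}')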
@@ -1,9 +1,10 @@
 from collections import defaultdict
-import matplotlib.pyplot as plt
-from matplotlib import cm
-import numpy as np
-import quapy as qp
 
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import cm
+
+import quapy as qp
 
 plt.rcParams['figure.figsize'] = [12, 8]
 plt.rcParams['figure.dpi'] = 200
@@ -1,12 +1,13 @@
+import contextlib
 import itertools
 import multiprocessing
-from joblib import Parallel, delayed
-import contextlib
-import numpy as np
-import urllib
 import os
-from pathlib import Path
 import pickle
+import urllib
+from pathlib import Path
+
+import numpy as np
+from joblib import Parallel, delayed
 
 
 def get_parallel_slices(n_tasks, n_jobs=-1):