forked from moreo/QuaPy
uniform sampling added if *prevs is empty
This commit is contained in:
parent
bcb8432457
commit
7d6f523e4b
|
@ -40,6 +40,8 @@ class LabelledCollection:
|
||||||
return self.n_classes == 2
|
return self.n_classes == 2
|
||||||
|
|
||||||
def sampling_index(self, size, *prevs, shuffle=True):
|
def sampling_index(self, size, *prevs, shuffle=True):
|
||||||
|
if len(prevs) == 0: # no prevalence was indicated; returns an index for uniform sampling
|
||||||
|
return np.random.choice(len(self), size, replace=False)
|
||||||
if len(prevs) == self.n_classes-1:
|
if len(prevs) == self.n_classes-1:
|
||||||
prevs = prevs + (1-sum(prevs),)
|
prevs = prevs + (1-sum(prevs),)
|
||||||
assert len(prevs) == self.n_classes, 'unexpected number of prevalences'
|
assert len(prevs) == self.n_classes, 'unexpected number of prevalences'
|
||||||
|
@ -68,9 +70,16 @@ class LabelledCollection:
|
||||||
|
|
||||||
return indexes_sample
|
return indexes_sample
|
||||||
|
|
||||||
|
# def uniform_sampling_index(self, size):
|
||||||
|
# return np.random.choice(len(self), size, replace=False)
|
||||||
|
|
||||||
|
# def uniform_sampling(self, size):
|
||||||
|
# unif_index = self.uniform_sampling_index(size)
|
||||||
|
# return self.sampling_from_index(unif_index)
|
||||||
|
|
||||||
def sampling(self, size, *prevs, shuffle=True):
|
def sampling(self, size, *prevs, shuffle=True):
|
||||||
index = self.sampling_index(size, *prevs, shuffle=shuffle)
|
prev_index = self.sampling_index(size, *prevs, shuffle=shuffle)
|
||||||
return self.sampling_from_index(index)
|
return self.sampling_from_index(prev_index)
|
||||||
|
|
||||||
def sampling_from_index(self, index):
|
def sampling_from_index(self, index):
|
||||||
documents = self.instances[index]
|
documents = self.instances[index]
|
||||||
|
@ -92,6 +101,14 @@ class LabelledCollection:
|
||||||
for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats):
|
for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats):
|
||||||
yield self.sampling_index(sample_size, *prevs)
|
yield self.sampling_index(sample_size, *prevs)
|
||||||
|
|
||||||
|
def natural_sampling_generator(self, sample_size, repeats=100):
|
||||||
|
for _ in range(repeats):
|
||||||
|
yield self.uniform_sampling(sample_size)
|
||||||
|
|
||||||
|
def natural_sampling_index_generator(self, sample_size, repeats=100):
|
||||||
|
for _ in range(repeats):
|
||||||
|
yield self.uniform_sampling_index(sample_size)
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
if issparse(self.instances) and issparse(other.documents):
|
if issparse(self.instances) and issparse(other.documents):
|
||||||
docs = vstack([self.instances, other.documents])
|
docs = vstack([self.instances, other.documents])
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import zipfile
|
import zipfile
|
||||||
from utils.util import download_file_if_not_exists, download_file, get_quapy_home
|
from util import download_file_if_not_exists, download_file, get_quapy_home
|
||||||
import os
|
import os
|
||||||
from os.path import join
|
from os.path import join
|
||||||
from data.base import Dataset, LabelledCollection
|
from data.base import Dataset
|
||||||
from data.reader import from_text, from_sparse
|
from data.reader import from_text, from_sparse
|
||||||
from data.preprocessing import text2tfidf, reduce_columns
|
from data.preprocessing import text2tfidf, reduce_columns
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ import numpy as np
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
||||||
from data.base import Dataset
|
from data.base import Dataset
|
||||||
from scipy.sparse import spmatrix
|
from scipy.sparse import spmatrix
|
||||||
from utils.util import parallelize
|
from util import parallelize
|
||||||
from .base import LabelledCollection
|
from .base import LabelledCollection
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from data import LabelledCollection
|
from data import LabelledCollection
|
||||||
from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier
|
from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier
|
||||||
from method.base import BaseQuantifier
|
from method.base import BaseQuantifier
|
||||||
from utils.util import temp_seed
|
from util import temp_seed
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from joblib import Parallel, delayed
|
from joblib import Parallel, delayed
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
|
@ -94,11 +94,11 @@ def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1
|
||||||
"""
|
"""
|
||||||
__cache={}
|
__cache={}
|
||||||
def __f(nc,np):
|
def __f(nc,np):
|
||||||
if (nc,np) in __cache:
|
if (nc,np) in __cache: # cached result
|
||||||
return __cache[(nc,np)]
|
return __cache[(nc,np)]
|
||||||
if nc==1:
|
if nc==1: # stop condition
|
||||||
return 1
|
return 1
|
||||||
else:
|
else: # recursive call
|
||||||
x = sum([__f(nc-1, np-i) for i in range(np)])
|
x = sum([__f(nc-1, np-i) for i in range(np)])
|
||||||
__cache[(nc,np)] = x
|
__cache[(nc,np)] = x
|
||||||
return x
|
return x
|
||||||
|
|
|
@ -8,8 +8,6 @@ import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_parallel_slices(n_tasks, n_jobs=-1):
|
def get_parallel_slices(n_tasks, n_jobs=-1):
|
||||||
if n_jobs == -1:
|
if n_jobs == -1:
|
||||||
n_jobs = multiprocessing.cpu_count()
|
n_jobs = multiprocessing.cpu_count()
|
|
@ -1 +0,0 @@
|
||||||
from . import util
|
|
Loading…
Reference in New Issue