1
0
Fork 0

Compare commits

...

2 Commits

4 changed files with 61 additions and 11 deletions

View File

@ -1,3 +1,9 @@
Change Log 0.1.9
----------------
<...>
Change Log 0.1.8
----------------

View File

@ -11,7 +11,7 @@ from . import util
from . import model_selection
from . import classification
__version__ = '0.1.8'
__version__ = '0.1.9'
environ = {
'SAMPLE_SIZE': None,

View File

@ -108,8 +108,7 @@ class LabelledCollection:
"""
Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
prevalence values are not specified, then returns the index of a uniform sampling.
For each class, the sampling is drawn with replacement if the requested prevalence is larger than
the actual prevalence of the class, or without replacement otherwise.
For each class, the sampling is drawn with replacement.
:param size: integer, the requested size
:param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
@ -153,7 +152,7 @@ class LabelledCollection:
for class_, n_requested in n_requests.items():
n_candidates = len(self.index[class_])
index_sample = self.index[class_][
np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates))
np.random.choice(n_candidates, size=n_requested, replace=True)
] if n_requested > 0 else []
indexes_sample.append(index_sample)
@ -168,8 +167,7 @@ class LabelledCollection:
def uniform_sampling_index(self, size, random_state=None):
"""
Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
with replacement if the requested size is greater than the number of instances, or without replacement
otherwise.
with replacement.
:param size: integer, the size of the uniform sample
:param random_state: if specified, guarantees reproducibility of the split.
@ -179,13 +177,12 @@ class LabelledCollection:
ng = RandomState(seed=random_state)
else:
ng = np.random
return ng.choice(len(self), size, replace=size > len(self))
return ng.choice(len(self), size, replace=True)
def sampling(self, size, *prevs, shuffle=True, random_state=None):
"""
Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence
values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than
the actual prevalence of the class, or with replacement otherwise.
values. For each class, the sampling is drawn with replacement.
:param size: integer, the requested size
:param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
@ -202,8 +199,7 @@ class LabelledCollection:
def uniform_sampling(self, size, random_state=None):
"""
Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn
with replacement if the requested size is greater than the number of instances, or without replacement
otherwise.
with replacement.
:param size: integer, the requested size
:param random_state: if specified, guarantees reproducibility of the split.

View File

@ -1,5 +1,6 @@
from typing import Union, Callable
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from quapy.functional import get_divergence
from quapy.data import LabelledCollection
@ -146,6 +147,53 @@ class DMx(BaseQuantifier):
return F.argmin_prevalence(loss, n_classes, method=self.search)
class ReadMe(BaseQuantifier):
    """
    Quantifier operating on binary bag-of-words representations of the documents. The prevalence estimate
    is obtained by repeatedly drawing a subset of documents (bootstrap) and, for each of them, repeatedly
    drawing a subset of features (bagging); the estimates produced for every bag are averaged within each
    bootstrap trial, and the per-trial averages are finally averaged across all bootstrap trials.

    :param bootstrap_trials: integer, number of bootstrap iterations
    :param bootstrap_range: integer, number of documents drawn (without replacement) in each bootstrap iteration
    :param bagging_trials: integer, number of bagging iterations carried out within each bootstrap iteration
    :param bagging_range: integer, number of features drawn (without replacement) in each bagging iteration
    :param vectorizer_kwargs: additional keyword arguments forwarded to the `CountVectorizer` created in `fit`
    """

    def __init__(self, bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs):
        self.bootstrap_trials = bootstrap_trials
        self.bootstrap_range = bootstrap_range
        self.bagging_trials = bagging_trials
        self.bagging_range = bagging_range
        self.vectorizer_kwargs = vectorizer_kwargs

    def fit(self, data: LabelledCollection):
        """
        Fits a binary `CountVectorizer` on the training instances and stores, for each class, the rows of the
        vectorized training matrix that are labelled with that class (in `self.class_conditional_X`).

        :param data: the training collection; its `Xy` attribute provides the instances and their labels
        """
        X, y = data.Xy
        self.vectorizer = CountVectorizer(binary=True, **self.vectorizer_kwargs)
        X = self.vectorizer.fit_transform(X)
        # NOTE(review): `range(data.classes_)` requires `classes_` to be an integer; if it is a collection
        # of class labels this raises TypeError and a class count should be used instead -- confirm against
        # LabelledCollection.
        self.class_conditional_X = {i: X[y == i] for i in range(data.classes_)}

    def quantify(self, instances):
        """
        Vectorizes the instances with the vectorizer learned in `fit` and returns the prevalence estimate
        averaged over all bootstrap trials (each of which is, in turn, the average over its bagging trials).

        :param instances: the raw instances to be quantified; they are passed to the fitted vectorizer
        :return: the mean (along axis 0) of the per-bootstrap estimates
        """
        X = self.vectorizer.transform(instances)

        # number of documents and number of features
        num_docs, num_feats = X.shape

        # bootstrap
        p_boots = []
        for _ in range(self.bootstrap_trials):
            # drawn without replacement: numpy raises ValueError if bootstrap_range exceeds num_docs
            docs_idx = np.random.choice(num_docs, size=self.bootstrap_range, replace=False)
            # NOTE(review): the row indices are drawn from the instances being quantified but are also used
            # to select rows of the class-conditional training matrices; this fails whenever one of those
            # matrices has fewer rows than an index -- confirm whether per-class sampling was intended.
            class_conditional_X = {
                label: X_class[docs_idx] for label, X_class in self.class_conditional_X.items()
            }
            Xboot = X[docs_idx]

            # bagging
            p_bags = []
            for _ in range(self.bagging_trials):
                feat_idx = np.random.choice(num_feats, size=self.bagging_range, replace=False)
                class_conditional_Xbag = {
                    label: X_class[:, feat_idx] for label, X_class in class_conditional_X.items()
                }
                Xbag = Xboot[:, feat_idx]
                p = self.std_constrained_linear_ls(Xbag, class_conditional_Xbag)
                p_bags.append(p)

            p_boots.append(np.mean(p_bags, axis=0))

        return np.mean(p_boots, axis=0)

    def std_constrained_linear_ls(self, X, class_cond_X: dict):
        """
        Placeholder for the solver invoked by `quantify` on every bag; it is not implemented yet and
        currently returns None.

        :param X: the bootstrapped instances restricted to the bagged features
        :param class_cond_X: dictionary mapping each class to its class-conditional matrix restricted to the
            same bagged features
        """
        pass
def _get_features_range(X):
feat_ranges = []