forked from moreo/QuaPy
Compare commits
2 Commits
Author | SHA1 | Date
---|---|---
Alejandro Moreo Fernandez | 75af15ae4a |
Alejandro Moreo Fernandez | d50a86daf4 |
@ -1,3 +1,9 @@
|
|||
Change Log 0.1.9
----------------

<...>

Change Log 0.1.8
----------------
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ from . import util
|
|||
from . import model_selection
|
||||
from . import classification
|
||||
|
||||
__version__ = '0.1.8'
|
||||
__version__ = '0.1.9'
|
||||
|
||||
environ = {
|
||||
'SAMPLE_SIZE': None,
|
||||
|
|
|
@ -108,8 +108,7 @@ class LabelledCollection:
|
|||
"""
|
||||
Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
|
||||
prevalence values are not specified, then returns the index of a uniform sampling.
|
||||
For each class, the sampling is drawn with replacement if the requested prevalence is larger than
|
||||
the actual prevalence of the class, or without replacement otherwise.
|
||||
For each class, the sampling is drawn with replacement.
|
||||
|
||||
:param size: integer, the requested size
|
||||
:param prevs: the prevalence for each class; the prevalence value for the last class can be lead empty since
|
||||
|
@ -153,7 +152,7 @@ class LabelledCollection:
|
|||
for class_, n_requested in n_requests.items():
|
||||
n_candidates = len(self.index[class_])
|
||||
index_sample = self.index[class_][
|
||||
np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates))
|
||||
np.random.choice(n_candidates, size=n_requested, replace=True)
|
||||
] if n_requested > 0 else []
|
||||
|
||||
indexes_sample.append(index_sample)
|
||||
|
@ -168,8 +167,7 @@ class LabelledCollection:
|
|||
def uniform_sampling_index(self, size, random_state=None):
|
||||
"""
|
||||
Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
|
||||
with replacement if the requested size is greater than the number of instances, or without replacement
|
||||
otherwise.
|
||||
with replacement.
|
||||
|
||||
:param size: integer, the size of the uniform sample
|
||||
:param random_state: if specified, guarantees reproducibility of the split.
|
||||
|
@ -179,13 +177,12 @@ class LabelledCollection:
|
|||
ng = RandomState(seed=random_state)
|
||||
else:
|
||||
ng = np.random
|
||||
return ng.choice(len(self), size, replace=size > len(self))
|
||||
return ng.choice(len(self), size, replace=True)
|
||||
|
||||
def sampling(self, size, *prevs, shuffle=True, random_state=None):
|
||||
"""
|
||||
Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence
|
||||
values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than
|
||||
the actual prevalence of the class, or with replacement otherwise.
|
||||
values. For each class, the sampling is drawn with replacement.
|
||||
|
||||
:param size: integer, the requested size
|
||||
:param prevs: the prevalence for each class; the prevalence value for the last class can be lead empty since
|
||||
|
@ -202,8 +199,7 @@ class LabelledCollection:
|
|||
def uniform_sampling(self, size, random_state=None):
|
||||
"""
|
||||
Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn
|
||||
with replacement if the requested size is greater than the number of instances, or without replacement
|
||||
otherwise.
|
||||
with replacement.
|
||||
|
||||
:param size: integer, the requested size
|
||||
:param random_state: if specified, guarantees reproducibility of the split.
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from typing import Union, Callable
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
|
||||
from quapy.functional import get_divergence
|
||||
from quapy.data import LabelledCollection
|
||||
|
@ -146,6 +147,53 @@ class DMx(BaseQuantifier):
|
|||
return F.argmin_prevalence(loss, n_classes, method=self.search)
|
||||
|
||||
|
||||
class ReadMe(BaseQuantifier):
    """
    Quantifier implementing the ReadMe approach: many constrained least-squares
    prevalence estimates are computed over bootstrap resamples of the documents
    and random bags of features, and their mean is returned.

    NOTE(review): `std_constrained_linear_ls` is still a stub, so `quantify`
    cannot yet produce meaningful estimates — this class is work in progress.

    :param bootstrap_trials: number of bootstrap resamples of the documents
    :param bootstrap_range: number of documents drawn in each bootstrap resample
    :param bagging_trials: number of random feature bags per bootstrap resample
    :param bagging_range: number of features drawn in each bag
    :param vectorizer_kwargs: extra keyword arguments forwarded to CountVectorizer
    """

    def __init__(self, bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs):
        self.bootstrap_trials = bootstrap_trials
        self.bootstrap_range = bootstrap_range
        self.bagging_trials = bagging_trials
        self.bagging_range = bagging_range
        self.vectorizer_kwargs = vectorizer_kwargs

    def fit(self, data: LabelledCollection):
        """
        Vectorizes the training documents (binary term presence) and stores,
        per class, the rows of the document-term matrix belonging to that class.

        :param data: the training collection
        :return: self
        """
        X, y = data.Xy
        self.vectorizer = CountVectorizer(binary=True, **self.vectorizer_kwargs)
        X = self.vectorizer.fit_transform(X)
        # fix: `classes_` is the array of class labels, not a count, so it must
        # be iterated directly (the original `range(data.classes_)` raises
        # TypeError on a numpy array)
        self.class_conditional_X = {c: X[y == c] for c in data.classes_}
        return self

    def quantify(self, instances):
        """
        Estimates class prevalence values for the given (raw text) instances by
        averaging per-bag estimates over bootstrap resamples.

        :param instances: the test documents
        :return: the mean prevalence vector across bootstraps and bags
        """
        X = self.vectorizer.transform(instances)
        num_docs, num_feats = X.shape

        # bootstrap over documents
        p_boots = []
        for _ in range(self.bootstrap_trials):
            # fix: was `self.bootstra_range` (AttributeError). Also, the same
            # index array cannot be reused across the per-class training
            # matrices, whose row counts differ from the test set's — each
            # matrix is resampled over its own row range, with replacement,
            # as a bootstrap requires
            docs_idx = np.random.choice(num_docs, size=min(self.bootstrap_range, num_docs), replace=True)
            class_conditional_X = {
                c: M[np.random.choice(M.shape[0], size=min(self.bootstrap_range, M.shape[0]), replace=True)]
                for c, M in self.class_conditional_X.items()
            }
            Xboot = X[docs_idx]

            # bagging over random feature subsets
            p_bags = []
            for _ in range(self.bagging_trials):
                feat_idx = np.random.choice(num_feats, size=self.bagging_range, replace=False)
                class_conditional_Xbag = {c: M[:, feat_idx] for c, M in class_conditional_X.items()}
                Xbag = Xboot[:, feat_idx]
                p = self.std_constrained_linear_ls(Xbag, class_conditional_Xbag)
                p_bags.append(p)
            p_boots.append(np.mean(p_bags, axis=0))

        p_mean = np.mean(p_boots, axis=0)
        # fix: the spread must be taken across bootstrap estimates, not over
        # the bags of the last bootstrap only (was `np.std(p_bags, axis=0)`);
        # TODO: expose p_std to callers — it is currently computed but unused
        p_std = np.std(p_boots, axis=0)

        return p_mean

    def std_constrained_linear_ls(self, X, class_cond_X: dict):
        # TODO: standard-deviation-constrained linear least squares (stub)
        pass
def _get_features_range(X):
|
||||
feat_ranges = []
|
||||
|
|
Loading…
Reference in New Issue