1
0
Fork 0

sketching the ReadMe system by King and Lu, and Hopkins and King

This commit is contained in:
Alejandro Moreo Fernandez 2024-02-16 17:34:10 +01:00
parent 9e6b9c8955
commit d50a86daf4
3 changed files with 55 additions and 1 deletions

View File

@ -1,3 +1,9 @@
Change Log 0.1.9
----------------
<...>
Change Log 0.1.8 Change Log 0.1.8
---------------- ----------------

View File

@ -11,7 +11,7 @@ from . import util
from . import model_selection from . import model_selection
from . import classification from . import classification
__version__ = '0.1.8' __version__ = '0.1.9'
environ = { environ = {
'SAMPLE_SIZE': None, 'SAMPLE_SIZE': None,

View File

@ -1,5 +1,6 @@
from typing import Union, Callable from typing import Union, Callable
import numpy as np import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from quapy.functional import get_divergence from quapy.functional import get_divergence
from quapy.data import LabelledCollection from quapy.data import LabelledCollection
@ -146,6 +147,53 @@ class DMx(BaseQuantifier):
return F.argmin_prevalence(loss, n_classes, method=self.search) return F.argmin_prevalence(loss, n_classes, method=self.search)
class ReadMe(BaseQuantifier):
    """Work-in-progress sketch of the ReadMe quantifier for textual data.

    Documents are turned into binary bag-of-words vectors. At quantification
    time the class prevalence is estimated many times over random subsets of
    documents (bootstrap) and random subsets of features (bagging), and the
    estimates are averaged.

    NOTE: the core estimation step, :meth:`std_constrained_linear_ls`, is not
    implemented yet, so :meth:`quantify` cannot produce a result for now.

    :param bootstrap_trials: number of bootstrap iterations (outer loop)
    :param bootstrap_range: number of documents drawn in each bootstrap
        iteration
    :param bagging_trials: number of bagging iterations (inner loop) per
        bootstrap iteration
    :param bagging_range: number of features drawn in each bagging iteration
    :param vectorizer_kwargs: extra keyword arguments forwarded to
        ``CountVectorizer`` (which is always created with ``binary=True``)
    """

    def __init__(self, bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs):
        self.bootstrap_trials = bootstrap_trials
        self.bootstrap_range = bootstrap_range
        self.bagging_trials = bagging_trials
        self.bagging_range = bagging_range
        self.vectorizer_kwargs = vectorizer_kwargs

    def fit(self, data: LabelledCollection):
        """Fit the vectorizer and store the training documents split by class.

        :param data: labelled training documents
        :return: self
        """
        X, y = data.Xy
        self.vectorizer = CountVectorizer(binary=True, **self.vectorizer_kwargs)
        X = self.vectorizer.fit_transform(X)
        # NOTE(review): assumes `data.classes_` is an iterable of class labels
        # (the original `range(data.classes_)` would only work for an int) --
        # confirm against LabelledCollection.
        self.class_conditional_X = {label: X[y == label] for label in data.classes_}
        return self

    def quantify(self, instances):
        """Estimate class prevalence for `instances` via bootstrap + bagging.

        :param instances: raw documents accepted by the fitted vectorizer
        :return: the mean, across bootstrap iterations, of the per-iteration
            bagged estimates
        :raises NotImplementedError: until `std_constrained_linear_ls` is
            implemented
        """
        X = self.vectorizer.transform(instances)
        num_feats = X.shape[1]
        # never ask for more distinct features than the vocabulary holds
        n_bag_feats = min(self.bagging_range, num_feats)

        p_boots = []
        for _ in range(self.bootstrap_trials):
            # Each matrix is subsampled with indices drawn over its OWN rows;
            # indices drawn over the test set are meaningless (and possibly
            # out of range) for the per-class training matrices.
            Xboot = self._sample_rows(X, self.bootstrap_range)
            class_conditional_X = {
                label: self._sample_rows(Xc, self.bootstrap_range)
                for label, Xc in self.class_conditional_X.items()
            }

            p_bags = []
            for _ in range(self.bagging_trials):
                # the same feature subset is applied to test and training data
                feat_idx = np.random.choice(num_feats, size=n_bag_feats, replace=False)
                class_conditional_Xbag = {
                    label: Xc[:, feat_idx] for label, Xc in class_conditional_X.items()
                }
                Xbag = Xboot[:, feat_idx]
                p_bags.append(self.std_constrained_linear_ls(Xbag, class_conditional_Xbag))
            p_boots.append(np.mean(p_bags, axis=0))

        p_mean = np.mean(p_boots, axis=0)
        # Spread across bootstrap iterations (the original looked at `p_bags`,
        # i.e. only the last iteration). TODO: expose it once the API is settled.
        p_std = np.std(p_boots, axis=0)
        return p_mean

    @staticmethod
    def _sample_rows(X, size):
        """Return up to `size` distinct, randomly chosen rows of `X`."""
        n_rows = X.shape[0]
        row_idx = np.random.choice(n_rows, size=min(size, n_rows), replace=False)
        return X[row_idx]

    def std_constrained_linear_ls(self, X, class_cond_X: dict):
        """Estimate class prevalence from one document/feature subsample.

        :param X: subsampled test matrix
        :param class_cond_X: dict mapping each class label to its subsampled
            training matrix (same feature subset as `X`)
        :raises NotImplementedError: always; the solver is not written yet
        """
        # Fail loudly: silently returning None would only surface later as an
        # obscure error inside np.mean.
        raise NotImplementedError('std_constrained_linear_ls is not implemented yet')
def _get_features_range(X): def _get_features_range(X):
feat_ranges = [] feat_ranges = []