import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.base import BinaryQuantifier
from quapy.model_selection import GridSearchQ
from quapy.method.aggregative import AggregativeProbabilisticQuantifier
from quapy.protocol import APP
import numpy as np
from sklearn.linear_model import LogisticRegression


# Define a custom quantifier: for this example, we will consider a new quantification algorithm that uses a
# logistic regressor for generating posterior probabilities, and then applies a custom threshold value to the
# posteriors. Since the quantifier internally uses a classifier, it is an aggregative quantifier; and since it
# relies on posterior probabilities, it is a probabilistic-aggregative quantifier. Note also that it has an
# internal hyperparameter (let's say, alpha) which is the decision threshold. Let's also assume the quantifier
# is binary, for simplicity.

class MyQuantifier(AggregativeProbabilisticQuantifier, BinaryQuantifier):
    def __init__(self, classifier, alpha=0.5):
        self.alpha = alpha
        # aggregative quantifiers have an internal self.classifier attribute
        self.classifier = classifier

    def fit(self, data: LabelledCollection, fit_classifier=True):
        assert fit_classifier, 'this quantifier needs to fit the classifier!'
        self.classifier.fit(*data.Xy)
        return self

    # in general, we would need to implement the method quantify(self, instances), but since this quantifier
    # is aggregative, it suffices to implement the method aggregate, which has the following interface
    def aggregate(self, classif_predictions: np.ndarray):
        # the posterior probabilities have already been generated by the quantify method; we only need to
        # specify what to do with them
        positive_probabilities = classif_predictions[:, 1]
        crisp_decisions = positive_probabilities > self.alpha
        pos_prev = crisp_decisions.mean()
        neg_prev = 1 - pos_prev
        return np.asarray([neg_prev, pos_prev])


if __name__ == '__main__':
    qp.environ['SAMPLE_SIZE'] = 100

    # define an instance of our custom quantifier
    quantifier = MyQuantifier(LogisticRegression(), alpha=0.5)

    # load the IMDb dataset
    train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test

    # model selection:
    # let us assume we want to explore our hyperparameter alpha along with one hyperparameter of the classifier
    train, val = train.split_stratified(train_prop=0.75)
    param_grid = {
        'alpha': np.linspace(0, 1, 11),         # quantifier-dependent hyperparameter
        'classifier__C': np.logspace(-2, 2, 5)  # classifier-dependent hyperparameter
    }
    quantifier = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=True).fit(train)

    # evaluation
    mae = qp.evaluation.evaluate(quantifier, protocol=APP(test), error_metric='mae')
    print(f'MAE = {mae:.4f}')

    # final remarks: this method is only for demonstration purposes and makes little sense in general. The
    # method relies on a hyperparameter alpha for binarizing the posterior probabilities. A much better way of
    # fulfilling this goal would be to calibrate the classifier (LogisticRegression is already reasonably well
    # calibrated) and then simply cut at 0.5.
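
    # a minimal sketch of that calibration-based alternative (added for illustration, not part of the
    # original example): we wrap the classifier in sklearn's CalibratedClassifierCV and keep the default
    # threshold alpha=0.5; reusing the same train/test split as above is an assumption made for brevity
    from sklearn.calibration import CalibratedClassifierCV

    calibrated = MyQuantifier(CalibratedClassifierCV(LogisticRegression(), cv=5), alpha=0.5)
    calibrated.fit(train)
    mae = qp.evaluation.evaluate(calibrated, protocol=APP(test), error_metric='mae')
    print(f'MAE (calibrated, alpha=0.5) = {mae:.4f}')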