2021-01-22 09:58:12 +01:00
|
|
|
from sklearn.base import BaseEstimator
|
2021-01-06 14:58:29 +01:00
|
|
|
from sklearn.decomposition import TruncatedSVD
|
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
|
|
|
|
|
|
2021-11-12 14:30:02 +01:00
|
|
|
class LowRankLogisticRegression(BaseEstimator):
    """
    An example of a classification method (i.e., an object that implements `fit`, `predict`, and `predict_proba`)
    that also generates embedded inputs (i.e., that implements `transform`), as those required for
    :class:`quapy.method.neural.QuaNet`. This is a mock method to allow for easily instantiating
    :class:`quapy.method.neural.QuaNet` on array-like real-valued instances.
    The transformation consists of applying :class:`sklearn.decomposition.TruncatedSVD`
    while classification is performed using :class:`sklearn.linear_model.LogisticRegression` on the low-rank space.

    :param n_components: the number of principal components to retain
    :param kwargs: parameters for the
        `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__ classifier
    """

    def __init__(self, n_components=100, **kwargs):
        self.n_components = n_components
        self.classifier = LogisticRegression(**kwargs)

    def get_params(self, deep=True):
        """
        Get hyper-parameters for this estimator.

        :param deep: forwarded to the `get_params` method of the underlying classifier. The argument is
            accepted so that callers following the scikit-learn estimator API (which invoke
            `get_params(deep=False)`) do not fail with a `TypeError`.
        :return: a dictionary with parameter names mapped to their values; it contains `n_components`
            plus every parameter reported by the underlying classifier
        """
        params = {'n_components': self.n_components}
        params.update(self.classifier.get_params(deep=deep))
        return params

    def set_params(self, **params):
        """
        Set the parameters of this estimator.

        :param params: a `**kwargs` dictionary with the estimator parameters for
            `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__
            and eventually also `n_components` for `TruncatedSVD`
        :return: `self`
        """
        # work on a copy so that `n_components` can be removed before the
        # remaining entries are forwarded to the classifier
        classifier_params = dict(params)
        if 'n_components' in classifier_params:
            self.n_components = classifier_params.pop('n_components')
        self.classifier.set_params(**classifier_params)
        # returning the estimator allows chained calls and re-assignment
        # (``est = est.set_params(...)``)
        return self

    def fit(self, X, y):
        """
        Fit the model according to the given training data. The fit consists of
        fitting `TruncatedSVD` and then `LogisticRegression` on the low-rank representation.

        :param X: array-like of shape `(n_samples, n_features)` with the instances
        :param y: array-like of shape `(n_samples,)` with the class labels
        :return: `self`
        """
        nF = X.shape[1]
        # the projection is only learned when it actually reduces the
        # dimensionality; otherwise `transform` leaves the input unaltered
        self.pca = None
        if nF > self.n_components:
            self.pca = TruncatedSVD(n_components=self.n_components).fit(X)
        X = self.transform(X)
        self.classifier.fit(X, y)
        self.classes_ = self.classifier.classes_
        return self

    def predict(self, X):
        """
        Predicts labels for the instances `X` embedded into the low-rank space.

        :param X: array-like of shape `(n_samples, n_features)` instances to classify
        :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
            instances in `X`
        """
        X = self.transform(X)
        return self.classifier.predict(X)

    def predict_proba(self, X):
        """
        Predicts posterior probabilities for the instances `X` embedded into the low-rank space.

        :param X: array-like of shape `(n_samples, n_features)` instances to classify
        :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
        """
        X = self.transform(X)
        return self.classifier.predict_proba(X)

    def transform(self, X):
        """
        Returns the low-rank approximation of `X` with `n_components` dimensions, or `X` unaltered if
        `n_components` >= `X.shape[1]`.

        This method relies on the attribute `pca`, which is only set by `fit`; `fit` must therefore
        be called before `transform`.

        :param X: array-like of shape `(n_samples, n_features)` instances to embed
        :return: array-like of shape `(n_samples, n_components)` with the embedded instances
        """
        if self.pca is None:
            # no projection was learned during `fit`
            return X
        return self.pca.transform(X)
|