From d1d4e08314dae13db12039269b8ba76f61bd6125 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Tue, 16 Jun 2020 10:53:28 +0200
Subject: [PATCH] make optimization optional, download and unzip the corpus
 automatically, add Esuli's calibration

---
 src/author_identification.py |  13 +-
 src/experiments.sh           |  19 ++
 src/model.py                 |  25 +-
 src/util/calibration.py      | 589 +++++++++++++++++++++++++++++++++++
 4 files changed, 637 insertions(+), 9 deletions(-)
 create mode 100644 src/experiments.sh
 create mode 100644 src/util/calibration.py

diff --git a/src/author_identification.py b/src/author_identification.py
index 5133808..8130931 100755
--- a/src/author_identification.py
+++ b/src/author_identification.py
@@ -56,7 +56,14 @@ def main():
     Xtr, ytr, groups = feature_extractor.fit_transform(positive, negative)
 
     print('Fitting the Verificator')
-    av = AuthorshipVerificator(nfolds=10)
+    if args.C is None:
+        params = {'C': np.logspace(-4, +3, 8)}
+        C = 1.
+    else:
+        params = None
+        C = args.C
+
+    av = AuthorshipVerificator(C=C, params=params)
     av.fit(Xtr, ytr)
 
     if args.unknown:
@@ -73,6 +80,7 @@
         f1_scores.append(f1_from_counters(tp, fp, fn, tn))
         counters.append((tp, fp, fn, tn))
         tee(f'F1 for {author} = {f1_scores[-1]:.3f}', log)
+        print(f'TP={tp} FP={fp} FN={fn} TN={tn}')
 
     if args.loo:
         print(f'Computing macro- and micro-averages (discarded {discarded}/{len(args.authors)})')
@@ -88,6 +96,7 @@
     log.close()
 
 
+
 def tee(msg, log):
     print(msg)
     log.write(f'{msg}\n')
@@ -111,6 +120,8 @@ if __name__ == '__main__':
                         help='path to the file of unknown paternity (default None)')
     parser.add_argument('--log', type=str, metavar='PATH', default='./results.txt',
                         help='path to the log file where to write the results (default ./results.txt)')
+    parser.add_argument('--C', type=float, metavar='C', default=None,
+                        help='set the parameter C (trade-off between error and margin), or leave unset to optimize it via cross-validation')
 
     args = parser.parse_args()
 
diff --git a/src/experiments.sh b/src/experiments.sh
new file mode 100644
index 0000000..f5019c1
--- /dev/null
+++ b/src/experiments.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -x
+
+corpus='../MedLatin'
+
+if [ ! 
-d "$corpus" ]; then
+  curl http://hlt.isti.cnr.it/medlatin/MedLatin.zip -o ../MedLatin.zip
+  unzip ../MedLatin.zip -d ../
+  rm ../MedLatin.zip
+fi
+
+PY="python3 author_identification.py"
+MedLatin1="../MedLatin/Corpora/MedLatin1"
+MedLatin2="../MedLatin/Corpora/MedLatin2"
+EP1="../MedLatin/Epistle/EpistolaXIII_1.txt"
+EP2="../MedLatin/Epistle/EpistolaXIII_2.txt"
+
+$PY $MedLatin1 ALL --unknown $EP1 --loo --log ./results_EP1.txt
+$PY $MedLatin2 ALL --unknown $EP2 --loo --log ./results_EP2.txt
diff --git a/src/model.py b/src/model.py
index 137fbbe..11bcc77 100755
--- a/src/model.py
+++ b/src/model.py
@@ -1,27 +1,28 @@
 from sklearn.metrics import make_scorer
 from sklearn.model_selection import GridSearchCV, LeaveOneOut, LeaveOneGroupOut, cross_val_score, StratifiedKFold
-from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
-from sklearn.svm import *
+from sklearn.linear_model import LogisticRegression
 
 from data.features import *
+from util.calibration import CalibratedClassifierCV
 from util.evaluation import f1, get_counters
 
 
 class AuthorshipVerificator:
 
     def __init__(self, nfolds=10,
-                 params={'C': np.logspace(-4, +4, 9), 'class_weight': ['balanced', None]},
+                 params={'C': np.logspace(-4, +3, 8)},
+                 C=1.,
                  author_name=None):
         self.nfolds = nfolds
         self.params = params
         self.author_name = author_name if author_name else 'this author'
-        self.classifier = LogisticRegression()
+        self.classifier = LogisticRegression(C=C, class_weight='balanced')
 
     def fit(self, X, y):
         y = np.asarray(y)
         positive_examples = y.sum()
-        if positive_examples >= self.nfolds:
+        if positive_examples >= self.nfolds and self.params is not None:
             print('optimizing {}'.format(self.classifier.__class__.__name__))
-            folds = list(StratifiedKFold(n_splits=self.nfolds).split(X, y))
+            folds = list(StratifiedKFold(n_splits=self.nfolds, shuffle=True, random_state=42).split(X, y))
             self.estimator = GridSearchCV(
                 self.classifier, param_grid=self.params, cv=folds, scoring=make_scorer(f1), n_jobs=-1
             )
@@ -35,6 +36,9 @@ class AuthorshipVerificator:
             print(f'Best params: {self.estimator.best_params_} (cross-validation F1={f1_mean:.3f})')
             self.estimator = self.estimator.best_estimator_
 
+        self.estimator = CalibratedClassifierCV(base_estimator=self.estimator, cv=self.nfolds, ensemble=False)
+        self.estimator.fit(X, y)
+
         return self
 
     def leave_one_out(self, X, y, files, groups=None, test_lowest_index_only=True, counters=False):
@@ -50,9 +54,14 @@ class AuthorshipVerificator:
             folds = [(train, np.min(test, keepdims=True)) for train, test in folds]
 
         scores = cross_val_score(self.estimator, X, y, cv=folds, scoring=make_scorer(f1), n_jobs=-1)
-        missclassified = '\n'.join(files[scores == 0].tolist())
+        missclassified = files[scores == 0].tolist()
+        #if hasattr(self.estimator, 'predict_proba') and len(missclassified) > 0:
+        #    missclassified_prob = self.estimator.predict_proba(csr_matrix(X)[scores == 0])[:, 1]
+        #    missclassified_prob = missclassified_prob.flatten().tolist()
+        #    missclassified = [f'{file} Pr={prob:.3f}' for file, prob in zip(missclassified,missclassified_prob)]
         print('missclassified texts:')
-        print(missclassified)
+        print('\n'.join(missclassified))
+
 
         if counters and test_lowest_index_only:
             yfull_true = y[:len(folds)]
diff --git a/src/util/calibration.py b/src/util/calibration.py
new file mode 100644
index 0000000..eb329a1
--- /dev/null
+++ b/src/util/calibration.py
@@ -0,0 +1,589 @@
+"""Calibration of predicted probabilities."""
+
+# Author: Alexandre Gramfort
+#         Balazs Kegl
+#         Jan Hendrik Metzen
+#         Mathieu Blondel
+#
+# 
License: BSD 3 clause
+
+import warnings
+from inspect import signature
+
+from math import log
+import numpy as np
+
+from scipy.special import expit
+from scipy.special import xlogy
+from scipy.optimize import fmin_bfgs
+from sklearn.preprocessing import LabelEncoder
+
+from sklearn.base import (BaseEstimator, ClassifierMixin, RegressorMixin, clone,
+                          MetaEstimatorMixin)
+from sklearn.preprocessing import label_binarize, LabelBinarizer
+from sklearn.utils import check_array, indexable, column_or_1d
+from sklearn.utils.validation import check_is_fitted, check_consistent_length
+from sklearn.utils.validation import _check_sample_weight
+from sklearn.isotonic import IsotonicRegression
+from sklearn.svm import LinearSVC
+from sklearn.model_selection import check_cv, cross_val_predict
+from sklearn.utils.validation import _deprecate_positional_args
+
+
+class CalibratedClassifierCV(BaseEstimator, ClassifierMixin,
+                             MetaEstimatorMixin):
+    """Probability calibration with isotonic regression or logistic regression.
+    The calibration is based on the :term:`decision_function` method of the
+    `base_estimator` if it exists, else on :term:`predict_proba`.
+    Read more in the :ref:`User Guide <calibration>`.
+    Parameters
+    ----------
+    base_estimator : instance BaseEstimator
+        The classifier whose output needs to be calibrated to provide more
+        accurate `predict_proba` outputs.
+    method : 'sigmoid' or 'isotonic'
+        The method to use for calibration. Can be 'sigmoid' which
+        corresponds to Platt's method (i.e. a logistic regression model) or
+        'isotonic' which is a non-parametric approach. It is not advised to
+        use isotonic calibration with too few calibration samples
+        ``(<<1000)`` since it tends to overfit.
+    cv : integer, cross-validation generator, iterable or "prefit", optional
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+        - None, to use the default 5-fold cross-validation,
+        - integer, to specify the number of folds.
+        - :term:`CV splitter`,
+        - An iterable yielding (train, test) splits as arrays of indices.
+        For integer/None inputs, if ``y`` is binary or multiclass,
+        :class:`sklearn.model_selection.StratifiedKFold` is used. If ``y`` is
+        neither binary nor multiclass, :class:`sklearn.model_selection.KFold`
+        is used.
+        Refer :ref:`User Guide <cross_validation>` for the various
+        cross-validation strategies that can be used here.
+        If "prefit" is passed, it is assumed that `base_estimator` has been
+        fitted already and all data is used for calibration.
+        .. versionchanged:: 0.22
+            ``cv`` default value if None changed from 3-fold to 5-fold.
+    ensemble : bool, optional
+        When ``cv`` is not "prefit", it determines how the final estimator
+        is fit.
+        If ``ensemble`` is True (default), an estimator (clone of
+        base_estimator) is fit and calibrated on each fold. The final
+        estimator is an ensemble that averages the predicted probabilities
+        of all such estimators.
+        When ``ensemble`` is False, the cross-validation generator is used to
+        compute predictions (using :func:`cross_val_predict`), and the union
+        of these predictions is used for training the sigmoid or isotonic
+        model. The ``base_estimator`` is then fit on the whole data.
+    Attributes
+    ----------
+    classes_ : array, shape (n_classes)
+        The class labels. 
+ calibrated_classifiers_ : list (len() equal to cv or 1 if cv == "prefit" \ + or ensemble == False) + When ``ensemble`` is True, a list of calibrated classifiers, one for + each crossvalidation fold, which has been fitted on all but the + validation fold and calibrated on the validation fold. + When ``ensemble`` is False, or when using "prefit", the list contains + the single calibrated classifier produced by the fit method. + References + ---------- + .. [1] Obtaining calibrated probability estimates from decision trees + and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001 + .. [2] Transforming Classifier Scores into Accurate Multiclass + Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002) + .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to + Regularized Likelihood Methods, J. Platt, (1999) + .. [4] Predicting Good Probabilities with Supervised Learning, + A. Niculescu-Mizil & R. Caruana, ICML 2005 + """ + @_deprecate_positional_args + def __init__(self, base_estimator=None, *, method='sigmoid', cv=None, + ensemble=True): + self.base_estimator = base_estimator + self.method = method + self.cv = cv + self.ensemble = ensemble + + def fit(self, X, y, sample_weight=None): + """Fit the calibrated model + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Training data. + y : array-like, shape (n_samples,) + Target values. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. + Returns + ------- + self : object + Returns an instance of self. + """ + X, y = indexable(X, y) + le = LabelBinarizer().fit(y) + self.classes_ = le.classes_ + + # Check that each cross-validation fold can have at least one + # example per class + n_folds = self.cv if isinstance(self.cv, int) \ + else self.cv.n_folds if hasattr(self.cv, "n_folds") else None + if n_folds and \ + np.any([np.sum(y == class_) < n_folds for class_ in + self.classes_]): + raise ValueError("Requesting %d-fold cross-validation but provided" + " less than %d examples for at least one class." + % (n_folds, n_folds)) + + self.calibrated_classifiers_ = [] + if self.base_estimator is None: + # we want all classifiers that don't expose a random_state + # to be deterministic (and we don't want to expose this one). + base_estimator = LinearSVC(random_state=0) + else: + base_estimator = self.base_estimator + + if self.cv == "prefit": + calibrated_classifier = _CalibratedClassifier( + base_estimator, method=self.method) + calibrated_classifier.fit(X, y, sample_weight) + self.calibrated_classifiers_.append(calibrated_classifier) + else: + cv = check_cv(self.cv, y, classifier=True) + fit_parameters = signature(base_estimator.fit).parameters + base_estimator_supports_sw = "sample_weight" in fit_parameters + + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X) + + if not base_estimator_supports_sw: + estimator_name = type(base_estimator).__name__ + warnings.warn("Since %s does not support sample_weights, " + "sample weights will only be used for the " + "calibration itself." 
% estimator_name) + if self.ensemble: + for train, test in cv.split(X, y): + this_estimator = clone(base_estimator) + + if sample_weight is not None and \ + base_estimator_supports_sw: + this_estimator.fit(X[train], y[train], + sample_weight=sample_weight[train]) + else: + this_estimator.fit(X[train], y[train]) + + calibrated_classifier = _CalibratedClassifier( + this_estimator, method=self.method, + classes=self.classes_) + sw = None if sample_weight is None else sample_weight[test] + calibrated_classifier.fit(X[test], y[test], + sample_weight=sw) + self.calibrated_classifiers_.append(calibrated_classifier) + else: + if hasattr(base_estimator, "decision_function"): + base_estimator_method = "decision_function" + elif hasattr(base_estimator, "predict_proba"): + base_estimator_method = "predict_proba" + else: + raise RuntimeError('classifier has no decision_function ' + 'or predict_proba method.') + predictions = cross_val_predict(base_estimator, X, y, cv=cv, + method=base_estimator_method) + this_estimator = clone(base_estimator) + if sample_weight is not None and base_estimator_supports_sw: + this_estimator.\ + fit(X, y, sample_weight=sample_weight) + else: + this_estimator.fit(X, y) + calibrated_classifier = \ + _CalibratedClassifier(this_estimator, method=self.method, + classes=self.classes_, + predictions_in_X=True) + if hasattr(this_estimator, "decision_function"): + if predictions.ndim == 1: + predictions = predictions[:, np.newaxis] + elif hasattr(this_estimator, "predict_proba"): + if len(self.classes_) == 2: + predictions = predictions[:, 1:] + calibrated_classifier.fit(predictions, y, sample_weight) + self.calibrated_classifiers_.append(calibrated_classifier) + return self + + def predict_proba(self, X): + """Posterior probabilities of classification + This function returns posterior probabilities of classification + according to each class on an array of test vectors X. + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The samples. + Returns + ------- + C : array, shape (n_samples, n_classes) + The predicted probas. + """ + check_is_fitted(self) + X = check_array(X, accept_sparse=['csc', 'csr', 'coo'], + force_all_finite=False) + # Compute the arithmetic mean of the predictions of the calibrated + # classifiers + mean_proba = np.zeros((X.shape[0], len(self.classes_))) + for calibrated_classifier in self.calibrated_classifiers_: + proba = calibrated_classifier.predict_proba(X) + mean_proba += proba + + mean_proba /= len(self.calibrated_classifiers_) + + return mean_proba + + def predict(self, X): + """Predict the target of new samples. The predicted class is the + class that has the highest probability, and can thus be different + from the prediction of the uncalibrated classifier. + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The samples. + Returns + ------- + C : array, shape (n_samples,) + The predicted class. + """ + check_is_fitted(self) + return self.classes_[np.argmax(self.predict_proba(X), axis=1)] + + +class _CalibratedClassifier: + """Probability calibration with isotonic regression or sigmoid. + It assumes that base_estimator has already been fit, and trains the + calibration on the input set of the fit function. Note that this class + should not be used as an estimator directly. Use CalibratedClassifierCV + with cv="prefit" instead. 
+    Parameters
+    ----------
+    base_estimator : instance BaseEstimator
+        The classifier whose output decision function needs to be calibrated
+        to offer more accurate predict_proba outputs. No default value since
+        it has to be an already fitted estimator.
+    method : 'sigmoid' | 'isotonic'
+        The method to use for calibration. Can be 'sigmoid' which
+        corresponds to Platt's method or 'isotonic' which is a
+        non-parametric approach based on isotonic regression.
+    classes : array-like, shape (n_classes,), optional
+            Contains unique classes used to fit the base estimator.
+            If None, then classes is extracted from the given target values
+            in fit().
+    predictions_in_X : bool, optional
+        When False (default), ``X`` holds the samples to be classified, and
+        predictions are obtained by applying the ``base_estimator`` to
+        ``X``.
+        When True, ``X`` already contains predictions.
+    See also
+    --------
+    CalibratedClassifierCV
+    References
+    ----------
+    .. [1] Obtaining calibrated probability estimates from decision trees
+           and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001
+    .. [2] Transforming Classifier Scores into Accurate Multiclass
+           Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)
+    .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to
+           Regularized Likelihood Methods, J. Platt, (1999)
+    .. [4] Predicting Good Probabilities with Supervised Learning,
+           A. Niculescu-Mizil & R. Caruana, ICML 2005
+    """
+    @_deprecate_positional_args
+    def __init__(self, base_estimator, *, method='sigmoid', classes=None,
+                 predictions_in_X=False):
+        self.base_estimator = base_estimator
+        self.method = method
+        self.classes = classes
+        self.predictions_in_X_ = predictions_in_X
+
+    def _preproc(self, X):
+        n_classes = len(self.classes_)
+        if hasattr(self.base_estimator, "decision_function"):
+            df = self.base_estimator.decision_function(X)
+            if df.ndim == 1:
+                df = df[:, np.newaxis]
+        elif hasattr(self.base_estimator, "predict_proba"):
+            df = self.base_estimator.predict_proba(X)
+            if n_classes == 2:
+                df = df[:, 1:]
+        else:
+            raise RuntimeError('classifier has no decision_function or '
+                               'predict_proba method.')
+
+        idx_pos_class = self.label_encoder_.\
+            transform(self.base_estimator.classes_)
+
+        return df, idx_pos_class
+
+    def fit(self, X, y, sample_weight=None):
+        """Calibrate the fitted model
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data.
+        y : array-like, shape (n_samples,)
+            Target values.
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights. If None, then samples are equally weighted.
+        Returns
+        -------
+        self : object
+            Returns an instance of self.
+        """
+
+        self.label_encoder_ = LabelEncoder()
+        if self.classes is None:
+            self.label_encoder_.fit(y)
+        else:
+            self.label_encoder_.fit(self.classes)
+
+        self.classes_ = self.label_encoder_.classes_
+        Y = label_binarize(y, classes=self.classes_)
+
+        if not self.predictions_in_X_:
+            df, idx_pos_class = self._preproc(X)
+        else:
+            df = X
+            idx_pos_class = \
+                self.label_encoder_.transform(self.base_estimator.classes_)
+        self.calibrators_ = []
+
+        for k, this_df in zip(idx_pos_class, df.T):
+            if self.method == 'isotonic':
+                calibrator = IsotonicRegression(out_of_bounds='clip')
+            elif self.method == 'sigmoid':
+                calibrator = _SigmoidCalibration()
+            else:
+                raise ValueError('method should be "sigmoid" or '
+                                 '"isotonic". Got %s.' 
% self.method) + calibrator.fit(this_df, Y[:, k], sample_weight) + self.calibrators_.append(calibrator) + + return self + + def predict_proba(self, X): + """Posterior probabilities of classification + This function returns posterior probabilities of classification + according to each class on an array of test vectors X. + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The samples. + Returns + ------- + C : array, shape (n_samples, n_classes) + The predicted probas. Can be exact zeros. + """ + n_classes = len(self.classes_) + proba = np.zeros((X.shape[0], n_classes)) + + df, idx_pos_class = self._preproc(X) + + for k, this_df, calibrator in \ + zip(idx_pos_class, df.T, self.calibrators_): + if n_classes == 2: + k += 1 + proba[:, k] = calibrator.predict(this_df) + + # Normalize the probabilities + if n_classes == 2: + proba[:, 0] = 1. - proba[:, 1] + else: + proba /= np.sum(proba, axis=1)[:, np.newaxis] + + # XXX : for some reason all probas can be 0 + proba[np.isnan(proba)] = 1. / n_classes + + # Deal with cases where the predicted probability minimally exceeds 1.0 + proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0 + + return proba + + +def _sigmoid_calibration(df, y, sample_weight=None): + """Probability Calibration with sigmoid method (Platt 2000) + Parameters + ---------- + df : ndarray, shape (n_samples,) + The decision function or predict proba for the samples. + y : ndarray, shape (n_samples,) + The targets. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. + Returns + ------- + a : float + The slope. + b : float + The intercept. + References + ---------- + Platt, "Probabilistic Outputs for Support Vector Machines" + """ + df = column_or_1d(df) + y = column_or_1d(y) + + F = df # F follows Platt's notations + + # Bayesian priors (see Platt end of section 2.2) + prior0 = float(np.sum(y <= 0)) + prior1 = y.shape[0] - prior0 + T = np.zeros(y.shape) + T[y > 0] = (prior1 + 1.) / (prior1 + 2.) + T[y <= 0] = 1. / (prior0 + 2.) + T1 = 1. - T + + def objective(AB): + # From Platt (beginning of Section 2.2) + P = expit(-(AB[0] * F + AB[1])) + loss = -(xlogy(T, P) + xlogy(T1, 1. - P)) + if sample_weight is not None: + return (sample_weight * loss).sum() + else: + return loss.sum() + + def grad(AB): + # gradient of the objective function + P = expit(-(AB[0] * F + AB[1])) + TEP_minus_T1P = T - P + if sample_weight is not None: + TEP_minus_T1P *= sample_weight + dA = np.dot(TEP_minus_T1P, F) + dB = np.sum(TEP_minus_T1P) + return np.array([dA, dB]) + + AB0 = np.array([0., log((prior0 + 1.) / (prior1 + 1.))]) + AB_ = fmin_bfgs(objective, AB0, fprime=grad, disp=False) + return AB_[0], AB_[1] + + +class _SigmoidCalibration(RegressorMixin, BaseEstimator): + """Sigmoid regression model. + Attributes + ---------- + a_ : float + The slope. + b_ : float + The intercept. + """ + def fit(self, X, y, sample_weight=None): + """Fit the model using X, y as training data. + Parameters + ---------- + X : array-like, shape (n_samples,) + Training data. + y : array-like, shape (n_samples,) + Training target. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. + Returns + ------- + self : object + Returns an instance of self. 
+ """ + X = column_or_1d(X) + y = column_or_1d(y) + X, y = indexable(X, y) + + self.a_, self.b_ = _sigmoid_calibration(X, y, sample_weight) + return self + + def predict(self, T): + """Predict new data by linear interpolation. + Parameters + ---------- + T : array-like, shape (n_samples,) + Data to predict from. + Returns + ------- + T_ : array, shape (n_samples,) + The predicted data. + """ + T = column_or_1d(T) + return expit(-(self.a_ * T + self.b_)) + + +@_deprecate_positional_args +def calibration_curve(y_true, y_prob, *, normalize=False, n_bins=5, + strategy='uniform'): + """Compute true and predicted probabilities for a calibration curve. + The method assumes the inputs come from a binary classifier, and + discretize the [0, 1] interval into bins. + Calibration curves may also be referred to as reliability diagrams. + Read more in the :ref:`User Guide `. + Parameters + ---------- + y_true : array-like of shape (n_samples,) + True targets. + y_prob : array-like of shape (n_samples,) + Probabilities of the positive class. + normalize : bool, default=False + Whether y_prob needs to be normalized into the [0, 1] interval, i.e. + is not a proper probability. If True, the smallest value in y_prob + is linearly mapped onto 0 and the largest one onto 1. + n_bins : int, default=5 + Number of bins to discretize the [0, 1] interval. A bigger number + requires more data. Bins with no samples (i.e. without + corresponding values in `y_prob`) will not be returned, thus the + returned arrays may have less than `n_bins` values. + strategy : {'uniform', 'quantile'}, default='uniform' + Strategy used to define the widths of the bins. + uniform + The bins have identical widths. + quantile + The bins have the same number of samples and depend on `y_prob`. + Returns + ------- + prob_true : ndarray of shape (n_bins,) or smaller + The proportion of samples whose class is the positive class, in each + bin (fraction of positives). + prob_pred : ndarray of shape (n_bins,) or smaller + The mean predicted probability in each bin. + References + ---------- + Alexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good + Probabilities With Supervised Learning, in Proceedings of the 22nd + International Conference on Machine Learning (ICML). + See section 4 (Qualitative Analysis of Predictions). + """ + y_true = column_or_1d(y_true) + y_prob = column_or_1d(y_prob) + check_consistent_length(y_true, y_prob) + + if normalize: # Normalize predicted values into interval [0, 1] + y_prob = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min()) + elif y_prob.min() < 0 or y_prob.max() > 1: + raise ValueError("y_prob has values outside [0, 1] and normalize is " + "set to False.") + + labels = np.unique(y_true) + if len(labels) > 2: + raise ValueError("Only binary classification is supported. " + "Provided labels %s." % labels) + y_true = label_binarize(y_true, classes=labels)[:, 0] + + if strategy == 'quantile': # Determine bin edges by distribution of data + quantiles = np.linspace(0, 1, n_bins + 1) + bins = np.percentile(y_prob, quantiles * 100) + bins[-1] = bins[-1] + 1e-8 + elif strategy == 'uniform': + bins = np.linspace(0., 1. + 1e-8, n_bins + 1) + else: + raise ValueError("Invalid entry to 'strategy' input. 
Strategy " + "must be either 'quantile' or 'uniform'.") + + binids = np.digitize(y_prob, bins) - 1 + + bin_sums = np.bincount(binids, weights=y_prob, minlength=len(bins)) + bin_true = np.bincount(binids, weights=y_true, minlength=len(bins)) + bin_total = np.bincount(binids, minlength=len(bins)) + + nonzero = bin_total != 0 + prob_true = bin_true[nonzero] / bin_total[nonzero] + prob_pred = bin_sums[nonzero] / bin_total[nonzero] + + return prob_true, prob_pred \ No newline at end of file