documented ReadMe

2025-10-20 18:33:45 +02:00 · 2025-10-20 18:33:45 +02:00 · 854b3ba3f9
parent eafe486893
commit 854b3ba3f9
2 changed files with 26 additions and 2 deletions
--- a/examples/18.ReadMe_for_text_analysis.py
+++ b/examples/18.ReadMe_for_text_analysis.py
@ -16,8 +16,8 @@ for test_prev in [[0.25, 0.75], [0.5, 0.5], [0.75, 0.25]]:
    prev_estim, conf = readme.predict_conf(sample.X)
    err = qp.error.mae(sample.prevalence(), prev_estim)
    print(f'true-prevalence={F.strprev(sample.prevalence())},\n'
-          f'predicted-prevalence={F.strprev(prev_estim)},\n'
+          f'predicted-prevalence={F.strprev(prev_estim)}, with confidence intervals {conf},\n'
          f'MAE={err:.4f}')
-    print(conf)
+
--- a/quapy/method/non_aggregative.py
+++ b/quapy/method/non_aggregative.py
@ -153,6 +153,30 @@ class DMx(BaseQuantifier):
 class ReadMe(BaseQuantifier, WithConfidenceABC):
    """
    ReadMe is a non-aggregative quantification system proposed by
    `Daniel Hopkins and Gary King, 2010. A method of automated nonparametric content analysis for
    social science. American Journal of Political Science, 54(1):229–247.
    <https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1540-5907.2009.00428.x>`_.
    The idea is to estimate `Q(Y=i)` directly from:
    :math:`Q(X)=\\sum_{i=1} Q(X|Y=i) Q(Y=i)`
    via least-squares regression, i.e., without incurring the cost of computing posterior probabilities.
    However, this poses a very difficult problem, since the vector `Q(X)` and the matrix `Q(X|Y=i)`
    can be of very high dimensions. In order to render the problem tractable, ReadMe performs bagging in
    the feature space. ReadMe also combines bagging with bootstrap in order to derive confidence intervals
    around point estimations.
    :param bootstrap_trials: int, number of bootstrap trials (default 100)
    :param bagging_trials: int, number of bagging trials (default 100)
    :param bagging_range: int, number of features to keep for each bagging trial (default 250)
    :param confidence_level: float, a value in (0,1) reflecting the desired confidence level (default 0.95)
    :param region: str in 'intervals', 'ellipse', 'ellipse-clr'; indicates the preferred method for
        defining the confidence region (see :class:`WithConfidenceABC`)
    :param random_state: int or None, allows replicability (default None)
    :param verbose: bool, whether to display information during the process (default False)
    """
    def __init__(self,
                 bootstrap_trials=100,