Source code for asreview.models.stoppers

# Copyright 2019-2025 The ASReview Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Stopper mechanisms for the review process.

The stopper mechanisms determine when the review process should be stopped.
This can be based on the properties of the results table or the input dataset.


.. warning::
    This module is experimental and might change.

"""

import functools

import pandas as pd

from sklearn.base import BaseEstimator

# Public API of this module: the names exported by a star-import. Only the
# stopper classes are listed; the decorator helpers ``safe_stop`` and
# ``raise_if_not_simulate`` are not.
__all__ = [
    "LastRelevant",
    "NLabeled",
    "QuantileLabeled",
    "IsFittable",
    "NConsecutiveIrrelevant",
]


def safe_stop(stop_method):
    """Decorator to ensure safe stopping conditions.

    The decorated method is bypassed, and ``True`` is returned, when there is
    nothing left to label: either ``data`` is empty or ``results`` contains
    as many entries as ``data``. In every other case the call is forwarded
    unchanged to ``stop_method``.

    Arguments
    ---------
    stop_method: callable
        Method with the signature ``stop(self, results, data)``.

    Returns
    -------
    callable:
        Wrapper with the same signature that keeps the name and docstring
        of ``stop_method``.
    """

    # functools.wraps keeps __name__/__doc__ of the decorated method, so
    # introspection and generated documentation still see the real method.
    @functools.wraps(stop_method)
    def wrapper(self, results, data):
        # Use len() instead of truthiness: ``data`` may be an array-like
        # whose truth value is ambiguous.
        if len(data) == 0:
            return True
        if len(results) == len(data):
            return True
        return stop_method(self, results, data)

    return wrapper


def raise_if_not_simulate(stop_method):
    """Decorator to only use the stopping mechanism in simulation.

    Before forwarding the call, the wrapper checks ``data`` for missing
    values and refuses to run the stopping mechanism if any are present.

    Arguments
    ---------
    stop_method: callable
        Method with the signature ``stop(self, results, data)``.

    Returns
    -------
    callable:
        Wrapper with the same signature that keeps the name and docstring
        of ``stop_method``.

    Raises
    ------
    ValueError
        Raised by the wrapper when ``data`` contains a missing value.
    """

    @functools.wraps(stop_method)
    def wrapper(self, results, data):
        if isinstance(data, (pd.DataFrame, pd.Series)):
            data_check = data
        else:
            data_check = pd.Series(data)

        # Reduce over every cell. For a DataFrame, ``isna().any()`` yields
        # one value per column, and using that Series in an ``if`` raises an
        # unrelated "truth value is ambiguous" error.
        if data_check.isna().to_numpy().any():
            raise ValueError("Stopper mechanism requires all data to be labeled.")

        return stop_method(self, results, data)

    return wrapper


class LastRelevant(BaseEstimator):
    """Stop after the last relevant record.

    The stopping mechanism stops the review once the labeled records contain
    as many relevant records as the full dataset, i.e. when all relevant
    records have been found. It takes no arguments.

    Because the labels of all records must be known, this stopper raises a
    ``ValueError`` when ``data`` contains missing values.
    """

    name = "last_relevant"
    label = "Last Relevant"

    @safe_stop
    @raise_if_not_simulate
    def stop(self, results, data):
        """Check if the review should be stopped.

        This function checks if the review should be stopped based on the
        results and the labels of the papers.

        Arguments
        ---------
        results: pandas.DataFrame
            DataFrame with the results of the review. Its ``label`` column
            is summed to count the relevant records found so far.
        data: list, np.array, pandas.Series
            Labels of all records. Their sum is compared with the sum of
            the labels in ``results``; presumably binary (1 = relevant).

        Returns
        -------
        bool:
            True if the review should be stopped, False otherwise.

        Raises
        ------
        ValueError
            If ``data`` contains missing values.
        """
        # Equal sums mean every relevant record in ``data`` has been labeled.
        return bool(sum(data) == sum(results["label"]))
class NLabeled(BaseEstimator):
    """Stop the review once enough records have been labeled.

    Arguments
    ---------
    n: int, tuple
        Stopping threshold. An integer is the total number of labeled
        records at which the review stops; with ``-1`` this criterion never
        triggers. A tuple ``(n_relevant, n_irrelevant)`` gives the minimum
        number of relevant and irrelevant records that must both be reached.
    """

    name = "n_labeled"
    label = "N Labeled"

    def __init__(self, n):
        self.n = n

    @safe_stop
    def stop(self, results, data):
        """Decide whether the labeling threshold has been reached.

        Arguments
        ---------
        results: pandas.DataFrame
            DataFrame with the results of the review. Its length and its
            ``label`` column are used.
        data: pandas.DataFrame, list, np.array
            All records. Not used by this check itself.

        Returns
        -------
        bool:
            True if the review should be stopped, False otherwise.

        Raises
        ------
        ValueError
            If ``n`` is neither an integer nor a tuple.
        """
        threshold = self.n

        if not isinstance(threshold, (int, tuple)):
            raise ValueError("StopperN requires an integer or a tuple of integers")

        # -1 is the sentinel for "never stop on this criterion".
        if threshold == -1:
            return False

        # Integer threshold: count every labeled record.
        if isinstance(threshold, int):
            return len(results) >= threshold

        # Tuple threshold: both per-class minimums have to be met.
        min_relevant, min_irrelevant = threshold
        labels = results["label"]
        enough_relevant = sum(labels == 1) >= min_relevant
        return bool(enough_relevant and sum(labels == 0) >= min_irrelevant)
class QuantileLabeled(BaseEstimator):
    """Stop the review once a given share of the records has been labeled.

    Arguments
    ---------
    quantile: float
        Fraction of the records that must be labeled before the review
        stops.
    """

    name = "q_labeled"
    label = "Quantile Labeled"

    def __init__(self, quantile):
        self.quantile = quantile

    @safe_stop
    def stop(self, results, data):
        """Decide whether the labeled fraction has reached the quantile.

        Arguments
        ---------
        results: pandas.DataFrame
            DataFrame with the results of the review; only its length is
            used.
        data: pandas.DataFrame, list, np.array
            All records; only its length is used.

        Returns
        -------
        bool:
            True if the review should be stopped, False otherwise.
        """
        # Share of the dataset that has been labeled so far.
        fraction_labeled = len(results) / len(data)
        return bool(fraction_labeled >= self.quantile)
class IsFittable(NLabeled):
    """Stop the review once both classes have been found.

    Specialization of ``NLabeled`` with ``n`` fixed to ``(1, 1)``: at least
    one relevant and one irrelevant record must be labeled.
    """

    name = "is_fittable"
    label = "Is Fittable"

    def __init__(self):
        # One record of each class.
        one_of_each = (1, 1)
        super().__init__(n=one_of_each)
class NConsecutiveIrrelevant(BaseEstimator):
    """Stop the review after a run of n irrelevant records.

    Arguments
    ---------
    n: int
        Number of irrelevant records in a row to stop the review at.
    """

    name = "n_consecutive_irrelevant"
    label = "N Consecutive Irrelevant"

    def __init__(self, n):
        self.n = n

    @safe_stop
    def stop(self, results, data):
        """Decide whether the last n labeled records were all irrelevant.

        The criterion can only trigger once more than ``n`` records have
        been labeled.

        Arguments
        ---------
        results: pandas.DataFrame
            DataFrame with the results of the review. The last ``n`` entries
            of its ``label`` column are inspected.
        data: pandas.DataFrame, list, np.array
            All records. Not used by this check itself.

        Returns
        -------
        bool:
            True if the review cycle should be stopped, False otherwise.
        """
        run_length = self.n

        if len(results) > run_length:
            # A zero sum over the trailing window means no relevant label
            # among the most recent ``n`` decisions.
            trailing_labels = results["label"].iloc[-run_length:]
            return bool(sum(trailing_labels) == 0)

        return False