# Source code for asreview.data.loader

from io import StringIO
from pathlib import Path

from asreview.datasets import DatasetManager
from asreview.datasets import DatasetNotFoundError
from asreview.exceptions import BadFileFormatError
from asreview.utils import _entry_points
from asreview.utils import _get_filename_from_url
from asreview.utils import is_url


def _from_file(fp, reader=None):
    """Create instance from supported file format.

    It works in two ways; either manual control where the conversion
    functions are supplied or automatic, where it searches in the entry
    points for the right conversion functions.

    Arguments
    ---------
    fp: str, pathlib.Path
        Read the data from this file or url.
    reader: class
        Reader to import the file.
    """

    if reader is not None:
        return reader.read_data(fp)

    # get the filename from a url else file path
    if is_url(fp):
        fn = _get_filename_from_url(fp)
    else:
        fn = Path(fp).name

    try:
        reader = _entry_points(group="asreview.readers")[Path(fn).suffix].load()
    except Exception:
        raise BadFileFormatError(f"Importing file {fp} not possible.")

    return reader.read_data(fp)


def _from_extension(name, reader=None):
    """Load a dataset that is provided by an extension.

    The dataset is looked up by name through ``DatasetManager().find``.
    If it has a ``filepath``, the data is read from that location.
    Otherwise the dataset is written to an in-memory text buffer and read
    back with the dataset's own reader.

    Arguments
    ---------
    name: str
        Name (alias) of the extension dataset to look up.
    reader: class
        Reader to import the file. Only used when the dataset has a
        ``filepath``; otherwise it is replaced by ``dataset.reader()``.
        When no reader is available, one is looked up in the
        ``asreview.readers`` entry points by the suffix of the file name.

    Returns
    -------
    The value returned by ``reader.read_data(fp)``.

    Raises
    ------
    BadFileFormatError
        If looking up or loading a reader for the file suffix fails.
    """

    dataset = DatasetManager().find(name)

    if dataset.filepath:
        fp = dataset.filepath
    else:
        # No file available: build the dataset into an in-memory buffer and
        # read it with the reader of the dataset itself. This overrides any
        # reader passed in by the caller.
        # NOTE(review): assumes dataset.reader() returns a usable reader; the
        # suffix lookup below is not meaningful for a StringIO - confirm.
        reader = dataset.reader()
        fp = StringIO(dataset.to_file())

    if reader is None:
        # For a URL the file name comes from _get_filename_from_url;
        # otherwise it is the last component of the path.
        if is_url(fp):
            fn = _get_filename_from_url(fp)
        else:
            fn = Path(fp).name

        try:
            reader = _entry_points(group="asreview.readers")[Path(fn).suffix].load()
        except Exception as err:
            # Chain the underlying error so the actual reason is kept as
            # the explicit cause of the BadFileFormatError.
            raise BadFileFormatError(f"Importing file {fp} not possible.") from err

    return reader.read_data(fp)



# [docs]
def load_dataset(name, **kwargs):
    """Load data from file, URL, or plugin.

    A URL or an existing path is read directly. Anything else is treated
    as the alias of an extension dataset.

    Parameters
    ----------
    name: str, pathlib.Path
        File path, URL, or alias of extension dataset.
    **kwargs:
        Keyword arguments forwarded to the loading function (for example
        ``reader``).

    Returns
    -------
    asreview.Dataset:
        Initialized ASReview data object.

    Raises
    ------
    FileNotFoundError
        If ``name`` is not a URL, not an existing path, and not a known
        extension dataset.
    """

    # URLs and existing paths are read directly.
    points_to_file = is_url(name) or Path(name).exists()
    if points_to_file:
        return _from_file(name, **kwargs)

    # Otherwise the name may refer to a dataset shipped by an extension.
    try:
        extension_data = _from_extension(name, **kwargs)
    except DatasetNotFoundError:
        pass
    else:
        return extension_data

    # Nothing matched: report the missing dataset to the caller.
    raise FileNotFoundError(f"File, URL, or dataset does not exist: '{name}'")



def load_data(name, **kwargs):
    """Deprecated, use asreview.load_dataset instead.

    Emits a ``UserWarning`` about the deprecation and then forwards all
    arguments to ``load_dataset``.

    Parameters
    ----------
    name: str, pathlib.Path
        File path, URL, or alias of extension dataset.
    **kwargs:
        Keyword arguments passed to the reader.

    Returns
    -------
    asreview.Dataset:
        Initialized ASReview data object.
    """
    # Imported locally so this block does not depend on a module-level import.
    import warnings

    # Constructing UserWarning(...) alone does nothing; the warning has to be
    # issued through warnings.warn. stacklevel=2 attributes it to the caller.
    warnings.warn(
        "'load_data' is deprecated and will be removed in the future. "
        "Use 'load_dataset' instead.",
        UserWarning,
        stacklevel=2,
    )

    return load_dataset(name, **kwargs)