Source code for sacroml.attacks.data

"""Abstract data handler supporting both PyTorch and scikit-learn."""

from __future__ import annotations

from abc import ABC, abstractmethod
from collections.abc import Sequence

import numpy as np
from torch.utils.data import DataLoader, Dataset



[docs]
class BaseDataHandler(ABC):  # pylint: disable=too-few-public-methods
    """Base data handling interface."""


[docs]
    @abstractmethod
    def __init__(self) -> None:
        """Instantiate a data handler."""


    @abstractmethod
    def __len__(self) -> int:
        """Return the length of the dataset."""




[docs]
class PyTorchDataHandler(BaseDataHandler):
    """PyTorch dataset handling interface."""


[docs]
    @abstractmethod
    def get_dataset(self) -> Dataset:
        """Return a processed dataset.

        Returns
        -------
        Dataset
            A (processed) PyTorch dataset.
        """



[docs]
    @abstractmethod
    def get_raw_dataset(self) -> Dataset | None:
        """Return a raw unprocessed dataset.

        Returns
        -------
        Dataset | None
            An unprocessed PyTorch dataset.
        """



[docs]
    @abstractmethod
    def get_dataloader(
        self,
        dataset: Dataset,
        indices: Sequence[int],
        batch_size: int = 32,
        shuffle: bool = False,
    ) -> DataLoader:
        """Return a data loader with a requested subset of samples.

        Parameters
        ----------
        dataset : Dataset
            A (processed) PyTorch dataset.
        indices : Sequence[int]
            The indices to load from the dataset.
        batch_size : int
            The batch_size to sample the dataset.
        shuffle : bool
            Whether to shuffle the data.

        Returns
        -------
        DataLoader
            A PyTorch DataLoader.
        """





[docs]
class SklearnDataHandler(BaseDataHandler):  # pragma: no cover
    """Scikit-learn data handling interface."""


[docs]
    @abstractmethod
    def get_data(self) -> tuple[np.ndarray, np.ndarray]:
        """Return the processed data arrays.

        Returns
        -------
        tuple[np.ndarray, np.ndarray]
            Features (X) and targets (y) as numpy arrays.
        """



[docs]
    @abstractmethod
    def get_raw_data(self) -> tuple[np.ndarray, np.ndarray] | None:
        """Return the original unprocessed data arrays.

        Returns
        -------
        tuple[np.ndarray, np.ndarray] | None
            Features (X) and targets (y) as numpy arrays.
        """



[docs]
    @abstractmethod
    def get_subset(
        self, X: np.ndarray, y: np.ndarray, indices: Sequence[int]
    ) -> tuple[np.ndarray, np.ndarray]:
        """Return a subset of the data.

        Parameters
        ----------
        X : np.ndarray
            Feature array.
        y : np.ndarray
            Target array.
        indices : Sequence[int]
            The indices to extract.

        Returns
        -------
        tuple[np.ndarray, np.ndarray]
            Subset of features and targets.
        """