PyTorch Examples#

This section demonstrates how to use SACRO-ML with PyTorch models for privacy assessment.

Simple PyTorch Example#

A basic example showing how to train a simple PyTorch model and run privacy attacks.

Training the Model:

Simple PyTorch Model Training#

"""Train a classifier on synthetic data using sacroml Target and Dataset classes."""

import logging

import torch
from dataset import Synthetic
from model import OverfitNet
from train import test, train

from sacroml.attacks.target import Target

target_dir = "target_pytorch"
random_state = 2

if __name__ == "__main__":
    # Seed PyTorch's CPU generator and, when CUDA is usable, every GPU generator.
    torch.manual_seed(random_state)
    cuda_ready = torch.cuda.is_available()
    if not cuda_ready:
        logging.info("Found no NVIDIA driver on your system")
    else:
        torch.cuda.manual_seed_all(random_state)
        active_gpu = torch.cuda.current_device()
        logging.info(torch.cuda.get_device_name(active_gpu))

    # -------------------------------------------------------------------------
    # Step 1: load the data and train the model
    # -------------------------------------------------------------------------

    logging.info("Loading dataset")

    # The handler owns the (preprocessed) dataset and the train/test split
    handler = Synthetic()
    full_data = handler.get_dataset()
    indices_train, indices_test = handler.get_train_test_indices()

    # Only the training loader is shuffled
    train_loader = handler.get_dataloader(full_data, indices_train, shuffle=True)
    test_loader = handler.get_dataloader(full_data, indices_test, shuffle=False)

    logging.info("Defining the model")

    # Kept as dicts because the same values are handed to Target further down
    model_params = {"x_dim": 4, "y_dim": 4, "n_units": 1000}
    train_params = {"epochs": 1000, "learning_rate": 0.001, "momentum": 0.9}
    model = OverfitNet(**model_params)

    logging.info("Training the model")
    train(model, train_loader, **train_params)

    logging.info("Testing the model")
    test(model, test_loader)

    # -------------------------------------------------------------------------
    # Step 2: use the Target class to generate the contents of target_dir/
    # (for a model that is already saved, the CLI target generator can be used
    # instead)
    # -------------------------------------------------------------------------

    logging.info("Wrapping the model and data in a Target object")
    target = Target(
        model=model,
        # model_params must cover every required model constructor argument
        model_module_path="model.py",
        model_params=model_params,
        # train_params must cover every required train function argument
        train_module_path="train.py",
        train_params=train_params,
        # dataset_name must equal the class name inside the dataset module
        dataset_module_path="dataset.py",
        dataset_name="Synthetic",
        indices_train=indices_train,
        indices_test=indices_test,
    )

    logging.info("Writing Target object to directory: '%s'", target_dir)
    target.save(target_dir)

Note: the training script above is included from examples/pytorch/simple/train_pytorch.py. It imports Synthetic from dataset.py, OverfitNet from model.py, and the train/test functions from train.py in the same example directory; of these, only model.py is reproduced below.

Model Definition:

Simple PyTorch Model Architecture (from examples/pytorch/simple/model.py)#

"""An example Pytorch classifier."""

import torch


class OverfitNet(torch.nn.Module):
    """A fully connected classifier with a single ReLU hidden layer."""

    def __init__(self, x_dim: int, y_dim: int, n_units: int) -> None:
        """Build the two linear layers and chain them together.

        Parameters
        ----------
        x_dim : int
            Number of input features.
        y_dim : int
            Number of outputs produced by the final linear layer.
        n_units : int
            Width of the hidden layer.
        """
        super().__init__()
        # The hidden layer is created before the output layer so that
        # parameters are initialised in network order.
        hidden = torch.nn.Linear(x_dim, n_units)
        output = torch.nn.Linear(n_units, y_dim)
        self.layers = torch.nn.Sequential(hidden, torch.nn.ReLU(), output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the final linear layer for input `x`."""
        logits = self.layers(x)
        return logits

Note: this model code is included from examples/pytorch/simple/model.py. It defines only the OverfitNet class imported by the training script; the variable named target used in other snippets on this page is created in the training and attack scripts, not in this file.

Running Privacy Attacks:

Privacy Attacks on Simple PyTorch Model (from examples/pytorch/simple/attack_pytorch.py)#

"""Example of how to run attacks on a model saved with the Target wrapper."""

import logging

from sacroml.attacks.likelihood_attack import LIRAAttack
from sacroml.attacks.target import Target
from sacroml.attacks.worst_case_attack import WorstCaseAttack

output_dir = "output_pytorch"
target_dir = "target_pytorch"


if __name__ == "__main__":
    logging.info("Loading Target object from '%s'", target_dir)

    # Start from an empty Target and populate it from the saved directory
    target = Target()
    target.load(target_dir)

    logging.info("Running attacks...")

    # Each attack is constructed and run before the next one is created;
    # both are given the same output_dir.
    attack_plan = (
        (WorstCaseAttack, {"n_reps": 10, "output_dir": output_dir}),
        (LIRAAttack, {"n_shadow_models": 100, "output_dir": output_dir}),
    )
    for attack_cls, attack_kwargs in attack_plan:
        attack = attack_cls(**attack_kwargs)
        attack.attack(target)

Note: the attack example is included from examples/pytorch/simple/attack_pytorch.py. It imports only from sacroml and loads the Target from the target_pytorch directory, which is the directory the training script above writes, so run the training script first.

CIFAR Dataset Example#

Advanced example using CIFAR dataset with convolutional neural networks.

Training the Model:

"""Train a PyTorch classifier on CIFAR10 using sacroml Target and Dataset classes."""

import logging

import torch
from dataset import Cifar10
from model import Net
from train import test, train

from sacroml.attacks.target import Target

target_dir = "target_pytorch"
random_state = 2

if __name__ == "__main__":
    # Seed PyTorch's CPU generator and, when CUDA is usable, every GPU generator.
    torch.manual_seed(random_state)
    cuda_ready = torch.cuda.is_available()
    if not cuda_ready:
        logging.info("Found no NVIDIA driver on your system")
    else:
        torch.cuda.manual_seed_all(random_state)
        active_gpu = torch.cuda.current_device()
        logging.info(torch.cuda.get_device_name(active_gpu))

    # -------------------------------------------------------------------------
    # Step 1: load the data and train the model
    # -------------------------------------------------------------------------

    logging.info("Loading dataset")

    # The handler owns the (preprocessed) dataset and the train/test split
    handler = Cifar10()
    full_data = handler.get_dataset()
    indices_train, indices_test = handler.get_train_test_indices()

    # Only the training loader is shuffled
    train_loader = handler.get_dataloader(full_data, indices_train, shuffle=True)
    test_loader = handler.get_dataloader(full_data, indices_test, shuffle=False)

    logging.info("Defining the model")

    # Kept as dicts because the same values are handed to Target further down
    model_params = {"n_kernel": 5}
    train_params = {"epochs": 100, "learning_rate": 0.001, "momentum": 0.9}
    model = Net(**model_params)

    logging.info("Training the model")
    train(model, train_loader, **train_params)

    logging.info("Testing the model")
    test(model, test_loader, handler.classes)

    # -------------------------------------------------------------------------
    # Step 2: use the Target class to generate the contents of target_dir/
    # (for a model that is already saved, the CLI target generator can be used
    # instead)
    # -------------------------------------------------------------------------

    logging.info("Wrapping the model and data in a Target object")
    target = Target(
        model=model,
        # model_params must cover every required model constructor argument
        model_module_path="model.py",
        model_params=model_params,
        # train_params must cover every required train function argument
        train_module_path="train.py",
        train_params=train_params,
        # dataset_name must equal the class name inside the dataset module
        dataset_module_path="dataset.py",
        dataset_name="Cifar10",
        indices_train=indices_train,
        indices_test=indices_test,
    )

    logging.info("Writing Target object to directory: '%s'", target_dir)
    target.save(target_dir)

Note: the training script above is included from examples/pytorch/cifar/train_pytorch.py. It imports Cifar10 from dataset.py and Net from model.py, both reproduced below, and the train/test functions from train.py, which is not reproduced on this page.

Listing above: CIFAR Dataset PyTorch Training (from examples/pytorch/cifar/train_pytorch.py)

Model Architecture:

CIFAR CNN Model Architecture (from examples/pytorch/cifar/model.py)#

"""An example Pytorch classifier."""

import torch
from torch import nn


class Net(nn.Module):
    """A Pytorch classification model for cifar10.

    Two convolution + max-pool stages are followed by three fully connected
    layers that produce 10 class scores.
    """

    def __init__(self, n_kernel: int = 5) -> None:
        """Construct the network.

        Parameters
        ----------
        n_kernel : int
            Side length of the square kernel used by both convolutions.

        Raises
        ------
        ValueError
            If `n_kernel` is not positive, or is so large that nothing is left
            of a 32x32 input after the two convolution + pooling stages.
        """
        super().__init__()

        # Spatial side of the feature map entering fc1. Each stage is an
        # unpadded stride-1 convolution (side - n_kernel + 1) followed by a
        # 2x2 max-pool with stride 2 (floor division by 2). CIFAR10 images are
        # 32x32, so the default n_kernel=5 gives 5, i.e. the original
        # 16 * 5 * 5 input features; other kernel sizes now also line up.
        side = 32
        for _ in range(2):
            side = (side - n_kernel + 1) // 2
        if n_kernel < 1 or side < 1:
            raise ValueError(
                f"n_kernel={n_kernel} is not usable with 32x32 inputs: "
                "no spatial output remains after two conv + pool stages"
            )

        self.conv1 = nn.Conv2d(3, 6, n_kernel)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, n_kernel)
        self.fc1 = nn.Linear(16 * side * side, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward propagate input.

        Parameters
        ----------
        x : torch.Tensor
            Batch of 3-channel images; `fc1` is sized for 32x32 inputs.

        Returns
        -------
        torch.Tensor
            Output of the final linear layer, with 10 values per sample.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Keep the batch dimension and flatten everything else for fc1
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

Note: this model architecture is included from examples/pytorch/cifar/model.py and contains the network and related definitions used by the training script.

Dataset Processing:

CIFAR Dataset Processing (from examples/pytorch/cifar/dataset.py)#

"""Example dataset handler for CIFAR10.

PyTorch datasets must implement `sacroml.attacks.data.PyTorchDataHandler`.
"""

from collections.abc import Sequence

from torch.utils.data import ConcatDataset, DataLoader, Dataset, Subset
from torchvision import transforms
from torchvision.datasets import CIFAR10

from sacroml.attacks.data import PyTorchDataHandler


class Cifar10(PyTorchDataHandler):
    """Data handler that serves CIFAR10 through the `PyTorchDataHandler` API."""

    def __init__(self) -> None:
        """Load both CIFAR10 splits and join them into a single dataset."""
        to_tensor = transforms.ToTensor()
        normalise = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        self.transform = transforms.Compose([to_tensor, normalise])

        # Training split first, then the test split; get_train_test_indices
        # relies on this ordering within the concatenated dataset.
        splits = [
            CIFAR10(
                root="./data", train=is_train, download=True, transform=self.transform
            )
            for is_train in (True, False)
        ]
        self.dataset = ConcatDataset(splits)

        self.classes = (
            "plane",
            "car",
            "bird",
            "cat",
            "deer",
            "dog",
            "frog",
            "horse",
            "ship",
            "truck",
        )

    def __len__(self) -> int:
        """Return the number of samples in the concatenated dataset."""
        return len(self.dataset)

    def get_raw_dataset(self) -> Dataset | None:
        """Return the unprocessed dataset, which this handler does not provide."""
        # Raw data is only needed for attribute inference
        return None

    def get_dataset(self) -> Dataset:
        """Return the transformed, concatenated dataset."""
        return self.dataset

    def get_dataloader(
        self,
        dataset: Dataset,
        indices: Sequence[int],
        batch_size: int = 32,
        shuffle: bool = False,
    ) -> DataLoader:
        """Return a data loader restricted to the samples at `indices`."""
        return DataLoader(
            Subset(dataset, indices), batch_size=batch_size, shuffle=shuffle
        )

    def get_train_test_indices(self) -> tuple[Sequence[int], Sequence[int]]:
        """Return the index ranges used for training and for testing."""
        # Fixed boundaries: presumably the sizes of the two CIFAR10 splits
        # concatenated in __init__ (train first, then test).
        n_train, n_total = 50000, 60000
        return range(n_train), range(n_train, n_total)

Note: dataset loading and preprocessing functions are provided in examples/pytorch/cifar/dataset.py; training and evaluation snippets reference these utilities.

Running Privacy Attacks:

Privacy Attacks on CIFAR Model (from examples/pytorch/cifar/attack_pytorch.py)#

"""Example of how to run attacks on a model saved with the Target wrapper."""

import logging

from sacroml.attacks.likelihood_attack import LIRAAttack
from sacroml.attacks.target import Target
from sacroml.attacks.worst_case_attack import WorstCaseAttack

output_dir = "output_pytorch"
target_dir = "target_pytorch"


if __name__ == "__main__":
    logging.info("Loading Target object from '%s'", target_dir)

    # Start from an empty Target and populate it from the saved directory
    target = Target()
    target.load(target_dir)

    logging.info("Running attacks...")

    # Each attack is constructed and run before the next one is created;
    # both are given the same output_dir.
    attack_plan = (
        (WorstCaseAttack, {"n_reps": 10, "output_dir": output_dir}),
        (LIRAAttack, {"n_shadow_models": 40, "output_dir": output_dir}),
    )
    for attack_cls, attack_kwargs in attack_plan:
        attack = attack_cls(**attack_kwargs)
        attack.attack(target)

Note: the attack code is taken from examples/pytorch/cifar/attack_pytorch.py and may depend on the model and dataset code linked above.

PyTorch Examples#

Simple PyTorch Example#

CIFAR Dataset Example#

This Page