SafeModel Examples

This section demonstrates how to use the SafeModel package for ante-hoc privacy assessment.

SafeModel Usage

Example showing how to use SafeModel wrappers for privacy-aware machine learning:

SafeModel Usage Example
 1"""Example showing how to integrate attacks into safemodel classes."""
 2
 3import logging
 4
 5import numpy as np
 6from sklearn.datasets import fetch_openml
 7from sklearn.model_selection import train_test_split
 8from sklearn.preprocessing import LabelEncoder, OneHotEncoder
 9
10from sacroml.attacks.target import Target
11from sacroml.safemodel.classifiers import SafeDecisionTreeClassifier
12
13output_dir = "outputs_safemodel"
14
if __name__ == "__main__":
    # Configure logging so the INFO messages below are actually emitted.
    # Without this the root logger defaults to WARNING and every
    # logging.info(...) call in this example is silently dropped.
    logging.basicConfig(level=logging.INFO)

    logging.info("Loading dataset")
    # Nursery dataset (OpenML data id 26): categorical features only.
    nursery_data = fetch_openml(data_id=26, as_frame=True)
    X = np.asarray(nursery_data.data, dtype=str)
    y = np.asarray(nursery_data.target, dtype=str)

    n_features = np.shape(X)[1]
    # One-hot column groups for each original feature, used below to
    # register the encoded features on the Target object.
    indices = [
        [0, 1, 2],  # parents
        [3, 4, 5, 6, 7],  # has_nurs
        [8, 9, 10, 11],  # form
        [12, 13, 14, 15],  # children
        [16, 17, 18],  # housing
        [19, 20],  # finance
        [21, 22, 23],  # social
        [24, 25, 26],  # health
    ]

    logging.info("Splitting data into training and test sets")
    # random_state makes the split reproducible, matching the fixed seed
    # used for the model below; previously the example was nondeterministic.
    X_train_orig, X_test_orig, y_train_orig, y_test_orig = train_test_split(
        X, y, test_size=0.5, stratify=y, shuffle=True, random_state=1
    )

    logging.info("Preprocessing dataset")
    label_enc = LabelEncoder()
    feature_enc = OneHotEncoder()
    # Fit encoders on the training split only; apply the same mapping
    # to the test split so columns line up.
    X_train = feature_enc.fit_transform(X_train_orig).toarray()
    y_train = label_enc.fit_transform(y_train_orig)
    X_test = feature_enc.transform(X_test_orig).toarray()
    y_test = label_enc.transform(y_test_orig)

    logging.info("Defining the (safe) model")
    model = SafeDecisionTreeClassifier(random_state=1)

    logging.info("Training the model")
    model.fit(X_train, y_train)
    acc_train = model.score(X_train, y_train)
    acc_test = model.score(X_test, y_test)
    logging.info("Base model train accuracy: %.4f", acc_train)
    logging.info("Base model test accuracy: %.4f", acc_test)

    logging.info("Performing a preliminary check")
    msg, disclosive = model.preliminary_check()
    # Surface the check result instead of discarding it.
    logging.info("Preliminary check disclosive=%s: %s", disclosive, msg)

    logging.info("Wrapping the model and data in a Target object")
    target = Target(
        model=model,
        dataset_name="nursery",
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        X_train_orig=X_train_orig,
        y_train_orig=y_train_orig,
        X_test_orig=X_test_orig,
        y_test_orig=y_test_orig,
    )
    # Register each original categorical feature with its one-hot columns.
    for i in range(n_features):
        target.add_feature(nursery_data.feature_names[i], indices[i], "onehot")

    logging.info("Dataset: %s", target.dataset_name)
    logging.info("Features: %s", target.features)
    logging.info("X_train shape: %s", str(target.X_train.shape))
    logging.info("y_train shape: %s", str(target.y_train.shape))
    logging.info("X_test shape: %s", str(target.X_test.shape))
    logging.info("y_test shape: %s", str(target.y_test.shape))

    logging.info("Performing disclosure checks")
    model.request_release(path=output_dir, ext="pkl", target=target)

    logging.info("Please see the files generated in: %s", output_dir)

The SafeModel package provides wrappers around common machine learning models that automatically assess privacy risks before training. This helps researchers identify potentially problematic model configurations before investing time in training.