SafeModel Examples
This section demonstrates how to use the SafeModel package for ante-hoc privacy assessment.
SafeModel Usage

The following example shows how to wrap a scikit-learn classifier in a SafeModel class, train it, and run the disclosure checks:

SafeModel Usage Example
1"""Example showing how to integrate attacks into safemodel classes."""
2
3import logging
4
5import numpy as np
6from sklearn.datasets import fetch_openml
7from sklearn.model_selection import train_test_split
8from sklearn.preprocessing import LabelEncoder, OneHotEncoder
9
10from sacroml.attacks.target import Target
11from sacroml.safemodel.classifiers import SafeDecisionTreeClassifier
12
13output_dir = "outputs_safemodel"
14
15if __name__ == "__main__":
16 logging.info("Loading dataset")
17 nursery_data = fetch_openml(data_id=26, as_frame=True)
18 X = np.asarray(nursery_data.data, dtype=str)
19 y = np.asarray(nursery_data.target, dtype=str)
20
21 n_features = np.shape(X)[1]
22 indices = [
23 [0, 1, 2], # parents
24 [3, 4, 5, 6, 7], # has_nurs
25 [8, 9, 10, 11], # form
26 [12, 13, 14, 15], # children
27 [16, 17, 18], # housing
28 [19, 20], # finance
29 [21, 22, 23], # social
30 [24, 25, 26], # health
31 ]
32
33 logging.info("Splitting data into training and test sets")
34 X_train_orig, X_test_orig, y_train_orig, y_test_orig = train_test_split(
35 X, y, test_size=0.5, stratify=y, shuffle=True
36 )
37
38 logging.info("Preprocessing dataset")
39 label_enc = LabelEncoder()
40 feature_enc = OneHotEncoder()
41 X_train = feature_enc.fit_transform(X_train_orig).toarray()
42 y_train = label_enc.fit_transform(y_train_orig)
43 X_test = feature_enc.transform(X_test_orig).toarray()
44 y_test = label_enc.transform(y_test_orig)
45
46 logging.info("Defining the (safe) model")
47 model = SafeDecisionTreeClassifier(random_state=1)
48
49 logging.info("Training the model")
50 model.fit(X_train, y_train)
51 acc_train = model.score(X_train, y_train)
52 acc_test = model.score(X_test, y_test)
53 logging.info("Base model train accuracy: %.4f", acc_train)
54 logging.info("Base model test accuracy: %.4f", acc_test)
55
56 logging.info("Performing a preliminary check")
57 msg, disclosive = model.preliminary_check()
58
59 logging.info("Wrapping the model and data in a Target object")
60 target = Target(
61 model=model,
62 dataset_name="nursery",
63 X_train=X_train,
64 y_train=y_train,
65 X_test=X_test,
66 y_test=y_test,
67 X_train_orig=X_train_orig,
68 y_train_orig=y_train_orig,
69 X_test_orig=X_test_orig,
70 y_test_orig=y_test_orig,
71 )
72 for i in range(n_features):
73 target.add_feature(nursery_data.feature_names[i], indices[i], "onehot")
74
75 logging.info("Dataset: %s", target.dataset_name)
76 logging.info("Features: %s", target.features)
77 logging.info("X_train shape: %s", str(target.X_train.shape))
78 logging.info("y_train shape: %s", str(target.y_train.shape))
79 logging.info("X_test shape: %s", str(target.X_test.shape))
80 logging.info("y_test shape: %s", str(target.y_test.shape))
81
82 logging.info("Performing disclosure checks")
83 model.request_release(path=output_dir, ext="pkl", target=target)
84
85 logging.info("Please see the files generated in: %s", output_dir)
The SafeModel package provides wrappers around common machine learning models that automatically assess privacy risks ahead of training. This helps researchers identify potentially disclosive model configurations before investing time and compute in fitting them.
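For instance, the ante-hoc check can be run in isolation, before any call to fit. The sketch below is a minimal illustration, assuming the default safemodel rule set for decision trees flags very small values of min_samples_leaf as disclosive; the exact parameters and thresholds checked depend on the installed rule configuration:

from sacroml.safemodel.classifiers import SafeDecisionTreeClassifier

# Deliberately risky configuration: leaves that can match single records
# may memorise individuals (assumption: min_samples_leaf is among the
# hyperparameters covered by the default rules).
unsafe_model = SafeDecisionTreeClassifier(min_samples_leaf=1)

# No training has happened yet; the check inspects hyperparameters only.
msg, disclosive = unsafe_model.preliminary_check()

if disclosive:
    print("Configuration flagged as potentially disclosive:")
    print(msg)

Because preliminary_check inspects only the model's hyperparameters, a flagged configuration can be corrected before any compute is spent on training.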