User Stories#
This section contains real-world user stories demonstrating how SACRO-ML is used in Trusted Research Environments (TREs).
User Story 1: Basic Model Training and Attack#
A basic workflow showing model training and privacy assessment.
Researcher Template:
User Story 1 - Researcher Template#
1"""RESEARCHER EXAMPLE FOR USER STORY 1.
2
3This file is an example of a researcher creating/training a machine learning
4model and requesting for it to be released.
5
6This specific example uses the nursery dataset: data is read in and
7pre-processed, and a classifier is trained and tested on this dataset.
8
9This example follows User Story 1.
10
11Steps:
12
13- Researcher reads in data and processes it.
14- Researcher creates and trains a classifier.
15- Researcher runs experiments themselves to check if their model is disclosive
16 or not.
17- Once satisfied, researcher calls request_release() to make it ready for TRE
18"""
19
20import logging
21import os
22
23import numpy as np
24import pandas as pd
25from sklearn.model_selection import train_test_split
26from sklearn.preprocessing import LabelEncoder, OneHotEncoder
27
28from sacroml.attacks.target import Target
29from sacroml.safemodel.classifiers import SafeDecisionTreeClassifier
30
31
def main():
    """Train a classifier on the nursery data and request its release.

    Acting as the researcher, this function:

    - creates the ``training_artefacts`` output directory;
    - reads the nursery CSV and makes a stratified 50/50 train/test split;
    - one-hot encodes the features and label-encodes the classes;
    - fits a ``SafeDecisionTreeClassifier`` and runs its preliminary check;
    - wraps the model and data in a ``Target`` and runs each attack directly;
    - calls ``request_release()`` to produce the files handed to the TRE.
    """
    # Make sure the directory that receives the generated files exists.
    # ``exist_ok=True`` lets the script be re-run without failing.
    save_directory = "training_artefacts"
    print("Creating directory for training artefacts")
    os.makedirs(save_directory, exist_ok=True)

    print()
    print("Acting as researcher...")
    print()

    # Read in and pre-process the dataset - replace this with your data
    # reading/pre-processing code
    print(os.getcwd())
    filename = os.path.join(".", "user_stories_resources", "dataset_26_nursery.csv")
    print("Reading data from " + filename)
    data_df = pd.read_csv(filename)

    print()

    labels = np.asarray(data_df["class"])
    feature_df = data_df.drop(columns=["class"])
    data = np.asarray(feature_df)

    # One entry per raw feature, in column order: the column positions that
    # feature is registered with on the Target (encoding "onehot").
    indices: list[list[int]] = [
        [0, 1, 2],  # parents
        [3, 4, 5, 6, 7],  # has_nurs
        [8, 9, 10, 11],  # form
        [12, 13, 14, 15],  # children
        [16, 17, 18],  # housing
        [19, 20],  # finance
        [21, 22, 23],  # social
        [24, 25, 26],  # health
    ]

    (
        X_train_orig,
        X_test_orig,
        y_train_orig,
        y_test_orig,
    ) = train_test_split(
        data,
        labels,
        test_size=0.5,
        stratify=labels,
        shuffle=True,
    )

    # Fit the encoders on the training split only, then apply them to the
    # test split.
    label_enc = LabelEncoder()
    feature_enc = OneHotEncoder()
    X_train = feature_enc.fit_transform(X_train_orig).toarray()
    y_train = label_enc.fit_transform(y_train_orig)
    X_test = feature_enc.transform(X_test_orig).toarray()
    y_test = label_enc.transform(y_test_orig)

    # Only show warnings and above from these sacroml loggers.
    logging.getLogger("attack-reps").setLevel(logging.WARNING)
    logging.getLogger("prep-attack-data").setLevel(logging.WARNING)
    logging.getLogger("attack-from-preds").setLevel(logging.WARNING)

    # Create and train a SafeDecisionTree classifier on the above data
    model = SafeDecisionTreeClassifier(random_state=1)
    model.fit(X_train, y_train)

    # Run a preliminary check to make sure the model is not disclosive
    model.preliminary_check()

    # Wrap the model and data in a Target object
    # needed in order to call request_release()
    target = Target(
        model=model,
        dataset_name="nursery",
        # processed data
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        # original unprocessed data
        X_train_orig=X_train_orig,
        y_train_orig=y_train_orig,
        X_test_orig=X_test_orig,
        y_test_orig=y_test_orig,
    )
    # Pair every raw feature name with its encoded column positions. The names
    # come from the frame without "class", so they line up with ``data``.
    for feature_name, encoded_columns in zip(feature_df.columns, indices):
        target.add_feature(feature_name, encoded_columns, "onehot")

    logging.info("Dataset: %s", target.dataset_name)
    logging.info("Features: %s", target.features)
    logging.info("X_train shape: %s", str(target.X_train.shape))
    logging.info("y_train shape: %s", str(target.y_train.shape))
    logging.info("X_test shape: %s", str(target.X_test.shape))
    logging.info("y_test shape: %s", str(target.y_test.shape))

    # Researcher can check for themselves whether their model passes individual
    # disclosure checks. Leave this code as-is for output disclosure checking.
    print("==========> first running attacks explicitly via run_attack()")
    for attack_name in ["worst_case", "attribute", "lira"]:
        print(f"===> running {attack_name} attack directly")
        metadata = model.run_attack(target, attack_name, save_directory)
        logging.info("metadata is:")
        for key, val in metadata.items():
            if isinstance(val, dict):
                # Nested section: log its name, then each of its entries.
                logging.info(" %s ", key)
                for key1, val2 in val.items():
                    logging.info("  %s : %s", key1, val2)
            else:
                logging.info(" %s : %s", key, val)

    # Modify/re-run all of the above code until you're happy with the model
    # you've created. If the tests do not pass, try changing the model or
    # hyperparameters until the tests pass. When you are satisfied and ready to
    # release your model, call the request_release() function with the Target
    # class you created above.

    # This code will run checks for the TRE staff.

    # NOTE: you should only do this when you have confirmed that the above
    # tests pass. You would not normally waste your and TRE time calling this
    # unless you have already checked that your model is not disclosive or can
    # provide a justification for an exception request.

    print("===> now running attacks implicitly via request_release()")
    model.request_release(path=save_directory, ext="pkl", target=target)

    # The files generated can be found in this file location.
    print(f"Please see the files generated in: {save_directory}")


if __name__ == "__main__":
    main()
TRE Implementation:
User Story 1 - TRE Implementation#
1"""TRE SCRIPT FOR USER STORY 1.
2
3This file contains the code needed to run user story 1.
4
5To run: change the user_story key inside the .yaml config file to '1', and run
6the 'generate_disclosure_risk_report.py' file.
7
8NOTE: you should not need to change this file at all, set all parameters via
9the .yaml file.
10"""
11
12import argparse
13import os
14
15import yaml
16
17from sacroml.attacks.attack_report_formatter import GenerateTextReport
18
19
def generate_report(directory, attack_results, target, outfile):
    """Write a text disclosure-risk report from previously saved results.

    ``attack_results``, ``target`` and ``outfile`` are file names; each one is
    joined onto ``directory`` before use.
    """
    print()
    print("Acting as TRE...")
    print()

    # Resolve every file against the artefacts directory up front.
    results_path = os.path.join(directory, attack_results)
    target_path = os.path.join(directory, target)
    report_path = os.path.join(directory, outfile)

    report = GenerateTextReport()
    report.process_attack_target_json(results_path, target_filename=target_path)
    report.export_to_file(output_filename=report_path, move_files=True)

    print("Results written to " + report_path)
38
def run_user_story(release_config: dict):
    """Run user story 1: generate the report from the loaded configuration."""
    # Configuration keys, in the positional order generate_report() expects.
    config_keys = (
        "training_artefacts_dir",
        "attack_results",
        "target_results",
        "outfile",
    )
    generate_report(*[release_config[key] for key in config_keys])
47
48
if __name__ == "__main__":
    # Command-line entry point: load the YAML configuration, then run the story.
    parser = argparse.ArgumentParser(
        description=(
            "Generate a risk report after request_release() "
            "has been called by researcher"
        )
    )

    parser.add_argument(
        "--config_file",
        type=str,
        action="store",
        dest="config_file",
        required=False,
        default="default_config.yaml",
        help=("Name of yaml configuration file"),
    )

    args = parser.parse_args()

    # A missing/unreadable file raises OSError and malformed YAML raises
    # yaml.YAMLError. Stop here in either case: carrying on would only fail
    # later with an unrelated NameError because ``config`` was never bound.
    try:
        with open(args.config_file, encoding="utf-8") as handle:
            config = yaml.load(handle, Loader=yaml.loader.SafeLoader)
    except (AttributeError, OSError, yaml.YAMLError) as error:
        raise SystemExit(
            "Invalid command. Try --help to get more details. "
            f"Error message is: {error}"
        ) from error

    # An empty file loads as None; the story needs a mapping of settings.
    if not isinstance(config, dict):
        raise SystemExit(
            f"Configuration file '{args.config_file}' does not contain a YAML mapping"
        )

    run_user_story(config)
User Story 2: Data Processing and Privacy Assessment#
Advanced workflow including data preprocessing and comprehensive privacy assessment.
Data Processing:
User Story 2 - Data Processing#
1"""SUPPORTING FILE FOR USER STORY 2.
2
3This file is an example of a function created by a researcher that will
4pre-process a dataset.
5
6To use: write a function that will process your input data, and output the
7processed version.
8
9NOTE: in order to work, this function needs to:
10
11- take a single parameter (the data to be processed)
12- return a dictionary
13- which contains the keys:
14 ['n_features_raw_data', 'X_transformed', 'y_transformed', 'train_indices']
15"""
16
17import numpy as np
18from sklearn.model_selection import train_test_split
19from sklearn.preprocessing import LabelEncoder, OneHotEncoder
20
21
def process_dataset(data):
    """Pre-process the raw dataset for user story 2.

    Replace the contents of this function with your own pre-processing code.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw data containing a ``class`` column plus the feature columns.

    Returns
    -------
    dict
        ``n_features_raw_data`` (number of raw feature columns),
        ``X_transformed`` (one-hot encoded features),
        ``y_transformed`` (label-encoded classes) and
        ``train_indices`` (row indices selected for training).
    """
    labels = np.asarray(data["class"])
    features = np.asarray(data.drop(columns=["class"], inplace=False))

    n_features_raw_data = np.shape(features)[1]

    label_enc = LabelEncoder()
    feature_enc = OneHotEncoder()
    X_transformed = feature_enc.fit_transform(features).toarray()
    y_transformed = label_enc.fit_transform(labels)

    row_indices = np.arange(np.shape(X_transformed)[0])

    # Any method of choosing training rows works here, but it MUST be
    # deterministic: both the researcher and the TRE call this function, and
    # the TRE can only rebuild the training set if both calls select the same
    # rows. Hence the fixed seed.
    split_seed = 42
    train_indices, _ = train_test_split(
        row_indices,
        test_size=0.5,
        stratify=y_transformed,
        shuffle=True,
        random_state=split_seed,
    )

    return {
        "n_features_raw_data": n_features_raw_data,
        "X_transformed": X_transformed,
        "y_transformed": y_transformed,
        "train_indices": train_indices,
    }
Researcher Template:
User Story 2 - Researcher Template#
1"""RESEARCHER EXAMPLE FOR USER STORY 2.
2
3This file is an example of a researcher creating/training a machine learning
4model to be released from a secure environment.
5
6This specific example uses the nursery dataset: data is read in and
7pre-processed, and a classifier is trained and tested on this dataset.
8
9This example follows User Story 2.
10
11Steps:
12
13- Researcher creates a function to read and process a dataset, which a TRE can
14 also use and call.
15- Researcher creates and trains a classifier on this data.
16- Researcher emails (or otherwise contacts) TRE to request the model be released.
17- TREs will use this code/functions to test the model themselves.
18"""
19
20import logging
21import os
22
23import numpy as np
24import pandas as pd
25from data_processing_researcher import process_dataset
26
27from sacroml.attacks.target import Target
28from sacroml.safemodel.classifiers import SafeDecisionTreeClassifier
29
30
def run_user_story():
    """Train a model on the nursery data and request its release.

    Acting as the researcher, this function pre-processes the data with the
    shared ``process_dataset`` function, splits it using the returned training
    indices, fits a ``SafeDecisionTreeClassifier`` and calls
    ``request_release()`` without a target. A ``Target`` is then built only to
    log a summary of the data.
    """
    # Make sure the directory that receives the generated files exists.
    # ``exist_ok=True`` lets the script be re-run without failing.
    directory = "training_artefacts"
    print("Creating directory for training artefacts")
    os.makedirs(directory, exist_ok=True)

    print()
    print("Acting as researcher...")
    print()

    # Read in and pre-process the dataset - replace this with your dataset
    filename = os.path.join(".", "user_stories_resources", "dataset_26_nursery.csv")
    print("Reading data from " + filename)
    data = pd.read_csv(filename)

    # Write a function to pre-process the data that the TRE can call (see
    # data_processing_researcher.py). Use the output of this function to split
    # the data into training/testing sets.

    # NOTE: to use this user story/script, the process_dataset function MUST:
    # take a single parameter (the data to be processed) and return a
    # dictionary which contains the keys:
    # >>> ['n_features_raw_data', 'X_transformed', 'y_transformed', 'train_indices']
    # as in this example.

    returned = process_dataset(data)

    X_transformed = np.asarray(returned["X_transformed"])
    y_transformed = np.asarray(returned["y_transformed"])

    # Boolean mask over the rows: True where the row index is a training index.
    # Rows keep their original order, and arrays (not lists) are produced,
    # matching what the TRE script builds from the same function.
    is_train = np.isin(
        np.arange(len(y_transformed)), list(returned["train_indices"])
    )

    X_train = X_transformed[is_train]
    y_train = y_transformed[is_train]
    X_test = X_transformed[~is_train]
    y_test = y_transformed[~is_train]

    # Only show warnings and above from these sacroml loggers.
    logging.getLogger("attack-reps").setLevel(logging.WARNING)
    logging.getLogger("prep-attack-data").setLevel(logging.WARNING)
    logging.getLogger("attack-from-preds").setLevel(logging.WARNING)

    # Build a model and request its release
    model = SafeDecisionTreeClassifier(random_state=1)
    model.fit(X_train, y_train)
    model.request_release(path=directory, ext="pkl")

    # Wrap the model and data in a Target object
    target = Target(
        model=model,
        dataset_name="nursery",
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )

    # NOTE: we assume here that the researcher does not use the target.save()
    # function and instead provides only the model and the list of indices
    # which have been used to split the dataset, which will allow a TRE to
    # re-create the input data used in training.

    # The name was passed to Target as ``dataset_name``, so read it back under
    # that attribute.
    logging.info("Dataset: %s", target.dataset_name)
    logging.info("Features: %s", target.features)
    logging.info("X_train shape = %s", np.shape(target.X_train))
    logging.info("y_train shape = %s", np.shape(target.y_train))
    logging.info("X_test shape = %s", np.shape(target.X_test))
    logging.info("y_test shape = %s", np.shape(target.y_test))


if __name__ == "__main__":
    run_user_story()
TRE Implementation:
User Story 2 - TRE Implementation#
1"""TRE SCRIPT FOR USER STORY 2.
2
3This file contains the code needed to run user story 2.
4
5To run: change the user_story key inside the .yaml config file to '2', and run
6the 'generate_disclosure_risk_report.py' file.
7
8NOTE: you should not need to change this file at all, set all parameters via
9the .yaml file.
10"""
11
12import argparse
13import importlib
14import os
15import pickle
16
17import numpy as np
18import pandas as pd
19import yaml
20
21from sacroml.attacks.attack_report_formatter import GenerateTextReport
22from sacroml.attacks.target import Target
23
24
def process_dataset(filename, function_name, data_to_be_processed):
    """Load a function from a Python file and call it on the given data.

    DO NOT CHANGE: this is a wrapper function that allows a callable function
    to be read from a file.

    Parameters
    ----------
    filename : str
        Path of the Python source file that defines the function.
    function_name : str
        Name of the function to look up in that file.
    data_to_be_processed
        The single argument passed to the loaded function.

    Returns
    -------
    Whatever the loaded function returns.

    Raises
    ------
    ImportError
        If no import spec can be created for ``filename``.
    AttributeError
        If the file does not define ``function_name``.
    """
    # ``import importlib`` alone does not guarantee that the ``util``
    # submodule is loaded, so import it explicitly here.
    import importlib.util

    spec = importlib.util.spec_from_file_location(function_name, filename)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load Python module from '{filename}'")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    function = getattr(module, function_name)
    return function(data_to_be_processed)
36
37
def generate_report(
    data_processing_filename,
    data_processing_function_name,
    dataset_filename,
    directory,
    target_model,
    attack_results,
    target_filename,
    outfile,
):
    """Re-create the researcher's data, run the attacks and write a report.

    Parameters
    ----------
    data_processing_filename : str
        Python file containing the researcher's pre-processing function.
    data_processing_function_name : str
        Name of that function.
    dataset_filename : str
        CSV file holding the raw dataset.
    directory : str
        Directory containing the model; generated files are written here too.
    target_model : str
        File name of the pickled model inside ``directory``.
    attack_results : str
        File name of the attack results JSON inside ``directory``.
    target_filename : str
        File name of the target description inside ``directory``.
    outfile : str
        File name of the text report to write inside ``directory``.
    """
    print()
    print("Acting as TRE...")
    print(
        "(when instructions on how to recreate the dataset have "
        "been provided by the researcher)"
    )
    print(directory)
    print()

    # Read in the model supplied by the researcher.
    # NOTE(security): unpickling can execute arbitrary code stored in the
    # file - only load models that come from a trusted researcher.
    filename = os.path.join(directory, target_model)
    print("Reading target model from " + filename)
    with open(filename, "rb") as f:
        model = pickle.load(f)

    # Read the data used by the researcher, and process it using their defined function
    print("Reading data from " + dataset_filename)
    data = pd.read_csv(dataset_filename)

    returned = process_dataset(
        data_processing_filename, data_processing_function_name, data
    )
    X_transformed = np.asarray(returned["X_transformed"])
    y_transformed = np.asarray(returned["y_transformed"])

    # Boolean mask over the rows: True where the row index is a training index.
    # Rows keep their original order within each split.
    is_train = np.isin(
        np.arange(len(y_transformed)), list(returned["train_indices"])
    )

    X_train = X_transformed[is_train]
    y_train = y_transformed[is_train]
    X_test = X_transformed[~is_train]
    y_test = y_transformed[~is_train]

    # Wrap the model and data in a Target object
    target = Target(
        model=model,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )

    # TRE calls request_release()
    print("===> now running attacks implicitly via request_release()")
    model.request_release(path=directory, ext="pkl", target=target)

    print(f"Please see the files generated in: {directory}")

    # Generate a report indicating calculated disclosure risk
    text_report = GenerateTextReport()
    text_report.process_attack_target_json(
        os.path.join(directory, attack_results),
        target_filename=os.path.join(directory, target_filename),
    )

    report_path = os.path.join(directory, outfile)
    text_report.export_to_file(output_filename=report_path, move_files=True)

    print("Results written to " + str(report_path))
120
121
def run_user_story(release_config: dict):
    """Run user story 2: generate the report from the loaded configuration."""
    # Configuration keys, in the positional order generate_report() expects.
    config_keys = (
        "data_processing_filename",
        "data_processing_function_name",
        "dataset_filename",
        "training_artefacts_dir",
        "target_model",
        "attack_results",
        "target_results",
        "outfile",
    )
    generate_report(*[release_config[key] for key in config_keys])
134
135
if __name__ == "__main__":
    # Command-line entry point: load the YAML configuration, then run the story.
    parser = argparse.ArgumentParser(
        description=(
            "Generate a risk report after request_release() "
            "has been called by researcher"
        )
    )

    parser.add_argument(
        "--config_file",
        type=str,
        action="store",
        dest="config_file",
        required=False,
        default="default_config.yaml",
        help=("Name of yaml configuration file"),
    )

    args = parser.parse_args()

    # A missing/unreadable file raises OSError and malformed YAML raises
    # yaml.YAMLError. Stop here in either case: carrying on would only fail
    # later with an unrelated NameError because ``config`` was never bound.
    try:
        with open(args.config_file, encoding="utf-8") as handle:
            config = yaml.load(handle, Loader=yaml.loader.SafeLoader)
    except (AttributeError, OSError, yaml.YAMLError) as error:
        raise SystemExit(
            "Invalid command. Try --help to get more details. "
            f"Error message is: {error}"
        ) from error

    # An empty file loads as None; the story needs a mapping of settings.
    if not isinstance(config, dict):
        raise SystemExit(
            f"Configuration file '{args.config_file}' does not contain a YAML mapping"
        )

    run_user_story(config)
User Story 3: Advanced Privacy Analysis#
Comprehensive privacy analysis workflow for sensitive datasets.
Researcher Template:
User Story 3 - Researcher Template#
1"""RESEARCHER EXAMPLE FOR USER STORY 3.
2
3This file is an example of a researcher creating/training a machine learning
4model to be released from a secure environment.
5
6This specific example uses the nursery dataset: data is read in and
7pre-processed, and a classifier is trained and tested on this dataset.
8
9This example follows User Story 3.
10
11Steps:
12
13- Researcher creates and pre-processes a dataset.
14- Researcher creates and trains a classifier on this data.
15- Researcher saves the model manually (e.g. using pickle, not through
16 request_release() or similar).
17- Researcher emails (or otherwise contacts) TRE to request the model be released.
18- TREs will use this model and data to test the model themselves.
19"""
20
21import os
22import pickle
23
24import numpy as np
25import pandas as pd
26from sklearn.ensemble import RandomForestClassifier
27from sklearn.metrics import accuracy_score
28from sklearn.model_selection import train_test_split
29from sklearn.preprocessing import LabelEncoder, OneHotEncoder
30
31
def run_user_story():
    """Train a random forest on the nursery data and save it for the TRE.

    The encoded train/test arrays are written as text files and the fitted
    model is pickled by hand, all inside the ``training_artefacts`` directory.
    """
    # Create the output directory for the saved data and model if needed.
    directory = "training_artefacts"
    print("Creating directory for training artefacts")

    if not os.path.exists(directory):
        os.makedirs(directory)

    # Read in and pre-process the dataset - replace this with your data
    # reading/pre-processing code.
    csv_path = os.path.join(".", "user_stories_resources", "dataset_26_nursery.csv")
    print("Reading data from " + csv_path)
    raw = pd.read_csv(csv_path)

    # Label-encode the class column, then one-hot encode everything else.
    encoded_labels = LabelEncoder().fit_transform(raw["class"].values)
    label_frame = pd.DataFrame({"class": encoded_labels})
    raw_features = raw.drop(columns=["class"], inplace=False)

    one_hot = OneHotEncoder()
    feature_frame = pd.DataFrame(
        one_hot.fit_transform(raw_features).toarray(),
        columns=one_hot.get_feature_names_out(),
    )

    X_train, X_test, y_train, y_test = train_test_split(
        feature_frame.values,
        label_frame.to_numpy().flatten(),
        test_size=0.7,
        random_state=42,
    )

    # Save the training and test data to a file which a TRE can access
    print("Saving training/testing data to ./" + directory)
    arrays_to_save = (
        ("X_train.txt", X_train),
        ("y_train.txt", y_train),
        ("X_test.txt", X_test),
        ("y_test.txt", y_test),
    )
    for array_file, array in arrays_to_save:
        np.savetxt(os.path.join(directory, array_file), array, fmt="%d")

    # Create, train and test a model
    # Replace this with your training and testing code
    target_model = RandomForestClassifier(
        min_samples_split=5,
        min_samples_leaf=5,
        max_depth=None,
        bootstrap=False,
    )
    target_model.fit(X_train, y_train)

    train_acc = accuracy_score(y_train, target_model.predict(X_train))
    test_acc = accuracy_score(y_test, target_model.predict(X_test))
    print(f"Training accuracy on model: {train_acc:.2f}")
    print(f"Testing accuracy on model: {test_acc:.2f}")

    # Save your model somewhere a TRE can access
    model_path = os.path.join(directory, "model.pkl")
    print("Saving model to " + model_path)
    with open(model_path, "wb") as model_file:
        pickle.dump(target_model, model_file)


if __name__ == "__main__":
    run_user_story()
TRE Implementation:
User Story 3 - TRE Implementation#
1"""TRE SCRIPT FOR USER STORY 3.
2
3This file contains the code needed to run user story 3.
4
5To run: change the user_story key inside the .yaml config file to '3', and run
6the 'generate_disclosure_risk_report.py' file.
7
8NOTE: you should not need to change this file at all, set all parameters via
9the .yaml file.
10"""
11
12import argparse
13import logging
14import os
15import pickle
16
17import numpy as np
18import yaml
19
20from sacroml.attacks.attack_report_formatter import GenerateTextReport
21from sacroml.attacks.likelihood_attack import LIRAAttack
22from sacroml.attacks.target import Target
23from sacroml.attacks.worst_case_attack import WorstCaseAttack
24
25
def generate_report(
    directory,
    target_model,
    X_train,
    y_train,
    X_test,
    y_test,
    target_filename,
    outfile,
):
    """Attack the researcher's saved model and write a disclosure-risk report.

    ``target_model``, ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are
    file names inside ``directory``. ``target_filename`` is looked up inside
    the ``target`` sub-directory that this function saves, and ``outfile`` is
    the name of the report written to ``directory``.
    """
    print()
    print("Acting as TRE...")
    print()

    if not os.path.exists(directory):
        os.makedirs(directory)

    # Suppress messages from SACRO-ML -- comment out these lines to
    # see all the sacroml logging statements
    for logger_name in ("attack-reps", "prep-attack-data", "attack-from-preds"):
        logging.getLogger(logger_name).setLevel(logging.WARNING)

    # Read the model to be released as supplied by the researcher
    model_filename = os.path.join(directory, target_model)
    print("Reading target model from " + model_filename)
    with open(model_filename, "rb") as model_file:
        loaded_model = pickle.load(model_file)

    # Read the training/testing data as supplied by the researcher
    print("Reading training/testing data from ./" + directory)
    train_features = np.loadtxt(os.path.join(directory, X_train))
    train_labels = np.loadtxt(os.path.join(directory, y_train))
    test_features = np.loadtxt(os.path.join(directory, X_test))
    test_labels = np.loadtxt(os.path.join(directory, y_test))

    # Wrap the training and test data into the Target object
    target = Target(
        model=loaded_model,
        X_train=train_features,
        y_train=train_labels,
        X_test=test_features,
        y_test=test_labels,
    )
    target.save(os.path.join(directory, "target"))

    # Run the worst-case attack
    worst_case = WorstCaseAttack(n_dummy_reps=10, output_dir=directory)
    worst_case.attack(target)

    # Run the LiRA attack to test disclosure risk
    lira = LIRAAttack(n_shadow_models=100, output_dir=directory)
    lira.attack(target)

    # Build the text report from the attack results and the saved target.
    results_json = os.path.join(directory, "report") + ".json"
    saved_target = os.path.join(directory, "target", target_filename)
    report_path = os.path.join(directory, outfile)

    report = GenerateTextReport()
    report.process_attack_target_json(results_json, target_filename=saved_target)
    report.export_to_file(
        output_filename=report_path,
        move_files=True,
        model_filename=model_filename,
    )

    print("Results written to " + report_path)
94
95
def run_user_story(release_config: dict):
    """Run user story 3: generate the report from the loaded configuration."""
    # Configuration keys, in the positional order generate_report() expects.
    config_keys = (
        "training_artefacts_dir",
        "target_model",
        "X_train_path",
        "y_train_path",
        "X_test_path",
        "y_test_path",
        "target_results",
        "outfile",
    )
    generate_report(*[release_config[key] for key in config_keys])
108
109
if __name__ == "__main__":
    # Command-line entry point: load the YAML configuration, then run the story.
    parser = argparse.ArgumentParser(
        description=(
            "Generate a risk report after request_release() "
            "has been called by researcher"
        )
    )

    parser.add_argument(
        "--config_file",
        type=str,
        action="store",
        dest="config_file",
        required=False,
        default="default_config.yaml",
        help=("Name of yaml configuration file"),
    )

    args = parser.parse_args()

    # A missing/unreadable file raises OSError and malformed YAML raises
    # yaml.YAMLError. Stop here in either case: carrying on would only fail
    # later with an unrelated NameError because ``config`` was never bound.
    try:
        with open(args.config_file, encoding="utf-8") as handle:
            config = yaml.load(handle, Loader=yaml.loader.SafeLoader)
    except (AttributeError, OSError, yaml.YAMLError) as error:
        raise SystemExit(
            "Invalid command. Try --help to get more details. "
            f"Error message is: {error}"
        ) from error

    # An empty file loads as None; the story needs a mapping of settings.
    if not isinstance(config, dict):
        raise SystemExit(
            f"Configuration file '{args.config_file}' does not contain a YAML mapping"
        )

    run_user_story(config)
Configuration and Utilities#
Default Configuration:
Default Configuration File#
1---
2# DIRECTIONS FOR USE
3#
4# Researcher should fill in this file with relevant parameters for the model
5# they are releasing. Parameters tagged with a 'generated by code' label are
6# files that are generated by sacroml. You can change the filename, but it is
7# not necessary. All other parameters need to be set by either the researcher
8# or TRE.
9
10# Scenario to be run
11user_story: UNDEFINED
12
13# Details of experiments and files - replace these with the relevant filenames.
14
15# Path to the dataset used to train the model.
16# Researcher should supply this.
17dataset_filename: "./user_stories_resources/dataset_26_nursery.csv"
18
19# Name of the file (or directory for keras) containing the saved model.
20# Researcher should supply this.
21target_model: "model.pkl"
22
23# Names of the report files, and the directory with all the files needed to assess this release.
24# Generated by code.
25outfile: "summary.txt"
26attack_results: "report.json"
27training_artefacts_dir: "training_artefacts"
28
29# User story 1, 2 or 3: name of the target results file generated by sacroml.
30# Generated by code.
31target_results: "target.yaml"
32
33# User story 2
34data_processing_filename: user_story_2/data_processing_researcher.py
35data_processing_function_name: process_dataset
36
37# User story 3
38X_train_path: "X_train.txt"
39y_train_path: "y_train.txt"
40X_test_path: "X_test.txt"
41y_test_path: "y_test.txt"
42
43# User story 4 - replace with path to csv files
44train_probabilities: "output_train.csv"
45test_probabilities: "output_test.csv"
46...
Report Generation:
Disclosure Risk Report Generation#
1"""TRE script to perform disclosure checking for a trained ML model.
2
3Researchers should fill out the relevant parameters in the .yaml file, which
4should be in the same directory as this file. TREs can change the script that
5is run using the user_story parameter at the top of the file.
6
7To run this code:
8
9python generate_disclosure_risk_report.py (with the .yaml file in the same directory)
10
11NOTE: you should not need to change this file at all.
12"""
13
14import argparse
15
16import yaml
17from user_story_1 import user_story_1_tre
18from user_story_2 import user_story_2_tre
19from user_story_3 import user_story_3_tre
20from user_story_4 import user_story_4_tre
21from user_story_7 import user_story_7_tre
22from user_story_8 import user_story_8_tre
23
if __name__ == "__main__":
    # Command-line entry point: load the YAML configuration and dispatch to
    # the TRE script of the selected user story.
    parser = argparse.ArgumentParser(
        description=("Run user stories code from a config file")
    )

    parser.add_argument(
        "--config_file",
        type=str,
        action="store",
        dest="config_file",
        required=False,
        default="default_config.yaml",
        help=("Name of yaml configuration file"),
    )

    args = parser.parse_args()

    # A missing/unreadable file raises OSError and malformed YAML raises
    # yaml.YAMLError. Stop here in either case: carrying on would only fail
    # later with an unrelated NameError because ``config`` was never bound.
    try:
        with open(args.config_file, encoding="utf-8") as handle:
            config = yaml.load(handle, Loader=yaml.loader.SafeLoader)
    except (AttributeError, OSError, yaml.YAMLError) as error:  # pragma:no cover
        raise SystemExit(
            "Invalid command. Try --help to get more details. "
            f"Error message is: {error}"
        ) from error

    # An empty file loads as None; the settings must be a mapping.
    if not isinstance(config, dict):  # pragma:no cover
        raise SystemExit(
            f"Configuration file '{args.config_file}' does not contain a YAML mapping"
        )

    user_story = config["user_story"]
    # Accept a quoted number such as '1' as well as a bare integer.
    if isinstance(user_story, str) and user_story.strip().isdigit():
        user_story = int(user_story.strip())

    # Implemented user stories, keyed by their number.
    tre_modules = {
        1: user_story_1_tre,
        2: user_story_2_tre,
        3: user_story_3_tre,
        4: user_story_4_tre,
        7: user_story_7_tre,
        8: user_story_8_tre,
    }

    try:
        tre_module = tre_modules.get(user_story)
    except TypeError:
        # Unhashable YAML value (e.g. a list or mapping): not a valid story.
        tre_module = None

    if user_story == "UNDEFINED":
        print(
            "User story not selected, please select a user story by "
            "referring to user_stories_flow_chart.png and adding the "
            "relevant number to the first line of 'default_config.yaml'"
        )
    elif tre_module is not None:
        tre_module.run_user_story(config)
    else:
        raise NotImplementedError(f"User story {user_story} has not been implemented")