Safe Support Vector Machine Notebook#
A Quick Start Guide to implementing Safer Support Vector Machines#
The commented-out path-manipulation commands are for developers only#
[1]:
import logging
import os
import numpy as np
from sklearn import datasets
# next few commented out lines are for developers only
# ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath("")))
# sys.path.append(ROOT_DIR)
# home = expanduser("~")
# sys.path.append(os.path.abspath(home + "/AI-SDC"))
# sys.path.insert(0, os.path.abspath(".."))
# Configure the root logger with the default handler and format.
logging.basicConfig()
# Named logger for this notebook; report messages at INFO level and above.
logger = logging.getLogger("wrapper_svm")
logger.setLevel(logging.INFO)
# ROOT_PROJECT_FOLDER = os.path.dirname(os.path.dirname(__file__))
# sys.path.append(ROOT_PROJECT_FOLDER)
from aisdc.safemodel.classifiers import SafeSVC
Use the sklearn Wisconsin breast cancer dataset#
[2]:
# Load the breast cancer dataset shipped with scikit-learn and expose the
# feature matrix (x) and the targets (y) as float64 NumPy arrays.
cancer = datasets.load_breast_cancer()
x, y = (
    np.asarray(part, dtype=np.float64)
    for part in (cancer.data, cancer.target)
)
Kernel for approximator: equivalent to rbf.#
[3]:
def rbf(x, y, gamma=1):
    """Return the RBF (Gaussian) kernel value between two vectors.

    Computes ``exp(-gamma * sum((x - y) ** 2))``.

    Args:
        x: First vector (array-like of numbers).
        y: Second vector, broadcast-compatible with ``x``.
        gamma: Coefficient applied to the squared distance; defaults to 1.

    Returns:
        The kernel value as a NumPy floating-point scalar.
    """
    # Work in float64 so that unsigned or small integer inputs cannot wrap
    # around when they are subtracted or squared, and so plain lists work.
    diff = np.asarray(x, dtype=np.float64) - np.asarray(y, dtype=np.float64)
    return np.exp(-gamma * np.sum(diff**2))
def rbf_svm(x, y, gamma=1):
    """Return the RBF kernel (Gram) matrix between the rows of x and y.

    Entry ``[i, j]`` equals ``exp(-gamma * sum((x[i] - y[j]) ** 2))``, i.e.
    the same value ``rbf(x[i, :], y[j, :], gamma)`` produces.

    Args:
        x: 2-D array-like, one sample per row.
        y: 2-D array-like, one sample per row, same number of columns as x.
        gamma: Coefficient applied to the squared distance; defaults to 1.

    Returns:
        A float64 array of shape ``(x.shape[0], y.shape[0])``.
    """
    # Work in float64 so integer inputs cannot wrap around when subtracted
    # or squared.
    x = np.asarray(x, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)
    r = np.zeros((x.shape[0], y.shape[0]))
    # One row of x at a time: broadcasting against all rows of y replaces the
    # inner Python loop while keeping the temporary at (rows of y, features).
    for i in range(x.shape[0]):
        r[i, :] = np.exp(-gamma * np.sum((x[i, :] - y) ** 2, axis=1))
    return r
Set parameters#
[4]:
# Hyperparameters passed to SafeSVC in the cells that follow.
gamma = 0.1  # Kernel width
C = 1  # Penalty term
# Dimension of approximator. 5 is below the recommended minimum of 1000
# reported by the release check later in this notebook.
dhat = 5
eps = 500  # DP level (not very private)
Define Differentially Private version with DP level (approximate)#
[5]:
# Build a SafeSVC from the parameters set above and fit it on the full dataset.
clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)
clf3.fit(x, y)
# Query the fitted model on the training data. Presumably c3 holds predicted
# labels and p3 class probabilities -- confirm against the SafeSVC API.
# Neither is used again in the cells shown here.
c3 = clf3.predict(x)
p3 = clf3.predict_proba(x)
Define the model and fit it.#
Save and Request Release#
We are warned that dhat is too low.#
[6]:
# Rebuild and refit the model with the current parameter values.
clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)
clf3.fit(x, y)
# Save the model, then request its release. The next cell prints
# testSaveSVC/target.json, presumably written by request_release -- confirm.
clf3.save(name="testSaveSVC.pkl")
clf3.request_release(path="testSaveSVC", ext="pkl")
[7]:
# Print the contents of target.json from the testSaveSVC directory.
target_json = os.path.normpath("testSaveSVC/target.json")
# Read with an explicit encoding instead of the platform-dependent default.
with open(target_json, encoding="utf-8") as f:
    print(f.read())
{
"data_name": "",
"n_samples": 0,
"features": {},
"n_features": 0,
"n_samples_orig": 0,
"generalisation_error": "unknown",
"safemodel": [
{
"researcher": "j4-smith",
"model_type": "SVC",
"details": "WARNING: model parameters may present a disclosure risk:\n- parameter dhat = 5 identified as less than the recommended min value of 1000.",
"recommendation": "Do not allow release",
"reason": "WARNING: model parameters may present a disclosure risk:\n- parameter dhat = 5 identified as less than the recommended min value of 1000.",
"timestamp": "2023-10-12 01:49:21"
}
],
"model_path": "model.pkl",
"model_name": "SafeSVC",
"model_params": {}
}
Set Parameters to safe values#
[8]:
# Same hyperparameters as before, except dhat.
gamma = 0.1  # Kernel width
C = 1  # Penalty term
# Dimension of approximator, now set to 1000, the recommended minimum named
# in the earlier release-check warning.
dhat = 1000
eps = 500  # DP level (not very private)
Define the model and fit it.#
Save and Request Release#
Model parameters are within recommended ranges. The saved model can proceed to the next step of the checking procedure#
[9]:
# Rebuild and refit the model with the updated parameter values.
clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)
clf3.fit(x, y)
# Save the model and request release again, using the same file name and
# output path as the earlier request.
clf3.save(name="testSaveSVC.pkl")
clf3.request_release(path="testSaveSVC", ext="pkl")
Examine the checkfile#
[10]:
# Print the contents of target.json from the testSaveSVC directory.
target_json = os.path.normpath("testSaveSVC/target.json")
# Read with an explicit encoding instead of the platform-dependent default.
with open(target_json, encoding="utf-8") as f:
    print(f.read())
{
"data_name": "",
"n_samples": 0,
"features": {},
"n_features": 0,
"n_samples_orig": 0,
"generalisation_error": "unknown",
"safemodel": [
{
"researcher": "j4-smith",
"model_type": "SVC",
"details": "Model parameters are within recommended ranges.\n",
"recommendation": "Proceed to next step of checking",
"timestamp": "2023-10-12 01:49:21"
}
],
"model_path": "model.pkl",
"model_name": "SafeSVC",
"model_params": {}
}
[ ]: