Safe Support Vector Machine Notebook#

A Quick Start Guide to implementing Safer Support Vector Machines#

The commented-out path-manipulation commands below are for developers only#

[1]:
import logging
import os

import numpy as np
from sklearn import datasets

# next few commented out lines are for developers only
# ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath("")))
# sys.path.append(ROOT_DIR)
# home = expanduser("~")
# sys.path.append(os.path.abspath(home + "/AI-SDC"))
# sys.path.insert(0, os.path.abspath(".."))

# Give the root logger a default handler (basicConfig does nothing if the
# root logger already has handlers configured).
logging.basicConfig()
# Named logger for this notebook; let INFO-level messages through.
logger = logging.getLogger("wrapper_svm")
logger.setLevel(logging.INFO)
# ROOT_PROJECT_FOLDER = os.path.dirname(os.path.dirname(__file__))
# sys.path.append(ROOT_PROJECT_FOLDER)
from aisdc.safemodel.classifiers import SafeSVC

Use the sklearn Wisconsin breast cancer dataset#

[2]:
# Load the breast cancer dataset shipped with scikit-learn and convert both
# the feature matrix (x) and the target vector (y) to float64 arrays.
cancer = datasets.load_breast_cancer()
x, y = (
    np.asarray(column, dtype=np.float64)
    for column in (cancer.data, cancer.target)
)

Kernel for the approximator: equivalent to the RBF kernel.#

[3]:
def rbf(x, y, gamma=1):
    """Evaluate the RBF kernel ``exp(-gamma * ||x - y||**2)`` for one pair.

    Parameters
    ----------
    x, y : numpy.ndarray
        Operands that can be subtracted from one another; the squared
        differences are summed over every element.
    gamma : float, default=1
        Multiplier applied to the squared distance before exponentiation.

    Returns
    -------
    float
        The kernel value for the pair ``(x, y)``.
    """
    squared_distance = np.square(x - y).sum()
    return np.exp(-gamma * squared_distance)


def rbf_svm(x, y, gamma=1):
    """Compute the RBF kernel (Gram) matrix between the rows of two arrays.

    Entry ``[i, j]`` of the result is ``exp(-gamma * ||x[i] - y[j]||**2)``.

    Parameters
    ----------
    x : numpy.ndarray of shape (n, d)
        First set of points, one per row.
    y : numpy.ndarray of shape (m, d)
        Second set of points, one per row.
    gamma : float, default=1
        Multiplier applied to the squared distances before exponentiation.

    Returns
    -------
    numpy.ndarray of shape (n, m)
        Float64 matrix of pairwise kernel values.
    """
    n_rows, n_cols = x.shape[0], y.shape[0]
    gram = np.empty((n_rows, n_cols))
    for i in range(n_rows):
        # Broadcasting x[i] (d,) against y (m, d) fills a whole row of the
        # kernel matrix in one NumPy call instead of m separate Python calls,
        # while keeping peak memory at O(m * d).
        gram[i, :] = np.exp(-gamma * np.sum((x[i, :] - y) ** 2, axis=1))
    return gram

Set parameters#

[4]:
# Parameters for the first SafeSVC run.
eps = 500  # DP level (not very private)
dhat = 5  # dimension of the approximator
C = 1  # penalty term
gamma = 0.1  # kernel width

Define the differentially private version with the given DP level (approximate)#

[5]:
# Build the SafeSVC wrapper from the parameters chosen earlier, train it on
# the full dataset, then run both prediction methods on the same samples.
clf3 = SafeSVC(C=C, gamma=gamma, dhat=dhat, eps=eps)
clf3.fit(x, y)
c3 = clf3.predict(x)  # output of predict() for the training samples
p3 = clf3.predict_proba(x)  # output of predict_proba() for the same samples

Define the model and fit it.#

Save and Request Release#

We are warned that dhat is too low (5 is below the recommended minimum of 1000).#

[6]:
# Fit a fresh model with the current parameters, save it to a pickle file,
# and request release; the resulting report is read back from
# testSaveSVC/target.json in the next cell.
clf3 = SafeSVC(C=C, gamma=gamma, dhat=dhat, eps=eps)
clf3.fit(x, y)
clf3.save(name="testSaveSVC.pkl")
clf3.request_release(ext="pkl", path="testSaveSVC")
[7]:
# Print the report file found in the release folder.
target_json = os.path.normpath("testSaveSVC/target.json")
with open(target_json) as f:
    report_text = f.read()
print(report_text)
{
    "data_name": "",
    "n_samples": 0,
    "features": {},
    "n_features": 0,
    "n_samples_orig": 0,
    "generalisation_error": "unknown",
    "safemodel": [
        {
            "researcher": "j4-smith",
            "model_type": "SVC",
            "details": "WARNING: model parameters may present a disclosure risk:\n- parameter dhat = 5 identified as less than the recommended min value of 1000.",
            "recommendation": "Do not allow release",
            "reason": "WARNING: model parameters may present a disclosure risk:\n- parameter dhat = 5 identified as less than the recommended min value of 1000.",
            "timestamp": "2023-10-12 01:49:21"
        }
    ],
    "model_path": "model.pkl",
    "model_name": "SafeSVC",
    "model_params": {}
}

Set Parameters to safe values#

[8]:
# Parameters for the second SafeSVC run: only dhat changes, raised to the
# recommended minimum reported by the earlier warning.
eps = 500  # DP level (not very private)
dhat = 1000  # dimension of the approximator
C = 1  # penalty term
gamma = 0.1  # kernel width

Define the model and fit it.#

Save and Request Release#

Examine the checkfile#

[10]:
# Define the model with the safe parameters set above, fit it, then save and
# request release again. Without this refit, target.json would still hold
# the report from the earlier dhat = 5 run, so a fresh Restart & Run All
# would print the stale warning instead of the result for the new parameters.
clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)
clf3.fit(x, y)
clf3.save(name="testSaveSVC.pkl")
clf3.request_release(path="testSaveSVC", ext="pkl")

# Examine the checkfile produced for the refitted model.
target_json = os.path.normpath("testSaveSVC/target.json")
with open(target_json) as f:
    print(f.read())
{
    "data_name": "",
    "n_samples": 0,
    "features": {},
    "n_features": 0,
    "n_samples_orig": 0,
    "generalisation_error": "unknown",
    "safemodel": [
        {
            "researcher": "j4-smith",
            "model_type": "SVC",
            "details": "Model parameters are within recommended ranges.\n",
            "recommendation": "Proceed to next step of checking",
            "timestamp": "2023-10-12 01:49:21"
        }
    ],
    "model_path": "model.pkl",
    "model_name": "SafeSVC",
    "model_params": {}
}
[ ]: