{ "cells": [ { "cell_type": "markdown", "id": "6105c66c", "metadata": {}, "source": [ "# Safe Support Vector Machine Notebook \n", "## A Quick Start Guide to implementing Safer Support Vector Machines\n", "### Commands commented out for path manipulation are for developers only\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "82797236", "metadata": {}, "outputs": [], "source": [ "import logging\n", "import os\n", "\n", "import numpy as np\n", "from sklearn import datasets\n", "\n", "# next few commented out lines are for developers only\n", "# ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(\"\")))\n", "# sys.path.append(ROOT_DIR)\n", "# home = expanduser(\"~\")\n", "# sys.path.append(os.path.abspath(home + \"/AI-SDC\"))\n", "# sys.path.insert(0, os.path.abspath(\"..\"))\n", "\n", "logging.basicConfig()\n", "logger = logging.getLogger(\"wrapper_svm\")\n", "logger.setLevel(logging.INFO)\n", "# ROOT_PROJECT_FOLDER = os.path.dirname(os.path.dirname(__file__))\n", "# sys.path.append(ROOT_PROJECT_FOLDER)\n", "from aisdc.safemodel.classifiers import SafeSVC" ] }, { "cell_type": "markdown", "id": "190329f4", "metadata": {}, "source": [ "## Use the sklearn Wisconsin breast cancer dataset" ] }, { "cell_type": "code", "execution_count": 2, "id": "85af89a0", "metadata": {}, "outputs": [], "source": [ "cancer = datasets.load_breast_cancer()\n", "x = np.asarray(cancer.data, dtype=np.float64)\n", "y = np.asarray(cancer.target, dtype=np.float64)" ] }, { "cell_type": "markdown", "id": "1410e2f4", "metadata": {}, "source": [ "## Kernel for approximator: equivalent to rbf." 
]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "69481c5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rbf(x, y, gamma=1):\n",
    "    \"\"\"Evaluate the RBF kernel ``exp(-gamma * sum((x - y) ** 2))`` for one pair.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x, y : ndarray\n",
    "        Operands of ``x - y``; the squared differences are summed over all entries.\n",
    "    gamma : float, default 1\n",
    "        Factor multiplying the summed squared difference inside the exponential.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    scalar\n",
    "        The kernel value for this pair.\n",
    "    \"\"\"\n",
    "    return np.exp(-gamma * np.sum((x - y) ** 2))\n",
    "\n",
    "\n",
    "def rbf_svm(x, y, gamma=1):\n",
    "    \"\"\"Build the matrix of pairwise RBF kernel values between rows of x and y.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x, y : ndarray\n",
    "        Two-dimensional arrays; rows are taken as ``x[i, :]`` and ``y[j, :]``.\n",
    "    gamma : float, default 1\n",
    "        Passed through unchanged to ``rbf``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ndarray of shape (x.shape[0], y.shape[0])\n",
    "        Entry ``[i, j]`` is ``rbf(x[i, :], y[j, :], gamma)``.\n",
    "    \"\"\"\n",
    "    r = np.zeros((x.shape[0], y.shape[0]))\n",
    "    for i in range(x.shape[0]):\n",
    "        for j in range(y.shape[0]):\n",
    "            r[i, j] = rbf(x[i, :], y[j, :], gamma)\n",
    "    return r"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7bdc592",
   "metadata": {},
   "source": [
    "## Set parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7cd6c7f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "gamma = 0.1 # Kernel width\n",
    "C = 1 # Penalty term\n",
    "dhat = 5 # Dimension of approximator\n",
    "eps = 500 # DP level (not very private)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cab1eed3",
   "metadata": {},
   "source": [
    "## Define Differentially Private version with DP level (approximate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1a7734dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)\n",
    "clf3.fit(x, y)\n",
    "c3 = clf3.predict(x)\n",
    "p3 = clf3.predict_proba(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6c08d536",
   "metadata": {},
   "source": [
    "## Define the model and fit it.\n",
    "## Save and Request Release\n",
    "### We are warned that dhat is too low."
] }, { "cell_type": "code", "execution_count": 6, "id": "c3b7e21f", "metadata": {}, "outputs": [], "source": [ "clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)\n", "clf3.fit(x, y)\n", "clf3.save(name=\"testSaveSVC.pkl\")\n", "clf3.request_release(path=\"testSaveSVC\", ext=\"pkl\")" ] },
{ "cell_type": "code", "execution_count": 7, "id": "9aa22802", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"data_name\": \"\",\n", " \"n_samples\": 0,\n", " \"features\": {},\n", " \"n_features\": 0,\n", " \"n_samples_orig\": 0,\n", " \"generalisation_error\": \"unknown\",\n", " \"safemodel\": [\n", " {\n", " \"researcher\": \"j4-smith\",\n", " \"model_type\": \"SVC\",\n", " \"details\": \"WARNING: model parameters may present a disclosure risk:\\n- parameter dhat = 5 identified as less than the recommended min value of 1000.\",\n", " \"recommendation\": \"Do not allow release\",\n", " \"reason\": \"WARNING: model parameters may present a disclosure risk:\\n- parameter dhat = 5 identified as less than the recommended min value of 1000.\",\n", " \"timestamp\": \"2023-10-12 01:49:21\"\n", " }\n", " ],\n", " \"model_path\": \"model.pkl\",\n", " \"model_name\": \"SafeSVC\",\n", " \"model_params\": {}\n", "}\n" ] } ], "source": [ "target_json = os.path.normpath(\"testSaveSVC/target.json\")\n", "with open(target_json) as f:\n", " print(f.read())" ] },
{ "cell_type": "markdown", "id": "cf32d434", "metadata": {}, "source": [ "## Set Parameters to safe values" ] },
{ "cell_type": "code", "execution_count": 8, "id": "87689404", "metadata": {}, "outputs": [], "source": [ "gamma = 0.1 # Kernel width\n", "C = 1 # Penalty term\n", "dhat = 1000 # Dimension of approximator\n", "eps = 500 # DP level (not very private)" ] },
{ "cell_type": "markdown", "id": "75c8400a", "metadata": {}, "source": [ "## Define the model and fit it.\n", "## Save and Request Release\n", "### Model parameters are within recommended ranges. The saved model can pass through to the next step of the checking procedure." ] },
{ "cell_type": "code", "execution_count": 9, "id": "90369029", "metadata": {}, "outputs": [], "source": [ "clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)\n", "clf3.fit(x, y)\n", "clf3.save(name=\"testSaveSVC.pkl\")\n", "clf3.request_release(path=\"testSaveSVC\", ext=\"pkl\")" ] },
{ "cell_type": "markdown", "id": "f7ae65a4", "metadata": {}, "source": [ "## Examine the checkfile" ] },
{ "cell_type": "code", "execution_count": 10, "id": "ca19340f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"data_name\": \"\",\n", " \"n_samples\": 0,\n", " \"features\": {},\n", " \"n_features\": 0,\n", " \"n_samples_orig\": 0,\n", " \"generalisation_error\": \"unknown\",\n", " \"safemodel\": [\n", " {\n", " \"researcher\": \"j4-smith\",\n", " \"model_type\": \"SVC\",\n", " \"details\": \"Model parameters are within recommended ranges.\\n\",\n", " \"recommendation\": \"Proceed to next step of checking\",\n", " \"timestamp\": \"2023-10-12 01:49:21\"\n", " }\n", " ],\n", " \"model_path\": \"model.pkl\",\n", " \"model_name\": \"SafeSVC\",\n", " \"model_params\": {}\n", "}\n" ] } ], "source": [ "target_json = os.path.normpath(\"testSaveSVC/target.json\")\n", "with open(target_json) as f:\n", " print(f.read())" ] },
{ "cell_type": "code", "execution_count": null, "id": "bc6c0dfe", "metadata": {}, "outputs": [], "source": [] } ],
"metadata": { "kernelspec": { "display_name": "aisdc-v1.1", "language": "python", "name": "aisdc-v1.1" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 5 }