From bfe942113cc4de294a5cc1f7fec2b3f64f401e84 Mon Sep 17 00:00:00 2001 From: David Sommer Date: Wed, 19 May 2021 12:10:01 +0200 Subject: [PATCH] added eps_ADP_upper_bound() method --- README.md | 2 ++ core/probabilitybuckets_light.py | 29 +++++++++++++++++++- example_composition.py | 46 +++++++++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d410986..f3b6a52 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ For the correct choice of number_of_buckets and factor, see below. ### NEWS +Provides now a class method returning an epsilon for a given target-delta. Previously, only the reverse was implemented. + Supports now approximate differential privacy, probabilistic differential privacy, Renyi differential privacy, and the ability to construct a bucket distribution from an (epsilon, delta)-graph. ### Directory /core diff --git a/core/probabilitybuckets_light.py b/core/probabilitybuckets_light.py index e7048e4..ef406ff 100644 --- a/core/probabilitybuckets_light.py +++ b/core/probabilitybuckets_light.py @@ -6,6 +6,7 @@ import logging import numpy as np +from scipy import optimize import copy import gc import os @@ -512,6 +513,32 @@ def delta_ADP_lower_bound(self, eps): vals[vals < 0] = 0 # = max(0, vals) return np.sum(vals) + self.distinguishing_events + def eps_ADP_upper_bound(self, delta): + """ + Returns an upper bound of epsilon for a target delta. Throws descriptive errors if it cannot satisfy the + conditions. Without error correction, the optimizer might not converge properly. + """ + if not self.error_correction: + self.logger.warning("Error-correction disabled, optimizer might not converge properly.") + + # The maximal epsilon we can serve. 
For any epsilons larger, self.delta_ADP_upper_bound will return the + # delta of the infinity bucket+dist_events only, rendering upper-bound-epsilon=np.inf + max_eps = (self.number_of_buckets // 2 ) * self.log_factor + + try: + root = optimize.bisect(lambda eps: self.delta_ADP_upper_bound(eps) - delta, 0, max_eps) + except ValueError as e: + if self.delta_ADP_upper_bound(eps=max_eps) > delta: + raise ValueError("Required target-delta is smaller than self.delta_ADP_upper_bound(max_eps) can serve. " + "For an instant remedy, increase number_of_buckets, or increase factor.") + elif self.delta_ADP_upper_bound(eps=0.0) < delta: + self.logger.warning("Returning over-approximation eps=0. " + "(self.delta_ADP_upper_bound(eps=0) < target-delta)") + return 0.0 + raise e + + return root + def renyi_divergence_upper_bound(self, alpha): """ returns a upper bound on the alpha renyi-divergence for a given alpha >= 1 @@ -680,7 +707,7 @@ def create_bucket_distribution(self, delta_func, DP_type): raise MemoryError("Insufficient memory. Use smaller number_of_buckets.") from e # we try to solve min_w G.dot(w) - delta_func(eps_vec) with w_i > 0 forall i - w = scipy.optimize.nnls(G, delta_vec, maxiter=10 * G.shape[1])[0] + w = optimize.nnls(G, delta_vec, maxiter=10 * G.shape[1])[0] self.bucket_distribution = w[:-1].copy() diff --git a/example_composition.py b/example_composition.py index 235f557..e2f6d38 100644 --- a/example_composition.py +++ b/example_composition.py @@ -1,5 +1,6 @@ # written by David Sommer (david.sommer at inf.ethz.ch) +import traceback import numpy as np from core.probabilitybuckets_light import ProbabilityBuckets from matplotlib import pyplot as plt @@ -77,11 +78,11 @@ # Now we build the delta(eps) graphs from the computed distribution. 
eps_vector = np.linspace(0, 3, 100) -upper_bound = [privacybuckets_composed.delta_ADP_upper_bound(eps) for eps in eps_vector] -lower_bound = [privacybuckets_composed.delta_ADP_lower_bound(eps) for eps in eps_vector] +upper_bound_delta = [privacybuckets_composed.delta_ADP_upper_bound(eps) for eps in eps_vector] +lower_bound_delta = [privacybuckets_composed.delta_ADP_lower_bound(eps) for eps in eps_vector] -plt.plot(eps_vector, upper_bound, label="upper_bound") -plt.plot(eps_vector, lower_bound, label="lower_bound") +plt.plot(eps_vector, upper_bound_delta, label="upper_bound") +plt.plot(eps_vector, lower_bound_delta, label="lower_bound") plt.legend() plt.title("Extended Randomized response with eps={:e}, delta={:f} after {:d} compositions".format(eps_rr, delta, 2**k)) plt.xlabel("eps") @@ -90,6 +91,43 @@ plt.show() +# we can ask for a specific epsilon (upper bound) for a given delta directly. +# for illustration purposes, we do not just switch axis of the previous plot but call also the corresponding method. 
+delta_vector = np.linspace(0.75, 0.57, 100) +upper_bound_eps = [privacybuckets_composed.eps_ADP_upper_bound(delta) for delta in delta_vector] + +plt.plot(delta_vector, upper_bound_eps, '--', alpha=0.5, label="upper_bound (class method)") +plt.plot(upper_bound_delta, eps_vector, alpha=0.5, label="upper_bound (axis switch of previous plot)") +plt.legend() +plt.title("Extended Randomized response with eps={:e}, delta={:f} after {:d} compositions".format(eps_rr, delta, 2**k)) +plt.xlabel("delta") +plt.ylabel("eps") +plt.ticklabel_format(useOffset=False) # Hotfix for the behaviour of my current matplotlib version +plt.show() + + +# Illustration of what happens when we call ProbabilityBuckets.eps_ADP_upper_bound(delta) with an unsuitable delta +try: + eps = privacybuckets_composed.eps_ADP_upper_bound(delta=.5) +except ValueError as e: + print("") + print("[*] THIS IS AN INTENTIONALLY INDUCED ERROR FOR ILLUSTRATION PURPOSES!") + print(" (target delta was too small.)") + print("") + + traceback.print_exc() + + print("") + print("[*] THIS WAS AN INTENTIONALLY INDUCED ERROR FOR ILLUSTRATION PURPOSES!") + print(" (target delta was too small.)") + print("") + +print("[*] Here, the target delta is too big.") +eps = privacybuckets_composed.eps_ADP_upper_bound(delta=0.9) +print(f"for delta=0.9: eps={eps}") +print("") + + # abusing internals, we can look at the bucket distribution plt.plot(privacybuckets_composed.bucket_distribution) plt.title("bucket distribution")