Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 0aa7b55

Browse files
committed Dec 20, 2024
upload ldp protocols
1 parent 454f108 commit 0aa7b55

File tree

9 files changed

+2284
-0
lines changed

9 files changed

+2284
-0
lines changed
 

‎ldp_protocols/alh.py

+265
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
import numpy as np
2+
from sys import maxsize
3+
import xxhash
4+
import matplotlib.pyplot as plt
5+
6+
class AdaptiveLocalHashing:
    """
    Adaptive Local Hashing (ALH) protocol.

    The hash domain size ``g`` is selected by grid search so that it minimizes
    a weighted sum of the Adversarial Success Rate (ASR) and the estimator
    variance. Each input is hashed into ``[0, g-1]`` with a per-report random
    seed and the hashed value is then perturbed with a GRR-style randomizer.
    """

    def __init__(self, k: int, epsilon: float, w_asr: float = 0.5, w_variance: float = 0.5):
        """
        Initialize the Adaptive Local Hashing (ALH) protocol.

        Parameters
        ----------
        k : int
            Attribute's domain size. Must be an integer (Python or NumPy)
            greater than or equal to 2.
        epsilon : float
            Privacy guarantee. Must be a positive numerical value.
        w_asr : float, optional
            Weight given to the Adversarial Success Rate (ASR) in the objective function. Default is 0.5.
        w_variance : float, optional
            Weight given to the variance in the objective function. Default is 0.5.

        Raises
        ------
        ValueError
            If `k` is not an integer >= 2, `epsilon` is not positive (or is NaN),
            a weight lies outside [0, 1], or both weights are 0.
        """
        if not isinstance(k, (int, np.integer)) or k < 2:
            raise ValueError("k must be an integer >= 2.")
        # `not epsilon > 0` (instead of `epsilon <= 0`) also rejects NaN.
        if not epsilon > 0:
            raise ValueError("epsilon must be a numerical value greater than 0.")
        if not (0 <= w_asr <= 1) or not (0 <= w_variance <= 1):
            raise ValueError("Weights must be between 0 and 1.")

        total_weight = w_asr + w_variance
        if total_weight == 0:
            # Without this guard the normalization below would raise ZeroDivisionError.
            raise ValueError("At least one of w_asr and w_variance must be greater than 0.")

        # Normalize the weights so that their sum is 1
        self.w_asr = w_asr / total_weight
        self.w_variance = w_variance / total_weight
        self.k = int(k)
        self.epsilon = epsilon
        self.g = self.optimize_parameters()

        # Calculate probability for GRR-based perturbation
        self.p = np.exp(self.epsilon) / (np.exp(self.epsilon) + self.g - 1)
        self.q = 1 / self.g

    def get_parameter_range(self) -> np.ndarray:
        """
        Get the range of g values to optimize over.

        Returns
        -------
        numpy.ndarray
            Consecutive integers starting at 2 and ending just below
            ``max(k, round(exp(epsilon)) + 2)`` (the upper bound is exclusive).
            For a positive epsilon the array always contains at least ``2``.
        """
        upper_bound = max(self.k, int(np.round(np.exp(self.epsilon)) + 1) + 1)
        return np.arange(2, upper_bound)

    def _objective(self, g: int) -> float:
        """Return the weighted sum of ASR and variance for hash domain size `g`."""
        return self.w_asr * self.get_asr(g) + self.w_variance * self.get_variance(g)

    def _hash(self, value: int, seed: int) -> int:
        """Hash `value` with xxh32 under `seed` and reduce it into ``[0, g-1]``."""
        return xxhash.xxh32(str(value), seed=seed).intdigest() % self.g

    def _support(self, reported_value: int, seed: int) -> list:
        """Return every domain value in ``[0, k-1]`` that hashes to `reported_value` under `seed`."""
        return [v for v in range(self.k) if reported_value == self._hash(v, seed)]

    def optimize_parameters(self) -> int:
        """
        Grid-search optimization for the value of g to balance variance and ASR.

        Returns
        -------
        int
            The value of g in `get_parameter_range()` with the smallest
            objective value; ties are resolved in favour of the smallest g.
        """
        best_g = 2
        best_obj_value = float('inf')

        for g in self.get_parameter_range():
            obj_value = self._objective(g)
            # Strict comparison keeps the first (smallest) g on ties.
            if obj_value < best_obj_value:
                best_g = g
                best_obj_value = obj_value

        # The grid holds NumPy integers; return a plain int as annotated.
        return int(best_g)

    def obfuscate(self, input_data: int) -> tuple[int, int]:
        """
        Obfuscate the input data using the ALH mechanism.

        Parameters
        ----------
        input_data : int
            The true input value to be obfuscated. Must be in the range [0, k-1].

        Returns
        -------
        tuple[int, int]
            A tuple containing:
            - The sanitized (obfuscated) value (int) within the optimized hash domain size `g`.
            - The random seed (int) used for hashing.

        Raises
        ------
        ValueError
            If `input_data` is not in the range [0, k-1].
        """
        if input_data < 0 or input_data >= self.k:
            raise ValueError("input_data must be in the range [0, k-1].")

        # Generate random seed and hash the user's value
        rnd_seed = np.random.randint(0, maxsize, dtype=np.int64)
        hashed_input_data = self._hash(input_data, rnd_seed)

        # GRR-based perturbation: keep the hashed value with probability p,
        # otherwise report one of the other g - 1 hash values uniformly at random.
        if np.random.binomial(1, self.p) == 1:
            sanitized_value = hashed_input_data
        else:
            domain = np.arange(self.g)
            sanitized_value = np.random.choice(domain[domain != hashed_input_data])

        return sanitized_value, rnd_seed

    def estimate(self, noisy_reports: list) -> np.ndarray:
        """
        Estimate frequencies from noisy reports collected using the Adaptive Local Hashing (ALH) mechanism.

        For every report, each domain value that hashes (under the report's seed)
        to the reported value has its support count incremented. The counts are
        then debiased with `p` and `q`, clipped at zero and normalized.

        Parameters
        ----------
        noisy_reports : list of tuple (int, int)
            A list of noisy reports collected from users. Each report is a tuple containing:
            - `value` : The obfuscated hash-mapped value.
            - `seed` : The random seed used for hashing during the LH mechanism.

        Returns
        -------
        np.ndarray
            An array of estimated frequencies for each value in the domain `[0, k-1]`.
            The output array has size `k` and sums to 1. If every debiased estimate
            is non-positive (so nothing survives clipping), the uniform
            distribution is returned instead of an array of NaN.

        Raises
        ------
        ValueError
            If `noisy_reports` is empty.
        """
        n = len(noisy_reports)  # Number of reports
        if n == 0:
            raise ValueError("Noisy reports cannot be empty.")

        # Hash-based support counting for LH protocols
        support_counts = np.zeros(self.k)
        for value, seed in noisy_reports:
            for v in self._support(value, seed):
                support_counts[v] += 1

        # Unbiased frequency estimation
        freq_estimates = (support_counts - n * self.q) / (n * (self.p - self.q))

        # Ensure non-negative estimates and normalize
        clipped = np.maximum(freq_estimates, 0)
        total = np.sum(clipped)
        if total == 0:
            # No value has positive estimated mass; avoid a 0/0 division.
            return np.full(self.k, 1 / self.k)
        return clipped / total

    def attack(self, val_seed):
        """
        Perform a privacy attack on an obfuscated value generated using the Adaptive Local Hashing (ALH) protocol.

        The attack collects every domain value that hashes, under the reported
        seed, to the reported value and guesses one of them uniformly at random.

        Parameters
        ----------
        val_seed : tuple (int, int)
            A tuple containing:
            - `obfuscated value` : The hash-mapped value generated during obfuscation.
            - `seed` : The random seed used for hashing.

        Returns
        -------
        int
            The inferred true value of the input. If no valid candidate values are found, a random value
            within the domain `[0, k-1]` is returned.
        """
        lh_val = val_seed[0]
        rnd_seed = val_seed[1]

        candidates = self._support(lh_val, rnd_seed)
        if not candidates:
            return np.random.randint(self.k)
        return np.random.choice(candidates)

    def get_variance(self, g: int = None) -> float:
        """
        Compute the variance term of the LH mechanism for a given g.

        Parameters
        ----------
        g : int, optional
            Hash domain size. If None, use the optimized value of g.

        Returns
        -------
        float
            ``q * (1 - q) / (p - q) ** 2`` with ``p = exp(epsilon) / (exp(epsilon) + g - 1)``
            and ``q = 1 / g``.
        """
        if g is None:
            g = self.g

        p = np.exp(self.epsilon) / (np.exp(self.epsilon) + g - 1)
        q = 1 / g

        return q * (1 - q) / (p - q) ** 2

    def get_asr(self, g: int = None) -> float:
        """
        Compute the Adversarial Success Rate (ASR) of the LH mechanism for a given g.

        Parameters
        ----------
        g : int, optional
            Hash domain size. If None, use the optimized value of g.

        Returns
        -------
        float
            ``exp(epsilon) / ((exp(epsilon) + g - 1) * max(k / g, 1))``.
        """
        if g is None:
            g = self.g

        return np.exp(self.epsilon) / ((np.exp(self.epsilon) + g - 1) * max(self.k / g, 1))

    def plot_objective_function(self) -> None:
        """
        Plot the objective function over a range of g values, highlighting the optimal g value.
        """
        g_values = self.get_parameter_range()
        objective_values = [self._objective(g) for g in g_values]

        plt.plot(g_values, objective_values, marker='o', label='Objective Function')
        plt.xlabel('g')
        plt.ylabel('Objective Function Value')
        plt.title(f'Objective Function vs. g (epsilon={self.epsilon})')
        plt.grid(True)

        # Highlight the best g value
        plt.axvline(self.g, color='r', linestyle='--', label=f'Optimal g={self.g}')
        plt.legend()
        plt.yscale('log')
        plt.show()
There was a problem loading the remainder of the diff.

0 commit comments

Comments
 (0)
Please sign in to comment.