Skip to content

Commit

Permalink
Remove public_partitions from AggregateParams (#270)
Browse files (browse the repository at this point in the history)
  • Loading branch information
rialg authored May 10, 2022
1 parent bf5c1c4 commit 3837118
Show file tree
Hide file tree
Showing 11 changed files with 274 additions and 293 deletions.
22 changes: 9 additions & 13 deletions examples/codelab/codelab_PipelineDP.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -662,15 +662,14 @@
" noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n",
" metrics=[pipeline_dp.Metrics.COUNT],\n",
" max_partitions_contributed=1,\n",
" max_contributions_per_partition=1,\n",
" public_partitions=public_partitions)\n",
" max_contributions_per_partition=1)\n",
"\n",
" data_extractors = pipeline_dp.DataExtractors(\n",
" privacy_id_extractor=lambda row: row.user_id,\n",
" partition_extractor=lambda row: row.product_view_0,\n",
" value_extractor=lambda row: row.has_conversion)\n",
"\n",
" dp_result = dp_engine.aggregate(data, params, data_extractors)\n",
" dp_result = dp_engine.aggregate(data, params, data_extractors, public_partitions)\n",
"\n",
" budget_accountant.compute_budgets()\n",
"\n",
Expand Down Expand Up @@ -810,8 +809,7 @@
" noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n",
" metrics=[pipeline_dp.Metrics.COUNT],\n",
" max_partitions_contributed=1,\n",
" max_contributions_per_partition=1,\n",
" public_partitions=public_partitions_products)\n",
" max_contributions_per_partition=1)\n",
"```\n",
"\n",
" If you use public partitions and `LAPLACE` noise, it's possible to set the `total_delta` argument to a `0` value.\n",
Expand Down Expand Up @@ -970,7 +968,6 @@
" max_contributions_per_partition=1,\n",
" min_value=0,\n",
" max_value=100,\n",
" public_partitions=public_partitions_products,\n",
" budget_weight=1/3)\n",
"```\n",
"\n",
Expand All @@ -989,7 +986,8 @@
"\n",
" dp_result_conversion_value_metrics = (\n",
" dp_engine.aggregate(data, params_conversion_value_metrics,\n",
" data_extractors_conversion_value_metrics))\n",
" data_extractors_conversion_value_metrics,\n",
" public_partitions_products))\n",
"```\n",
"\n",
"\n",
Expand All @@ -1003,7 +1001,6 @@
" max_contributions_per_partition=1,\n",
" min_value=0,\n",
" max_value=1,\n",
" public_partitions=public_partitions_products,\n",
" budget_weight=2/3)\n",
" ```\n",
" ```\n",
Expand All @@ -1016,7 +1013,8 @@
" dp_result_conversion_rate_metrics = (\n",
" dp_engine.aggregate(\n",
" data, params_conversion_rate_metrics,\n",
" data_extractors_conversion_rate_metrics))\n",
" data_extractors_conversion_rate_metrics,\n",
" public_partitions_products))\n",
"```\n",
"\n",
" The only change is in the `pipeline_dp.AggregateParams` instance, in which you now define `mean` and `count` as aggregations, and assign two-thirds of your privacy budget to this calculation. Because you want to have the same contribution bounds for both statistics and calculate them on top of the same `has_conversion` variable, you can combine them in the same `pipeline_dp.AggregateParams` instance and calculate them at the same time.\n",
Expand Down Expand Up @@ -1074,7 +1072,6 @@
" max_contributions_per_partition=1,\n",
" min_value=0,\n",
" max_value=100,\n",
" public_partitions=public_partitions,\n",
" budget_weight=1/3)\n",
"\n",
" data_extractors_conversion_value_metrics = pipeline_dp.DataExtractors(\n",
Expand All @@ -1084,7 +1081,7 @@
"\n",
" dp_result_conversion_value_metrics = (\n",
" dp_engine.aggregate(data, params_conversion_value_metrics,\n",
" data_extractors_conversion_value_metrics))\n",
" data_extractors_conversion_value_metrics, public_partitions))\n",
"\n",
" params_conversion_rate_metrics = pipeline_dp.AggregateParams(\n",
" noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n",
Expand All @@ -1093,7 +1090,6 @@
" max_contributions_per_partition=1,\n",
" min_value=0,\n",
" max_value=1,\n",
" public_partitions=public_partitions,\n",
" budget_weight=2/3)\n",
"\n",
" data_extractors_conversion_rate_metrics = pipeline_dp.DataExtractors(\n",
Expand All @@ -1103,7 +1099,7 @@
"\n",
" dp_result_conversion_rate_metrics = (\n",
" dp_engine.aggregate(data, params_conversion_rate_metrics,\n",
" data_extractors_conversion_rate_metrics))\n",
" data_extractors_conversion_rate_metrics, public_partitions))\n",
"\n",
" budget_accountant.compute_budgets()\n",
"\n",
Expand Down
6 changes: 3 additions & 3 deletions examples/movie_view_ratings/run_all_frameworks.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@ def calc_dp_rating_metrics(movie_views, backend, public_partitions):
max_partitions_contributed=2,
max_contributions_per_partition=1,
min_value=1,
max_value=5,
public_partitions=public_partitions)
max_value=5)

# Specify how to extract privacy_id, partition_key and value from an
# element of movie view collection.
Expand All @@ -88,7 +87,8 @@ def calc_dp_rating_metrics(movie_views, backend, public_partitions):
value_extractor=lambda mv: mv.rating)

# Run aggregation.
dp_result = dp_engine.aggregate(movie_views, params, data_extractors)
dp_result = dp_engine.aggregate(movie_views, params, data_extractors,
public_partitions)

budget_accountant.compute_budgets()
return dp_result
Expand Down
Loading

0 comments on commit 3837118

Please sign in to comment.