Automated rollback of commit 7e89dad

galenmandrew · tensorflower-gardener · commit 34ad9dbedc68 · 2022-04-12T11:34:14.000-07:00
PiperOrigin-RevId: 441245348
diff --git a/tensorflow_privacy/privacy/analysis/BUILD b/tensorflow_privacy/privacy/analysis/BUILD
@@ -51,6 +51,12 @@ py_test(
     deps = [":compute_noise_from_budget_lib"],
 )
 
+py_library(
+    name = "dp_event",
+    srcs = ["dp_event.py"],
+    srcs_version = "PY3",
+)
+
 py_library(
     name = "gdp_accountant",
     srcs = ["gdp_accountant.py"],
diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py
@@ -0,0 +1,214 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Standard DpEvent classes.
+
+A `DpEvent` represents the (hyper)parameters of a differentially
+private query, amplification mechanism, or composition, that are necessary
+and sufficient for privacy accounting. Various independent implementations of DP
+algorithms that are functionally equivalent from an accounting perspective may
+correspond to the same `DpEvent`. Similarly, various independent implementations
+of accounting algorithms may consume the same `DpEvent`.
+
+All `DpEvents` processed together are assumed to take place on a single dataset
+of records. `DpEvents` fall into roughly three categories:
+ - `DpEvents` that release an output, and incur a privacy cost,
+   e.g., `GaussianDpEvent`.
+ - `DpEvents` that select a subset (or subsets) of the dataset, and run nested
+   `DpEvents` on those subsets, e.g., `PoissonSampledDpEvent`.
+ - `DpEvents` that represent (possibly sequentially) applying (multiple)
+   mechanisms to the dataset (or currently active subset). Currently, these are
+   only `ComposedDpEvent` and `SelfComposedDpEvent`.
+
+Each `DpEvent` should completely document the mathematical behavior and
+assumptions of the mechanism it represents so that the writer of an accountant
+class can implement the accounting correctly without knowing any other
+implementation details of the algorithm that produced it.
+
+New mechanism types should be given a corresponding `DpEvent` class, although
+not all accountants will be required to support them. In general,
+`PrivacyAccountant` implementations are not required to be aware of all
+`DpEvent` classes, but they should support the following basic events and handle
+them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`, `ComposedDpEvent`, and
+`SelfComposedDpEvent`. Their `supports(event)` should return `False` for
+`UnsupportedDpEvent` or any other event type they have not been designed to
+handle.
+
+To ensure that a `PrivacyAccountant` does not accidentally start to return
+incorrect results, the following should be enforced:
+ * `DpEvent` classes and their parameters should never be removed, barring some
+   extended, onerous deprecation process.
+ * New parameters cannot be added to existing mechanisms unless they are
+   optional. That is, old composed `DpEvent` objects that do not include them
+   must remain valid.
+ * The meaning of existing mechanisms or parameters must not change. That is,
+   existing mechanisms should not have their implementations change in ways that
+   alter their privacy properties; new `DpEvent` classes should be added
+   instead.
+ * `PrivacyAccountant` implementations are expected to return `False` from
+   `supports(event)` when processing unknown mechanisms.
+"""
+
+from typing import List, Union
+
+import attr
+
+
+class DpEvent(object):
+  """Represents application of a private mechanism.
+
+  A `DpEvent` describes a differentially private mechanism sufficiently for
+  computing the associated privacy losses, both in isolation and in combination
+  with other `DpEvent`s.
+  """
+
+
+@attr.s(frozen=True)
+class NoOpDpEvent(DpEvent):
+  """Represents application of an operation with no privacy impact.
+
+  A `NoOpDpEvent` is generally never required, but it can be useful as a
+  placeholder where a `DpEvent` is expected, such as in tests or some live
+  accounting pipelines.
+  """
+
+
+@attr.s(frozen=True)
+class NonPrivateDpEvent(DpEvent):
+  """Represents application of a non-private operation.
+
+  This `DpEvent` should be used when an operation is performed that does not
+  satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite
+  epsilon/delta when encountering a `NonPrivateDpEvent`.
+  """
+
+
+@attr.s(frozen=True)
+class UnsupportedDpEvent(DpEvent):
+  """Represents application of an as-yet unsupported operation.
+
+  This `DpEvent` should be used when an operation is performed that does not yet
+  have any associated DP description, or if the description is temporarily
+  inaccessible, for example, during development. All `PrivacyAccountant`s should
+  return `supports(event) == False` for `UnsupportedDpEvent`.
+  """
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class GaussianDpEvent(DpEvent):
+  """Represents an application of the Gaussian mechanism.
+
+  For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z.
+  If the norms of the values are bounded ||v_i|| <= C, the noise_multiplier is
+  defined as s / C.
+  """
+  noise_multiplier: float
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class LaplaceDpEvent(DpEvent):
+  """Represents an application of the Laplace mechanism.
+
+  For values v_i and noise z sampled coordinate-wise from the Laplace
+  distribution L(0, s), this mechanism returns sum_i v_i + z.
+  The probability density function of the Laplace distribution L(0, s) with
+  parameter s is given as exp(-|x|/s) * (0.5/s) at x for any real value x.
+  If the L_1 norm of each value is bounded ||v_i||_1 <= C, the noise_multiplier
+  is defined as s / C.
+  """
+  noise_multiplier: float
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class SelfComposedDpEvent(DpEvent):
+  """Represents repeated application of a mechanism.
+
+  The repeated applications may be adaptive, where the query producing each
+  event depends on the results of prior queries.
+
+  This is equivalent to a `ComposedDpEvent` that contains a list of length
+  `count` of identical copies of `event`.
+  """
+  event: DpEvent
+  count: int
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class ComposedDpEvent(DpEvent):
+  """Represents application of a series of composed mechanisms.
+
+  The composition may be adaptive, where the query producing each event depends
+  on the results of prior queries.
+  """
+  events: List[DpEvent]
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class PoissonSampledDpEvent(DpEvent):
+  """Represents an application of Poisson subsampling.
+
+  Each record in the dataset is included in the sample independently with
+  probability `sampling_probability`. Then the `DpEvent` `event` is applied
+  to the sample of records.
+  """
+  sampling_probability: float
+  event: DpEvent
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class SampledWithReplacementDpEvent(DpEvent):
+  """Represents sampling a fixed-size batch of records with replacement.
+
+  A sample of `sample_size` (possibly repeated) records is drawn uniformly at
+  random from the set of possible samples of a source dataset of size
+  `source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of
+  records.
+  """
+  source_dataset_size: int
+  sample_size: int
+  event: DpEvent
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class SampledWithoutReplacementDpEvent(DpEvent):
+  """Represents sampling a fixed-size batch of records without replacement.
+
+  A sample of `sample_size` unique records is drawn uniformly at random from the
+  set of possible samples of a source dataset of size `source_dataset_size`.
+  Then the `DpEvent` `event` is applied to the sample of records.
+  """
+  source_dataset_size: int
+  sample_size: int
+  event: DpEvent
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class SingleEpochTreeAggregationDpEvent(DpEvent):
+  """Represents aggregation for a single epoch using one or more trees.
+
+  Multiple tree-aggregation steps can occur, but it is required that each
+  record occurs at most once *across all trees*. See appendix D of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  To represent the common case where the same record can occur in multiple
+  trees (but still at most once per tree), wrap this with `SelfComposedDpEvent`
+  or `ComposedDpEvent` and use a scalar for `step_counts`.
+
+  Attributes:
+    noise_multiplier: The ratio of the noise per node to the sensitivity.
+    step_counts: The number of steps in each tree. May be a scalar for a single
+      tree.
+  """
+  noise_multiplier: float
+  step_counts: Union[int, List[int]]