Skip to content

Commit

Permalink
Allow flags and integer RAPPOR fields to be recorded with no noise.
Browse files Browse the repository at this point in the history
BUG=584553

Review URL: https://codereview.chromium.org/1676653002

Cr-Commit-Position: refs/heads/master@{#376884}
  • Loading branch information
holte authored and Commit bot committed Feb 23, 2016
1 parent 7679a4d commit 4592135
Show file tree
Hide file tree
Showing 9 changed files with 203 additions and 35 deletions.
17 changes: 12 additions & 5 deletions components/rappor/byte_vector_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,17 +141,24 @@ ByteVector ByteVectorGenerator::GetRandomByteVector() {

ByteVector ByteVectorGenerator::GetWeightedRandomByteVector(
Probability probability) {
ByteVector bytes = GetRandomByteVector();
switch (probability) {
case PROBABILITY_75:
case PROBABILITY_100:
return ByteVector(byte_count_, 0xff);
case PROBABILITY_75: {
ByteVector bytes = GetRandomByteVector();
return *ByteVectorOr(GetRandomByteVector(), &bytes);
}
case PROBABILITY_50:
return bytes;
case PROBABILITY_25:
return GetRandomByteVector();
case PROBABILITY_25: {
ByteVector bytes = GetRandomByteVector();
return *ByteVectorAnd(GetRandomByteVector(), &bytes);
}
case PROBABILITY_0:
return ByteVector(byte_count_);
}
NOTREACHED();
return bytes;
return ByteVector(byte_count_);
}

HmacByteVectorGenerator::HmacByteVectorGenerator(
Expand Down
14 changes: 14 additions & 0 deletions components/rappor/byte_vector_utils_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,20 @@ TEST(ByteVectorTest, HmacNist) {
std::string(random_50.begin(), random_50.end()));
}

// A vector generated with PROBABILITY_0 must not contain a single set bit.
TEST(ByteVectorTest, WeightedRandomStatistics0) {
  ByteVectorGenerator zero_generator(50u);
  const ByteVector all_clear =
      zero_generator.GetWeightedRandomByteVector(PROBABILITY_0);
  const int bit_count = CountBits(all_clear);
  EXPECT_EQ(bit_count, 0);
}

// A vector generated with PROBABILITY_100 must have every bit set, i.e.
// 8 bits for each of the 50 bytes requested from the generator.
TEST(ByteVectorTest, WeightedRandomStatistics100) {
  ByteVectorGenerator full_generator(50u);
  const ByteVector all_set =
      full_generator.GetWeightedRandomByteVector(PROBABILITY_100);
  const int bit_count = CountBits(all_set);
  EXPECT_EQ(bit_count, 50 * 8);
}

TEST(ByteVectorTest, WeightedRandomStatistics50) {
ByteVectorGenerator generator(50u);
ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_50);
Expand Down
12 changes: 11 additions & 1 deletion components/rappor/rappor_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ namespace rappor {

// Levels of noise added to a sample.
enum NoiseLevel {
NORMAL_NOISE = 0,
NO_NOISE = 0,
NORMAL_NOISE,
NUM_NOISE_LEVELS,
};

Expand All @@ -28,9 +29,11 @@ enum RapporType {
};

// Discrete probabilities that can be requested for a weighted random bit.
// The enumerators are listed from most likely to least likely; the numeric
// values are spelled out so that they stay stable if entries are reordered.
enum Probability {
  PROBABILITY_100 = 0,  // Bit is always set (100%).
  PROBABILITY_75 = 1,   // Bit is set 75% of the time.
  PROBABILITY_50 = 2,   // Bit is set 50% of the time.
  PROBABILITY_25 = 3,   // Bit is set 25% of the time.
  PROBABILITY_0 = 4,    // Bit is never set (0%).
};


Expand Down Expand Up @@ -87,6 +90,13 @@ struct RapporParameters {
namespace internal {

const NoiseParameters kNoiseParametersForLevel[NUM_NOISE_LEVELS] = {
// NO_NOISE
{
rappor::PROBABILITY_0 /* Fake data probability */,
rappor::PROBABILITY_0 /* Fake one probability */,
rappor::PROBABILITY_100 /* One coin probability */,
rappor::PROBABILITY_0 /* Zero coin probability */,
},
// NORMAL_NOISE
{
rappor::PROBABILITY_50 /* Fake data probability */,
Expand Down
62 changes: 48 additions & 14 deletions components/rappor/sample.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,41 +31,75 @@ void Sample::SetStringField(const std::string& field_name,
const std::string& value) {
DVLOG(2) << "Recording sample \"" << value
<< "\" for sample metric field \"" << field_name << "\"";
DCHECK_EQ(0u, sizes_[field_name]);
fields_[field_name] = internal::GetBloomBits(
DCHECK_EQ(0u, field_info_[field_name].size);
uint64_t bloom_bits = internal::GetBloomBits(
parameters_.bloom_filter_size_bytes,
parameters_.bloom_filter_hash_function_count,
bloom_offset_,
value);
sizes_[field_name] = parameters_.bloom_filter_size_bytes;
field_info_[field_name] = Sample::FieldInfo{
parameters_.bloom_filter_size_bytes /* size */,
bloom_bits /* value */,
parameters_.noise_level,
};
}

void Sample::SetFlagsField(const std::string& field_name,
uint64_t flags,
size_t num_flags) {
SetFlagsField(field_name, flags, num_flags, parameters_.noise_level);
}

void Sample::SetFlagsField(const std::string& field_name,
uint64_t flags,
size_t num_flags,
NoiseLevel noise_level) {
if (noise_level == NO_NOISE) {
// Non-noised fields can only be recorded for UMA rappor metrics.
DCHECK_EQ(UMA_RAPPOR_GROUP, parameters_.recording_group);
if (parameters_.recording_group != UMA_RAPPOR_GROUP)
return;
}
DVLOG(2) << "Recording flags " << flags
<< " for sample metric field \"" << field_name << "\"";
DCHECK_EQ(0u, sizes_[field_name]);
DCHECK_EQ(0u, field_info_[field_name].size);
DCHECK_GT(num_flags, 0u);
DCHECK_LE(num_flags, 64u);
DCHECK(num_flags == 64u || flags >> num_flags == 0);
fields_[field_name] = flags;
sizes_[field_name] = (num_flags + 7) / 8;
field_info_[field_name] = Sample::FieldInfo{
(num_flags + 7) / 8 /* size */,
flags /* value */,
noise_level,
};
}

void Sample::SetUInt64Field(const std::string& field_name,
uint64_t value,
NoiseLevel noise_level) {
// Noised integers not supported yet.
DCHECK_EQ(NO_NOISE, noise_level);
// Non-noised fields can only be recorded for UMA rappor metrics.
DCHECK_EQ(UMA_RAPPOR_GROUP, parameters_.recording_group);
if (parameters_.recording_group != UMA_RAPPOR_GROUP)
return;
DCHECK_EQ(0u, field_info_[field_name].size);
field_info_[field_name] = Sample::FieldInfo{
8,
value,
noise_level,
};
}

void Sample::ExportMetrics(const std::string& secret,
const std::string& metric_name,
RapporReports* reports) const {
for (const auto& kv : fields_) {
uint64_t value = kv.second;
const auto it = sizes_.find(kv.first);
DCHECK(it != sizes_.end());
size_t size = it->second;
ByteVector value_bytes(size);
Uint64ToByteVector(value, size, &value_bytes);
for (const auto& kv : field_info_) {
FieldInfo field_info = kv.second;
ByteVector value_bytes(field_info.size);
Uint64ToByteVector(field_info.value, field_info.size, &value_bytes);
ByteVector report_bytes = internal::GenerateReport(
secret,
internal::kNoiseParametersForLevel[parameters_.noise_level],
internal::kNoiseParametersForLevel[field_info.noise_level],
value_bytes);

RapporReports::Report* report = reports->add_report();
Expand Down
32 changes: 25 additions & 7 deletions components/rappor/sample.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,24 @@ class Sample {
virtual void SetStringField(const std::string& field_name,
const std::string& value);

// Sets a group of boolean flags as a field in this sample.
// TODO(holte): Move all callers to the version with NoiseLevel.
virtual void SetFlagsField(const std::string& field_name,
uint64_t flags,
size_t num_flags);

// Sets a group of boolean flags as a field in this sample, with the
// specified noise level.
// |flags| should be a set of boolean flags stored in the lowest |num_flags|
// bits of |flags|.
virtual void SetFlagsField(const std::string& field_name,
uint64_t flags,
size_t num_flags);
size_t num_flags,
NoiseLevel noise_level);

// Sets an integer value field in this sample, at the given noise level.
virtual void SetUInt64Field(const std::string& field_name,
uint64_t value,
NoiseLevel noise_level);

// Generate randomized reports and store them in |reports|.
virtual void ExportMetrics(const std::string& secret,
Expand All @@ -60,11 +72,17 @@ class Sample {
// Offset used for bloom filter hash functions.
uint32_t bloom_offset_;

// Size of each of the different fields, in bytes.
std::map<std::string, size_t> sizes_;

// The non-randomized report values for each field.
std::map<std::string, uint64_t> fields_;
// Everything recorded about a single field of this sample.
// The setters in sample.cc brace-initialize this struct positionally as
// {size, value, noise_level}, so the member order below must not change.
struct FieldInfo {
// Size of the field, in bytes.
// The setters DCHECK that this is 0 before recording a field.
size_t size;
// The non-randomized report value for the field.
uint64_t value;
// The noise level to use when creating a report for the field.
NoiseLevel noise_level;
};

// Information about all recorded fields.
std::map<std::string, FieldInfo> field_info_;

DISALLOW_COPY_AND_ASSIGN(Sample);
};
Expand Down
23 changes: 23 additions & 0 deletions components/rappor/sampler_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <utility>

#include "base/metrics/metrics_hashes.h"
#include "components/rappor/byte_vector_utils.h"
#include "components/rappor/proto/rappor_metric.pb.h"
#include "testing/gtest/include/gtest/gtest.h"
Expand Down Expand Up @@ -54,6 +55,28 @@ TEST(RapporSamplerTest, TestExport) {
EXPECT_EQ(0, reports2.report_size());
}

// Test exporting fields with NO_NOISE: the exported report bits must equal
// the recorded values exactly, with no randomization applied.
TEST(RapporSamplerTest, TestNoNoise) {
  Sampler sampler;

  scoped_ptr<Sample> sample1 = TestSamplerFactory::CreateSample();
  sample1->SetFlagsField("Foo", 0xde, 8, NO_NOISE);
  // A uint64 field is exactly 8 bytes wide, so spell the value with 8 bytes.
  sample1->SetUInt64Field("Bar", 0x11223344aabbccdd, NO_NOISE);
  sampler.AddSample("Metric1", std::move(sample1));

  RapporReports reports;
  std::string secret = HmacByteVectorGenerator::GenerateEntropyInput();
  sampler.ExportMetrics(secret, &reports);
  // Fatal assertion: report(0) and report(1) are accessed unconditionally
  // below, which is only valid when exactly two reports were exported.
  ASSERT_EQ(2, reports.report_size());

  // The two reports may come out in either order; identify them by name hash.
  uint64_t hash1 = base::HashMetricName("Metric1.Foo");
  bool order = reports.report(0).name_hash() == hash1;
  const RapporReports::Report& report1 = reports.report(order ? 0 : 1);
  EXPECT_EQ("\xde", report1.bits());
  const RapporReports::Report& report2 = reports.report(order ? 1 : 0);
  // Expected bytes are listed least-significant byte first. No trailing
  // "\x00" here: a C-string comparison would silently drop it anyway.
  EXPECT_EQ("\xdd\xcc\xbb\xaa\x44\x33\x22\x11", report2.bits());
}

} // namespace internal

} // namespace rappor
4 changes: 3 additions & 1 deletion tools/metrics/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ def Marshall(self, doc, obj):
self.float_attributes +
self.string_attributes)
for attr in attributes:
node.setAttribute(attr, str(obj[attr]))
value = str(obj[attr])
if value:
node.setAttribute(attr, value)

PutComments(node, obj['comments'])

Expand Down
38 changes: 36 additions & 2 deletions tools/metrics/rappor/pretty_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,45 @@
# Model definitions for rappor.xml content
_SUMMARY_TYPE = models.TextNodeType('summary')

# Model for a <noise-values> node, which carries four float attributes.
_NOISE_VALUES_TYPE = models.ObjectNodeType(
    'noise-values',
    float_attributes=[
        'fake-prob',
        'fake-one-prob',
        'one-coin-prob',
        'zero-coin-prob',
    ])

# Model for a <noise-level> node: a 'name' string attribute plus a summary
# child and a noise-values child.
_NOISE_LEVEL_TYPE = models.ObjectNodeType(
    'noise-level',
    extra_newlines=(1, 1, 1),
    string_attributes=['name'],
    children=[
        models.ChildType('summary', _SUMMARY_TYPE, False),
        models.ChildType('values', _NOISE_VALUES_TYPE, False),
    ])

# Model for the <noise-levels> container node, whose 'types' child is built
# from noise-level nodes.
_NOISE_LEVELS_TYPE = models.ObjectNodeType(
    'noise-levels',
    extra_newlines=(1, 1, 1),
    dont_indent=True,
    children=[
        models.ChildType('types', _NOISE_LEVEL_TYPE, True),
    ])

_PARAMETERS_TYPE = models.ObjectNodeType('parameters',
int_attributes=[
'num-cohorts',
'bytes',
'hash-functions',
],
# Remove probabilities once all parsers process noise levels.
float_attributes=[
'fake-prob',
'fake-one-prob',
'one-coin-prob',
'zero-coin-prob',
],
string_attributes=[
'reporting-level'
'reporting-level',
'noise-level',
])

_RAPPOR_PARAMETERS_TYPE = models.ObjectNodeType('rappor-parameters',
Expand Down Expand Up @@ -59,12 +84,19 @@

_FLAGS_FIELD_TYPE = models.ObjectNodeType('flags-field',
extra_newlines=(1, 1, 0),
string_attributes=['name'],
string_attributes=['name', 'noise-level'],
children=[
models.ChildType('flags', _FLAG_TYPE, True),
models.ChildType('summary', _SUMMARY_TYPE, False),
])

# Model for a <uint64-field> node: 'name' and 'noise-level' string
# attributes plus a summary child.
_UINT64_FIELD_TYPE = models.ObjectNodeType(
    'uint64-field',
    extra_newlines=(1, 1, 0),
    string_attributes=['name', 'noise-level'],
    children=[
        models.ChildType('summary', _SUMMARY_TYPE, False),
    ])

_RAPPOR_METRIC_TYPE = models.ObjectNodeType('rappor-metric',
extra_newlines=(1, 1, 1),
string_attributes=['name', 'type'],
Expand All @@ -73,6 +105,7 @@
models.ChildType('summary', _SUMMARY_TYPE, False),
models.ChildType('strings', _STRING_FIELD_TYPE, True),
models.ChildType('flags', _FLAGS_FIELD_TYPE, True),
models.ChildType('uint64', _UINT64_FIELD_TYPE, True),
])

_RAPPOR_METRICS_TYPE = models.ObjectNodeType('rappor-metrics',
Expand All @@ -86,6 +119,7 @@
extra_newlines=(1, 1, 1),
dont_indent=True,
children=[
models.ChildType('noiseLevels', _NOISE_LEVELS_TYPE, False),
models.ChildType('parameterTypes', _RAPPOR_PARAMETERS_TYPES_TYPE, False),
models.ChildType('metrics', _RAPPOR_METRICS_TYPE, False),
])
Expand Down
Loading

0 comments on commit 4592135

Please sign in to comment.