Skip to content

Commit

Permalink
Update the socket tracer metrics to differentiate between tls and pla…
Browse files Browse the repository at this point in the history
…intext metrics (pixie-io#903)

Summary: This updates the `SocketTracerMetrics` class to differentiate
between plaintext and tls metrics. Since we are working to instrument
BoringSSL based applications more broadly (pixie-io#692), this will allow us to
experiment with the underlying tls tracing implementation and verify
that there aren't protocol parsing issues introduced (indicated by more
data loss).

Relevant Issues: pixie-io#692

Type of change: /kind feature

Test Plan: Updated the `mux_trace_bpf_test` and
`netty_tls_trace_bpf_test` with the following diff to verify that they
increment the correct counter
([P317](https://phab.corp.pixielabs.ai/P317))
- [x] Inspect the counters values and dimensions despite end to end test
mentioned above
- [x] Verified that this data was not used for any previous purposes
(new dimension would likely cause breakage)

---------

Signed-off-by: Dom Del Nano <ddelnano@pixielabs.ai>
  • Loading branch information
ddelnano authored and RagalahariP committed Mar 22, 2023
1 parent ce24c66 commit 5e3add6
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 27 deletions.
9 changes: 6 additions & 3 deletions src/stirling/source_connectors/socket_tracer/conn_tracker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ void ConnTracker::AddDataEvent(std::unique_ptr<SocketDataEvent> event) {

namespace {
void UpdateProtocolMetrics(traffic_protocol_t protocol, const conn_stats_event_t& event,
const ConnTracker::ConnStatsTracker& conn_stats) {
auto& metrics = SocketTracerMetrics::GetProtocolMetrics(protocol);
const ConnTracker::ConnStatsTracker& conn_stats, bool ssl) {
auto& metrics = SocketTracerMetrics::GetProtocolMetrics(protocol, ssl);
if (event.rd_bytes > conn_stats.bytes_recv()) {
metrics.conn_stats_bytes.Increment(event.rd_bytes - conn_stats.bytes_recv());
}
Expand All @@ -204,7 +204,7 @@ void ConnTracker::AddConnStats(const conn_stats_event_t& event) {
DCHECK_GE(event.rd_bytes, conn_stats_.bytes_recv());
DCHECK_GE(event.wr_bytes, conn_stats_.bytes_sent());

UpdateProtocolMetrics(protocol_, event, conn_stats_);
UpdateProtocolMetrics(protocol_, event, conn_stats_, ssl_);

conn_stats_.set_bytes_recv(event.rd_bytes);
conn_stats_.set_bytes_sent(event.wr_bytes);
Expand Down Expand Up @@ -542,6 +542,9 @@ bool ConnTracker::SetSSL(bool ssl, std::string_view reason) {

bool old_ssl = ssl_;
ssl_ = ssl;
send_data_.set_ssl(ssl);
recv_data_.set_ssl(ssl);

CONN_TRACE(1) << absl::Substitute("SSL state changed: $0->$1, reason=[$2]", old_ssl, ssl, reason);
return true;
}
Expand Down
3 changes: 2 additions & 1 deletion src/stirling/source_connectors/socket_tracer/data_stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ void DataStream::ProcessBytesToFrames(message_type_t type, TStateType* state) {
ssize_t num_bytes_advanced = data_buffer_.position() - last_processed_pos_;
if (num_bytes_advanced > 0 && static_cast<size_t>(num_bytes_advanced) > frame_bytes) {
size_t bytes_lost = num_bytes_advanced - frame_bytes;
SocketTracerMetrics::GetProtocolMetrics(protocol_).data_loss_bytes.Increment(bytes_lost);
SocketTracerMetrics::GetProtocolMetrics(protocol_, is_ssl_)
.data_loss_bytes.Increment(bytes_lost);
}
last_processed_pos_ = data_buffer_.position();

Expand Down
4 changes: 4 additions & 0 deletions src/stirling/source_connectors/socket_tracer/data_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ class DataStream : NotCopyMoveable {

void set_conn_closed() { conn_closed_ = true; }

void set_ssl(bool ssl) { is_ssl_ = ssl; }

void set_current_time(std::chrono::time_point<std::chrono::steady_clock> time) {
ECHECK(time >= current_time_);
current_time_ = time;
Expand Down Expand Up @@ -302,6 +304,8 @@ class DataStream : NotCopyMoveable {
// This is set to true when connection is closed.
bool conn_closed_ = false;

bool is_ssl_ = false;

// Keep some stats on ParseFrames() attempts.
int stat_valid_frames_ = 0;
int stat_invalid_frames_ = 0;
Expand Down
63 changes: 55 additions & 8 deletions src/stirling/source_connectors/socket_tracer/data_stream_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ class DataStreamTest : public ::testing::Test {

void TearDown() override {
TestOnlyResetMetricsRegistry();
SocketTracerMetrics::TestOnlyResetProtocolMetrics(kProtocolHTTP);
SocketTracerMetrics::TestOnlyResetProtocolMetrics(kProtocolHTTP, false);
SocketTracerMetrics::TestOnlyResetProtocolMetrics(kProtocolHTTP, true);
}

std::chrono::steady_clock::time_point now() {
Expand Down Expand Up @@ -95,7 +96,9 @@ TEST_F(DataStreamTest, LostEvent) {
EXPECT_THAT(stream.Frames<http::Message>(), SizeIs(4));

EXPECT_EQ(req1->msg.size() + req4->msg.size(),
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP).data_loss_bytes.Value());
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, false).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, true).data_loss_bytes.Value());
}

TEST_F(DataStreamTest, StuckTemporarily) {
Expand Down Expand Up @@ -126,7 +129,10 @@ TEST_F(DataStreamTest, StuckTemporarily) {
EXPECT_EQ(requests[1].req_path, "/foo.html");
EXPECT_EQ(requests[2].req_path, "/bar.html");

EXPECT_EQ(0, SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, false).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, true).data_loss_bytes.Value());
}

TEST_F(DataStreamTest, StuckTooLong) {
Expand Down Expand Up @@ -161,7 +167,9 @@ TEST_F(DataStreamTest, StuckTooLong) {
EXPECT_EQ(requests[1].req_path, "/bar.html");

EXPECT_EQ(kHTTPReq0.length(),
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP).data_loss_bytes.Value());
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, false).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, true).data_loss_bytes.Value());
}

TEST_F(DataStreamTest, PartialMessageRecovery) {
Expand All @@ -187,7 +195,9 @@ TEST_F(DataStreamTest, PartialMessageRecovery) {
EXPECT_EQ(requests[1].req_path, "/bar.html");

EXPECT_EQ(kHTTPReq1.length(),
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP).data_loss_bytes.Value());
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, false).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, true).data_loss_bytes.Value());
}

TEST_F(DataStreamTest, HeadAndMiddleMissing) {
Expand Down Expand Up @@ -221,7 +231,9 @@ TEST_F(DataStreamTest, HeadAndMiddleMissing) {
EXPECT_EQ(requests[0].req_path, "/bar.html");

EXPECT_EQ(req0b_size + kHTTPReq1.length(),
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP).data_loss_bytes.Value());
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, false).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, true).data_loss_bytes.Value());
}

TEST_F(DataStreamTest, LateArrivalPlusMissingEvents) {
Expand Down Expand Up @@ -278,7 +290,9 @@ TEST_F(DataStreamTest, LateArrivalPlusMissingEvents) {
EXPECT_EQ(requests[2].req_path, "/foo.html");

EXPECT_EQ(kHTTPReq0.length() + kHTTPReq2.length(),
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP).data_loss_bytes.Value());
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, false).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, true).data_loss_bytes.Value());
}

// This test checks that various stats updated on each call ProcessBytesToFrames()
Expand Down Expand Up @@ -422,7 +436,40 @@ TEST_F(DataStreamTest, SpikeCapacityWithLargeDataChunk) {
// Run ProcessBytesToFrames again to propagate data loss stats.
stream.ProcessBytesToFrames<http::Message>(message_type_t::kResponse, &state);
EXPECT_EQ(kHTTPIncompleteResp.length() - retention_capacity_bytes,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP).data_loss_bytes.Value());
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, false).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, true).data_loss_bytes.Value());
}

TEST_F(DataStreamTest, SpikeCapacityWithLargeDataChunkAndSSLEnabled) {
int spike_capacity_bytes = 1024;
int retention_capacity_bytes = 16;
auto buffer_expiry_timestamp = now() - std::chrono::seconds(10000);
DataStream stream(spike_capacity_bytes);
stream.set_ssl(true);
stream.set_protocol(kProtocolHTTP);

std::unique_ptr<SocketDataEvent> resp0 = event_gen_.InitRecvEvent<kProtocolHTTP>(kHTTPResp0);
std::unique_ptr<SocketDataEvent> resp1 = event_gen_.InitRecvEvent<kProtocolHTTP>(kHTTPResp0);
std::unique_ptr<SocketDataEvent> resp2 =
event_gen_.InitRecvEvent<kProtocolHTTP>(kHTTPIncompleteResp);

stream.AddData(std::move(resp0));
stream.AddData(std::move(resp1));
stream.AddData(std::move(resp2));

protocols::http::StateWrapper state{};
stream.ProcessBytesToFrames<http::Message>(message_type_t::kResponse, &state);
stream.CleanupEvents(retention_capacity_bytes, buffer_expiry_timestamp);
EXPECT_THAT(stream.Frames<http::Message>(), SizeIs(2));
EXPECT_EQ(stream.data_buffer().size(), 16);

// Run ProcessBytesToFrames again to propagate data loss stats.
stream.ProcessBytesToFrames<http::Message>(message_type_t::kResponse, &state);
EXPECT_EQ(kHTTPIncompleteResp.length() - retention_capacity_bytes,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, true).data_loss_bytes.Value());
EXPECT_EQ(0,
SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, false).data_loss_bytes.Value());
}

TEST_F(DataStreamTest, ResyncCausesDuplicateEventBug) {
Expand Down
54 changes: 42 additions & 12 deletions src/stirling/source_connectors/socket_tracer/metrics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,47 +19,77 @@
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

#include <magic_enum.hpp>

#include "src/stirling/source_connectors/socket_tracer/metrics.h"

using metrics_key = std::pair<traffic_protocol_t, bool>;

namespace std {

// Provides hash specialization since stdlib doesn't provide one for std::pair.
// This uses the first 32 bits for the traffic_protocol_t enum and the final 32
// bits for the boolean value.
template <>
struct hash<metrics_key> {
std::size_t operator()(const metrics_key& p) const {
uint64_t protocol = p.first;
int32_t b = p.second ? 1 : 0;

return (protocol << 32) | b;
}
};

} // namespace std

namespace px {
namespace stirling {

SocketTracerMetrics::SocketTracerMetrics(prometheus::Registry* registry,
traffic_protocol_t protocol)
traffic_protocol_t protocol, bool tls)
: data_loss_bytes(
prometheus::BuildCounter()
.Name("data_loss_bytes")
.Help("Total bytes of data loss for this protocol. Measured by bytes that weren't "
"successfully parsed.")
.Register(*registry)
.Add({{"protocol", std::string(magic_enum::enum_name(protocol))}})),
.Add({
{"protocol", std::string(magic_enum::enum_name(protocol))},
{"tls", tls ? "1" : "0"},
})),
conn_stats_bytes(prometheus::BuildCounter()
.Name("conn_stats_bytes")
.Help("Total bytes of data tracked by conn stats for this protocol.")
.Register(*registry)
.Add({{"protocol", std::string(magic_enum::enum_name(protocol))}})) {}
.Add({
{"protocol", std::string(magic_enum::enum_name(protocol))},
{"tls", tls ? "1" : "0"},
})) {}

namespace {
std::unordered_map<traffic_protocol_t, std::unique_ptr<SocketTracerMetrics>> g_protocol_metrics;

void ResetProtocolMetrics(traffic_protocol_t protocol) {
std::unordered_map<metrics_key, std::unique_ptr<SocketTracerMetrics>> g_protocol_metrics;

void ResetProtocolMetrics(traffic_protocol_t protocol, bool tls) {
std::pair<traffic_protocol_t, bool> key = {protocol, tls};
g_protocol_metrics.insert_or_assign(
protocol, std::make_unique<SocketTracerMetrics>(&GetMetricsRegistry(), protocol));
key, std::make_unique<SocketTracerMetrics>(&GetMetricsRegistry(), protocol, tls));
}
} // namespace

SocketTracerMetrics& SocketTracerMetrics::GetProtocolMetrics(traffic_protocol_t protocol) {
if (g_protocol_metrics.find(protocol) == g_protocol_metrics.end()) {
ResetProtocolMetrics(protocol);
SocketTracerMetrics& SocketTracerMetrics::GetProtocolMetrics(traffic_protocol_t protocol,
bool tls) {
std::pair<traffic_protocol_t, bool> key = {protocol, tls};
if (g_protocol_metrics.find(key) == g_protocol_metrics.end()) {
ResetProtocolMetrics(protocol, tls);
}
return *g_protocol_metrics[protocol];
return *g_protocol_metrics[key];
}

void SocketTracerMetrics::TestOnlyResetProtocolMetrics(traffic_protocol_t protocol) {
ResetProtocolMetrics(protocol);
void SocketTracerMetrics::TestOnlyResetProtocolMetrics(traffic_protocol_t protocol, bool tls) {
ResetProtocolMetrics(protocol, tls);
}

} // namespace stirling
Expand Down
6 changes: 3 additions & 3 deletions src/stirling/source_connectors/socket_tracer/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ namespace px {
namespace stirling {

struct SocketTracerMetrics {
SocketTracerMetrics(prometheus::Registry* registry, traffic_protocol_t protocol);
SocketTracerMetrics(prometheus::Registry* registry, traffic_protocol_t protocol, bool tls);
prometheus::Counter& data_loss_bytes;
prometheus::Counter& conn_stats_bytes;

static SocketTracerMetrics& GetProtocolMetrics(traffic_protocol_t protocol);
static SocketTracerMetrics& GetProtocolMetrics(traffic_protocol_t protocol, bool tls);

static void TestOnlyResetProtocolMetrics(traffic_protocol_t protocol);
static void TestOnlyResetProtocolMetrics(traffic_protocol_t protocol, bool tls);
};

} // namespace stirling
Expand Down

0 comments on commit 5e3add6

Please sign in to comment.