Skip to content

Commit 3944b23

Browse files
committed
[yugabyte#19943] docdb: Add new regex filters to Prometheus metric endpoint
Summary: This diff creates a brand new API for YBA to filter Prometheus metric output, and also enables server-level aggregation for all tablet and table metrics when the new API is used. To enable the new API, users need to explicitly add `version=v2` to the URL. **New URL parameters for version control:** `version`: Options(“v1”, “v2”), Default(“v1”). "v1" means the endpoint expects the old regex filters: `priority_regex` and `metrics`. "v2" means the endpoint expects the new regex filters and ignores the v1 filters. Tablet and table metrics will be aggregated to the server level by default. **New regex filters:** `server_allowlist`: Options( “ALL”, “NONE”, Customized regex), Default (“ALL”) `server_blocklist`: Options( “ALL”, “NONE”, Customized regex), Default (“NONE”) `table_allowlist`: Options( “ALL”, “NONE”, Customized regex), Default (“ALL”) `table_blocklist`: Options( “ALL”, “NONE”, Customized regex), Default (“NONE”) A metric whose internal type is tablet or table will be emitted at the table level if and only if it matches table_allowlist and does not match table_blocklist. A metric whose internal type is tablet, table, or server will be emitted at the server level if and only if it matches server_allowlist and does not match server_blocklist. **To distinguish whether a metric from the output is on stream, table, or server level:** ``` If metric_attributes contains “stream_id” : Stream level else if metric_attributes contains “table_id” : Table level else : Server level ``` **Others:** If we accidentally call a server that does not have the new code using the new v2 URL, the server will generate a huge list of metrics at the table level, which can consume a large amount of memory. Thus, it is recommended to add `metrics=` to the v2 URL, so that the server will just return empty output. 
URL Example: `/prometheus-metrics?version=v2&table_allowlist=<regex>&table_blocklist=<regex>&server_allowlist=<regex>&server_blocklist=<regex>&metrics=` Jira: DB-8907 Test Plan: PrometheusMetricFilterTest.TestVOneDefault PrometheusMetricFilterTest.TestVOnePriorityRegex PrometheusMetricFilterTest.TestVTwoDefault PrometheusMetricFilterTest.TestVTwoTableLevel PrometheusMetricFilterTest.TestVTwoServerLevel Also tested on portal Reviewers: amitanand, esheng, rthallam, mlillibridge Reviewed By: amitanand, esheng, mlillibridge Subscribers: sanketh, amalyshev, nbhatia, ybase, bogdan Differential Revision: https://phorge.dev.yugabyte.com/D31259
1 parent 76f9c99 commit 3944b23

9 files changed

+429
-90
lines changed

src/yb/server/default-path-handlers.cc

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -538,13 +538,43 @@ static void ParseRequestOptions(const Webserver::WebRequest& req,
538538
if (prometheus_opts) {
539539
ParseMetricOptions(req, prometheus_opts);
540540

541-
prometheus_opts->priority_regex_string = FindWithDefault(
542-
req.parsed_args, "priority_regex", ".*");
543-
544541
if (const std::string* arg_p = FindOrNull(req.parsed_args, "show_help")) {
545542
prometheus_opts->export_help_and_type =
546543
ExportHelpAndType(ParseLeadingBoolValue(arg_p->c_str(), false));
547544
}
545+
546+
prometheus_opts->version = FindWithDefault(req.parsed_args, "version",
547+
kFilterVersionOne);
548+
549+
if (prometheus_opts->version == kFilterVersionTwo) {
550+
// Set it to accept all metrics, because we ignore metrics URL parameter when using v2.
551+
prometheus_opts->general_metrics_allowlist = std::nullopt;
552+
553+
auto FindHandlingAllOrNone = [&](
554+
const std::string& arg, const std::string& default_value) -> std::string {
555+
std::string regex_string = FindWithDefault(req.parsed_args, arg, default_value);
556+
if (regex_string == "ALL") {
557+
return ".*";
558+
} else if (regex_string == "NONE") {
559+
return "";
560+
}
561+
return regex_string;
562+
};
563+
564+
prometheus_opts->table_allowlist_string = FindHandlingAllOrNone("table_allowlist", "ALL");
565+
566+
prometheus_opts->table_blocklist_string = FindHandlingAllOrNone("table_blocklist", "NONE");
567+
568+
prometheus_opts->server_allowlist_string = FindHandlingAllOrNone("server_allowlist", "ALL");
569+
570+
prometheus_opts->server_blocklist_string = FindHandlingAllOrNone("server_blocklist", "NONE");
571+
} else {
572+
prometheus_opts->priority_regex_string = FindWithDefault(
573+
req.parsed_args, "priority_regex", ".*");
574+
LOG_IF(WARNING, prometheus_opts->version != kFilterVersionOne)
575+
<< "Prometheus endpoint URL parameter version=" << prometheus_opts->version
576+
<< " is not recognized. Only v1 or v2 can be accepted.";
577+
}
548578
}
549579

550580
if (json_mode) {

src/yb/util/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ ADD_YB_TEST(os-util-test)
394394
ADD_YB_TEST(path_util-test)
395395
ADD_YB_TEST(priority_queue-test)
396396
ADD_YB_TEST(priority_thread_pool-test)
397+
ADD_YB_TEST(prometheus_metric_filter-test)
397398
ADD_YB_TEST(pstack_watcher-test)
398399
ADD_YB_TEST(random-test)
399400
ADD_YB_TEST(random_util-test)

src/yb/util/metric_entity.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,25 @@ struct MetricJsonOptions : public MetricOptions {
8989

9090
YB_STRONGLY_TYPED_BOOL(ExportHelpAndType);
9191

92+
// Accepted values of the Prometheus endpoint's "version" URL parameter.
// Declared `inline` rather than `static` because this is a header: an inline variable has a
// single definition shared by every translation unit, whereas a `static` one gives each
// including translation unit its own separately constructed copy of the string.
inline const std::string kFilterVersionOne = "v1";
inline const std::string kFilterVersionTwo = "v2";
94+
9295
struct MetricPrometheusOptions : public MetricOptions {
9396
// Include #TYPE and #HELP in Prometheus metrics output
9497
ExportHelpAndType export_help_and_type{ExportHelpAndType::kFalse};
9598

96-
// Metrics that shows on table level.
99+
std::string version = kFilterVersionOne;
100+
101+
// For filtering table level metrics when version is equal to kFilterVersionOne.
97102
std::string priority_regex_string = ".*";
103+
104+
// The four regexs are for filtering table level and server level metrics
105+
// when version is equal to kFilterVersionTwo.
106+
std::string table_allowlist_string = ".*";
107+
std::string table_blocklist_string = "";
108+
109+
std::string server_allowlist_string = ".*";
110+
std::string server_blocklist_string = "";
98111
};
99112

100113
class MetricEntityPrototype {

src/yb/util/metrics-test.cc

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -678,60 +678,4 @@ TEST_F(MetricsTest, VerifyHelpAndTypeTags) {
678678
"# HELP t_lag Test lag description\n# TYPE t_lag gauge"));
679679
}
680680

681-
TEST_F(MetricsTest, PrometheusMetricFilterTest) {
682-
MetricEntity::AttributeMap entity_attr;
683-
entity_attr["tablet_id"] = "tablet_id_42";
684-
entity_attr["table_name"] = "test_table";
685-
entity_attr["table_id"] = "table_id_43";
686-
scoped_refptr<MetricEntity> tablet_entity =
687-
METRIC_ENTITY_tablet.Instantiate(&registry_, "tablet_entity_id", entity_attr);
688-
// Initilize two tablet metrics.
689-
scoped_refptr<Gauge> gauge = METRIC_t_gauge.Instantiate(tablet_entity, 0);
690-
scoped_refptr<Counter> counter = METRIC_t_counter.Instantiate(tablet_entity);
691-
std::string kGaugeName = "t_gauge";
692-
std::string kCounterName = "t_counter";
693-
{
694-
// Test priority_regex.
695-
MetricPrometheusOptions opts;
696-
opts.priority_regex_string = "t_c.*r";
697-
std::stringstream output;
698-
PrometheusWriter writer(&output, opts);
699-
ASSERT_OK(registry_.WriteForPrometheus(&writer, opts));
700-
MetricAggregationMap* filter =
701-
writer.TEST_GetPrometheusMetricFilter()->TEST_GetAggregationMap();
702-
ASSERT_EQ(kServerLevel | kStreamLevel, (*filter)[kGaugeName]);
703-
ASSERT_EQ(kTableLevel | kServerLevel | kStreamLevel, (*filter)[kCounterName]);
704-
int line_num = 0;
705-
std::string line;
706-
while (std::getline(output, line)) {
707-
if (line.find("table_id=") != std::string::npos) {
708-
// If line contain table id, it means metric is on table level.
709-
ASSERT_TRUE(line.starts_with(kCounterName));
710-
} else {
711-
// Otherwise, it is server level.
712-
ASSERT_TRUE(line.starts_with(kGaugeName));
713-
}
714-
line_num++;
715-
}
716-
ASSERT_EQ(2, line_num);
717-
}
718-
{
719-
// Test default.
720-
MetricPrometheusOptions opts;
721-
std::stringstream output;
722-
PrometheusWriter writer(&output, opts);
723-
ASSERT_OK(registry_.WriteForPrometheus(&writer, opts));
724-
MetricAggregationMap* filter =
725-
writer.TEST_GetPrometheusMetricFilter()->TEST_GetAggregationMap();
726-
EXPECT_EQ(kTableLevel | kServerLevel | kStreamLevel, (*filter)[kGaugeName]);
727-
EXPECT_EQ(kTableLevel | kServerLevel | kStreamLevel, (*filter)[kCounterName]);
728-
int line_num = 0;
729-
std::string line;
730-
while (std::getline(output, line)) {
731-
ASSERT_TRUE(line.find("table_id=") != std::string::npos);
732-
line_num++;
733-
}
734-
ASSERT_EQ(2, line_num);
735-
}
736-
}
737681
} // namespace yb

src/yb/util/metrics_writer.cc

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ PrometheusWriter::PrometheusWriter(std::stringstream* output,
2525
timestamp_(std::chrono::duration_cast<std::chrono::milliseconds>(
2626
std::chrono::system_clock::now().time_since_epoch()).count()),
2727
export_help_and_type_(opts.export_help_and_type),
28-
prometheus_metric_filter_(
29-
std::make_unique<PrometheusMetricFilter>(opts.priority_regex_string)) {}
28+
prometheus_metric_filter_(CreatePrometheusMetricFilter(opts)) {}
3029

3130
PrometheusWriter::~PrometheusWriter() {}
3231

@@ -108,10 +107,11 @@ Status PrometheusWriter::WriteSingleEntry(
108107
const std::string& name,
109108
int64_t value,
110109
AggregationFunction aggregation_function,
111-
AggregationLevels aggregation_levels,
110+
AggregationLevels default_levels,
112111
const char* type,
113112
const char* description) {
114-
aggregation_levels &= prometheus_metric_filter_->GetAggregationLevels(name);
113+
AggregationLevels aggregation_levels =
114+
prometheus_metric_filter_->GetAggregationLevels(name, default_levels);
115115

116116
auto metric_type_it = attr.find("metric_type");
117117
DCHECK(metric_type_it != attr.end());
@@ -140,8 +140,6 @@ Status PrometheusWriter::WriteSingleEntry(
140140
AddAggregatedEntry(tablet_id_it->second, attr, name, value, aggregation_function,
141141
type, description);
142142
}
143-
// Currently, if a metric is exported on table level, it shouldn't be on other levels.
144-
return Status::OK();
145143
}
146144

147145
if (aggregation_levels & kServerLevel) {

src/yb/util/metrics_writer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class PrometheusWriter {
5656
const std::string& name,
5757
int64_t value,
5858
AggregationFunction aggregation_function,
59-
AggregationLevels aggregation_levels,
59+
AggregationLevels default_levels,
6060
const char* type = "unknown",
6161
const char* description = "unknown");
6262

0 commit comments

Comments
 (0)