Skip to content

Commit

Permalink
collector: Unwrap glob textfile directories (prometheus#1985)
Browse files Browse the repository at this point in the history
* collector: Unwrap glob textfile directories
* collector: Store full path in mtime's file label

The point is to avoid duplicated gauges from files with the same name in
different directories.

This introduces support for exporting from multiple directories matching
a given pattern (e.g. `/home/*/metrics/`).

Signed-off-by: Kiril Vladimirov <kiril@vladimiroff.org>
  • Loading branch information
vladimiroff authored and oblitorum committed Apr 9, 2024
1 parent e3af5e6 commit afed85a
Show file tree
Hide file tree
Showing 10 changed files with 95 additions and 33 deletions.
2 changes: 1 addition & 1 deletion collector/fixtures/textfile/different_metric_types.out
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ events_total{foo="bar"} 10
events_total{foo="baz"} 20
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/different_metric_types/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
49 changes: 49 additions & 0 deletions collector/fixtures/textfile/glob_extra_dimension.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/histogram_extra_dimension/metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/summary_extra_dimension/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
# HELP prometheus_rule_evaluation_duration_seconds The duration for a rule to execute.
# TYPE prometheus_rule_evaluation_duration_seconds summary
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="alerting",quantile="0.9"} 0.001765451
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="alerting",quantile="0.99"} 0.018672076
prometheus_rule_evaluation_duration_seconds_sum{handler="",rule_type="alerting"} 214.85081044700146
prometheus_rule_evaluation_duration_seconds_count{handler="",rule_type="alerting"} 185209
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="recording",quantile="0.5"} 4.3132e-05
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="recording",quantile="0.9"} 8.9295e-05
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="recording",quantile="0.99"} 0.000193657
prometheus_rule_evaluation_duration_seconds_sum{handler="",rule_type="recording"} 185091.01317759082
prometheus_rule_evaluation_duration_seconds_count{handler="",rule_type="recording"} 1.0020195e+08
prometheus_rule_evaluation_duration_seconds{handler="foo",rule_type="alerting",quantile="0.5"} 0.000571464
prometheus_rule_evaluation_duration_seconds_sum{handler="foo",rule_type="alerting"} 0
prometheus_rule_evaluation_duration_seconds_count{handler="foo",rule_type="alerting"} 0
# HELP prometheus_tsdb_compaction_chunk_range Final time range of chunks on their first compaction
# TYPE prometheus_tsdb_compaction_chunk_range histogram
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="100"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="400"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="1600"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="6400"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="25600"} 7
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="102400"} 7
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="409600"} 1.412839e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="1.6384e+06"} 1.69185e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="6.5536e+06"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="2.62144e+07"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="+Inf"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_sum{foo="bar"} 6.71393432189e+11
prometheus_tsdb_compaction_chunk_range_count{foo="bar"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="100"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="400"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="1600"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="6400"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="25600"} 7
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="102400"} 7
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="409600"} 1.412839e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="1.6384e+06"} 1.69185e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="6.5536e+06"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="2.62144e+07"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="+Inf"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_sum{foo="baz"} 6.71393432189e+11
prometheus_tsdb_compaction_chunk_range_count{foo="baz"} 1.691853e+06
2 changes: 1 addition & 1 deletion collector/fixtures/textfile/histogram.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/histogram/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
Expand Down
2 changes: 1 addition & 1 deletion collector/fixtures/textfile/histogram_extra_dimension.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/histogram_extra_dimension/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
Expand Down
2 changes: 1 addition & 1 deletion collector/fixtures/textfile/inconsistent_metrics.out
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ http_requests_total{baz="",code="503",foo="",handler="query_range",method="get"}
http_requests_total{baz="bar",code="200",foo="",handler="",method="get"} 93
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/inconsistent_metrics/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
2 changes: 1 addition & 1 deletion collector/fixtures/textfile/summary.out
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ event_duration_seconds_total_sum{baz="result_sort"} 3.4123187829998307
event_duration_seconds_total_count{baz="result_sort"} 1.427647e+06
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/summary/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
2 changes: 1 addition & 1 deletion collector/fixtures/textfile/summary_extra_dimension.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/summary_extra_dimension/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
Expand Down
4 changes: 2 additions & 2 deletions collector/fixtures/textfile/two_metric_files.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics1.prom"} 1
node_textfile_mtime_seconds{file="metrics2.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/two_metric_files/metrics1.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/two_metric_files/metrics2.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
Expand Down
59 changes: 34 additions & 25 deletions collector/textfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,18 @@ func (c *textFileCollector) exportMTimes(mtimes map[string]time.Time, ch chan<-

// Export the mtimes of the successful files.
// Sorting is needed for predictable output comparison in tests.
filenames := make([]string, 0, len(mtimes))
for filename := range mtimes {
filenames = append(filenames, filename)
filepaths := make([]string, 0, len(mtimes))
for path := range mtimes {
filepaths = append(filepaths, path)
}
sort.Strings(filenames)
sort.Strings(filepaths)

for _, filename := range filenames {
mtime := float64(mtimes[filename].UnixNano() / 1e9)
for _, path := range filepaths {
mtime := float64(mtimes[path].UnixNano() / 1e9)
if c.mtime != nil {
mtime = *c.mtime
}
ch <- prometheus.MustNewConstMetric(mtimeDesc, prometheus.GaugeValue, mtime, filename)
ch <- prometheus.MustNewConstMetric(mtimeDesc, prometheus.GaugeValue, mtime, path)
}
}

Expand All @@ -192,28 +192,37 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
// Iterate over files and accumulate their metrics, but also track any
// parsing errors so an error metric can be reported.
var errored bool
files, err := ioutil.ReadDir(c.path)
if err != nil && c.path != "" {
errored = true
level.Error(c.logger).Log("msg", "failed to read textfile collector directory", "path", c.path, "err", err)
}

mtimes := make(map[string]time.Time, len(files))
for _, f := range files {
if !strings.HasSuffix(f.Name(), ".prom") {
continue
}
paths, err := filepath.Glob(c.path)
if err != nil || len(paths) == 0 {
// Not a glob or not an accessible path; either way, assume a single
// directory and let ioutil.ReadDir handle it.
paths = []string{c.path}
}

mtime, err := c.processFile(f.Name(), ch)
if err != nil {
mtimes := make(map[string]time.Time)
for _, path := range paths {
files, err := ioutil.ReadDir(path)
if err != nil && path != "" {
errored = true
level.Error(c.logger).Log("msg", "failed to collect textfile data", "file", f.Name(), "err", err)
continue
level.Error(c.logger).Log("msg", "failed to read textfile collector directory", "path", path, "err", err)
}

mtimes[f.Name()] = *mtime
}
for _, f := range files {
if !strings.HasSuffix(f.Name(), ".prom") {
continue
}

mtime, err := c.processFile(path, f.Name(), ch)
if err != nil {
errored = true
level.Error(c.logger).Log("msg", "failed to collect textfile data", "file", f.Name(), "err", err)
continue
}

mtimes[filepath.Join(path, f.Name())] = *mtime
}
}
c.exportMTimes(mtimes, ch)

// Export if there were errors.
Expand All @@ -235,8 +244,8 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
}

// processFile processes a single file, returning its modification time on success.
func (c *textFileCollector) processFile(name string, ch chan<- prometheus.Metric) (*time.Time, error) {
path := filepath.Join(c.path, name)
func (c *textFileCollector) processFile(dir, name string, ch chan<- prometheus.Metric) (*time.Time, error) {
path := filepath.Join(dir, name)
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open textfile data file %q: %w", path, err)
Expand Down
4 changes: 4 additions & 0 deletions collector/textfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ func TestTextfileCollector(t *testing.T) {
path: "fixtures/textfile/summary_extra_dimension",
out: "fixtures/textfile/summary_extra_dimension.out",
},
{
path: "fixtures/textfile/*_extra_dimension",
out: "fixtures/textfile/glob_extra_dimension.out",
},
}

for i, test := range tests {
Expand Down

0 comments on commit afed85a

Please sign in to comment.