From 0f9e964303c5930508c186d8a2b9a7bd7ae2f51e Mon Sep 17 00:00:00 2001 From: Dean Coakley Date: Mon, 15 Oct 2018 08:00:14 +0100 Subject: [PATCH] Add stream metrics support (#16) Closes: https://github.com/nginxinc/nginx-prometheus-exporter/pull/1 * Add streamUpstream metrics * Add streamUpstreamServer metrics * Add streamServerZone metrics * Metric descriptor must be the same for every metric with the same metricName, so Metric descriptions must be the same regardless of label * Add stream metrics documentation to readme * Fix readme typo Upsteams->Upstreams --- README.md | 5 +- collector/nginx_plus.go | 115 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 05bac76a..aa6b1741 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,10 @@ Usage of ./nginx-prometheus-exporter: * [HTTP](http://nginx.org/en/docs/http/ngx_http_api_module.html#http_). * [SSL](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_ssl_object). * [HTTP Server Zones](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_http_server_zone). - * [HTTP Upsteams](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_http_upstream). Note: for the `state` metric, the string values are converted to float64 using the following rule: `"up"` -> `1.0`, `"draining"` -> `2.0`, `"down"` -> `3.0`, `"unavail"` –> `4.0`, `"checking"` –> `5.0`, `"unhealthy"` -> `6.0`. + * [Stream Server Zones](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_stream_server_zone). + * [HTTP Upstreams](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_http_upstream). Note: for the `state` metric, the string values are converted to float64 using the following rule: `"up"` -> `1.0`, `"draining"` -> `2.0`, `"down"` -> `3.0`, `"unavail"` –> `4.0`, `"checking"` –> `5.0`, `"unhealthy"` -> `6.0`. + * [Stream Upstreams](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_stream_upstream). Note: for the `state` metric, the string values are converted to float64 using the following rule: `"up"` -> `1.0`, `"down"` -> `3.0`, `"unavail"` –> `4.0`, `"checking"` –> `5.0`, `"unhealthy"` -> `6.0`. + Connect to the `/metrics` page of the running exporter to see the complete list of metrics along with their descriptions. Note: to see server zones related metrics you must configure [status zones](https://nginx.org/en/docs/http/ngx_http_status_module.html#status_zone) and to see upstream related metrics you must configure upstreams with a [shared memory zone](http://nginx.org/en/docs/http/ngx_http_upstream_module.html#zone). diff --git a/collector/nginx_plus.go b/collector/nginx_plus.go index c953beea..35f616aa 100644 --- a/collector/nginx_plus.go +++ b/collector/nginx_plus.go @@ -10,9 +10,15 @@ import ( // NginxPlusCollector collects NGINX Plus metrics. It implements prometheus.Collector interface. type NginxPlusCollector struct { - nginxClient *plusclient.NginxClient - totalMetrics, serverZoneMetrics, upstreamMetrics, upstreamServerMetrics map[string]*prometheus.Desc - mutex sync.Mutex + nginxClient *plusclient.NginxClient + totalMetrics map[string]*prometheus.Desc + serverZoneMetrics map[string]*prometheus.Desc + upstreamMetrics map[string]*prometheus.Desc + upstreamServerMetrics map[string]*prometheus.Desc + streamServerZoneMetrics map[string]*prometheus.Desc + streamUpstreamMetrics map[string]*prometheus.Desc + streamUpstreamServerMetrics map[string]*prometheus.Desc + mutex sync.Mutex } // NewNginxPlusCollector creates an NginxPlusCollector. @@ -42,10 +48,23 @@ func NewNginxPlusCollector(nginxClient *plusclient.NginxClient, namespace string "received": newServerZoneMetric(namespace, "received", "Bytes received from clients", nil), "sent": newServerZoneMetric(namespace, "sent", "Bytes sent to clients", nil), }, + streamServerZoneMetrics: map[string]*prometheus.Desc{ + "processing": newStreamServerZoneMetric(namespace, "processing", "Client connections that are currently being processed", nil), + "connections": newStreamServerZoneMetric(namespace, "connections", "Total connections", nil), + "sessions_2xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "2xx"}), + "sessions_4xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "4xx"}), + "sessions_5xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "5xx"}), + "discarded": newStreamServerZoneMetric(namespace, "discarded", "Connections completed without creating a session", nil), + "received": newStreamServerZoneMetric(namespace, "received", "Bytes received from clients", nil), + "sent": newStreamServerZoneMetric(namespace, "sent", "Bytes sent to clients", nil), + }, upstreamMetrics: map[string]*prometheus.Desc{ "keepalives": newUpstreamMetric(namespace, "keepalives", "Idle keepalive connections"), "zombies": newUpstreamMetric(namespace, "zombies", "Servers removed from the group but still processing active client requests"), }, + streamUpstreamMetrics: map[string]*prometheus.Desc{ + "zombies": newStreamUpstreamMetric(namespace, "zombies", "Servers removed from the group but still processing active client connections"), + }, upstreamServerMetrics: map[string]*prometheus.Desc{ "state": newUpstreamServerMetric(namespace, "state", "Current state", nil), "active": newUpstreamServerMetric(namespace, "active", "Active connections", nil), @@ -65,6 +84,21 @@ func NewNginxPlusCollector(nginxClient *plusclient.NginxClient, namespace string "health_checks_fails": newUpstreamServerMetric(namespace, "health_checks_fails", "Failed health checks", nil), "health_checks_unhealthy": newUpstreamServerMetric(namespace, "health_checks_unhealthy", "How many times the server became unhealthy (state 'unhealthy')", nil), }, + streamUpstreamServerMetrics: map[string]*prometheus.Desc{ + "state": newStreamUpstreamServerMetric(namespace, "state", "Current state"), + "active": newStreamUpstreamServerMetric(namespace, "active", "Active connections"), + "sent": newStreamUpstreamServerMetric(namespace, "sent", "Bytes sent to this server"), + "received": newStreamUpstreamServerMetric(namespace, "received", "Bytes received from this server"), + "fails": newStreamUpstreamServerMetric(namespace, "fails", "Number of unsuccessful attempts to communicate with the server"), + "unavail": newStreamUpstreamServerMetric(namespace, "unavail", "How many times the server became unavailable for client connections (state 'unavail') due to the number of unsuccessful attempts reaching the max_fails threshold"), + "connections": newStreamUpstreamServerMetric(namespace, "connections", "Total number of client connections forwarded to this server"), + "connect_time": newStreamUpstreamServerMetric(namespace, "connect_time", "Average time to connect to the upstream server"), + "first_byte_time": newStreamUpstreamServerMetric(namespace, "first_byte_time", "Average time to receive the first byte of data"), + "response_time": newStreamUpstreamServerMetric(namespace, "response_time", "Average time to receive the last byte of data"), + "health_checks_checks": newStreamUpstreamServerMetric(namespace, "health_checks_checks", "Total health check requests"), + "health_checks_fails": newStreamUpstreamServerMetric(namespace, "health_checks_fails", "Failed health checks"), + "health_checks_unhealthy": newStreamUpstreamServerMetric(namespace, "health_checks_unhealthy", "How many times the server became unhealthy (state 'unhealthy')"), + }, } } @@ -83,6 +117,15 @@ func (c *NginxPlusCollector) Describe(ch chan<- *prometheus.Desc) { for _, m := range c.upstreamServerMetrics { ch <- m } + for _, m := range c.streamServerZoneMetrics { + ch <- m + } + for _, m := range c.streamUpstreamMetrics { + ch <- m + } + for _, m := range c.streamUpstreamServerMetrics { + ch <- m + } } // Collect fetches metrics from NGINX Plus and sends them to the provided channel. @@ -138,6 +181,25 @@ func (c *NginxPlusCollector) Collect(ch chan<- prometheus.Metric) { prometheus.CounterValue, float64(zone.Sent), name) } + for name, zone := range stats.StreamServerZones { + ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["processing"], + prometheus.GaugeValue, float64(zone.Processing), name) + ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["connections"], + prometheus.CounterValue, float64(zone.Connections), name) + ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_2xx"], + prometheus.CounterValue, float64(zone.Sessions.Sessions2xx), name) + ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_4xx"], + prometheus.CounterValue, float64(zone.Sessions.Sessions4xx), name) + ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_5xx"], + prometheus.CounterValue, float64(zone.Sessions.Sessions5xx), name) + ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["discarded"], + prometheus.CounterValue, float64(zone.Discarded), name) + ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["received"], + prometheus.CounterValue, float64(zone.Received), name) + ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sent"], + prometheus.CounterValue, float64(zone.Sent), name) + } + for name, upstream := range stats.Upstreams { for _, peer := range upstream.Peers { ch <- prometheus.MustNewConstMetric(c.upstreamServerMetrics["state"], @@ -183,6 +245,41 @@ func (c *NginxPlusCollector) Collect(ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric(c.upstreamMetrics["zombies"], prometheus.GaugeValue, float64(upstream.Zombies), name) } + + for name, upstream := range stats.StreamUpstreams { + for _, peer := range upstream.Peers { + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["state"], + prometheus.GaugeValue, upstreamServerStates[peer.State], name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["active"], + prometheus.GaugeValue, float64(peer.Active), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["connections"], + prometheus.CounterValue, float64(peer.Connections), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["connect_time"], + prometheus.GaugeValue, float64(peer.ConnectTime), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["first_byte_time"], + prometheus.GaugeValue, float64(peer.FirstByteTime), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["response_time"], + prometheus.GaugeValue, float64(peer.ResponseTime), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["sent"], + prometheus.CounterValue, float64(peer.Sent), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["received"], + prometheus.CounterValue, float64(peer.Received), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["fails"], + prometheus.CounterValue, float64(peer.Fails), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["unavail"], + prometheus.CounterValue, float64(peer.Unavail), name, peer.Server) + if peer.HealthChecks != (plusclient.HealthChecks{}) { + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_checks"], + prometheus.CounterValue, float64(peer.HealthChecks.Checks), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_fails"], + prometheus.CounterValue, float64(peer.HealthChecks.Fails), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_unhealthy"], + prometheus.CounterValue, float64(peer.HealthChecks.Unhealthy), name, peer.Server) + } + } + ch <- prometheus.MustNewConstMetric(c.streamUpstreamMetrics["zombies"], + prometheus.GaugeValue, float64(upstream.Zombies), name) + } } var upstreamServerStates = map[string]float64{ @@ -198,10 +295,22 @@ func newServerZoneMetric(namespace string, metricName string, docString string, return prometheus.NewDesc(prometheus.BuildFQName(namespace, "server_zone", metricName), docString, []string{"server_zone"}, constLabels) } +func newStreamServerZoneMetric(namespace string, metricName string, docString string, constLabels prometheus.Labels) *prometheus.Desc { + return prometheus.NewDesc(prometheus.BuildFQName(namespace, "stream_server_zone", metricName), docString, []string{"server_zone"}, constLabels) +} + func newUpstreamMetric(namespace string, metricName string, docString string) *prometheus.Desc { return prometheus.NewDesc(prometheus.BuildFQName(namespace, "upstream", metricName), docString, []string{"upstream"}, nil) } +func newStreamUpstreamMetric(namespace string, metricName string, docString string) *prometheus.Desc { + return prometheus.NewDesc(prometheus.BuildFQName(namespace, "stream_upstream", metricName), docString, []string{"upstream"}, nil) +} + func newUpstreamServerMetric(namespace string, metricName string, docString string, constLabels prometheus.Labels) *prometheus.Desc { return prometheus.NewDesc(prometheus.BuildFQName(namespace, "upstream_server", metricName), docString, []string{"upstream", "server"}, constLabels) } + +func newStreamUpstreamServerMetric(namespace string, metricName string, docString string) *prometheus.Desc { + return prometheus.NewDesc(prometheus.BuildFQName(namespace, "stream_upstream_server", metricName), docString, []string{"upstream", "server"}, nil) +}