Skip to content

Commit

Permalink
Add stream metrics support (#16)
Browse files Browse the repository at this point in the history
Closes: #1
* Add streamUpstream metrics
* Add streamUpstreamServer metrics
* Add streamServerZone metrics
* The metric descriptor must be the same for every metric with the same metricName, so metric
descriptions must be the same regardless of label
* Add stream metrics documentation to readme

* Fix readme typo Upsteams->Upstreams
  • Loading branch information
Dean-Coakley authored and ismael_serrano committed Oct 16, 2018
1 parent 0682519 commit 0f9e964
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 4 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ Usage of ./nginx-prometheus-exporter:
* [HTTP](http://nginx.org/en/docs/http/ngx_http_api_module.html#http_).
* [SSL](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_ssl_object).
* [HTTP Server Zones](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_http_server_zone).
* [HTTP Upsteams](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_http_upstream). Note: for the `state` metric, the string values are converted to float64 using the following rule: `"up"` -> `1.0`, `"draining"` -> `2.0`, `"down"` -> `3.0`, `"unavail"` –> `4.0`, `"checking"` –> `5.0`, `"unhealthy"` -> `6.0`.
* [Stream Server Zones](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_stream_server_zone).
* [HTTP Upstreams](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_http_upstream). Note: for the `state` metric, the string values are converted to float64 using the following rule: `"up"` -> `1.0`, `"draining"` -> `2.0`, `"down"` -> `3.0`, `"unavail"` -> `4.0`, `"checking"` -> `5.0`, `"unhealthy"` -> `6.0`.
* [Stream Upstreams](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_stream_upstream). Note: for the `state` metric, the string values are converted to float64 using the following rule: `"up"` -> `1.0`, `"down"` -> `3.0`, `"unavail"` -> `4.0`, `"checking"` -> `5.0`, `"unhealthy"` -> `6.0`.


Connect to the `/metrics` page of the running exporter to see the complete list of metrics along with their descriptions. Note: to see server-zone-related metrics, you must configure [status zones](https://nginx.org/en/docs/http/ngx_http_status_module.html#status_zone); to see upstream-related metrics, you must configure upstreams with a [shared memory zone](http://nginx.org/en/docs/http/ngx_http_upstream_module.html#zone).

Expand Down
115 changes: 112 additions & 3 deletions collector/nginx_plus.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,15 @@ import (

// NginxPlusCollector collects NGINX Plus metrics. It implements prometheus.Collector interface.
//
// Each *Metrics field maps a short metric key (for example "processing" or
// "zombies") to the prometheus.Desc registered for that metric.
type NginxPlusCollector struct {
nginxClient *plusclient.NginxClient
totalMetrics, serverZoneMetrics, upstreamMetrics, upstreamServerMetrics map[string]*prometheus.Desc
mutex sync.Mutex
// nginxClient is the NGINX Plus client handle supplied to NewNginxPlusCollector.
nginxClient *plusclient.NginxClient
// totalMetrics holds descriptors keyed by metric name.
// NOTE(review): presumably the instance-wide (non-zone) metrics — confirm against the constructor.
totalMetrics map[string]*prometheus.Desc
// serverZoneMetrics holds descriptors for the "server_zone" subsystem.
serverZoneMetrics map[string]*prometheus.Desc
// upstreamMetrics holds descriptors for the "upstream" subsystem (per upstream group).
upstreamMetrics map[string]*prometheus.Desc
// upstreamServerMetrics holds descriptors for the "upstream_server" subsystem (per upstream peer).
upstreamServerMetrics map[string]*prometheus.Desc
// streamServerZoneMetrics holds descriptors for the "stream_server_zone" subsystem.
streamServerZoneMetrics map[string]*prometheus.Desc
// streamUpstreamMetrics holds descriptors for the "stream_upstream" subsystem (per stream upstream group).
streamUpstreamMetrics map[string]*prometheus.Desc
// streamUpstreamServerMetrics holds descriptors for the "stream_upstream_server" subsystem (per stream upstream peer).
streamUpstreamServerMetrics map[string]*prometheus.Desc
// NOTE(review): presumably serializes concurrent Collect calls — confirm against Collect.
mutex sync.Mutex
}

// NewNginxPlusCollector creates an NginxPlusCollector.
Expand Down Expand Up @@ -42,10 +48,23 @@ func NewNginxPlusCollector(nginxClient *plusclient.NginxClient, namespace string
"received": newServerZoneMetric(namespace, "received", "Bytes received from clients", nil),
"sent": newServerZoneMetric(namespace, "sent", "Bytes sent to clients", nil),
},
streamServerZoneMetrics: map[string]*prometheus.Desc{
"processing": newStreamServerZoneMetric(namespace, "processing", "Client connections that are currently being processed", nil),
"connections": newStreamServerZoneMetric(namespace, "connections", "Total connections", nil),
"sessions_2xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "2xx"}),
"sessions_4xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "4xx"}),
"sessions_5xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "5xx"}),
"discarded": newStreamServerZoneMetric(namespace, "discarded", "Connections completed without creating a session", nil),
"received": newStreamServerZoneMetric(namespace, "received", "Bytes received from clients", nil),
"sent": newStreamServerZoneMetric(namespace, "sent", "Bytes sent to clients", nil),
},
upstreamMetrics: map[string]*prometheus.Desc{
"keepalives": newUpstreamMetric(namespace, "keepalives", "Idle keepalive connections"),
"zombies": newUpstreamMetric(namespace, "zombies", "Servers removed from the group but still processing active client requests"),
},
streamUpstreamMetrics: map[string]*prometheus.Desc{
"zombies": newStreamUpstreamMetric(namespace, "zombies", "Servers removed from the group but still processing active client connections"),
},
upstreamServerMetrics: map[string]*prometheus.Desc{
"state": newUpstreamServerMetric(namespace, "state", "Current state", nil),
"active": newUpstreamServerMetric(namespace, "active", "Active connections", nil),
Expand All @@ -65,6 +84,21 @@ func NewNginxPlusCollector(nginxClient *plusclient.NginxClient, namespace string
"health_checks_fails": newUpstreamServerMetric(namespace, "health_checks_fails", "Failed health checks", nil),
"health_checks_unhealthy": newUpstreamServerMetric(namespace, "health_checks_unhealthy", "How many times the server became unhealthy (state 'unhealthy')", nil),
},
streamUpstreamServerMetrics: map[string]*prometheus.Desc{
"state": newStreamUpstreamServerMetric(namespace, "state", "Current state"),
"active": newStreamUpstreamServerMetric(namespace, "active", "Active connections"),
"sent": newStreamUpstreamServerMetric(namespace, "sent", "Bytes sent to this server"),
"received": newStreamUpstreamServerMetric(namespace, "received", "Bytes received from this server"),
"fails": newStreamUpstreamServerMetric(namespace, "fails", "Number of unsuccessful attempts to communicate with the server"),
"unavail": newStreamUpstreamServerMetric(namespace, "unavail", "How many times the server became unavailable for client connections (state 'unavail') due to the number of unsuccessful attempts reaching the max_fails threshold"),
"connections": newStreamUpstreamServerMetric(namespace, "connections", "Total number of client connections forwarded to this server"),
"connect_time": newStreamUpstreamServerMetric(namespace, "connect_time", "Average time to connect to the upstream server"),
"first_byte_time": newStreamUpstreamServerMetric(namespace, "first_byte_time", "Average time to receive the first byte of data"),
"response_time": newStreamUpstreamServerMetric(namespace, "response_time", "Average time to receive the last byte of data"),
"health_checks_checks": newStreamUpstreamServerMetric(namespace, "health_checks_checks", "Total health check requests"),
"health_checks_fails": newStreamUpstreamServerMetric(namespace, "health_checks_fails", "Failed health checks"),
"health_checks_unhealthy": newStreamUpstreamServerMetric(namespace, "health_checks_unhealthy", "How many times the server became unhealthy (state 'unhealthy')"),
},
}
}

Expand All @@ -83,6 +117,15 @@ func (c *NginxPlusCollector) Describe(ch chan<- *prometheus.Desc) {
for _, m := range c.upstreamServerMetrics {
ch <- m
}
for _, m := range c.streamServerZoneMetrics {
ch <- m
}
for _, m := range c.streamUpstreamMetrics {
ch <- m
}
for _, m := range c.streamUpstreamServerMetrics {
ch <- m
}
}

// Collect fetches metrics from NGINX Plus and sends them to the provided channel.
Expand Down Expand Up @@ -138,6 +181,25 @@ func (c *NginxPlusCollector) Collect(ch chan<- prometheus.Metric) {
prometheus.CounterValue, float64(zone.Sent), name)
}

for name, zone := range stats.StreamServerZones {
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["processing"],
prometheus.GaugeValue, float64(zone.Processing), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["connections"],
prometheus.CounterValue, float64(zone.Connections), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_2xx"],
prometheus.CounterValue, float64(zone.Sessions.Sessions2xx), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_4xx"],
prometheus.CounterValue, float64(zone.Sessions.Sessions4xx), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_5xx"],
prometheus.CounterValue, float64(zone.Sessions.Sessions5xx), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["discarded"],
prometheus.CounterValue, float64(zone.Discarded), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["received"],
prometheus.CounterValue, float64(zone.Received), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sent"],
prometheus.CounterValue, float64(zone.Sent), name)
}

for name, upstream := range stats.Upstreams {
for _, peer := range upstream.Peers {
ch <- prometheus.MustNewConstMetric(c.upstreamServerMetrics["state"],
Expand Down Expand Up @@ -183,6 +245,41 @@ func (c *NginxPlusCollector) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(c.upstreamMetrics["zombies"],
prometheus.GaugeValue, float64(upstream.Zombies), name)
}

for name, upstream := range stats.StreamUpstreams {
for _, peer := range upstream.Peers {
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["state"],
prometheus.GaugeValue, upstreamServerStates[peer.State], name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["active"],
prometheus.GaugeValue, float64(peer.Active), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["connections"],
prometheus.CounterValue, float64(peer.Connections), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["connect_time"],
prometheus.GaugeValue, float64(peer.ConnectTime), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["first_byte_time"],
prometheus.GaugeValue, float64(peer.FirstByteTime), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["response_time"],
prometheus.GaugeValue, float64(peer.ResponseTime), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["sent"],
prometheus.CounterValue, float64(peer.Sent), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["received"],
prometheus.CounterValue, float64(peer.Received), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["fails"],
prometheus.CounterValue, float64(peer.Fails), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["unavail"],
prometheus.CounterValue, float64(peer.Unavail), name, peer.Server)
if peer.HealthChecks != (plusclient.HealthChecks{}) {
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_checks"],
prometheus.CounterValue, float64(peer.HealthChecks.Checks), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_fails"],
prometheus.CounterValue, float64(peer.HealthChecks.Fails), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_unhealthy"],
prometheus.CounterValue, float64(peer.HealthChecks.Unhealthy), name, peer.Server)
}
}
ch <- prometheus.MustNewConstMetric(c.streamUpstreamMetrics["zombies"],
prometheus.GaugeValue, float64(upstream.Zombies), name)
}
}

var upstreamServerStates = map[string]float64{
Expand All @@ -198,10 +295,22 @@ func newServerZoneMetric(namespace string, metricName string, docString string,
return prometheus.NewDesc(prometheus.BuildFQName(namespace, "server_zone", metricName), docString, []string{"server_zone"}, constLabels)
}

// newStreamServerZoneMetric builds the descriptor for a stream server zone metric.
//
// The fully-qualified metric name is assembled from namespace, the
// "stream_server_zone" subsystem and metricName. Every sample carries one
// variable label, "server_zone", plus whatever fixed labels are passed in
// constLabels (which may be nil).
func newStreamServerZoneMetric(namespace string, metricName string, docString string, constLabels prometheus.Labels) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "stream_server_zone", metricName)
	variableLabels := []string{"server_zone"}
	return prometheus.NewDesc(fqName, docString, variableLabels, constLabels)
}

// newUpstreamMetric builds the descriptor for a metric reported once per
// HTTP upstream group.
//
// The fully-qualified metric name is assembled from namespace, the "upstream"
// subsystem and metricName. Every sample carries a single variable label,
// "upstream"; no constant labels are attached.
func newUpstreamMetric(namespace string, metricName string, docString string) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "upstream", metricName)
	variableLabels := []string{"upstream"}
	return prometheus.NewDesc(fqName, docString, variableLabels, nil)
}

// newStreamUpstreamMetric builds the descriptor for a metric reported once per
// stream upstream group.
//
// The fully-qualified metric name is assembled from namespace, the
// "stream_upstream" subsystem and metricName. Every sample carries a single
// variable label, "upstream"; no constant labels are attached.
func newStreamUpstreamMetric(namespace string, metricName string, docString string) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "stream_upstream", metricName)
	variableLabels := []string{"upstream"}
	return prometheus.NewDesc(fqName, docString, variableLabels, nil)
}

// newUpstreamServerMetric builds the descriptor for a metric reported once per
// server (peer) inside an HTTP upstream group.
//
// The fully-qualified metric name is assembled from namespace, the
// "upstream_server" subsystem and metricName. Every sample carries two
// variable labels, "upstream" and "server" (in that order), plus whatever
// fixed labels are passed in constLabels (which may be nil).
func newUpstreamServerMetric(namespace string, metricName string, docString string, constLabels prometheus.Labels) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "upstream_server", metricName)
	variableLabels := []string{"upstream", "server"}
	return prometheus.NewDesc(fqName, docString, variableLabels, constLabels)
}

// newStreamUpstreamServerMetric builds the descriptor for a metric reported
// once per server (peer) inside a stream upstream group.
//
// The fully-qualified metric name is assembled from namespace, the
// "stream_upstream_server" subsystem and metricName. Every sample carries two
// variable labels, "upstream" and "server" (in that order); no constant
// labels are attached.
func newStreamUpstreamServerMetric(namespace string, metricName string, docString string) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "stream_upstream_server", metricName)
	variableLabels := []string{"upstream", "server"}
	return prometheus.NewDesc(fqName, docString, variableLabels, nil)
}

0 comments on commit 0f9e964

Please sign in to comment.