Skip to content

Commit

Permalink
Merge pull request #604 from oliver006/oh_prometheus_metrics
Browse files Browse the repository at this point in the history
Add prometheus metrics
  • Loading branch information
tidwall authored May 14, 2021
2 parents 476cc49 + 70f3188 commit 559081e
Show file tree
Hide file tree
Showing 412 changed files with 47,534 additions and 1,307 deletions.
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,20 @@ $ ./tile38-cli
> help
```

#### Prometheus Metrics
Tile38 can natively export Prometheus metrics by setting the `--metrics-addr` command line flag (disabled by default). This example exposes the HTTP metrics server on port 4321:
```
# start server and enable Prometheus metrics, listen on local interface only
./tile38-server --metrics-addr=127.0.0.1:4321
# access metrics
curl http://127.0.0.1:4321/metrics
```
If you need to access the `/metrics` endpoint from a different host, set the flag accordingly, e.g. set it to `0.0.0.0:<port>` to listen on all interfaces.

Use the [redis_exporter](https://github.com/oliver006/redis_exporter) for more advanced use cases like extracting key values or running a lua script.


## <a name="cli"></a>Playing with Tile38

Basic operations:
Expand Down Expand Up @@ -275,7 +289,6 @@ Check out [maptiler.org](http://www.maptiler.org/google-maps-coordinates-tile-bo
#### QuadKey
A QuadKey uses the same coordinate system as an XYZ tile except that the string representation is a string of characters composed of 0, 1, 2, or 3. For a detailed explanation check out [The Bing Maps Tile System](https://msdn.microsoft.com/en-us/library/bb259689.aspx).


## Network protocols

It's recommended to use a [client library](#tile38-client-libraries) or the [Tile38 CLI](#running), but there are times when only HTTP is available or when you need to test from a remote terminal. In those cases we provide HTTP and telnet options.
Expand Down
43 changes: 25 additions & 18 deletions cmd/tile38-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,11 @@ import (
"github.com/tidwall/tile38/internal/hservice"
"github.com/tidwall/tile38/internal/log"
"github.com/tidwall/tile38/internal/server"

"golang.org/x/net/context"
"google.golang.org/grpc"
)

var (
dir string
port int
host string
verbose bool
veryVerbose bool
devMode bool
quiet bool
pidfile string
cpuprofile string
memprofile string
pprofport int
nohup bool
)

// TODO: Set to false in 2.*
var httpTransport = true

Expand Down Expand Up @@ -151,8 +137,12 @@ Developer Options:
return
}

var showEvioDisabled bool
var showThreadsDisabled bool
var (
devMode bool
nohup bool
showEvioDisabled bool
showThreadsDisabled bool
)

// parse non standard args.
nargs := []string{os.Args[0]}
Expand Down Expand Up @@ -256,6 +246,21 @@ Developer Options:
}
os.Args = nargs

metricsAddr := flag.String("metrics-addr", "", "The listening addr for Prometheus metrics.")

var (
dir string
port int
host string
verbose bool
veryVerbose bool
quiet bool
pidfile string
cpuprofile string
memprofile string
pprofport int
)

flag.IntVar(&port, "p", 9851, "The listening port.")
flag.StringVar(&pidfile, "pidfile", "", "A file that contains the pid")
flag.StringVar(&host, "h", "", "The listening host.")
Expand Down Expand Up @@ -403,6 +408,7 @@ Developer Options:
| | | tile38.com
|_______|_______|
`+"\n", core.Version, gitsha, strconv.IntSize, runtime.GOARCH, runtime.GOOS, hostd, port, os.Getpid())

if pidferr != nil {
log.Warnf("pidfile: %v", pidferr)
}
Expand All @@ -413,7 +419,8 @@ Developer Options:
if showThreadsDisabled {
log.Warnf("thread flag is deprecated use GOMAXPROCS to set number of threads instead")
}
if err := server.Serve(host, port, dir, httpTransport); err != nil {

if err := server.Serve(host, port, dir, httpTransport, *metricsAddr); err != nil {
log.Fatal(err)
}
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/nats-io/nats-server/v2 v2.1.9 // indirect
github.com/nats-io/nats.go v1.10.0
github.com/peterh/liner v1.2.1
github.com/prometheus/client_golang v1.10.0
github.com/streadway/amqp v1.0.0
github.com/tidwall/btree v0.5.0
github.com/tidwall/buntdb v1.2.3
Expand Down
325 changes: 322 additions & 3 deletions go.sum

Large diffs are not rendered by default.

151 changes: 151 additions & 0 deletions internal/server/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package server

import (
"fmt"
"net/http"

"github.com/tidwall/tile38/core"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

var (
// metricDescriptions maps an internal stat key to the Prometheus descriptor
// used when exporting that stat. Describe sends every descriptor in this
// map; Collect uses the keys to look values up and the descriptors to build
// the exported metrics.
metricDescriptions = map[string]*prometheus.Desc{
/*
these metrics are taken from basicStats() / extStats()
by accessing the map and directly exporting the value found
(the map key must match the key used in the stats map)
*/
"num_collections": prometheus.NewDesc("tile38_collections", "Total number of collections", nil, nil),
"pid": prometheus.NewDesc("tile38_pid", "", nil, nil),
"aof_size": prometheus.NewDesc("tile38_aof_size_bytes", "", nil, nil),
"num_hooks": prometheus.NewDesc("tile38_hooks", "", nil, nil),
"in_memory_size": prometheus.NewDesc("tile38_in_memory_size_bytes", "", nil, nil),
"heap_size": prometheus.NewDesc("tile38_heap_size_bytes", "", nil, nil),
// NOTE(review): "reap" in the exported name below looks like a typo for
// "heap"; it is left as-is because renaming changes the exported metric name.
"heap_released": prometheus.NewDesc("tile38_memory_reap_released_bytes", "", nil, nil),
"max_heap_size": prometheus.NewDesc("tile38_memory_max_heap_size_bytes", "", nil, nil),
"avg_item_size": prometheus.NewDesc("tile38_avg_item_size_bytes", "", nil, nil),
"pointer_size": prometheus.NewDesc("tile38_pointer_size_bytes", "", nil, nil),
"cpus": prometheus.NewDesc("tile38_num_cpus", "", nil, nil),
"tile38_connected_clients": prometheus.NewDesc("tile38_connected_clients", "", nil, nil),

"tile38_total_connections_received": prometheus.NewDesc("tile38_connections_received_total", "", nil, nil),
"tile38_total_messages_sent": prometheus.NewDesc("tile38_messages_sent_total", "", nil, nil),
"tile38_expired_keys": prometheus.NewDesc("tile38_expired_keys_total", "", nil, nil),

/*
these metrics are NOT taken from basicStats() / extStats()
but are calculated independently
(Collect emits them explicitly; the collection_* metrics carry a "col" label)
*/
"collection_objects": prometheus.NewDesc("tile38_collection_objects", "Total number of objects per collection", []string{"col"}, nil),
"collection_points": prometheus.NewDesc("tile38_collection_points", "Total number of points per collection", []string{"col"}, nil),
"collection_strings": prometheus.NewDesc("tile38_collection_strings", "Total number of strings per collection", []string{"col"}, nil),
"collection_weight": prometheus.NewDesc("tile38_collection_weight_bytes", "Total weight of collection in bytes", []string{"col"}, nil),
"server_info": prometheus.NewDesc("tile38_server_info", "Server info", []string{"id", "version"}, nil),
"replication": prometheus.NewDesc("tile38_replication_info", "Replication info", []string{"role", "following", "caught_up", "caught_up_once"}, nil),
"start_time": prometheus.NewDesc("tile38_start_time_seconds", "", nil, nil),
}

// cmdDurations is a summary of command durations in seconds, partitioned by
// the "cmd" label. Objectives maps each tracked quantile (0.5, 0.9, 0.95,
// 0.99) to its allowed absolute error.
cmdDurations = prometheus.NewSummaryVec(prometheus.SummaryOpts{
Name: "tile38_cmd_duration_seconds",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
}, []string{"cmd"},
)
)

// MetricsIndexHandler writes a minimal HTML landing page containing the
// Tile38 version and a link to the /metrics endpoint.
func (s *Server) MetricsIndexHandler(w http.ResponseWriter, r *http.Request) {
	// Assemble the page first so the response is produced by a single Write.
	page := "<html><head>\n" +
		"<title>Tile38 " + core.Version + "</title></head>\n" +
		"<body><h1>Tile38 " + core.Version + "</h1>\n" +
		"<p><a href='/metrics'>Metrics</a></p>\n" +
		"</body></html>"
	w.Write([]byte(page))
}

// MetricsHandler serves one Prometheus scrape. For every request it creates
// a fresh registry, registers the process, Go runtime and build-info
// collectors together with the command-duration summary and the server
// itself, and lets promhttp render the gathered metrics.
func (s *Server) MetricsHandler(w http.ResponseWriter, r *http.Request) {
	registry := prometheus.NewRegistry()

	collectors := []prometheus.Collector{
		prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}),
		prometheus.NewGoCollector(),
		prometheus.NewBuildInfoCollector(),
		cmdDurations,
		s,
	}
	registry.MustRegister(collectors...)

	handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
	handler.ServeHTTP(w, r)
}

// Describe implements prometheus.Collector by sending every descriptor held
// in metricDescriptions to ch.
func (s *Server) Describe(ch chan<- *prometheus.Desc) {
	for key := range metricDescriptions {
		ch <- metricDescriptions[key]
	}
}

// Collect implements prometheus.Collector. While holding the server's read
// lock it:
//
//  1. fills a stats map via basicStats and extStats and exports, as a gauge,
//     every entry whose key appears in metricDescriptions and whose value is
//     numeric;
//  2. emits the server_info, start_time and replication metrics;
//  3. emits object, point, string and weight gauges for each collection,
//     labelled with the collection key.
func (s *Server) Collect(ch chan<- prometheus.Metric) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	stats := make(map[string]interface{})
	s.basicStats(stats)
	s.extStats(stats)

	// Keys that are absent from stats (the independently calculated metrics)
	// yield a nil value here and are skipped; they are emitted explicitly
	// further down.
	for key, desc := range metricDescriptions {
		if val, ok := statAsFloat64(stats[key]); ok {
			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, val)
		}
	}

	// Constant 1.0 "info" style metric: the payload is carried by the labels.
	ch <- prometheus.MustNewConstMetric(
		metricDescriptions["server_info"],
		prometheus.GaugeValue, 1.0,
		s.config.serverID(), core.Version)

	ch <- prometheus.MustNewConstMetric(
		metricDescriptions["start_time"],
		prometheus.GaugeValue, float64(s.started.Unix()))

	// Label order must match the descriptor:
	// role, following, caught_up, caught_up_once.
	replLbls := []string{"leader", "", "", ""}
	if s.config.followHost() != "" {
		replLbls = []string{"follower",
			fmt.Sprintf("%s:%d", s.config.followHost(), s.config.followPort()),
			fmt.Sprintf("%t", s.fcup), fmt.Sprintf("%t", s.fcuponce)}
	}
	ch <- prometheus.MustNewConstMetric(
		metricDescriptions["replication"],
		prometheus.GaugeValue, 1.0,
		replLbls...)

	/*
		add objects/points/strings stats for each collection
	*/
	s.cols.Ascend(nil, func(v interface{}) bool {
		c := v.(*collectionKeyContainer)
		ch <- prometheus.MustNewConstMetric(
			metricDescriptions["collection_objects"],
			prometheus.GaugeValue,
			float64(c.col.Count()),
			c.key,
		)
		ch <- prometheus.MustNewConstMetric(
			metricDescriptions["collection_points"],
			prometheus.GaugeValue,
			float64(c.col.PointCount()),
			c.key,
		)
		ch <- prometheus.MustNewConstMetric(
			metricDescriptions["collection_strings"],
			prometheus.GaugeValue,
			float64(c.col.StringCount()),
			c.key,
		)
		ch <- prometheus.MustNewConstMetric(
			metricDescriptions["collection_weight"],
			prometheus.GaugeValue,
			float64(c.col.TotalWeight()),
			c.key,
		)
		return true // keep iterating over all collections
	})
}

// statAsFloat64 converts a stats-map value to float64. It reports false for
// nil and for any non-numeric value. Previously only int and float64 were
// recognised, so a stat stored under any other numeric type (for example
// uint64) was silently left out of the exported metrics.
// NOTE(review): confirm which concrete types basicStats/extStats store.
func statAsFloat64(v interface{}) (float64, bool) {
	switch n := v.(type) {
	case int:
		return float64(n), true
	case int8:
		return float64(n), true
	case int16:
		return float64(n), true
	case int32:
		return float64(n), true
	case int64:
		return float64(n), true
	case uint:
		return float64(n), true
	case uint8:
		return float64(n), true
	case uint16:
		return float64(n), true
	case uint32:
		return float64(n), true
	case uint64:
		return float64(n), true
	case float32:
		return float64(n), true
	case float64:
		return n, true
	default:
		return 0, false
	}
}
37 changes: 28 additions & 9 deletions internal/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"fmt"
"io"
"net"
net_http "net/http"
"net/url"
"os"
"path"
Expand Down Expand Up @@ -37,6 +38,8 @@ import (
"github.com/tidwall/tile38/internal/endpoint"
"github.com/tidwall/tile38/internal/expire"
"github.com/tidwall/tile38/internal/log"

"github.com/prometheus/client_golang/prometheus"
)

var errOOM = errors.New("OOM command not allowed when used memory > 'maxmemory'")
Expand Down Expand Up @@ -132,7 +135,7 @@ type Server struct {
}

// Serve starts a new tile38 server
func Serve(host string, port int, dir string, http bool) error {
func Serve(host string, port int, dir string, http bool, metricsAddr string) error {
if core.AppendFileName == "" {
core.AppendFileName = path.Join(dir, "appendonly.aof")
}
Expand Down Expand Up @@ -282,6 +285,16 @@ func Serve(host string, port int, dir string, http bool) error {
go server.follow(server.config.followHost(), server.config.followPort(),
server.followc.get())
}

if metricsAddr != "" {
log.Infof("Listening for metrics at: %s", metricsAddr)
go func() {
net_http.HandleFunc("/", server.MetricsIndexHandler)
net_http.HandleFunc("/metrics", server.MetricsHandler)
log.Fatal(net_http.ListenAndServe(metricsAddr, nil))
}()
}

go server.processLives()
go server.watchOutOfMemory()
go server.watchLuaStatePool()
Expand Down Expand Up @@ -762,14 +775,20 @@ func (server *Server) handleInputCommand(client *Client, msg *Message) error {
}
}

cmd := msg.Command()
defer func() {
took := time.Now().Sub(start).Seconds()
cmdDurations.With(prometheus.Labels{"cmd": cmd}).Observe(took)
}()

// Ping. Just send back the response. No need to put through the pipeline.
if msg.Command() == "ping" || msg.Command() == "echo" {
if cmd == "ping" || cmd == "echo" {
switch msg.OutputType {
case JSON:
if len(msg.Args) > 1 {
return writeOutput(`{"ok":true,"` + msg.Command() + `":` + jsonString(msg.Args[1]) + `,"elapsed":"` + time.Since(start).String() + `"}`)
return writeOutput(`{"ok":true,"` + cmd + `":` + jsonString(msg.Args[1]) + `,"elapsed":"` + time.Since(start).String() + `"}`)
}
return writeOutput(`{"ok":true,"` + msg.Command() + `":"pong","elapsed":"` + time.Since(start).String() + `"}`)
return writeOutput(`{"ok":true,"` + cmd + `":"pong","elapsed":"` + time.Since(start).String() + `"}`)
case RESP:
if len(msg.Args) > 1 {
data := redcon.AppendBulkString(nil, msg.Args[1])
Expand All @@ -787,27 +806,27 @@ func (server *Server) handleInputCommand(client *Client, msg *Message) error {
return writeOutput(`{"ok":false,"err":` + jsonString(errMsg) + `,"elapsed":"` + time.Since(start).String() + "\"}")
case RESP:
if errMsg == errInvalidNumberOfArguments.Error() {
return writeOutput("-ERR wrong number of arguments for '" + msg.Command() + "' command\r\n")
return writeOutput("-ERR wrong number of arguments for '" + cmd + "' command\r\n")
}
v, _ := resp.ErrorValue(errors.New("ERR " + errMsg)).MarshalRESP()
return writeOutput(string(v))
}
return nil
}

if msg.Command() == "timeout" {
if cmd == "timeout" {
if err := rewriteTimeoutMsg(msg); err != nil {
return writeErr(err.Error())
}
}

var write bool

if (!client.authd || msg.Command() == "auth") && msg.Command() != "output" {
if (!client.authd || cmd == "auth") && cmd != "output" {
if server.config.requirePass() != "" {
password := ""
// This better be an AUTH command or the Message should contain an Auth
if msg.Command() != "auth" && msg.Auth == "" {
if cmd != "auth" && msg.Auth == "" {
// Just shut down the pipeline now. The less the client connection knows the better.
return writeErr("authentication required")
}
Expand Down Expand Up @@ -898,7 +917,7 @@ func (server *Server) handleInputCommand(client *Client, msg *Message) error {
// No locking for scripts, otherwise writes cannot happen within scripts
case "subscribe", "psubscribe", "publish":
// No locking for pubsub
case "montior":
case "monitor":
// No locking for monitor
}
res, d, err := func() (res resp.Value, d commandDetails, err error) {
Expand Down
2 changes: 1 addition & 1 deletion tests/107/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func main() {

func startTile38Server() {
log.Println("start tile38 server")
err := server.Serve("localhost", tile38Port, "data", false)
err := server.Serve("localhost", tile38Port, "data", false, "")
if err != nil {
log.Fatal(err)
}
Expand Down
Loading

0 comments on commit 559081e

Please sign in to comment.