Skip to content
This repository has been archived by the owner on Aug 2, 2021. It is now read-only.

Commit

Permalink
all: change metric names delimiter . to / (#2138) (#2139)
Browse files Browse the repository at this point in the history
all: enable prometheus metrics endpoint
  • Loading branch information
Ivan Vandot authored Mar 25, 2020
1 parent f878aed commit 17a389d
Show file tree
Hide file tree
Showing 52 changed files with 819 additions and 296 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,11 @@ The most common types of measurements we use in Swarm are `counters` and `resett

```go
// incrementing a counter
metrics.GetOrRegisterCounter("network.stream.received_chunks", nil).Inc(1)
metrics.GetOrRegisterCounter("network/stream/received_chunks", nil).Inc(1)

// measuring latency with a resetting timer
start := time.Now()
t := metrics.GetOrRegisterResettingTimer("http.request.GET.time", nil)
t := metrics.GetOrRegisterResettingTimer("http/request/GET/time", nil)
...
t.UpdateSince(start)
```
Expand Down
48 changes: 24 additions & 24 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,30 +50,30 @@ import (
)

var (
apiResolveCount = metrics.NewRegisteredCounter("api.resolve.count", nil)
apiResolveFail = metrics.NewRegisteredCounter("api.resolve.fail", nil)
apiGetCount = metrics.NewRegisteredCounter("api.get.count", nil)
apiGetNotFound = metrics.NewRegisteredCounter("api.get.notfound", nil)
apiGetHTTP300 = metrics.NewRegisteredCounter("api.get.http.300", nil)
apiManifestUpdateCount = metrics.NewRegisteredCounter("api.manifestupdate.count", nil)
apiManifestUpdateFail = metrics.NewRegisteredCounter("api.manifestupdate.fail", nil)
apiManifestListCount = metrics.NewRegisteredCounter("api.manifestlist.count", nil)
apiManifestListFail = metrics.NewRegisteredCounter("api.manifestlist.fail", nil)
apiDeleteCount = metrics.NewRegisteredCounter("api.delete.count", nil)
apiDeleteFail = metrics.NewRegisteredCounter("api.delete.fail", nil)
apiGetTarCount = metrics.NewRegisteredCounter("api.gettar.count", nil)
apiGetTarFail = metrics.NewRegisteredCounter("api.gettar.fail", nil)
apiUploadTarCount = metrics.NewRegisteredCounter("api.uploadtar.count", nil)
apiUploadTarFail = metrics.NewRegisteredCounter("api.uploadtar.fail", nil)
apiModifyCount = metrics.NewRegisteredCounter("api.modify.count", nil)
apiModifyFail = metrics.NewRegisteredCounter("api.modify.fail", nil)
apiAddFileCount = metrics.NewRegisteredCounter("api.addfile.count", nil)
apiAddFileFail = metrics.NewRegisteredCounter("api.addfile.fail", nil)
apiRmFileCount = metrics.NewRegisteredCounter("api.removefile.count", nil)
apiRmFileFail = metrics.NewRegisteredCounter("api.removefile.fail", nil)
apiAppendFileCount = metrics.NewRegisteredCounter("api.appendfile.count", nil)
apiAppendFileFail = metrics.NewRegisteredCounter("api.appendfile.fail", nil)
apiGetInvalid = metrics.NewRegisteredCounter("api.get.invalid", nil)
apiResolveCount = metrics.NewRegisteredCounter("api/resolve/count", nil)
apiResolveFail = metrics.NewRegisteredCounter("api/resolve/fail", nil)
apiGetCount = metrics.NewRegisteredCounter("api/get/count", nil)
apiGetNotFound = metrics.NewRegisteredCounter("api/get/notfound", nil)
apiGetHTTP300 = metrics.NewRegisteredCounter("api/get/http/300", nil)
apiManifestUpdateCount = metrics.NewRegisteredCounter("api/manifestupdate/count", nil)
apiManifestUpdateFail = metrics.NewRegisteredCounter("api/manifestupdate/fail", nil)
apiManifestListCount = metrics.NewRegisteredCounter("api/manifestlist/count", nil)
apiManifestListFail = metrics.NewRegisteredCounter("api/manifestlist/fail", nil)
apiDeleteCount = metrics.NewRegisteredCounter("api/delete/count", nil)
apiDeleteFail = metrics.NewRegisteredCounter("api/delete/fail", nil)
apiGetTarCount = metrics.NewRegisteredCounter("api/gettar/count", nil)
apiGetTarFail = metrics.NewRegisteredCounter("api/gettar/fail", nil)
apiUploadTarCount = metrics.NewRegisteredCounter("api/uploadtar/count", nil)
apiUploadTarFail = metrics.NewRegisteredCounter("api/uploadtar/fail", nil)
apiModifyCount = metrics.NewRegisteredCounter("api/modify/count", nil)
apiModifyFail = metrics.NewRegisteredCounter("api/modify/fail", nil)
apiAddFileCount = metrics.NewRegisteredCounter("api/addfile/count", nil)
apiAddFileFail = metrics.NewRegisteredCounter("api/addfile/fail", nil)
apiRmFileCount = metrics.NewRegisteredCounter("api/removefile/count", nil)
apiRmFileFail = metrics.NewRegisteredCounter("api/removefile/fail", nil)
apiAppendFileCount = metrics.NewRegisteredCounter("api/appendfile/count", nil)
apiAppendFileFail = metrics.NewRegisteredCounter("api/appendfile/fail", nil)
apiGetInvalid = metrics.NewRegisteredCounter("api/get/invalid", nil)
)

// ResolverFunc is function which takes a domain in the form of a string and resolves it to a content hash
Expand Down
24 changes: 12 additions & 12 deletions api/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -826,51 +826,51 @@ func GetClientTrace(traceMsg, metricPrefix, ruid string, tn *time.Time) *httptra
trace := &httptrace.ClientTrace{
GetConn: func(_ string) {
log.Trace(traceMsg+" - http get", "event", "GetConn", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".getconn", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/getconn", nil).Update(time.Since(*tn))
},
GotConn: func(_ httptrace.GotConnInfo) {
log.Trace(traceMsg+" - http get", "event", "GotConn", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".gotconn", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/gotconn", nil).Update(time.Since(*tn))
},
PutIdleConn: func(err error) {
log.Trace(traceMsg+" - http get", "event", "PutIdleConn", "ruid", ruid, "err", err)
metrics.GetOrRegisterResettingTimer(metricPrefix+".putidle", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/putidle", nil).Update(time.Since(*tn))
},
GotFirstResponseByte: func() {
log.Trace(traceMsg+" - http get", "event", "GotFirstResponseByte", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".firstbyte", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/firstbyte", nil).Update(time.Since(*tn))
},
Got100Continue: func() {
log.Trace(traceMsg, "event", "Got100Continue", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".got100continue", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/got100continue", nil).Update(time.Since(*tn))
},
DNSStart: func(_ httptrace.DNSStartInfo) {
log.Trace(traceMsg, "event", "DNSStart", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".dnsstart", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/dnsstart", nil).Update(time.Since(*tn))
},
DNSDone: func(_ httptrace.DNSDoneInfo) {
log.Trace(traceMsg, "event", "DNSDone", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".dnsdone", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/dnsdone", nil).Update(time.Since(*tn))
},
ConnectStart: func(network, addr string) {
log.Trace(traceMsg, "event", "ConnectStart", "ruid", ruid, "network", network, "addr", addr)
metrics.GetOrRegisterResettingTimer(metricPrefix+".connectstart", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/connectstart", nil).Update(time.Since(*tn))
},
ConnectDone: func(network, addr string, err error) {
log.Trace(traceMsg, "event", "ConnectDone", "ruid", ruid, "network", network, "addr", addr, "err", err)
metrics.GetOrRegisterResettingTimer(metricPrefix+".connectdone", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/connectdone", nil).Update(time.Since(*tn))
},
WroteHeaders: func() {
log.Trace(traceMsg, "event", "WroteHeaders(request)", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".wroteheaders", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/wroteheaders", nil).Update(time.Since(*tn))
},
Wait100Continue: func() {
log.Trace(traceMsg, "event", "Wait100Continue", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".wait100continue", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/wait100continue", nil).Update(time.Since(*tn))
},
WroteRequest: func(_ httptrace.WroteRequestInfo) {
log.Trace(traceMsg, "event", "WroteRequest", "ruid", ruid)
metrics.GetOrRegisterResettingTimer(metricPrefix+".wroterequest", nil).Update(time.Since(*tn))
metrics.GetOrRegisterResettingTimer(metricPrefix+"/wroterequest", nil).Update(time.Since(*tn))
},
}
return trace
Expand Down
6 changes: 3 additions & 3 deletions api/http/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type Adapter func(http.Handler) http.Handler
func SetRequestID(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
r = r.WithContext(SetRUID(r.Context(), uuid.New()[:8]))
metrics.GetOrRegisterCounter(fmt.Sprintf("http.request.%s", r.Method), nil).Inc(1)
metrics.GetOrRegisterCounter(fmt.Sprintf("http/request/%s", r.Method), nil).Inc(1)
log.Info("created ruid for request", "ruid", GetRUID(r.Context()), "method", r.Method, "url", r.RequestURI)

h.ServeHTTP(w, r)
Expand Down Expand Up @@ -92,8 +92,8 @@ func InitLoggingResponseWriter(h http.Handler) http.Handler {

ts := time.Since(tn)
log.Info("request served", "ruid", GetRUID(r.Context()), "code", writer.statusCode, "time", ts)
metrics.GetOrRegisterResettingTimer(fmt.Sprintf("http.request.%s.time", r.Method), nil).Update(ts)
metrics.GetOrRegisterResettingTimer(fmt.Sprintf("http.request.%s.%d.time", r.Method, writer.statusCode), nil).Update(ts)
metrics.GetOrRegisterResettingTimer(fmt.Sprintf("http/request/%s/time", r.Method), nil).Update(ts)
metrics.GetOrRegisterResettingTimer(fmt.Sprintf("http/request/%s/%d/time", r.Method, writer.statusCode), nil).Update(ts)
})
}

Expand Down
6 changes: 3 additions & 3 deletions api/http/response.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ import (
)

var (
htmlCounter = metrics.NewRegisteredCounter("api.http.errorpage.html.count", nil)
jsonCounter = metrics.NewRegisteredCounter("api.http.errorpage.json.count", nil)
plaintextCounter = metrics.NewRegisteredCounter("api.http.errorpage.plaintext.count", nil)
htmlCounter = metrics.NewRegisteredCounter("api/http/errorpage/html/count", nil)
jsonCounter = metrics.NewRegisteredCounter("api/http/errorpage/json/count", nil)
plaintextCounter = metrics.NewRegisteredCounter("api/http/errorpage/plaintext/count", nil)
)

type ResponseParams struct {
Expand Down
44 changes: 22 additions & 22 deletions api/http/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,28 +51,28 @@ import (
)

var (
postRawCount = metrics.NewRegisteredCounter("api.http.post.raw.count", nil)
postRawFail = metrics.NewRegisteredCounter("api.http.post.raw.fail", nil)
postFilesCount = metrics.NewRegisteredCounter("api.http.post.files.count", nil)
postFilesFail = metrics.NewRegisteredCounter("api.http.post.files.fail", nil)
deleteCount = metrics.NewRegisteredCounter("api.http.delete.count", nil)
deleteFail = metrics.NewRegisteredCounter("api.http.delete.fail", nil)
getCount = metrics.NewRegisteredCounter("api.http.get.count", nil)
getFail = metrics.NewRegisteredCounter("api.http.get.fail", nil)
getFileCount = metrics.NewRegisteredCounter("api.http.get.file.count", nil)
getFileNotFound = metrics.NewRegisteredCounter("api.http.get.file.notfound", nil)
getFileFail = metrics.NewRegisteredCounter("api.http.get.file.fail", nil)
getListCount = metrics.NewRegisteredCounter("api.http.get.list.count", nil)
getListFail = metrics.NewRegisteredCounter("api.http.get.list.fail", nil)
getTagCount = metrics.NewRegisteredCounter("api.http.get.tag.count", nil)
getTagNotFound = metrics.NewRegisteredCounter("api.http.get.tag.notfound", nil)
getTagFail = metrics.NewRegisteredCounter("api.http.get.tag.fail", nil)
getPinCount = metrics.NewRegisteredCounter("api.http.get.pin.count", nil)
getPinFail = metrics.NewRegisteredCounter("api.http.get.pin.fail", nil)
postPinCount = metrics.NewRegisteredCounter("api.http.post.pin.count", nil)
postPinFail = metrics.NewRegisteredCounter("api.http.post.pin.fail", nil)
deletePinCount = metrics.NewRegisteredCounter("api.http.delete.pin.count", nil)
deletePinFail = metrics.NewRegisteredCounter("api.http.delete.pin.fail", nil)
postRawCount = metrics.NewRegisteredCounter("api/http/post/raw/count", nil)
postRawFail = metrics.NewRegisteredCounter("api/http/post/raw/fail", nil)
postFilesCount = metrics.NewRegisteredCounter("api/http/post/files/count", nil)
postFilesFail = metrics.NewRegisteredCounter("api/http/post/files/fail", nil)
deleteCount = metrics.NewRegisteredCounter("api/http/delete/count", nil)
deleteFail = metrics.NewRegisteredCounter("api/http/delete/fail", nil)
getCount = metrics.NewRegisteredCounter("api/http/get/count", nil)
getFail = metrics.NewRegisteredCounter("api/http/get/fail", nil)
getFileCount = metrics.NewRegisteredCounter("api/http/get/file/count", nil)
getFileNotFound = metrics.NewRegisteredCounter("api/http/get/file/notfound", nil)
getFileFail = metrics.NewRegisteredCounter("api/http/get/file/fail", nil)
getListCount = metrics.NewRegisteredCounter("api/http/get/list/count", nil)
getListFail = metrics.NewRegisteredCounter("api/http/get/list/fail", nil)
getTagCount = metrics.NewRegisteredCounter("api/http/get/tag/count", nil)
getTagNotFound = metrics.NewRegisteredCounter("api/http/get/tag/notfound", nil)
getTagFail = metrics.NewRegisteredCounter("api/http/get/tag/fail", nil)
getPinCount = metrics.NewRegisteredCounter("api/http/get/pin/count", nil)
getPinFail = metrics.NewRegisteredCounter("api/http/get/pin/fail", nil)
postPinCount = metrics.NewRegisteredCounter("api/http/post/pin/count", nil)
postPinFail = metrics.NewRegisteredCounter("api/http/post/pin/fail", nil)
deletePinCount = metrics.NewRegisteredCounter("api/http/delete/pin/count", nil)
deletePinFail = metrics.NewRegisteredCounter("api/http/delete/pin/fail", nil)
)

const (
Expand Down
2 changes: 1 addition & 1 deletion cmd/swarm-smoke-pss/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (

"github.com/ethereum/go-ethereum/cmd/utils"
gethmetrics "github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/metrics/influxdb"
"github.com/ethersphere/swarm/metrics/influxdb"

"github.com/ethersphere/swarm/internal/flags"
"github.com/ethersphere/swarm/log"
Expand Down
10 changes: 5 additions & 5 deletions cmd/swarm-smoke-pss/pss.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,12 @@ func runCheck(mode pssMode, count int, msgSizeBytes int) error {
successCount = int64(sc)
failCount = int64(count - sc)

metrics.GetOrRegisterCounter(fmt.Sprintf("pss.%s.msgs.fail", mode), nil).Inc(failCount)
metrics.GetOrRegisterCounter(fmt.Sprintf("pss.%s.msgs.success", mode), nil).Inc(successCount)
metrics.GetOrRegisterCounter(fmt.Sprintf("pss/%s/msgs/fail", mode), nil).Inc(failCount)
metrics.GetOrRegisterCounter(fmt.Sprintf("pss/%s/msgs/success", mode), nil).Inc(successCount)

totalTime := time.Since(t)

metrics.GetOrRegisterResettingTimer(fmt.Sprintf("pss.%s.total-time", mode), nil).Update(totalTime)
metrics.GetOrRegisterResettingTimer(fmt.Sprintf("pss/%s/total-time", mode), nil).Update(totalTime)
log.Info(fmt.Sprintf("pss.%s test ended", mode), "time", totalTime, "success", successCount, "failures", failCount)

if failCount > 0 {
Expand All @@ -165,11 +165,11 @@ func runCheck(mode pssMode, count int, msgSizeBytes int) error {
select {
case err := <-errC:
if err != nil {
metrics.GetOrRegisterCounter(fmt.Sprintf("pss.%s.fail", mode), nil).Inc(1)
metrics.GetOrRegisterCounter(fmt.Sprintf("pss/%s/fail", mode), nil).Inc(1)
}
return err
case <-time.After(time.Duration(timeout) * time.Second):
metrics.GetOrRegisterCounter(fmt.Sprintf("pss.%s.timeout", mode), nil).Inc(1)
metrics.GetOrRegisterCounter(fmt.Sprintf("pss/%s/timeout", mode), nil).Inc(1)
return fmt.Errorf("timeout after %v sec", timeout)
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/swarm-smoke-pss/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func wrapCliCommand(name string, command func(*cli.Context) error) func(*cli.Con
defer func(now time.Time) {
totalTime := time.Since(now)
log.Info("total time", "time", totalTime)
metrics.GetOrRegisterResettingTimer(name+".total-time", nil).Update(totalTime)
metrics.GetOrRegisterResettingTimer(name+"/total-time", nil).Update(totalTime)
}(time.Now())

log.Info("pss smoke test starting", "task", name, "timeout", timeout)
Expand Down
4 changes: 2 additions & 2 deletions cmd/swarm-smoke/feed_upload_and_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ func feedUploadAndSyncCmd(ctx *cli.Context) error {
select {
case err := <-errc:
if err != nil {
metrics.GetOrRegisterCounter(fmt.Sprintf("%s.fail", commandName), nil).Inc(1)
metrics.GetOrRegisterCounter(fmt.Sprintf("%s/fail", commandName), nil).Inc(1)
}
return err
case <-time.After(time.Duration(timeout) * time.Second):
metrics.GetOrRegisterCounter(fmt.Sprintf("%s.timeout", commandName), nil).Inc(1)
metrics.GetOrRegisterCounter(fmt.Sprintf("%s/timeout", commandName), nil).Inc(1)

return fmt.Errorf("timeout after %v sec", timeout)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/swarm-smoke/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import (

"github.com/ethereum/go-ethereum/cmd/utils"
gethmetrics "github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/metrics/influxdb"
cliflags "github.com/ethersphere/swarm/internal/flags"
"github.com/ethersphere/swarm/metrics/influxdb"
"github.com/ethersphere/swarm/tracing"

"github.com/ethereum/go-ethereum/log"
Expand Down
12 changes: 6 additions & 6 deletions cmd/swarm-smoke/sliding_window.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func slidingWindowCmd(ctx *cli.Context) error {

err := <-errc
if err != nil {
metrics.GetOrRegisterCounter(fmt.Sprintf("%s.fail", commandName), nil).Inc(1)
metrics.GetOrRegisterCounter(fmt.Sprintf("%s/fail", commandName), nil).Inc(1)
}
return err
}
Expand Down Expand Up @@ -72,8 +72,8 @@ outer:
return err
}

metrics.GetOrRegisterResettingTimer("sliding-window.upload-time", nil).UpdateSince(t1)
metrics.GetOrRegisterGauge("sliding-window.upload-depth", nil).Update(int64(len(hashes)))
metrics.GetOrRegisterResettingTimer("sliding-window/upload-time", nil).UpdateSince(t1)
metrics.GetOrRegisterGauge("sliding-window/upload-depth", nil).Update(int64(len(hashes)))

fhash, err := digest(bytes.NewReader(randomBytes))
if err != nil {
Expand Down Expand Up @@ -118,7 +118,7 @@ outer:
}
done = true
}
metrics.GetOrRegisterResettingTimer("sliding-window.single.fetch-time", nil).UpdateSince(start)
metrics.GetOrRegisterResettingTimer("sliding-window/single/fetch-time", nil).UpdateSince(start)
d <- struct{}{}
}()
case <-d:
Expand All @@ -127,14 +127,14 @@ outer:
case <-timeoutC:
errored = true
log.Error("error retrieving hash. timeout", "hash idx", i)
metrics.GetOrRegisterCounter("sliding-window.single.error", nil).Inc(1)
metrics.GetOrRegisterCounter("sliding-window/single/error", nil).Inc(1)
break outer
default:
}
}

networkDepth = i
metrics.GetOrRegisterGauge("sliding-window.network-depth", nil).Update(int64(networkDepth))
metrics.GetOrRegisterGauge("sliding-window/network-depth", nil).Update(int64(networkDepth))
log.Info("sliding window test successfully fetched file", "currentDepth", networkDepth)
// this test might take a long time to finish - but we'd like to see metrics while they accumulate and not just when
// the test finishes. therefore emit the metrics on each iteration
Expand Down
Loading

0 comments on commit 17a389d

Please sign in to comment.