Skip to content

Add more prometheus metrics #33307

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/kevinburke/ssh_config v1.2.0 // indirect
github.com/klauspost/pgzip v1.2.6 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/libdns/libdns v1.0.0-beta.1 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/markbates/going v1.0.3 // indirect
Expand Down
12 changes: 12 additions & 0 deletions models/db/engine_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"xorm.io/xorm/contexts"
)

Expand All @@ -21,12 +23,22 @@ type EngineHook struct {

var _ contexts.Hook = (*EngineHook)(nil)

// durationHistogram records the duration, in seconds, of database client operations.
// Metric naming follows the OpenTelemetry database semantic conventions:
// https://opentelemetry.io/docs/specs/semconv/database/database-metrics/#metric-dbclientoperationduration
var durationHistogram = promauto.NewHistogram(
	prometheus.HistogramOpts{
		Name:      "operation_duration_seconds",
		Subsystem: "client",
		Namespace: "db",
		Help:      "Duration of database client operations.",
		// TODO: add the type of database per spec, e.g.
		// ConstLabels: prometheus.Labels{"db.system.name": BuilderDialect()},
	},
)

// BeforeProcess starts a database trace span from the hook's context and
// returns the context produced by the tracer. It always returns a nil error.
func (*EngineHook) BeforeProcess(c *contexts.ContextHook) (context.Context, error) {
	// The span itself is not kept here; only the context returned by the tracer is handed back.
	spanCtx, _ := gtprof.GetTracer().Start(c.Ctx, gtprof.TraceSpanDatabase)
	return spanCtx, nil
}

func (h *EngineHook) AfterProcess(c *contexts.ContextHook) error {
durationHistogram.Observe(c.ExecuteTime.Seconds())
span := gtprof.GetContextSpan(c.Ctx)
if span != nil {
// Do not record SQL parameters here:
Expand Down
31 changes: 30 additions & 1 deletion modules/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,39 @@ import (

"code.gitea.io/gitea/modules/setting"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

_ "gitea.com/go-chi/cache/memcache" //nolint:depguard // memcache plugin for cache, it is required for config "ADAPTER=memcache"
)

var defaultCache StringCache
var (
	defaultCache StringCache

	// cacheResponseCounter counts cache lookups, partitioned by the "state"
	// label. A single labelled vector replaces two separately registered
	// counters that shared one metric name and differed only in a const label.
	cacheResponseCounter = promauto.NewCounterVec(prometheus.CounterOpts{
		Namespace: "gitea",
		Subsystem: "cache",
		Name:      "response",
		Help:      "Number of cache lookups by result state (hit or miss).",
	}, []string{"state"})
	// hitCounter and missCounter are the pre-resolved children of
	// cacheResponseCounter, so callers do not pay a label lookup per request
	// and both series are exported from start-up.
	hitCounter  = cacheResponseCounter.WithLabelValues("hit")
	missCounter = cacheResponseCounter.WithLabelValues("miss")

	// latencyHistogram records how long a cache lookup takes. The unit is
	// part of the metric name, like the other duration histograms.
	latencyHistogram = promauto.NewHistogram(prometheus.HistogramOpts{
		Namespace: "gitea",
		Subsystem: "cache",
		Name:      "duration_seconds",
		Help:      "Latency of cache lookups in seconds.",
	})
)

// Init start cache service
func Init() error {
Expand Down
6 changes: 6 additions & 0 deletions modules/cache/string_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package cache
import (
"errors"
"strings"
"time"

"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/setting"
Expand Down Expand Up @@ -63,10 +64,15 @@ func (sc *stringCache) Ping() error {
}

// Get looks up key in the underlying cache.
// It returns the stored string and true when a string value is present, and
// ("", false) when nothing is stored or the stored value is not a string.
// The lookup latency is observed and the hit/miss counters are updated.
func (sc *stringCache) Get(key string) (string, bool) {
	start := time.Now()
	v := sc.chiCache.Get(key)
	latencyHistogram.Observe(time.Since(start).Seconds())

	// The comma-ok assertion also fails for a nil value, so this covers both
	// "nothing stored" and "stored value is not a string". Both are reported
	// to the caller as absent, so both are counted as a miss to keep the
	// metric consistent with what callers observe.
	s, ok := v.(string)
	if !ok {
		missCounter.Inc()
		return "", false
	}
	hitCounter.Inc()
	return s, true
}
Expand Down
25 changes: 25 additions & 0 deletions modules/git/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,39 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/util"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

// TrustedCmdArgs is a list of arguments for a git command that are treated as trusted.
// It's mainly for passing user-provided and trusted arguments to a git command.
// In most cases, it shouldn't be used. Use the AddXxx functions instead.
type TrustedCmdArgs []internal.CmdArg

// const gitOperation = "command"

var (
	// globalCommandArgs holds the global command args set by external packages
	globalCommandArgs TrustedCmdArgs

	// defaultCommandExecutionTimeout is the default timeout for a command execution
	defaultCommandExecutionTimeout = 360 * time.Second

	// reqInflightGauge tracks the number of git commands currently running
	reqInflightGauge = promauto.NewGauge(
		prometheus.GaugeOpts{
			Name:      "active_commands",
			Subsystem: "git",
			Namespace: "gitea",
			Help:      "Number of active git subprocesses.",
		},
	)

	// reqDurationHistogram tracks the time taken by each git call, in seconds
	reqDurationHistogram = promauto.NewHistogram(
		prometheus.HistogramOpts{
			// diverge from spec to store the unit in metric.
			Name:      "command_duration_seconds",
			Subsystem: "git",
			Namespace: "gitea",
			Help:      "Measures the time taken by git subprocesses",
			// based on dotnet buckets https://github.com/open-telemetry/semantic-conventions/issues/336
			Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30, 60, 120, 300},
		},
	)
)

// DefaultLocale is the default LC_ALL to run git commands in.
Expand Down Expand Up @@ -315,6 +335,10 @@ func (c *Command) run(ctx context.Context, skip int, opts *RunOpts) error {
desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", callerInfo, logArgSanitize(opts.Dir), cmdLogString)
log.Debug("git.Command: %s", desc)

inflight := reqInflightGauge // add command type
inflight.Inc()
defer inflight.Dec()

_, span := gtprof.GetTracer().Start(ctx, gtprof.TraceSpanGitRun)
defer span.End()
span.SetAttributeString(gtprof.TraceAttrFuncCaller, callerInfo)
Expand Down Expand Up @@ -364,6 +388,7 @@ func (c *Command) run(ctx context.Context, skip int, opts *RunOpts) error {
if elapsed > time.Second {
log.Debug("slow git.Command.Run: %s (%s)", c, elapsed)
}
reqDurationHistogram.Observe(elapsed.Seconds())

// We need to check if the context is canceled by the program on Windows.
// This is because Windows does not have signal checking when terminating the process.
Expand Down
26 changes: 22 additions & 4 deletions modules/metrics/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

activities_model "code.gitea.io/gitea/models/activities"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/system"
"code.gitea.io/gitea/modules/setting"

"github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -41,6 +42,7 @@ type Collector struct {
Releases *prometheus.Desc
Repositories *prometheus.Desc
Stars *prometheus.Desc
SystemNotices *prometheus.Desc
Teams *prometheus.Desc
UpdateTasks *prometheus.Desc
Users *prometheus.Desc
Expand Down Expand Up @@ -89,7 +91,7 @@ func NewCollector() Collector {
Issues: prometheus.NewDesc(
namespace+"issues",
"Number of Issues",
nil, nil,
[]string{"state"}, nil,
),
IssuesByLabel: prometheus.NewDesc(
namespace+"issues_by_label",
Expand All @@ -103,12 +105,12 @@ func NewCollector() Collector {
),
IssuesOpen: prometheus.NewDesc(
namespace+"issues_open",
"Number of open Issues",
"DEPRECATED: Use Issues with state: open",
nil, nil,
),
IssuesClosed: prometheus.NewDesc(
namespace+"issues_closed",
"Number of closed Issues",
"DEPRECATED: Use Issues with state: closed",
nil, nil,
),
Labels: prometheus.NewDesc(
Expand Down Expand Up @@ -171,6 +173,10 @@ func NewCollector() Collector {
"Number of Stars",
nil, nil,
),
SystemNotices: prometheus.NewDesc(
namespace+"system_notices",
"Number of system notices",
nil, nil),
Teams: prometheus.NewDesc(
namespace+"teams",
"Number of Teams",
Expand Down Expand Up @@ -234,6 +240,7 @@ func (c Collector) Describe(ch chan<- *prometheus.Desc) {
// Collect returns the metrics with values
func (c Collector) Collect(ch chan<- prometheus.Metric) {
stats := activities_model.GetStatistic(db.DefaultContext)
noticeCount := system.CountNotices(db.DefaultContext)

ch <- prometheus.MustNewConstMetric(
c.Accesses,
Expand Down Expand Up @@ -272,8 +279,14 @@ func (c Collector) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(
c.Issues,
prometheus.GaugeValue,
float64(stats.Counter.Issue),
float64(stats.Counter.IssueOpen), "open",
)
ch <- prometheus.MustNewConstMetric(
c.Issues,
prometheus.GaugeValue,
float64(stats.Counter.IssueClosed), "closed",
)

for _, il := range stats.Counter.IssueByLabel {
ch <- prometheus.MustNewConstMetric(
c.IssuesByLabel,
Expand Down Expand Up @@ -360,6 +373,11 @@ func (c Collector) Collect(ch chan<- prometheus.Metric) {
prometheus.GaugeValue,
float64(stats.Counter.Star),
)
ch <- prometheus.MustNewConstMetric(
c.SystemNotices,
prometheus.GaugeValue,
float64(noticeCount),
)
ch <- prometheus.MustNewConstMetric(
c.Teams,
prometheus.GaugeValue,
Expand Down
73 changes: 73 additions & 0 deletions routers/common/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ package common
import (
"fmt"
"net/http"
"strconv"
"strings"
"time"

"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/gtprof"
Expand All @@ -19,6 +21,52 @@ import (
"gitea.com/go-chi/session"
"github.com/chi-middleware/proxy"
"github.com/go-chi/chi/v5"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

// Label names attached to the HTTP server metrics.
const (
	httpRequestMethod      = "http_request_method"
	httpResponseStatusCode = "http_response_status_code"
	httpRoute              = "http_route"
)

// Decimal size units used to build the body-size histogram buckets.
const (
	kb = 1000
	mb = 1000 * kb
)

// HTTP server metrics.
// reference: https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#http-server
var (
	// sizeBuckets are the bucket boundaries shared by the request and response body-size histograms
	sizeBuckets = []float64{kb, 2 * kb, 5 * kb, 10 * kb, 100 * kb, 500 * kb, mb, 2 * mb, 5 * mb, 10 * mb}

	// reqInflightGauge tracks the number of requests currently being handled, per method
	reqInflightGauge = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name:      "active_requests",
			Subsystem: "server",
			Namespace: "http",
			Help:      "Number of active HTTP server requests.",
		},
		[]string{httpRequestMethod},
	)

	// reqDurationHistogram tracks the time taken by each http request, in seconds
	reqDurationHistogram = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			// diverge from spec to store the unit in metric.
			Name:      "request_duration_seconds",
			Subsystem: "server",
			Namespace: "http",
			Help:      "Measures the latency of HTTP requests processed by the server",
			// based on dotnet buckets https://github.com/open-telemetry/semantic-conventions/issues/336
			Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30, 60, 120, 300},
		},
		[]string{httpRequestMethod, httpResponseStatusCode, httpRoute},
	)

	// reqSizeHistogram tracks the size of the request body
	reqSizeHistogram = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:      "body_size",
			Subsystem: "server_request",
			Namespace: "http",
			Help:      "Size of HTTP server request bodies.",
			Buckets:   sizeBuckets,
		},
		[]string{httpRequestMethod, httpResponseStatusCode, httpRoute},
	)

	// respSizeHistogram tracks the size of the response body
	respSizeHistogram = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:      "body_size",
			Subsystem: "server_response",
			Namespace: "http",
			Help:      "Size of HTTP server response bodies.",
			Buckets:   sizeBuckets,
		},
		[]string{httpRequestMethod, httpResponseStatusCode, httpRoute},
	)
)

// ProtocolMiddlewares returns HTTP protocol related middlewares, and it provides a global panic recovery
Expand All @@ -38,6 +86,9 @@ func ProtocolMiddlewares() (handlers []any) {
if setting.IsAccessLogEnabled() {
handlers = append(handlers, context.AccessLogger())
}
if setting.Metrics.Enabled {
handlers = append(handlers, RouteMetrics())
}

return handlers
}
Expand Down Expand Up @@ -107,6 +158,28 @@ func ForwardedHeadersHandler(limit int, trustedProxies []string) func(h http.Han
return proxy.ForwardedHeaders(opt)
}

// RouteMetrics returns a middleware that instruments http requests and responses.
// For every request it tracks the number of in-flight requests per method and,
// once the wrapped handler returns, observes the request duration, the response
// body size and the request body size, labelled by method, status code and route.
func RouteMetrics() func(h http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) {
			inflight := reqInflightGauge.WithLabelValues(req.Method)
			inflight.Inc()
			defer inflight.Dec()
			start := time.Now()

			// Wrap the writer BEFORE calling the next handler and pass the wrapper
			// downstream: a wrapper created after the handler has already written
			// would not have seen the status code or the bytes written.
			m := context.WrapResponseWriter(resp)
			next.ServeHTTP(m, req)

			// The route context is absent when the middleware is used outside of a
			// chi router; fall back to an empty route label instead of panicking.
			route := ""
			if rctx := chi.RouteContext(req.Context()); rctx != nil {
				route = rctx.RoutePattern()
			}
			code := strconv.Itoa(m.WrittenStatus())
			reqDurationHistogram.WithLabelValues(req.Method, code, route).Observe(time.Since(start).Seconds())
			respSizeHistogram.WithLabelValues(req.Method, code, route).Observe(float64(m.WrittenSize()))

			// ContentLength is negative when the length is unknown; record it as 0.
			size := max(req.ContentLength, 0)
			reqSizeHistogram.WithLabelValues(req.Method, code, route).Observe(float64(size))
		})
	}
}

func Sessioner() func(next http.Handler) http.Handler {
return session.Sessioner(session.Options{
Provider: setting.SessionConfig.Provider,
Expand Down
41 changes: 41 additions & 0 deletions routers/common/middleware_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package common

import (
"net/http"
"net/http/httptest"
"testing"
"time"

"github.com/go-chi/chi/v5"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestMetricsMiddlewere checks that the RouteMetrics middleware creates one
// series for each HTTP server metric after a single request has been served.
func TestMetricsMiddlewere(t *testing.T) {
	middleware := RouteMetrics()
	r := chi.NewRouter()
	r.Use(middleware)
	r.Get("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte("test")) // a write failure is irrelevant to what this test checks
		time.Sleep(5 * time.Millisecond)
	}))
	testServer := httptest.NewServer(r)
	defer testServer.Close()

	// verify checks that every defined metric currently exposes `expected` series.
	verify := func(expected int) {
		t.Helper()
		assert.Equal(t, expected, testutil.CollectAndCount(reqDurationHistogram, "http_server_request_duration_seconds"))
		assert.Equal(t, expected, testutil.CollectAndCount(reqSizeHistogram, "http_server_request_body_size"))
		assert.Equal(t, expected, testutil.CollectAndCount(respSizeHistogram, "http_server_response_body_size"))
		assert.Equal(t, expected, testutil.CollectAndCount(reqInflightGauge, "http_server_active_requests"))
	}

	// Check they don't exist before making a request
	verify(0)
	resp, err := http.Get(testServer.URL)
	require.NoError(t, err)
	// Close the body so the client connection is released before the server shuts down.
	require.NoError(t, resp.Body.Close())
	// Check they do exist after making the request
	verify(1)
}
Loading