Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
58 changes: 53 additions & 5 deletions apiserver/controllers/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"io"
"log"
"net/http"
"strings"

"garm/apiserver/params"
"garm/auth"
Expand All @@ -30,9 +31,10 @@ import (

"github.com/gorilla/websocket"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
)

func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.Hub) (*APIController, error) {
func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.Hub, controllerInfo runnerParams.ControllerInfo) (*APIController, error) {
return &APIController{
r: r,
auth: auth,
Expand All @@ -41,14 +43,16 @@ func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.
ReadBufferSize: 1024,
WriteBufferSize: 16384,
},
controllerInfo: controllerInfo,
}, nil
}

type APIController struct {
r *runner.Runner
auth *auth.Authenticator
hub *wsWriter.Hub
upgrader websocket.Upgrader
r *runner.Runner
auth *auth.Authenticator
hub *wsWriter.Hub
upgrader websocket.Upgrader
controllerInfo runnerParams.ControllerInfo
}

func handleError(w http.ResponseWriter, err error) {
Expand Down Expand Up @@ -85,6 +89,21 @@ func handleError(w http.ResponseWriter, err error) {
}
}

// webhooksReceived counts the total number of webhooks received.
// At the point where a webhook arrives it is not yet authenticated and
// we don't know whether it is meant for us, so every sample is labelled with
// whether it turned out to be valid ("valid"), why it was rejected ("reason"),
// and the hostname and controller ID of the controller that received it.
var webhooksReceived = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "garm_webhooks_received",
Help: "The total number of webhooks received",
}, []string{"valid", "reason", "hostname", "controller_id"})

// init registers the webhooksReceived counter with Prometheus. A failed
// registration is only logged; it does not stop the program.
func init() {
	if err := prometheus.Register(webhooksReceived); err != nil {
		log.Printf("error registering prometheus metric: %q", err)
	}
}

func (a *APIController) handleWorkflowJobEvent(w http.ResponseWriter, r *http.Request) {
defer r.Body.Close()
body, err := io.ReadAll(r.Body)
Expand All @@ -96,14 +115,23 @@ func (a *APIController) handleWorkflowJobEvent(w http.ResponseWriter, r *http.Re
signature := r.Header.Get("X-Hub-Signature-256")
hookType := r.Header.Get("X-Github-Hook-Installation-Target-Type")

controllerInfo := a.r.GetControllerInfo(r.Context())

if err := a.r.DispatchWorkflowJob(hookType, signature, body); err != nil {
if errors.Is(err, gErrors.ErrNotFound) {
webhooksReceived.WithLabelValues("false", "owner_unknown", controllerInfo.Hostname, controllerInfo.ControllerID.String()).Inc()
log.Printf("got not found error from DispatchWorkflowJob. webhook not meant for us?: %q", err)
return
} else if strings.Contains(err.Error(), "signature") { // TODO: check error type
webhooksReceived.WithLabelValues("false", "signature_invalid", controllerInfo.Hostname, controllerInfo.ControllerID.String()).Inc()
} else {
webhooksReceived.WithLabelValues("false", "unknown", controllerInfo.Hostname, controllerInfo.ControllerID.String()).Inc()
}

handleError(w, err)
return
}
webhooksReceived.WithLabelValues("true", "", controllerInfo.Hostname, controllerInfo.ControllerID.String()).Inc()
}

func (a *APIController) CatchAll(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -170,6 +198,26 @@ func (a *APIController) NotFoundHandler(w http.ResponseWriter, r *http.Request)
}
}

// MetricsTokenHandler issues a JWT metrics token and writes it to the
// response as JSON. The request context must belong to an admin; otherwise
// the ErrUnauthorized error response is sent. Errors from token generation
// are reported through handleError, and a failure to encode the response is
// only logged.
func (a *APIController) MetricsTokenHandler(w http.ResponseWriter, r *http.Request) {
	reqCtx := r.Context()
	if isAdmin := auth.IsAdmin(reqCtx); !isAdmin {
		handleError(w, gErrors.ErrUnauthorized)
		return
	}

	metricsToken, err := a.auth.GetJWTMetricsToken(reqCtx)
	if err != nil {
		handleError(w, err)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	resp := runnerParams.JWTResponse{Token: metricsToken}
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		log.Printf("failed to encode response: %q", err)
	}
}

// LoginHandler returns a jwt token
func (a *APIController) LoginHandler(w http.ResponseWriter, r *http.Request) {
var loginInfo runnerParams.PasswordLoginParams
Expand Down
126 changes: 126 additions & 0 deletions apiserver/controllers/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package controllers

import (
"log"

"garm/auth"
"garm/runner"

"github.com/prometheus/client_golang/prometheus"
)

// GarmCollector exposes garm metrics to Prometheus through its Describe and
// Collect methods, reading the data from a runner.
type GarmCollector struct {
// healthMetric is the descriptor of the "garm_health" metric.
healthMetric *prometheus.Desc
// instanceMetric is the descriptor of the "garm_runner_status" metric.
instanceMetric *prometheus.Desc
// runner is queried for controller info, instances and pools on collection.
runner *runner.Runner
}

// NewGarmCollector returns a GarmCollector that reads its data from r.
// It prepares the descriptors for the "garm_runner_status" and "garm_health"
// metrics; both carry the hostname and controller_id labels.
func NewGarmCollector(r *runner.Runner) *GarmCollector {
	instanceLabels := []string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "hostname", "controller_id"}
	healthLabels := []string{"hostname", "controller_id"}

	collector := &GarmCollector{runner: r}
	collector.instanceMetric = prometheus.NewDesc("garm_runner_status", "Status of the runner", instanceLabels, nil)
	collector.healthMetric = prometheus.NewDesc("garm_health", "Health of the runner", healthLabels, nil)
	return collector
}

// Describe sends the descriptors of the instance and health metrics to ch,
// in that order.
func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) {
	for _, desc := range []*prometheus.Desc{c.instanceMetric, c.healthMetric} {
		ch <- desc
	}
}

// Collect fetches the controller info using an admin context and then emits
// the instance metrics followed by the health metric, each labelled with the
// controller's hostname and ID.
func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) {
	info := c.runner.GetControllerInfo(auth.GetAdminContext())
	hostname, controllerID := info.Hostname, info.ControllerID.String()

	c.CollectInstanceMetric(ch, hostname, controllerID)
	c.CollectHealthMetric(ch, hostname, controllerID)
}

// CollectHealthMetric emits the health gauge with a constant value of 1,
// labelled with the given hostname and controller ID. If the metric cannot
// be created the error is logged and nothing is sent on ch.
func (c *GarmCollector) CollectHealthMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
	labelValues := []string{hostname, controllerID}

	healthGauge, err := prometheus.NewConstMetric(c.healthMetric, prometheus.GaugeValue, 1, labelValues...)
	if err != nil {
		log.Printf("error on creating health metric: %s", err)
		return
	}
	ch <- healthGauge
}

// CollectInstanceMetric emits one gauge with value 1 per runner instance.
// Each sample carries the instance name, its status and runner status, the
// owner and type of the pool it belongs to, the pool ID, and the given
// hostname and controller ID.
//
// If the instances cannot be listed, the error is logged and nothing is
// emitted. If the pools cannot be listed, the error is logged and collection
// continues with whatever pool data was returned, so the pool owner and type
// labels may be empty.
func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
	adminCtx := auth.GetAdminContext()

	instances, err := c.runner.ListAllInstances(adminCtx)
	if err != nil {
		log.Printf("cannot collect metrics, listing instances: %s", err)
		return
	}

	pools, err := c.runner.ListAllPools(adminCtx)
	if err != nil {
		// Not fatal: instances are still reported, just without pool details.
		log.Printf("listing pools: %s", err)
	}

	// poolOwner holds the owner name and the pool type used as label values.
	type poolOwner struct {
		name string
		kind string
	}

	owners := make(map[string]poolOwner)
	for _, pool := range pools {
		// The owner is the first non-empty name among enterprise and
		// organization, falling back to the repository name.
		ownerName := pool.RepoName
		switch {
		case pool.EnterpriseName != "":
			ownerName = pool.EnterpriseName
		case pool.OrgName != "":
			ownerName = pool.OrgName
		}
		owners[pool.ID] = poolOwner{name: ownerName, kind: string(pool.PoolType())}
	}

	for _, instance := range instances {
		// An unknown pool ID yields the zero value, i.e. empty labels.
		owner := owners[instance.PoolID]

		sample, err := prometheus.NewConstMetric(
			c.instanceMetric,
			prometheus.GaugeValue,
			1,
			instance.Name,
			string(instance.Status),
			string(instance.RunnerStatus),
			owner.name,
			owner.kind,
			instance.PoolID,
			hostname,
			controllerID,
		)
		if err != nil {
			log.Printf("cannot collect metrics, creating metric: %s", err)
			continue
		}
		ch <- sample
	}
}
17 changes: 16 additions & 1 deletion apiserver/routers/routers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,28 @@ import (
"net/http"

"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus/promhttp"

"garm/apiserver/controllers"
"garm/auth"
"garm/config"
"garm/util"
)

func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, authMiddleware, initMiddleware, instanceMiddleware auth.Middleware) *mux.Router {
func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, cfg *config.Config, authMiddleware, initMiddleware, instanceMiddleware, metricsMiddlerware auth.Middleware) *mux.Router {
router := mux.NewRouter()
logMiddleware := util.NewLoggingMiddleware(logWriter)
router.Use(logMiddleware)

if cfg.Metrics.Enable {
metricsRouter := router.PathPrefix("/metrics").Subrouter()
if !cfg.Metrics.DisableAuth {
metricsRouter.Use(metricsMiddlerware.Middleware)
}
metricsRouter.Handle("/", promhttp.Handler()).Methods("GET", "OPTIONS")
metricsRouter.Handle("", promhttp.Handler()).Methods("GET", "OPTIONS")
}

// Handles github webhooks
webhookRouter := router.PathPrefix("/webhooks").Subrouter()
webhookRouter.PathPrefix("/").Handler(http.HandlerFunc(han.CatchAll))
Expand Down Expand Up @@ -61,6 +72,10 @@ func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, authMiddl
apiRouter.Use(initMiddleware.Middleware)
apiRouter.Use(authMiddleware.Middleware)

// Metrics Token
apiRouter.Handle("/metrics-token/", http.HandlerFunc(han.MetricsTokenHandler)).Methods("GET", "OPTIONS")
apiRouter.Handle("/metrics-token", http.HandlerFunc(han.MetricsTokenHandler)).Methods("GET", "OPTIONS")

///////////
// Pools //
///////////
Expand Down
35 changes: 35 additions & 0 deletions auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,41 @@ func (a *Authenticator) GetJWTToken(ctx context.Context) (string, error) {
return tokenString, nil
}

// GetJWTMetricsToken returns a signed JWT that can be used to read metrics.
// The token is not tied to a user, and no user is stored in the db. The
// claims mark it as non-admin with metrics read access.
//
// ErrUnauthorized is returned when ctx does not belong to an admin. Errors
// from generating the token ID or from signing the token are returned wrapped.
func (a *Authenticator) GetJWTMetricsToken(ctx context.Context) (string, error) {
	if isAdmin := IsAdmin(ctx); !isAdmin {
		return "", runnerErrors.ErrUnauthorized
	}

	tokenID, err := util.GetRandomString(16)
	if err != nil {
		return "", errors.Wrap(err, "generating random string")
	}

	// TODO: the metrics token currently shares its TTL with the normal token.
	// This should probably be configurable: monitoring that breaks because
	// its token expired is pretty nasty.
	expiresAt := time.Now().Add(a.cfg.TimeToLive.Duration())

	claims := JWTClaims{
		StandardClaims: jwt.StandardClaims{
			ExpiresAt: expiresAt.Unix(),
			// TODO: make this configurable
			Issuer: "garm",
		},
		TokenID:     tokenID,
		IsAdmin:     false,
		ReadMetrics: true,
	}

	signed, err := jwt.NewWithClaims(jwt.SigningMethodHS256, claims).SignedString([]byte(a.cfg.Secret))
	if err != nil {
		return "", errors.Wrap(err, "fetching token string")
	}
	return signed, nil
}

func (a *Authenticator) InitController(ctx context.Context, param params.NewUserParams) (params.User, error) {
_, err := a.store.ControllerInfo()
if err != nil {
Expand Down
5 changes: 3 additions & 2 deletions auth/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ import (
type contextFlags string

const (
isAdminKey contextFlags = "is_admin"
fullNameKey contextFlags = "full_name"
isAdminKey contextFlags = "is_admin"
fullNameKey contextFlags = "full_name"
readMetricsKey contextFlags = "read_metrics"
// UserIDFlag is the User ID flag we set in the context
UserIDFlag contextFlags = "user_id"
isEnabledFlag contextFlags = "is_enabled"
Expand Down
9 changes: 5 additions & 4 deletions auth/jwt.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ import (

// JWTClaims holds JWT claims
type JWTClaims struct {
UserID string `json:"user"`
TokenID string `json:"token_id"`
FullName string `json:"full_name"`
IsAdmin bool `json:"is_admin"`
UserID string `json:"user"`
TokenID string `json:"token_id"`
FullName string `json:"full_name"`
IsAdmin bool `json:"is_admin"`
ReadMetrics bool `json:"read_metrics"`
jwt.StandardClaims
}

Expand Down
Loading