Skip to content

Commit

Permalink
operator: Add rules support (#5986)
Browse files Browse the repository at this point in the history
  • Loading branch information
periklis authored May 24, 2022
1 parent 5206e3a commit fd627f2
Show file tree
Hide file tree
Showing 91 changed files with 6,710 additions and 268 deletions.
1 change: 1 addition & 0 deletions operator/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

- [6199](https://github.com/grafana/loki/pull/6199) **Red-GV**: Update GCP secret volume path
- [6125](https://github.com/grafana/loki/pull/6125) **sasagarw**: Add method to get authenticated from GCP
- [5986](https://github.com/grafana/loki/pull/5986) **periklis**: Add support for Loki Rules reconciliation
- [5987](https://github.com/grafana/loki/pull/5987) **Red-GV**: Update logerr to v2.0.0
- [5907](https://github.com/grafana/loki/pull/5907) **xperimental**: Do not include non-static labels in pod selectors
- [5893](https://github.com/grafana/loki/pull/5893) **periklis**: Align PVC storage size requests for all lokistack t-shirt sizes
Expand Down
2 changes: 1 addition & 1 deletion operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ help: ## Display this help.

.PHONY: deps
deps: go.mod go.sum
go mod tidy
go mod tidy -compat=1.17
go mod download
go mod verify

Expand Down
28 changes: 26 additions & 2 deletions operator/PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,39 @@ plugins:
manifests.sdk.operatorframework.io/v2: {}
scorecard.sdk.operatorframework.io/v2: {}
projectName: loki-operator
repo: github.com/grafana/loki
repo: github.com/grafana/loki/operator
resources:
- api:
crdVersion: v1beta1
crdVersion: v1
namespaced: true
controller: true
domain: grafana.com
group: loki
kind: LokiStack
path: github.com/grafana/loki/operator/api/v1beta1
version: v1beta1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: grafana.com
group: loki
kind: AlertingRule
path: github.com/grafana/loki/operator/api/v1beta1
version: v1beta1
webhooks:
validation: true
webhookVersion: v1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: grafana.com
group: loki
kind: RecordingRule
path: github.com/grafana/loki/operator/api/v1beta1
version: v1beta1
webhooks:
validation: true
webhookVersion: v1
version: "3"
133 changes: 133 additions & 0 deletions operator/api/v1beta1/alertingrule_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package v1beta1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// AlertingRuleSpec defines the desired state of AlertingRule: the tenant in
// which the rules are evaluated and the rule groups themselves.
type AlertingRuleSpec struct {
// TenantID of the tenant in which the alerting rules are evaluated.
//
// +required
// +kubebuilder:validation:Required
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Tenant ID"
TenantID string `json:"tenantID"`

// Groups is the list of alerting rule groups.
//
// +optional
// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Groups"
Groups []*AlertingRuleGroup `json:"groups"`
}

// AlertingRuleGroup defines a group of Loki alerting rules that share a name,
// an evaluation interval and an alert limit.
type AlertingRuleGroup struct {
// Name of the alerting rule group. Must be unique within all alerting rules.
//
// +required
// +kubebuilder:validation:Required
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Name"
Name string `json:"name"`

// Interval defines the time interval between evaluations of the alerting
// rule group. Defaults to 1m.
//
// +optional
// +kubebuilder:validation:Optional
// +kubebuilder:default:="1m"
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Evaluation Interval"
Interval PrometheusDuration `json:"interval"`

// Limit defines the number of alerts an alerting rule can produce. 0 is no limit.
//
// +optional
// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Limit of firing alerts"
Limit int32 `json:"limit,omitempty"`

// Rules defines the list of alerting rules in this group.
//
// +required
// +kubebuilder:validation:Required
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Rules"
Rules []*AlertingRuleGroupSpec `json:"rules"`
}

// AlertingRuleGroupSpec defines a single Loki alerting rule inside an
// AlertingRuleGroup.
type AlertingRuleGroupSpec struct {
// The name of the alert. Must be a valid label value.
//
// +optional
// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Name"
Alert string `json:"alert,omitempty"`

// The LogQL expression to evaluate. Every evaluation cycle this is
// evaluated at the current time, and all resultant time series become
// pending/firing alerts.
//
// +required
// +kubebuilder:validation:Required
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="LogQL Expression"
Expr string `json:"expr"`

// Alerts are considered firing once they have been returned for this long.
// Alerts which have not yet fired for long enough are considered pending.
//
// +optional
// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Firing Threshold"
For PrometheusDuration `json:"for,omitempty"`

// Annotations to add to each alert.
//
// +optional
// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Annotations"
Annotations map[string]string `json:"annotations,omitempty"`

// Labels to add to each alert.
//
// +optional
// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Labels"
Labels map[string]string `json:"labels,omitempty"`
}

// AlertingRuleStatus defines the observed state of AlertingRule.
type AlertingRuleStatus struct {
// Conditions of the AlertingRule generation health.
//
// +optional
// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=status,xDescriptors="urn:alm:descriptor:io.kubernetes.conditions"
Conditions []metav1.Condition `json:"conditions,omitempty"`
}

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status

// AlertingRule is the Schema for the alertingrules API. It pairs the desired
// state (Spec) with the observed state (Status), which is exposed as a
// status subresource.
//
// +operator-sdk:csv:customresourcedefinitions:displayName="AlertingRule",resources={{LokiStack,v1beta1}}
type AlertingRule struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

Spec AlertingRuleSpec `json:"spec,omitempty"`
Status AlertingRuleStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// AlertingRuleList contains a list of AlertingRule.
type AlertingRuleList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
Items []AlertingRule `json:"items"`
}

// init registers the AlertingRule and AlertingRuleList types with the
// package-level SchemeBuilder.
func init() {
SchemeBuilder.Register(&AlertingRule{}, &AlertingRuleList{})
}
105 changes: 105 additions & 0 deletions operator/api/v1beta1/alertingrule_webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package v1beta1

import (
"github.com/grafana/loki/pkg/logql/syntax"

"github.com/prometheus/common/model"

apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/validation/field"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/webhook"
)

// SetupWebhookWithManager builds a webhook for the AlertingRule type on top of
// the given controller-runtime manager and returns any error reported while
// completing the webhook builder.
func (r *AlertingRule) SetupWebhookWithManager(mgr ctrl.Manager) error {
	webhookBuilder := ctrl.NewWebhookManagedBy(mgr).For(r)

	return webhookBuilder.Complete()
}

//+kubebuilder:webhook:path=/validate-loki-grafana-com-v1beta1-alertingrule,mutating=false,failurePolicy=fail,sideEffects=None,groups=loki.grafana.com,resources=alertingrules,verbs=create;update,versions=v1beta1,name=valertingrule.kb.io,admissionReviewVersions=v1

// Compile-time assertion that *AlertingRule implements webhook.Validator.
var _ webhook.Validator = &AlertingRule{}

// ValidateCreate implements webhook.Validator so a webhook will be registered for the type.
// It runs the shared rule group validation on the receiver and returns its result.
func (r *AlertingRule) ValidateCreate() error {
return r.validate()
}

// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type.
// The runtime.Object argument is ignored; only the receiver is validated, using
// the same checks as on create.
func (r *AlertingRule) ValidateUpdate(_ runtime.Object) error {
return r.validate()
}

// ValidateDelete implements webhook.Validator so a webhook will be registered for the type.
// No validation is performed on delete; it always returns nil.
func (r *AlertingRule) ValidateDelete() error {
// Do nothing
return nil
}

// validate checks all rule groups of the AlertingRule and collects every
// problem found instead of stopping at the first one.
//
// The following checks are performed:
//   - no group or rule entry is null,
//   - group names are unique among the groups of this AlertingRule,
//   - each group's Interval parses as a Prometheus duration,
//   - for rules with a non-empty Alert, a non-empty For parses as a
//     Prometheus duration (For is optional, so an empty value is accepted),
//   - each rule's Expr is accepted by the LogQL parser.
//
// It returns nil when no problem is found. Otherwise it returns an Invalid
// API status error for kind AlertingRule that carries one field error per
// problem.
func (r *AlertingRule) validate() error {
	var allErrs field.ErrorList

	groupsPath := field.NewPath("Spec").Child("Groups")
	seen := make(map[string]bool, len(r.Spec.Groups))

	for i, group := range r.Spec.Groups {
		groupPath := groupsPath.Index(i)

		// Groups holds pointers; report a null entry instead of panicking on it.
		if group == nil {
			allErrs = append(allErrs, field.Required(groupPath, "rule group must not be null"))
			continue
		}

		// Check for group name uniqueness
		if seen[group.Name] {
			allErrs = append(allErrs, field.Invalid(
				groupPath.Child("Name"),
				group.Name,
				ErrGroupNamesNotUnique.Error(),
			))
		}
		seen[group.Name] = true

		// Check if rule evaluation period is a valid PromQL duration
		if _, err := model.ParseDuration(string(group.Interval)); err != nil {
			allErrs = append(allErrs, field.Invalid(
				groupPath.Child("Interval"),
				group.Interval,
				ErrParseEvaluationInterval.Error(),
			))
		}

		rulesPath := groupPath.Child("Rules")

		for j, rule := range group.Rules {
			rulePath := rulesPath.Index(j)

			// Rules holds pointers as well; same guard as for groups.
			if rule == nil {
				allErrs = append(allErrs, field.Required(rulePath, "rule must not be null"))
				continue
			}

			// Check if alert for period is a valid PromQL duration. For is an
			// optional field, so only a value that is actually set is parsed;
			// parsing the empty string would always fail.
			if forPeriod := string(rule.For); rule.Alert != "" && forPeriod != "" {
				if _, err := model.ParseDuration(forPeriod); err != nil {
					allErrs = append(allErrs, field.Invalid(
						rulePath.Child("For"),
						rule.For,
						ErrParseAlertForPeriod.Error(),
					))
				}
			}

			// Check if the LogQL parser can parse the rule expression
			if _, err := syntax.ParseExpr(rule.Expr); err != nil {
				allErrs = append(allErrs, field.Invalid(
					rulePath.Child("Expr"),
					rule.Expr,
					ErrParseLogQLExpression.Error(),
				))
			}
		}
	}

	if len(allErrs) == 0 {
		return nil
	}

	return apierrors.NewInvalid(
		schema.GroupKind{Group: "loki.grafana.com", Kind: "AlertingRule"},
		r.Name,
		allErrs,
	)
}
Loading

0 comments on commit fd627f2

Please sign in to comment.