Skip to content

Commit ffea952

Browse files
Andrew LengAndrew Leng
authored andcommitted
fix: make alert merge logic more restrictive for API resolution
- Add StartsAt equality check to isResolving condition to ensure only alerts with identical start times can be merged for resolution - This prevents unintended merges of alerts that appear similar but represent different alert instances - Add comprehensive unit tests covering all merge scenarios: * Resolving active alerts via API * Re-resolving already resolved alerts to earlier times * Rejecting merges with different StartsAt (security) * Rejecting attempts to extend active alerts (security) Addresses reviewer concern about overly permissive merge logic that could allow merging alerts 'regardless of timestamps'. Signed-off-by: Andrew Leng <work@Andrews-MacBook-Air.local>
1 parent dd21d43 commit ffea952

File tree

2 files changed

+199
-1
lines changed

2 files changed

+199
-1
lines changed

provider/mem/mem.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,10 @@ func (a *Alerts) Put(alerts ...*types.Alert) error {
254254
// This handles the case where an alert is resolved via API without time overlap.
255255
// An alert is being resolved if it has endsAt set and the old alert doesn't,
256256
// or if the new endsAt is earlier than the old endsAt.
257-
isResolving := !alert.EndsAt.IsZero() && (old.EndsAt.IsZero() || alert.EndsAt.Before(old.EndsAt))
257+
// Additionally require StartsAt to match to ensure we're updating the same alert instance.
258+
isResolving := !alert.EndsAt.IsZero() &&
259+
(old.EndsAt.IsZero() || alert.EndsAt.Before(old.EndsAt)) &&
260+
alert.StartsAt.Equal(old.StartsAt)
258261

259262
if hasOverlap || isResolving {
260263
alert = old.Merge(alert)

provider/mem/mem_test.go

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,3 +710,198 @@ func TestSubscriberChannelMetrics(t *testing.T) {
710710
return writeCount == float64(len(alertsToSend))
711711
}, 1*time.Second, 10*time.Millisecond, "subscriberChannelWrites should equal the number of alerts sent")
712712
}
713+
714+
// TestAlertMergeResolution tests the specific case of resolving alerts via API
715+
// where endsAt is set without normal time overlap.
716+
func TestAlertMergeResolution(t *testing.T) {
717+
marker := types.NewMarker(prometheus.NewRegistry())
718+
alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, noopCallback{}, promslog.NewNopLogger(), prometheus.NewRegistry())
719+
require.NoError(t, err)
720+
721+
now := time.Now()
722+
baseLabels := model.LabelSet{"alertname": "test", "instance": "localhost:9090"}
723+
724+
// Test case 1: Resolving an active alert (old alert has no EndsAt)
725+
t.Run("resolve_active_alert", func(t *testing.T) {
726+
activeAlert := &types.Alert{
727+
Alert: model.Alert{
728+
Labels: baseLabels,
729+
Annotations: model.LabelSet{"summary": "Active alert"},
730+
StartsAt: now.Add(-1 * time.Hour),
731+
EndsAt: time.Time{}, // Zero time = active
732+
GeneratorURL: "http://example.com/prometheus",
733+
},
734+
UpdatedAt: now.Add(-30 * time.Minute),
735+
Timeout: false,
736+
}
737+
738+
resolveAlert := &types.Alert{
739+
Alert: model.Alert{
740+
Labels: baseLabels,
741+
Annotations: model.LabelSet{"summary": "Resolved alert"},
742+
StartsAt: now.Add(-1 * time.Hour), // Same StartsAt
743+
EndsAt: now.Add(-10 * time.Minute), // EndsAt in the past
744+
GeneratorURL: "http://example.com/prometheus",
745+
},
746+
UpdatedAt: now,
747+
Timeout: false,
748+
}
749+
750+
// Insert active alert first
751+
err := alerts.Put(activeAlert)
752+
require.NoError(t, err)
753+
754+
// Verify it's stored
755+
stored, err := alerts.Get(activeAlert.Fingerprint())
756+
require.NoError(t, err)
757+
require.True(t, stored.EndsAt.IsZero(), "Active alert should have zero EndsAt")
758+
759+
// Now resolve it
760+
err = alerts.Put(resolveAlert)
761+
require.NoError(t, err)
762+
763+
// Verify it was merged and resolved
764+
updated, err := alerts.Get(activeAlert.Fingerprint())
765+
require.NoError(t, err)
766+
require.False(t, updated.EndsAt.IsZero(), "Alert should now have EndsAt set")
767+
require.True(t, updated.EndsAt.Equal(resolveAlert.EndsAt), "EndsAt should match resolve alert")
768+
})
769+
770+
// Test case 2: Re-resolving an already resolved alert to an earlier time
771+
t.Run("re_resolve_earlier", func(t *testing.T) {
772+
resolvedAlert := &types.Alert{
773+
Alert: model.Alert{
774+
Labels: model.LabelSet{"alertname": "re_resolve_test"},
775+
Annotations: model.LabelSet{"summary": "Originally resolved"},
776+
StartsAt: now.Add(-2 * time.Hour),
777+
EndsAt: now.Add(-30 * time.Minute), // Already resolved
778+
GeneratorURL: "http://example.com/prometheus",
779+
},
780+
UpdatedAt: now.Add(-1 * time.Hour),
781+
Timeout: false,
782+
}
783+
784+
reResolveAlert := &types.Alert{
785+
Alert: model.Alert{
786+
Labels: model.LabelSet{"alertname": "re_resolve_test"},
787+
Annotations: model.LabelSet{"summary": "Re-resolved earlier"},
788+
StartsAt: now.Add(-2 * time.Hour), // Same StartsAt
789+
EndsAt: now.Add(-1 * time.Hour), // Earlier resolution
790+
GeneratorURL: "http://example.com/prometheus",
791+
},
792+
UpdatedAt: now,
793+
Timeout: false,
794+
}
795+
796+
// Insert resolved alert first
797+
err := alerts.Put(resolvedAlert)
798+
require.NoError(t, err)
799+
800+
// Verify it's stored with original EndsAt
801+
stored, err := alerts.Get(resolvedAlert.Fingerprint())
802+
require.NoError(t, err)
803+
require.True(t, stored.EndsAt.Equal(resolvedAlert.EndsAt), "Should have original EndsAt")
804+
805+
// Now re-resolve it earlier
806+
err = alerts.Put(reResolveAlert)
807+
require.NoError(t, err)
808+
809+
// Verify it was updated to the earlier resolution time
810+
updated, err := alerts.Get(resolvedAlert.Fingerprint())
811+
require.NoError(t, err)
812+
require.True(t, updated.EndsAt.Equal(reResolveAlert.EndsAt), "Should have earlier EndsAt")
813+
})
814+
815+
// Test case 3: Reject merge when StartsAt doesn't match (different alert instance)
816+
t.Run("reject_different_starts_at", func(t *testing.T) {
817+
originalAlert := &types.Alert{
818+
Alert: model.Alert{
819+
Labels: model.LabelSet{"alertname": "starts_at_test"},
820+
Annotations: model.LabelSet{"summary": "Original alert"},
821+
StartsAt: now.Add(-1 * time.Hour),
822+
EndsAt: time.Time{}, // Active
823+
GeneratorURL: "http://example.com/prometheus",
824+
},
825+
UpdatedAt: now.Add(-30 * time.Minute),
826+
Timeout: false,
827+
}
828+
829+
differentStartsAlert := &types.Alert{
830+
Alert: model.Alert{
831+
Labels: model.LabelSet{"alertname": "starts_at_test"}, // Same labels
832+
Annotations: model.LabelSet{"summary": "Different start time"},
833+
StartsAt: now.Add(-2 * time.Hour), // Different StartsAt!
834+
EndsAt: now.Add(-10 * time.Minute), // Trying to resolve
835+
GeneratorURL: "http://example.com/prometheus",
836+
},
837+
UpdatedAt: now,
838+
Timeout: false,
839+
}
840+
841+
// Insert original alert
842+
err := alerts.Put(originalAlert)
843+
require.NoError(t, err)
844+
845+
// Try to "resolve" with different StartsAt
846+
err = alerts.Put(differentStartsAlert)
847+
require.NoError(t, err)
848+
849+
// Verify it was NOT merged - should be stored as separate alert
850+
allAlerts := alerts.GetAll()
851+
require.Len(t, allAlerts, 2, "Should have 2 separate alerts")
852+
853+
// Original should still be active
854+
stored, err := alerts.Get(originalAlert.Fingerprint())
855+
require.NoError(t, err)
856+
require.True(t, stored.EndsAt.IsZero(), "Original alert should still be active")
857+
858+
// New alert should exist separately
859+
stored2, err := alerts.Get(differentStartsAlert.Fingerprint())
860+
require.NoError(t, err)
861+
require.True(t, stored2.EndsAt.Equal(differentStartsAlert.EndsAt), "New alert should have its own EndsAt")
862+
})
863+
864+
// Test case 4: Reject merge when trying to extend active alert (security check)
865+
t.Run("reject_extend_active_alert", func(t *testing.T) {
866+
activeAlert := &types.Alert{
867+
Alert: model.Alert{
868+
Labels: model.LabelSet{"alertname": "extend_test"},
869+
Annotations: model.LabelSet{"summary": "Active alert"},
870+
StartsAt: now.Add(-1 * time.Hour),
871+
EndsAt: time.Time{}, // Active
872+
GeneratorURL: "http://example.com/prometheus",
873+
},
874+
UpdatedAt: now.Add(-30 * time.Minute),
875+
Timeout: false,
876+
}
877+
878+
extendAlert := &types.Alert{
879+
Alert: model.Alert{
880+
Labels: model.LabelSet{"alertname": "extend_test"}, // Same labels
881+
Annotations: model.LabelSet{"summary": "Trying to extend"},
882+
StartsAt: now.Add(-1 * time.Hour), // Same StartsAt
883+
EndsAt: now.Add(1 * time.Hour), // Future time - trying to extend!
884+
GeneratorURL: "http://example.com/prometheus",
885+
},
886+
UpdatedAt: now,
887+
Timeout: false,
888+
}
889+
890+
// Insert active alert
891+
err := alerts.Put(activeAlert)
892+
require.NoError(t, err)
893+
894+
// Try to extend it (this should NOT merge)
895+
err = alerts.Put(extendAlert)
896+
require.NoError(t, err)
897+
898+
// Verify it was NOT merged - should be stored as separate alert
899+
allAlerts := alerts.GetAll()
900+
require.Len(t, allAlerts, 2, "Should have 2 separate alerts")
901+
902+
// Original should still be active
903+
stored, err := alerts.Get(activeAlert.Fingerprint())
904+
require.NoError(t, err)
905+
require.True(t, stored.EndsAt.IsZero(), "Original alert should still be active")
906+
})
907+
}

0 commit comments

Comments
 (0)