-
Notifications
You must be signed in to change notification settings - Fork 99
Labels
bugSomething isn't workingSomething isn't working
Description
Describe the bug
The internal/metrics/yaml.go has a race condition in its create-delete-update operations. When multiple goroutines concurrently create, update, or delete metrics/presets, some operations are lost.
To Reproduce
func TestConcurrentUpdates(t *testing.T) {
ctx := context.Background()
tempDir := t.TempDir()
tempFile := filepath.Join(tempDir, "metrics.yaml")
yamlrw, err := metrics.NewYAMLMetricReaderWriter(ctx, tempFile)
assert.NoError(t, err)
// Create initial empty metrics file
initialMetrics := &metrics.Metrics{
MetricDefs: make(map[string]metrics.Metric),
PresetDefs: make(map[string]metrics.Preset),
}
err = yamlrw.WriteMetrics(initialMetrics)
assert.NoError(t, err)
// Number of concurrent updates
numGoroutines := 10
var wg sync.WaitGroup
errorChan := make(chan error, numGoroutines)
// Each goroutine will add a unique metric
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
metricName := fmt.Sprintf("metric_%d", id)
testMetric := metrics.Metric{
Description: fmt.Sprintf("Test metric %d", id),
SQLs: map[int]string{
1: fmt.Sprintf("SELECT %d", id),
},
}
// Simulate small random delay
time.Sleep(time.Millisecond * time.Duration(id%3))
err := yamlrw.UpdateMetric(metricName, testMetric)
if err != nil {
errorChan <- fmt.Errorf("goroutine %d: %w", id, err)
}
}(i)
}
wg.Wait()
close(errorChan)
// Check for errors during updates
var errors []error
for err := range errorChan {
errors = append(errors, err)
t.Logf("Error during concurrent update: %v", err)
}
// ensure all metrics were saved
finalMetrics, err := yamlrw.GetMetrics()
assert.NoError(t, err)
assert.Equal(t, numGoroutines, len(finalMetrics.MetricDefs),
"Expected %d metrics, but got %d. Some updates were lost due to race condition!",
numGoroutines, len(finalMetrics.MetricDefs))
// Print which metrics are missing if test fails
if len(finalMetrics.MetricDefs) != numGoroutines {
for i := 0; i < numGoroutines; i++ {
metricName := fmt.Sprintf("metric_%d", i)
if _, exists := finalMetrics.MetricDefs[metricName]; !exists {
t.Logf("LOST UPDATE: %s was not saved", metricName)
}
}
}
}Expected behavior
- All 10 concurrent metric updates should be saved
- No race conditions
- No data loss under concurrent access
Screenshots

Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bugSomething isn't workingSomething isn't working