Skip to content

Commit 505c5ed

Browse files
committed
Fix alertmanager reloading bug that removes user template files
Signed-off-by: Charlie Le <charlie_le@apple.com>
1 parent 1ba0cb0 commit 505c5ed

2 files changed

Lines changed: 18 additions & 1 deletion

File tree

pkg/alertmanager/multitenant.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -887,12 +887,18 @@ func (am *MultitenantAlertmanager) setConfig(cfg alertspb.AlertConfigDesc) error
887887
// List existing files to keep track the ones to be removed
888888
if oldTemplateFiles, err := os.ReadDir(userTemplateDir); err == nil {
889889
for _, file := range oldTemplateFiles {
890-
pathsToRemove[filepath.Join(userTemplateDir, file.Name())] = struct{}{}
890+
pathToRemove, err := safeTemplateFilepath(userTemplateDir, file.Name())
891+
if err != nil {
892+
return err
893+
}
894+
level.Debug(am.logger).Log("msg", "discovered existing file", "oldTemplateFile", file.Name(), "pathToRemove", pathToRemove, "user", cfg.User)
895+
pathsToRemove[pathToRemove] = struct{}{}
891896
}
892897
}
893898

894899
for _, tmpl := range cfg.Templates {
895900
templateFilePath, err := safeTemplateFilepath(userTemplateDir, tmpl.Filename)
901+
level.Debug(am.logger).Log("msg", "skipping template file since it's still used by the config", "file", templateFilePath, "user", cfg.User)
896902
if err != nil {
897903
return err
898904
}
@@ -906,10 +912,12 @@ func (am *MultitenantAlertmanager) setConfig(cfg alertspb.AlertConfigDesc) error
906912

907913
if hasChanged {
908914
hasTemplateChanges = true
915+
level.Debug(am.logger).Log("msg", "template file changed", "file", templateFilePath, "body", tmpl.Body, "user", cfg.User)
909916
}
910917
}
911918

912919
for pathToRemove := range pathsToRemove {
920+
level.Debug(am.logger).Log("msg", "removing stale template file", "file", pathToRemove, "user", cfg.User)
913921
err := os.Remove(pathToRemove)
914922
if err != nil {
915923
level.Warn(am.logger).Log("msg", "failed to remove file", "file", pathToRemove, "err", err)

pkg/alertmanager/multitenant_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,11 @@ func TestMultitenantAlertmanager_loadAndSyncConfigs(t *testing.T) {
226226

227227
reg := prometheus.NewPedanticRegistry()
228228
cfg := mockAlertmanagerConfig(t)
229+
230+
// Using a relative directory here instead of a fully qualified one to test that files are cleaned up properly
231+
relativeTmpDir := "TestMultitenantAlertmanager_loadAndSyncConfigs"
232+
cfg.DataDir = relativeTmpDir
233+
229234
am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, nil, log.NewNopLogger(), reg)
230235
require.NoError(t, err)
231236

@@ -370,6 +375,10 @@ templates:
370375
require.True(t, dirExists(t, user3Dir))
371376
require.True(t, fileExists(t, filepath.Join(user3Dir, templatesDir, "first.tpl")))
372377
require.False(t, fileExists(t, filepath.Join(user3Dir, templatesDir, "second.tpl")))
378+
379+
// Cleaning up relative tmp directory
380+
err = os.RemoveAll(relativeTmpDir)
381+
require.NoError(t, err)
373382
}
374383

375384
func TestMultitenantAlertmanager_FirewallShouldBlockHTTPBasedReceiversWhenEnabled(t *testing.T) {

0 commit comments

Comments
 (0)