Skip to content

Commit

Permalink
fix: don't validate machine.install if installed
Browse files Browse the repository at this point in the history
As Talos doesn't consume `.machine.install` if already installed, there
is no point in validating it once already installed.

This fixes a problem users often run into: after a reboot/upgrade the
system disk blockdevice name changes, due to the kernel upgrade, or just
unpredictable behavior of device discovery. Talos fails to boot as it
can't validate the machine config, while it's already installed, so
actual blockdevice name doesn't matter.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
  • Loading branch information
smira committed Nov 3, 2023
1 parent dff6006 commit 813442d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,16 @@ func (s *Server) Register(obj *grpc.Server) {
timeapi.RegisterTimeServiceServer(obj, &TimeServer{ConfigProvider: s.Controller.Runtime()})
}

// modeWrapper overrides RequiresInstall() based on actual installed status.
type modeWrapper struct {
runtime.Mode
installed bool
}

func (m modeWrapper) RequiresInstall() bool {
return m.Mode.RequiresInstall() && !m.installed
}

// ApplyConfiguration implements machine.MachineService.
//
//nolint:gocyclo,cyclop
Expand All @@ -169,7 +179,12 @@ func (s *Server) ApplyConfiguration(ctx context.Context, in *machine.ApplyConfig
return nil, status.Error(codes.InvalidArgument, err.Error())
}

warnings, err := cfgProvider.Validate(s.Controller.Runtime().State().Platform().Mode())
warnings, err := cfgProvider.Validate(
modeWrapper{
Mode: s.Controller.Runtime().State().Platform().Mode(),
installed: s.Controller.Runtime().State().Machine().Installed(),
},
)
if err != nil {
return nil, status.Error(codes.InvalidArgument, err.Error())
}
Expand Down
16 changes: 15 additions & 1 deletion internal/app/machined/pkg/controllers/config/acquire.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,19 @@ func (ctrl *AcquireController) stateDisk(ctx context.Context, r controller.Runti
}
}

// validationModeDiskConfig is a "fake" validation mode for config loaded from disk.
type validationModeDiskConfig struct{}

// RequiresInstall implements validation.RuntimeMode interface.
func (validationModeDiskConfig) RequiresInstall() bool {
return false
}

// String implements validation.RuntimeMode interface.
func (validationModeDiskConfig) String() string {
return "diskConfig"
}

// loadFromDisk is a helper function for stateDisk.
func (ctrl *AcquireController) loadFromDisk(logger *zap.Logger) (config.Provider, error) {
logger.Debug("loading config from STATE", zap.String("path", ctrl.ConfigPath))
Expand All @@ -232,7 +245,8 @@ func (ctrl *AcquireController) loadFromDisk(logger *zap.Logger) (config.Provider
return nil, nil
}

warnings, err := cfg.Validate(ctrl.ValidationMode)
// if the STATE partition is present & contains machine config, Talos is already installed
warnings, err := cfg.Validate(validationModeDiskConfig{})
if err != nil {
return nil, fmt.Errorf("failed to validate on-disk config: %w", err)
}
Expand Down
16 changes: 8 additions & 8 deletions internal/integration/api/apply-config.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ func (suite *ApplyConfigSuite) TestApplyNoReboot() {
nodeCtx := client.WithNodes(suite.ctx, node)

provider, err := suite.ReadConfigFromNode(nodeCtx)
suite.Assert().Nilf(err, "failed to read existing config from node %q: %w", node, err)
suite.Require().Nilf(err, "failed to read existing config from node %q: %s", node, err)

cfg := provider.RawV1Alpha1()
suite.Require().NotNil(cfg)
Expand All @@ -395,7 +395,7 @@ func (suite *ApplyConfigSuite) TestApplyNoReboot() {
suite.Require().NoError(err)

cfgDataOut, err := provider.Bytes()
suite.Assert().Nilf(err, "failed to marshal updated machine config data (node %q): %w", node, err)
suite.Require().Nilf(err, "failed to marshal updated machine config data (node %q): %s", node, err)

_, err = suite.Client.ApplyConfiguration(
nodeCtx, &machineapi.ApplyConfigurationRequest{
Expand Down Expand Up @@ -429,7 +429,7 @@ func (suite *ApplyConfigSuite) TestApplyDryRun() {
nodeCtx := client.WithNodes(suite.ctx, node)

provider, err := suite.ReadConfigFromNode(nodeCtx)
suite.Assert().Nilf(err, "failed to read existing config from node %q: %w", node, err)
suite.Require().Nilf(err, "failed to read existing config from node %q: %s", node, err)

cfg := provider.RawV1Alpha1()
suite.Require().NotNil(cfg)
Expand All @@ -441,7 +441,7 @@ func (suite *ApplyConfigSuite) TestApplyDryRun() {
suite.Require().NoError(err)

cfgDataOut, err := provider.Bytes()
suite.Assert().Nilf(err, "failed to marshal updated machine config data (node %q): %w", node, err)
suite.Require().Nilf(err, "failed to marshal updated machine config data (node %q): %s", node, err)

reply, err := suite.Client.ApplyConfiguration(
nodeCtx, &machineapi.ApplyConfigurationRequest{
Expand All @@ -451,8 +451,8 @@ func (suite *ApplyConfigSuite) TestApplyDryRun() {
},
)

suite.Assert().Nilf(err, "failed to apply configuration (node %q): %w", node, err)
suite.Require().Contains(reply.Messages[0].ModeDetails, "Dry run summary")
suite.Require().Nilf(err, "failed to apply configuration (node %q): %s", node, err)
suite.Assert().Contains(reply.Messages[0].ModeDetails, "Dry run summary")
}

// TestApplyTry applies the config in try mode with a short timeout.
Expand Down Expand Up @@ -496,7 +496,7 @@ func (suite *ApplyConfigSuite) TestApplyTry() {
)

cfgDataOut, err := container.NewV1Alpha1(cfg).Bytes()
suite.Assert().Nilf(err, "failed to marshal updated machine config data (node %q): %s", node, err)
suite.Require().Nilf(err, "failed to marshal updated machine config data (node %q): %s", node, err)

_, err = suite.Client.ApplyConfiguration(
nodeCtx, &machineapi.ApplyConfigurationRequest{
Expand All @@ -508,7 +508,7 @@ func (suite *ApplyConfigSuite) TestApplyTry() {
suite.Assert().Nilf(err, "failed to apply configuration (node %q): %s", node, err)

provider, err = getMachineConfig(nodeCtx)
suite.Assert().Nilf(err, "failed to read existing config from node %q: %w", node, err)
suite.Require().Nilf(err, "failed to read existing config from node %q: %w", node, err)

suite.Assert().NotNil(provider.Config().Machine().Network())
suite.Assert().NotNil(provider.Config().Machine().Network().Devices())
Expand Down

0 comments on commit 813442d

Please sign in to comment.