Skip to content

Commit

Permalink
OCPBUGS-18658: Unify agent install-complete with installer
Browse files Browse the repository at this point in the history
Removed custom agent wait-for install-complete code.

Moved installer WaitForInstallComplete function from
cmd/openshift-install/main to cmd/openshift-install/command so
that the function can be made public.

Modified agent.newWaitForInstallCompleteCmd() to use the common
WaitForInstallComplete function.

The benefit of moving the agent over to the common
WaitForInstallComplete function is that the common function has a
step that waits for operators to reach a stable state before
declaring the cluster installation complete.
  • Loading branch information
rwsu committed Dec 9, 2024
1 parent 2ea001d commit 38c05c7
Show file tree
Hide file tree
Showing 11 changed files with 516 additions and 663 deletions.
41 changes: 20 additions & 21 deletions cmd/openshift-install/agent/waitfor.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,12 @@ import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"k8s.io/client-go/rest"

"github.com/openshift/installer/cmd/openshift-install/command"
agentpkg "github.com/openshift/installer/pkg/agent"
"github.com/openshift/installer/pkg/asset/agent/workflow"
)

const (
exitCodeInstallConfigError = iota + 3
exitCodeInfrastructureFailed
exitCodeBootstrapFailed
exitCodeInstallFailed
assetstore "github.com/openshift/installer/pkg/asset/store"
)

// NewWaitForCmd creates the commands for waiting for the completion of the agent-based cluster installation.
Expand All @@ -36,17 +31,17 @@ func NewWaitForCmd() *cobra.Command {
return cmd
}

func handleBootstrapError(cluster *agentpkg.Cluster, err error) {
func handleBootstrapError(ctx context.Context, config *rest.Config, cluster *agentpkg.Cluster, err error) {
logrus.Debug("Printing the event list gathered from the Agent Rest API")
cluster.PrintInfraEnvRestAPIEventList()
err2 := cluster.API.OpenShift.LogClusterOperatorConditions()
err2 := command.LogClusterOperatorConditions(ctx, config)
if err2 != nil {
logrus.Error("Attempted to gather ClusterOperator status after wait failure: ", err2)
}
logrus.Info("Use the following commands to gather logs from the cluster")
logrus.Info("openshift-install gather bootstrap --help")
logrus.Error(errors.Wrap(err, "Bootstrap failed to complete: "))
logrus.Exit(exitCodeBootstrapFailed)
logrus.Exit(command.ExitCodeBootstrapFailed)
}

func newWaitForBootstrapCompleteCmd() *cobra.Command {
Expand Down Expand Up @@ -74,11 +69,11 @@ func newWaitForBootstrapCompleteCmd() *cobra.Command {
ctx := context.Background()
cluster, err := agentpkg.NewCluster(ctx, assetDir, rendezvousIP, kubeconfigPath, sshKey, workflow.AgentWorkflowTypeInstall)
if err != nil {
logrus.Exit(exitCodeBootstrapFailed)
logrus.Exit(command.ExitCodeBootstrapFailed)
}

if err := agentpkg.WaitForBootstrapComplete(cluster); err != nil {
handleBootstrapError(cluster, err)
handleBootstrapError(ctx, cluster.API.Kube.Config, cluster, err)
}
},
}
Expand Down Expand Up @@ -109,25 +104,29 @@ func newWaitForInstallCompleteCmd() *cobra.Command {
ctx := context.Background()
cluster, err := agentpkg.NewCluster(ctx, assetDir, rendezvousIP, kubeconfigPath, sshKey, workflow.AgentWorkflowTypeInstall)
if err != nil {
logrus.Exit(exitCodeBootstrapFailed)
logrus.Exit(command.ExitCodeBootstrapFailed)
}

if err := agentpkg.WaitForBootstrapComplete(cluster); err != nil {
handleBootstrapError(cluster, err)
handleBootstrapError(ctx, cluster.API.Kube.Config, cluster, err)
}

if err = agentpkg.WaitForInstallComplete(cluster); err != nil {
assetStore, err := assetstore.NewStore(command.RootOpts.Dir)
if err != nil {
logrus.Error(err)
err2 := cluster.API.OpenShift.LogClusterOperatorConditions()
logrus.Exit(command.ExitCodeInstallFailed)
}

if err = command.WaitForInstallComplete(ctx, cluster.API.Kube.Config, assetStore); err != nil {
logrus.Error(err)
err2 := command.LogClusterOperatorConditions(ctx, cluster.API.Kube.Config)
if err2 != nil {
logrus.Error("Attempted to gather ClusterOperator status after wait failure: ", err2)
}
logrus.Error(`Cluster initialization failed because one or more operators are not functioning properly.
The cluster should be accessible for troubleshooting as detailed in the documentation linked below,
https://docs.openshift.com/container-platform/latest/support/troubleshooting/troubleshooting-installations.html`)
logrus.Exit(exitCodeInstallFailed)
command.LogTroubleshootingLink()
logrus.Error(err)
logrus.Exit(command.ExitCodeInstallFailed)
}
cluster.PrintInstallationComplete()
},
}
}
47 changes: 47 additions & 0 deletions cmd/openshift-install/command/gather.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package command

import (
"context"
"fmt"

"github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/rest"

configv1 "github.com/openshift/api/config/v1"
configclient "github.com/openshift/client-go/config/clientset/versioned"
)

// LogClusterOperatorConditions lists every ClusterOperator object through a
// config client built from config and logs the conditions that are worth
// reporting.
//
// The following conditions are skipped silently:
//   - any Upgradeable condition,
//   - Available with status True,
//   - Degraded or Progressing with status False.
//
// Remaining Available and Degraded conditions are logged at error level; every
// other remaining condition is logged at info level.
//
// It returns a wrapped error when the client cannot be created or the
// ClusterOperator list call fails, and nil otherwise.
func LogClusterOperatorConditions(ctx context.Context, config *rest.Config) error {
	cfgClient, err := configclient.NewForConfig(config)
	if err != nil {
		return fmt.Errorf("creating a config client: %w", err)
	}

	list, err := cfgClient.ConfigV1().ClusterOperators().List(ctx, metav1.ListOptions{})
	if err != nil {
		return fmt.Errorf("listing ClusterOperator objects: %w", err)
	}

	// One message layout is shared by both log levels.
	const format = "Cluster operator %s %s is %s with %s: %s"

	for i := range list.Items {
		co := &list.Items[i]
		for _, cond := range co.Status.Conditions {
			switch cond.Type {
			case configv1.OperatorUpgradeable:
				// Upgradeable is never reported here.
				continue
			case configv1.OperatorAvailable:
				if cond.Status == configv1.ConditionTrue {
					continue
				}
				logrus.Errorf(format, co.ObjectMeta.Name, cond.Type, cond.Status, cond.Reason, cond.Message)
			case configv1.OperatorDegraded:
				if cond.Status == configv1.ConditionFalse {
					continue
				}
				logrus.Errorf(format, co.ObjectMeta.Name, cond.Type, cond.Status, cond.Reason, cond.Message)
			case configv1.OperatorProgressing:
				if cond.Status == configv1.ConditionFalse {
					continue
				}
				logrus.Infof(format, co.ObjectMeta.Name, cond.Type, cond.Status, cond.Reason, cond.Message)
			default:
				// Any other condition type is informational only.
				logrus.Infof(format, co.ObjectMeta.Name, cond.Type, cond.Status, cond.Reason, cond.Message)
			}
		}
	}

	return nil
}
Loading

0 comments on commit 38c05c7

Please sign in to comment.