Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Unify warnings about unmovable pods (#389)" #430

Merged
merged 1 commit into from
Dec 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 26 additions & 40 deletions pkg/controller/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"

"fmt"

"github.com/zalando-incubator/postgres-operator/pkg/cluster"
"github.com/zalando-incubator/postgres-operator/pkg/spec"
"github.com/zalando-incubator/postgres-operator/pkg/util"
)

Expand Down Expand Up @@ -58,16 +55,15 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
return
}

if !c.nodeIsReady(nodePrev) {
c.logger.Debugf("The decommissioned node %v should have already triggered master pod migration. Previous k8s-reported state of the node: %v", util.NameFromMeta(nodePrev.ObjectMeta), nodePrev)
if util.MapContains(nodeCur.Labels, map[string]string{"master": "true"}) {
return
}

if c.nodeIsReady(nodeCur) {
c.logger.Debugf("The decommissioned node %v become schedulable again. Current k8s-reported state of the node: %v", util.NameFromMeta(nodeCur.ObjectMeta), nodeCur)
// do nothing if the node should have already triggered an update or
// if only one of the label and the unschedulability criteria are met.
if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) {
return
}

c.moveMasterPodsOffNode(nodeCur)
}

Expand All @@ -77,17 +73,16 @@ func (c *Controller) nodeIsReady(node *v1.Node) bool {
}

func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {

nodeName := util.NameFromMeta(node.ObjectMeta)
c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label %q",
c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q",
nodeName, c.opConfig.NodeReadinessLabel)

opts := metav1.ListOptions{
LabelSelector: labels.Set(c.opConfig.ClusterLabels).String(),
}
podList, err := c.KubeClient.Pods(c.opConfig.WatchedNamespace).List(opts)
if err != nil {
c.logger.Errorf("could not fetch the list of Spilo pods: %v", err)
c.logger.Errorf("could not fetch list of the pods: %v", err)
return
}

Expand All @@ -98,25 +93,17 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
}
}

movedMasterPods := 0
movableMasterPods := make(map[*v1.Pod]*cluster.Cluster)
unmovablePods := make(map[spec.NamespacedName]string)

clusters := make(map[*cluster.Cluster]bool)

masterPods := make(map[*v1.Pod]*cluster.Cluster)
movedPods := 0
for _, pod := range nodePods {

podName := util.NameFromMeta(pod.ObjectMeta)

role, ok := pod.Labels[c.opConfig.PodRoleLabel]
if !ok {
// pods with an unknown role cannot be safely moved to another node
unmovablePods[podName] = fmt.Sprintf("could not move pod %q from node %q: pod has no role label %q", podName, nodeName, c.opConfig.PodRoleLabel)
continue
}

// deployments can transparently re-create replicas so we do not move away such pods
if cluster.PostgresRole(role) == cluster.Replica {
if !ok || cluster.PostgresRole(role) != cluster.Master {
if !ok {
c.logger.Warningf("could not move pod %q: pod has no role", podName)
}
continue
}

Expand All @@ -126,45 +113,44 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
cl, ok := c.clusters[clusterName]
c.clustersMu.RUnlock()
if !ok {
unmovablePods[podName] = fmt.Sprintf("could not move master pod %q from node %q: pod belongs to an unknown Postgres cluster %q", podName, nodeName, clusterName)
c.logger.Warningf("could not move pod %q: pod does not belong to a known cluster", podName)
continue
}

if !clusters[cl] {
clusters[cl] = true
}

movableMasterPods[pod] = cl
masterPods[pod] = cl
}

for cl := range clusters {
cl.Lock()
}

for pod, cl := range movableMasterPods {

for pod, cl := range masterPods {
podName := util.NameFromMeta(pod.ObjectMeta)
if err := cl.MigrateMasterPod(podName); err == nil {
movedMasterPods++

if err := cl.MigrateMasterPod(podName); err != nil {
c.logger.Errorf("could not move master pod %q: %v", podName, err)
} else {
unmovablePods[podName] = fmt.Sprintf("could not move master pod %q from node %q: %v", podName, nodeName, err)
movedPods++
}
}

for cl := range clusters {
cl.Unlock()
}

if leftPods := len(unmovablePods); leftPods > 0 {
c.logger.Warnf("could not move %d master or unknown role pods from the node %q, you may have to delete them manually",
leftPods, nodeName)
for _, reason := range unmovablePods {
c.logger.Warning(reason)
}
}
totalPods := len(masterPods)

c.logger.Infof("%d master pods have been moved out from the node %q", movedMasterPods, nodeName)
c.logger.Infof("%d/%d master pods have been moved out from the %q node",
movedPods, totalPods, nodeName)

if leftPods := totalPods - movedPods; leftPods > 0 {
c.logger.Warnf("could not move master %d/%d pods from the %q node",
leftPods, totalPods, nodeName)
}
}

func (c *Controller) nodeDelete(obj interface{}) {
Expand Down
2 changes: 1 addition & 1 deletion run_operator_locally.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ function deploy_self_built_image() {
# update the tag in the postgres operator conf
# since the image with this tag already exists on the machine,
# docker should not attempt to fetch it from the registry due to imagePullPolicy
sed --expression "s/\(image\:.*\:\).*$/\1$TAG/; s/smoke-tested-//" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST"
sed --expression "s/\(image\:.*\:\).*$/\1$TAG/" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST"

retry "kubectl create -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource"
}
Expand Down