Skip to content

Commit f049fb8

Browse files
committed
OCPBUGS-60098: podman-etcd: prevent last active member from leaving the etcd member list
When stopping etcd instances, simultaneous member removal from both nodes can corrupt the etcd Write-Ahead Log (WAL). This change implements a two-part solution: 1. Concurrent stop protection: when multiple nodes are stopping, the alphabetically second node delays its member removal by 10 seconds. This prevents simultaneous member-list updates that can corrupt the WAL. 2. Last-member detection: after any delay, the agent checks the active resource count. If this node is the last active member, it skips member removal to avoid leaving an empty cluster. Additionally, this change reorders podman_stop() to clear the member_id attribute after leaving the member list, ensuring the attribute reflects the actual cluster state during shutdown.
1 parent 677e3ad commit f049fb8

File tree

1 file changed

+59
-16
lines changed

1 file changed

+59
-16
lines changed

heartbeat/podman-etcd

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,7 +1639,7 @@ can_reuse_container() {
16391639
OCF_RESKEY_reuse=0
16401640
return "$OCF_SUCCESS"
16411641
fi
1642-
1642+
16431643
if ! filtered_original_pod_manifest=$(filter_pod_manifest "$OCF_RESKEY_pod_manifest"); then
16441644
return $OCF_ERR_GENERIC
16451645
fi
@@ -2021,6 +2021,62 @@ podman_start()
20212021
done
20222022
}
20232023

2024+
# leave_etcd_member_list removes the current node from the etcd member list during
# shutdown to ensure clean cluster state.
#
# Skips removal if this is the standalone (last) node. When both nodes are stopping
# concurrently, delays the second node to prevent simultaneous member removal that
# could corrupt the etcd WAL.
#
# Globals (read):
#   OCF_RESKEY_CRM_meta_notify_stop_resource  whitespace-separated list of stopping resources
#   OCF_RESKEY_node_ip_map                    "<node>:<ip>;<node>:<ip>" mapping
#   NODENAME                                  name of the local node
#   DELAY_SECOND_NODE_LEAVE_SEC               delay in seconds for the second node (default 10)
#   CONTAINER                                 name of the etcd container
# Arguments: none
# Returns: best effort; every failure is logged and none is propagated as a
#          distinct exit code.
leave_etcd_member_list()
{
	# Keep all working variables local so nothing leaks into the caller's scope.
	local member_id
	local rc
	local stopping_resources_count
	local delayed_node
	local delay_sec
	local active_resources_count
	local node_ip
	local endpoint

	if ! member_id=$(attribute_node_member_id get); then
		ocf_log err "error leaving members list: could not get member-id"
		return
	fi

	if is_standalone; then
		ocf_log info "last member. Not leaving the member list"
		return
	fi

	stopping_resources_count=$(echo "$OCF_RESKEY_CRM_meta_notify_stop_resource" | wc -w)
	ocf_log info "found '$stopping_resources_count' stopping etcd resources (stop: '$OCF_RESKEY_CRM_meta_notify_stop_resource')"
	if [ "$stopping_resources_count" -gt 1 ]; then
		# Prevent WAL corruption by delaying the alphabetically second node's member
		# removal when both nodes are stopping concurrently.
		#
		# Split the map on ';', keep only the node name (everything before the
		# first ':'), trim whitespace and drop empty entries so that a trailing ';'
		# or a space after ';' cannot shift the selection. The sort runs in the C
		# locale so that every node computes the same order regardless of its
		# locale settings.
		delayed_node=$(printf '%s\n' "$OCF_RESKEY_node_ip_map" \
			| tr ';' '\n' \
			| sed -e 's/:.*$//' -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' \
			| LC_ALL=C sort \
			| sed -n '2p')

		if [ -z "$delayed_node" ]; then
			ocf_log warn "could not determine node to be delayed: not leaving the member list"
			return
		fi

		if [ "$NODENAME" = "$delayed_node" ]; then
			delay_sec="${DELAY_SECOND_NODE_LEAVE_SEC:-10}"
			ocf_log info "delaying stop for ${delay_sec}s to prevent simultaneous etcd member removal"
			sleep "$delay_sec"
		fi
	fi

	# Re-evaluate after the optional delay: the peer may have stopped meanwhile,
	# which makes this node the last member.
	active_resources_count=$(get_truly_active_resources_count)
	case "$active_resources_count" in
		''|*[!0-9]*)
			# A non-numeric value would make the integer test below error out.
			# Keep the previous outcome (continue with the removal) but say why.
			ocf_log warn "could not determine the active resources count (got '$active_resources_count'): proceeding with member removal"
			;;
		*)
			if [ "$active_resources_count" -lt 1 ]; then
				ocf_log info "last member. Not leaving the member list"
				return
			fi
			;;
	esac

	ocf_log info "leaving members list as member with ID $member_id"
	node_ip=$(attribute_node_ip get)
	endpoint="$(ip_url "$node_ip"):2379"

	# Capture the exit status right after the command: inside "if ! cmd; then"
	# the value of $? is the status of the negation, i.e. always 0.
	ocf_run podman exec "$CONTAINER" etcdctl member remove "$member_id" --endpoints="$endpoint"
	rc=$?
	if [ "$rc" -ne 0 ]; then
		ocf_log err "error leaving members list, error code: $rc"
	fi
}
2079+
20242080
podman_stop()
20252081
{
20262082
local timeout=60
@@ -2042,21 +2098,7 @@ podman_stop()
20422098
return $OCF_SUCCESS
20432099
fi
20442100

2045-
if ! member_id=$(attribute_node_member_id get); then
2046-
ocf_log err "error leaving members list: could not get member-id"
2047-
else
2048-
# TODO: is it worth/possible to check the current status instead than relying on cached attributes?
2049-
if is_standalone; then
2050-
ocf_log info "last member. Not leaving the member list"
2051-
else
2052-
ocf_log info "leaving members list as member with ID $member_id"
2053-
endpoint="$(ip_url $(attribute_node_ip get)):2379"
2054-
if ! ocf_run podman exec "$CONTAINER" etcdctl member remove "$member_id" --endpoints="$endpoint"; then
2055-
rc=$?
2056-
ocf_log err "error leaving members list, error code: $rc"
2057-
fi
2058-
fi
2059-
fi
2101+
leave_etcd_member_list
20602102
attribute_node_member_id clear
20612103

20622104
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
@@ -2197,6 +2239,7 @@ ETCD_CERTS_HASH_FILE="${OCF_RESKEY_config_location}/certs.hash"
21972239
# State file location: Uses HA_RSCTMP to ensure automatic cleanup on reboot.
21982240
# This is intentional - reboots are controlled stops, not failures requiring detection.
21992241
CONTAINER_HEARTBEAT_FILE=${HA_RSCTMP}/podman-container-last-running
2242+
DELAY_SECOND_NODE_LEAVE_SEC=10
22002243

22012244
# Note: we currently monitor podman containers with the "podman exec"
22022245
# command, so make sure that invocation is always valid by enforcing the

0 commit comments

Comments
 (0)