Skip to content

Dev: ui_node: Enable standby and maintenance on pacemaker remote node #1855

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions crmsh/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2540,7 +2540,7 @@ def bootstrap_remove(context):
if not force_flag:
utils.fatal("Removing self requires --force")
remove_self(force_flag)
elif cluster_node in xmlutil.listnodes():
elif cluster_node in utils.list_cluster_nodes():
remove_node_from_cluster(cluster_node)
else:
utils.fatal("Specified node {} is not configured in cluster! Unable to remove.".format(cluster_node))
Expand All @@ -2553,7 +2553,7 @@ def bootstrap_remove(context):
def remove_self(force_flag=False):
me = utils.this_node()
yes_to_all = _context.yes_to_all
nodes = xmlutil.listnodes(include_remote_nodes=False)
nodes = utils.list_cluster_nodes()
othernode = next((x for x in nodes if x != me), None)
if othernode is not None:
logger.info("Removing node %s from cluster on %s", me, othernode)
Expand Down
6 changes: 1 addition & 5 deletions crmsh/cibquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,7 @@ def get_cluster_nodes(cib: lxml.etree.Element) -> list[ClusterNode]:
node_id = element.get('id')
uname = element.get('uname')
if element.get('type') == 'remote':
xpath = "//primitive[@provider='pacemaker' and @type='remote']/instance_attributes/nvpair[@name='server' and @value='{}']".format(
uname if uname is not None else node_id
)
if cib.xpath(xpath):
continue
continue
assert node_id
assert uname
result.append(ClusterNode(int(node_id), uname))
Expand Down
6 changes: 3 additions & 3 deletions crmsh/completers.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def primitives(args):
return [x.get("id") for x in nodes if xmlutil.is_primitive(x)]


nodes = call(xmlutil.listnodes)
online_nodes = call(lambda x: xmlutil.CrmMonXmlParser().get_node_list(standby=x), False)
standby_nodes = call(lambda x: xmlutil.CrmMonXmlParser().get_node_list(standby=x), True)
nodes = call(xmlutil.CrmMonXmlParser.get_node_list)
online_nodes = call(lambda x: xmlutil.CrmMonXmlParser.get_node_list(standby=x), False)
standby_nodes = call(lambda x: xmlutil.CrmMonXmlParser.get_node_list(standby=x), True)

shadows = call(xmlutil.listshadows)
7 changes: 5 additions & 2 deletions crmsh/idmgmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,13 @@ def check_xml(node):
return ok


def store_xml(node):
def store_xml(node, thin=True):
if not check_xml(node):
return False
xmlutil.xmltraverse_thin(node, _store_node)
if thin:
xmlutil.xmltraverse_thin(node, _store_node)
else:
xmlutil.xmltraverse(node, _store_node)
return True


Expand Down
36 changes: 33 additions & 3 deletions crmsh/ui_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
import copy
import subprocess
from lxml import etree
from argparse import ArgumentParser, RawDescriptionHelpFormatter

from . import config
Expand All @@ -14,6 +15,7 @@
from . import ui_utils
from . import utils
from . import xmlutil
from . import idmgmt
from .cliformat import cli_nvpairs, nvpairs2list
from . import term
from .cibconfig import cib_factory
Expand Down Expand Up @@ -78,8 +80,17 @@

node_obj = cib_factory.find_node(cluster_node_name)
if node_obj is None:
logger.error("CIB is not valid!")
return False
# Append node section for remote node
if xmlutil.CrmMonXmlParser().is_node_remote(cluster_node_name):
xml_item = etree.Element("node", id=cluster_node_name, uname=cluster_node_name, type="remote")
cib_factory.create_from_node(xml_item)
node_obj = cib_factory.find_node(cluster_node_name)
if node_obj is None:
logger.error("Failed to create remote node '%s'", cluster_node_name)
return False

Check warning on line 90 in crmsh/ui_node.py

View check run for this annotation

Codecov / codecov/patch

crmsh/ui_node.py#L84-L90

Added lines #L84 - L90 were not covered by tests
else:
logger.error("Node '%s' not found in CIB", cluster_node_name)
return False

Check warning on line 93 in crmsh/ui_node.py

View check run for this annotation

Codecov / codecov/patch

crmsh/ui_node.py#L92-L93

Added lines #L92 - L93 were not covered by tests

logger.debug("update_xml_node: %s", node_obj.obj_id)

Expand Down Expand Up @@ -243,7 +254,8 @@
if options.all and args:
context.fatal_error("Should either use --all or specific node(s)")

return utils.validate_and_get_reachable_nodes(args, options.all)
include_remote = action_type in ["standby", "online"]
return utils.validate_and_get_reachable_nodes(args, options.all, include_remote)


class NodeMgmt(command.UI):
Expand Down Expand Up @@ -382,6 +394,24 @@
orig_cib = copy.deepcopy(cib)

xml_item_list = cib.xpath(xml_path)
uname_list_in_xml = [xml_item.get("uname") for xml_item in xml_item_list]
crm_mon_parser_inst = xmlutil.CrmMonXmlParser()
create_remote_node = False
for node in node_list:
if node not in uname_list_in_xml:
# Append node section for remote node
if crm_mon_parser_inst.is_node_remote(node) and lifetime_opt == "forever":
xml_item = etree.Element("node", id=node, uname=node, type="remote")
cib.find("configuration/nodes").append(xml_item)
create_remote_node = True
else:
logger.error("Node '%s' not found in CIB", node)
return False

Check warning on line 409 in crmsh/ui_node.py

View check run for this annotation

Codecov / codecov/patch

crmsh/ui_node.py#L408-L409

Added lines #L408 - L409 were not covered by tests
if create_remote_node:
# Store exsisting objects' id to avoid duplicate id generation
idmgmt.store_xml(cib, False)
xml_item_list = cib.xpath(xml_path)

for xml_item in xml_item_list:
if xml_item.get("uname") in node_list:
node_id = xml_item.get('id')
Expand Down
46 changes: 23 additions & 23 deletions crmsh/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2134,6 +2134,8 @@ def valid_nodeid(nodeid):


def get_nodeid_from_name(name):
if xmlutil.CrmMonXmlParser().is_node_remote(name):
return name
rc, out = ShellUtils().get_stdout('crm_node -l')
if rc != 0:
return None
Expand Down Expand Up @@ -2470,9 +2472,8 @@ def check_all_nodes_reachable(action_to_do: str, peer_node: str = None):
"""
Check if all cluster nodes are reachable
"""
crm_mon_inst = xmlutil.CrmMonXmlParser(peer_node)
online_nodes = crm_mon_inst.get_node_list()
offline_nodes = crm_mon_inst.get_node_list(online=False)
online_nodes = xmlutil.CrmMonXmlParser.get_node_list(online=True, only_member=True, peer=peer_node)
offline_nodes = xmlutil.CrmMonXmlParser.get_node_list(online=False, peer=peer_node)
dead_nodes = []
for node in offline_nodes:
try:
Expand Down Expand Up @@ -2699,14 +2700,6 @@ def fatal(error_msg):
raise ValueError(error_msg)


def is_standby(node):
"""
Check if the node is already standby
"""
out = sh.cluster_shell().get_stdout_or_raise_error("crm_mon -1")
return re.search(r'Node\s+{}:\s+standby'.format(node), out) is not None


def get_dlm_option_dict(peer=None):
"""
Get dlm config option dictionary
Expand Down Expand Up @@ -3249,8 +3242,9 @@ def __bool__(self):


def validate_and_get_reachable_nodes(
nodes: typing.List[str] = [],
all_nodes: bool = False
nodes_from_args: typing.List[str] = [],
all_nodes: bool = False,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not test all_nodes at the very beginning in this function? If all_node is true, you don't need to do the other complex logic.

include_remote: bool = False
) -> typing.List[str]:

no_cib = False
Expand All @@ -3262,30 +3256,36 @@ def validate_and_get_reachable_nodes(

if not cluster_member_list:
fatal("Cannot get the member list of the cluster")
for node in nodes:
if node not in cluster_member_list:
pcmk_remote_list = []
if include_remote:
pcmk_remote_list = xmlutil.CrmMonXmlParser.get_node_list(online=True, only_remote=True)
for node in nodes_from_args:
if node not in cluster_member_list and node not in pcmk_remote_list:
fatal(f"Node '{node}' is not a member of the cluster")

local_node = this_node()
# Return local node if no nodes specified
if not nodes and not all_nodes:
if not nodes_from_args and not all_nodes:
return [local_node]
# Use all cluster members if no nodes specified and all_nodes is True
node_list = nodes or cluster_member_list

# Use all nodes if no nodes specified and all_nodes is True
node_list = nodes_from_args or cluster_member_list + pcmk_remote_list
member_list = [node for node in node_list if node not in pcmk_remote_list]
remote_list = [node for node in node_list if node in pcmk_remote_list]
# Filter out unreachable nodes
node_list = get_reachable_node_list(node_list)
member_list = get_reachable_node_list(member_list)
if no_cib:
return node_list
return member_list

shell = sh.cluster_shell()
crm_mon_inst = xmlutil.CrmMonXmlParser()
for node in node_list[:]:
for node in member_list[:]:
if node == local_node or crm_mon_inst.is_node_online(node):
continue
out = shell.get_stdout_or_raise_error("crm node show", node)
if not re.search(rf"^{local_node}\(\d\): member", out, re.M):
logger.error("From the view of node '%s', node '%s' is not a member of the cluster", node, local_node)
node_list.remove(node)
member_list.remove(node)

return node_list
return member_list + remote_list
# vim:ts=4:sw=4:et:
73 changes: 51 additions & 22 deletions crmsh/xmlutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,26 +359,14 @@ def mk_rsc_type(n):
return ''.join((s1, s2, ra_type))


def listnodes(include_remote_nodes=True):
cib = cibdump2elem()
if cib is None:
return []
local_nodes = cib.xpath('/cib/configuration/nodes/node/@uname')
if include_remote_nodes:
remote_nodes = cib.xpath('/cib/status/node_state[@remote_node="true"]/@uname')
else:
remote_nodes = []
return list(set([n for n in local_nodes + remote_nodes if n]))


def is_our_node(s):
'''
Check if s is in a list of our nodes (ignore case).
This is not fast, perhaps should be cached.

Includes remote nodes as well
'''
for n in listnodes():
for n in CrmMonXmlParser.get_node_list():
if n.lower() == s.lower():
return True
return False
Expand Down Expand Up @@ -1531,20 +1519,61 @@ def is_node_online(self, node):
xpath = f'//node[@name="{node}" and @online="true"]'
return bool(self.xml_elem.xpath(xpath))

def get_node_list(self, online=True, standby=False, exclude_remote=True) -> list[str]:
def is_node_standby(self, node):
"""
Check if a node is in standby mode
"""
xpath = f'//node[@name="{node}" and @standby="true"]'
return bool(self.xml_elem.xpath(xpath))

def is_node_maintenance(self, node):
"""
Check if a node is in maintenance mode
"""
xpath = f'//node[@name="{node}" and @maintenance="true"]'
return bool(self.xml_elem.xpath(xpath))

def is_node_remote(self, node):
"""
Check if a node is remote
"""
xpath = f'//node[@name="{node}" and @type="remote"]'
return bool(self.xml_elem.xpath(xpath))

@classmethod
def get_node_list(
cls,
online: bool = None,
standby: bool = None,
maintenance: bool = None,
only_member: bool = False,
only_remote: bool = False,
peer: str = None
) -> list[str]:
"""
Get a list of nodes based on the given attribute
Return all nodes if no attributes are given
"""
instance = cls(peer)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you change this to a classmethod and create new instance in the method? This does not make sense.

xpath_str = '//nodes/node'
conditions = []
online_value = "true" if online else "false"
conditions.append(f'@online="{online_value}"')
standby_value = "true" if standby else "false"
conditions.append(f'@standby="{standby_value}"')
if exclude_remote:
filters = {
"online": online,
"standby": standby,
"maintenance": maintenance
}
conditions = [
f'@{key}="{str(value).lower()}"'
for key, value in filters.items() if value is not None
]

if only_member:
conditions.append('@type="member"')
xpath_str += '[' + ' and '.join(conditions) + ']'
return [elem.get('name') for elem in self.xml_elem.xpath(xpath_str)]
elif only_remote:
conditions.append('@type="remote"')

if conditions:
xpath_str += '[' + ' and '.join(conditions) + ']'
return [elem.get('name') for elem in instance.xml_elem.xpath(xpath_str)]

def is_resource_configured(self, ra_type):
"""
Expand Down
28 changes: 19 additions & 9 deletions test/features/pacemaker_remote.feature
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,26 @@ Feature: Test deployment of pacemaker remote
And Run "scp -rp /etc/pacemaker pcmk-remote-node2:/etc" on "hanode1"
And Run "systemctl start pacemaker_remote" on "pcmk-remote-node1"
And Run "systemctl start pacemaker_remote" on "pcmk-remote-node2"
And Run "crm configure primitive remote-node1 ocf:pacemaker:remote params server=pcmk-remote-node1 reconnect_interval=10m op monitor interval=30s" on "hanode1"
And Run "crm configure primitive remote-node2 ocf:pacemaker:remote params server=pcmk-remote-node2 reconnect_interval=10m op monitor interval=30s" on "hanode1"
And Run "crm configure primitive pcmk-remote-node1 ocf:pacemaker:remote params server=pcmk-remote-node1 reconnect_interval=10m op monitor interval=30s" on "hanode1"
And Run "crm configure primitive pcmk-remote-node2 ocf:pacemaker:remote params server=pcmk-remote-node2 reconnect_interval=10m op monitor interval=30s" on "hanode1"
And Wait "5" seconds
Then Remote online nodes are "remote-node1 remote-node2"
Then Remote online nodes are "pcmk-remote-node1 pcmk-remote-node2"

Scenario: Test standby/online/maintenance/ready remote node
When Run "crm node standby pcmk-remote-node1" on "hanode1"
Then Node "pcmk-remote-node1" is standby
When Run "crm node online pcmk-remote-node1" on "hanode1"
Then Node "pcmk-remote-node1" is online
When Run "crm node maintenance pcmk-remote-node1" on "hanode1"
Then Node "pcmk-remote-node1" is maintenance
When Run "crm node ready pcmk-remote-node1" on "hanode1"
Then Node "pcmk-remote-node1" is ready

Scenario: Prevent adding remote RA to group, order and colocation
When Run "crm configure primitive d Dummy" on "hanode1"
When Try "crm configure group g d remote-node1"
Then Expected "Cannot put remote resource 'remote-node1' in a group" in stderr
When Try "crm configure order o1 d remote-node1"
Then Expected "Cannot put remote resource 'remote-node1' in order constraint" in stderr
When Try "crm configure colocation c1 inf: d remote-node1"
Then Expected "Cannot put remote resource 'remote-node1' in colocation constraint" in stderr
When Try "crm configure group g d pcmk-remote-node1"
Then Expected "Cannot put remote resource 'pcmk-remote-node1' in a group" in stderr
When Try "crm configure order o1 d pcmk-remote-node1"
Then Expected "Cannot put remote resource 'pcmk-remote-node1' in order constraint" in stderr
When Try "crm configure colocation c1 inf: d pcmk-remote-node1"
Then Expected "Cannot put remote resource 'pcmk-remote-node1' in colocation constraint" in stderr
15 changes: 13 additions & 2 deletions test/features/steps/step_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import behave_agent
from crmsh import corosync, userdir, bootstrap
from crmsh import utils as crmutils
from crmsh import xmlutil
from crmsh import sbd
from crmsh import ui_configure
from crmsh.sh import ShellUtils
Expand Down Expand Up @@ -257,12 +258,22 @@ def step_impl(context, nodelist):

@then('Node "{node}" is standby')
def step_impl(context, node):
assert crmutils.is_standby(node) is True
assert xmlutil.CrmMonXmlParser().is_node_standby(node) is True


@then('Node "{node}" is online')
def step_impl(context, node):
assert crmutils.is_standby(node) is False
assert xmlutil.CrmMonXmlParser().is_node_standby(node) is False


@then('Node "{node}" is maintenance')
def step_impl(context, node):
assert xmlutil.CrmMonXmlParser().is_node_maintenance(node) is True


@then('Node "{node}" is ready')
def step_impl(context, node):
assert xmlutil.CrmMonXmlParser().is_node_maintenance(node) is False


@then('IP "{addr}" is used by corosync on "{node}"')
Expand Down
Loading