MGMT-3108: Adding bootstrap-in-place installation support
tsorya authored and osherdp committed Dec 28, 2020
1 parent 81683e9 commit c732fe8
Showing 13 changed files with 374 additions and 71 deletions.
4 changes: 3 additions & 1 deletion Makefile
@@ -283,6 +283,8 @@ destroy_all_nodes_from_namespaces:
destroy_all_nodes:
	skipper run $(SKIPPER_PARAMS) 'discovery-infra/delete_nodes.py --delete-all'

deploy_ibip: _test_setup
	skipper make $(SKIPPER_PARAMS) _deploy_nodes $(SKIPPER_PARAMS) ADDITIONAL_PARAMS="'--bootstrap-in-place'" NUM_WORKERS=0 NUM_MASTERS=1 NAMESPACE_INDEX=0

redeploy_nodes: destroy_nodes deploy_nodes

@@ -360,4 +362,4 @@ _test_setup:
	cp -p discovery-infra/test_infra/tools/tf_network_pool.json /tmp/tf_network_pool.json

_test_parallel: $(REPORTS) _test_setup
	python3 -m pytest -n $(or ${TEST_WORKERS_NUM}, '2') $(or ${TEST},discovery-infra/tests) -k $(or ${TEST_FUNC},'') -m $(or ${TEST_MARKER},'') --verbose -s --junit-xml=$(REPORTS)/unittest.xml
	python3 -m pytest -n $(or ${TEST_WORKERS_NUM}, '2') $(or ${TEST},discovery-infra/tests) -k $(or ${TEST_FUNC},'') -m $(or ${TEST_MARKER},'') --verbose -s --junit-xml=$(REPORTS)/unittest.xml
145 changes: 145 additions & 0 deletions discovery-infra/bootstrap_in_place.py
@@ -0,0 +1,145 @@
import os
import shutil
import shlex
import logging
import yaml

import waiting

from oc_utils import get_operators_status
from test_infra import utils, consts
from test_infra.tools.assets import NetworkAssets
from test_infra.controllers.node_controllers.terraform_controller import TerraformController

BUILD_DIR = "build"
INSTALL_CONFIG_FILE_NAME = "install-config.yaml"
IBIP_DIR = os.path.join(BUILD_DIR, "ibip")
RESOURCES_DIR = os.path.join("discovery-infra", "resources")
INSTALL_CONFIG = os.path.join(IBIP_DIR, INSTALL_CONFIG_FILE_NAME)
INSTALLER_BINARY = os.path.join(BUILD_DIR, "openshift-install")
EMBED_IMAGE_NAME = "installer-SNO-image.iso"
KUBE_CONFIG = os.path.join(IBIP_DIR, "auth", "kubeconfig")


def installer_generate():
    logging.info("Installer generate manifests")
    utils.run_command(f"{INSTALLER_BINARY} create manifests --dir={IBIP_DIR}")
    logging.info("Installer generate ignitions")
    # TODO delete
    shutil.copy(f"{RESOURCES_DIR}/sno_manifest.yaml", os.path.join(IBIP_DIR, "openshift"))
    utils.run_command(f"{INSTALLER_BINARY} create ignition-configs --dir={IBIP_DIR}")
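
A quick sanity check of what the two openshift-install invocations leave behind; the file names below are the installer's typical output rather than a guarantee, and IBIP_DIR is the module constant defined above:

import os

# Typical contents after `create ignition-configs`: bootstrap.ign, master.ign,
# worker.ign, metadata.json and auth/ (the manifests are consumed by the installer).
for entry in sorted(os.listdir(IBIP_DIR)):
    print(entry)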


def download_live_image(download_path, rhcos_version=None):
    if os.path.exists(download_path):
        logging.info("Image %s already exists, skipping download", download_path)
        return

    logging.info("Downloading iso to %s", download_path)
    rhcos_version = rhcos_version or os.getenv('RHCOS_VERSION', "46.82.202009222340-0")
    utils.run_command(f"curl https://releases-art-rhcos.svc.ci.openshift.org/art/storage/releases/rhcos-4.6/"
                      f"{rhcos_version}/x86_64/rhcos-{rhcos_version}-live.x86_64.iso --retry 5 -o {download_path}")


def embed(image_name, ignition_file, embed_image_name):
    logging.info("Embed ignition %s to iso %s", ignition_file, image_name)
    embedded_image = os.path.join(BUILD_DIR, embed_image_name)
    os.remove(embedded_image) if os.path.exists(embedded_image) else None

    flags = shlex.split(f"--privileged --rm -v /dev:/dev -v /run/udev:/run/udev -v .:/data -w /data")
    utils.run_container("coreos-installer", "quay.io/coreos/coreos-installer:release", flags,
                        f"iso ignition embed {BUILD_DIR}/{image_name} "
                        f"-f --ignition-file /data/{IBIP_DIR}/{ignition_file} -o /data/{embedded_image}")

    image_path = os.path.join(consts.BASE_IMAGE_FOLDER, embed_image_name)
    shutil.move(embedded_image, image_path)
    return image_path


def fill_install_config(pull_secret, ssh_pub_key, net_asset, cluster_name):
    yaml.add_representer(str, str_presenter)
    with open(INSTALL_CONFIG, "r") as _file:
        config = yaml.safe_load(_file)

    config["pullSecret"] = pull_secret
    config["sshKey"] = ssh_pub_key
    config["metadata"]["name"] = cluster_name
    config["networking"]["machineNetwork"][0]["cidr"] = net_asset["machine_cidr"]

    with open(INSTALL_CONFIG, "w") as _file:
        yaml.dump(config, _file)


def setup_files_and_folders(args, net_asset, cluster_name):
    logging.info("Creating needed files and folders")
    utils.recreate_folder(consts.BASE_IMAGE_FOLDER, force_recreate=False)
    utils.recreate_folder(IBIP_DIR, with_chmod=False, force_recreate=True)
    shutil.copy(os.path.join(RESOURCES_DIR, INSTALL_CONFIG_FILE_NAME), IBIP_DIR)
    fill_install_config(args.pull_secret, args.ssh_key, net_asset, cluster_name)


def str_presenter(dumper, data):
    if "ssh-rsa" in data: # check for multiline string
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
    return dumper.represent_scalar('tag:yaml.org,2002:str', data)
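
The custom representer is what keeps multi-line values such as SSH public keys readable in the rewritten install-config.yaml: anything containing "ssh-rsa" is emitted as a YAML block scalar instead of a quoted string full of \n escapes. A minimal illustration, assuming PyYAML as imported above (the keys are placeholders):

yaml.add_representer(str, str_presenter)
print(yaml.dump({"sshKey": "ssh-rsa AAAAB3... user@host\nssh-rsa AAAAC3... other@host"}))
# -> sshKey is rendered as a '|' block scalar, one key per line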


def create_controller(net_asset):
    return TerraformController(
        cluster_name="test-infra-cluster",
        num_masters=1,
        num_workers=0,
        master_memory=32 * 1024, # 32GB of RAM
        net_asset=net_asset,
        iso_download_path="<TBD>", # will be set later on
        bootstrap_in_place=True,
    )


def all_operators_up():
    statuses = get_operators_status(KUBE_CONFIG)
    if not statuses:
        logging.debug("No operator has been found currently...")
        return False

    invalid_operators = [operator for operator, up in statuses.items() if not up]

    all_operators_are_valid = len(invalid_operators) == 0

    if not all_operators_are_valid:
        logging.debug("Following operators are still down: %s", ", ".join(invalid_operators))

    return all_operators_are_valid


def execute_ibip_flow(args):
    openshift_release_image = os.getenv('OPENSHIFT_INSTALL_RELEASE_IMAGE')
    if not openshift_release_image:
        raise ValueError("os env OPENSHIFT_INSTALL_RELEASE_IMAGE must be provided")

    net_asset = NetworkAssets().get()
    controller = create_controller(net_asset)
    setup_files_and_folders(args, net_asset, controller.cluster_name)

    utils.extract_installer(openshift_release_image, BUILD_DIR)
    installer_generate()

    download_live_image(f"{BUILD_DIR}/installer-image.iso")
    image_path = embed("installer-image.iso", "bootstrap.ign", EMBED_IMAGE_NAME)

    logging.info("Starting nodes...")
    controller.image_path = image_path
    controller.start_all_nodes()
    logging.info("Nodes started!")

    logging.info("Configuring /etc/hosts...")
    utils.config_etc_hosts(cluster_name=controller.cluster_name,
                           base_dns_domain=controller.cluster_domain,
                           api_vip=controller.master_ips[0][0])

    logging.info("Waiting for installation to complete...")
    waiting.wait(all_operators_up,
                 sleep_seconds=20,
                 timeout_seconds=60 * 60,
                 waiting_for="all operators to get up")
    logging.info("Installation completed successfully!")
31 changes: 30 additions & 1 deletion discovery-infra/oc_utils.py
@@ -1,6 +1,7 @@
import os
import urllib3
import json
import urllib3
import subprocess

from kubernetes.config.kube_config import load_kube_config
from kubernetes.config.kube_config import Configuration
@@ -168,3 +169,31 @@ def _load_resource_config_dict(resource):
        'kubectl.kubernetes.io/last-applied-configuration'
    ]
    return json.loads(raw)


def get_operators_status(kubeconfig):
    command = ["/usr/local/bin/oc",
               "--kubeconfig", kubeconfig,
               "get", "clusteroperators", "-o", "json"]

    response = subprocess.run(command, stdout=subprocess.PIPE)
    if response.returncode != 0:
        return {}

    output = json.loads(response.stdout)
    statuses = {}

    for item in output["items"]:
        name = item["metadata"]["name"]
        if "conditions" not in item["status"]:
            statuses[name] = False
            continue

        for condition in item["status"]["conditions"]:
            if condition["type"] == "Available":
                statuses[name] = condition["status"] == "True"
                break
        else:
            statuses[name] = False

    return statuses
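
For reference, a hedged sketch of how the helper is consumed; the kubeconfig path matches the one bootstrap_in_place.py uses, and the operator names and values in the comment are illustrative only:

statuses = get_operators_status("build/ibip/auth/kubeconfig")
# e.g. {"authentication": True, "console": True, "ingress": False, ...}
still_down = [name for name, available in statuses.items() if not available]
print("still waiting for:", ", ".join(still_down))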
31 changes: 31 additions & 0 deletions discovery-infra/resources/install-config.yaml
@@ -0,0 +1,31 @@
apiVersion: v1
baseDomain: redhat.com
compute:
- architecture: amd64
  hyperthreading: Enabled
  name: worker
  platform: {}
  replicas: 0
controlPlane:
  architecture: amd64
  hyperthreading: Enabled
  name: master
  platform: {}
  replicas: 1
metadata:
  creationTimestamp: null
  name: test-infra-cluster
networking:
  clusterNetwork:
  - cidr: 10.128.0.0/14
    hostPrefix: 23
  machineNetwork:
  - cidr: 192.168.126.0/24
  networkType: OpenShiftSDN
  serviceNetwork:
  - 172.30.0.0/16
platform:
  none: {}
publish: External
pullSecret: '{}'
sshKey: ""
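
This template is copied into build/ibip and then patched by bootstrap_in_place.fill_install_config(), which only touches pullSecret, sshKey, metadata.name and the machineNetwork CIDR. A small sketch of inspecting those fields, assuming PyYAML and the repository root as the working directory:

import yaml

with open("discovery-infra/resources/install-config.yaml") as f:
    config = yaml.safe_load(f)

# Fields overridden at run time by fill_install_config(); everything else is used as-is.
print(config["metadata"]["name"], config["networking"]["machineNetwork"][0]["cidr"])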
22 changes: 22 additions & 0 deletions discovery-infra/resources/sno_manifest.yaml
@@ -0,0 +1,22 @@
apiVersion: machineconfiguration.openshift.io/v1
kind: MachineConfig
metadata:
labels:
machineconfiguration.openshift.io/role: master
name: after-reboot
spec:
config:
ignition:
version: 3.1.0
storage:
files:
- contents:
source: data:text/plain;charset=utf-8;base64,IyEvYmluL2Jhc2ggLXgKZXhwb3J0IEtVQkVDT05GSUc9L2V0Yy9rdWJlcm5ldGVzL2Jvb3RzdHJhcC1zZWNyZXRzL2t1YmVjb25maWcKCmZ1bmN0aW9uIHdhaXRfZm9yX2FwaSB7CiAgdW50aWwgb2MgZ2V0IGNzciAmPiAvZGV2L251bGwKICAgIGRvCiAgICAgICAgZWNobyAiV2FpdGluZyBmb3IgYXBpIC4uLiIKICAgICAgICBzbGVlcCAzMAogICAgZG9uZQp9CmZ1bmN0aW9uIHJlc3RhcnRfa3ViZWxldCB7CiAgZWNobyAiUmVzdGFydGluZyBrdWJlbGV0IgogIHdoaWxlIGNhdCAvZXRjL2t1YmVybmV0ZXMvbWFuaWZlc3RzL2t1YmUtYXBpc2VydmVyLXBvZC55YW1sICB8IGdyZXAgYm9vdHN0cmFwLWt1YmUtYXBpc2VydmVyOyBkbwogICAgZWNobyAiV2FpdGluZyBmb3Iga3ViZS1hcGlzZXJ2ZXIgdG8gYXBwbHkgdGhlIG5ldyBzdGF0aWMgcG9kIGNvbmZpZ3VyYXRpb24iCiAgICBzbGVlcCAxMAogIGRvbmUKICBzeXN0ZW1jdGwgZGFlbW9uLXJlbG9hZAogIHN5c3RlbWN0bCByZXN0YXJ0IGt1YmVsZXQKfQpmdW5jdGlvbiBhcHByb3ZlX2NzciB7CiAgZWNobyAiQXBwcm92aW5nIGNzcnMgLi4uIgogIG5lZWRlZF90b19hcHByb3ZlPWZhbHNlCiAgdW50aWwgWyAkKG9jIGdldCBub2RlcyB8IGdyZXAgbWFzdGVyIHwgZ3JlcCAtdiBOb3RSZWFkeSB8IGdyZXAgUmVhZHkgfCB3YyAtbCkgLWVxIDEgXTsgZG8KICAgICAgbmVlZGVkX3RvX2FwcHJvdmU9dHJ1ZQogICAgICBlY2hvICJBcHByb3ZpbmcgY3NycyAuLi4iCiAgICAgb2MgZ2V0IGNzciAtbyBnby10ZW1wbGF0ZT0ne3tyYW5nZSAuaXRlbXN9fXt7aWYgbm90IC5zdGF0dXN9fXt7Lm1ldGFkYXRhLm5hbWV9fXt7IlxuIn19e3tlbmR9fXt7ZW5kfX0nIHwgeGFyZ3Mgb2MgYWRtIGNlcnRpZmljYXRlIGFwcHJvdmUgJj4gL2Rldi9udWxsIHx8IHRydWUKICAgICBzbGVlcCAzMAogICAgZG9uZQogICMgUmVzdGFydCBrdWJlbGV0IG9ubHkgaWYgbm9kZSB3YXMgYWRkZWQKICBpZiAkbmVlZGVkX3RvX2FwcHJvdmUgOyB0aGVuCiAgICBzbGVlcCA2MAogICAgcmVzdGFydF9rdWJlbGV0CiAgZmkKfQpmdW5jdGlvbiB3YWl0X2Zvcl9jdm8gewogIGVjaG8gIldhaXRpbmcgZm9yIGN2byIKICB1bnRpbCBbICIkKG9jIGdldCBjbHVzdGVydmVyc2lvbiAtbyBqc29ucGF0aD0ney5pdGVtc1swXS5zdGF0dXMuY29uZGl0aW9uc1s/KEAudHlwZT09IkF2YWlsYWJsZSIpXS5zdGF0dXN9JykiID09ICJUcnVlIiBdOyBkbwogICAgICBlY2hvICJTdGlsbCB3YWl0aW5nIGZvciBjdm8gLi4uIgogICAgIHNsZWVwIDMwCiAgICBkb25lCn0KZnVuY3Rpb24gY2xlYW4gewogIGlmIFsgLWQgIi9ldGMva3ViZXJuZXRlcy9ib290c3RyYXAtc2VjcmV0cyIgXTsgdGhlbgogICAgIHJtIC1yZiAvZXRjL2t1YmVybmV0ZXMvYm9vdHN0cmFwLSoKICBmaQp9Cgp3YWl0X2Zvcl9hcGkKYXBwcm92ZV9jc3IKd2FpdF9mb3JfY3ZvCmNsZWFu
mode: 365
overwrite: true
path: /usr/local/bin/after_reboot.sh
systemd:
units:
- name: after_reboot.service
contents: "[Unit]\nDescription=Master Install\nWants=kubelet.service\nAfter=kubelet.service\n[Service]\nType=oneshot\nExecStart=/usr/local/bin/after_reboot.sh\n\nRestartSec=5s\n\n[Install]\nWantedBy=multi-user.target\n"
enabled: true
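
The MachineConfig inlines an after_reboot.sh helper as a base64 data URL (mode 365 is decimal for 0o555, i.e. r-xr-xr-x); the decoded script waits for the API, approves pending CSRs, restarts the kubelet if a node was added, waits for the ClusterVersion operator to become Available, and then removes the bootstrap secrets. A small sketch for inspecting the embedded script without applying the manifest, assuming PyYAML and the repository root as the working directory:

import base64

import yaml

with open("discovery-infra/resources/sno_manifest.yaml") as f:
    machine_config = yaml.safe_load(f)

source = machine_config["spec"]["config"]["storage"]["files"][0]["contents"]["source"]
print(base64.b64decode(source.split("base64,", 1)[1]).decode())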
29 changes: 18 additions & 11 deletions discovery-infra/start_discovery.py
@@ -7,29 +7,24 @@
import json
import os
import time
import uuid
from functools import partial
from distutils.dir_util import copy_tree
import distutils.util
from pathlib import Path
from netaddr import IPNetwork

from test_infra import assisted_service_api, consts, utils
import install_cluster
import oc_utils
import day2
import waiting
from logger import log
from test_infra.utils import config_etc_hosts
from test_infra.tools import terraform_utils
import bootstrap_in_place as ibip


class MachineNetwork(object):

    YES_VALUES = [ 'yes', 'true', 'y']
    YES_VALUES = ['yes', 'true', 'y']

    def __init__(self, ip_v4, ip_v6, machine_cidr_4, machine_cidr_6, ns_index):

        self.has_ip_v4 = ip_v4.lower() in MachineNetwork.YES_VALUES
        self.has_ip_v6 = ip_v6.lower() in MachineNetwork.YES_VALUES

@@ -94,7 +89,6 @@ def fill_tfvars(
        machine_cidr_addresses += [machine_net.cidr_v6]
        provisioning_cidr_addresses += [machine_net.provisioning_cidr_v6]


    tfvars['machine_cidr_addresses'] = machine_cidr_addresses
    tfvars['provisioning_cidr_addresses'] = provisioning_cidr_addresses
    tfvars['api_vip'] = _get_vips_ips(machine_net)[0]
@@ -255,6 +249,7 @@ def update_hosts(client, cluster_id, libvirt_nodes, update_hostnames=False):

    client.update_hosts(cluster_id=cluster_id, hosts_with_roles=added_hosts, hosts_names=hostnames)


def set_cluster_vips(client, cluster_id, machine_net):
    cluster_info = client.cluster_get(cluster_id)
    api_vip, ingress_vip = _get_vips_ips(machine_net)
@@ -272,7 +267,6 @@ def set_cluster_machine_cidr(client, cluster_id, machine_net):


def _get_vips_ips(machine_net):

    if machine_net.has_ip_v4:
        network_subnet_starting_ip = str(
            ipaddress.ip_address(
@@ -343,6 +337,7 @@ def _get_provisioning_cidr(cidr, ns_index):
    provisioning_cidr += ns_index + consts.NAMESPACE_POOL_SIZE
    return str(provisioning_cidr)


def _get_provisioning_cidr6(cidr, ns_index):
    provisioning_cidr = IPNetwork(cidr)
    provisioning_cidr += ns_index
@@ -462,11 +457,13 @@ def nodes_flow(client, cluster_name, cluster, image_path):
        config_etc_hosts(cluster_info.name, cluster_info.base_dns_domain, cluster_info.api_vip)
        utils.wait_for_cvo_available()


def _get_libvirt_nodes_from_tf_state(network_name, tf_state):
    nodes = _extract_nodes_from_tf_state(tf_state, network_name, consts.NodeRoles.MASTER)
    nodes.update(_extract_nodes_from_tf_state(tf_state, network_name, consts.NodeRoles.WORKER))
    return nodes


def _extract_nodes_from_tf_state(tf_state, network_name, role):
    domains = next(r["instances"] for r in tf_state.resources if r["type"] == "libvirt_domain" and r["name"] == role)
    data = {}
@@ -476,10 +473,11 @@ def _extract_nodes_from_tf_state(tf_state, network_name, role):
if nic["network_name"] != network_name:
continue

data[nic["mac"]] = {"ip": nic["addresses"], "name": d["attributes"]["name"], "role": role}
data[nic["mac"]] = {"ip": nic["addresses"], "name": d["attributes"]["name"], "role": role}

return data


def execute_day1_flow(cluster_name):
    client = None
    cluster = {}
@@ -548,6 +546,8 @@ def main():
        day2.execute_day2_cloud_flow(cluster_id, args)
    if args.day2_ocp_cluster:
        day2.execute_day2_ocp_flow(cluster_id, args)
    if args.bootstrap_in_place:
        ibip.execute_ibip_flow(args)


if __name__ == "__main__":
@@ -813,8 +813,15 @@ def main():
        type=str,
        default=''
    )

    parser.add_argument(
        "--bootstrap-in-place",
        help="single node cluster with bootstrap in place flow",
        action="store_true",
    )

    oc_utils.extend_parser_with_oc_arguments(parser)
    args = parser.parse_args()
    if not args.pull_secret and args.install_cluster:
    if not args.pull_secret:
        raise Exception("Can't install cluster without pull secret, please provide one")
    main()