-
Notifications
You must be signed in to change notification settings - Fork 105
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MGMT-3108: Adding bootstrap-in-place installation support
- Loading branch information
Showing
16 changed files
with
536 additions
and
72 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
import os | ||
import shutil | ||
import shlex | ||
import logging | ||
import yaml | ||
|
||
import waiting | ||
|
||
from oc_utils import get_operators_status | ||
from download_logs import download_must_gather | ||
from test_infra import utils, consts | ||
from test_infra.tools.assets import NetworkAssets | ||
from test_infra.controllers.node_controllers.ssh import SshConnection | ||
from test_infra.controllers.node_controllers.terraform_controller import TerraformController | ||
|
||
# --- Build output locations (relative to the working directory) -------------
BUILD_DIR = "build"
IBIP_DIR = os.path.join(BUILD_DIR, "ibip")  # installer work dir for this flow
INSTALLER_BINARY = os.path.join(BUILD_DIR, "openshift-install")
EMBED_IMAGE_NAME = "installer-SNO-image.iso"  # live ISO with ignition embedded

# --- Files produced/consumed inside the installer work dir ------------------
INSTALL_CONFIG_FILE_NAME = "install-config.yaml"
INSTALL_CONFIG = os.path.join(IBIP_DIR, INSTALL_CONFIG_FILE_NAME)
KUBE_CONFIG = os.path.join(IBIP_DIR, "auth", "kubeconfig")
MUST_GATHER_DIR = os.path.join(IBIP_DIR, "must-gather")

# --- Static resources and credentials ---------------------------------------
RESOURCES_DIR = os.path.join("discovery-infra", "resources")
SOSREPORT_SCRIPT = os.path.join(RESOURCES_DIR, "man_sosreport.sh")
SSH_KEY = os.path.join("ssh_key", "key")  # private key used for SSH to the host
|
||
|
||
def installer_generate():
    """Run the installer's manifest and ignition-config generation steps.

    Both steps operate on ``IBIP_DIR``. Between them the single-node manifest
    shipped under ``RESOURCES_DIR`` is copied into the ``openshift`` manifests
    folder so that it is picked up when the ignition configs are created.
    """
    manifests_cmd = f"{INSTALLER_BINARY} create manifests --dir={IBIP_DIR}"
    ignitions_cmd = f"{INSTALLER_BINARY} create ignition-configs --dir={IBIP_DIR}"
    sno_manifest = f"{RESOURCES_DIR}/sno_manifest.yaml"
    manifests_folder = os.path.join(IBIP_DIR, "openshift")

    logging.info("Installer generate manifests")
    utils.run_command(manifests_cmd)

    logging.info("Installer generate ignitions")
    # TODO delete
    shutil.copy(sno_manifest, manifests_folder)
    utils.run_command(ignitions_cmd)
|
||
|
||
def download_live_image(download_path, rhcos_version=None):
    """Download the RHCOS live ISO to ``download_path`` unless it already exists.

    Args:
        download_path: Destination file for the ISO. An existing path is
            treated as a cached download and nothing is fetched.
        rhcos_version: RHCOS build to fetch. Falls back to the ``RHCOS_VERSION``
            environment variable and then to a pinned 4.6 build.

    Raises:
        Whatever ``utils.run_command`` raises when curl fails (assumes it
        raises on a non-zero exit status - TODO confirm). Any partially
        written file is removed first so it is not mistaken for a cached image
        on the next run.
    """
    if os.path.exists(download_path):
        logging.info("Image %s already exists, skipping download", download_path)
        return

    logging.info("Downloading iso to %s", download_path)
    rhcos_version = rhcos_version or os.getenv('RHCOS_VERSION', "46.82.202009222340-0")
    image_url = (f"https://releases-art-rhcos.svc.ci.openshift.org/art/storage/releases/rhcos-4.6/"
                 f"{rhcos_version}/x86_64/rhcos-{rhcos_version}-live.x86_64.iso")
    try:
        # --fail: on an HTTP error curl exits non-zero instead of saving the
        # server's error page as the "ISO", which the existence check above
        # would then reuse on every later run.
        utils.run_command(f"curl --fail {image_url} --retry 5 -o {download_path}")
    except Exception:
        # Do not leave a truncated download behind to be picked up as a cache hit.
        if os.path.exists(download_path):
            os.remove(download_path)
        raise
|
||
|
||
def embed(image_name, ignition_file, embed_image_name):
    """Embed an ignition file into a live ISO via the coreos-installer container.

    Args:
        image_name: File name of the source ISO inside ``BUILD_DIR``.
        ignition_file: File name of the ignition config inside ``IBIP_DIR``.
        embed_image_name: File name to give the resulting ISO.

    Returns:
        Path of the embedded ISO after it was moved into
        ``consts.BASE_IMAGE_FOLDER``.
    """
    logging.info("Embed ignition %s to iso %s", ignition_file, image_name)

    embedded_image = os.path.join(BUILD_DIR, embed_image_name)
    # Drop the output of a previous run, if any, before producing a new one.
    if os.path.exists(embedded_image):
        os.remove(embedded_image)

    # The current directory is mounted at /data and used as the container's
    # working directory, so relative paths below resolve against it.
    container_flags = [
        "--privileged",
        "--rm",
        "-v", "/dev:/dev",
        "-v", "/run/udev:/run/udev",
        "-v", ".:/data",
        "-w", "/data",
    ]
    installer_args = (
        f"iso ignition embed {BUILD_DIR}/{image_name} "
        f"-f --ignition-file /data/{IBIP_DIR}/{ignition_file} -o /data/{embedded_image}"
    )
    utils.run_container(
        "coreos-installer",
        "quay.io/coreos/coreos-installer:release",
        container_flags,
        installer_args,
    )

    final_location = os.path.join(consts.BASE_IMAGE_FOLDER, embed_image_name)
    shutil.move(embedded_image, final_location)
    return final_location
|
||
|
||
def fill_install_config(pull_secret, ssh_pub_key, net_asset, cluster_name):
    """Fill the per-run values into the install-config file, in place.

    Reads ``INSTALL_CONFIG``, sets the pull secret, SSH public key, cluster
    name and the CIDR of the first machine network, then writes the file back.

    Args:
        pull_secret: Value stored under ``pullSecret``.
        ssh_pub_key: Value stored under ``sshKey``.
        net_asset: Mapping providing ``machine_cidr``.
        cluster_name: Value stored under ``metadata.name``.
    """
    # Make string scalars go through str_presenter when dumping below.
    yaml.add_representer(str, str_presenter)

    with open(INSTALL_CONFIG, "r") as source:
        install_config = yaml.safe_load(source)

    install_config.update(pullSecret=pull_secret, sshKey=ssh_pub_key)
    install_config["metadata"]["name"] = cluster_name
    machine_cidr = net_asset["machine_cidr"]
    install_config["networking"]["machineNetwork"][0]["cidr"] = machine_cidr

    with open(INSTALL_CONFIG, "w") as target:
        yaml.dump(install_config, target)
|
||
|
||
def setup_files_and_folders(args, net_asset, cluster_name):
    """Prepare the image folder and a fresh installer work dir with its config.

    Args:
        args: Parsed arguments; ``pull_secret`` and ``ssh_key`` are read.
        net_asset: Network asset forwarded to ``fill_install_config``.
        cluster_name: Cluster name forwarded to ``fill_install_config``.
    """
    logging.info("Creating needed files and folders")

    # The image folder is passed force_recreate=False; the installer work dir
    # is passed force_recreate=True.
    utils.recreate_folder(consts.BASE_IMAGE_FOLDER, force_recreate=False)
    utils.recreate_folder(IBIP_DIR, with_chmod=False, force_recreate=True)

    config_template = os.path.join(RESOURCES_DIR, INSTALL_CONFIG_FILE_NAME)
    shutil.copy(config_template, IBIP_DIR)
    fill_install_config(
        args.pull_secret,
        args.ssh_key,
        net_asset,
        cluster_name,
    )
|
||
|
||
def str_presenter(dumper, data):
    """Represent a string scalar, requesting literal block style when useful.

    Literal (``|``) style is requested for multi-line strings and for strings
    that contain an SSH public-key type marker; every other string uses the
    dumper's default style.

    Args:
        dumper: Object exposing ``represent_scalar(tag, value, style=...)``.
        data: The string being represented.

    Returns:
        Whatever ``dumper.represent_scalar`` returns.
    """
    # "ssh-rsa" was the only marker originally checked; the newline test and
    # the other key types make the behaviour match the "multiline" intent and
    # treat non-RSA keys the same way as RSA ones.
    block_markers = ("\n", "ssh-rsa", "ssh-ed25519", "ssh-dss", "ecdsa-sha2-")
    if any(marker in data for marker in block_markers):
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
    return dumper.represent_scalar('tag:yaml.org,2002:str', data)
|
||
|
||
def create_controller(net_asset):
    """Build the Terraform controller for a one-master, zero-worker cluster.

    Args:
        net_asset: Network asset handed to the controller.

    Returns:
        A ``TerraformController`` created with ``bootstrap_in_place=True``.
    """
    controller_options = dict(
        cluster_name="test-infra-cluster",
        num_masters=1,
        num_workers=0,
        master_memory=32 * 1024,  # 32GB of RAM
        master_vcpu=12,
        net_asset=net_asset,
        iso_download_path="<TBD>",  # will be set later on
        bootstrap_in_place=True,
    )
    return TerraformController(**controller_options)
|
||
|
||
def all_operators_up():
    """Report whether every operator listed for ``KUBE_CONFIG`` is up.

    Returns:
        ``False`` when no operator status is available or at least one
        operator is reported as not up, ``True`` otherwise.
    """
    operator_states = get_operators_status(KUBE_CONFIG)
    if not operator_states:
        logging.debug("No operator has been found currently...")
        return False

    still_down = [name for name, is_up in operator_states.items() if not is_up]
    if still_down:
        logging.debug("Following operators are still down: %s", ", ".join(still_down))
        return False

    return True
|
||
|
||
def gather_sosreport_data(host_ip):
    """Run the sosreport script on the host over SSH and fetch its archive.

    The script is uploaded to ``/tmp``, made executable and run with sudo;
    the resulting ``/tmp/sosreport.tar.bz2`` is downloaded into ``IBIP_DIR``.

    Args:
        host_ip: Address of the host to connect to.
    """
    connection = SshConnection(ip=host_ip, private_ssh_key_path=SSH_KEY)
    with connection as session:
        session.upload_file(SOSREPORT_SCRIPT, "/tmp/man_sosreport.sh")
        for remote_command in (
            "chmod a+x /tmp/man_sosreport.sh",
            "sudo /tmp/man_sosreport.sh",
        ):
            session.execute(remote_command)
        session.download_file("/tmp/sosreport.tar.bz2", IBIP_DIR)
|
||
|
||
def _collect_diagnostics(host_ip):
    """Best-effort collection of sosreport and must-gather data.

    Each step is guarded separately: a failure in one is logged and does not
    prevent the other from running, nor does it replace an exception that is
    already propagating from the installation flow.

    Args:
        host_ip: Address of the installed host, or ``None`` when the flow
            failed before a host address was known (sosreport is skipped).
    """
    if host_ip is not None:
        try:
            logging.info("Gathering sosreport data from host...")
            gather_sosreport_data(host_ip)
        except Exception:
            logging.exception("Gathering sosreport data failed")

    try:
        logging.info("Gathering information via must-gather...")
        utils.recreate_folder(MUST_GATHER_DIR)
        download_must_gather(KUBE_CONFIG, MUST_GATHER_DIR)
    except Exception:
        logging.exception("Gathering must-gather data failed")


def execute_ibip_flow(args):
    """Run the bootstrap-in-place installation flow end to end.

    Steps: prepare folders and install-config, extract the installer and
    generate ignition, download the live ISO and embed the bootstrap ignition,
    boot the node, configure /etc/hosts and wait up to an hour for all
    operators to come up. Diagnostics are collected afterwards whether or not
    the flow succeeded.

    Args:
        args: Parsed arguments; forwarded to ``setup_files_and_folders``.

    Raises:
        ValueError: If ``OPENSHIFT_INSTALL_RELEASE_IMAGE`` is not set.
        Any exception raised by the individual installation steps.
    """
    host_ip = None
    try:
        openshift_release_image = os.getenv('OPENSHIFT_INSTALL_RELEASE_IMAGE')
        if not openshift_release_image:
            raise ValueError("os env OPENSHIFT_INSTALL_RELEASE_IMAGE must be provided")

        net_asset = NetworkAssets().get()
        controller = create_controller(net_asset)
        setup_files_and_folders(args, net_asset, controller.cluster_name)

        utils.extract_installer(openshift_release_image, BUILD_DIR)
        installer_generate()

        download_live_image(f"{BUILD_DIR}/installer-image.iso")
        image_path = embed("installer-image.iso", "bootstrap.ign", EMBED_IMAGE_NAME)

        logging.info("Starting nodes...")
        controller.image_path = image_path
        controller.start_all_nodes()
        logging.info("Nodes started!")

        logging.info("Configuring /etc/hosts...")
        host_ip = controller.master_ips[0][0]
        utils.config_etc_hosts(cluster_name=controller.cluster_name,
                               base_dns_domain=controller.cluster_domain,
                               api_vip=host_ip)

        logging.info("Waiting for installation to complete...")
        waiting.wait(all_operators_up,
                     sleep_seconds=20,
                     timeout_seconds=60 * 60,
                     waiting_for="all operators to get up")
        logging.info("Installation completed successfully!")

    finally:
        # Log collection must never hide the real outcome of the installation.
        _collect_diagnostics(host_ip)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# OpenShift install-config for a single-node cluster: one control-plane
# replica, zero compute replicas, platform "none".
# NOTE(review): this appears to be the template copied into the installer work
# dir; pullSecret, sshKey, metadata.name and machineNetwork[0].cidr look like
# placeholders that fill_install_config overwrites - confirm against the
# resource file name.
apiVersion: v1
baseDomain: redhat.com
compute:
- architecture: amd64
  hyperthreading: Enabled
  name: worker
  platform: {}
  replicas: 0  # no dedicated worker nodes
controlPlane:
  architecture: amd64
  hyperthreading: Enabled
  name: master
  platform: {}
  replicas: 1  # single control-plane node
metadata:
  creationTimestamp: null
  name: test-infra-cluster
networking:
  clusterNetwork:
  - cidr: 10.128.0.0/14
    hostPrefix: 23
  machineNetwork:
  - cidr: 192.168.126.0/24
  networkType: OpenShiftSDN
  serviceNetwork:
  - 172.30.0.0/16
platform:
  none: {}
publish: External
pullSecret: '{}'  # empty placeholder
sshKey: ""  # empty placeholder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#!/bin/bash
# Manual sosreport-style collector.
# Gathers system, application, memory, network, storage, kernel, hardware,
# kdump, container and log information into /tmp/sosreport and packs it into
# /tmp/sosreport.tar.bz2. Output of each command is written to a file named
# after the command; failures of plain file copies are appended to error_log.
export LANG=C

# If this script hangs, un-comment the below two entries and note the command that the script hangs on. Then comment out that command and re-run the script.
# set -x
# set -o verbose

# Start from an empty work directory. Abort if it cannot be created or entered,
# otherwise every file below would be written into the caller's directory.
[[ -d /tmp/sosreport ]] && rm -rf /tmp/sosreport
mkdir /tmp/sosreport && cd /tmp/sosreport && mkdir -p var/log etc/lvm etc/sysconfig network storage sos_commands/networking || exit 1

echo -e "Gathering system information..."
hostname &> hostname
cp -a /etc/redhat-release ./etc/ 2>> error_log
uptime &> uptime

echo -e "Gathering application information..."
chkconfig --list &> chkconfig
top -bn1 &> top_bn1
service --status-all &> service_status_all
date &> date
ps auxww &> ps_auxww
ps -elf &> ps_-elf
rpm -qa --last &> rpm-qa
echo -e "Running 'rpm -Va'. This may take a moment."
rpm -Va &> rpm-Va

echo -e "Gathering memory information..."
free -m &> free
vmstat 1 10 &> vmstat

echo -e "Gathering network information..."
ifconfig &> ./network/ifconfig
netstat -s &>./network/netstat_-s
netstat -agn &> ./network/netstat_-agn
netstat -neopa &> ./network/netstat_-neopa
route -n &> ./network/route_-n
for i in $(ls /etc/sysconfig/network-scripts/{ifcfg,route,rule}-*) ; do echo -e "$i\n----------------------------------"; cat $i;echo " "; done &> ./sos_commands/networking/ifcfg-files
for i in $(ifconfig | grep "^[a-z]" | cut -f 1 -d " "); do echo -e "$i\n-------------------------" ; ethtool $i; ethtool -k $i; ethtool -S $i; ethtool -i $i;echo -e "\n" ; done &> ./sos_commands/networking/ethtool.out
cp /etc/sysconfig/network ./sos_commands/networking/ 2>> error_log
cp /etc/sysconfig/network-scripts/ifcfg-* ./sos_commands/networking/ 2>> error_log
cp /etc/sysconfig/network-scripts/route-* ./sos_commands/networking/ 2>> error_log
# stdout goes to the output file, stderr to error_log (the effective result of
# the former "&> file 2>> error_log").
cat /proc/net/bonding/bond* > ./sos_commands/networking/proc-net-bonding-bond 2>> error_log
iptables --list --line-numbers &> ./sos_commands/networking/iptables_--list_--line-numbers
ip route show table all &> ./sos_commands/networking/ip_route_show_table_all
ip link &> ./sos_commands/networking/ip_link

echo -e "Gathering Storage/Filesystem information..."
df -l &> df
fdisk -l &> fdisk
parted -l &> parted
cp -a /etc/fstab ./etc/ 2>> error_log
cp -a /etc/lvm/lvm.conf ./etc/lvm/ 2>> error_log
cp -a /etc/lvm/backup/ ./etc/lvm/ 2>> error_log
cp -a /etc/lvm/archive/ ./etc/lvm/ 2>> error_log
cp -a /etc/multipath.conf ./etc/ 2>> error_log
cat /proc/mounts &> mount
iostat -tkx 1 10 &> iostat_-tkx_1_10
parted -l &> storage/parted_-l
vgdisplay -v &> storage/vgdisplay
lvdisplay &> storage/lvdisplay
pvdisplay &> storage/pvdisplay
pvs -a -v &> storage/pvs
vgs -v &> storage/vgs
lvs -o +devices &> storage/lvs
multipath -v4 -ll &> storage/multipath_ll
pvscan -vvvv &> storage/pvscan
vgscan -vvvv &> storage/vgscan
lvscan -vvvv &> storage/lvscan
lsblk &> storage/lsblk
lsblk -t &> storage/lsblk_t
dmsetup info -C &> storage/dmsetup_info_c
dmsetup status &> storage/dmsetup_status
dmsetup table &> storage/dmsetup_table
ls -lahR /dev &> storage/dev

echo -e "Gathering kernel information..."
cp -a /etc/security/limits.conf ./etc/ 2>> error_log
cp -a /etc/sysctl.conf ./etc/ 2>> error_log
ulimit -a &> ulimit
cat /proc/slabinfo &> slabinfo
cat /proc/interrupts &> interrupts
cat /proc/iomem &> iomem
cat /proc/ioports &> ioports
slabtop -o &> slabtop_-o
uname -a &> uname
sysctl -a &> sysctl_-a
lsmod &> lsmod
cp -a /etc/modprobe.conf ./etc/ 2>> error_log
cp -a /etc/sysconfig/* ./etc/sysconfig/ 2>> error_log
# Redirect stdout first and then duplicate stderr onto it, so modinfo errors
# land in the file too ("2>&1 >> file" would leave stderr on the terminal).
for MOD in $(lsmod | grep -v "Used by" | awk '{ print $1 }'); do modinfo "$MOD" >> modinfo 2>&1; done
ipcs -a &> ipcs_-a
ipcs -s | awk '/^0x/ {print $2}' | while read semid; do ipcs -s -i $semid; done &> ipcs_-s_verbose
sar -A &> sar_-A
cp -a /var/log/dmesg dmesg 2>> error_log
dmesg &> dmesg_now

echo -e "Gathering hardware information..."
dmidecode &> dmidecode
lspci -vvv &> lspci_-vvv
lspci &> lspci
cat /proc/meminfo &> meminfo
cat /proc/cpuinfo &> cpuinfo

echo -e "Gathering kdump information..."
cp -a /etc/kdump.conf ./etc/ 2>> error_log
ls -laR /var/crash &> ls-lar-var-crash
ls -1 /var/crash | while read n; do mkdir -p var/crash/${n}; cp -a /var/crash/${n}/vmcore-dmesg* var/crash/${n}/ 2>> error_log; done

echo -e "Gathering container related information..."
mkdir container
# Fall back to docker when podman is not available. A shell function is used
# because aliases are not expanded in non-interactive bash scripts.
if ! command -v podman &> /dev/null; then
    podman() { docker "$@"; }
fi
podman ps &> container/ps
podman image list &> container/image_list
podman ps | awk '$1!="CONTAINER" {print $1}' | while read id; do podman inspect $id &> container/inspect_${id}; done

echo -e "Gathering logs..."
cp -a /var/log/{containers*,message*,secure*,boot*,cron*,yum*,Xorg*,sa,rhsm,audit,dmesg} ./var/log/ 2>> error_log
cp -a /etc/*syslog.conf ./etc/ 2>> error_log

echo -e "Compressing files..."
tar -cjf /tmp/sosreport.tar.bz2 ./

echo -e "Script complete."
Oops, something went wrong.