Skip to content

Commit

Permalink
MGMT-3420 Make test infra work with none platform and IPv6 (openshift…
Browse files Browse the repository at this point in the history
…#538)

- Add a load balancer to support the none platform and IPv6
  • Loading branch information
ori-amizur authored Feb 22, 2021
1 parent a896202 commit 31d9fd8
Show file tree
Hide file tree
Showing 10 changed files with 160 additions and 67 deletions.
1 change: 1 addition & 0 deletions Dockerfile.test-infra
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ RUN curl -Lo kubectl https://storage.googleapis.com/kubernetes-release/release/v
RUN mkdir -p /root/.terraform.d/plugins
RUN curl -SL https://github.com/dmacvicar/terraform-provider-libvirt/releases/download/v0.6.2/terraform-provider-libvirt-0.6.2+git.1585292411.8cbe9ad0.Fedora_28.x86_64.tar.gz | \
tar -xz -C /root/.terraform.d/plugins
RUN curl -SL https://releases.hashicorp.com/terraform-provider-local/2.0.0/terraform-provider-local_2.0.0_linux_amd64.zip > /tmp/tpl.zip && unzip -d /root/.terraform.d/plugins /tmp/tpl.zip && rm -f /tmp/tpl.zip

COPY requirements.txt /tmp/
COPY --from=service /clients/assisted-service-client-*.tar.gz /build/pip/
Expand Down
17 changes: 14 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ PLATFORM := $(or ${PLATFORM},baremetal)
all: create_full_environment run_full_flow_with_install


destroy: destroy_nodes delete_minikube_profile kill_port_forwardings delete_podman_localhost
destroy: destroy_nodes delete_minikube_profile kill_port_forwardings delete_podman_localhost stop_load_balancer

###############
# Environment #
Expand Down Expand Up @@ -157,6 +157,17 @@ delete_minikube_profile:
delete_podman_localhost:
make -C assisted-service/ clean-onprem || true

####################
# Load balancer #
####################

# NOTE(review): consider declaring start_load_balancer/stop_load_balancer in .PHONY —
# a file with either name would make these targets appear permanently up to date.
start_load_balancer:
	# Start load balancer if it does not already exist. Map the directory $(HOME)/.test-infra/etc/nginx/stream.d to be /etc/nginx/stream.d so it will be used by the python code to fill up load balancing definitions
	# Only relevant for the "none" platform: reuse a running "load_balancer" container
	# when present, otherwise launch one from the load_balancer:latest image.
	if [ "$(PLATFORM)" = "none" ] ; then id=`podman ps --quiet --filter "name=load_balancer"` ; ( test -z "$$id" && podman run -d --rm --net=host --name=load_balancer -v $(HOME)/.test-infra/etc/nginx/stream.d:/etc/nginx/stream.d load_balancer:latest ) || ! test -z "$$id" ; fi

stop_load_balancer:
	# Remove the container (if one is running) and clear the generated nginx stream configs.
	# NOTE(review): `>&` is a bash-ism; confirm make's SHELL is bash here, or prefer
	# `> /dev/null 2>&1` for POSIX sh. The trailing `|| /bin/true` makes this best-effort.
	id=`podman ps --quiet --filter "name=load_balancer"`; test ! -z "$$id" && podman rm -f load_balancer ; rm -f $(HOME)/.test-infra/etc/nginx/stream.d/*.conf >& /dev/null || /bin/true

#############
# Terraform #
#############
Expand Down Expand Up @@ -274,15 +285,15 @@ install_cluster:
_deploy_nodes:
discovery-infra/start_discovery.py -i $(ISO) -n $(NUM_MASTERS) -p $(STORAGE_POOL_PATH) -k '$(SSH_PUB_KEY)' -md $(MASTER_DISK) -wd $(WORKER_DISK) -mm $(MASTER_MEMORY) -wm $(WORKER_MEMORY) -nw $(NUM_WORKERS) -ps '$(PULL_SECRET)' -bd $(BASE_DOMAIN) -cN $(CLUSTER_NAME) -vN $(NETWORK_CIDR) -nM $(NETWORK_MTU) -iU $(REMOTE_SERVICE_URL) -id $(CLUSTER_ID) -mD $(BASE_DNS_DOMAINS) -ns $(NAMESPACE) -pX $(HTTP_PROXY_URL) -sX $(HTTPS_PROXY_URL) -nX $(NO_PROXY_VALUES) --service-name $(SERVICE_NAME) --vip-dhcp-allocation $(VIP_DHCP_ALLOCATION) --profile $(PROFILE) --ns-index $(NAMESPACE_INDEX) --deploy-target $(DEPLOY_TARGET) $(DAY1_PARAMS) $(OC_PARAMS) $(KEEP_ISO_FLAG) $(ADDITIONAL_PARAMS) $(DAY2_PARAMS) -ndw $(NUM_DAY2_WORKERS) --ipv4 $(IPv4) --ipv6 $(IPv6) --platform $(PLATFORM) --proxy $(PROXY) --iso-image-type $(ISO_IMAGE_TYPE)

deploy_nodes_with_install:
# Deploy nodes and run a full install. Depends on start_load_balancer, which is a
# no-op unless PLATFORM=none (see the Load balancer section).
deploy_nodes_with_install: start_load_balancer
	bash scripts/utils.sh local_setup_before_deployment $(PLATFORM) $(NAMESPACE) $(OC_FLAG)
	skipper make $(SKIPPER_PARAMS) _deploy_nodes NAMESPACE_INDEX=$(shell bash scripts/utils.sh get_namespace_index $(NAMESPACE) $(OC_FLAG)) NAMESPACE=$(NAMESPACE) ADDITIONAL_PARAMS="'-in ${ADDITIONAL_PARAMS}'" $(SKIPPER_PARAMS) DAY1_PARAMS=--day1-cluster


deploy_static_ips_nodes_with_install:
make deploy_nodes_with_install ADDITIONAL_PARAMS="'--with-static-ips'"

deploy_nodes:
# Deploy nodes without installing the cluster. Depends on start_load_balancer,
# which is a no-op unless PLATFORM=none.
deploy_nodes: start_load_balancer
	bash scripts/utils.sh local_setup_before_deployment $(PLATFORM) $(NAMESPACE) $(OC_FLAG)
	skipper make $(SKIPPER_PARAMS) _deploy_nodes NAMESPACE_INDEX=$(shell bash scripts/utils.sh get_namespace_index $(NAMESPACE) $(OC_FLAG)) NAMESPACE=$(NAMESPACE) ADDITIONAL_PARAMS=$(ADDITIONAL_PARAMS) DAY1_PARAMS=--day1-cluster

Expand Down
19 changes: 16 additions & 3 deletions discovery-infra/start_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import install_cluster
import oc_utils
from logger import log
from test_infra.controllers.load_balancer_controller import LoadBalancerController


class MachineNetwork(object):
Expand Down Expand Up @@ -394,9 +395,15 @@ def nodes_flow(client, cluster_name, cluster):
tf=tf
)

is_ipv4 = machine_net.has_ip_v4 or not machine_net.has_ip_v6
main_cidr = args.vm_network_cidr if is_ipv4 else args.vm_network_cidr6
secondary_cidr = machine_net.provisioning_cidr_v4 if is_ipv4 else machine_net.provisioning_cidr_v6

if client:
cluster_info = client.cluster_get(cluster.id)
macs = utils.get_libvirt_nodes_macs(nodes_details["libvirt_network_name"])
if is_none_platform_mode():
macs += utils.get_libvirt_nodes_macs(nodes_details["libvirt_secondary_network_name"])

if not (cluster_info.api_vip and cluster_info.ingress_vip):
utils.wait_till_hosts_with_macs_are_in_status(
Expand All @@ -411,11 +418,11 @@ def nodes_flow(client, cluster_name, cluster):
)

if args.master_count == 1:
is_ip4 = machine_net.has_ip_v4 or not machine_net.has_ip_v6
cidr = args.vm_network_cidr if is_ip4 else args.vm_network_cidr6
tf.change_variables({"single_node_ip": helper_cluster.Cluster.get_ip_for_single_node(
client, cluster.id, cidr, ipv4_first=is_ip4)})
client, cluster.id, main_cidr, ipv4_first=is_ipv4)})
set_cluster_machine_cidr(client, cluster.id, machine_net, set_vip_dhcp_allocation=False)
elif is_none_platform_mode():
set_cluster_vips(client, cluster.id, machine_net)
elif args.vip_dhcp_allocation:
set_cluster_machine_cidr(client, cluster.id, machine_net)
else:
Expand All @@ -425,6 +432,12 @@ def nodes_flow(client, cluster_name, cluster):

set_hosts_roles(client, cluster, nodes_details, machine_net, tf, args.master_count, args.with_static_ips)

if is_none_platform_mode() and args.master_count > 1:
master_ips = helper_cluster.Cluster.get_master_ips(client, cluster.id, main_cidr) + helper_cluster.Cluster.get_master_ips(client, cluster.id, secondary_cidr)
load_balancer_ip = _get_host_ip_from_cidr(machine_net.cidr_v6 if machine_net.has_ip_v6 and not machine_net.has_ip_v4 else machine_net.cidr_v4)
lb_controller = LoadBalancerController(tf)
lb_controller.set_load_balancing_config(load_balancer_ip, master_ips)

utils.wait_till_hosts_with_macs_are_in_status(
client=client,
cluster_id=cluster.id,
Expand Down
51 changes: 51 additions & 0 deletions discovery-infra/test_infra/controllers/load_balancer_controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import re
import waiting
import socket
from logger import log


class LoadBalancerController:
    """Renders an nginx ``stream`` load-balancing configuration for the cluster
    masters, pushes it to terraform, and waits for the balancer to come up.

    The generated file contains one upstream/server pair per balanced port
    (443, 6443, 22623); the load-balancer container picks the file up from
    /etc/nginx/stream.d.
    """

    def __init__(self, tf):
        # Terraform handle used to propagate the rendered config variables.
        self._tf = tf

    def _render_socket_endpoint(self, ip, port):
        # IPv6 literals must be bracketed in host:port notation; a dot marks IPv4.
        if '.' in ip:
            return f'{ip}:{port}'
        return f'[{ip}]:{port}'

    def _render_upstream_server(self, ip, port):
        endpoint = self._render_socket_endpoint(ip, port)
        return f'\t\tserver {endpoint};'

    def _render_upstream_servers(self, master_ips, port):
        server_lines = [self._render_upstream_server(ip, port) for ip in master_ips]
        return '\n'.join(server_lines) + '\n'

    def _render_upstream_block(self, master_ips, port, upstream_name):
        servers = self._render_upstream_servers(master_ips, port)
        return f'\tupstream {upstream_name} {{\n{servers}\t}}\n'

    def _render_server_block(self, load_balancer_ip, port, upstream_name):
        listen_endpoint = self._render_socket_endpoint(load_balancer_ip, port)
        return f'\tserver {{\n\t\tlisten {listen_endpoint};\n\t\tproxy_pass {upstream_name};\n\t}}\n'

    def _render_port_entities(self, load_balancer_ip, master_ips, port):
        # Upstream names must be valid nginx identifiers: replace '.'/':' with '_'.
        sanitized_ip = re.sub(r"[.:]", r"_", load_balancer_ip)
        upstream_name = f'upstream_{sanitized_ip}_{port}'
        upstream_block = self._render_upstream_block(master_ips, port, upstream_name)
        server_block = self._render_server_block(load_balancer_ip, port, upstream_name)
        return upstream_block + server_block

    def _render_load_balancer_config_file(self, load_balancer_ip, master_ips):
        # 443 (ingress), 6443 (API) and 22623 (machine-config) are balanced.
        sections = [self._render_port_entities(load_balancer_ip, master_ips, port)
                    for port in [443, 6443, 22623]]
        return '\n'.join(sections)

    def _connect_to_load_balancer(self, load_balancer_ip):
        """Probe the API port (6443); True iff a TCP connection succeeds."""
        if ':' in load_balancer_ip:
            family = socket.AF_INET6
        else:
            family = socket.AF_INET
        try:
            with socket.socket(family, socket.SOCK_STREAM) as probe:
                probe.connect((load_balancer_ip, 6443))
                return True
        except Exception as e:
            log.warning("Could not connect to load balancer endpoint %s: %s", self._render_socket_endpoint(load_balancer_ip, 6443), e)
            return False

    def _wait_for_load_balancer(self, load_balancer_ip):
        log.info("Waiting for load balancer %s to be up", load_balancer_ip)
        waiting.wait(lambda: self._connect_to_load_balancer(load_balancer_ip),
                     timeout_seconds=120, sleep_seconds=5,
                     waiting_for="Waiting for load balancer to be active")

    def set_load_balancing_config(self, load_balancer_ip, master_ips):
        """Render the config, hand it to terraform, and block until the balancer answers."""
        load_balancer_config_file = self._render_load_balancer_config_file(load_balancer_ip, master_ips)
        self._tf.change_variables({"load_balancer_ip": load_balancer_ip,
                                   "load_balancer_config_file": load_balancer_config_file})
        self._wait_for_load_balancer(load_balancer_ip)
19 changes: 18 additions & 1 deletion discovery-infra/test_infra/helper_classes/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,10 @@ def get_network_interface_ip(interface):
'ip': get_network_interface_ip(interface), 'speed': interface.speed_mbps} for interface in
interfaces_list]

@staticmethod
def get_hosts_nics_data(hosts: list, ipv4_first=True):
    """Return per-host NIC inventory data for each host in ``hosts``.

    Each entry is the result of ``Cluster.get_inventory_host_nics_data`` for
    one host; ``ipv4_first`` controls the address-family ordering it applies.
    """
    return [Cluster.get_inventory_host_nics_data(h, ipv4_first=ipv4_first) for h in hosts]

@staticmethod
def get_ip_for_single_node(client, cluster_id, machine_cidr, ipv4_first=True):
cluster_info = client.cluster_get(cluster_id).to_dict()
Expand All @@ -728,7 +732,20 @@ def get_ip_for_single_node(client, cluster_id, machine_cidr, ipv4_first=True):
ip = intf["ip"]
if IPAddress(ip) in network:
return ip
raise Exception("IP for single node IPv6 not found")
raise Exception("IP for single node not found")

@staticmethod
def get_master_ips(client, cluster_id, network):
    """Return the IPs of the cluster's master hosts that fall inside ``network``.

    :param client: assisted-service API client used to fetch the cluster
    :param cluster_id: id of the cluster to inspect
    :param network: CIDR string; only addresses contained in it are returned
    :return: list of IP strings (possibly empty)
    """
    cluster_info = client.cluster_get(cluster_id).to_dict()
    net = IPNetwork(network)
    masters = [h for h in cluster_info["hosts"] if h["role"] == consts.NodeRoles.MASTER]
    ret = []
    for host_interfaces in Cluster.get_hosts_nics_data(masters):
        for intf in host_interfaces:
            # Keep only addresses that belong to the requested machine network.
            if IPAddress(intf["ip"]) in net:
                # append instead of rebuilding the list (`ret = ret + [...]`) each hit
                ret.append(intf["ip"])
    return ret

def get_host_disks(self, host, filter=None):
hosts = self.get_hosts()
Expand Down
13 changes: 7 additions & 6 deletions discovery-infra/test_infra/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import itertools
import json
import logging
import re
import os
import random
import shlex
Expand Down Expand Up @@ -669,15 +670,15 @@ def get_libvirt_nodes_from_tf_state(network_names, tf_state):


def extract_nodes_from_tf_state(tf_state, network_names, role):
    """Collect NIC data for every libvirt domain whose resource name contains ``role``.

    Unlike a single-resource lookup, this scans *all* matching ``libvirt_domain``
    resources in the terraform state (e.g. domains spread over primary and
    secondary networks) and aggregates their interfaces.

    :param tf_state: parsed terraform state exposing a ``resources`` attribute
    :param network_names: libvirt network names whose NICs should be included
    :param role: role substring matched against the resource name (e.g. "master")
    :return: dict mapping mac -> {"ip": addresses, "name": domain name, "role": role}
    """
    data = {}
    for domains in [r["instances"] for r in tf_state.resources
                    if r["type"] == "libvirt_domain" and role in r["name"]]:
        for d in domains:
            for nic in d["attributes"]["network_interface"]:
                # Ignore NICs attached to networks we are not interested in.
                if nic["network_name"] not in network_names:
                    continue
                data[nic["mac"]] = {"ip": nic["addresses"], "name": d["attributes"]["name"], "role": role}
    return data

Expand Down
22 changes: 21 additions & 1 deletion scripts/install_environment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ function config_firewalld() {
function config_squid() {
echo "Config squid"
sudo dnf install -y squid
sudo sed -i -e '/^.*allowed_ips.*$/d' -e '/^acl CONNECT.*/a acl allowed_ips src 1001:db8::/120' -e '/^http_access deny all/i http_access allow allowed_ips' /etc/squid/squid.conf
sudo sed -i -e '/^.*allowed_ips.*$/d' -e '/^acl CONNECT.*/a acl allowed_ips src 1001:db8::/120' -e '/^acl CONNECT.*/a acl allowed_ips src 1001:db8:0:200::/120' -e '/^http_access deny all/i http_access allow allowed_ips' /etc/squid/squid.conf
sudo systemctl restart squid
sudo firewall-cmd --zone=libvirt --add-port=3128/tcp
sudo firewall-cmd --zone=libvirt --add-port=3129/tcp
Expand Down Expand Up @@ -160,6 +160,24 @@ function config_chronyd() {
sudo firewall-cmd --zone=libvirt --add-port=123/udp
}

function config_nginx() {
echo "Config nginx"

# Create a container image to be used as the load balancer. Initially, it starts nginx that opens a stream includes all conf files
# in directory /etc/nginx/stream.d. The nginx is refreshed every 60 seconds
# The CMD loop reloads nginx when it is already running, otherwise starts it,
# so config files dropped into stream.d are picked up within a minute.
cat <<EOF | sudo podman build --tag load_balancer:latest -
FROM quay.io/centos/centos:8.3.2011
RUN dnf install -y nginx
RUN sed -i -e '/^http {/,\$d' /etc/nginx/nginx.conf
RUN sed -i -e '\$a stream {\ninclude /etc/nginx/stream.d/*.conf;\n}' -e '/^stream {/,\$d' /etc/nginx/nginx.conf
CMD ["bash", "-c", "while /bin/true ; do (ps -ef | grep -v grep | grep -q nginx && nginx -s reload) || nginx ; sleep 60 ; done"]
EOF
# Drop any stale container so the freshly built image is used on next start (best-effort).
sudo podman rm -f load_balancer || /bin/true
# Host directory the test code fills with per-cluster stream *.conf files;
# it is bind-mounted into the container at /etc/nginx/stream.d (see Makefile).
sudo mkdir -p $HOME/.test-infra/etc/nginx/stream.d
# Open the balanced API (6443) and machine-config (22623) ports.
# NOTE(review): the balancer config also listens on 443 — confirm that port is
# already open in the libvirt zone, or add it here as well.
sudo firewall-cmd --zone=libvirt --add-port=6443/tcp
sudo firewall-cmd --zone=libvirt --add-port=22623/tcp
}

function additional_configs() {
if [ "${ADD_USER_TO_SUDO}" != "n" ]; then
current_user=$(whoami)
Expand All @@ -186,6 +204,7 @@ function additional_configs() {

echo "enabling ipv6"
sudo sed -ir 's/net.ipv6.conf.all.disable_ipv6[[:blank:]]*=[[:blank:]]*1/net.ipv6.conf.all.disable_ipv6 = 0/g' /etc/sysctl.conf
sudo sed -i -e '/net.core.somaxconn/d' -e '$a net.core.somaxconn = 2000' /etc/sysctl.conf
sudo sysctl --load
}

Expand All @@ -197,4 +216,5 @@ config_firewalld
config_squid
fix_ipv6_routing
config_chronyd
config_nginx
additional_configs
1 change: 1 addition & 0 deletions skipper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ volumes:
- /dev/:/dev
- /run/udev:/run/udev
- /var/ai-logs:/var/ai-logs
- $HOME/.test-infra/etc/nginx/stream.d:/etc/nginx/stream.d
env:
PULL_SECRET: $PULL_SECRET
SECOND_PULL_SECRET: $SECOND_PULL_SECRET
Expand Down
Loading

0 comments on commit 31d9fd8

Please sign in to comment.