From 40cea7a5f0b47a9e89d2651194c4936721d9919f Mon Sep 17 00:00:00 2001
From: Ryan Kraus
Date: Tue, 29 Jun 2021 00:46:08 -0400
Subject: [PATCH] QA fixes to destroy cluster command. This is for issue #72

---
 app/cli/destroy                              |   1 +
 app/playbooks/destroy.d/cluster/destroy.yaml | 313 -------------------
 app/playbooks/destroy.d/cluster/destroy.yml  | 131 ++++++++
 app/roles/rhcos-images/templates/wipe.ign.j2 |   3 +-
 devel.env                                    |   4 +-
 5 files changed, 136 insertions(+), 316 deletions(-)
 create mode 120000 app/cli/destroy
 delete mode 100755 app/playbooks/destroy.d/cluster/destroy.yaml
 create mode 100755 app/playbooks/destroy.d/cluster/destroy.yml

diff --git a/app/cli/destroy b/app/cli/destroy
new file mode 120000
index 0000000..1fa21ed
--- /dev/null
+++ b/app/cli/destroy
@@ -0,0 +1 @@
+.command
\ No newline at end of file
diff --git a/app/playbooks/destroy.d/cluster/destroy.yaml b/app/playbooks/destroy.d/cluster/destroy.yaml
deleted file mode 100755
index 08ca963..0000000
--- a/app/playbooks/destroy.d/cluster/destroy.yaml
+++ /dev/null
@@ -1,313 +0,0 @@
-#!/usr/bin/env ansible-playbook
-- name: Are you sure?
-  hosts: all
-  any_errors_fatal: yes
-  max_fail_percentage: 0
-  serial: 100%
-  gather_facts: no
-
-  vars_prompt:
-    - name: confirm
-      prompt: This will destroy the running cluster and you will lose all data. Are you sure? [yes to continue]
-      private: no
-
-  tasks:
-    - name: Validate user input
-      fail:
-        msg: "Action cancelled."
-      when: 'confirm != "yes"'
-
-- import_playbook: /app/playbooks/util_vm_facts.yml
-
-- name: Destroy cluster nodes
-  hosts: cluster
-  become: no
-  gather_facts: no
-
-  pre_tasks:
-    - include_role:
-        name: management
-        defaults_from: main.yml
-        tasks_from: "poweroff/{{ mgmt_provider }}.yml"
-        vars_from: "{{ mgmt_provider }}.yml"
-        handlers_from: "{{ mgmt_provider }}.yml"
-      vars:
-        management_hostname: "{{ mgmt_hostname }}"
-        management_user: "{{ mgmt_user }}"
-        management_pass: "{{ mgmt_password }}"
-
-  roles:
-    - name: pxelinux-kickstarts
-      pxelinux_kickstarts_pxe_server: "{{ groups.bastion_hosts.0 }}"
-      pxelinux_kickstarts_content_server: "http://{{ hostvars[groups.bastion_hosts.0].ansible_host }}:8081"
-      pxelinux_kickstarts_host_role: "{{ node_role }}"
-      pxelinux_kickstarts_install_disk: "{{ install_disk }}"
-      pxelinux_kickstarts_profile: wipe
-
-  tasks:
-    - include_role:
-        name: management
-        defaults_from: main.yml
-        tasks_from: "netboot/{{ mgmt_provider }}.yml"
-        vars_from: "{{ mgmt_provider }}.yml"
-        handlers_from: "{{ mgmt_provider }}.yml"
-      vars:
-        management_hostname: "{{ mgmt_hostname }}"
-        management_user: "{{ mgmt_user }}"
-        management_pass: "{{ mgmt_password }}"
-
-    - name: wait for nodes to start wiping
-      shell: ping -c 1 {{ ansible_host }}
-      delegate_to: "{{ groups.bastion_hosts[0] }}"
-      become: no
-      register: node_ping
-      until: "node_ping is not failed"
-      retries: 30
-      delay: 10
-      changed_when: no
-
-- name: Terminate the Bootstrap node
-  hosts: bootstrap
-  become: no
-  gather_facts: no
-
-  tasks:
-    - include_role:
-        name: management
-        defaults_from: main.yml
-        tasks_from: "poweroff/{{ mgmt_provider }}.yml"
-        vars_from: "{{ mgmt_provider }}.yml"
-        handlers_from: "{{ mgmt_provider }}.yml"
-      vars:
-        management_hostname: "{{ mgmt_hostname }}"
-        management_user: "{{ mgmt_user }}"
-        management_pass: "{{ mgmt_password }}"
-
-- name: Clean coreos install repos
-  hosts: bootstrap
-  become: yes
-  gather_facts: no
-
-  tasks:
-    - name: erase stored images and igniton configs
-      shell: rm -f /var/www/html/*
-
-    - name: erase pxelinux boot kickstarts
-      file:
-        path: /var/lib/tftpboot/pxelinux.cfg
-        state: absent
-
-    - name: erase uefi grub kickstarts
-      file:
-        path: /var/lib/tftpboot/uefi
-        state: absent
-
-    - name: erase coreos boot images
-      file:
-        path: /var/lib/tftpboot/rhcos
-        state: absent
-
-- name: Remove cluster dhcp records
-  hosts: cluster, management
-  gather_facts: no
-  serial: 1
-
-  roles:
-    - name: dhcp
-      dhcp_present: no
-
-- name: Remove ignored MAC Addresses
-  hosts: localhost
-  gather_facts: no
-  serial: 1
-
-  vars:
-    dns_domain: "{{ cluster_name }}.{{ cluster_domain }}"
-    dns_reverse_domain: "{{ reverse_ptr_zone }}"
-
-  tasks:
-    - name: configure extra records
-      include_role:
-        name: dhcp
-      vars:
-        dhcp_name: "{{ node.name }}"
-        dhcp_mac_address: "{{ node.mac }}"
-        dhcp_ignore: yes
-        dhcp_present: no
-      loop: "{{ ignored_macs }}"
-      loop_control:
-        loop_var: node
-
-- name: Remove cluster node records
-  hosts: cluster, management
-  gather_facts: no
-  serial: 1
-
-  vars:
-    dns_domain: "{{ cluster_name }}.{{ cluster_domain }}"
-    dns_reverse_domain: "{{ reverse_ptr_zone }}"
-
-  roles:
-    - name: dns
-      dns_present: no
-
-- name: Remove Load Balanced DNS entries
-  hosts: loadbalancer
-  gather_facts: no
-  serial: 1
-
-  vars:
-    dns_domain: "{{ cluster_name }}.{{ cluster_domain }}"
-    dns_reverse_domain: "{{ reverse_ptr_zone }}"
-
-  roles:
-    - name: dns
-      dns_present: no
-      dns_hostname: "api"
-      dns_reverse: no
-    - name: dns
-      dns_present: no
-      dns_hostname: "api-int"
-      dns_reverse: no
-    - name: dns
-      dns_present: no
-      dns_hostname: "*.apps"
-      dns_reverse: no
-    - name: dns
-      dns_present: no
-      dns_hostname: "loadbalancer"
-
-- name: Remove etcd DNS entries
-  hosts: control_plane
-  gather_facts: no
-  serial: 1
-
-  vars:
-    dns_domain: "{{ cluster_name }}.{{ cluster_domain }}"
-    dns_reverse_domain: "{{ reverse_ptr_zone }}"
-
-  roles:
-    - name: dns
-      dns_present: no
-      dns_hostname: "etcd-{{ cp_node_id }}"
-      dns_reverse: no
-    - name: dns
-      dns_present: no
-      dns_hostname: "_etcd-server-ssl.tcp"
-      dns_type: 'SRV'
-      dns_value: "0 10 2380 etcd-{{ cp_node_id }}.{{ cluster_name }}.{{ cluster_domain }}."
-
-- name: Remove extra DNS records
-  hosts: localhost
-  gather_facts: no
-  serial: 1
-
-  vars:
-    dns_domain: "{{ cluster_name }}.{{ cluster_domain }}"
-    dns_reverse_domain: "{{ reverse_ptr_zone }}"
-
-  tasks:
-    - name: configure extra records
-      include_role:
-        name: dns
-      vars:
-        dns_present: no
-        dns_hostname: "{{ node.name }}"
-        dns_value: "{{ node.ip }}"
-      loop: "{{ extra_nodes }}"
-      loop_control:
-        loop_var: node
-
-- name: Purge openshift installation resource caches
-  host: localhost
-  gather_facts: no
-
-  tasks:
-    - name: remove installer data cache
-      file:
-        path: /data/openshift-installer
-        state: absent
-
-    - name: remove ip address database
-      file:
-        path: /data/ip_addresses
-        state: absent
-
-- name: Uninstall cluster load balancer
-  host: bastion
-  gather_facts: no
-
-  tasks:
-    - name: shutdown the load balancer service
-      systemd:
-        name: haproxy
-        state: stopped
-        enabled: no
-        force: yes
-
-    - name: shutdown the VIP service
-      systemd:
-        name: keepalived
-        state: stopped
-        enabled: no
-        force: yes
-
-    - name: delete load balancer configuration
-      file:
-        path: /etc/haproxy/haproxy.cfg
-        state: absent
-
-    - name: delete the vip configuration
-      file:
-        path: /etc/keepalived/keepalived.conf
-        state: absent
-
-    - name: remove vip cached password
-      file:
-        path: /data/keepalived.pass
-        state: absent
-      delegate_to: localhost
-
-- name: Clean firewall configuration
-  host: bastion
-  gather_facts: no
-  become: yes
-
-  roles:
-    - name: router
-      router_all_interfaces: "{{ all_interfaces }}"
-      router_lan_interfaces: "{{ lan_interfaces }}"
-      router_lan_address: "{{ hostvars.lan.ansible_host }}"
-      router_lan_subnet: "{{ subnet }}/{{ subnet_mask }}"
-      router_subnet_mask: "{{ subnet_mask }}"
-      router_wan_interface: "{{ wan_interface }}"
-      router_loadbalancer: "{{ loadbalancer_vip }}"
-      router_dns_forwarders: "{{ dns_forwarders }}"
-      allowed_services:
-        - "SSH to Bastion"
-
-- name: Clean cockpit interface
-  host: bastion
-  gather_facts: no
-  become: yes
-
-  tasks:
-    - name: remove faros page from cockpit
-      file:
-        path: /usr/local/share/cockpit/faros
-        state: absent
-
-- name: Restart network services
-  hosts: bastion
-  become: yes
-
-  tasks:
-    - name: restart named
-      systemd:
-        name: named
-        state: restarted
-
-    - name: restart dhcpd
-      systemd:
-        name: dhcpd
-        state: restarted
diff --git a/app/playbooks/destroy.d/cluster/destroy.yml b/app/playbooks/destroy.d/cluster/destroy.yml
new file mode 100755
index 0000000..9355a15
--- /dev/null
+++ b/app/playbooks/destroy.d/cluster/destroy.yml
@@ -0,0 +1,131 @@
+#!/usr/bin/env ansible-playbook
+- name: Are you sure?
+  hosts: all
+  any_errors_fatal: yes
+  max_fail_percentage: 0
+  serial: 100%
+  gather_facts: no
+
+  vars_prompt:
+    - name: confirm
+      prompt: This will destroy the running cluster and you will lose all data. Are you sure? [yes to continue]
+      private: no
+
+  tasks:
+    - name: Validate user input
+      fail:
+        msg: "Action cancelled."
+      when: 'confirm != "yes"'
+
+- import_playbook: /app/playbooks/util_vm_facts.yml
+
+- name: Destroy cluster nodes
+  hosts: cluster:!bootstrap
+  become: no
+  gather_facts: no
+  any_errors_fatal: yes
+  max_fail_percentage: 0
+  serial: 1
+
+  pre_tasks:
+    - include_role:
+        name: management
+        defaults_from: main.yml
+        tasks_from: "poweroff/{{ mgmt_provider }}.yml"
+        vars_from: "{{ mgmt_provider }}.yml"
+        handlers_from: "{{ mgmt_provider }}.yml"
+      vars:
+        management_hostname: "{{ mgmt_hostname }}"
+        management_user: "{{ mgmt_user }}"
+        management_pass: "{{ mgmt_password }}"
+
+  roles:
+    - name: pxelinux-kickstarts
+      pxelinux_kickstarts_pxe_server: "{{ groups.bastion_hosts.0 }}"
+      pxelinux_kickstarts_content_server: "http://{{ hostvars[groups.bastion_hosts.0].ansible_host }}:8081"
+      pxelinux_kickstarts_host_role: "{{ node_role }}"
+      pxelinux_kickstarts_install_disk: "{{ install_disk }}"
+      pxelinux_kickstarts_profile: wipe
+
+  tasks:
+    - include_role:
+        name: management
+        defaults_from: main.yml
+        tasks_from: "netboot/{{ mgmt_provider }}.yml"
+        vars_from: "{{ mgmt_provider }}.yml"
+        handlers_from: "{{ mgmt_provider }}.yml"
+      vars:
+        management_hostname: "{{ mgmt_hostname }}"
+        management_user: "{{ mgmt_user }}"
+        management_pass: "{{ mgmt_password }}"
+
+- name: Wait for cluster nodes to start wiping
+  hosts: cluster:!bootstrap
+  become: no
+  gather_facts: no
+  any_errors_fatal: yes
+  max_fail_percentage: 0
+
+  tasks:
+    - name: wait for nodes to start wiping
+      shell: ping -c 1 {{ ansible_host }}
+      delegate_to: "{{ groups.bastion_hosts[0] }}"
+      become: no
+      register: node_ping
+      until: "node_ping is not failed"
+      retries: 30
+      delay: 10
+      changed_when: no
+
+- name: Terminate the Bootstrap node
+  hosts: bootstrap
+  become: no
+  gather_facts: no
+
+  tasks:
+    - include_role:
+        name: management
+        defaults_from: main.yml
+        tasks_from: "poweroff/{{ mgmt_provider }}.yml"
+        vars_from: "{{ mgmt_provider }}.yml"
+        handlers_from: "{{ mgmt_provider }}.yml"
+      vars:
+        management_hostname: "{{ mgmt_hostname }}"
+        management_user: "{{ mgmt_user }}"
+        management_pass: "{{ mgmt_password }}"
+
+- name: Clean cockpit interface
+  hosts: bastion
+  gather_facts: no
+  become: yes
+
+  tasks:
+    - name: remove faros page from cockpit
+      file:
+        path: /usr/local/share/cockpit/faros
+        state: absent
+
+
+- name: Wait for hosts to finish disk wipe
+  hosts: cluster:!bootstrap
+  become: no
+  gather_facts: no
+
+  tasks:
+    - name: create tmp cache dir
+      tempfile:
+        state: directory
+      register: cache
+      delegate_to: localhost
+
+    - name: wait for node to power off
+      shell: |
+        ilorest --cache-dir="{{ cache.path }}" login "{{ mgmt_hostname }}" -u '{{ mgmt_user }}' -p '{{ mgmt_password }}' &>/dev/null
+        ilorest --cache-dir="{{ cache.path }}" --nologo get --selector=ComputerSystem PowerState --json
+      register: powercheck
+      changed_when: False
+      delegate_to: localhost
+      retries: 60
+      delay: 10
+      until: "(powercheck.stdout | from_json)['PowerState'] == 'Off'"
+
diff --git a/app/roles/rhcos-images/templates/wipe.ign.j2 b/app/roles/rhcos-images/templates/wipe.ign.j2
index a2e4ff6..e22a1a0 100644
--- a/app/roles/rhcos-images/templates/wipe.ign.j2
+++ b/app/roles/rhcos-images/templates/wipe.ign.j2
@@ -1,4 +1,5 @@
-tion": {
+{
+  "ignition": {
     "version": "3.2.0"
   },
   "systemd": {
diff --git a/devel.env b/devel.env
index 1707140..bc427aa 100644
--- a/devel.env
+++ b/devel.env
@@ -1,5 +1,5 @@
-WAN_INT=ens1
-BASTION_IP_ADDR=192.168.5.16
+WAN_INT=eno2
+BASTION_IP_ADDR=192.168.8.1
 BASTION_SSH_USER=core
 BASTION_HOST_NAME=bastion
 CLUSTER_DOMAIN=faros.site