From 32e9af15636fb1a1ef5e20f903f1026d33b30746 Mon Sep 17 00:00:00 2001
From: Michael Nguyen
Date: Tue, 22 May 2018 13:45:25 -0400
Subject: [PATCH] Fix race condition in reboot flag test

PR #368 added a check for a boot_id to confirm reboots in the reboot
role. This caused a race condition in the rpm_ostree_install and
rpm_ostree_uninstall roles when using the reboot flag.

The reboot role has an option to not perform a reboot and just check
that the system comes down and back up. This was leveraged by the
rpm_ostree_install and rpm_ostree_uninstall roles to fire the
respective commands with the -r flag. The -r flag causes a reboot to
occur when the command executes, but the command must be run
asynchronously (it won't return when the system goes down, which will
cause Ansible to fail).

When rpm_ostree_install/rpm_ostree_uninstall is called with the -r
flag, it calls the reboot role. If the reboot role can execute before
the reboot occurs, it will execute successfully. If the system goes
down before the reboot role can grab the boot_id, it will fail.

This PR modifies the reboot_flag test to not use the reboot role. It
copies most of the logic from the reboot role into the test itself.
Since this was a corner case for the -r flag, I felt like it was an
appropriate exception to not re-use the role.
---
 tests/pkg-layering/reboot_flag.yml | 77 ++++++++++++++++++++++++++----
 1 file changed, 67 insertions(+), 10 deletions(-)

diff --git a/tests/pkg-layering/reboot_flag.yml b/tests/pkg-layering/reboot_flag.yml
index a4022c5..871055c 100644
--- a/tests/pkg-layering/reboot_flag.yml
+++ b/tests/pkg-layering/reboot_flag.yml
@@ -1,12 +1,47 @@
 ---
 # set ft=ansible
 #
+- set_fact:
+    real_ansible_host: "{{ ansible_host }}"
+    timeout: "{{ cli_reboot_timeout | default('300') }}"
 
-- import_role:
-    name: rpm_ostree_install
-  vars:
-    roi_packages: "{{ g_pkg1 }}"
-    roi_reboot: true
+# Support both ansible_port and the older ansible_ssh_port inventory variable
+# (Fedora STR still uses the old name); if both are set, ansible_ssh_port wins.
+- when: ansible_port is defined
+  set_fact:
+    real_ansible_port: "{{ ansible_port }}"
+
+- when: ansible_ssh_port is defined
+  set_fact:
+    real_ansible_port: "{{ ansible_ssh_port }}"
+
+- name: Get original bootid
+  command: cat /proc/sys/kernel/random/boot_id
+  register: orig_bootid
+
+- name: Package layer {{ g_pkg1 }} with reboot flag
+  command: rpm-ostree install {{ g_pkg1 | quote }} -r
+  async: 60
+  poll: 0
+  ignore_errors: true
+
+- name: wait for hosts to come back up
+  local_action:
+    wait_for host={{ real_ansible_host }}
+    port={{ real_ansible_port | default('22') }}
+    state=started
+    delay=30
+    timeout={{ timeout }}
+    search_regex="OpenSSH"
+  become: false
+
+# Poll boot_id (6 retries, 10s apart) until it differs; may be unnecessary
+- name: Wait until bootid changes
+  command: cat /proc/sys/kernel/random/boot_id
+  register: new_bootid
+  until: new_bootid.stdout != orig_bootid.stdout
+  retries: 6
+  delay: 10
 
 - import_role:
     name: rpm_ostree_install_verify
@@ -14,11 +49,33 @@
     roiv_package_name: "{{ g_pkg1 }}"
     roiv_binary_name: "{{ g_pkg1 }}"
 
-- import_role:
-    name: rpm_ostree_uninstall
-  vars:
-    rou_packages: "{{ g_pkg1 }}"
-    rou_reboot: true
+- name: Get original bootid
+  command: cat /proc/sys/kernel/random/boot_id
+  register: orig_bootid
+
+- name: Remove {{ g_pkg1 }} with reboot flag
+  command: rpm-ostree uninstall {{ g_pkg1 | quote }} -r
+  async: 60
+  poll: 0
+  ignore_errors: true
+
+- name: wait for hosts to come back up
+  local_action:
+    wait_for host={{ real_ansible_host }}
+    port={{ real_ansible_port | default('22') }}
+    state=started
+    delay=30
+    timeout={{ timeout }}
+    search_regex="OpenSSH"
+  become: false
+
+# Poll boot_id (6 retries, 10s apart) until it differs; may be unnecessary
+- name: Wait until bootid changes
+  command: cat /proc/sys/kernel/random/boot_id
+  register: new_bootid
+  until: new_bootid.stdout != orig_bootid.stdout
+  retries: 6
+  delay: 10
 
 - import_role:
     name: rpm_ostree_uninstall_verify