Skip to content

Commit

Permalink
feat(test): add optional timeout in SSH connections
Browse files Browse the repository at this point in the history
We have a mechanism that allows us to run a command inside a microVM.
This mechanism ends up using Popen.communicate to retrieve the output of
the SSH command. Popen.communicate comes with a timeout variable that we
were not using. However, it is useful in cases where we don't want to
wait for the result of the command we execute in the microVM.

This commit extends our SSH mechanism to accept the timeout argument.
Then, it uses the timeout in the test_balloon.py::test_deflate_on_oom
when launching the fillmem process. fillmem drains the memory of the VM,
which sometimes results in the SSH connection hanging.

Signed-off-by: Babis Chalios <bchalios@amazon.es>
  • Loading branch information
bchalios authored and pb8o committed Oct 6, 2023
1 parent bfd8d26 commit b4871b1
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 18 deletions.
12 changes: 4 additions & 8 deletions tests/framework/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ def get_free_mem_ssh(ssh_connection):
raise Exception("Available memory not found in `/proc/meminfo")


def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None):
def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None, timeout=None):
"""
Execute a given command.
Expand All @@ -469,7 +469,7 @@ def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None):
)

# Capture stdout/stderr
stdout, stderr = proc.communicate()
stdout, stderr = proc.communicate(timeout=timeout)

output_message = f"\n[{proc.pid}] Command:\n{cmd}"
# Append stdout/stderr to the output message
Expand All @@ -493,18 +493,14 @@ def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None):
return CommandReturn(proc.returncode, stdout.decode(), stderr.decode())


def run_cmd(cmd, ignore_return_code=False, no_shell=False, cwd=None):
def run_cmd(cmd, **kwargs):
"""
Run a command using the sync function that logs the output.
:param cmd: command to run
:param ignore_return_code: whether a non-zero return code should be ignored
:param noshell: don't run the command in a sub-shell
:returns: tuple of (return code, stdout, stderr)
"""
return run_cmd_sync(
cmd=cmd, ignore_return_code=ignore_return_code, no_shell=no_shell, cwd=cwd
)
return run_cmd_sync(cmd, **kwargs)


def eager_map(func, iterable):
Expand Down
9 changes: 5 additions & 4 deletions tests/host_tools/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,19 @@ def _init_connection(self):
if ecode != 0:
raise ConnectionError

def run(self, cmd_string):
def run(self, cmd_string, timeout=None):
"""Execute the command passed as a string in the ssh context."""
return self._exec(
[
"ssh",
*self.options,
f"{self.user}@{self.host}",
cmd_string,
]
],
timeout,
)

def _exec(self, cmd):
def _exec(self, cmd, timeout=None):
"""Private function that handles the ssh client invocation."""

# TODO: If a microvm runs in a particular network namespace, we have to
Expand All @@ -111,7 +112,7 @@ def _exec(self, cmd):
if self.netns_file_path is not None:
ctx = Namespace(self.netns_file_path, "net")
with ctx:
return utils.run_cmd(cmd, ignore_return_code=True)
return utils.run_cmd(cmd, ignore_return_code=True, timeout=timeout)


def mac_from_ip(ip_address):
Expand Down
21 changes: 15 additions & 6 deletions tests/integration_tests/functional/test_balloon.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import logging
import time
from subprocess import TimeoutExpired

import pytest
from retry import retry
Expand Down Expand Up @@ -66,12 +67,20 @@ def make_guest_dirty_memory(ssh_connection, amount_mib=32):
lower_ssh_oom_chance(ssh_connection)

cmd = f"/usr/local/bin/fillmem {amount_mib}"
exit_code, stdout, stderr = ssh_connection.run(cmd)
# add something to the logs for troubleshooting
if exit_code != 0:
logger.error("while running: %s", cmd)
logger.error("stdout: %s", stdout)
logger.error("stderr: %s", stderr)
try:
exit_code, stdout, stderr = ssh_connection.run(cmd, timeout=1.0)
# add something to the logs for troubleshooting
if exit_code != 0:
logger.error("while running: %s", cmd)
logger.error("stdout: %s", stdout)
logger.error("stderr: %s", stderr)

cmd = "cat /tmp/fillmem_output.txt"
except TimeoutExpired:
# It's ok if this expires. Some times the SSH connection
# gets killed by the OOM killer *after* the fillmem program
# started. As a result, we can ignore timeouts here.
pass

time.sleep(5)

Expand Down

0 comments on commit b4871b1

Please sign in to comment.