Skip to content

Commit

Permalink
[SPARK-39870][PYTHON][TESTS] Add flag to 'run-tests.py' to support re…
Browse files Browse the repository at this point in the history
…taining the output

### What changes were proposed in this pull request?

This patch adds a new flag to 'run-tests.py' that supports retaining
the output of a test run. Previously, all log files and data were
deleted as soon as the test finished successfully. This made it
harder to write and debug test cases when developing incrementally.

The new flag uses the already existing 'target' flag to redirect the
test output to that location and simply bypasses the deletion at the
end.

The semantics of the test output are not changed.

### Does this PR introduce _any_ user-facing change?

Adds a new flag to the 'run-tests.py' script. The flag is by default unused.

### How was this patch tested?
Manually tested with and without the flag for successful and failing
tests.

Closes apache#37288 from grundprinzip/pyspark_runtests.

Lead-authored-by: Martin Grund <martin.grund@databricks.com>
Co-authored-by: Martin Grund <grundprinzip@gmail.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
  • Loading branch information
2 people authored and HyukjinKwon committed Jul 27, 2022
1 parent 01d41e7 commit d31505e
Showing 1 changed file with 36 additions and 9 deletions.
45 changes: 36 additions & 9 deletions python/run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,22 @@ def get_valid_filename(s):
raise RuntimeError("Cannot find assembly build directory, please build Spark first.")


def run_individual_python_test(target_dir, test_name, pyspark_python):
def run_individual_python_test(target_dir, test_name, pyspark_python, keep_test_output):
"""
Runs an individual test. This function is called by the multi-process runner of all tests.
Parameters
----------
target_dir
Destination for the Hive and log directory.
test_name
Test name.
pyspark_python
Python version used to run the test.
keep_test_output
Flag indicating if the test output should be retained after successful execution.
"""
env = dict(os.environ)
env.update({
'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH,
Expand Down Expand Up @@ -107,20 +122,24 @@ def run_individual_python_test(target_dir, test_name, pyspark_python):
env["PYSPARK_SUBMIT_ARGS"] = " ".join(spark_args)

output_prefix = get_valid_filename(pyspark_python + "__" + test_name + "__").lstrip("_")
per_test_output = tempfile.NamedTemporaryFile(prefix=output_prefix, suffix=".log")
# Delete is always set to False since the cleanup will be either done by removing the
# whole test dir, or the test output is retained.
per_test_output = tempfile.NamedTemporaryFile(prefix=output_prefix, dir=tmp_dir,
suffix=".log", delete=False)
LOGGER.info(
"Starting test(%s): %s (temp output: %s)", pyspark_python, test_name, per_test_output.name)
start_time = time.time()
try:
retcode = subprocess.Popen(
[os.path.join(SPARK_HOME, "bin/pyspark")] + test_name.split(),
stderr=per_test_output, stdout=per_test_output, env=env).wait()
# There exists a race condition in Python and it causes flakiness in MacOS
# https://github.com/python/cpython/issues/73885
if platform.system() == "Darwin":
os.system("rm -rf " + tmp_dir)
else:
shutil.rmtree(tmp_dir, ignore_errors=True)
if not keep_test_output:
# There exists a race condition in Python and it causes flakiness in MacOS
# https://github.com/python/cpython/issues/73885
if platform.system() == "Darwin":
os.system("rm -rf " + tmp_dir)
else:
shutil.rmtree(tmp_dir, ignore_errors=True)
except BaseException:
LOGGER.exception("Got exception while running %s with %s", test_name, pyspark_python)
# Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if
Expand Down Expand Up @@ -226,6 +245,13 @@ def parse_opts():
"'pyspark.sql.tests FooTests.test_foo' to run the specific unittest in the class. "
"'--modules' option is ignored if they are given.")
)
group.add_argument(
"-k", "--keep-test-output", action='store_true',
default=False,
help=("If set to true will retain the temporary test directories. In addition, the "
"standard output and standard error are redirected to a file in the target "
"directory.")
)

args, unknown = parser.parse_known_args()
if unknown:
Expand Down Expand Up @@ -317,7 +343,8 @@ def process_queue(task_queue):
except Queue.Empty:
break
try:
run_individual_python_test(target_dir, test_goal, python_exec)
run_individual_python_test(target_dir, test_goal,
python_exec, opts.keep_test_output)
finally:
task_queue.task_done()

Expand Down

0 comments on commit d31505e

Please sign in to comment.