From 480d75500c023af4b7215b086aff51f4c00f5e96 Mon Sep 17 00:00:00 2001 From: Mikhail Bautin Date: Tue, 24 Aug 2021 08:41:24 -0700 Subject: [PATCH] [#9804] Add Python type annotations to the Spark test runner Summary: Add Python type annotations to the Spark test runner. Also use the codecheck tool to verify a subset of Python scripts during the build. Test Plan: Jenkins: build type: release Reviewers: jharveysmith, steve.varnau Reviewed By: steve.varnau Subscribers: ybase Differential Revision: https://phabricator.dev.yugabyte.com/D12701 --- bin/run_codecheck | 9 ++ build-support/common-test-env.sh | 4 +- build-support/run_tests_on_spark.py | 183 +++++++++++++---------- build-support/split_long_command_line.py | 27 ++-- build-support/validate_build_root.py | 2 +- codecheck.ini | 47 ++++++ python/yb/yb_dist_tests.py | 115 +++++++++----- requirements.txt | 2 + requirements_frozen.txt | 26 ++-- tox.ini | 16 ++ 10 files changed, 282 insertions(+), 149 deletions(-) create mode 100755 bin/run_codecheck create mode 100644 codecheck.ini create mode 100644 tox.ini diff --git a/bin/run_codecheck b/bin/run_codecheck new file mode 100755 index 000000000000..a89167bf9998 --- /dev/null +++ b/bin/run_codecheck @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -euo pipefail +. "${BASH_SOURCE%/*}/../build-support/common-build-env.sh" + +activate_virtualenv +set_pythonpath + +codecheck "$@" diff --git a/build-support/common-test-env.sh b/build-support/common-test-env.sh index 1ab71f02e6f1..059aafbda06e 100644 --- a/build-support/common-test-env.sh +++ b/build-support/common-test-env.sh @@ -1850,11 +1850,13 @@ run_python_doctest() { run_python_tests() { activate_virtualenv + check_python_script_syntax ( export PYTHONPATH=$YB_SRC_ROOT/python run_python_doctest + log "Invoking the codecheck tool" + python3 -m codecheck ) - check_python_script_syntax } should_run_java_test_methods_separately() { diff --git a/build-support/run_tests_on_spark.py b/build-support/run_tests_on_spark.py index df991aff1dd5..41abb0e288bf 100755 --- a/build-support/run_tests_on_spark.py +++ b/build-support/run_tests_on_spark.py @@ -64,14 +64,13 @@ import sys import time import traceback -import tempfile import errno import signal from datetime import datetime from collections import defaultdict -from typing import List +from typing import List, Dict, Set, Tuple, Optional, Any, cast BUILD_SUPPORT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -86,12 +85,12 @@ TIME_SEC_TO_START_RUNNING_TEST = 5 * 60 -def wait_for_path_to_exist(target_path): +def wait_for_path_to_exist(target_path: str) -> None: if os.path.exists(target_path): return waited_for_sec = 0 start_time_sec = time.time() - printed_msg_at_sec = 0 + printed_msg_at_sec: float = 0.0 MSG_PRINT_INTERVAL_SEC = 5.0 TIMEOUT_SEC = 120 while not os.path.exists(target_path): @@ -114,8 +113,7 @@ def wait_for_path_to_exist(target_path): sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python')) from yb import yb_dist_tests # noqa from yb import command_util # noqa -from yb.common_util import set_to_comma_sep_str, get_bool_env_var, is_macos # noqa -from yb.yb_dist_tests import TestDescriptor # noqa +from yb.common_util import set_to_comma_sep_str, is_macos # noqa # Special Jenkins environment variables. They are propagated to tasks running in a distributed way # on Spark. @@ -177,21 +175,21 @@ def wait_for_path_to_exist(target_path): # Global variables. Some of these are used on the remote worker side. 
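# global_conf_dict is built on the driver by set_global_conf_for_spark_jobs() and read back on
# Spark workers by initialize_remote_task(); propagated_env_vars carries the Jenkins environment
# variables listed above to tasks running remotely.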
verbose = False g_spark_master_url_override = None -propagated_env_vars = {} +propagated_env_vars: Dict[str, str] = {} global_conf_dict = None spark_context = None archive_sha256sum = None -def configure_logging(): +def configure_logging() -> None: log_level = logging.INFO logging.basicConfig( level=log_level, format="[%(filename)s:%(lineno)d] %(asctime)s %(levelname)s: %(message)s") -def is_pid_running(pid): - import psutil +def is_pid_running(pid: int) -> bool: + import psutil # type: ignore try: process = psutil.Process(pid) return process.status() != psutil.STATUS_ZOMBIE @@ -200,7 +198,7 @@ def is_pid_running(pid): return False -def delete_if_exists_log_errors(file_path): +def delete_if_exists_log_errors(file_path: str) -> None: if os.path.exists(file_path): try: if os.path.isdir(file_path): @@ -211,20 +209,20 @@ def delete_if_exists_log_errors(file_path): logging.error("Error deleting file %s: %s", file_path, os_error) -def log_heading(msg): +def log_heading(msg: str) -> None: logging.info('\n%s\n%s\n%s' % ('-' * 80, msg, '-' * 80)) # Initializes the spark context. The details list will be incorporated in the Spark application # name visible in the Spark web UI. -def init_spark_context(details=[]): +def init_spark_context(details: List[str] = []) -> None: global spark_context if spark_context: return log_heading("Initializing Spark context") - global_conf = yb_dist_tests.global_conf - build_type = yb_dist_tests.global_conf.build_type - from pyspark import SparkContext + global_conf = yb_dist_tests.get_global_conf() + build_type = global_conf.build_type + from pyspark import SparkContext # type: ignore SparkContext.setSystemProperty('spark.task.maxFailures', str(SPARK_TASK_MAX_FAILURES)) spark_master_url = g_spark_master_url_override @@ -232,7 +230,7 @@ def init_spark_context(details=[]): if is_macos(): logging.info("This is macOS, using the macOS Spark cluster") spark_master_url = SPARK_URLS['macos'] - elif yb_dist_tests.global_conf.build_type in ['asan', 'tsan']: + elif build_type in ['asan', 'tsan']: logging.info("Using a separate Spark cluster for ASAN and TSAN tests") spark_master_url = SPARK_URLS['linux_asan_tsan'] else: @@ -267,23 +265,23 @@ def init_spark_context(details=[]): log_heading("Initialized Spark context") -def set_global_conf_for_spark_jobs(): +def set_global_conf_for_spark_jobs() -> None: global global_conf_dict - global_conf_dict = vars(yb_dist_tests.global_conf) + global_conf_dict = vars(yb_dist_tests.get_global_conf()) -def get_bash_path(): +def get_bash_path() -> str: if sys.platform == 'darwin': return '/usr/local/bin/bash' return '/bin/bash' -def parallel_run_test(test_descriptor_str): +def parallel_run_test(test_descriptor_str: str) -> yb_dist_tests.TestResult: """ This is invoked in parallel to actually run tests. """ global_conf = initialize_remote_task() - from yb import yb_dist_tests, command_util + from yb import yb_dist_tests wait_for_path_to_exist(global_conf.build_root) @@ -321,7 +319,7 @@ def parallel_run_test(test_descriptor_str): # We could use "run_program" here, but it collects all the output in memory, which is not # ideal for a large amount of test log output. The "tee" part also makes the output visible in # the standard error of the Spark task as well, which is sometimes helpful for debugging. 
- def run_test(): + def run_test() -> Tuple[int, float]: start_time_sec = time.time() error_log_dir_path = os.path.dirname(os.path.abspath(test_descriptor.error_output_path)) if not os.path.isdir(error_log_dir_path): @@ -468,22 +466,24 @@ def run_test(): os.umask(old_umask) -def get_bash_shebang(): +def get_bash_shebang() -> str: # Prefer /usr/local/bin/bash as we install Bash 4+ there on macOS. if os.path.exists('/usr/local/bin/bash'): return '/usr/local/bin/bash' return '/usr/bin/env bash' -def initialize_remote_task(): +# This is executed on a Spark executor as part of running a task. +def initialize_remote_task() -> yb_dist_tests.GlobalTestConfig: configure_logging() + assert global_conf_dict is not None global_conf = yb_dist_tests.set_global_conf_from_dict(global_conf_dict) global_conf.set_env_on_spark_worker(propagated_env_vars) if not global_conf.archive_for_workers: return global_conf - from pyspark import SparkFiles + from pyspark import SparkFiles # type: ignore archive_name = os.path.basename(SparkFiles.get(global_conf.archive_for_workers)) expected_archive_sha256sum = global_conf.archive_sha256sum assert expected_archive_sha256sum is not None @@ -576,7 +576,7 @@ def initialize_remote_task(): return global_conf -def parallel_list_test_descriptors(rel_test_path): +def parallel_list_test_descriptors(rel_test_path: str) -> List[str]: """ This is invoked in parallel to list all individual tests within our C++ test programs. Without this, listing all gtest tests across 330 test programs might take about 5 minutes on TSAN and 2 @@ -623,8 +623,8 @@ def parallel_list_test_descriptors(rel_test_path): # BloomStatsTestWithIter/1 # GetParam() = (true, false) # BloomStatsTestWithIter/2 # GetParam() = (false, false) - current_test = None - test_descriptors = [] + current_test: Optional[str] = None + test_descriptors: List[str] = [] test_descriptor_prefix = rel_test_path + yb_dist_tests.TEST_DESCRIPTOR_SEPARATOR for line in prog_result.stdout.split("\n"): if ('Starting tracking the heap' in line or 'Dumping heap profile to' in line): @@ -632,6 +632,7 @@ def parallel_list_test_descriptors(rel_test_path): line = line.rstrip() trimmed_line = HASH_COMMENT_RE.sub('', line.strip()).strip() if line.startswith(' '): + assert current_test is not None test_descriptors.append(test_descriptor_prefix + current_test + trimmed_line) else: current_test = trimmed_line @@ -639,7 +640,7 @@ def parallel_list_test_descriptors(rel_test_path): return test_descriptors -def get_username(): +def get_username() -> str: try: return os.getlogin() except OSError as ex: @@ -652,7 +653,7 @@ def get_username(): user_from_env = os.getenv('USER') if user_from_env: return user_from_env - id_output = subprocess.check_output('id').strip() + id_output = subprocess.check_output('id').strip().decode('utf-8') ID_OUTPUT_RE = re.compile(r'^uid=\d+[(]([^)]+)[)]\s.*') match = ID_OUTPUT_RE.match(id_output) if match: @@ -663,38 +664,47 @@ def get_username(): raise ex -def get_mac_shared_nfs(path): +def get_mac_shared_nfs(path: str) -> str: LOCAL_PATH = "/Volumes/share" if not path.startswith(LOCAL_PATH): raise ValueError("Local path %s does not start with expected prefix '%s'.\n" % (path, LOCAL_PATH)) relpath = path[len(LOCAL_PATH):] - return "/Volumes/net/v1/" + os.environ.get('YB_BUILD_HOST') + relpath + yb_build_host = os.environ.get('YB_BUILD_HOST') + if yb_build_host is None: + raise ValueError("The YB_BUILD_HOST environment variable is not set") + return "/Volumes/net/v1/" + yb_build_host + relpath -def get_jenkins_job_name(): - 
return os.environ.get('JOB_NAME', None)
+def get_jenkins_job_name() -> Optional[str]:
+    return os.environ.get('JOB_NAME')
 
 
-def get_jenkins_job_name_path_component():
+def get_jenkins_job_name_path_component() -> str:
     jenkins_job_name = get_jenkins_job_name()
     if jenkins_job_name:
         return "job_" + jenkins_job_name
-    else:
-        return "unknown_jenkins_job"
+    return "unknown_jenkins_job"
 
 
-def get_report_parent_dir(report_base_dir):
+
+def get_report_parent_dir(report_base_dir: str) -> str:
     """
     @return a directory to store build report, relative to the given base directory. Path
             components are based on build type, Jenkins job name, etc.
     """
-    return os.path.join(report_base_dir,
-                        yb_dist_tests.global_conf.build_type,
-                        get_jenkins_job_name_path_component())
-
-
-def save_json_to_paths(short_description, json_data, output_paths, should_gzip=False):
+    global_conf = yb_dist_tests.get_global_conf()
+    return os.path.join(
+        report_base_dir,
+        global_conf.build_type,
+        get_jenkins_job_name_path_component())
+
+
+def save_json_to_paths(
+        short_description: str,
+        json_data: Any,
+        output_paths: List[str],
+        should_gzip: bool = False) -> None:
     """
     Saves the given JSON-friendly data structure to the list of paths (exact copy at each path),
     optionally gzipping the output.
@@ -710,17 +720,21 @@ def save_json_to_paths(short_description, json_data, output_paths, should_gzip=F
         final_output_path = output_path + ('.gz' if should_gzip else '')
         logging.info("Saving {} to {}".format(short_description, final_output_path))
         if should_gzip:
-            with gzip.open(final_output_path, 'wb') as output_file:
-                output_file.write(json_data_str.encode('utf-8'))
+            with gzip.open(final_output_path, 'wb') as output_file_gzip:
+                output_file_gzip.write(json_data_str.encode('utf-8'))
         else:
-            with open(final_output_path, 'w') as output_file:
-                output_file.write(json_data_str)
+            with open(final_output_path, 'w') as output_file_plain:
+                output_file_plain.write(json_data_str)
 
 
-def save_report(report_base_dir, results, total_elapsed_time_sec, spark_succeeded,
-                save_to_build_dir=False):
+def save_report(
+        report_base_dir: str,
+        results: List[yb_dist_tests.TestResult],
+        total_elapsed_time_sec: float,
+        spark_succeeded: bool,
+        save_to_build_dir: bool = False) -> None:
     historical_report_path = None
-    global_conf = yb_dist_tests.global_conf
+    global_conf = yb_dist_tests.get_global_conf()
 
     if report_base_dir:
         historical_report_parent_dir = get_report_parent_dir(report_base_dir)
@@ -742,12 +756,12 @@ def save_report(report_base_dir, results, total_elapsed_time_sec, spark_succeede
 
         historical_report_path = os.path.join(
             historical_report_parent_dir,
-            '{}_{}__user_{}__build_{}.json'.format(
+            '{}.json'.format('_'.join([
                 global_conf.build_type,
                 time.strftime('%Y-%m-%dT%H_%M_%S'),
                 username,
                 get_jenkins_job_name_path_component(),
-                os.environ.get('BUILD_ID', 'unknown')))
+                os.environ.get('BUILD_ID', 'unknown')])))
 
     test_reports_by_descriptor = {}
     for result in results:
@@ -789,7 +803,7 @@ def save_report(report_base_dir, results, total_elapsed_time_sec, spark_succeede
         save_json_to_paths('short build report', report, [short_report_path], should_gzip=False)
 
 
-def is_one_shot_test(rel_binary_path):
+def is_one_shot_test(rel_binary_path: str) -> bool:
     if rel_binary_path in ONE_SHOT_TESTS:
         return True
     for non_gtest_test in ONE_SHOT_TESTS:
@@ -798,13 +812,14 @@
         return False
 
 
-def collect_cpp_tests(cpp_test_program_filter: List[str]) -> List[yb_dist_tests.TestDescriptor]:
+def collect_cpp_tests(
+
cpp_test_program_filter_list: List[str]) -> List[yb_dist_tests.TestDescriptor]: """ Collect C++ test programs to run. - @param cpp_test_program_filter: a list of C++ test program names to be used as a filter + @param cpp_test_program_filter_list: a list of C++ test program names to be used as a filter """ - global_conf = yb_dist_tests.global_conf + global_conf = yb_dist_tests.get_global_conf() logging.info("Collecting the list of C++ test programs (locally; not a Spark job)") start_time_sec = time.time() build_root_realpath = os.path.realpath(global_conf.build_root) @@ -835,8 +850,8 @@ def collect_cpp_tests(cpp_test_program_filter: List[str]) -> List[yb_dist_tests. logging.info("Collected %d test programs in %.2f sec" % ( len(test_programs), elapsed_time_sec)) - if cpp_test_program_filter: - cpp_test_program_filter = set(cpp_test_program_filter) + if cpp_test_program_filter_list: + cpp_test_program_filter = set(cpp_test_program_filter_list) unfiltered_test_programs = test_programs # test_program contains test paths relative to the root directory (including directory @@ -891,8 +906,9 @@ def collect_cpp_tests(cpp_test_program_filter: List[str]) -> List[yb_dist_tests. # Use fewer "slices" (tasks) than there are test programs, in hope to get some batching. num_slices = (len(test_programs) + 1) / 2 + assert spark_context is not None all_test_descriptor_lists = run_spark_action( - lambda: spark_context.parallelize( + lambda: spark_context.parallelize( # type: ignore test_programs, numSlices=num_slices).map(parallel_list_test_descriptors).collect() ) elapsed_time_sec = time.time() - start_time_sec @@ -916,21 +932,22 @@ def collect_cpp_tests(cpp_test_program_filter: List[str]) -> List[yb_dist_tests. return [yb_dist_tests.TestDescriptor(s) for s in test_descriptor_strs] -def is_writable(dir_path): +def is_writable(dir_path: str) -> bool: return os.access(dir_path, os.W_OK) -def is_parent_dir_writable(file_path): +def is_parent_dir_writable(file_path: str) -> bool: return is_writable(os.path.dirname(file_path)) -def fatal_error(msg): +def fatal_error(msg: str) -> None: logging.error("Fatal: " + msg) raise RuntimeError(msg) -def get_java_test_descriptors(): - java_test_list_path = os.path.join(yb_dist_tests.global_conf.build_root, 'java_test_list.txt') +def get_java_test_descriptors() -> List[yb_dist_tests.TestDescriptor]: + java_test_list_path = os.path.join( + yb_dist_tests.get_global_conf().build_root, 'java_test_list.txt') if not os.path.exists(java_test_list_path): raise IOError( "Java test list not found at '%s'. 
Please run ./yb_build.sh --collect-java-tests to " @@ -949,7 +966,7 @@ def get_java_test_descriptors(): return java_test_descriptors -def collect_tests(args): +def collect_tests(args: argparse.Namespace) -> List[yb_dist_tests.TestDescriptor]: test_conf = {} if args.test_conf: with open(args.test_conf) as test_conf_file: @@ -967,7 +984,7 @@ def collect_tests(args): cpp_test_descriptors = [] if args.run_cpp_tests: cpp_test_programs = test_conf.get('cpp_test_programs') - cpp_test_descriptors = collect_cpp_tests(cpp_test_programs) + cpp_test_descriptors = collect_cpp_tests(cast(List[str], cpp_test_programs)) java_test_descriptors = [] if args.run_java_tests: @@ -992,7 +1009,7 @@ def collect_tests(args): return test_descriptors -def load_test_list(test_list_path): +def load_test_list(test_list_path: str) -> List[yb_dist_tests.TestDescriptor]: logging.info("Loading the list of tests to run from %s", test_list_path) test_descriptors = [] with open(test_list_path, 'r') as input_file: @@ -1003,7 +1020,7 @@ def load_test_list(test_list_path): return test_descriptors -def propagate_env_vars(): +def propagate_env_vars() -> None: num_propagated = 0 for env_var_name in JENKINS_ENV_VARS: if env_var_name in os.environ: @@ -1019,8 +1036,8 @@ def propagate_env_vars(): logging.info("Number of propagated environment variables: %s", num_propagated) -def run_spark_action(action): - import py4j +def run_spark_action(action: Any) -> Any: + import py4j # type: ignore try: results = action() except py4j.protocol.Py4JJavaError: @@ -1030,7 +1047,7 @@ def run_spark_action(action): return results -def main(): +def main() -> None: parser = argparse.ArgumentParser( description='Run tests on Spark.') parser.add_argument('--verbose', action='store_true', @@ -1138,11 +1155,12 @@ def main(): fatal_error("File specified by --test_list does not exist or is not a file: '{}'".format( test_list_path)) + global_conf = yb_dist_tests.get_global_conf() if ('YB_MVN_LOCAL_REPO' not in os.environ and args.run_java_tests and args.send_archive_to_workers): os.environ['YB_MVN_LOCAL_REPO'] = os.path.join( - yb_dist_tests.global_conf.build_root, 'm2_repository') + global_conf.build_root, 'm2_repository') logging.info("Automatically setting YB_MVN_LOCAL_REPO to %s", os.environ['YB_MVN_LOCAL_REPO']) @@ -1162,10 +1180,12 @@ def main(): # This needs to be done before Spark context initialization, which will happen as we try to # collect all gtest tests in all C++ test programs. if args.send_archive_to_workers: - archive_exists = os.path.exists(yb_dist_tests.global_conf.archive_for_workers) + archive_exists = ( + global_conf.archive_for_workers is not None and + os.path.exists(global_conf.archive_for_workers)) if args.recreate_archive_for_workers or not archive_exists: - archive_sha_path = os.path.join(yb_dist_tests.global_conf.yb_src_root, - 'extracted_from_archive.sha256') + archive_sha_path = os.path.join( + global_conf.yb_src_root, 'extracted_from_archive.sha256') if os.path.exists(archive_sha_path): os.remove(archive_sha_path) @@ -1174,8 +1194,9 @@ def main(): yb_dist_tests.compute_archive_sha256sum() # Local host may also be worker, so leave expected checksum here after archive created. + assert global_conf.archive_sha256sum is not None with open(archive_sha_path, 'w') as archive_sha: - archive_sha.write(yb_dist_tests.global_conf.archive_sha256sum) + archive_sha.write(global_conf.archive_sha256sum) else: yb_dist_tests.compute_archive_sha256sum() @@ -1233,7 +1254,7 @@ def main(): # Randomize test order to avoid any kind of skew. 
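    # Otherwise tests would be submitted in collection order, e.g. with all tests from one test
    # program adjacent to each other.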
random.shuffle(test_descriptors) - test_names_rdd = spark_context.parallelize( + test_names_rdd = spark_context.parallelize( # type: ignore [test_descriptor.descriptor_str for test_descriptor in test_descriptors], numSlices=total_num_tests) @@ -1248,8 +1269,8 @@ def main(): logging.info("Tests are done, set of exit codes: %s, tentative global exit code: %s", sorted(test_exit_codes), global_exit_code) - num_tests_by_language = defaultdict(int) - failures_by_language = defaultdict(int) + num_tests_by_language: Dict[str, int] = defaultdict(int) + failures_by_language: Dict[str, int] = defaultdict(int) failed_test_desc_strs = [] had_errors_copying_artifacts = False for result in results: diff --git a/build-support/split_long_command_line.py b/build-support/split_long_command_line.py index c1a12521d706..808bb47d0c10 100755 --- a/build-support/split_long_command_line.py +++ b/build-support/split_long_command_line.py @@ -18,17 +18,18 @@ import sys -line_length = 0 -buffer = '' -for line in sys.stdin: - for c in line.rstrip(): - if c.isspace() and line_length > 80: - print(buffer + c + '\\\\') - buffer = '' - line_length = 0 - else: - buffer += c - line_length += 1 +if __name__ == '__main__': + line_length = 0 + buffer = '' + for line in sys.stdin: + for c in line.rstrip(): + if c.isspace() and line_length > 80: + print(buffer + c + '\\\\') + buffer = '' + line_length = 0 + else: + buffer += c + line_length += 1 -if buffer: - sys.stdout.write(buffer) + if buffer: + sys.stdout.write(buffer) diff --git a/build-support/validate_build_root.py b/build-support/validate_build_root.py index 2fb06556dd65..14e361636e0e 100755 --- a/build-support/validate_build_root.py +++ b/build-support/validate_build_root.py @@ -25,7 +25,7 @@ if not build_root.startswith(internal_build_root_parent_dir) and \ not build_root.startswith(external_build_root_parent_dir): - print ("Build root '{}' is not within either '{}' or '{}'".format( + print("Build root '{}' is not within either '{}' or '{}'".format( build_root, internal_build_root_parent_dir, internal_build_root_parent_dir), file=sys.stderr) diff --git a/codecheck.ini b/codecheck.ini new file mode 100644 index 000000000000..123e638b32fb --- /dev/null +++ b/codecheck.ini @@ -0,0 +1,47 @@ +# Copyright (c) Yugabyte, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations +# under the License. + +[default] +mypy_config = mypy.ini + +[checks] + +# Only keep Python-related checks on for now, and run them for a limited subset of files. +mypy = on +compile = on +pycodestyle = on +doctest = on +import = on +unittest = on +shellcheck = off + +[files] + +# TODO: add codecheck support for a list of plain file paths (not regexes) and use it here. 
+included_regex_list = + ^python/yb/__init__[.]py$ + ^python/yb/command_util[.]py$ + ^python/yb/library_packager[.]py$ + ^python/yb/linuxbrew[.]py$ + ^python/yb/common_util[.]py$ + ^python/yb/tool_base[.]py$ + ^python/yb/compile_commands[.]py$ + ^python/yb/run_pvs_studio_analyzer[.]py$ + ^python/yb/build_postgres[.]py$ + ^python/yb/os_detection[.]py$ + ^python/yb/thirdparty_tool[.]py$ + ^python/yb/yb_dist_tests[.]py$ + ^build-support/is_same_path[.]py$ + ^build-support/kill_long_running_minicluster_daemons[.]py$ + ^build-support/split_long_command_line[.]py$ + ^build-support/validate_build_root[.]py$ + ^build-support/run_tests_on_spark[.]py$ diff --git a/python/yb/yb_dist_tests.py b/python/yb/yb_dist_tests.py index 4acc41a3087d..14c711e0414a 100644 --- a/python/yb/yb_dist_tests.py +++ b/python/yb/yb_dist_tests.py @@ -24,6 +24,7 @@ import tempfile import atexit import glob +import argparse from functools import total_ordering @@ -31,7 +32,7 @@ from yb.common_util import get_build_type_from_build_root, \ get_compiler_type_from_build_root, \ is_macos # nopep8 - +from typing import Optional, List, Set, Dict, cast # This is used to separate relative binary path from gtest_filter for C++ tests in what we call # a "test descriptor" (a string that identifies a particular test). @@ -46,7 +47,7 @@ TEST_DESCRIPTOR_ATTEMPT_INDEX_RE = re.compile( r'^(.*)' + TEST_DESCRIPTOR_ATTEMPT_PREFIX + r'(\d+)$') -global_conf = None +global_conf: Optional['GlobalTestConfig'] = None CLOCK_SYNC_WAIT_LOGGING_INTERVAL_SEC = 10 @@ -69,7 +70,15 @@ class TestDescriptor: - A Java test class source path (including .java/.scala extension) relative to the "java" directory in the YugabyteDB source tree. """ - def __init__(self, descriptor_str): + descriptor_str_without_attempt_index: str + attempt_index: int + is_jvm_based: bool + language: str + args_for_run_test: str + error_output_path: str + + def __init__(self, descriptor_str: str) -> None: + assert global_conf is not None self.descriptor_str = descriptor_str attempt_index_match = TEST_DESCRIPTOR_ATTEMPT_INDEX_RE.match(descriptor_str) @@ -104,14 +113,23 @@ def __init__(self, descriptor_str): else: # The "test descriptors string " is the Java source file path relative to the "java" # directory. - mvn_module, package_and_class_with_slashes = JAVA_TEST_DESCRIPTOR_RE.match( - self.descriptor_str_without_attempt_index).groups() + java_descriptor_match = JAVA_TEST_DESCRIPTOR_RE.match( + self.descriptor_str_without_attempt_index) + if java_descriptor_match is None: + raise ValueError( + f"Java/Scala test descriptor {self.descriptor_str_without_attempt_index} " + f"could not be parsed using the regular expression " + f"{JAVA_TEST_DESCRIPTOR_RE}") + + mvn_module, package_and_class_with_slashes = java_descriptor_match.groups() package_and_class = package_and_class_with_slashes.replace('/', '.') self.args_for_run_test = "{} {}".format(mvn_module, package_and_class) output_file_name = package_and_class else: self.language = 'C++' + test_name: Optional[str] + # This is a C++ test. 
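            # The descriptor is either just a relative test binary path, or a binary path and a
            # gtest test name joined by TEST_DESCRIPTOR_SEPARATOR.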
if TEST_DESCRIPTOR_SEPARATOR in self.descriptor_str_without_attempt_index: rel_test_binary, test_name = self.descriptor_str_without_attempt_index.split( @@ -135,28 +153,30 @@ def __init__(self, descriptor_str): self.error_output_path = os.path.join( global_conf.build_root, 'yb-test-logs', output_file_name + '__error.log') - def __str__(self): + def __str__(self) -> str: if self.attempt_index == 1: return self.descriptor_str_without_attempt_index - return "{}{}{}".format[ + return ''.join([ self.descriptor_str_without_attempt_index, TEST_DESCRIPTOR_ATTEMPT_PREFIX, - self.attempt_index - ] + str(self.attempt_index) + ]) - def str_for_file_name(self): + def str_for_file_name(self) -> str: return str(self).replace('/', '__').replace(':', '_') - def __eq__(self, other): - return self.descriptor_str == other.descriptor_str + def __eq__(self, other: object) -> bool: + other_descriptor = cast(TestDescriptor, other) + return self.descriptor_str == other_descriptor.descriptor_str - def __ne__(self, other): + def __ne__(self, other: object) -> bool: return not (self == other) - def __lt__(self, other): - return self.descriptor_str < other.descriptor_str + def __lt__(self, other: object) -> bool: + other_descriptor = cast(TestDescriptor, other) + return self.descriptor_str < other_descriptor.descriptor_str - def with_attempt_index(self, attempt_index): + def with_attempt_index(self, attempt_index: int) -> 'TestDescriptor': assert attempt_index >= 1 copied = copy.copy(self) copied.attempt_index = attempt_index @@ -167,14 +187,23 @@ def with_attempt_index(self, attempt_index): class GlobalTestConfig: - def __init__(self, - build_root, - build_type, - yb_src_root, - archive_for_workers, - rel_build_root, - archive_sha256sum, - compiler_type): + build_root: str + build_type: str + yb_src_root: str + archive_for_workers: Optional[str] + rel_build_root: str + archive_sha256sum: Optional[str] + compiler_type: str + + def __init__( + self, + build_root: str, + build_type: str, + yb_src_root: str, + archive_for_workers: Optional[str], + rel_build_root: str, + archive_sha256sum: Optional[str], + compiler_type: str) -> None: self.build_root = os.path.abspath(build_root) self.build_type = build_type self.yb_src_root = yb_src_root @@ -183,10 +212,11 @@ def __init__(self, self.archive_sha256sum = archive_sha256sum self.compiler_type = compiler_type - def get_run_test_script_path(self): + def get_run_test_script_path(self) -> str: return os.path.join(self.yb_src_root, 'build-support', 'run-test.sh') - def set_env_on_spark_worker(self, propagated_env_vars={}): + def set_env_on_spark_worker( + self, propagated_env_vars: Dict[str, str] = {}) -> None: """ Used on the distributed worker side (inside functions that run on Spark) to configure the necessary environment. @@ -209,7 +239,7 @@ def set_env_on_spark_worker(self, propagated_env_vars={}): 'num_errors_copying_artifacts']) -def set_global_conf_from_args(args): +def set_global_conf_from_args(args: argparse.Namespace) -> GlobalTestConfig: build_root = os.path.realpath(args.build_root) # This module is expected to be under python/yb. @@ -260,7 +290,7 @@ def set_global_conf_from_args(args): return global_conf -def set_global_conf_from_dict(global_conf_dict): +def set_global_conf_from_dict(global_conf_dict: Dict[str, str]) -> GlobalTestConfig: """ This is used in functions that run on Spark. We use a dictionary to pass the configuration from the main program to distributed workers. 
@@ -268,9 +298,9 @@ def set_global_conf_from_dict(global_conf_dict): global global_conf try: global_conf = GlobalTestConfig(**global_conf_dict) - except TypeError as ex: - raise TypeError("Cannot set global configuration from dictionary %s: %s" % ( - repr(global_conf_dict), ex.message)) + except Exception as ex: + logging.exception("Cannot set global configuration from dictionary %s" % global_conf_dict) + raise ex return global_conf @@ -311,7 +341,7 @@ def set_global_conf_from_dict(global_conf_dict): ] -def find_rel_java_paths_to_archive(yb_src_root): +def find_rel_java_paths_to_archive(yb_src_root: str) -> List[str]: paths = [] for ent in [False, True]: path_components = [] @@ -364,7 +394,8 @@ def validate_mvn_local_repo(mvn_local_repo: str) -> None: logging.info(f"All Maven plugin patterns were found in local repo {mvn_local_repo}") -def create_archive_for_workers(): +def create_archive_for_workers() -> None: + assert global_conf is not None dest_path = global_conf.archive_for_workers if dest_path is None: return @@ -448,12 +479,12 @@ def create_archive_for_workers(): # These SHA256-related functions are duplicated in download_and_extract_archive.py, because that # script should not depend on any Python modules. -def validate_sha256sum(checksum_str): +def validate_sha256sum(checksum_str: str) -> None: if not re.match(r'^[0-9a-f]{64}$', checksum_str): raise ValueError("Invalid SHA256 checksum: '%s', expected 64 hex characters", checksum_str) -def compute_sha256sum(file_path): +def compute_sha256sum(file_path: str) -> str: cmd_line = None if sys.platform.startswith('linux'): cmd_line = ['sha256sum', file_path] @@ -467,30 +498,32 @@ def compute_sha256sum(file_path): return checksum_str -def compute_archive_sha256sum(): +def compute_archive_sha256sum() -> None: + assert global_conf is not None if global_conf.archive_for_workers is not None: global_conf.archive_sha256sum = compute_sha256sum(global_conf.archive_for_workers) logging.info("SHA256 checksum of archive %s: %s" % ( global_conf.archive_for_workers, global_conf.archive_sha256sum)) -def to_real_nfs_path(path): +def to_real_nfs_path(path: str) -> str: assert path.startswith('/'), "Expecting the path to be absolute: %s" % path path = os.path.abspath(path) return '/real_%s' % path[1:] -def get_tmp_filename(prefix='', suffix='', auto_remove=False): +def get_tmp_filename(prefix: str = '', suffix: str = '', auto_remove: bool = False) -> str: fd, file_path = tempfile.mkstemp(prefix=prefix, suffix=suffix) os.close(fd) os.remove(file_path) if auto_remove: - def cleanup(): + def cleanup() -> None: if os.path.exists(file_path): os.remove(file_path) atexit.register(cleanup) return file_path -if __name__ == '__main__': - main() +def get_global_conf() -> GlobalTestConfig: + assert global_conf is not None + return global_conf diff --git a/requirements.txt b/requirements.txt index 964b3201a83d..1240fd20408f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,5 @@ semantic-version six wheel yugabyte_pycommon +codecheck +pycodestyle \ No newline at end of file diff --git a/requirements_frozen.txt b/requirements_frozen.txt index 1de95d9882c9..7e0ada846654 100644 --- a/requirements_frozen.txt +++ b/requirements_frozen.txt @@ -1,33 +1,35 @@ bashlex==0.15 boto==2.49.0 -certifi==2020.12.5 -cffi==1.14.5 -chardet==4.0.0 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 click==8.0.1 +codecheck==1.1.2 compiledb==0.10.1 Deprecated==1.2.12 -distro==1.5.0 +distro==1.6.0 downloadutil==1.0.2 -idna==2.10 -mypy==0.812 +idna==3.2 +mypy==0.910 
mypy-extensions==0.4.3 overrides==6.1.0 -packaging==20.9 +packaging==21.0 psutil==5.8.0 +pycodestyle==2.7.0 pycparser==2.20 PyGithub==1.55 PyJWT==2.1.0 PyNaCl==1.4.0 pyparsing==2.4.7 -requests==2.25.1 -ruamel.yaml==0.17.4 -ruamel.yaml.clib==0.2.2 +requests==2.26.0 +ruamel.yaml==0.17.13 +ruamel.yaml.clib==0.2.6 semantic-version==2.8.5 shutilwhich==1.1.0 six==1.16.0 -typed-ast==1.4.3 +toml==0.10.2 typing-extensions==3.10.0.0 typing-utils==0.1.0 -urllib3==1.26.5 +urllib3==1.26.6 wrapt==1.12.1 yugabyte-pycommon==1.9.15 diff --git a/tox.ini b/tox.ini new file mode 100644 index 000000000000..487442e7efce --- /dev/null +++ b/tox.ini @@ -0,0 +1,16 @@ +# Copyright (c) Yugabyte, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations +# under the License. + +# Pycodestyle looks at tox.ini to get the project-specific style. +# https://pycodestyle.pycqa.org/en/latest/intro.html +[pycodestyle] +max-line-length = 100