Skip to content

Commit

Permalink
[pytorch_ci] Python target determinator (pytorch#33577)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#33577

Pull Request resolved: pytorch#33221

This will make it so that if a pull request is just pure Python files, then we'll only run the Python tests that are connected to the dependency graph of the touched files.

Assumptions made:
- the Python code does not do dynamic imports
- test_X.py never imports from test_Y.py

Right now this is only done for test_nn (presumably the largest test entrypoint), but it's not much more work to do it for all the other test entrypoints too.

Test Plan:
CircleCI results when touching just a few Python files:
- pytorch_macos_10_13_py3_test: 41 -> 13 minutes https://circleci.com/gh/pytorch/pytorch/4550574?utm_campaign=vcs-integration-link&utm_medium=referral&utm_source=github-build-link
- pytorch_windows_vs2019_py36_cuda10.1_test1: 11 -> 2 minutes https://circleci.com/gh/pytorch/pytorch/4550846?utm_campaign=vcs-integration-link&utm_medium=referral&utm_source=github-build-link
- pytorch_windows_vs2019_py36_cuda10.1_test2: 51 -> 21 minutes https://circleci.com/gh/pytorch/pytorch/4550845?utm_campaign=vcs-integration-link&utm_medium=referral&utm_source=github-build-link
- pytorch_linux_xenial_py3_6_gcc5_4_test: 41 -> 14 minutes https://circleci.com/gh/pytorch/pytorch/4550543?utm_campaign=vcs-integration-link&utm_medium=referral&utm_source=github-build-link

Differential Revision: D20009089

fbshipit-source-id: 41708cc301d1c866eb92a04421d8346feb0e3cb5
  • Loading branch information
yns88 authored and facebook-github-bot committed Mar 4, 2020
1 parent 7c20578 commit 7cee787
Show file tree
Hide file tree
Showing 10 changed files with 335 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ jobs:
if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
else
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
fi
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
Expand Down
2 changes: 1 addition & 1 deletion .circleci/verbatim-sources/pytorch-job-specs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ jobs:
if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
else
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
fi
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
Expand Down
8 changes: 8 additions & 0 deletions .jenkins/pytorch/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,11 @@ function get_exit_code() {
set -e
return $retcode
}
# Write the names of the files that differ between the working tree and the
# merge base of HEAD with origin/master into the file given as "$1".
#
# Arguments:
#   $1 - path of the output file; it is overwritten with one filename per line.
function file_diff_from_base() {
  # The fetch may fail on Docker hosts, but it's not always necessary, so a
  # failure is tolerated. "|| true" keeps this best-effort without toggling
  # the caller's errexit setting the way a "set +e" / "set -e" pair would.
  git fetch origin master --quiet || true
  # Compare against the remote-tracking branch that was just fetched.
  # "origin master HEAD" would be read by merge-base as three separate
  # commits and would require a local "master" branch to exist.
  git diff --name-only "$(git merge-base origin/master HEAD)" > "$1"
}
9 changes: 8 additions & 1 deletion .jenkins/pytorch/macos-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,14 @@ test_python_all() {
# using the address associated with the loopback interface.
export GLOO_SOCKET_IFNAME=lo0
echo "Ninja version: $(ninja --version)"
python test/run_test.py --verbose

if [ -n "$CIRCLE_PULL_REQUEST" ]; then
DETERMINE_FROM=$(mktemp)
file_diff_from_base "$DETERMINE_FROM"
fi

python test/run_test.py --verbose --determine-from="$DETERMINE_FROM"

assert_git_not_dirty
}

Expand Down
13 changes: 9 additions & 4 deletions .jenkins/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -130,23 +130,28 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX2-* ]]; then
export ATEN_CPU_CAPABILITY=avx
fi

if [ -n "$CIRCLE_PULL_REQUEST" ]; then
DETERMINE_FROM=$(mktemp)
file_diff_from_base "$DETERMINE_FROM"
fi

test_python_nn() {
time python test/run_test.py --include test_nn --verbose
time python test/run_test.py --include test_nn --verbose --determine-from="$DETERMINE_FROM"
assert_git_not_dirty
}

test_python_ge_config_simple() {
time python test/run_test.py --include test_jit_simple --verbose
time python test/run_test.py --include test_jit_simple --verbose --determine-from="$DETERMINE_FROM"
assert_git_not_dirty
}

test_python_ge_config_legacy() {
time python test/run_test.py --include test_jit_legacy test_jit_fuser_legacy --verbose
time python test/run_test.py --include test_jit_legacy test_jit_fuser_legacy --verbose --determine-from="$DETERMINE_FROM"
assert_git_not_dirty
}

test_python_all_except_nn() {
time python test/run_test.py --exclude test_nn test_jit_simple test_jit_legacy test_jit_fuser_legacy --verbose --bring-to-front test_quantization test_quantized test_quantized_tensor test_quantized_nn_mods
time python test/run_test.py --exclude test_nn test_jit_simple test_jit_legacy test_jit_fuser_legacy --verbose --bring-to-front test_quantization test_quantized test_quantized_tensor test_quantized_nn_mods --determine-from="$DETERMINE_FROM"
assert_git_not_dirty
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
cd test && python run_test.py --exclude test_nn test_jit_simple test_jit_legacy test_jit_fuser_legacy --verbose && cd ..
cd test && python run_test.py --exclude test_nn test_jit_simple test_jit_legacy test_jit_fuser_legacy --verbose --determine-from="%1" && cd ..
if ERRORLEVEL 1 exit /b 1
2 changes: 1 addition & 1 deletion .jenkins/pytorch/win-test-helpers/test_python_nn.bat
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ python %SCRIPT_HELPERS_DIR%\run_python_nn_smoketests.py
if ERRORLEVEL 1 exit /b 1

echo Run nn tests
python run_test.py --include test_nn --verbose
python run_test.py --include test_nn --verbose --determine-from="%1"
if ERRORLEVEL 1 exit /b 1

popd
Expand Down
12 changes: 8 additions & 4 deletions .jenkins/pytorch/win-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,22 @@ fi

export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers

if [ -n "$CIRCLE_PULL_REQUEST" ]; then
DETERMINE_FROM="${TMP_DIR}/determine_from"
file_diff_from_base "$DETERMINE_FROM"
fi

run_tests() {
if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test ]]; then
$SCRIPT_HELPERS_DIR/test_python_nn.bat && \
$SCRIPT_HELPERS_DIR/test_python_all_except_nn.bat && \
$SCRIPT_HELPERS_DIR/test_python_nn.bat "$DETERMINE_FROM" && \
$SCRIPT_HELPERS_DIR/test_python_all_except_nn.bat "$DETERMINE_FROM" && \
$SCRIPT_HELPERS_DIR/test_custom_script_ops.bat && \
$SCRIPT_HELPERS_DIR/test_libtorch.bat
else
if [[ "${JOB_BASE_NAME}" == *-test1 ]]; then
$SCRIPT_HELPERS_DIR/test_python_nn.bat
$SCRIPT_HELPERS_DIR/test_python_nn.bat "$DETERMINE_FROM"
elif [[ "${JOB_BASE_NAME}" == *-test2 ]]; then
$SCRIPT_HELPERS_DIR/test_python_all_except_nn.bat && \
$SCRIPT_HELPERS_DIR/test_python_all_except_nn.bat "$DETERMINE_FROM" && \
$SCRIPT_HELPERS_DIR/test_custom_script_ops.bat && \
$SCRIPT_HELPERS_DIR/test_libtorch.bat
fi
Expand Down
180 changes: 180 additions & 0 deletions test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import argparse
from datetime import datetime
import modulefinder
import os
import shutil
import signal
Expand All @@ -16,6 +17,7 @@
from torch.utils import cpp_extension
from torch.testing._internal.common_utils import TEST_WITH_ROCM, shell
import torch.distributed as dist
PY2 = sys.version_info <= (3,)
PY33 = sys.version_info >= (3, 3)
PY36 = sys.version_info >= (3, 6)

Expand Down Expand Up @@ -68,6 +70,7 @@
'test_jit_disabled',
'test_function_schema',
'test_overrides',
'test_determination',
]

# skip < 3.3 because mock is added in 3.3 and is used in rpc_spawn
Expand Down Expand Up @@ -107,6 +110,39 @@
'distributed/rpc/jit/test_dist_autograd_spawn',
]

# These tests are slow enough that it's worth calculating whether the patch
# touched any related files first.
# determine_target() runs any test that is NOT listed here unconditionally;
# only the entries below go through dependency analysis.
SLOW_TESTS = [
'test_nn',
'test_autograd',
'test_cpp_extensions_jit',
'test_jit_legacy',
'test_quantized',
'test_dataloader',
'test_overrides',
'test_jit_simple',
'test_jit',
'test_torch',
'distributed/test_distributed',
'distributed/rpc/test_rpc_spawn',
'distributed/rpc/test_dist_autograd_spawn',
'test_cuda',
'test_cuda_primary_ctx',
'test_cpp_extensions_aot_ninja',
'test_cpp_extensions_aot_no_ninja',
'test_serialization',
'test_distributions',
'test_optim',
'test_utils',
'test_multiprocessing',
'test_tensorboard',
'distributed/test_c10d',
'distributed/test_c10d_spawn',
'test_quantization',
'test_determination',
]
# Memo for get_dep_modules(): maps a test module name to the set of module
# names that ModuleFinder discovered for that test.
_DEP_MODULES_CACHE = {}

DISTRIBUTED_TESTS_CONFIG = {}


Expand Down Expand Up @@ -349,6 +385,9 @@ def parse_args():
'--ignore-win-blacklist',
action='store_true',
help='always run blacklisted windows tests')
parser.add_argument(
'--determine-from',
help='File of affected source filenames to determine which tests to run.')
parser.add_argument(
'additional_unittest_args',
nargs='*',
Expand Down Expand Up @@ -450,6 +489,133 @@ def get_selected_tests(options):
return selected_tests


def test_impact_of_file(filename):
"""Determine what class of impact this file has on test runs.
Possible values:
TORCH - torch python code
CAFFE2 - caffe2 python code
TEST - torch test code
UNKNOWN - may affect all tests
NONE - known to have no effect on test outcome
CI - CI configuration files
"""
parts = filename.split(os.sep)
if parts[0] in ['.jenkins', '.circleci']:
return 'CI'
if parts[0] in ['docs', 'scripts', 'CODEOWNERS', 'README.md']:
return 'NONE'
elif parts[0] == 'torch':
if parts[-1].endswith('.py') or parts[-1].endswith('.pyi'):
return 'TORCH'
elif parts[0] == 'caffe2':
if parts[-1].endswith('.py') or parts[-1].endswith('.pyi'):
return 'CAFFE2'
elif parts[0] == 'test':
if parts[-1].endswith('.py') or parts[-1].endswith('.pyi'):
return 'TEST'

return 'UNKNOWN'


def log_test_reason(file_type, filename, test, options):
    """Report on stderr why a test was selected, when verbose mode is on.

    Args:
        file_type: impact class of the touched file (e.g. 'TORCH', 'CI').
        filename: path of the touched file that triggered the selection.
        test: name of the test that will be run.
        options: parsed command-line options; only ``options.verbose`` is read.
    """
    if not options.verbose:
        return

    message = 'Determination found {} file {} -- running {}'.format(
        file_type, filename, test)
    print_to_stderr(message)


def get_dep_modules(test):
# Cache results in case of repitition
if test in _DEP_MODULES_CACHE:
return _DEP_MODULES_CACHE[test]

repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
test_location = os.path.join(repo_root, 'test', test + '.py')
finder = modulefinder.ModuleFinder(
# Ideally exclude all third party modules, to speed up calculation.
excludes=[
'scipy',
'numpy',
'numba',
'multiprocessing',
'sklearn',
'setuptools',
'hypothesis',
'llvmlite',
'joblib',
'email',
'importlib',
'unittest',
'urllib',
'json',
'collections',
],
)
# HACK: some platforms default to ascii, so we can't just run_script :(
if PY2:
finder.run_script(test_location)
else:
with open(test_location, 'r', encoding='utf-8') as fp:
finder.load_module('__main__', fp, test_location, ('', 'r', 1))

dep_modules = set(finder.modules.keys())
_DEP_MODULES_CACHE[test] = dep_modules
return dep_modules


def determine_target(test, touched_files, options):
    """Decide whether a test needs to run given the files a patch touched.

    Args:
        test: test name as selected on the command line; it is normalised
            with ``parse_test_module`` before use.
        touched_files: iterable of touched file paths, using ``os.sep`` as
            the separator.
        options: parsed command-line options; only ``options.verbose`` is
            read here.

    Returns:
        True if the test should run, False if it can be skipped. A test that
        is not in ``SLOW_TESTS`` always runs. A slow test runs when a touched
        file is classified 'CI' or 'UNKNOWN', or when a touched Python file
        is the test itself or one of the modules it depends on.
    """
    test = parse_test_module(test)
    # Some tests are faster to execute than to determine.
    if test not in SLOW_TESTS:
        if options.verbose:
            print_to_stderr('Running {} without determination'.format(test))
        return True
    # HACK: "no_ninja" is not a real module
    if test.endswith('_no_ninja'):
        test = test[:-len('_no_ninja')]
    if test.endswith('_ninja'):
        test = test[:-len('_ninja')]

    dep_modules = get_dep_modules(test)
    # Dotted form of the test's own path, e.g. 'distributed.test_c10d'.
    test_module = test.replace('/', '.')
    test_prefix = 'test.'

    for touched_file in touched_files:
        file_type = test_impact_of_file(touched_file)
        if file_type == 'NONE':
            continue
        if file_type in ('CI', 'UNKNOWN'):
            # Force the test to run if any change is made to the CI
            # configuration, and assume uncategorized source files can
            # affect every test.
            log_test_reason(file_type, touched_file, test, options)
            return True
        if file_type in ('TORCH', 'CAFFE2', 'TEST'):
            parts = os.path.splitext(touched_file)[0].split(os.sep)
            touched_module = '.'.join(parts)
            # test/ path does not have a "test." namespace. Strip only the
            # leading prefix: splitting on every 'test.' occurrence would
            # mangle names such as 'test.a.test.b'.
            if touched_module.startswith(test_prefix):
                touched_module = touched_module[len(test_prefix):]
            if touched_module in dep_modules or touched_module == test_module:
                log_test_reason(file_type, touched_file, test, options)
                return True

    # If nothing has determined the test has run, don't run the test.
    if options.verbose:
        print_to_stderr('Determination is skipping {}'.format(test))

    return False


def main():
options = parse_args()
executable = get_executable_command(options) # this is a list
Expand All @@ -466,6 +632,20 @@ def main():
if options.jit:
selected_tests = filter(lambda test_name: "jit" in test_name, TESTS)

if options.determine_from is not None and os.path.exists(options.determine_from):
with open(options.determine_from, 'r') as fh:
touched_files = [
os.path.normpath(name.strip()) for name in fh.read().split('\n')
if len(name.strip()) > 0
]
# HACK: Ensure the 'test' paths can be traversed by Modulefinder
sys.path.append('test')
selected_tests = [
test for test in selected_tests
if determine_target(test, touched_files, options)
]
sys.path.remove('test')

for test in selected_tests:

test_module = parse_test_module(test)
Expand Down
Loading

0 comments on commit 7cee787

Please sign in to comment.