# Detects which modules were touched by a push and builds static-checker
# reports for the Python ones.
name: Detect Changed Modules and Build Reports
on:
  push:
jobs:
  detect-changed-modules:
    name: Detect Changed Modules
    timeout-minutes: 5
    runs-on: ubuntu-latest
    outputs:
      # JSON list printed by `ci_detect_changed_modules`, consumed by `build-reports`.
      changed-modules: ${{ steps.detect-changed-modules.outputs.changed-modules }}
    steps:
      - name: Checkout Airbyte
        uses: actions/checkout@v2
        with:
          # NOTE(review): presumably deep enough for the merge-base lookup
          # against origin/master below - confirm for long-lived branches.
          fetch-depth: 1000
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML never reads the version as a float.
          python-version: "3.7"
      - name: Install Requirements
        run: pip install ./tools/ci_static_check_reports/.
      - name: Detect Changed Modules
        id: detect-changed-modules
        run: |
          git fetch
          changed_files=$(git diff --name-only "$(git merge-base HEAD origin/master)")
          # $changed_files is deliberately unquoted: one argument per changed file.
          # The output is written through $GITHUB_OUTPUT; the `::set-output`
          # workflow command is deprecated.
          echo "changed-modules=$(ci_detect_changed_modules $changed_files)" >> "$GITHUB_OUTPUT"
  build-reports:
    name: Build Python Static Checkers Reports
    needs:
      - detect-changed-modules
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Airbyte
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: "3.7"
      - name: Install Requirements
        run: pip install ./tools/ci_static_check_reports/.
      - name: Build Reports
        # The JSON is handed over through an environment variable instead of
        # being interpolated into the script, so a file path containing quotes
        # or shell metacharacters cannot alter the command line.
        env:
          CHANGED_MODULES: ${{ needs.detect-changed-modules.outputs.changed-modules }}
        run: ci_build_python_checkers_reports "$CHANGED_MODULES"
""" + cur_dir = os.getcwd() if multi_envs: - source_path = f"source_{connector_string.replace('-', '_')}" - os.chdir(os.path.join(CONNECTORS_DIR, f"source-{connector_string}")) + if module_path: + os.chdir(module_path) + source_path = connector_string + else: + os.chdir(os.path.join(CONNECTORS_DIR, f"source-{connector_string}")) + source_path = f"source_{connector_string.replace('-', '_')}" else: source_path = connector_string @@ -111,7 +126,7 @@ def _run_task(ctx: Context, connector_string: str, task_name: str, multi_envs: b commands = [] - commands.extend([cmd.format(source_path=source_path, venv=venv_name, **kwargs) for cmd in TASK_COMMANDS[task_name]]) + commands.extend([cmd.format(source_path=source_path, venv=venv_name, **kwargs) for cmd in task_commands[task_name]]) exit_code: int = 0 @@ -125,6 +140,9 @@ def _run_task(ctx: Context, connector_string: str, task_name: str, multi_envs: b finally: shutil.rmtree(venv_name, ignore_errors=True) + if module_path: + os.chdir(cur_dir) + return exit_code diff --git a/tools/ci_static_check_reports/__init__.py b/tools/ci_static_check_reports/__init__.py new file mode 100644 index 000000000000..46b7376756ec --- /dev/null +++ b/tools/ci_static_check_reports/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py b/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py new file mode 100644 index 000000000000..46b7376756ec --- /dev/null +++ b/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
import argparse
import json
import os
import sys
from typing import Dict, List

from invoke import Context

# `tasks` lives next to the connectors and is not an installed package, so the
# import only resolves when the tool is run from the repository root.
sys.path.insert(0, "airbyte-integrations/connectors")
from tasks import CONFIG_FILE, TOOLS_VERSIONS, _run_task  # noqa

# Shell command templates per checker. Single-brace placeholders
# ({source_path}, {venv}, {reports_path}, ...) are filled in by `_run_task`;
# the f-strings only interpolate tool versions at import time, which is why
# placeholders inside them are written with doubled braces.
TASK_COMMANDS: Dict[str, List[str]] = {
    "black": [
        f"pip install black~={TOOLS_VERSIONS['black']}",
        f"XDG_CACHE_HOME={os.devnull} black -v {{check_option}} --diff {{source_path}}/. > {{reports_path}}/black.txt",
    ],
    "coverage": [
        "pip install .",
        f"pip install coverage[toml]~={TOOLS_VERSIONS['coverage']}",
        "coverage xml --rcfile={toml_config_file} -o {reports_path}/coverage.xml",
    ],
    "flake": [
        f"pip install mccabe~={TOOLS_VERSIONS['mccabe']}",
        f"pip install pyproject-flake8~={TOOLS_VERSIONS['flake']}",
        f"pip install flake8-junit-report~={TOOLS_VERSIONS['flake_junit']}",
        "pflake8 -v {source_path} --output-file={reports_path}/flake.txt --bug-report",
        "flake8_junit {reports_path}/flake.txt {reports_path}/flake.xml",
        "rm -f {reports_path}/flake.txt",
    ],
    "isort": [
        f"pip install colorama~={TOOLS_VERSIONS['colorama']}",
        f"pip install isort~={TOOLS_VERSIONS['isort']}",
        "isort -v {check_option} {source_path}/. > {reports_path}/isort.txt",
    ],
    "mypy": [
        "pip install .",
        f"pip install lxml~={TOOLS_VERSIONS['lxml']}",
        f"pip install mypy~={TOOLS_VERSIONS['mypy']}",
        "mypy {source_path} --config-file={toml_config_file} --cobertura-xml-report={reports_path}",
    ],
    "test": [
        "mkdir {venv}/source-acceptance-test",
        "cp -f $(git ls-tree -r HEAD --name-only {source_acceptance_test_path} | tr '\n' ' ') {venv}/source-acceptance-test",
        "pip install build",
        f"python -m build {os.path.join('{venv}', 'source-acceptance-test')}",
        f"pip install {os.path.join('{venv}', 'source-acceptance-test', 'dist', 'source_acceptance_test-*.whl')}",
        "[ -f requirements.txt ] && pip install -r requirements.txt 2> /dev/null",
        "pip install .",
        "pip install .[tests]",
        "pip install pytest-cov",
        "pytest -v --cov={source_path} --cov-report xml:{reports_path}/pytest.xml {source_path}/unit_tests",
    ],
}


def build_static_checkers_reports(modules: list, static_checker_reports_path: str) -> int:
    """Run every checker in ``TASK_COMMANDS`` against each module.

    Reports for a module are written to
    ``<cwd>/<static_checker_reports_path>/<module_path>``.

    :param modules: module directories, relative to the current working directory.
    :param static_checker_reports_path: reports root, relative to the current working directory.
    :return: always 0; the exit codes returned by ``_run_task`` are not inspected.
    """
    ctx = Context()
    # Resolve the working directory once: `_run_task` is called with
    # `module_path` and changes directory while it runs, so re-reading
    # os.getcwd() inside the loop would depend on it being restored.
    root_dir = os.getcwd()
    toml_config_file = os.path.join(root_dir, "pyproject.toml")
    source_acceptance_test_path = os.path.join(root_dir, "airbyte-integrations/bases/source-acceptance-test")

    for module_path in modules:
        reports_path = f"{root_dir}/{static_checker_reports_path}/{module_path}"
        os.makedirs(reports_path, exist_ok=True)

        for checker in TASK_COMMANDS:
            _run_task(
                ctx,
                f"{root_dir}/{module_path}",
                checker,
                module_path=module_path,
                multi_envs=True,
                check_option="",
                task_commands=TASK_COMMANDS,
                toml_config_file=toml_config_file,
                reports_path=reports_path,
                source_acceptance_test_path=source_acceptance_test_path,
            )
    return 0


def main() -> int:
    """Command-line entry point.

    Expects one positional argument: a JSON list of ``{"dir": ..., "lang": ...}``
    objects. Only entries whose ``lang`` is ``"py"`` and whose directory
    contains a ``setup.py`` are processed. When the argument is missing it is
    treated as an empty list.
    """
    parser = argparse.ArgumentParser(description="Working with Python Static Report Builder.")
    parser.add_argument("changed_modules", nargs="*")
    parser.add_argument(
        "--static-checker-reports-path",
        help="Directory, relative to the current working directory, where the reports are written",
        required=False,
        type=str,
        default="static_checker_reports",
    )

    args = parser.parse_args()
    # nargs="*" allows zero positionals; indexing [0] unconditionally would
    # raise IndexError in that case.
    raw_modules = args.changed_modules[0] if args.changed_modules else "[]"
    changed_python_module_paths = [
        module["dir"]
        for module in json.loads(raw_modules)
        if module["lang"] == "py" and os.path.isfile(os.path.join(module["dir"], "setup.py"))
    ]
    print("Changed python modules: ", changed_python_module_paths)
    return build_static_checkers_reports(changed_python_module_paths, static_checker_reports_path=args.static_checker_reports_path)


if __name__ == "__main__":
    sys.exit(main())
import json
import os
import sys
from typing import Dict, List, Optional, Set

# Filenames used to detect whether the dir is a module
LANGUAGE_MODULE_ID_FILE = {
    ".py": "setup.py",
    # TODO: Add ID files for other languages
}


def find_base_path(
    path: str,
    modules: List[Dict[str, str]],
    unique_modules: Set[str],
    file_ext: str = "",
    lookup_file: Optional[str] = None,
) -> None:
    """Find the module that contains ``path`` and record it in ``modules``.

    Walks up from the directory of ``path`` until a directory containing the
    module ID file is found. The search stops at the first directory that does
    not exist, at an empty path, or at the filesystem root.

    :param path: path of a changed file.
    :param modules: output list; receives ``{"dir": ..., "lang": ...}`` for a newly found module.
    :param unique_modules: module dirs already recorded; updated in place to avoid duplicates.
    :param file_ext: extension (with leading dot) reported as the module language;
        defaults to the extension of ``path``.
    :param lookup_file: module ID filename; defaults to the entry of
        ``LANGUAGE_MODULE_ID_FILE`` for the extension of ``path``.
    """
    path_ext = os.path.splitext(path)[1]
    # The language always comes from the changed file itself. Taking it from
    # the intermediate directories (whose extension is empty) would lose it
    # for files nested several levels below the module root.
    file_ext = file_ext or path_ext
    lookup_file = lookup_file or LANGUAGE_MODULE_ID_FILE.get(path_ext)
    if lookup_file is None:
        # Unsupported language: nothing can identify a module root.
        return

    dir_path = os.path.dirname(path)
    while dir_path and os.path.isdir(dir_path):
        if os.path.exists(os.path.join(dir_path, lookup_file)):
            if dir_path not in unique_modules:
                modules.append({"dir": dir_path, "lang": file_ext[1:]})
                unique_modules.add(dir_path)
            return
        parent = os.path.dirname(dir_path)
        if parent == dir_path:
            # dirname("/") == "/": stop instead of looping forever on absolute paths.
            return
        dir_path = parent


def list_changed_modules(changed_files: List[str]) -> List[Dict[str, str]]:
    """
    changed_files is the list of files which were modified in the current branch.
    E.g. changed_files = ["tools/ci_static_check_reports/__init__.py", "tools/ci_static_check_reports/setup.py", ...]

    Returns one ``{"dir": ..., "lang": ...}`` entry per distinct module, in the
    order the modules are first encountered.
    """

    modules: List[Dict[str, str]] = []
    unique_modules: Set[str] = set()
    for file_path in changed_files:
        _, file_extension = os.path.splitext(file_path)
        find_base_path(file_path, modules, file_ext=file_extension, unique_modules=unique_modules)
    return modules


def main() -> int:
    """Print the modules changed by the files given on the command line as JSON."""
    changed_modules = list_changed_modules(sys.argv[1:])
    print(json.dumps(changed_modules))
    return 0


if __name__ == "__main__":
    sys.exit(main())
from setuptools import find_packages, setup

# Runtime dependencies of the two command-line tools.
MAIN_REQUIREMENTS = ["invoke~=1.6.0", "virtualenv~=20.10.0"]

# Installed through the "tests" extra.
TEST_REQUIREMENTS = [
    "pytest~=6.1",
]

# Executables created on install, in "<command> = <module>:<function>" form.
CONSOLE_SCRIPTS = [
    "ci_detect_changed_modules = ci_detect_changed_modules.main:main",
    "ci_build_python_checkers_reports = ci_build_python_static_checkers_reports.main:main",
]

setup(
    name="ci_static_check_reports",
    description="CI tool to detect changed modules and then generate static check reports.",
    author="Airbyte",
    author_email="contact@airbyte.io",
    packages=find_packages(),
    install_requires=MAIN_REQUIREMENTS,
    package_data={"": ["*.json", "schemas/*.json"]},
    extras_require={"tests": TEST_REQUIREMENTS},
    entry_points={"console_scripts": CONSOLE_SCRIPTS},
)
import json
import os
import subprocess

import pytest

# Files each successful run is expected to leave in a module's reports directory.
EXPECTED_REPORT_FILES = (
    "black.txt",
    "coverage.xml",
    "flake.xml",
    "isort.txt",
    "cobertura.xml",
    "pytest.xml",
)


@pytest.mark.parametrize(
    "changed_module,should_build_reports",
    [
        ('[{"dir": "tools/ci_static_check_reports", "lang": "py"}]', True),
        ('[{"dir": "airbyte-integrations/connectors/destination-bigquery", "lang": "java"}]', False),
        ('[{"dir": "airbyte-integrations/connectors/not-existing-module", "lang": "other"}]', False),
    ],
)
def test_build_static_checkers_reports(changed_module: str, should_build_reports: bool) -> None:
    """Run the report builder on a JSON module list and check which reports appear.

    The module directories and the default ``static_checker_reports`` output
    directory are relative paths, so the test depends on the working directory
    it is run from.
    """
    subprocess.call(["ci_build_python_checkers_reports", changed_module], shell=False)

    # Reports are written under the module's directory, not under the raw JSON
    # argument, so the "dir" values must be extracted before building the path.
    for module in json.loads(changed_module):
        static_checker_reports_path = os.path.join("static_checker_reports", module["dir"])
        report_paths_exist = [os.path.exists(static_checker_reports_path)]
        report_paths_exist.extend(
            os.path.exists(os.path.join(static_checker_reports_path, report_file)) for report_file in EXPECTED_REPORT_FILES
        )

        if should_build_reports:
            assert all(report_paths_exist)
        else:
            # `not all(...)` would pass as soon as a single report is missing;
            # a skipped module must produce nothing at all.
            assert not any(report_paths_exist)
from typing import Dict, List

import pytest
from ci_detect_changed_modules.main import list_changed_modules


# The expected modules are found by looking for `setup.py` on disk using the
# relative paths below, so the results depend on the working directory.
@pytest.mark.parametrize(
    "changed_files,changed_modules",
    [
        (["path/to/file1", "file2.txt", "path/to/file3.txt"], []),
        (
            [
                "airbyte-cdk/python/airbyte_cdk/entrypoint.py",
                "airbyte-cdk/python/airbyte_cdk/file1",
                "airbyte-cdk/python/airbyte_cdk/file2.py",
            ],
            [{"dir": "airbyte-cdk/python", "lang": "py"}],
        ),
        (
            [
                "airbyte-cdk/python/airbyte_cdk/entrypoint.py",
                "airbyte-integrations/connectors/source-asana/source_asana/streams.py",
                "airbyte-integrations/connectors/source-asana/source_asana/source.py",
                "airbyte-integrations/connectors/source-braintree/integration_tests/abnormal_state.json",
            ],
            [{"dir": "airbyte-cdk/python", "lang": "py"}, {"dir": "airbyte-integrations/connectors/source-asana", "lang": "py"}],
        ),
        (
            [],
            [],
        ),
        # TODO: update test after non-python modules are supported
        (
            [
                "airbyte-integrations/connectors/source-clickhouse-strict-encrypt/src/main/"
                "java/io/airbyte/integrations/source/clickhouse/ClickHouseStrictEncryptSource.java"
            ],
            [],
        ),
        (
            ["airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json"],
            [],
        ),
        (
            ["airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py"],
            [
                {"dir": "airbyte-integrations/connectors/destination-amazon-sqs", "lang": "py"},
            ],
        ),
    ],
)
def test_list_changed_modules(changed_files: List[str], changed_modules: List[Dict[str, str]]) -> None:
    """Check that changed files map to the expected list of module entries."""
    calculated_changed_modules = list_changed_modules(changed_files)

    assert calculated_changed_modules == changed_modules