From 190d2f7d9c71751d2f2da191ff1e265d82565775 Mon Sep 17 00:00:00 2001
From: Sajid Alam <90610031+SajidAlamQB@users.noreply.github.com>
Date: Thu, 30 Nov 2023 17:17:46 +0000
Subject: [PATCH] Add E2E test for tools (#3325)

* Update new.feature

Signed-off-by: SajidAlamQB <90610031+SajidAlamQB@users.noreply.github.com>

* Update new.feature

Signed-off-by: SajidAlamQB <90610031+SajidAlamQB@users.noreply.github.com>

* Update new.feature

* add-ons e2e test

* fix minor errors

* update e2e add-ons

* add tests for pyspark and viz

* install dependencies

* lint

* add hadoop

* Update cli_steps.py

* Update e2e-tests.yml

* add java to ENV

* use java github action

* remove hadoop

* dummy hadoop

* fix version

* add VC++

* Update e2e-tests.yml

* Update e2e-tests.yml

* Update e2e-tests.yml

* use only choco

* Update e2e-tests.yml

* hadoop_home attempt 2

* revert choco installs

* try hadoop 2.7.1 and try starters hadoop fix

* Update e2e-tests.yml

* Update e2e-tests.yml

* add file check for all addons

* lint

* update add-ons e2e test

* rename to tools

Signed-off-by: SajidAlamQB <90610031+SajidAlamQB@users.noreply.github.com>

* Update cli_steps.py

Signed-off-by: SajidAlamQB <90610031+SajidAlamQB@users.noreply.github.com>

* changes based on review

Signed-off-by: SajidAlamQB <90610031+SajidAlamQB@users.noreply.github.com>

---------

Signed-off-by: SajidAlamQB <90610031+SajidAlamQB@users.noreply.github.com>
---
 .github/workflows/e2e-tests.yml | 14 +++++++
 features/steps/cli_steps.py     | 68 +++++++++++++++++++++++++++++++++
 features/tools.feature          | 41 ++++++++++++++++++++
 3 files changed, 123 insertions(+)
 create mode 100644 features/tools.feature

diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index d39505f8fc..45e37525ac 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -42,6 +42,20 @@ jobs:
       - name: Add MSBuild to PATH
         if: inputs.os == 'windows-latest'
         uses: microsoft/setup-msbuild@v1
+      - name: Install Microsoft Visual C++ Redistributable
+        if: inputs.os == 'windows-latest'
+        run: |
+          choco install vcredist-all
+      - name: Setup Hadoop binary
+        if: inputs.os == 'windows-latest'
+        run: |
+          Invoke-WebRequest "https://github.com/steveloughran/winutils/blob/master/hadoop-2.7.1/bin/winutils.exe?raw=true" -OutFile winutils.exe
+          Invoke-WebRequest "https://github.com/steveloughran/winutils/blob/master/hadoop-2.7.1/bin/hadoop.dll?raw=true" -OutFile hadoop.dll
+          Move-Item .\hadoop.dll C:\Windows\System32
+          New-Item -ItemType directory -Path C:\hadoop\bin
+          Move-Item .\winutils.exe C:\hadoop\bin
+          echo "HADOOP_HOME=C:\hadoop" | Out-File -Append -Encoding ascii -FilePath $env:GITHUB_ENV
+          echo "C:\hadoop\bin" | Out-File -Append -Encoding ascii -FilePath $env:GITHUB_PATH  # HADOOP_HOME is only visible from the next step on, so use the literal path; GITHUB_PATH prepends it to PATH
       - name: Install dependencies
         run: |
           pip install --upgrade pip
diff --git a/features/steps/cli_steps.py b/features/steps/cli_steps.py
index 4494aade8a..1f9465aa38 100644
--- a/features/steps/cli_steps.py
+++ b/features/steps/cli_steps.py
@@ -162,6 +162,31 @@ def create_config_file(context):
         yaml.dump(config, config_file, default_flow_style=False)
 
 
+@given('I have prepared a config file with tools "{tools}"')
+def create_config_file_with_tools(context, tools):
+    """Behave step to create a temporary config file
+    (given the existing temp directory) and store it in the context.
+    It takes a custom tools list and sets example prompt to `y`.
+    """
+
+    tools_str = tools if tools != "none" else ""
+
+    context.config_file = context.temp_dir / "config.yml"
+    context.project_name = "project-dummy"
+    context.root_project_dir = context.temp_dir / context.project_name
+    context.package_name = context.project_name.replace("-", "_")
+    config = {
+        "tools": tools_str,
+        "example_pipeline": "y",
+        "project_name": context.project_name,
+        "repo_name": context.project_name,
+        "output_dir": str(context.temp_dir),
+        "python_package": context.package_name,
+    }
+    with context.config_file.open("w") as config_file:
+        yaml.dump(config, config_file, default_flow_style=False)
+
+
 @given("I have installed the project dependencies")
 def pip_install_dependencies(context):
     """Install project dependencies using pip."""
@@ -453,6 +478,49 @@ def is_created(name):
         assert is_created(path)
 
 
+@then('the expected tool directories and files should be created with "{tools}"')
+def check_created_project_structure_from_tools(context, tools):
+    """Behave step to check the subdirectories created by kedro new with tools."""
+
+    def is_created(name):
+        """Check if path exists."""
+        return (context.root_project_dir / name).exists()
+
+    # Base checks for any project
+    for path in ["README.md", "src", "pyproject.toml", "requirements.txt"]:
+        assert is_created(path), f"{path} does not exist"
+
+    tools_list = (
+        tools.split(",") if tools != "all" else ["1", "2", "3", "4", "5", "6", "7"]
+    )
+
+    if "1" in tools_list:  # lint tool
+        pass  # No files are added
+
+    if "2" in tools_list:  # test tool
+        assert is_created("tests"), "tests directory does not exist"
+
+    if "3" in tools_list:  # log tool
+        assert is_created("conf/logging.yml"), "logging configuration does not exist"
+
+    if "4" in tools_list:  # docs tool
+        assert is_created("docs"), "docs directory does not exist"
+
+    if "5" in tools_list:  # data tool
+        assert is_created("data"), "data directory does not exist"
+
+    if "6" in tools_list:  # PySpark tool
+        assert is_created("conf/base/spark.yml"), "spark.yml does not exist"
+
+    if "7" in tools_list:  # viz tool
+        expected_reporting_path = Path(
+            f"src/{context.package_name}/pipelines/reporting"
+        )
+        assert is_created(
+            expected_reporting_path
+        ), "reporting pipeline directory does not exist"
+
+
 @then("the logs should show that {number} nodes were run")
 def check_one_node_run(context, number):
     expected_log_line = f"Completed {number} out of {number} tasks"
diff --git a/features/tools.feature b/features/tools.feature
new file mode 100644
index 0000000000..8a18565df3
--- /dev/null
+++ b/features/tools.feature
@@ -0,0 +1,41 @@
+Feature: New Kedro project with tools
+
+  Scenario: Create a new Kedro project without any tools
+    Given I have prepared a config file with tools "none"
+    When I run a non-interactive kedro new without starter
+    Then the expected tool directories and files should be created with "none"
+    Given I have installed the project dependencies
+    When I execute the kedro command "run"
+    Then I should get a successful exit code
+
+  Scenario: Create a new Kedro project with all tools except 'viz' and 'pyspark'
+    Given I have prepared a config file with tools "1,2,3,4,5"
+    When I run a non-interactive kedro new without starter
+    Then the expected tool directories and files should be created with "1,2,3,4,5"
+    Given I have installed the project dependencies
+    When I execute the kedro command "run"
+    Then I should get a successful exit code
+
+  Scenario: Create a new Kedro project with all tools
+    Given I have prepared a config file with tools "all"
+    When I run a non-interactive kedro new without starter
+    Then the expected tool directories and files should be created with "all"
+    Given I have installed the project dependencies
+    When I execute the kedro command "run"
+    Then I should get a successful exit code
+
+  Scenario: Create a new Kedro project with only 'pyspark' tool
+    Given I have prepared a config file with tools "6"
+    When I run a non-interactive kedro new without starter
+    Then the expected tool directories and files should be created with "6"
+    Given I have installed the project dependencies
+    When I execute the kedro command "run"
+    Then I should get a successful exit code
+
+  Scenario: Create a new Kedro project with only 'viz' tool
+    Given I have prepared a config file with tools "7"
+    When I run a non-interactive kedro new without starter
+    Then the expected tool directories and files should be created with "7"
+    Given I have installed the project dependencies
+    When I execute the kedro command "run"
+    Then I should get a successful exit code