From bb7fbfc17896818d189ba6d34fb90f88bdfe41d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Mr=C3=A1zek?= Date: Mon, 7 Nov 2022 11:27:40 +0100 Subject: [PATCH] move templates to this directory --- templates/blank/.gitignore | 133 ++++++++++++ templates/blank/README.md | 37 ++++ templates/blank/cookiecutter.json | 13 ++ .../.dockerignore | 9 + .../{{cookiecutter.project_slug}}/.gitignore | 152 ++++++++++++++ .../.pre-commit-config.yaml | 79 +++++++ .../{{cookiecutter.project_slug}}/README.md | 29 +++ .../dockerbuild.yaml | 0 .../docs/index.md | 10 + .../{{cookiecutter.project_slug}}/mkdocs.yaml | 64 ++++++ .../pipeline/config.py | 22 ++ .../pipeline/params.yaml | 2 + .../pipeline/pipeline.py | 35 ++++ .../pyproject.toml | 90 ++++++++ .../{{cookiecutter.project_slug}}/wanna.yaml | 29 +++ templates/sklearn/.gitignore | 133 ++++++++++++ templates/sklearn/README.md | 37 ++++ templates/sklearn/cookiecutter.json | 12 ++ .../.dockerignore | 9 + .../{{cookiecutter.project_slug}}/.gitignore | 152 ++++++++++++++ .../{{cookiecutter.project_slug}}/README.md | 24 +++ .../dockerbuild.yaml | 0 .../notebook/train/Dockerfile | 5 + .../notebook/train/requirements.txt | 3 + .../pipeline/__init__.py | 0 .../pipeline/components/__init__.py | 0 .../pipeline/components/data/__init__.py | 0 .../pipeline/components/data/get_data.py | 30 +++ .../pipeline/components/predictor/__init__.py | 0 .../predictor/make_prediction_request.py | 36 ++++ .../pipeline/components/trainer/__init__.py | 0 .../pipeline/components/trainer/eval_model.py | 42 ++++ .../components/trainer/train_xgb_model.py | 48 +++++ .../pipeline/config.py | 34 +++ .../pipeline/params.yaml | 2 + .../pipeline/pipeline.py | 194 ++++++++++++++++++ .../pyproject.toml | 75 +++++++ .../{{cookiecutter.project_slug}}/wanna.yaml | 47 +++++ 38 files changed, 1587 insertions(+) create mode 100644 templates/blank/.gitignore create mode 100644 templates/blank/README.md create mode 100644 templates/blank/cookiecutter.json create mode 100644 templates/blank/{{cookiecutter.project_slug}}/.dockerignore create mode 100644 templates/blank/{{cookiecutter.project_slug}}/.gitignore create mode 100644 templates/blank/{{cookiecutter.project_slug}}/.pre-commit-config.yaml create mode 100644 templates/blank/{{cookiecutter.project_slug}}/README.md create mode 100644 templates/blank/{{cookiecutter.project_slug}}/dockerbuild.yaml create mode 100644 templates/blank/{{cookiecutter.project_slug}}/docs/index.md create mode 100644 templates/blank/{{cookiecutter.project_slug}}/mkdocs.yaml create mode 100644 templates/blank/{{cookiecutter.project_slug}}/pipeline/config.py create mode 100644 templates/blank/{{cookiecutter.project_slug}}/pipeline/params.yaml create mode 100644 templates/blank/{{cookiecutter.project_slug}}/pipeline/pipeline.py create mode 100644 templates/blank/{{cookiecutter.project_slug}}/pyproject.toml create mode 100644 templates/blank/{{cookiecutter.project_slug}}/wanna.yaml create mode 100644 templates/sklearn/.gitignore create mode 100644 templates/sklearn/README.md create mode 100644 templates/sklearn/cookiecutter.json create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/.dockerignore create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/.gitignore create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/README.md create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/dockerbuild.yaml create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/notebook/train/Dockerfile create mode 100644
templates/sklearn/{{cookiecutter.project_slug}}/notebook/train/requirements.txt create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/__init__.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/__init__.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/data/__init__.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/data/get_data.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/predictor/__init__.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/predictor/make_prediction_request.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/__init__.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/eval_model.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/train_xgb_model.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/config.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/params.yaml create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pipeline/pipeline.py create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/pyproject.toml create mode 100644 templates/sklearn/{{cookiecutter.project_slug}}/wanna.yaml diff --git a/templates/blank/.gitignore b/templates/blank/.gitignore new file mode 100644 index 00000000..5a07f3b1 --- /dev/null +++ b/templates/blank/.gitignore @@ -0,0 +1,133 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + + + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +.vscode \ No newline at end of file diff --git a/templates/blank/README.md b/templates/blank/README.md new file mode 100644 index 00000000..b77b4ffd --- /dev/null +++ b/templates/blank/README.md @@ -0,0 +1,37 @@ +# blank wanna-ml-cookiecutter + +cookiecutter template for wanna-ml projects + +## Get started + +# Install + +``` +pip install wanna-ml +``` + +# Launch the cookiecutter +``` +wanna init --template blank +``` +or without wanna, manually +``` +cookiecutter https://github.com/avast/wanna-blank-cookiecutter +``` + +# Answer the following questions, the values will be used in your wanna-ml config + +``` +project_name [project_name]: +project_owner_fullname [project owner]: +project_owner_email [you@avast.com]: +project_version [0.0.0]: +project_description [Link to WANNA project page on CML]: +project_slug [project_name]: +gcp_project_id []: +gcp_service_account []: +gcp_bucket []: +``` + +# cd into your project_slug cd project_name diff --git a/templates/blank/cookiecutter.json b/templates/blank/cookiecutter.json new file mode 100644 index 00000000..4d3394be --- /dev/null +++ b/templates/blank/cookiecutter.json @@ -0,0 +1,13 @@ +{ + "project_name": "project_name", + "project_owner_fullname": "project owner", + "project_owner_email": "you@avast.com", + "project_version": "0.0.0", + "project_description": "Link to MLOps project page on CML", + "project_slug": "{{cookiecutter.project_name|lower|replace('_', '-')|replace(' ', '-')}}", + "project_repo_name": "mlops/{{cookiecutter.project_slug}}", + "gcp_project_id": "your-gcp-project-id", + "gcp_service_account": "", + "gcp_artifact_registry_repository": "", + "gcp_bucket": "your-staging-bucket-name" +} \ No newline at end of file diff --git a/templates/blank/{{cookiecutter.project_slug}}/.dockerignore b/templates/blank/{{cookiecutter.project_slug}}/.dockerignore new file mode 100644 index 00000000..c456340e --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/.dockerignore @@ -0,0 +1,9 @@ +# docker-py tars the whole build directory and sends it to the docker-machine +# excluding large folders and files unrelated to docker build can save you _a lot of_ time on each build + +.git +.idea +.vscode +.mypy_cache + +venv/ diff --git a/templates/blank/{{cookiecutter.project_slug}}/.gitignore b/templates/blank/{{cookiecutter.project_slug}}/.gitignore new file mode 100644 index 00000000..3224b962 --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/.gitignore @@ -0,0 +1,152 @@ +build/ + +# This file contains settings for pytest. We are using it for local development +# only.
Settings for CI/CD are in setup.cfg +pytest.ini + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.pyc +*.py[cod] +*$py.class +.mypy_cache + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +.DS_Store + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + +tags + +.vim/* +*/.vim/* +*.swp +*.swo + +#VS code +.vscode + +#teamcity +.teamcity/*.iml +.teamcity/target/ + + +.idea/* \ No newline at end of file diff --git a/templates/blank/{{cookiecutter.project_slug}}/.pre-commit-config.yaml b/templates/blank/{{cookiecutter.project_slug}}/.pre-commit-config.yaml new file mode 100644 index 00000000..9800241f --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/.pre-commit-config.yaml @@ -0,0 +1,79 @@ +files: \.py +repos: + - repo: local + hooks: + - id: isort + name: isort + entry: poetry run isort + language: python + args: [ + "--line-length=120", + "--py", "38", + "--profile", "black", + ] + verbose: false + - id: black + name: black + entry: poetry run black + args: [ + "--line-length=120", + "--target-version", "py38" + ] + language: python + verbose: false + - id: flake8 + name: flake8 + entry: poetry run flake8 + language: python + args: [ + "--max-line-length=120", + "--extend-ignore=E203,E712,E722,W503,W605" + ] + # E203 - Whitespace before ':' + # E712 - Comparison to true should be 'if cond is true:' or 'if cond:' + # E722 - Do not use bare except, specify exception instead + # W503 - Line break occurred before a binary operator + # W605 - Invalid escape sequence 'x' + verbose: false + - id: pylint + name: pylint + entry: poetry run pylint + language: python + args: [ + "--enable-all-extensions", + "--errors-only", + "--max-line-length=120", + "--generated-members=torch.*", + "--ignored-modules=multiprocess", + "--disable=C0103,C0114,C0115,C0116,E1101" + ] + # C0103 - invalid-name + # C0114 - missing-module-docstring + # C0115 - 
missing-class-docstring + # C0116 - missing-function-docstring + # E1101 - no-member + verbose: false + - id: mypy + name: mypy + entry: poetry run mypy + language: python + args: [ + "--ignore-missing-imports" + ] + verbose: false + - id: pytest + name: pytest + entry: poetry run pytest + language: python + args: ["-W", "ignore", "-s"] + always_run: true + pass_filenames: false + verbose: true + + # - repo: https://github.com/pre-commit/pre-commit-hooks + # rev: v2.3.0 + # hooks: + # - id: check-yaml + # args: [--allow-multiple-documents] + # - id: trailing-whitespace + # - id: end-of-file-fixer + # - id: check-added-large-files \ No newline at end of file diff --git a/templates/blank/{{cookiecutter.project_slug}}/README.md b/templates/blank/{{cookiecutter.project_slug}}/README.md new file mode 100644 index 00000000..31c8454e --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/README.md @@ -0,0 +1,29 @@ +# WANNA ML - {{ cookiecutter.project_name }} project + +# Setup + +```bash +# create a local env +conda create -n {{cookiecutter.project_slug}} python=3.8 poetry + +# activate local env +conda activate {{cookiecutter.project_slug}} + +# installs all dependencies from pyproject.toml including your project to the virtual env +poetry install + +# Run any wanna-ml command +wanna --help + +# runs the task `check` defined in :code:`[tool.taskipy.tasks]` in pyproject.toml +# by default it runs linters for the code, you can modify it based on your preferences +task check + +# runs the task check as well as tests and mypy via pre-commit. +task build +``` + + +### Generating documentation + +Docs use [mkdocs](https://www.mkdocs.org/) and can be checked on localhost before pushing via `task docs-serve`. To deploy, execute `task docs-deploy`. diff --git a/templates/blank/{{cookiecutter.project_slug}}/dockerbuild.yaml b/templates/blank/{{cookiecutter.project_slug}}/dockerbuild.yaml new file mode 100644 index 00000000..e69de29b diff --git a/templates/blank/{{cookiecutter.project_slug}}/docs/index.md b/templates/blank/{{cookiecutter.project_slug}}/docs/index.md new file mode 100644 index 00000000..ca908f4c --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/docs/index.md @@ -0,0 +1,10 @@ +--- +title: Overview +summary: {{cookiecutter.project_name}} overview +authors: + - {{cookiecutter.project_owner_email}} +date: 2024-05-24 +--- + +# Welcome to {{cookiecutter.project_name}} + diff --git a/templates/blank/{{cookiecutter.project_slug}}/mkdocs.yaml b/templates/blank/{{cookiecutter.project_slug}}/mkdocs.yaml new file mode 100644 index 00000000..ff13b686 --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/mkdocs.yaml @@ -0,0 +1,64 @@ +site_name: {{ cookiecutter.project_name }} +repo_url: https://git.int.avast.com/{{ cookiecutter.project_repo_name }} +repo_name: {{ cookiecutter.project_repo_name }} +edit_uri: edit/master/docs/ +theme: + name: material + palette: + - scheme: default + toggle: + icon: material/toggle-switch-off-outline + name: Switch to dark mode + # primary: indigo + # accent: indigo + features: + - navigation.instant + - navigation.tracking + - navigation.tabs + - navigation.tabs.sticky + - navigation.sections + - navigation.expand + - navigation.indexes + - navigation.top + - toc.follow + - toc.integrate + highlightjs: true + hljs_languages: + - yaml + - python + - bash + - scheme: slate + toggle: + icon: material/toggle-switch + name: Switch to light mode + # primary: indigo + # accent: indigo + features: + - navigation.instant + - navigation.tracking + - navigation.tabs
+ - navigation.tabs.sticky + - navigation.sections + - navigation.expand + - navigation.indexes + - navigation.top + - toc.follow + - toc.integrate + highlightjs: true + hljs_languages: + - yaml + - python + - bash + +nav: + - Overview: 'index.md' + +plugins: +- search +- mkdocstrings + +markdown_extensions: + - mkdocs-typer + - mkdocs-click + - toc: + permalink: "#" diff --git a/templates/blank/{{cookiecutter.project_slug}}/pipeline/config.py b/templates/blank/{{cookiecutter.project_slug}}/pipeline/config.py new file mode 100644 index 00000000..167ddee1 --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/pipeline/config.py @@ -0,0 +1,22 @@ +import json +import os +from datetime import datetime + +from caseconverter import snakecase + +# Env exported from wanna pipeline cli command +PIPELINE_NAME_PREFIX = snakecase("{{ cookiecutter.project_slug }}-pipeline").upper() + +PROJECT_ID = os.getenv(f"{PIPELINE_NAME_PREFIX}_PROJECT_ID") +BUCKET = os.getenv(f"{PIPELINE_NAME_PREFIX}_BUCKET") +REGION = os.getenv(f"{PIPELINE_NAME_PREFIX}_REGION") +PIPELINE_NAME = os.getenv(f"{PIPELINE_NAME_PREFIX}_PIPELINE_NAME") +PIPELINE_JOB_ID = os.getenv(f"{PIPELINE_NAME_PREFIX}_PIPELINE_JOB_ID") +VERSION = os.getenv(f"{PIPELINE_NAME_PREFIX}_VERSION", datetime.now().strftime("%Y%m%d%H%M%S")) +PIPELINE_LABELS = json.loads(os.getenv(f"{PIPELINE_NAME_PREFIX}_PIPELINE_LABELS", "{}")) +TENSORBOARD = os.getenv(f"{PIPELINE_NAME_PREFIX}_TENSORBOARD") + +# Pipeline config +MODEL_NAME = f"{PIPELINE_NAME.lower()}" # type: ignore +PIPELINE_ROOT = f"{BUCKET}/pipeline-root/{MODEL_NAME}" +MODEL_DISPLAY_NAME = f"{MODEL_NAME}-{VERSION}" diff --git a/templates/blank/{{cookiecutter.project_slug}}/pipeline/params.yaml b/templates/blank/{{cookiecutter.project_slug}}/pipeline/params.yaml new file mode 100644 index 00000000..8350dbfd --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/pipeline/params.yaml @@ -0,0 +1,2 @@ +#is_hp_tuning_enabled: "n" +eval_acc_threshold: 0.87 diff --git a/templates/blank/{{cookiecutter.project_slug}}/pipeline/pipeline.py b/templates/blank/{{cookiecutter.project_slug}}/pipeline/pipeline.py new file mode 100644 index 00000000..4b0e6004 --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/pipeline/pipeline.py @@ -0,0 +1,35 @@ +# ignore: import-error +# pylint: disable = no-value-for-parameter +from pathlib import Path + +import config as cfg +from kfp.v2 import dsl +from kfp.v2.dsl import component + + +@component( + base_image="python:3.9", +) +def on_exit(): + import logging + + logging.getLogger().setLevel(logging.INFO) + + logging.info("This Component will run on exit, as the last task") + + +@dsl.pipeline( + # A name for the pipeline. Used to determine the pipeline Context.
+ name=cfg.PIPELINE_NAME, + pipeline_root=cfg.PIPELINE_ROOT, +) +def wanna_pipeline(eval_acc_threshold: float): + pipeline_dir = Path(__file__).parent.resolve() + + # =================================================================== + # Get pipeline result notification + # =================================================================== + exit_task = on_exit().set_display_name("On Exit Dummy Task").set_caching_options(False) + + with dsl.ExitHandler(exit_task): + pass diff --git a/templates/blank/{{cookiecutter.project_slug}}/pyproject.toml b/templates/blank/{{cookiecutter.project_slug}}/pyproject.toml new file mode 100644 index 00000000..2879fd06 --- /dev/null +++ b/templates/blank/{{cookiecutter.project_slug}}/pyproject.toml @@ -0,0 +1,90 @@ +[tool.poetry] +authors = ["{{cookiecutter.project_owner_fullname}}", "{{cookiecutter.project_owner_email}}"] +description = "{{cookiecutter.project_description}}" +name = "{{cookiecutter.project_slug}}" +version = "{{cookiecutter.project_version}}" + +[tool.poetry.dependencies] +python = ">=3.8,<3.11" + +[tool.poetry.dev-dependencies] +black = "^22.3.0" +flake8 = "^3.8.4" +isort = "^5.10.1" +pylint = "^2.13.8" +pytest = "^7.1.2" +taskipy = "^1.10.2" +google-cloud-pipeline-components= "^1.0.5" +kfp = "^1.8.12" +wanna-ml = "^0.1.5" +pandas = "^1.4.2" +pre-commit = "^2.10.1" +mkdocs="1.3.0" +mkdocs-click = "^0.7.0" +mkdocs-graphviz = "^1.4.4" +mkdocs-material = "^8.2.15" +mkdocs-typer = "^0.0.2" +mkdocstrings = "^0.18.1" +mypy = "^0.950" + +[tool.taskipy.tasks] +docs-deploy = "mkdocs gh-deploy" +docs-serve = "mkdocs serve" +build = "pre-commit run --all-files" +pre_build = "pre-commit install" +black = "black pipeline" +check = "task isort && task black && task pylint" +isort = "isort pipeline" +pylint = "pylint pipeline" + +[tool.pytest.ini_options] +filterwarnings = [ + "error", + "ignore::UserWarning", # note the use of single quote below to denote "raw" strings in TOML + 'ignore::DeprecationWarning', +] + +[tool.pylint.master] +good-names = "logger,e,i,j,n,m,f,_,xs,ys,df" +signature-mutators=""" + click.decorators.option, + click.decorators.argument, + click.decorators.version_option, + click.decorators.help_option, + click.decorators.pass_context, + click.decorators.confirmation_option""" + +[tool.pylint.messages_control] +disable = "all" +enable = """, + unused-import, + fixme, + useless-object-inheritance, + unused-variable, + unused-argument, + unexpected-keyword-arg, + string, + unreachable, + invalid-name, + logging-format-interpolation, + logging-fstring-interpolation, + unnecessary-pass, + """ +ignored-argument-names = "_.*|^ignored_|^unused_|args|kwargs" + +[black] +extend-ignore = "E203" +max-line-length = 120 + +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core>=1.0.0"] + +[[tool.poetry.source]] +name = "avast-repo-local" # This name will be used in the configuration to retrieve the proper credentials +url = "https://artifactory.ida.avast.com/artifactory/api/pypi/pypi-local/simple/" # URL used to download your packages from + +[[tool.poetry.source]] +default = true +name = "avast-repo-remote" # This name will be used in the configuration to retrieve the proper credentials +url = "https://artifactory.ida.avast.com/artifactory/api/pypi/pypi-remote/simple/" # URL used to download your packages from diff --git a/templates/blank/{{cookiecutter.project_slug}}/wanna.yaml b/templates/blank/{{cookiecutter.project_slug}}/wanna.yaml new file mode 100644 index 00000000..93c0dcb8 --- +++
b/templates/blank/{{cookiecutter.project_slug}}/wanna.yaml @@ -0,0 +1,29 @@ +wanna_project: + name: {{ cookiecutter.project_slug|replace('_', '-') }} + version: {{ cookiecutter.project_version }} + authors: [{{ cookiecutter.project_owner_email }}] + +gcp_profiles: + - profile_name: default + project_id: {{ cookiecutter.gcp_project_id }} + zone: europe-west1-b + bucket: {{ cookiecutter.gcp_bucket }} + labels: + service_account: {{ cookiecutter.gcp_service_account }} + +tensorboards: + - name: {{ cookiecutter.project_slug|replace('_', '-') }}-board + +docker: + images: [] + repository: {{ cookiecutter.gcp_artifact_registry_repository }} + +pipelines: + - name: {{ cookiecutter.project_slug|replace('_', '-') }}-pipeline + schedule: + cron: 2 * * * * + bucket: gs://{{ cookiecutter.gcp_bucket }} + pipeline_file: pipeline/pipeline.py + pipeline_params: pipeline/params.yaml + docker_image_ref: [] + tensorboard_ref: {{ cookiecutter.project_slug|replace('_', '-') }}-board diff --git a/templates/sklearn/.gitignore b/templates/sklearn/.gitignore new file mode 100644 index 00000000..5a07f3b1 --- /dev/null +++ b/templates/sklearn/.gitignore @@ -0,0 +1,133 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + + + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +.vscode \ No newline at end of file diff --git a/templates/sklearn/README.md b/templates/sklearn/README.md new file mode 100644 index 00000000..c3c0823e --- /dev/null +++ b/templates/sklearn/README.md @@ -0,0 +1,37 @@ +# wanna-ml-cookiecutter + +cookiecutter template for wanna-ml projects + +## Get started + +# Install + +``` +pip install wanna-ml +``` + +# Launch the cookiecutter +``` +wanna init +``` +or without wanna, manually +``` +cookiecutter https://github.com/avast/wanna-ml-cookiecutter +``` + +# Answer the following questions, the values will be used in your wanna-ml config + +``` +project_name [project_name]: +project_owner_fullname [project owner]: +project_owner_email [you@avast.com]: +project_version [0.0.0]: +project_description [Link to WANNA project page on CML]: +project_slug [project_name]: +gcp_project_id []: +gcp_service_account []: +gcp_bucket []: +``` + +# cd into your project_slug cd project_name diff --git a/templates/sklearn/cookiecutter.json b/templates/sklearn/cookiecutter.json new file mode 100644 index 00000000..3e5a125e --- /dev/null +++ b/templates/sklearn/cookiecutter.json @@ -0,0 +1,12 @@ +{ + "project_name": "project_name", + "project_owner_fullname": "project owner", + "project_owner_email": "you@avast.com", + "project_version": "0.0.0", + "project_description": "Link to MLOps project page on CML", + "project_slug": "{{cookiecutter.project_name|lower|replace('_', '-')|replace(' ', '-')}}", + "gcp_project_id": "your-gcp-project-id", + "gcp_service_account": "", + "gcp_artifact_registry_repository": "", + "gcp_bucket": "your-staging-bucket-name" +} \ No newline at end of file diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/.dockerignore b/templates/sklearn/{{cookiecutter.project_slug}}/.dockerignore new file mode 100644 index 00000000..c456340e --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/.dockerignore @@ -0,0 +1,9 @@ +# docker-py tars the whole build directory and sends it to the docker-machine +# excluding large folders and files unrelated to docker build can save you _a lot of_ time on each build + +.git +.idea +.vscode +.mypy_cache + +venv/ diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/.gitignore b/templates/sklearn/{{cookiecutter.project_slug}}/.gitignore new file mode 100644 index 00000000..3224b962 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/.gitignore @@ -0,0 +1,152 @@ +build/ + +# This file contains settings for pytest. We are using it for local development +# only. Settings for CI/CD are in setup.cfg +pytest.ini + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.pyc +*.py[cod] +*$py.class +.mypy_cache + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +.DS_Store + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + +tags + +.vim/* +*/.vim/* +*.swp +*.swo + +#VS code +.vscode + +#teamcity +.teamcity/*.iml +.teamcity/target/ + + +.idea/* \ No newline at end of file diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/README.md b/templates/sklearn/{{cookiecutter.project_slug}}/README.md new file mode 100644 index 00000000..802444e6 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/README.md @@ -0,0 +1,24 @@ +# WANNA ML project {{ cookiecutter.project_slug }} + +# Setup + +```bash +# create a local env +conda create -n {{cookiecutter.project_slug}} python=3.8 poetry + +# activate local env +conda activate {{cookiecutter.project_slug}} + +# installs all dependencies from pyproject.toml including your project to the virtual env +poetry install + +# Run any wanna-ml command +wanna --help + +# runs the task `check` defined in :code:`[tool.taskipy.tasks]` in pyproject.toml +# by default it runs linters for the code, you can modify it based on your preferences +poetry run task check + +# runs the task `build`. 
+poetry run task build +``` diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/dockerbuild.yaml b/templates/sklearn/{{cookiecutter.project_slug}}/dockerbuild.yaml new file mode 100644 index 00000000..e69de29b diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/notebook/train/Dockerfile b/templates/sklearn/{{cookiecutter.project_slug}}/notebook/train/Dockerfile new file mode 100644 index 00000000..30e7da49 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/notebook/train/Dockerfile @@ -0,0 +1,5 @@ +FROM gcr.io/deeplearning-platform-release/base-cpu:latest + +COPY requirements.txt requirements.txt + +RUN pip install -r requirements.txt \ No newline at end of file diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/notebook/train/requirements.txt b/templates/sklearn/{{cookiecutter.project_slug}}/notebook/train/requirements.txt new file mode 100644 index 00000000..802e1a3d --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/notebook/train/requirements.txt @@ -0,0 +1,3 @@ +pandas +sklearn +xgboost \ No newline at end of file diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/__init__.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/__init__.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/data/__init__.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/data/get_data.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/data/get_data.py new file mode 100644 index 00000000..86707550 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/data/get_data.py @@ -0,0 +1,30 @@ +from typing import NamedTuple + +from kfp.v2.dsl import Dataset, Output, component + + +@component( + base_image="python:3.9", + packages_to_install=["pandas", "sklearn"], +) +def get_data_op( + dataset_train: Output[Dataset], dataset_test: Output[Dataset] +) -> NamedTuple("outputs", [("dataset_train_path", str), ("dataset_test_path", str)]): + from collections import namedtuple + + import pandas as pd + from sklearn import datasets + from sklearn.model_selection import train_test_split as tts + + # import some data to play with + data_raw = datasets.load_breast_cancer() + data = pd.DataFrame(data_raw.data, columns=data_raw.feature_names) + data["target"] = data_raw.target + + train, test = tts(data, test_size=0.3) + + train.to_csv(dataset_train.path) + test.to_csv(dataset_test.path) + + outputs = namedtuple("outputs", ["dataset_train_path", "dataset_test_path"]) + return outputs(dataset_train_path=dataset_train.path, dataset_test_path=dataset_test.path) diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/predictor/__init__.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/predictor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/predictor/make_prediction_request.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/predictor/make_prediction_request.py new file mode 
100644 index 00000000..d52556b8 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/predictor/make_prediction_request.py @@ -0,0 +1,36 @@ +from typing import List + +from kfp.v2.dsl import component + + +@component( + base_image="python:3.9", + packages_to_install=["google-cloud-aiplatform", "google-cloud-pipeline-components"], +) +def make_prediction_request(project: str, bucket: str, endpoint: str, instances: List[List[float]]): + """custom pipeline component to pass prediction requests to Vertex AI + endpoint and get responses + """ + import logging + + from google.cloud import aiplatform + from google.protobuf.json_format import Parse + from google_cloud_pipeline_components.proto.gcp_resources_pb2 import GcpResources + + logging.getLogger().setLevel(logging.INFO) + aiplatform.init(project=project, staging_bucket=bucket) + + # parse endpoint resource + logging.info(f"Endpoint = {endpoint}") + gcp_resources = Parse(endpoint, GcpResources()) + endpoint_uri = gcp_resources.resources[0].resource_uri + endpoint_id = "/".join(endpoint_uri.split("/")[-8:-2]) + logging.info(f"Endpoint ID = {endpoint_id}") + + # define endpoint client + _endpoint = aiplatform.Endpoint(endpoint_id) + + # call prediction endpoint for each instance + for instance in instances: + response = _endpoint.predict(instances=[instance]) + logging.info(f"Prediction response: {response.predictions}") diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/__init__.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/eval_model.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/eval_model.py new file mode 100644 index 00000000..bf116e20 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/eval_model.py @@ -0,0 +1,42 @@ +from typing import NamedTuple + +from kfp.v2.dsl import ClassificationMetrics, Dataset, Input, Metrics, Model, Output, component + + +@component( + base_image="python:3.9", + packages_to_install=["pandas", "sklearn", "xgboost"], +) +def eval_model_op( + test_set: Input[Dataset], xgb_model: Input[Model], metrics: Output[ClassificationMetrics], smetrics: Output[Metrics] +) -> NamedTuple("outputs", [("test_score", float),]): + from collections import namedtuple + + import pandas as pd + from sklearn.metrics import confusion_matrix, roc_curve + from xgboost import XGBClassifier + + test_set = pd.read_csv(test_set.path) + model = XGBClassifier() + model.load_model(xgb_model.path) + + X_test, y_test = test_set.drop(columns=["target"]), test_set.target + + score = model.score(X_test, y_test) + + y_scores = model.predict_proba(X_test)[:, 1] + + fpr, tpr, thresholds = roc_curve(y_true=y_test.to_numpy(), y_score=y_scores, pos_label=True) + metrics.log_roc_curve(fpr.tolist(), tpr.tolist(), thresholds.tolist()) + + y_pred = model.predict(X_test) + metrics.log_confusion_matrix( + ["False", "True"], + confusion_matrix(y_test, y_pred).tolist(), # .tolist() to convert np array to list. 
+ ) + + test_score = float(score) + xgb_model.metadata["test_score"] = test_score + smetrics.log_metric("score", test_score) + outputs = namedtuple("outputs", ["test_score"]) + return outputs(test_score=test_score) diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/train_xgb_model.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/train_xgb_model.py new file mode 100644 index 00000000..6c6875a2 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/components/trainer/train_xgb_model.py @@ -0,0 +1,48 @@ +from typing import NamedTuple + +import config as cfg +from kfp.v2.dsl import Dataset, Input, Model, Output, component + + +@component( + base_image=cfg.TRAIN_IMAGE_URI, + packages_to_install=[ + "pandas", + "sklearn", + "xgboost", + ], +) +def train_xgb_model_op( + dataset: Input[Dataset], model_artifact: Output[Model] +) -> NamedTuple("outputs", [("train_score", float), ("model_artifact_path", str)]): + + from collections import namedtuple + + import pandas as pd + from xgboost import XGBClassifier + + data = pd.read_csv(dataset.path) + + model = XGBClassifier(objective="binary:logistic") + + model.fit( + data.drop(columns=["target"]), + data.target, + ) + + score = model.score( + data.drop(columns=["target"]), + data.target, + ) + + model_artifact.metadata["train_score"] = float(score) + model_artifact.metadata["framework"] = "XGBOOST" + # Vertex AI default serving expects model file to be called model.bst + model_path = f"""{model_artifact.path.replace("model_artifact", "model.bst")}""" + model_artifact.path = model_path + model.save_model(model_artifact.path) + + # After save make model path match GCS counter part + model_path = str(model_artifact.path).replace("/gcs/", "gs://").replace("model.bst", "") + outputs = namedtuple("outputs", ["train_score", "model_artifact_path"]) + return outputs(train_score=float(score), model_artifact_path=model_path) diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/config.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/config.py new file mode 100644 index 00000000..285a9f7f --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/config.py @@ -0,0 +1,34 @@ +import json +import os +from datetime import datetime + +from caseconverter import snakecase + +# Env exported from wanna pipeline cli command +PIPELINE_NAME_PREFIX = snakecase("{{ cookiecutter.project_slug }}-pipeline").upper() + +PROJECT_ID = os.getenv(f"{PIPELINE_NAME_PREFIX}_PROJECT_ID") +BUCKET = os.getenv(f"{PIPELINE_NAME_PREFIX}_BUCKET") +REGION = os.getenv(f"{PIPELINE_NAME_PREFIX}_REGION") +PIPELINE_NAME = os.getenv(f"{PIPELINE_NAME_PREFIX}_PIPELINE_NAME") +PIPELINE_JOB_ID = os.getenv(f"{PIPELINE_NAME_PREFIX}_PIPELINE_JOB_ID") +VERSION = os.getenv(f"{PIPELINE_NAME_PREFIX}_PIPELINE_VERSION", datetime.now().strftime("%Y%m%d%H%M%S")) +PIPELINE_LABELS = json.loads(os.getenv(f"{PIPELINE_NAME_PREFIX}_PIPELINE_LABELS", "{}")) +TENSORBOARD = os.getenv(f"{PIPELINE_NAME_PREFIX}_TENSORBOARD") + +# Pipeline config +MODEL_NAME = f"{PIPELINE_NAME.lower()}" # type: ignore +PIPELINE_ROOT = f"{BUCKET}/pipeline_root/{MODEL_NAME}" +MODEL_DISPLAY_NAME = f"{MODEL_NAME}-{VERSION}" + +# custom training image +TRAIN_IMAGE_URI = os.environ["TRAIN_DOCKER_URI"] # fail fast at compile time if wanna did not export it + +# Custom Serving Config +# "europe-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-4:latest" +SERVE_IMAGE_URI = os.environ["SERVE_DOCKER_URI"] # likewise, fail fast during compilation + +SERVING_MACHINE_TYPE =
"n1-standard-4" +SERVING_MIN_REPLICA_COUNT = 1 +SERVING_MAX_REPLICA_COUNT = 2 +SERVING_TRAFFIC_SPLIT = '{"0": 100}' diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/params.yaml b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/params.yaml new file mode 100644 index 00000000..8350dbfd --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/params.yaml @@ -0,0 +1,2 @@ +#is_hp_tuning_enabled: "n" +eval_acc_threshold: 0.87 diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/pipeline.py b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/pipeline.py new file mode 100644 index 00000000..b130b286 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/pipeline/pipeline.py @@ -0,0 +1,194 @@ +# ignore: import-error +# pylint: disable = no-value-for-parameter + +import config as cfg +from components.data.get_data import get_data_op +from components.predictor import make_prediction_request +from components.trainer.eval_model import eval_model_op +from components.trainer.train_xgb_model import train_xgb_model_op +from google_cloud_pipeline_components import aiplatform as aip_components +from kfp.v2 import dsl +from kfp.v2.dsl import component + + +@component( + base_image="python:3.9", +) +def on_exit(): + import logging + + logging.getLogger().setLevel(logging.INFO) + + logging.info("This Component will run on exit") + + +@dsl.pipeline( + # A name for the pipeline. Use to determine the pipeline Context. + name=cfg.PIPELINE_NAME, + pipeline_root=cfg.PIPELINE_ROOT, +) +def wanna_sklearn_sample(eval_acc_threshold: float): + + # =================================================================== + # Get pipeline result notification + # =================================================================== + # collect datasets provided by sklearn + exit_task = on_exit().set_display_name("On Exit Dummy Task").set_caching_options(False) + + with dsl.ExitHandler(exit_task): + + # =================================================================== + # collect datasets + # =================================================================== + # collect datasets provided by sklearn + dataset_op = get_data_op() + + # =================================================================== + # train model + # =================================================================== + # simple model training directly in component + # kfp.components.load_component_from_file() + train_op = train_xgb_model_op(dataset_op.outputs["dataset_train"]) + + # =================================================================== + # eval model + # =================================================================== + # collect model metrics for deployment condition + eval_op = eval_model_op( + test_set=dataset_op.outputs["dataset_test"], xgb_model=train_op.outputs["model_artifact"] + ) + + # ======================================================================== + # model deployment when threshold condition is met + # ======================================================================== + with dsl.Condition( + eval_op.outputs["test_score"] > eval_acc_threshold, + name="model-deploy-decision", + ): + # =================================================================== + # create model resource + # =================================================================== + # upload model to vertex ai + model_upload_task = ( + aip_components.ModelUploadOp( + project=cfg.PROJECT_ID, + display_name=cfg.MODEL_DISPLAY_NAME, + location=cfg.REGION, + 
serving_container_image_uri=cfg.SERVE_IMAGE_URI, + labels=cfg.PIPELINE_LABELS, + artifact_uri=train_op.outputs["model_artifact_path"], + ) + .set_display_name("Upload model") + .after(eval_op) + ) + + # =================================================================== + # create Vertex AI Endpoint + # =================================================================== + # create endpoint to deploy one or more models + # An endpoint provides a service URL where the prediction requests are sent + endpoint_create_task = ( + aip_components.EndpointCreateOp( + project=cfg.PROJECT_ID, + location=cfg.REGION, + display_name=cfg.MODEL_NAME + "-model-endpoint", + labels=cfg.PIPELINE_LABELS, + ) + .set_display_name("Create model endpoint") + .after(model_upload_task) + ) + + # =================================================================== + # deploy model to Vertex AI Endpoint + # =================================================================== + # deploy models to endpoint to associates physical resources with the model + # so it can serve online predictions + model_deploy_task = aip_components.ModelDeployOp( + endpoint=endpoint_create_task.outputs["endpoint"], + model=model_upload_task.outputs["model"], + deployed_model_display_name=cfg.MODEL_NAME, + dedicated_resources_machine_type=cfg.SERVING_MACHINE_TYPE, + dedicated_resources_min_replica_count=cfg.SERVING_MIN_REPLICA_COUNT, + dedicated_resources_max_replica_count=cfg.SERVING_MAX_REPLICA_COUNT, + traffic_split=cfg.SERVING_TRAFFIC_SPLIT, + ).set_display_name("Deploy model to endpoint") + + # =================================================================== + # test model deployment + # =================================================================== + # test model deployment by making online prediction requests + test_instances = [ + [ + 36, + 14.25, + 21.72, + 93.63, + 633.0, + 0.09823, + 0.1098, + 0.1319, + 0.05598, + 0.1885, + 0.06125, + 0.286, + 1.019, + 2.657, + 24.91, + 0.005878, + 0.02995, + 0.04815, + 0.01161, + 0.02028, + 0.004022, + 15.89, + 30.36, + 116.2, + 799.6, + 0.1446, + 0.4238, + 0.5186, + 0.1447, + 0.3591, + 0.1014, + ], + [ + 226, + 10.44, + 15.46, + 66.62, + 329.6, + 0.1053, + 0.07722, + 0.006643, + 0.01216, + 0.1788, + 0.0645, + 0.1913, + 0.9027, + 1.208, + 11.86, + 0.006513, + 0.008061, + 0.002817, + 0.004972, + 0.01502, + 0.002821, + 11.52, + 19.8, + 73.47, + 395.4, + 0.1341, + 0.1153, + 0.02639, + 0.04464, + 0.2615, + 0.08269, + ], + ] + response = make_prediction_request.make_prediction_request( + project=cfg.PROJECT_ID, + bucket=cfg.BUCKET, + endpoint=model_deploy_task.outputs["gcp_resources"], + instances=test_instances, + ).set_display_name("Make prediction request") + response diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/pyproject.toml b/templates/sklearn/{{cookiecutter.project_slug}}/pyproject.toml new file mode 100644 index 00000000..7e5095d7 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/pyproject.toml @@ -0,0 +1,75 @@ +[tool.poetry] +authors = ["{{cookiecutter.project_owner_fullname}}", "{{cookiecutter.project_owner_email}}"] +description = "{{cookiecutter.project_description}}" +name = "{{cookiecutter.project_slug}}" +packages = [ + {include = "{{cookiecutter.project_slug}}", from = "pipeline"}, +] +version = "{{cookiecutter.project_version}}" + +[tool.poetry.dependencies] +python = ">=3.8,<3.11" +google-cloud-pipeline-components= "^1.0.5" +kfp = "^1.8.12" +wanna-ml = "0.1.3" +pandas = "^1.4.2" + +[tool.poetry.dev-dependencies] +black = "^22.3.0" +isort = 
"^5.10.1" +pylint = "^2.13.8" +pytest = "^7.1.2" +taskipy = "^1.10.1" + +[tool.taskipy.tasks] +black = "black pipeline" +check = "task isort && task black && task pylint" +isort = "isort pipeline" +pylint = "pylint pipeline" + +[tool.pytest.ini_options] +addopts = "--verbose --pylint --pylint-rcfile=pyproject.toml --cov={{cookiecutter.project_slug}} --cov-report html --mypy" +filterwarnings = [ + "error", + "ignore::UserWarning", # note the use of single quote below to denote "raw" strings in TOML + 'ignore::DeprecationWarning', +] + +[tool.pylint.master] +good-names = "logger,e,i,j,n,m,f,_,xs,ys,df" +ignore = "sphinx" + +[tool.pylint.messages_control] +disable = "all" +enable = """, + unused-import, + fixme, + useless-object-inheritance, + unused-variable, + unused-argument, + unexpected-keyword-arg, + string, + unreachable, + invalid-name, + logging-format-interpolation, + logging-fstring-interpolation, + unnecesary-pass, + """ +ignored-argument-names = "_.*|^ignored_|^unused_|args|kwargs" + +[black] +extend-ignore = "E203" +max-line-length = 120 + +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core>=1.0.0"] + +[[tool.poetry.source]] +name = "avast-repo-local" # This name will be used in the configuration to retrieve the proper credentials +url = "https://artifactory.ida.avast.com/artifactory/api/pypi/pypi-local/simple/" # URL used to download your packages from + +[[tool.poetry.source]] +default = true +name = "avast-repo-remote" # This name will be used in the configuration to retrieve the proper credentials +url = "https://artifactory.ida.avast.com/artifactory/api/pypi/pypi-remote/simple/" # URL used to download your packages from diff --git a/templates/sklearn/{{cookiecutter.project_slug}}/wanna.yaml b/templates/sklearn/{{cookiecutter.project_slug}}/wanna.yaml new file mode 100644 index 00000000..a03e7b47 --- /dev/null +++ b/templates/sklearn/{{cookiecutter.project_slug}}/wanna.yaml @@ -0,0 +1,47 @@ +wanna_project: + name: {{ cookiecutter.project_slug }} + version: {{ cookiecutter.project_version }} + authors: [{{ cookiecutter.project_owner_email }}] + +gcp_profiles: + - profile_name: default + project_id: {{ cookiecutter.gcp_project_id }} + zone: europe-west1-b + bucket: {{ cookiecutter.gcp_bucket }} + labels: + service_account: {{ cookiecutter.gcp_service_account }} + +tensorboards: + - name: {{ cookiecutter.project_slug }}-board + +docker: + images: + - build_type: local_build_image + name: train + context_dir: notebook/train + dockerfile: notebook/train/Dockerfile + + - build_type: provided_image + name: serve + image_url: europe-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-4:latest + repository: {{ cookiecutter.gcp_artifact_registry_repository }} + +notebooks: + - name: {{ cookiecutter.project_slug }}-notebook + instance_owner: {{ cookiecutter.gcp_service_account }} + machine_type: n1-standard-4 + labels: + notebook_usecase: {{ cookiecutter.project_slug }} + environment: + docker_image_ref: train + tensorboard_ref: {{ cookiecutter.project_slug }}-board + +pipelines: + - name: {{ cookiecutter.project_slug }}-pipeline + schedule: + cron: 2 * * * * + bucket: gs://{{ cookiecutter.gcp_bucket }} + pipeline_file: pipeline/pipeline.py + pipeline_params: pipeline/params.yaml + docker_image_ref: ["train", "serve"] + tensorboard_ref: {{ cookiecutter.project_slug }}-board