Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .github/workflows/github-actions-basic.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# CI workflow: on every push, lint with flake8 and run pytest for each
# Python version in the matrix.
name: Python package

on: [push]

jobs:
  build:

    runs-on: ubuntu-latest
    env:
      # Directory that the `run` steps of this job execute in.
      working-directory: Chapter02
    defaults:
      run:
        # Default working directory for the `run` steps below.
        working-directory: ${{ env.working-directory }}
    strategy:
      matrix:
        # Kept as quoted strings: an unquoted 3.10 would be read by YAML as
        # the number 3.1.
        python-version: ["3.9", "3.10"]

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pytest
34 changes: 34 additions & 0 deletions Chapter02/plot_outlier_detection_wine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
Outlier detection example for github actions.
"""

import numpy as np
from sklearn.covariance import EllipticEnvelope
from sklearn.svm import OneClassSVM
import matplotlib.pyplot as plt
import matplotlib.font_manager
from sklearn.datasets import load_wine

# Define the outlier detectors ("classifiers") to compare, keyed by a display name
classifiers = {
    "Empirical Covariance": EllipticEnvelope(support_fraction=1.0, contamination=0.25),
    "Robust Covariance (Minimum Covariance Determinant)": EllipticEnvelope(
        contamination=0.25
    ),
    "OCSVM": OneClassSVM(nu=0.25, gamma=0.35),
}
# NOTE(review): colors, legend1 and legend2 are never read in this script --
# presumably left for plotting code that is not present; confirm before removing.
colors = ["m", "g", "b"]
legend1 = {}
legend2 = {}

# Get data: keep only columns 1 and 2 of the wine feature matrix
X1 = load_wine()["data"][:, [1, 2]]  # two clusters

# Learn a frontier for outlier detection with several classifiers.
# Each fitted estimator's decision function is evaluated on a 500 x 500 grid
# covering [0, 6] on the first axis and [1, 4.5] on the second.
xx1, yy1 = np.meshgrid(np.linspace(0, 6, 500), np.linspace(1, 4.5, 500))
# Only the estimators are needed here, so iterate the dict values directly
# instead of unpacking an index and a name that are never used.
for clf in classifiers.values():
    clf.fit(X1)
    Z1 = clf.decision_function(np.c_[xx1.ravel(), yy1.ravel()])
    # Restore the grid's 2-D layout.
    # NOTE(review): Z1 is overwritten on every iteration and never consumed
    # in this script.
    Z1 = Z1.reshape(xx1.shape)


23 changes: 23 additions & 0 deletions Chapter02/simple_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Random Forest Classifier on the wine dataset.
"""

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
#import joblib

# Fetch the wine data as a (features, labels) pair
X, y = load_wine(return_X_y=True)
# Replace the labels with the result of comparing them against class 2
y = (y == 2)

# Partition into training and test sets using a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

# Build a seeded 10-estimator random forest, then fit it on the training set
rfc = RandomForestClassifier(
    random_state=42,
    n_estimators=10,
)
rfc.fit(X_train, y_train)

# Saving the fitted model with joblib is currently disabled
# joblib.dump(rfc, 'rfc.joblib')

2 changes: 2 additions & 0 deletions Chapter02/test_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def test_example():
    """Placeholder test: performs no checks and always passes.

    NOTE(review): presumably present so that a pytest run has at least one
    test to collect -- confirm, and replace with real assertions when
    there is behavior to verify.
    """
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,9 @@ def my_pipeline():
# got this from running kubectl cluster-info --context kind-mlewp (this is cluster name)
#endpoint = 'https://127.0.0.1:50663'
kfp_client = Client(host=endpoint)
run = kfp_client.create_run_from_pipeline_func(ß
run = kfp_client.create_run_from_pipeline_func(
my_pipeline,
mode=kfp.dsl.PipelineExecutionMode.V2_COMPATIBLE,
)ßßß
)
url = f'{endpoint}/#/runs/details/{run.run_id}'
print(url)
4 changes: 2 additions & 2 deletions Chapter05/reaching_zen/reaching_zen/steps/data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ def data_loader(
dataset = load_iris(as_frame=True).frame
elif params.dataset == SklearnDataset.breast_cancer:
dataset = load_breast_cancer(as_frame=True).frame
elif params.dataset == SklearnDataset.diabetes:
dataset = load_diabetes(as_frame=True).frame
# elif params.dataset == SklearnDataset.diabetes:
# dataset = load_diabetes(as_frame=True).frame
logger.info(f"Loaded dataset {params.dataset.value}: %s", dataset.info())
logger.info(dataset.head())
### YOUR CODE ENDS HERE ###
Expand Down