Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions deploy/nannyml/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,26 @@ ENV UV_LINK_MODE=copy
WORKDIR /app

RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --no-install-project --group monitoring
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --no-install-project --group monitoring


FROM python:3.12-slim
WORKDIR /app
COPY --from=builder /app/.venv /app/.venv

RUN apt-get update && \
apt-get install -yqq --no-install-recommends libgomp1
apt-get install -yqq --no-install-recommends libgomp1

ARG NANNYML_ESTIMATOR
COPY deploy/nannyml/main.py .

ARG NANNYML_ESTIMATOR
COPY $NANNYML_ESTIMATOR ./nannyml_estimator.pkl
ENV NANNYML_ESTIMATOR=/app/nannyml_estimator.pkl

ARG NANNYML_DRIFT_CALCULATOR
COPY $NANNYML_DRIFT_CALCULATOR ./nannyml_drift_calc.pkl
ENV NANNYML_DRIFT_CALCULATOR=/app/nannyml_drift_calc.pkl

CMD ["/app/.venv/bin/uvicorn", "--reload", "main:app", "--host", "0.0.0.0", "--port", "4040"]
20 changes: 10 additions & 10 deletions src/income_prediction/assets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .fairness import evaluate_fairness
from .model import model_container as model_container
from .monitoring import nannyml_container as nannyml_container
from .monitoring import nannyml_drift_calculator as nannyml_drift_calculator
from .monitoring import nannyml_estimator as nannyml_estimator
from .monitoring import reference_dataset as reference_dataset

Expand Down Expand Up @@ -108,16 +109,6 @@ def optuna_search_xgb(
)
mlflow.log_input(test_ds)

mlflow.sklearn.log_model(
best_model,
artifact_path="model",
registered_model_name=model_name,
code_paths=["src/asec"],
input_example=train_data.drop(columns=CensusASECMetadata.TARGET).head(
5
),
)

mlflow.evaluate(
model=best_model.predict,
data=test_ds,
Expand All @@ -135,4 +126,13 @@ def optuna_search_xgb(
fairness_metrics = evaluate_fairness(test_data, y_pred)
log_fairness_metrics(fairness_metrics)

mlflow.sklearn.log_model(
best_model,
artifact_path="model",
registered_model_name=model_name,
code_paths=["src/asec"],
input_example=train_data.drop(columns=CensusASECMetadata.TARGET).head(
5
),
)
return best_model
61 changes: 50 additions & 11 deletions src/income_prediction/assets/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,31 +63,70 @@ def nannyml_estimator(
return estimator


@dg.asset(kinds={"docker"}, group_name="deployment", deps=["nannyml_estimator"])
@dg.asset(group_name="deployment")
def nannyml_drift_calculator(
    reference_dataset: pd.DataFrame,
    nanny_ml_config: NannyMLConfig,
) -> nml.UnivariateDriftCalculator:
    """Build and fit a univariate drift calculator on the reference data.

    Columns whose names start with ``prediction``, ``prob_`` or ``target``
    are left out of the monitored set; every other column of
    ``reference_dataset`` is passed to the calculator.

    Args:
        reference_dataset: Frame the calculator is fitted on.
        nanny_ml_config: Supplies ``chunk_size`` for the calculator.

    Returns:
        The calculator after ``fit`` has been called on ``reference_dataset``.
    """
    # Name prefixes of columns that are excluded from drift monitoring.
    excluded_prefixes = ("prediction", "prob_", "target")

    monitored_columns = []
    for name in reference_dataset.columns:
        if name.startswith(excluded_prefixes):
            continue
        monitored_columns.append(name)

    drift_calculator = nml.UnivariateDriftCalculator(
        column_names=monitored_columns,
        treat_as_categorical=CensusASECMetadata.CATEGORICAL_FEATURES,
        continuous_methods=["kolmogorov_smirnov"],
        categorical_methods=["chi2"],
        chunk_size=nanny_ml_config.chunk_size,
    )
    drift_calculator.fit(reference_dataset)
    return drift_calculator


@dg.asset(
kinds={"docker"},
group_name="deployment",
deps=["nannyml_estimator", "nannyml_drift_calculator"],
)
def nannyml_container(
context: dg.AssetExecutionContext,
model_version: ModelVersion,
nannyml_estimator: nml.CBPE,
nannyml_drift_calculator: nml.UnivariateDriftCalculator,
) -> dg.Output:
build_context = Path(__file__).parents[3]
image_tags = [f"nannyml:{suffix}" for suffix in [model_version.version, "latest"]]
context.log.info(f"{image_tags=}")

# Create tempfile inside the build context, so it can be copied into the image
with NamedTemporaryFile(
prefix="nannyml-cbpe-", suffix=".pkl", dir=build_context
) as tmp_file:
pkl_path = Path(tmp_file.name)

with open(pkl_path, "wb") as f:
pickle.dump(nannyml_estimator, f)
with (
NamedTemporaryFile(
prefix="nannyml-cbpe-", suffix=".pkl", dir=build_context
) as cbpe_file,
NamedTemporaryFile(
prefix="nannyml-drift-", suffix=".pkl", dir=build_context
) as drift_file,
):
pickle.dump(nannyml_estimator, cbpe_file)
pickle.dump(nannyml_drift_calculator, drift_file)

drift_file.flush()
cbpe_file.flush()

context.log.info(f"{pkl_path=}")
context.log.info(f"{build_context=}")
build_result = build_container_image(
build_context,
image_tags,
build_args={"NANNYML_ESTIMATOR": str(pkl_path.name)},
build_args={
"NANNYML_ESTIMATOR": str(
Path(cbpe_file.name).relative_to(build_context)
),
"NANNYML_DRIFT_CALCULATOR": str(
Path(drift_file.name).relative_to(build_context)
),
},
docker_file=build_context / "deploy" / "nannyml" / "Dockerfile",
)

Expand Down
14 changes: 12 additions & 2 deletions src/income_prediction/jobs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import dagster as dg

from .assets.model import model_container
from .assets.monitoring import nannyml_container, nannyml_estimator, reference_dataset
from .assets.monitoring import (
nannyml_container,
nannyml_estimator,
reference_dataset,
nannyml_drift_calculator,
)

model_container_job = dg.define_asset_job(
name="model_container_job",
Expand All @@ -12,5 +17,10 @@
nannyml_container_job = dg.define_asset_job(
name="nannyml_container_job",
description="Monitoring service container image build",
selection=[reference_dataset, nannyml_estimator, nannyml_container],
selection=[
reference_dataset,
nannyml_estimator,
nannyml_drift_calculator,
nannyml_container,
],
)