[ML-16200] Add pip_requirements and additional_pip_requirements to `log_model` (mlflow#4519)

* revert changes on other flavors
* Rename additional_pip_requirements to extra_pip_requirements
* fix
* clean up

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
Showing 11 changed files with 684 additions and 20 deletions.
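As a quick orientation before the diff, here is a minimal, self-contained sketch of how the two new keyword arguments differ, mirroring the xgboost flavor exercised in this commit; the version pins are placeholders rather than values from the change:

# Minimal sketch (not part of the diff) of the two new keyword arguments.
# The version pins below are placeholders.
import mlflow
import xgboost as xgb
from sklearn.datasets import load_iris

iris = load_iris()
booster = xgb.train({}, xgb.DMatrix(iris.data, iris.target))

with mlflow.start_run():
    # pip_requirements replaces the inferred requirements (mlflow itself is still added).
    mlflow.xgboost.log_model(booster, "replaced", pip_requirements=["xgboost==1.4.2"])
    # extra_pip_requirements keeps the inferred requirements and appends extras.
    mlflow.xgboost.log_model(booster, "extended", extra_pip_requirements=["pandas==1.2.4"])

The full example file added by the commit, shown next, walks through each variant and asserts on the resulting requirements.txt artifact.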
@@ -0,0 +1,72 @@
""" | ||
This example demonstrates how to specify pip requirements using `pip_requirements` and | ||
`extra_pip_requirements` when logging a model via `mlflow.*.log_model`. | ||
""" | ||
|
||
import tempfile | ||
|
||
import sklearn | ||
from sklearn.datasets import load_iris | ||
import xgboost as xgb | ||
import mlflow | ||
|
||
|
||
def get_pip_requirements(run_id, artifact_path): | ||
client = mlflow.tracking.MlflowClient() | ||
local_path = client.download_artifacts(run_id, f"{artifact_path}/requirements.txt") | ||
with open(local_path) as f: | ||
return f.read().splitlines() | ||
|
||
|
||
def main(): | ||
iris = load_iris() | ||
dtrain = xgb.DMatrix(iris.data, iris.target) | ||
model = xgb.train({}, dtrain) | ||
|
||
xgb_req = f"xgboost=={xgb.__version__}" | ||
sklearn_req = f"scikit-learn=={sklearn.__version__}" | ||
|
||
with mlflow.start_run() as run: | ||
run_id = run.info.run_id | ||
|
||
# Default (both `pip_requirements` and `extra_pip_requirements` are unspecified) | ||
artifact_path = "default" | ||
mlflow.xgboost.log_model(model, artifact_path) | ||
pip_reqs = get_pip_requirements(run_id, artifact_path) | ||
assert pip_reqs == ["mlflow", xgb_req], pip_reqs | ||
|
||
# Overwrite the default set of pip requirements using `pip_requirements` | ||
artifact_path = "pip_requirements" | ||
mlflow.xgboost.log_model(model, artifact_path, pip_requirements=[sklearn_req]) | ||
pip_reqs = get_pip_requirements(run_id, artifact_path) | ||
assert pip_reqs == ["mlflow", sklearn_req], pip_reqs | ||
|
||
# Add extra pip requirements on top of the default set of pip requirements | ||
# using `extra_pip_requirements` | ||
artifact_path = "extra_pip_requirements" | ||
mlflow.xgboost.log_model(model, artifact_path, extra_pip_requirements=[sklearn_req]) | ||
pip_reqs = get_pip_requirements(run_id, artifact_path) | ||
assert pip_reqs == ["mlflow", xgb_req, sklearn_req], pip_reqs | ||
|
||
# Specify pip requirements using a requirements file | ||
with tempfile.NamedTemporaryFile("w", suffix=".requirements.txt") as f: | ||
f.write(sklearn_req) | ||
f.flush() | ||
|
||
# Path to a pip requirements file | ||
artifact_path = "requirements_file_path" | ||
mlflow.xgboost.log_model(model, artifact_path, pip_requirements=f.name) | ||
pip_reqs = get_pip_requirements(run_id, artifact_path) | ||
assert pip_reqs == ["mlflow", sklearn_req], pip_reqs | ||
|
||
# List of pip requirement strings | ||
artifact_path = "requirements_file_list" | ||
mlflow.xgboost.log_model( | ||
model, artifact_path, pip_requirements=[xgb_req, f"-r {f.name}"] | ||
) | ||
pip_reqs = get_pip_requirements(run_id, artifact_path) | ||
assert pip_reqs == ["mlflow", xgb_req, sklearn_req], pip_reqs | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
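To recap the five cases above, this is roughly what each logged `requirements.txt` ends up containing, read directly off the assertions in the example; the actual pins depend on the locally installed xgboost and scikit-learn:

# Expected requirements.txt contents per artifact path, mirroring the assertions above.
# "<installed>" stands for whatever version is installed in the logging environment.
expected_requirements = {
    "default": ["mlflow", "xgboost==<installed>"],
    "pip_requirements": ["mlflow", "scikit-learn==<installed>"],
    "extra_pip_requirements": ["mlflow", "xgboost==<installed>", "scikit-learn==<installed>"],
    "requirements_file_path": ["mlflow", "scikit-learn==<installed>"],
    "requirements_file_list": ["mlflow", "xgboost==<installed>", "scikit-learn==<installed>"],
}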
@@ -0,0 +1,86 @@
""" | ||
This module provides a set of utilities for interpreting and creating requirements files | ||
(e.g. pip's `requirements.txt`), which is useful for managing ML software environments. | ||
""" | ||
import os | ||
from itertools import filterfalse | ||
|
||
|
||
def _is_comment(line): | ||
return line.startswith("#") | ||
|
||
|
||
def _is_empty(line): | ||
return line == "" | ||
|
||
|
||
def _strip_inline_comment(line): | ||
return line[: line.find(" #")].rstrip() if " #" in line else line | ||
|
||
|
||
def _is_requirements_file(line): | ||
return line.startswith("-r ") or line.startswith("--requirement ") | ||
|
||
|
||
def _join_continued_lines(lines): | ||
""" | ||
Joins lines ending with '\\'. | ||
>>> _join_continued_lines["a\\", "b\\", "c"] | ||
>>> 'abc' | ||
""" | ||
continued_lines = [] | ||
|
||
for line in lines: | ||
if line.endswith("\\"): | ||
continued_lines.append(line.rstrip("\\")) | ||
else: | ||
continued_lines.append(line) | ||
yield "".join(continued_lines) | ||
continued_lines.clear() | ||
|
||
# The last line ends with '\' | ||
if continued_lines: | ||
yield "".join(continued_lines) | ||
|
||
|
||
# TODO: Add support for constraint files: | ||
# https://github.com/mlflow/mlflow/pull/4519#discussion_r668412179 | ||
def _parse_requirements(requirements_file): | ||
""" | ||
A simplified version of `pip._internal.req.parse_requirements` which performs the following | ||
operations on the given requirements file and yields the parsed requirements. | ||
- Remove comments and blank lines | ||
- Join continued lines | ||
- Resolve requirements file references (e.g. '-r requirements.txt') | ||
References: | ||
- `pip._internal.req.parse_requirements`: | ||
https://github.com/pypa/pip/blob/7a77484a492c8f1e1f5ef24eaf71a43df9ea47eb/src/pip/_internal/req/req_file.py#L118 | ||
- Requirements File Format: | ||
https://pip.pypa.io/en/stable/cli/pip_install/#requirements-file-format | ||
:param requirements_file: A string path to a requirements file on the local filesystem. | ||
:return: A list of parsed requirements (e.g. ``["scikit-learn==0.24.2", ...]``). | ||
""" | ||
with open(requirements_file) as f: | ||
lines = f.read().splitlines() | ||
|
||
lines = map(str.strip, lines) | ||
lines = map(_strip_inline_comment, lines) | ||
lines = _join_continued_lines(lines) | ||
lines = filterfalse(_is_comment, lines) | ||
lines = filterfalse(_is_empty, lines) | ||
|
||
for line in lines: | ||
if _is_requirements_file(line): | ||
req_file = line.split(maxsplit=1)[1] | ||
abs_path = ( | ||
req_file | ||
if os.path.isabs(req_file) | ||
else os.path.join(os.path.dirname(requirements_file), req_file) | ||
) | ||
yield from _parse_requirements(abs_path) | ||
else: | ||
yield line |
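For illustration, here is a small usage sketch of `_parse_requirements` (not part of the diff), assuming the helpers above are in scope; the file names and package pins are made up. It shows the three behaviors the docstring lists: comment stripping, continued-line joining, and resolution of `-r` references relative to the referencing file.

# Hypothetical demonstration of _parse_requirements; all paths and pins are made up.
import os
import tempfile

with tempfile.TemporaryDirectory() as tmpdir:
    extra = os.path.join(tmpdir, "extra.txt")
    base = os.path.join(tmpdir, "base.txt")
    with open(extra, "w") as f:
        f.write("pandas==1.2.4  # inline comment is stripped\n")
    with open(base, "w") as f:
        f.write(
            "# a full-line comment is dropped\n"
            "scikit-learn==0.24.2\n"
            "xgboost==\\\n"  # continued line ...
            "1.4.2\n"        # ... joined into 'xgboost==1.4.2'
            "-r extra.txt\n"  # resolved relative to base.txt
        )
    print(list(_parse_requirements(base)))
    # ['scikit-learn==0.24.2', 'xgboost==1.4.2', 'pandas==1.2.4']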