[ML-16200] Add pip_requirements and additional_pip_requirements to `log_model` (mlflow#4519)

* revert changes on other flavors

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Rename additional_pip_requirements to extra_pip_requirements

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* fix

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* clean up

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
harupy authored Jul 15, 2021
1 parent 8ec8603 commit c89caef
Showing 11 changed files with 684 additions and 20 deletions.
2 changes: 2 additions & 0 deletions dev/small-requirements.txt
@@ -1,4 +1,6 @@
## Small test reqs
# Required for testing utilities for parsing pip requirements
pip>=20.1
scipy
# NB: We're specifying a test-only minimum version bound for sqlalchemy in order to reliably
# execute schema consistency checks, the semantics of which were changed in sqlalchemy 1.3.21
8 changes: 8 additions & 0 deletions docs/source/tutorials-and-examples/tutorial.rst
@@ -240,6 +240,14 @@ Now that you have your training code, you can package it so that other data scie
parameters = list(alpha = 0.2)
)
.. _pip-requirements-example:

Specifying pip requirements using ``pip_requirements`` and ``extra_pip_requirements``
-------------------------------------------------------------------------------------

.. literalinclude:: ../../../examples/pip_requirements/pip_requirements.py


Serving the Model
-----------------

72 changes: 72 additions & 0 deletions examples/pip_requirements/pip_requirements.py
@@ -0,0 +1,72 @@
"""
This example demonstrates how to specify pip requirements using `pip_requirements` and
`extra_pip_requirements` when logging a model via `mlflow.*.log_model`.
"""

import tempfile

import sklearn
from sklearn.datasets import load_iris
import xgboost as xgb
import mlflow


def get_pip_requirements(run_id, artifact_path):
client = mlflow.tracking.MlflowClient()
local_path = client.download_artifacts(run_id, f"{artifact_path}/requirements.txt")
with open(local_path) as f:
return f.read().splitlines()


def main():
iris = load_iris()
dtrain = xgb.DMatrix(iris.data, iris.target)
model = xgb.train({}, dtrain)

xgb_req = f"xgboost=={xgb.__version__}"
sklearn_req = f"scikit-learn=={sklearn.__version__}"

with mlflow.start_run() as run:
run_id = run.info.run_id

# Default (both `pip_requirements` and `extra_pip_requirements` are unspecified)
artifact_path = "default"
mlflow.xgboost.log_model(model, artifact_path)
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", xgb_req], pip_reqs

# Overwrite the default set of pip requirements using `pip_requirements`
artifact_path = "pip_requirements"
mlflow.xgboost.log_model(model, artifact_path, pip_requirements=[sklearn_req])
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", sklearn_req], pip_reqs

# Add extra pip requirements on top of the default set of pip requirements
# using `extra_pip_requirements`
artifact_path = "extra_pip_requirements"
mlflow.xgboost.log_model(model, artifact_path, extra_pip_requirements=[sklearn_req])
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", xgb_req, sklearn_req], pip_reqs

# Specify pip requirements using a requirements file
with tempfile.NamedTemporaryFile("w", suffix=".requirements.txt") as f:
f.write(sklearn_req)
f.flush()

# Path to a pip requirements file
artifact_path = "requirements_file_path"
mlflow.xgboost.log_model(model, artifact_path, pip_requirements=f.name)
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", sklearn_req], pip_reqs

# List of pip requirement strings
artifact_path = "requirements_file_list"
mlflow.xgboost.log_model(
model, artifact_path, pip_requirements=[xgb_req, f"-r {f.name}"]
)
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", xgb_req, sklearn_req], pip_reqs


if __name__ == "__main__":
main()
78 changes: 75 additions & 3 deletions mlflow/utils/environment.py
@@ -1,7 +1,9 @@
import yaml
import tempfile
import os

from mlflow.utils import PYTHON_VERSION
from mlflow.utils.requirements_utils import _parse_requirements

_conda_header = """\
name: mlflow-env
@@ -68,9 +70,9 @@ def _mlflow_additional_pip_env(
return requirements


def _get_additional_pip_dep(conda_env):
def _get_pip_deps(conda_env):
"""
:return: The additional pip dependencies from the conda env
:return: The pip dependencies from the conda env
"""
if conda_env is not None:
for dep in conda_env["dependencies"]:
@@ -80,5 +82,75 @@ def _get_additional_pip_dep(conda_env):


def _log_pip_requirements(conda_env, path, requirements_file="requirements.txt"):
pip_deps = _get_additional_pip_dep(conda_env)
pip_deps = _get_pip_deps(conda_env)
_mlflow_additional_pip_env(pip_deps, path=os.path.join(path, requirements_file))


def _parse_pip_requirements(pip_requirements):
"""
Parses an iterable of pip requirement strings or a pip requirements file.
:param pip_requirements: Either an iterable of pip requirement strings
(e.g. ``["scikit-learn", "-r requirements.txt"]``) or the string path to a pip requirements
file on the local filesystem (e.g. ``"requirements.txt"``). If ``None``, an empty list will
be returned.
:return: A list of pip requirement strings.
"""
if pip_requirements is None:
return []

def _is_string(x):
return isinstance(x, str)

def _is_iterable(x):
try:
iter(x)
return True
except Exception:
return False

if _is_string(pip_requirements):
return list(_parse_requirements(pip_requirements))
elif _is_iterable(pip_requirements) and all(map(_is_string, pip_requirements)):
try:
# Create a temporary requirements file in the current working directory
tmp_req_file = tempfile.NamedTemporaryFile(
mode="w",
prefix="mlflow.",
suffix=".tmp.requirements.txt",
dir=os.getcwd(),
# Setting `delete` to True causes a permission-denied error on Windows
# while trying to read the generated temporary file.
delete=False,
)
tmp_req_file.write("\n".join(pip_requirements))
tmp_req_file.close()
return _parse_pip_requirements(tmp_req_file.name)
finally:
# Clean up the temporary requirements file
os.remove(tmp_req_file.name)
else:
raise TypeError(
"`pip_requirements` must be either a string path to a pip requirements file on the "
"local filesystem or an iterable of pip requirement strings, but got `{}`".format(
type(pip_requirements)
)
)


def _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements):
"""
Validates that at most one of `conda_env`, `pip_requirements`, and
`extra_pip_requirements` is specified.
"""
args = [
conda_env,
pip_requirements,
extra_pip_requirements,
]
specified = [arg for arg in args if arg is not None]
if len(specified) > 1:
raise ValueError(
"Only one of `conda_env`, `pip_requirements`, and "
"`extra_pip_requirements` can be specified"
)
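
For readers skimming the diff, here is a minimal sketch of how the two new helpers behave. It assumes a checkout that contains this commit (so that `mlflow.utils.environment` exposes `_parse_pip_requirements` and `_validate_env_arguments`); the package names, file paths, and printed results are illustrative only.

import os
import tempfile

from mlflow.utils.environment import _parse_pip_requirements, _validate_env_arguments

# An iterable of requirement strings is written to a temporary requirements file,
# parsed, and returned as a list of requirement strings.
print(_parse_pip_requirements(["scikit-learn", "xgboost==1.4.2"]))
# Expected (per the diff above): ['scikit-learn', 'xgboost==1.4.2']

# A string argument is treated as a path to a requirements file on the local filesystem.
with tempfile.TemporaryDirectory() as tmpdir:
    req_path = os.path.join(tmpdir, "requirements.txt")
    with open(req_path, "w") as f:
        f.write("pandas\n# a comment that should be stripped\n")
    print(_parse_pip_requirements(req_path))
    # Expected: ['pandas']

# Specifying more than one of `conda_env`, `pip_requirements`, and
# `extra_pip_requirements` is rejected.
try:
    _validate_env_arguments(
        conda_env={"dependencies": []},
        pip_requirements=["pandas"],
        extra_pip_requirements=None,
    )
except ValueError as exc:
    print(exc)

Note that the temporary file created for the iterable case is placed in the current working directory and removed in the `finally` block above, so the sketch needs write access to the working directory.
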
86 changes: 86 additions & 0 deletions mlflow/utils/requirements_utils.py
@@ -0,0 +1,86 @@
"""
This module provides a set of utilities for interpreting and creating requirements files
(e.g. pip's `requirements.txt`), which is useful for managing ML software environments.
"""
import os
from itertools import filterfalse


def _is_comment(line):
return line.startswith("#")


def _is_empty(line):
return line == ""


def _strip_inline_comment(line):
return line[: line.find(" #")].rstrip() if " #" in line else line


def _is_requirements_file(line):
return line.startswith("-r ") or line.startswith("--requirement ")


def _join_continued_lines(lines):
"""
Joins lines ending with '\\'.
>>> list(_join_continued_lines(["a\\", "b\\", "c"]))
['abc']
"""
continued_lines = []

for line in lines:
if line.endswith("\\"):
continued_lines.append(line.rstrip("\\"))
else:
continued_lines.append(line)
yield "".join(continued_lines)
continued_lines.clear()

# The last line ends with '\'
if continued_lines:
yield "".join(continued_lines)


# TODO: Add support for constraint files:
# https://github.com/mlflow/mlflow/pull/4519#discussion_r668412179
def _parse_requirements(requirements_file):
"""
A simplified version of `pip._internal.req.parse_requirements` which performs the following
operations on the given requirements file and yields the parsed requirements.
- Remove comments and blank lines
- Join continued lines
- Resolve requirements file references (e.g. '-r requirements.txt')
References:
- `pip._internal.req.parse_requirements`:
https://github.com/pypa/pip/blob/7a77484a492c8f1e1f5ef24eaf71a43df9ea47eb/src/pip/_internal/req/req_file.py#L118
- Requirements File Format:
https://pip.pypa.io/en/stable/cli/pip_install/#requirements-file-format
:param requirements_file: A string path to a requirements file on the local filesystem.
:return: A generator that yields the parsed requirement strings (e.g. ``"scikit-learn==0.24.2"``).
"""
with open(requirements_file) as f:
lines = f.read().splitlines()

lines = map(str.strip, lines)
lines = map(_strip_inline_comment, lines)
lines = _join_continued_lines(lines)
lines = filterfalse(_is_comment, lines)
lines = filterfalse(_is_empty, lines)

for line in lines:
if _is_requirements_file(line):
req_file = line.split(maxsplit=1)[1]
abs_path = (
req_file
if os.path.isabs(req_file)
else os.path.join(os.path.dirname(requirements_file), req_file)
)
yield from _parse_requirements(abs_path)
else:
yield line
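
To make the parsing rules above concrete (inline-comment stripping, line continuation, and `-r` references resolved relative to the referencing file), a small self-contained sketch; the file names and version pins are hypothetical, and the expected output assumes the behavior implemented above.

import os
import tempfile

from mlflow.utils.requirements_utils import _parse_requirements

with tempfile.TemporaryDirectory() as tmpdir:
    # A referenced requirements file with an inline comment.
    base = os.path.join(tmpdir, "base-requirements.txt")
    with open(base, "w") as f:
        f.write("numpy==1.21.0  # pinned for this sketch\n")

    # The main requirements file: a comment, a blank line, a continued line,
    # and a '-r' reference that is resolved relative to this file.
    main = os.path.join(tmpdir, "requirements.txt")
    with open(main, "w") as f:
        f.write("# build-time comment\n")
        f.write("\n")
        f.write("scikit-\\\n")
        f.write("learn\n")
        f.write("-r base-requirements.txt\n")

    print(list(_parse_requirements(main)))
    # Expected (assuming the behavior above): ['scikit-learn', 'numpy==1.21.0']
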
