Skip to content

Commit

Permalink
[pip_requirements and extra_pip_requirements] Add support for constraints files (mlflow#4564)
Browse files Browse the repository at this point in the history

* Add support for constraints files

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* plural

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* update param doc

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* indent

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* refactor

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Fix broken tests

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Fix test_parse_requirements

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* add process_pip_requirements and process_conda_env

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Add error test

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Add new test case

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Use write_to

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Fix filename

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Add tests for log_model

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* use not isinstance

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
  • Loading branch information
harupy authored Jul 21, 2021
1 parent 9bd64dc commit cfed457
Show file tree
Hide file tree
Showing 8 changed files with 480 additions and 105 deletions.
40 changes: 36 additions & 4 deletions examples/pip_requirements/pip_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,24 @@
import mlflow


def get_pip_requirements(run_id, artifact_path):
client = mlflow.tracking.MlflowClient()
local_path = client.download_artifacts(run_id, f"{artifact_path}/requirements.txt")
with open(local_path) as f:
def read_lines(path):
    """Return the lines of the text file at `path`, with newline characters stripped."""
    with open(path) as handle:
        content = handle.read()
    return content.splitlines()


def get_pip_requirements(run_id, artifact_path, return_constraints=False):
    """
    Download and read the pip requirements logged under `artifact_path` for `run_id`.

    :param run_id: The run whose artifacts to read.
    :param artifact_path: The model artifact path containing requirements.txt.
    :param return_constraints: When True, also download constraints.txt and
        return a `(requirements, constraints)` tuple instead of just requirements.
    """
    client = mlflow.tracking.MlflowClient()
    requirements = read_lines(
        client.download_artifacts(run_id, f"{artifact_path}/requirements.txt")
    )
    if not return_constraints:
        return requirements

    constraints = read_lines(
        client.download_artifacts(run_id, f"{artifact_path}/constraints.txt")
    )
    return requirements, constraints


def main():
iris = load_iris()
dtrain = xgb.DMatrix(iris.data, iris.target)
Expand All @@ -34,19 +45,22 @@ def main():
mlflow.xgboost.log_model(model, artifact_path)
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", xgb_req], pip_reqs
print("Model URI:", mlflow.get_artifact_uri(artifact_path))

# Overwrite the default set of pip requirements using `pip_requirements`
artifact_path = "pip_requirements"
mlflow.xgboost.log_model(model, artifact_path, pip_requirements=[sklearn_req])
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", sklearn_req], pip_reqs
print("Model URI:", mlflow.get_artifact_uri(artifact_path))

# Add extra pip requirements on top of the default set of pip requirements
# using `extra_pip_requirements`
artifact_path = "extra_pip_requirements"
mlflow.xgboost.log_model(model, artifact_path, extra_pip_requirements=[sklearn_req])
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", xgb_req, sklearn_req], pip_reqs
print("Model URI:", mlflow.get_artifact_uri(artifact_path))

# Specify pip requirements using a requirements file
with tempfile.NamedTemporaryFile("w", suffix=".requirements.txt") as f:
Expand All @@ -58,6 +72,7 @@ def main():
mlflow.xgboost.log_model(model, artifact_path, pip_requirements=f.name)
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", sklearn_req], pip_reqs
print("Model URI:", mlflow.get_artifact_uri(artifact_path))

# List of pip requirement strings
artifact_path = "requirements_file_list"
Expand All @@ -66,6 +81,23 @@ def main():
)
pip_reqs = get_pip_requirements(run_id, artifact_path)
assert pip_reqs == ["mlflow", xgb_req, sklearn_req], pip_reqs
print("Model URI:", mlflow.get_artifact_uri(artifact_path))

# Using a constraints file
with tempfile.NamedTemporaryFile("w", suffix=".constraints.txt") as f:
f.write(sklearn_req)
f.flush()

artifact_path = "constraints_file"
mlflow.xgboost.log_model(
model, artifact_path, pip_requirements=[xgb_req, f"-c {f.name}"]
)
pip_reqs, pip_cons = get_pip_requirements(
run_id, artifact_path, return_constraints=True
)
assert pip_reqs == ["mlflow", xgb_req, "-c constraints.txt"], pip_reqs
assert pip_cons == [sklearn_req], pip_cons
print("Model URI:", mlflow.get_artifact_uri(artifact_path))


if __name__ == "__main__":
Expand Down
129 changes: 124 additions & 5 deletions mlflow/utils/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@

from mlflow.utils import PYTHON_VERSION
from mlflow.utils.requirements_utils import _parse_requirements
from packaging.requirements import Requirement


_conda_header = """\
name: mlflow-env
channels:
- conda-forge
"""

# Names of the environment files logged alongside a model to record its pip
# requirements and (optionally) pip constraints.
_REQUIREMENTS_FILE_NAME = "requirements.txt"
_CONSTRAINTS_FILE_NAME = "constraints.txt"


def _mlflow_conda_env(
path=None,
Expand Down Expand Up @@ -70,18 +75,55 @@ def _mlflow_additional_pip_env(
return requirements


def _is_pip_deps(dep):
"""
Returns True if `dep` is a dict representing pip dependencies
"""
return isinstance(dep, dict) and "pip" in dep


def _get_pip_deps(conda_env):
    """
    Extract the pip dependency list from a conda environment dict.

    :param conda_env: A dict representing a conda environment, or None.
    :return: The list stored under the first ``{"pip": [...]}`` entry of
        ``conda_env["dependencies"]``, or an empty list if absent.
    """
    if conda_env is None:
        return []
    pip_entries = (
        dep["pip"]
        for dep in conda_env["dependencies"]
        if isinstance(dep, dict) and "pip" in dep
    )
    return next(pip_entries, [])


def _log_pip_requirements(conda_env, path, requirements_file="requirements.txt"):
def _overwrite_pip_deps(conda_env, new_pip_deps):
    """
    Overwrites the pip dependencies section in the given conda env dictionary.
    {
        "name": "env",
        "channels": [...],
        "dependencies": [
            ...,
            "pip",
            {"pip": [...]}, <- Overwrite this
        ],
    }
    """
    rewritten = []
    replaced = False
    for entry in conda_env.get("dependencies", []):
        if isinstance(entry, dict) and "pip" in entry:
            rewritten.append({"pip": new_pip_deps})
            replaced = True
        else:
            rewritten.append(entry)

    # No existing pip section: append one so the new deps are still recorded.
    if not replaced:
        rewritten.append({"pip": new_pip_deps})

    return {**conda_env, "dependencies": rewritten}


def _log_pip_requirements(conda_env, path, requirements_file=_REQUIREMENTS_FILE_NAME):
    # Writes the pip dependencies of `conda_env` to `<path>/<requirements_file>`
    # via `_mlflow_additional_pip_env`.
    pip_deps = _get_pip_deps(conda_env)
    _mlflow_additional_pip_env(pip_deps, path=os.path.join(path, requirements_file))

Expand All @@ -94,10 +136,10 @@ def _parse_pip_requirements(pip_requirements):
(e.g. ``["scikit-learn", "-r requirements.txt"]``) or the string path to a pip requirements
file on the local filesystem (e.g. ``"requirements.txt"``). If ``None``, an empty list will
be returned.
:return: A list of pip requirement strings.
:return: A tuple of parsed requirements and constraints.
"""
if pip_requirements is None:
return []
return [], []

def _is_string(x):
return isinstance(x, str)
Expand All @@ -110,7 +152,15 @@ def _is_iterable(x):
return False

if _is_string(pip_requirements):
return list(_parse_requirements(pip_requirements))
requirements = []
constraints = []
for req_or_con in _parse_requirements(pip_requirements, is_constraint=False):
if req_or_con.is_constraint:
constraints.append(req_or_con.req_str)
else:
requirements.append(req_or_con.req_str)

return requirements, constraints
elif _is_iterable(pip_requirements) and all(map(_is_string, pip_requirements)):
try:
# Create a temporary requirements file in the current working directory
Expand Down Expand Up @@ -154,3 +204,72 @@ def _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)
"Only one of `conda_env`, `pip_requirements`, and "
"`extra_pip_requirements` can be specified"
)


def _is_mlflow_requirement(requirement_string):
    """
    Returns True if `requirement_string` represents a requirement for mlflow (e.g. 'mlflow==1.2.3').
    """
    # `Requirement` parses out the project name, ignoring version specifiers/extras.
    parsed = Requirement(requirement_string)
    return parsed.name.lower() == "mlflow"


def _contains_mlflow_requirement(requirements):
    """
    Returns True if `requirements` contains a requirement for mlflow (e.g. 'mlflow==1.2.3').
    """
    return any(_is_mlflow_requirement(req) for req in requirements)


def _process_pip_requirements(
    default_pip_requirements, pip_requirements=None, extra_pip_requirements=None
):
    """
    Processes `pip_requirements` and `extra_pip_requirements` passed to `mlflow.*.save_model` or
    `mlflow.*.log_model`, and returns a tuple of (conda_env, pip_requirements, pip_constraints).

    :param default_pip_requirements: The flavor's default pip requirements. This list is
        never mutated.
    :param pip_requirements: User-specified requirements that fully replace the defaults
        (list of strings or a path to a requirements file), or None.
    :param extra_pip_requirements: User-specified requirements appended to the defaults,
        or None. Mutually exclusive with `pip_requirements`.
    """
    constraints = []
    if pip_requirements is not None:
        pip_reqs, constraints = _parse_pip_requirements(pip_requirements)
    elif extra_pip_requirements is not None:
        extra_pip_requirements, constraints = _parse_pip_requirements(extra_pip_requirements)
        pip_reqs = default_pip_requirements + extra_pip_requirements
    else:
        # Copy the default list: the `insert`/`append` calls below would otherwise
        # mutate the caller-owned `default_pip_requirements` across invocations.
        pip_reqs = list(default_pip_requirements)

    # Every logged environment must be able to reload the model with mlflow.
    if not _contains_mlflow_requirement(pip_reqs):
        pip_reqs.insert(0, "mlflow")

    # Constraints are logged to a separate file and referenced via `-c`.
    if constraints:
        pip_reqs.append(f"-c {_CONSTRAINTS_FILE_NAME}")

    # Set `install_mlflow` to False because `pip_reqs` already contains `mlflow`
    conda_env = _mlflow_conda_env(additional_pip_deps=pip_reqs, install_mlflow=False)
    return conda_env, pip_reqs, constraints


def _process_conda_env(conda_env):
    """
    Processes `conda_env` passed to `mlflow.*.save_model` or `mlflow.*.log_model`, and returns
    a tuple of (conda_env, pip_requirements, pip_constraints).
    """
    if isinstance(conda_env, str):
        # A path to a conda env yaml file: load it into a dict.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    elif not isinstance(conda_env, dict):
        raise TypeError(
            "Expected a string path to a conda env yaml file or a `dict` representing a conda env, "
            "but got `{}`".format(type(conda_env).__name__)
        )

    # User-specified `conda_env` may contain requirements/constraints file references
    pip_reqs, constraints = _parse_pip_requirements(_get_pip_deps(conda_env))

    if not _contains_mlflow_requirement(pip_reqs):
        pip_reqs.insert(0, "mlflow")

    if constraints:
        pip_reqs.append(f"-c {_CONSTRAINTS_FILE_NAME}")

    return _overwrite_pip_deps(conda_env, pip_reqs), pip_reqs, constraints
42 changes: 29 additions & 13 deletions mlflow/utils/requirements_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import os
from itertools import filterfalse
from collections import namedtuple


def _is_comment(line):
Expand All @@ -22,6 +23,10 @@ def _is_requirements_file(line):
return line.startswith("-r ") or line.startswith("--requirement ")


def _is_constraints_file(line):
return line.startswith("-c ") or line.startswith("--constraint ")


def _join_continued_lines(lines):
"""
Joins lines ending with '\\'.
Expand All @@ -44,25 +49,34 @@ def _join_continued_lines(lines):
yield "".join(continued_lines)


# TODO: Add support for constraint files:
# https://github.com/mlflow/mlflow/pull/4519#discussion_r668412179
def _parse_requirements(requirements_file):
# Represents a single parsed line of a pip requirements/constraints file.
#
# :param req_str: A requirement string (e.g. "scikit-learn == 0.24.2").
# :param is_constraint: A boolean indicating whether this requirement is a constraint
#     (i.e. it originated from a file referenced via '-c'/'--constraint').
_Requirement = namedtuple("_Requirement", ["req_str", "is_constraint"])


def _parse_requirements(requirements_file, is_constraint):
"""
A simplified version of `pip._internal.req.parse_requirements` which performs the following
operations on the given requirements file and yields the parsed requirements.
- Remove comments and blank lines
- Join continued lines
- Resolve requirements file references (e.g. '-r requirements.txt')
- Resolve constraints file references (e.g. '-c constraints.txt')
:param requirements_file: A string path to a requirements file on the local filesystem.
:param is_constraint: Indicates the parsed requirements file is a constraint file.
:return: A list of ``_Requirement`` instances.
References:
- `pip._internal.req.parse_requirements`:
https://github.com/pypa/pip/blob/7a77484a492c8f1e1f5ef24eaf71a43df9ea47eb/src/pip/_internal/req/req_file.py#L118
- Requirements File Format:
https://pip.pypa.io/en/stable/cli/pip_install/#requirements-file-format
:param requirements_file: A string path to a requirements file on the local filesystem.
:return: A list of parsed requirements (e.g. ``["scikit-learn==0.24.2", ...]``).
- Constraints Files:
https://pip.pypa.io/en/stable/user_guide/#constraints-files
"""
with open(requirements_file) as f:
lines = f.read().splitlines()
Expand All @@ -76,11 +90,13 @@ def _parse_requirements(requirements_file):
for line in lines:
if _is_requirements_file(line):
req_file = line.split(maxsplit=1)[1]
abs_path = (
req_file
if os.path.isabs(req_file)
else os.path.join(os.path.dirname(requirements_file), req_file)
)
yield from _parse_requirements(abs_path)
# If `req_file` is an absolute path, `os.path.join` returns `req_file`:
# https://docs.python.org/3/library/os.path.html#os.path.join
abs_path = os.path.join(os.path.dirname(requirements_file), req_file)
yield from _parse_requirements(abs_path, is_constraint=False)
elif _is_constraints_file(line):
req_file = line.split(maxsplit=1)[1]
abs_path = os.path.join(os.path.dirname(requirements_file), req_file)
yield from _parse_requirements(abs_path, is_constraint=True)
else:
yield line
yield _Requirement(line, is_constraint)
Loading

0 comments on commit cfed457

Please sign in to comment.