Skip to content

Commit 53a03e4

Browse files
committed
Add additional data to Studio live metrics post messages to support live experiments in monorepos
1 parent 44b78b8 commit 53a03e4

File tree

9 files changed: 196 additions and 69 deletions.

dvc/repo/__init__.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
)
1414
from dvc.ignore import DvcIgnoreFilter
1515
from dvc.log import logger
16-
from dvc.utils import as_posix
1716
from dvc.utils.objects import cached_property
1817

1918
if TYPE_CHECKING:
@@ -351,16 +350,6 @@ def fs(self, fs: "FileSystem"):
351350
# fs.
352351
self._reset()
353352

354-
@property
355-
def subrepo_relpath(self) -> str:
356-
from dvc.fs import GitFileSystem
357-
358-
scm_root_dir = "/" if isinstance(self.fs, GitFileSystem) else self.scm.root_dir
359-
360-
relpath = as_posix(self.fs.relpath(self.root_dir, scm_root_dir))
361-
362-
return "" if relpath == "." else relpath
363-
364353
@property
365354
def data_index(self) -> "DataIndex":
366355
from dvc_data.index import DataIndex

dvc/repo/experiments/executor/base.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@
2323
from dvc.stage.serialize import to_lockfile
2424
from dvc.utils import dict_sha256, env2bool, relpath
2525
from dvc.utils.fs import remove
26-
from dvc.utils.studio import env_to_config
26+
from dvc.utils.studio import (
27+
env_to_config,
28+
get_dvc_experiment_parent_data,
29+
get_subrepo_relpath,
30+
)
2731

2832
if TYPE_CHECKING:
2933
from queue import Queue
@@ -624,6 +628,10 @@ def _repro_dvc(
624628
params=to_studio_params(dvc.params.show()),
625629
dvc_studio_config=dvc_studio_config,
626630
message=message,
631+
subdir=get_subrepo_relpath(dvc),
632+
dvc_experiment_parent_data=get_dvc_experiment_parent_data(
633+
dvc, info.baseline_rev
634+
),
627635
)
628636
logger.debug("Running repro in '%s'", os.getcwd())
629637
yield dvc

dvc/scm.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Manages source control systems (e.g. Git)."""
2+
23
import os
34
from collections.abc import Iterator, Mapping
45
from contextlib import contextmanager
@@ -181,7 +182,7 @@ def resolve_rev(scm: Union["Git", "NoSCM"], rev: str) -> str:
181182
raise RevError(str(exc)) # noqa: B904
182183

183184

184-
def _get_n_commits(scm: "Git", revs: list[str], num: int) -> list[str]:
185+
def get_n_commits(scm: "Git", revs: list[str], num: int) -> list[str]:
185186
results = []
186187
for rev in revs:
187188
if num == 0:
@@ -227,7 +228,7 @@ def iter_revs(
227228
return {}
228229

229230
revs = revs or []
230-
results: list[str] = _get_n_commits(scm, revs, num)
231+
results: list[str] = get_n_commits(scm, revs, num)
231232

232233
if all_commits:
233234
results.extend(scm.list_all_commits())

dvc/utils/studio.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import TYPE_CHECKING, Any, Optional
1+
from typing import TYPE_CHECKING, Any, Optional, Union
22
from urllib.parse import urljoin
33

44
import requests
@@ -12,10 +12,14 @@
1212
DVC_STUDIO_URL,
1313
)
1414
from dvc.log import logger
15+
from dvc.utils import as_posix
1516

1617
if TYPE_CHECKING:
1718
from requests import Response
1819

20+
from dvc.repo import Repo
21+
22+
1923
logger = logger.getChild(__name__)
2024

2125
STUDIO_URL = "https://studio.iterative.ai"
@@ -111,3 +115,51 @@ def env_to_config(env: dict[str, Any]) -> dict[str, Any]:
111115
if DVC_STUDIO_URL in env:
112116
config["url"] = env[DVC_STUDIO_URL]
113117
return config
118+
119+
120+
def get_subrepo_relpath(repo: "Repo") -> str:
    """Return the location of ``repo.root_dir`` relative to the SCM root.

    The relative path is computed with ``repo.fs.relpath`` and passed
    through ``as_posix``. When the result is ``"."`` (the repo root and the
    SCM root are the same directory) an empty string is returned instead.
    """
    from dvc.fs import GitFileSystem

    # When the repo is backed by a GitFileSystem the SCM root is taken to be
    # "/"; otherwise the root directory reported by the SCM object is used.
    if isinstance(repo.fs, GitFileSystem):
        base_dir = "/"
    else:
        base_dir = repo.scm.root_dir

    subdir = as_posix(repo.fs.relpath(repo.root_dir, base_dir))
    if subdir == ".":
        return ""
    return subdir
128+
129+
130+
def get_dvc_experiment_parent_data(
    repo: "Repo", baseline_rev: Union[str, None]
) -> Union[dict[str, Any], None]:
    """Describe the baseline commit of an experiment.

    Returns a dict with the keys ``author`` (``name`` / ``email``), ``date``
    (ISO formatted author datetime), ``message``, ``parent_shas``, ``sha``
    and ``title`` (first line of the message, stripped).

    ``None`` is returned when ``baseline_rev`` is falsy, when the repo has no
    usable SCM (falsy or ``NoSCM``), when the commit cannot be resolved, when
    no parent SHAs are found, or when an ``SCMError`` is raised along the way.
    """
    from scmrepo.exceptions import SCMError

    from dvc.scm import NoSCM, get_n_commits

    scm = repo.scm

    try:
        if not baseline_rev or not scm or isinstance(scm, NoSCM):
            return None

        commit = scm.resolve_commit(baseline_rev)
        if not commit:
            return None

        # Ask for 101 revisions and drop the first entry (presumably the
        # baseline itself), which leaves at most 100 ancestor SHAs.
        ancestors = get_n_commits(scm, [baseline_rev], 101)
        parent_shas = ancestors[1:]
        if not parent_shas:
            return None

        author = {
            "name": commit.author_name,
            "email": commit.author_email,
        }
        authored_at = commit.author_datetime.isoformat()
        full_message = commit.message
        first_line, _, _ = commit.message.partition("\n")

        return {
            "author": author,
            "date": authored_at,
            "message": full_message,
            "parent_shas": parent_shas,
            "sha": commit.hexsha,
            "title": first_line.strip(),
        }

    except SCMError:
        # unable to resolve all required baseline information
        return None

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ dependencies = [
3838
"dvc-data>=3.10,<3.11",
3939
"dvc-http>=2.29.0",
4040
"dvc-render>=1.0.1,<2",
41-
"dvc-studio-client>=0.17.1,<1",
41+
"dvc-studio-client@git+https://github.com/iterative/dvc-studio-client.git@refs/pull/144/head",
4242
"dvc-task>=0.3.0,<1",
4343
"flatten_dict<1,>=0.4.1",
4444
# https://github.com/iterative/dvc/issues/9654

tests/integration/test_studio_live_experiments.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
@pytest.mark.parametrize("tmp", [True, False])
1414
@pytest.mark.parametrize("offline", [True, False])
1515
def test_post_to_studio(
16-
tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp, offline
16+
M, tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp, offline
1717
):
1818
valid_response = mocker.MagicMock()
1919
valid_response.status_code = 200
@@ -53,6 +53,17 @@ def test_post_to_studio(
5353
"name": name,
5454
"params": {"params.yaml": {"foo": 1}},
5555
"client": "dvc",
56+
"dvc_experiment_parent_data": {
57+
"author": {
58+
"email": "dvctester@example.com",
59+
"name": "DVC Tester",
60+
},
61+
"date": M.any,
62+
"message": "init",
63+
"parent_shas": M.any,
64+
"title": "init",
65+
"sha": baseline_sha,
66+
},
5667
}
5768

5869
assert done_call.kwargs["json"] == {
@@ -68,7 +79,7 @@ def test_post_to_studio(
6879

6980
@pytest.mark.parametrize("tmp", [True, False])
7081
def test_post_to_studio_custom_message(
71-
tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp
82+
M, tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp
7283
):
7384
valid_response = mocker.MagicMock()
7485
valid_response.status_code = 200
@@ -97,4 +108,15 @@ def test_post_to_studio_custom_message(
97108
"params": {"params.yaml": {"foo": 1}},
98109
"client": "dvc",
99110
"message": "foo",
111+
"dvc_experiment_parent_data": {
112+
"author": {
113+
"email": "dvctester@example.com",
114+
"name": "DVC Tester",
115+
},
116+
"date": M.any,
117+
"message": "init",
118+
"parent_shas": M.any,
119+
"title": "init",
120+
"sha": baseline_sha,
121+
},
100122
}

tests/unit/command/test_studio.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
from dvc_studio_client.auth import AuthenticationExpired
1+
from dvc_studio_client.auth import AuthorizationExpiredError
22

33
from dvc.cli import main
44
from dvc.utils.studio import STUDIO_URL
55

66

77
def test_studio_login_token_check_failed(mocker):
88
mocker.patch(
9-
"dvc_studio_client.auth.get_access_token", side_effect=AuthenticationExpired
9+
"dvc_studio_client.auth.get_access_token", side_effect=AuthorizationExpiredError
1010
)
1111

1212
assert main(["studio", "login"]) == 1

tests/unit/repo/test_repo.py

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -135,51 +135,3 @@ def test_dynamic_cache_initialization(tmp_dir, scm):
135135
dvc.close()
136136

137137
Repo(str(tmp_dir)).close()
138-
139-
140-
def test_monorepo_relpath(tmp_dir, scm):
141-
from dvc.repo.destroy import destroy
142-
143-
tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
144-
145-
non_monorepo = Repo.init(tmp_dir)
146-
assert non_monorepo.subrepo_relpath == ""
147-
148-
destroy(non_monorepo)
149-
150-
monorepo_project_a = Repo.init(tmp_dir / "project_a", subdir=True)
151-
152-
assert monorepo_project_a.subrepo_relpath == "project_a"
153-
154-
monorepo_project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
155-
156-
assert monorepo_project_b.subrepo_relpath == "subdir/project_b"
157-
158-
159-
def test_virtual_monorepo_relpath(tmp_dir, scm):
160-
from dvc.fs.git import GitFileSystem
161-
from dvc.repo.destroy import destroy
162-
163-
tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
164-
scm.commit("initial commit")
165-
gfs = GitFileSystem(scm=scm, rev="master")
166-
167-
non_monorepo = Repo.init(tmp_dir)
168-
non_monorepo.fs = gfs
169-
non_monorepo.root_dir = "/"
170-
171-
assert non_monorepo.subrepo_relpath == ""
172-
173-
destroy(non_monorepo)
174-
175-
monorepo_project_a = Repo.init(tmp_dir / "project_a", subdir=True)
176-
monorepo_project_a.fs = gfs
177-
monorepo_project_a.root_dir = "/project_a"
178-
179-
assert monorepo_project_a.subrepo_relpath == "project_a"
180-
181-
monorepo_project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
182-
monorepo_project_b.fs = gfs
183-
monorepo_project_b.root_dir = "/subdir/project_b"
184-
185-
assert monorepo_project_b.subrepo_relpath == "subdir/project_b"

tests/unit/utils/test_studio.py

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from urllib.parse import urljoin
23

34
import pytest
@@ -9,7 +10,15 @@
910
DVC_STUDIO_TOKEN,
1011
DVC_STUDIO_URL,
1112
)
12-
from dvc.utils.studio import STUDIO_URL, config_to_env, env_to_config, notify_refs
13+
from dvc.repo import Repo
14+
from dvc.utils.studio import (
15+
STUDIO_URL,
16+
config_to_env,
17+
env_to_config,
18+
get_dvc_experiment_parent_data,
19+
get_subrepo_relpath,
20+
notify_refs,
21+
)
1322

1423
CONFIG = {"offline": True, "repo_url": "repo_url", "token": "token", "url": "url"}
1524

@@ -67,3 +76,97 @@ def test_config_to_env():
6776

6877
def test_env_to_config():
6978
assert env_to_config(ENV) == CONFIG
79+
80+
81+
@pytest.mark.studio
def test_monorepo_relpath(tmp_dir, scm):
    """A repo at the SCM root yields "", subdir repos yield their POSIX path."""
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})

    root_repo = Repo.init(tmp_dir)
    assert get_subrepo_relpath(root_repo) == ""

    # The root-level repo has to go before sub-directory repos are initialised.
    destroy(root_repo)

    subdir_cases = (
        (tmp_dir / "project_a", "project_a"),
        (tmp_dir / "subdir" / "project_b", "subdir/project_b"),
    )
    for project_dir, expected in subdir_cases:
        project_repo = Repo.init(project_dir, subdir=True)

        assert get_subrepo_relpath(project_repo) == expected
99+
100+
101+
@pytest.mark.studio
def test_virtual_monorepo_relpath(tmp_dir, scm):
    """Same expectations as the on-disk case, with the repo on a GitFileSystem."""
    from dvc.fs.git import GitFileSystem
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
    scm.commit("initial commit")
    git_fs = GitFileSystem(scm=scm, rev="master")

    def init_virtual(path, virtual_root, **init_kwargs):
        # Initialise on disk, then point the repo at the git-backed fs.
        repo = Repo.init(path, **init_kwargs)
        repo.fs = git_fs
        repo.root_dir = virtual_root
        return repo

    root_repo = init_virtual(tmp_dir, "/")

    assert get_subrepo_relpath(root_repo) == ""

    destroy(root_repo)

    project_a = init_virtual(tmp_dir / "project_a", "/project_a", subdir=True)

    assert get_subrepo_relpath(project_a) == "project_a"

    project_b = init_virtual(
        tmp_dir / "subdir" / "project_b", "/subdir/project_b", subdir=True
    )

    assert get_subrepo_relpath(project_b) == "subdir/project_b"
129+
130+
131+
@pytest.mark.studio
def test_dvc_experiment_parent_data(M, tmp_dir, scm, dvc):
    """The head commit is described together with its ancestors, newest first."""
    # Record revisions oldest-first while committing, then flip the list so
    # that the most recent ancestor comes first.
    revs_oldest_first = [scm.get_rev()]
    for step in range(5):
        payload = json.dumps({"metric": step})
        tmp_dir.scm_gen({"metrics.json": payload}, commit=f"step {step}")
        revs_oldest_first.append(scm.get_rev())
    parent_shas = revs_oldest_first[::-1]

    title = "a final commit with a fairly long message"
    message = f"{title}\nthat is split over two lines"

    tmp_dir.scm_gen({"metrics.json": json.dumps({"metric": 100})}, commit=message)

    head_sha = scm.get_rev()

    assert isinstance(head_sha, str)
    assert head_sha not in parent_shas

    parent_data = get_dvc_experiment_parent_data(dvc, head_sha)

    assert parent_data is not None
    assert isinstance(parent_data["date"], str)

    expected = {
        "author": {
            "email": "dvctester@example.com",
            "name": "DVC Tester",
        },
        "date": M.any,
        "message": message,
        "parent_shas": parent_shas,
        "title": title,
        "sha": head_sha,
    }
    assert parent_data == expected
165+
166+
167+
@pytest.mark.parametrize("func", ["get_rev", "resolve_commit"])
@pytest.mark.studio
def test_no_dvc_experiment_parent_data(mocker, scm, dvc, func):
    """No parent data is produced when the patched SCM method returns None."""
    mocker.patch.object(scm, func, return_value=None)

    baseline_rev = scm.get_rev()
    parent_data = get_dvc_experiment_parent_data(dvc, baseline_rev)

    assert parent_data is None

0 commit comments

Comments
 (0)