Skip to content

Commit 9f1d799

Browse files
committed
Add additional data to Studio live metrics post messages to support live experiments in monorepos
1 parent 44b78b8 commit 9f1d799

File tree

8 files changed

+182
-67
lines changed

8 files changed

+182
-67
lines changed

dvc/repo/__init__.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
)
1414
from dvc.ignore import DvcIgnoreFilter
1515
from dvc.log import logger
16-
from dvc.utils import as_posix
1716
from dvc.utils.objects import cached_property
1817

1918
if TYPE_CHECKING:
@@ -351,16 +350,6 @@ def fs(self, fs: "FileSystem"):
351350
# fs.
352351
self._reset()
353352

354-
@property
355-
def subrepo_relpath(self) -> str:
356-
from dvc.fs import GitFileSystem
357-
358-
scm_root_dir = "/" if isinstance(self.fs, GitFileSystem) else self.scm.root_dir
359-
360-
relpath = as_posix(self.fs.relpath(self.root_dir, scm_root_dir))
361-
362-
return "" if relpath == "." else relpath
363-
364353
@property
365354
def data_index(self) -> "DataIndex":
366355
from dvc_data.index import DataIndex

dvc/repo/experiments/executor/base.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@
2323
from dvc.stage.serialize import to_lockfile
2424
from dvc.utils import dict_sha256, env2bool, relpath
2525
from dvc.utils.fs import remove
26-
from dvc.utils.studio import env_to_config
26+
from dvc.utils.studio import (
27+
env_to_config,
28+
get_dvc_experiment_parent_data,
29+
get_subrepo_relpath,
30+
)
2731

2832
if TYPE_CHECKING:
2933
from queue import Queue
@@ -624,6 +628,10 @@ def _repro_dvc(
624628
params=to_studio_params(dvc.params.show()),
625629
dvc_studio_config=dvc_studio_config,
626630
message=message,
631+
subdir=get_subrepo_relpath(dvc),
632+
dvc_experiment_parent_data=get_dvc_experiment_parent_data(
633+
dvc, info.baseline_rev
634+
),
627635
)
628636
logger.debug("Running repro in '%s'", os.getcwd())
629637
yield dvc

dvc/utils/studio.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import TYPE_CHECKING, Any, Optional
1+
from typing import TYPE_CHECKING, Any, Optional, Union
22
from urllib.parse import urljoin
33

44
import requests
@@ -12,10 +12,14 @@
1212
DVC_STUDIO_URL,
1313
)
1414
from dvc.log import logger
15+
from dvc.utils import as_posix
1516

1617
if TYPE_CHECKING:
1718
from requests import Response
1819

20+
from dvc.repo import Repo
21+
22+
1923
logger = logger.getChild(__name__)
2024

2125
STUDIO_URL = "https://studio.iterative.ai"
@@ -111,3 +115,52 @@ def env_to_config(env: dict[str, Any]) -> dict[str, Any]:
111115
if DVC_STUDIO_URL in env:
112116
config["url"] = env[DVC_STUDIO_URL]
113117
return config
118+
119+
120+
def get_subrepo_relpath(repo: "Repo") -> str:
    """Return the POSIX-style path of ``repo.root_dir`` relative to the SCM root.

    When ``repo.fs`` is a ``GitFileSystem`` the path is computed against
    ``"/"``; otherwise it is computed against ``repo.scm.root_dir``.

    Returns:
        The relative path, or ``""`` when the computed path is ``"."``.
    """
    from dvc.fs import GitFileSystem

    if isinstance(repo.fs, GitFileSystem):
        base_dir = "/"
    else:
        base_dir = repo.scm.root_dir

    subdir = as_posix(repo.fs.relpath(repo.root_dir, base_dir))
    if subdir == ".":
        # Report "no subdirectory" as an empty string rather than ".".
        return ""
    return subdir
128+
129+
130+
def get_dvc_experiment_parent_data(
    repo: "Repo", baseline_rev: Union[str, None]
) -> Union[dict[str, Any], None]:
    """Describe the commit that an experiment is based on.

    Args:
        repo: Repo whose ``scm`` is used to look up the baseline commit.
        baseline_rev: Revision of the experiment baseline; may be ``None``.

    Returns:
        ``None`` when ``baseline_rev`` is empty, when ``repo.scm`` is falsy or
        a ``NoSCM`` instance, or when resolving the revision or the commit
        yields a falsy value. Otherwise a dict with the keys ``sha``,
        ``message``, ``title`` (first line of the message, stripped),
        ``author`` (``name``/``email``) and ``date`` (ISO-formatted commit
        datetime), plus ``branch`` when an active branch is reported.
    """
    from scmrepo.exceptions import SCMError

    from dvc.scm import NoSCM

    git = repo.scm

    # Bail out step by step, in the same order as the lookups depend on
    # each other: revision -> SCM -> sha -> commit.
    if not baseline_rev:
        return None
    if not git or isinstance(git, NoSCM):
        return None

    sha = git.resolve_rev(baseline_rev)
    if not sha:
        return None

    commit = git.resolve_commit(sha)
    if not commit:
        return None

    author = {
        "name": commit.author_name,
        "email": commit.author_email,
    }
    parent_data: dict[str, Any] = {
        "sha": commit.hexsha,
        "message": commit.message,
        "title": commit.message.partition("\n")[0].strip(),
        "author": author,
        "date": commit.commit_datetime.isoformat(),
    }

    try:
        branch = git.active_branch()
    except SCMError:
        # head is detached
        branch = None

    if branch:
        parent_data["branch"] = branch

    return parent_data

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ dependencies = [
3838
"dvc-data>=3.10,<3.11",
3939
"dvc-http>=2.29.0",
4040
"dvc-render>=1.0.1,<2",
41-
"dvc-studio-client>=0.17.1,<1",
41+
"dvc-studio-client@git+https://github.com/iterative/dvc-studio-client.git@refs/pull/144/head",
4242
"dvc-task>=0.3.0,<1",
4343
"flatten_dict<1,>=0.4.1",
4444
# https://github.com/iterative/dvc/issues/9654

tests/integration/test_studio_live_experiments.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
@pytest.mark.parametrize("tmp", [True, False])
1414
@pytest.mark.parametrize("offline", [True, False])
1515
def test_post_to_studio(
16-
tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp, offline
16+
M, tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp, offline
1717
):
1818
valid_response = mocker.MagicMock()
1919
valid_response.status_code = 200
@@ -53,6 +53,16 @@ def test_post_to_studio(
5353
"name": name,
5454
"params": {"params.yaml": {"foo": 1}},
5555
"client": "dvc",
56+
"dvc_experiment_parent_data": {
57+
"author": {
58+
"email": "dvctester@example.com",
59+
"name": "DVC Tester",
60+
},
61+
"date": M.any,
62+
"message": "init",
63+
"title": "init",
64+
"sha": baseline_sha,
65+
},
5666
}
5767

5868
assert done_call.kwargs["json"] == {
@@ -68,7 +78,7 @@ def test_post_to_studio(
6878

6979
@pytest.mark.parametrize("tmp", [True, False])
7080
def test_post_to_studio_custom_message(
71-
tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp
81+
M, tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp
7282
):
7383
valid_response = mocker.MagicMock()
7484
valid_response.status_code = 200
@@ -97,4 +107,14 @@ def test_post_to_studio_custom_message(
97107
"params": {"params.yaml": {"foo": 1}},
98108
"client": "dvc",
99109
"message": "foo",
110+
"dvc_experiment_parent_data": {
111+
"author": {
112+
"email": "dvctester@example.com",
113+
"name": "DVC Tester",
114+
},
115+
"date": M.any,
116+
"message": "init",
117+
"title": "init",
118+
"sha": baseline_sha,
119+
},
100120
}

tests/unit/command/test_studio.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
from dvc_studio_client.auth import AuthenticationExpired
1+
from dvc_studio_client.auth import AuthorizationExpiredError
22

33
from dvc.cli import main
44
from dvc.utils.studio import STUDIO_URL
55

66

77
def test_studio_login_token_check_failed(mocker):
88
mocker.patch(
9-
"dvc_studio_client.auth.get_access_token", side_effect=AuthenticationExpired
9+
"dvc_studio_client.auth.get_access_token", side_effect=AuthorizationExpiredError
1010
)
1111

1212
assert main(["studio", "login"]) == 1

tests/unit/repo/test_repo.py

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -135,51 +135,3 @@ def test_dynamic_cache_initialization(tmp_dir, scm):
135135
dvc.close()
136136

137137
Repo(str(tmp_dir)).close()
138-
139-
140-
def test_monorepo_relpath(tmp_dir, scm):
141-
from dvc.repo.destroy import destroy
142-
143-
tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
144-
145-
non_monorepo = Repo.init(tmp_dir)
146-
assert non_monorepo.subrepo_relpath == ""
147-
148-
destroy(non_monorepo)
149-
150-
monorepo_project_a = Repo.init(tmp_dir / "project_a", subdir=True)
151-
152-
assert monorepo_project_a.subrepo_relpath == "project_a"
153-
154-
monorepo_project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
155-
156-
assert monorepo_project_b.subrepo_relpath == "subdir/project_b"
157-
158-
159-
def test_virtual_monorepo_relpath(tmp_dir, scm):
160-
from dvc.fs.git import GitFileSystem
161-
from dvc.repo.destroy import destroy
162-
163-
tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
164-
scm.commit("initial commit")
165-
gfs = GitFileSystem(scm=scm, rev="master")
166-
167-
non_monorepo = Repo.init(tmp_dir)
168-
non_monorepo.fs = gfs
169-
non_monorepo.root_dir = "/"
170-
171-
assert non_monorepo.subrepo_relpath == ""
172-
173-
destroy(non_monorepo)
174-
175-
monorepo_project_a = Repo.init(tmp_dir / "project_a", subdir=True)
176-
monorepo_project_a.fs = gfs
177-
monorepo_project_a.root_dir = "/project_a"
178-
179-
assert monorepo_project_a.subrepo_relpath == "project_a"
180-
181-
monorepo_project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
182-
monorepo_project_b.fs = gfs
183-
monorepo_project_b.root_dir = "/subdir/project_b"
184-
185-
assert monorepo_project_b.subrepo_relpath == "subdir/project_b"

tests/unit/utils/test_studio.py

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,15 @@
99
DVC_STUDIO_TOKEN,
1010
DVC_STUDIO_URL,
1111
)
12-
from dvc.utils.studio import STUDIO_URL, config_to_env, env_to_config, notify_refs
12+
from dvc.repo import Repo
13+
from dvc.utils.studio import (
14+
STUDIO_URL,
15+
config_to_env,
16+
env_to_config,
17+
get_dvc_experiment_parent_data,
18+
get_subrepo_relpath,
19+
notify_refs,
20+
)
1321

1422
CONFIG = {"offline": True, "repo_url": "repo_url", "token": "token", "url": "url"}
1523

@@ -67,3 +75,88 @@ def test_config_to_env():
6775

6876
def test_env_to_config():
6977
assert env_to_config(ENV) == CONFIG
78+
79+
80+
@pytest.mark.studio
def test_monorepo_relpath(tmp_dir, scm):
    """Check ``get_subrepo_relpath`` for a root repo and two subdir repos."""
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})

    # A repo initialised at the SCM root has no sub-path.
    root_repo = Repo.init(tmp_dir)
    assert get_subrepo_relpath(root_repo) == ""

    destroy(root_repo)

    project_a = Repo.init(tmp_dir / "project_a", subdir=True)
    assert get_subrepo_relpath(project_a) == "project_a"

    project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
    assert get_subrepo_relpath(project_b) == "subdir/project_b"
98+
99+
100+
@pytest.mark.studio
def test_virtual_monorepo_relpath(tmp_dir, scm):
    """Check ``get_subrepo_relpath`` when the repo fs is a ``GitFileSystem``."""
    from dvc.fs.git import GitFileSystem
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
    scm.commit("initial commit")
    git_fs = GitFileSystem(scm=scm, rev="master")

    # Root-level repo viewed through the git filesystem.
    root_repo = Repo.init(tmp_dir)
    root_repo.fs = git_fs
    root_repo.root_dir = "/"
    assert get_subrepo_relpath(root_repo) == ""

    destroy(root_repo)

    project_a = Repo.init(tmp_dir / "project_a", subdir=True)
    project_a.fs = git_fs
    project_a.root_dir = "/project_a"
    assert get_subrepo_relpath(project_a) == "project_a"

    project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
    project_b.fs = git_fs
    project_b.root_dir = "/subdir/project_b"
    assert get_subrepo_relpath(project_b) == "subdir/project_b"
128+
129+
130+
@pytest.mark.studio
def test_dvc_experiment_parent_data(M, scm, dvc):
    """Check the payload built for a commit with a two-line message."""
    subject = "a commit with a fairly long message"
    full_message = f"{subject}\nthat is split over two lines"

    scm.commit(full_message)

    rev = scm.get_rev()
    assert isinstance(rev, str)

    parent_data = get_dvc_experiment_parent_data(dvc, rev)
    assert parent_data is not None
    # The date is only checked for its type; its value is matched loosely.
    assert isinstance(parent_data["date"], str)

    expected = {
        "author": {
            "email": "dvctester@example.com",
            "name": "DVC Tester",
        },
        "branch": "master",
        "date": M.any,
        "message": full_message,
        "title": subject,
        "sha": rev,
    }
    assert parent_data == expected
155+
156+
157+
@pytest.mark.parametrize("func", ["get_rev", "resolve_commit"])
@pytest.mark.studio
def test_no_dvc_experiment_parent_data(mocker, scm, dvc, func):
    """Expect ``None`` when the patched SCM method returns ``None``."""
    mocker.patch.object(scm, func, return_value=None)

    baseline_rev = scm.get_rev()
    parent_data = get_dvc_experiment_parent_data(dvc, baseline_rev)

    assert parent_data is None

0 commit comments

Comments
 (0)