-
Notifications
You must be signed in to change notification settings - Fork 1.3k
studio: add additional data to live metrics post messages to support live experiments in monorepos #10291
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
studio: add additional data to live metrics post messages to support live experiments in monorepos #10291
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| from typing import TYPE_CHECKING, Any, Optional | ||
| from typing import TYPE_CHECKING, Any, Optional, Union | ||
| from urllib.parse import urljoin | ||
|
|
||
| import requests | ||
|
|
@@ -12,10 +12,14 @@ | |
| DVC_STUDIO_URL, | ||
| ) | ||
| from dvc.log import logger | ||
| from dvc.utils import as_posix | ||
|
|
||
| if TYPE_CHECKING: | ||
| from requests import Response | ||
|
|
||
| from dvc.repo import Repo | ||
|
|
||
|
|
||
| logger = logger.getChild(__name__) | ||
|
|
||
| STUDIO_URL = "https://studio.iterative.ai" | ||
|
|
@@ -111,3 +115,50 @@ def env_to_config(env: dict[str, Any]) -> dict[str, Any]: | |
| if DVC_STUDIO_URL in env: | ||
| config["url"] = env[DVC_STUDIO_URL] | ||
| return config | ||
|
|
||
|
|
||
| def get_subrepo_relpath(repo: "Repo") -> str: | ||
| from dvc.fs import GitFileSystem | ||
|
|
||
| scm_root_dir = "/" if isinstance(repo.fs, GitFileSystem) else repo.scm.root_dir | ||
|
|
||
| relpath = as_posix(repo.fs.relpath(repo.root_dir, scm_root_dir)) | ||
|
|
||
| return "" if relpath == "." else relpath | ||
|
|
||
|
|
||
| def get_dvc_experiment_parent_data( | ||
| repo: "Repo", baseline_rev: Union[str, None] | ||
| ) -> Union[dict[str, Any], None]: | ||
| from scmrepo.exceptions import SCMError | ||
|
|
||
| from dvc.scm import NoSCM, get_n_commits | ||
|
|
||
| scm = repo.scm | ||
|
|
||
| try: | ||
| if ( | ||
| not baseline_rev | ||
| or not scm | ||
| or isinstance(scm, NoSCM) | ||
| or not (commit := scm.resolve_commit(baseline_rev)) | ||
| or not ( | ||
| first_100_parent_shas := get_n_commits(scm, [baseline_rev], 101)[1:] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Q: shy do we need this? (looks complicated, and not very obvious)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See https://github.com/iterative/studio/issues/8848#issuecomment-1935151710 It is complicated and it is needed because of commits missing from Studio.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it. A few thoughts / questions:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This data is additional to the This solutions is the best (and maybe only) available given the current constraints.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to clarify and double check. My question was - can there a be situation when getting just a single baseline works, but 100 fails.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The baseline is duplicated in the payload sent for the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Shouldn't happen but |
||
| ) | ||
| ): | ||
| return None | ||
|
|
||
| return { | ||
| "author": { | ||
| "name": commit.author_name, | ||
| "email": commit.author_email, | ||
| }, | ||
| "date": commit.author_datetime.isoformat(), | ||
| "message": commit.message, | ||
| "parent_shas": first_100_parent_shas, | ||
| "sha": commit.hexsha, | ||
| "title": commit.message.partition("\n")[0].strip(), | ||
| } | ||
|
|
||
| except SCMError: | ||
| return None | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,7 +38,7 @@ dependencies = [ | |
| "dvc-data>=3.10,<3.12", | ||
| "dvc-http>=2.29.0", | ||
| "dvc-render>=1.0.1,<2", | ||
| "dvc-studio-client>=0.19,<1", | ||
| "dvc-studio-client@git+https://github.com/iterative/dvc-studio-client.git@refs/pull/144/head", | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [I] Do not merge before releasing dvc-studio-client & studio and updating this. |
||
| "dvc-task>=0.3.0,<1", | ||
| "flatten_dict<1,>=0.4.1", | ||
| # https://github.com/iterative/dvc/issues/9654 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,51 +135,3 @@ def test_dynamic_cache_initialization(tmp_dir, scm): | |
| dvc.close() | ||
|
|
||
| Repo(str(tmp_dir)).close() | ||
|
|
||
|
|
||
| def test_monorepo_relpath(tmp_dir, scm): | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see https://github.com/iterative/dvc/pull/10291/files#r1482381232 (moved these along with the method) |
||
| from dvc.repo.destroy import destroy | ||
|
|
||
| tmp_dir.gen({"project_a": {}, "subdir/project_b": {}}) | ||
|
|
||
| non_monorepo = Repo.init(tmp_dir) | ||
| assert non_monorepo.subrepo_relpath == "" | ||
|
|
||
| destroy(non_monorepo) | ||
|
|
||
| monorepo_project_a = Repo.init(tmp_dir / "project_a", subdir=True) | ||
|
|
||
| assert monorepo_project_a.subrepo_relpath == "project_a" | ||
|
|
||
| monorepo_project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True) | ||
|
|
||
| assert monorepo_project_b.subrepo_relpath == "subdir/project_b" | ||
|
|
||
|
|
||
| def test_virtual_monorepo_relpath(tmp_dir, scm): | ||
| from dvc.fs.git import GitFileSystem | ||
| from dvc.repo.destroy import destroy | ||
|
|
||
| tmp_dir.gen({"project_a": {}, "subdir/project_b": {}}) | ||
| scm.commit("initial commit") | ||
| gfs = GitFileSystem(scm=scm, rev="master") | ||
|
|
||
| non_monorepo = Repo.init(tmp_dir) | ||
| non_monorepo.fs = gfs | ||
| non_monorepo.root_dir = "/" | ||
|
|
||
| assert non_monorepo.subrepo_relpath == "" | ||
|
|
||
| destroy(non_monorepo) | ||
|
|
||
| monorepo_project_a = Repo.init(tmp_dir / "project_a", subdir=True) | ||
| monorepo_project_a.fs = gfs | ||
| monorepo_project_a.root_dir = "/project_a" | ||
|
|
||
| assert monorepo_project_a.subrepo_relpath == "project_a" | ||
|
|
||
| monorepo_project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True) | ||
| monorepo_project_b.fs = gfs | ||
| monorepo_project_b.root_dir = "/subdir/project_b" | ||
|
|
||
| assert monorepo_project_b.subrepo_relpath == "subdir/project_b" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[F] Moved into
dvc.utils.studio. Tests moved too.