Skip to content

Replaces plots dvc_data_version_info with flat fields to be used by VS Code and Studio #9019

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 0 additions & 44 deletions dvc/commands/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,49 +22,6 @@ def _show_json(renderers, split=False):
ui.write_json(result, highlight=False)


def _adjust_vega_renderers(renderers):
    """Replace each Vega datapoint's version-info dict with a flat revision label.

    Only ``VegaRenderer`` instances are touched; every other renderer is left
    as is.  Datapoints are modified in place and nothing is returned.

    * When a renderer has more than one data version, the version info is
      popped from each datapoint, the keys that do not vary across the
      renderer's datapoints are dropped, and the remaining values are joined
      with ``"::"`` into ``"rev"``.
    * Otherwise the ``"revision"`` entry of the version info is copied into
      ``REVISION_FIELD`` and the version info is removed.
    """
    from dvc.render import REVISION_FIELD, VERSION_FIELD
    from dvc_render import VegaRenderer

    for renderer in renderers:
        if not isinstance(renderer, VegaRenderer):
            continue

        if _data_versions_count(renderer) > 1:
            # Computed once, before any datapoint is stripped of its info.
            summary = _summarize_version_infos(renderer)
            for datapoint in renderer.datapoints:
                version_info = datapoint.pop(VERSION_FIELD, {})
                # Snapshot the keys: the dict is mutated while we walk it.
                for key in list(version_info.keys()):
                    if len(summary.get(key, set())) <= 1:
                        version_info.pop(key)
                if version_info:
                    datapoint["rev"] = "::".join(version_info.values())
            continue

        for datapoint in renderer.datapoints:
            datapoint[REVISION_FIELD] = datapoint[VERSION_FIELD]["revision"]
            datapoint.pop(VERSION_FIELD, {})


def _summarize_version_infos(renderer):
    """Return, per version-info key, the set of distinct values seen.

    Walks ``renderer.datapoints`` and reads the dict stored under
    ``VERSION_FIELD`` on each one (datapoints without it contribute nothing).
    The result is a plain ``dict`` mapping each key to a ``set`` of values.
    """
    from collections import defaultdict

    from dvc.render import VERSION_FIELD

    values_by_key = defaultdict(set)

    for datapoint in renderer.datapoints:
        version_info = datapoint.get(VERSION_FIELD, {})
        for name, seen_value in version_info.items():
            values_by_key[name].add(seen_value)

    return dict(values_by_key)


def _data_versions_count(renderer):
    """Count the distinct (filename, field) combinations for ``renderer``.

    The combinations are the cartesian product of the ``"filename"`` and
    ``"field"`` value sets from the version-info summary; a key missing from
    the summary counts as the single value ``None``.
    """
    from itertools import product

    summary = _summarize_version_infos(renderer)
    filenames = summary.get("filename", {None})
    fields = summary.get("field", {None})
    combinations = set(product(filenames, fields))
    return len(combinations)


class CmdPlots(CmdBase):
def _func(self, *args, **kwargs):
raise NotImplementedError
Expand Down Expand Up @@ -132,7 +89,6 @@ def run(self) -> int: # noqa: C901, PLR0911, PLR0912
_show_json(renderers, self.args.split)
return 0

_adjust_vega_renderers(renderers)
if self.args.show_vega:
renderer = first(filter(lambda r: r.TYPE == "vega", renderers))
if renderer:
Expand Down
2 changes: 1 addition & 1 deletion dvc/render/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Field name used for the x axis when a plot does not provide one.
INDEX_FIELD = "step"
# Datapoint key that holds the revision label.
REVISION_FIELD = "rev"
# Key (and key suffix) for the file a datapoint came from.
FILENAME_FIELD = "filename"
# Datapoint key under which a nested version-info dict is stored.
VERSION_FIELD = "dvc_data_version_info"
# Prefix for the flat per-datapoint fields, e.g. "dvc_rev" or "dvc_id".
FIELD_PREFIX = "dvc_"
# NOTE(review): the three keys below are not exercised in the visible code;
# their exact role should be confirmed against their callers.
REVISIONS_KEY = "revisions"
TYPE_KEY = "type"
SRC_FIELD = "src"
4 changes: 2 additions & 2 deletions dvc/render/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections import defaultdict
from typing import Dict, List, Union

from dvc.render import REVISION_FIELD, REVISIONS_KEY, SRC_FIELD, TYPE_KEY, VERSION_FIELD
from dvc.render import FIELD_PREFIX, REVISION_FIELD, REVISIONS_KEY, SRC_FIELD, TYPE_KEY
from dvc.render.converter.image import ImageConverter
from dvc.render.converter.vega import VegaConverter

Expand All @@ -23,7 +23,7 @@ def _get_converter(
def _group_by_rev(datapoints):
grouped = defaultdict(list)
for datapoint in datapoints:
rev = datapoint.get(VERSION_FIELD, {}).get("revision")
rev = datapoint.get(f"{FIELD_PREFIX}rev")
grouped[rev].append(datapoint)
return dict(grouped)

Expand Down
48 changes: 28 additions & 20 deletions dvc/render/converter/vega.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from funcy import first, last

from dvc.exceptions import DvcException
from dvc.render import FILENAME_FIELD, INDEX_FIELD, VERSION_FIELD
from dvc.render import FIELD_PREFIX, FILENAME_FIELD, INDEX_FIELD, REVISION_FIELD

from . import Converter

Expand Down Expand Up @@ -202,13 +202,7 @@ def flat_datapoints(self, revision): # noqa: C901, PLR0912
xs = list(_get_xs(properties, file2datapoints))

# assign "step" if no x provided
if not xs:
x_file, x_field = (
None,
INDEX_FIELD,
)
else:
x_file, x_field = xs[0]
x_file, x_field = _get_x_props(xs)
props_update["x"] = x_field

ys = list(_get_ys(properties, file2datapoints))
Expand All @@ -222,15 +216,10 @@ def flat_datapoints(self, revision): # noqa: C901, PLR0912
)

all_datapoints = []
if ys:
all_y_files, all_y_fields = list(zip(*ys))
all_y_fields = set(all_y_fields)
all_y_files = set(all_y_files)
else:
all_y_files = set()
all_y_fields = set()
all_y_files, all_y_fields = _get_all_y_props(ys)

# override to unified y field name if there are different y fields
# also get common prefix to drop from field names
if len(all_y_fields) > 1:
props_update["y"] = "dvc_inferred_y_value"
else:
Expand Down Expand Up @@ -272,14 +261,17 @@ def flat_datapoints(self, revision): # noqa: C901, PLR0912
)

y_file_short = y_file[common_prefix_len:].strip("/\\")
all_fields = [revision, y_file_short, y_field]
joined_all = "::".join([f for f in all_fields if f])

_update_all(
datapoints,
update_dict={
VERSION_FIELD: {
"revision": revision,
FILENAME_FIELD: y_file_short,
"field": y_field,
}
REVISION_FIELD: joined_all,
f"{FIELD_PREFIX}id": joined_all,
f"{FIELD_PREFIX}rev": revision,
f"{FIELD_PREFIX}{FILENAME_FIELD}": y_file_short,
f"{FIELD_PREFIX}field": y_field,
},
)

Expand Down Expand Up @@ -344,3 +336,19 @@ def _update_from_index(datapoints: List[Dict], new_field: str):
def _update_all(datapoints: List[Dict], update_dict: Dict):
for datapoint in datapoints:
datapoint.update(update_dict)


def _get_x_props(xs: List):
if not xs:
return (
None,
INDEX_FIELD,
)
return xs[0]


def _get_all_y_props(ys: List):
if ys:
all_y_files, all_y_fields = list(zip(*ys))
return set(all_y_files), set(all_y_fields)
return set(), set()
110 changes: 71 additions & 39 deletions tests/integration/plots/test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from funcy import first

from dvc.cli import main
from dvc.render import REVISION_FIELD, VERSION_FIELD
from dvc.render import REVISION_FIELD

JSON_OUT = "vis_data"

Expand Down Expand Up @@ -187,35 +187,43 @@ def test_repo_with_plots(tmp_dir, scm, dvc, capsys, run_copy_metrics, repo_with_
] == _update_datapoints(
linear_v1,
{
VERSION_FIELD: {
"revision": "workspace",
"filename": "linear.json",
"field": "y",
},
REVISION_FIELD: "workspace::linear.json::y",
"dvc_id": "workspace::linear.json::y",
"dvc_rev": "workspace",
"dvc_filename": "linear.json",
"dvc_field": "y",
},
)
assert html_result["linear.json"]["data"]["values"] == _update_datapoints(
linear_v1,
{
REVISION_FIELD: "workspace",
REVISION_FIELD: "workspace::linear.json::y",
"dvc_id": "workspace::linear.json::y",
"dvc_rev": "workspace",
"dvc_filename": "linear.json",
"dvc_field": "y",
},
)
assert json_result["confusion.json"][0]["content"]["data"][
"values"
] == _update_datapoints(
confusion_v1,
{
VERSION_FIELD: {
"revision": "workspace",
"filename": "confusion.json",
"field": "actual",
},
REVISION_FIELD: "workspace::confusion.json::actual",
"dvc_id": "workspace::confusion.json::actual",
"dvc_rev": "workspace",
"dvc_filename": "confusion.json",
"dvc_field": "actual",
},
)
assert html_result["confusion.json"]["data"]["values"] == _update_datapoints(
confusion_v1,
{
REVISION_FIELD: "workspace",
REVISION_FIELD: "workspace::confusion.json::actual",
"dvc_id": "workspace::confusion.json::actual",
"dvc_rev": "workspace",
"dvc_filename": "confusion.json",
"dvc_field": "actual",
},
)
verify_image(tmp_dir, "workspace", "image.png", image_v1, html_path, json_result)
Expand Down Expand Up @@ -259,63 +267,79 @@ def test_repo_with_plots(tmp_dir, scm, dvc, capsys, run_copy_metrics, repo_with_
] == _update_datapoints(
linear_v2,
{
VERSION_FIELD: {
"revision": "workspace",
"filename": "../linear.json",
"field": "y",
},
REVISION_FIELD: "workspace::../linear.json::y",
"dvc_rev": "workspace",
"dvc_filename": "../linear.json",
"dvc_field": "y",
"dvc_id": "workspace::../linear.json::y",
},
) + _update_datapoints(
linear_v1,
{
VERSION_FIELD: {
"revision": "HEAD",
"filename": "../linear.json",
"field": "y",
},
REVISION_FIELD: "HEAD::../linear.json::y",
"dvc_rev": "HEAD",
"dvc_filename": "../linear.json",
"dvc_field": "y",
"dvc_id": "HEAD::../linear.json::y",
},
)
assert html_result["../linear.json"]["data"]["values"] == _update_datapoints(
linear_v2,
{
REVISION_FIELD: "workspace",
REVISION_FIELD: "workspace::../linear.json::y",
"dvc_rev": "workspace",
"dvc_filename": "../linear.json",
"dvc_field": "y",
"dvc_id": "workspace::../linear.json::y",
},
) + _update_datapoints(
linear_v1,
{
REVISION_FIELD: "HEAD",
REVISION_FIELD: "HEAD::../linear.json::y",
"dvc_rev": "HEAD",
"dvc_filename": "../linear.json",
"dvc_field": "y",
"dvc_id": "HEAD::../linear.json::y",
},
)
assert json_result["../confusion.json"][0]["content"]["data"][
"values"
] == _update_datapoints(
confusion_v2,
{
VERSION_FIELD: {
"revision": "workspace",
"filename": "../confusion.json",
"field": "actual",
},
REVISION_FIELD: "workspace::../confusion.json::actual",
"dvc_rev": "workspace",
"dvc_filename": "../confusion.json",
"dvc_field": "actual",
"dvc_id": "workspace::../confusion.json::actual",
},
) + _update_datapoints(
confusion_v1,
{
VERSION_FIELD: {
"revision": "HEAD",
"filename": "../confusion.json",
"field": "actual",
},
REVISION_FIELD: "HEAD::../confusion.json::actual",
"dvc_rev": "HEAD",
"dvc_filename": "../confusion.json",
"dvc_field": "actual",
"dvc_id": "HEAD::../confusion.json::actual",
},
)
assert html_result["../confusion.json"]["data"]["values"] == _update_datapoints(
confusion_v2,
{
REVISION_FIELD: "workspace",
REVISION_FIELD: "workspace::../confusion.json::actual",
"dvc_rev": "workspace",
"dvc_filename": "../confusion.json",
"dvc_field": "actual",
"dvc_id": "workspace::../confusion.json::actual",
},
) + _update_datapoints(
confusion_v1,
{
REVISION_FIELD: "HEAD",
REVISION_FIELD: "HEAD::../confusion.json::actual",
"dvc_rev": "HEAD",
"dvc_filename": "../confusion.json",
"dvc_field": "actual",
"dvc_id": "HEAD::../confusion.json::actual",
},
)

Expand Down Expand Up @@ -412,12 +436,20 @@ def test_repo_with_config_plots(tmp_dir, capsys, repo_with_config_plots):
ble = _update_datapoints(
plots["data"]["linear_train.json"],
{
REVISION_FIELD: "linear_train.json",
REVISION_FIELD: "workspace::linear_train.json::y",
"dvc_id": "workspace::linear_train.json::y",
"dvc_rev": "workspace",
"dvc_filename": "linear_train.json",
"dvc_field": "y",
},
) + _update_datapoints(
plots["data"]["linear_test.json"],
{
REVISION_FIELD: "linear_test.json",
REVISION_FIELD: "workspace::linear_test.json::y",
"dvc_id": "workspace::linear_test.json::y",
"dvc_rev": "workspace",
"dvc_filename": "linear_test.json",
"dvc_field": "y",
},
)

Expand Down
Loading