[SC 7429] Add title as an optional parameter in the run_test #244

Merged
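This PR threads an optional title through run_test, the description-generation helpers, and TestResult, so a custom display name can override the name derived from the test ID. A minimal sketch of how the new argument could be used, reusing the custom PNG test registered in the notebook changes below (the import path and the title string are illustrative assumptions, not part of this diff):

# Sketch only. Assumptions: run_test is importable from validmind.tests
# (validmind/tests/run.py defines it), and the my_custom_tests.MyPNGCorrelationMatrix
# test from the notebook below has already been registered with @vm.test.
from validmind.tests import run_test

result = run_test(
    "my_custom_tests.MyPNGCorrelationMatrix",
    params={"path": "../../images/pearson-correlation-matrix.png"},
    title="Pearson Correlation Matrix (PNG)",  # new optional parameter added by this PR
)
result.log()  # the custom title is included in the serialized payload (see TestResult.serialize below)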
2 changes: 1 addition & 1 deletion docs/_build/search.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/_build/validmind.html
@@ -136,7 +136,7 @@ <h1 class="modulename">
<section id="__version__">
<div class="attr variable">
<span class="name">__version__</span> =
<span class="default_value">&#39;2.6.3&#39;</span>
<span class="default_value">&#39;2.6.4&#39;</span>


</div>
@@ -78,7 +78,7 @@ <h1 class="modulename">
<div class="decorator">@tasks(&#39;text_classification&#39;, &#39;text_summarization&#39;)</div>

<span class="def">def</span>
<span class="name">TextDescription</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">dataset</span><span class="p">:</span> <span class="n"><a href="../../../vm_models.html#VMDataset">validmind.vm_models.VMDataset</a></span>,</span><span class="param"> <span class="n">unwanted_tokens</span><span class="p">:</span> <span class="nb">set</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;mrs&#39;</span><span class="p">,</span> <span class="s1">&#39;dollar&#39;</span><span class="p">,</span> <span class="s1">&#39; &#39;</span><span class="p">,</span> <span class="s1">&#39;dr&#39;</span><span class="p">,</span> <span class="s1">&#39;us&#39;</span><span class="p">,</span> <span class="s2">&quot;&#39;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&#39;s&quot;</span><span class="p">,</span> <span class="s1">&#39;ms&#39;</span><span class="p">,</span> <span class="s1">&#39;s&#39;</span><span class="p">,</span> <span class="s2">&quot;s&#39;&quot;</span><span class="p">,</span> <span class="s1">&#39;``&#39;</span><span class="p">,</span> <span class="s1">&#39;mr&#39;</span><span class="p">}</span>,</span><span class="param"> <span class="n">lang</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;english&#39;</span></span><span class="return-annotation">):</span></span>
<span class="name">TextDescription</span><span class="signature pdoc-code multiline">(<span class="param"> <span class="n">dataset</span><span class="p">:</span> <span class="n"><a href="../../../vm_models.html#VMDataset">validmind.vm_models.VMDataset</a></span>,</span><span class="param"> <span class="n">unwanted_tokens</span><span class="p">:</span> <span class="nb">set</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;s&#39;</span><span class="p">,</span> <span class="s1">&#39; &#39;</span><span class="p">,</span> <span class="s2">&quot;s&#39;&quot;</span><span class="p">,</span> <span class="s1">&#39;us&#39;</span><span class="p">,</span> <span class="s2">&quot;&#39;&#39;&quot;</span><span class="p">,</span> <span class="s1">&#39;mr&#39;</span><span class="p">,</span> <span class="s1">&#39;dollar&#39;</span><span class="p">,</span> <span class="s1">&#39;``&#39;</span><span class="p">,</span> <span class="s1">&#39;ms&#39;</span><span class="p">,</span> <span class="s1">&#39;dr&#39;</span><span class="p">,</span> <span class="s1">&#39;mrs&#39;</span><span class="p">,</span> <span class="s2">&quot;&#39;s&quot;</span><span class="p">}</span>,</span><span class="param"> <span class="n">lang</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;english&#39;</span></span><span class="return-annotation">):</span></span>


</div>
46 changes: 42 additions & 4 deletions notebooks/code_samples/custom_tests/implement_custom_tests.ipynb
@@ -200,7 +200,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -362,7 +362,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -395,7 +395,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -806,6 +806,44 @@
"![screenshot showing image custom test](../../images/image-in-custom-metric.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to log an image as a test result, you can do so by passing the path to the image as a parameter to the custom test and then opening the file in the test function. Here's an example:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"@vm.test(\"my_custom_tests.MyPNGCorrelationMatrix\")\n",
"def Image(path: str):\n",
" \"\"\"Opens a png image file and logs it as a test result to ValidMind\"\"\"\n",
" if not path.endswith(\".png\"):\n",
" raise ValueError(\"Image must be a PNG file\")\n",
"\n",
" # return raw image bytes\n",
" with open(path, \"rb\") as f:\n",
" return f.read()\n",
" \n",
"run_test(\n",
" \"my_custom_tests.MyPNGCorrelationMatrix\",\n",
" params={\"path\": \"../../images/pearson-correlation-matrix.png\"},\n",
").log()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The image is displayed in the test result:\n",
"\n",
"![screenshot showing image from file](../../images/pearson-correlation-matrix-test-output.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -906,7 +944,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.10.13"
}
},
"nbformat": 4,
Binary file added notebooks/images/pearson-correlation-matrix.png
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -10,7 +10,7 @@ description = "ValidMind Library"
license = "Commercial License"
name = "validmind"
readme = "README.pypi.md"
version = "2.6.3"
version = "2.6.4"

[tool.poetry.dependencies]
python = ">=3.8.1,<3.12"
2 changes: 1 addition & 1 deletion validmind/__version__.py
@@ -1 +1 @@
__version__ = "2.6.3"
__version__ = "2.6.4"
10 changes: 8 additions & 2 deletions validmind/ai/test_descriptions.py
@@ -6,7 +6,7 @@
import os
import re
from concurrent.futures import ThreadPoolExecutor
from typing import List, Union
from typing import List, Optional, Union

from jinja2 import Template

@@ -71,6 +71,7 @@ def generate_description(
tables: List[ResultTable] = None,
metric: Union[float, int] = None,
figures: List[Figure] = None,
title: Optional[str] = None,
):
"""Generate the description for the test results"""
if not tables and not figures and not metric:
@@ -84,7 +85,7 @@
client, model = get_client_and_model()

# get last part of test id
test_name = test_id.split(".")[-1]
test_name = title or test_id.split(".")[-1]

# TODO: fully support metrics
if metric is not None:
@@ -110,6 +111,7 @@
input_data = {
"test_name": test_name,
"test_description": test_description,
"title": title,
"summary": summary,
"figures": [figure._get_b64_url() for figure in ([] if tables else figures)],
}
@@ -134,6 +136,7 @@ def background_generate_description(
tables: List[ResultTable] = None,
figures: List[Figure] = None,
metric: Union[int, float] = None,
title: Optional[str] = None,
):
def wrapped():
try:
@@ -143,6 +146,7 @@ def wrapped():
tables=tables,
figures=figures,
metric=metric,
title=title,
)
except Exception as e:
logger.error(f"Failed to generate description: {e}")
@@ -159,6 +163,7 @@ def get_result_description(
figures: List[Figure] = None,
metric: Union[int, float] = None,
should_generate: bool = True,
title: Optional[str] = None,
):
"""Get Metadata Dictionary for a Test or Metric Result

@@ -207,6 +212,7 @@ def get_result_description(
tables=tables,
figures=figures,
metric=metric,
title=title,
)

else:
1 change: 0 additions & 1 deletion validmind/api_client.py
@@ -348,7 +348,6 @@ async def alog_test_result(
request_params["section_id"] = section_id
if position is not None:
request_params["position"] = position

try:
return await _post(
"log_test_results",
25 changes: 24 additions & 1 deletion validmind/tests/run.py
@@ -7,7 +7,7 @@
import time
from datetime import datetime
from inspect import getdoc
from typing import Any, Dict, List, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union
from uuid import uuid4

from validmind import __version__
@@ -138,12 +138,14 @@ def build_test_result(
params: Union[Dict[str, Any], None],
description: str,
generate_description: bool = True,
title: Optional[str] = None,
):
"""Build a TestResult object from a set of raw test function outputs"""
ref_id = str(uuid4())

result = TestResult(
result_id=test_id,
title=title,
ref_id=ref_id,
inputs=inputs,
params=params if params else None, # None if empty dict or None
@@ -162,6 +164,7 @@
figures=result.figures,
metric=result.metric,
should_generate=generate_description,
title=title,
)

return result
@@ -175,6 +178,7 @@ def _run_composite_test(
params: Union[Dict[str, Any], None],
param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
generate_description: bool,
title: Optional[str] = None,
):
"""Run a composite test i.e. a test made up of multiple metrics"""
results = [
@@ -186,6 +190,7 @@
param_grid=param_grid,
show=False,
generate_description=False,
title=title,
)
for metric_id in metric_ids
]
@@ -209,6 +214,7 @@ def _run_composite_test(
[_test_description(result.description, num_lines=1) for result in results]
), # join truncated (first line only) test descriptions
generate_description=generate_description,
title=title,
)


@@ -221,6 +227,7 @@ def _run_comparison_test(
params: Union[Dict[str, Any], None],
param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
generate_description: bool,
title: Optional[str] = None,
):
"""Run a comparison test i.e. a test that compares multiple outputs of a test across
different input and/or param combinations"""
@@ -240,6 +247,7 @@
params=config["params"],
show=False,
generate_description=False,
title=title,
)
for config in run_test_configs
]
@@ -260,6 +268,7 @@
params=combined_params,
description=description,
generate_description=generate_description,
title=title,
)


@@ -273,6 +282,7 @@
param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None] = None,
show: bool = True,
generate_description: bool = True,
title: Optional[str] = None,
**kwargs,
) -> TestResult:
"""Run a ValidMind or custom test
@@ -295,6 +305,7 @@
unit_metrics (list, optional): Unit metric IDs to run as composite metric
show (bool, optional): Whether to display results. Defaults to True.
generate_description (bool, optional): Whether to generate a description. Defaults to True.
title (str, optional): Custom title for the test result

Returns:
TestResult: A TestResult object containing the test results
@@ -325,6 +336,7 @@
if input_grid or param_grid:
result = _run_comparison_test(
test_id=test_id,
title=title,
name=name,
unit_metrics=unit_metrics,
inputs=inputs,
@@ -342,10 +354,20 @@
test_id=test_id,
metric_ids=unit_metrics,
inputs=inputs,
params=params,
generate_description=generate_description,
title=title,
)

elif input_grid or param_grid:
result = _run_comparison_test(
test_id=test_id,
inputs=inputs,
input_grid=input_grid,
params=params,
param_grid=param_grid,
generate_description=generate_description,
title=title,
)

else:
Expand All @@ -364,6 +386,7 @@ def run_test(
params=param_kwargs,
description=getdoc(test_func),
generate_description=generate_description,
title=title,
)

end_time = time.perf_counter()
16 changes: 12 additions & 4 deletions validmind/vm_models/result/result.py
@@ -114,6 +114,7 @@ class TestResult(Result):

name: str = "Test Result"
ref_id: str = None
title: Optional[str] = None
description: Optional[Union[str, DescriptionFuture]] = None
metric: Optional[Union[int, float]] = None
tables: Optional[List[ResultTable]] = None
@@ -122,10 +123,15 @@
params: Optional[Dict[str, Any]] = None
inputs: Optional[Dict[str, Union[List[VMInput], VMInput]]] = None
metadata: Optional[Dict[str, Any]] = None

title: Optional[str] = None
_was_description_generated: bool = False
_unsafe: bool = False

@property
def test_name(self) -> str:
"""Get the test name, using custom title if available."""
return self.title or test_id_to_name(self.result_id)

def __repr__(self) -> str:
attrs = [
attr
@@ -179,13 +185,14 @@ def to_widget(self):
self._was_description_generated = True

if self.metric is not None and not self.tables and not self.figures:
return HTML(f"<h3>{self.result_id}: <code>{self.metric}</code></h3>")
return HTML(
f"<h3>{self.test_name}: <code>{self.metric}</code></h3>"
)

template_data = {
"test_name": test_id_to_name(self.result_id),
"test_name": self.test_name,
"passed_icon": "" if self.passed is None else "✅" if self.passed else "❌",
"description": self.description.replace("h3", "strong"),
# TODO: add inputs
"params": (
json.dumps(self.params, cls=NumpyEncoder, indent=2)
if self.params
@@ -253,6 +260,7 @@ def serialize(self):
"""Serialize the result for the API"""
return {
"test_name": self.result_id,
"title": self.title,
"ref_id": self.ref_id,
"params": self.params,
"inputs": [_input.input_id for _input in self._get_flat_inputs()],