Skip to content

Commit

Permalink
Add expect.score (#631)
Browse files Browse the repository at this point in the history
  • Loading branch information
hinthornw authored Apr 29, 2024
1 parent fccefaf commit 3bee3a3
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 19 deletions.
40 changes: 38 additions & 2 deletions python/langsmith/_expect.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ def test_output_semantically_close():
expect.value(response_txt).to_contain("Hello!")
# Or using a custom check
expect.value(response_txt).against(lambda x: "Hello" in x)
# You can even use this for basic metric logging within unit tests
expect.score(0.8)
expect.score(0.8, key="similarity").to_be_greater_than(0.7)
""" # noqa: E501

from __future__ import annotations
Expand Down Expand Up @@ -72,7 +77,7 @@ def __init__(
max_workers=3
)
rt = rh.get_current_run_tree()
self._run_id = rt.id if rt else run_id
self._run_id = rt.trace_id if rt else run_id

def _submit_feedback(self, score: int, message: Optional[str] = None) -> None:
if not ls_utils.test_tracking_is_disabled():
Expand Down Expand Up @@ -336,6 +341,37 @@ def value(self, value: Any) -> _Matcher:
"""
return _Matcher(self.client, "value", value, _executor=self.executor)

def score(
    self,
    score: Union[float, int],
    *,
    key: str = "score",
    source_run_id: Optional[ls_client.ID_TYPE] = None,
    comment: Optional[str] = None,
) -> _Matcher:
    """Log a numeric score as feedback and return a matcher over that score.

    Args:
        score: The score value to log.
        key: The key to use for logging the score. Defaults to "score".
        source_run_id: Passed through in the feedback payload under
            "source_run_id". Defaults to None.
        comment: Passed through in the feedback payload under "comment".
            Defaults to None.

    Returns:
        A ``_Matcher`` built from this object's client and executor, the
        given ``key`` and the given ``score``, so assertions can be chained.

    Examples:
        >>> expect.score(0.8)  # doctest: +ELLIPSIS
        <langsmith._expect._Matcher object at ...>

        >>> expect.score(0.8, key="similarity").to_be_greater_than(0.7)
    """
    # Assemble the feedback payload first; "source_info" tags the origin
    # of the feedback as this method.
    feedback_payload = {
        "score": score,
        "source_info": {"method": "expect.score"},
        "source_run_id": source_run_id,
        "comment": comment,
    }
    self._submit_feedback(key, feedback_payload)

    matcher = _Matcher(self.client, key, score, _executor=self.executor)
    return matcher

## Private Methods

@overload
Expand All @@ -354,7 +390,7 @@ def __call__(

def _submit_feedback(self, key: str, results: dict):
current_run = rh.get_current_run_tree()
run_id = current_run.id if current_run else None
run_id = current_run.trace_id if current_run else None
if not ls_utils.test_tracking_is_disabled():
self.executor.submit(
self.client.create_feedback, run_id=run_id, key=key, **results
Expand Down
6 changes: 5 additions & 1 deletion python/langsmith/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,11 @@ def _end_tests(
test_suite.client.update_project(
test_suite.experiment_id,
end_time=datetime.datetime.now(datetime.timezone.utc),
metadata={**git_info, "dataset_version": test_suite.get_version()},
metadata={
**git_info,
"dataset_version": test_suite.get_version(),
"revision_id": ls_env.get_langchain_env_var_metadata().get("revision_id"),
},
)
test_suite.wait()

Expand Down
26 changes: 13 additions & 13 deletions python/langsmith/run_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import functools
import inspect
import logging
import traceback
import uuid
import warnings
from contextvars import copy_context
Expand Down Expand Up @@ -448,8 +447,7 @@ async def async_wrapper(
):
function_result = await fr_coro
except BaseException as e:
stacktrace = traceback.format_exc()
_container_end(run_container, error=stacktrace)
_container_end(run_container, error=e)
raise e
_container_end(run_container, outputs=function_result)
return function_result
Expand Down Expand Up @@ -521,8 +519,7 @@ async def async_generator_wrapper(
except StopAsyncIteration:
pass
except BaseException as e:
stacktrace = traceback.format_exc()
_container_end(run_container, error=stacktrace)
_container_end(run_container, error=e)
raise e
if results:
if reduce_fn:
Expand Down Expand Up @@ -564,8 +561,7 @@ def wrapper(
func, *args, **kwargs
)
except BaseException as e:
stacktrace = traceback.format_exc()
_container_end(run_container, error=stacktrace)
_container_end(run_container, error=e)
raise e
_container_end(run_container, outputs=function_result)
return function_result
Expand Down Expand Up @@ -620,8 +616,7 @@ def generator_wrapper(
pass

except BaseException as e:
stacktrace = traceback.format_exc()
_container_end(run_container, error=stacktrace)
_container_end(run_container, error=e)
raise e
if results:
if reduce_fn:
Expand Down Expand Up @@ -712,7 +707,7 @@ def trace(
else:
new_run = run_trees.RunTree(
name=name,
run_id=run_id,
id=run_id or uuid.uuid4(),
reference_example_id=reference_example_id,
run_type=run_type,
extra=extra_outer,
Expand All @@ -730,7 +725,8 @@ def trace(
if exceptions_to_handle and isinstance(e, exceptions_to_handle):
tb = None
else:
tb = traceback.format_exc()
tb = utils._format_exc()
tb = f"{e.__class__.__name__}: {e}\n\n{tb}"
new_run.end(error=tb)
new_run.patch()
raise e
Expand Down Expand Up @@ -930,15 +926,19 @@ class _ContainerInput(TypedDict, total=False):
def _container_end(
container: _TraceableContainer,
outputs: Optional[Any] = None,
error: Optional[str] = None,
error: Optional[BaseException] = None,
):
"""End the run."""
run_tree = container.get("new_run")
if run_tree is None:
# Tracing is not enabled: no run tree was created, so there is nothing to end
return
outputs_ = outputs if isinstance(outputs, dict) else {"output": outputs}
run_tree.end(outputs=outputs_, error=error)
error_ = None
if error:
stacktrace = utils._format_exc()
error_ = f"{repr(error)}\n\n{stacktrace}"
run_tree.end(outputs=outputs_, error=error_)
run_tree.patch()
if error:
try:
Expand Down
9 changes: 9 additions & 0 deletions python/langsmith/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import os
import pathlib
import subprocess
import sys
import threading
import traceback
from typing import (
Any,
Callable,
Expand Down Expand Up @@ -488,3 +490,10 @@ def with_optional_cache(
yield
else:
yield


def _format_exc() -> str:
# Used internally to format exceptions without cluttering the traceback
tb_lines = traceback.format_exception(*sys.exc_info())
filtered_lines = [line for line in tb_lines if "langsmith/" not in line]
return "".join(filtered_lines)
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langsmith"
version = "0.1.51"
version = "0.1.52"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
authors = ["LangChain <support@langchain.dev>"]
license = "MIT"
Expand Down
11 changes: 9 additions & 2 deletions python/tests/unit_tests/test_run_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import sys
import time
import uuid
import warnings
from typing import Any, AsyncGenerator, Generator, Optional, cast
from unittest.mock import MagicMock, patch
Expand Down Expand Up @@ -473,8 +474,12 @@ async def some_async_func(queries: list) -> AsyncGenerator[list, None]:

@traceable
async def another_async_func(query: str) -> str:
with langsmith.trace(name="zee-cm", inputs={"query": query}) as run_tree:
rid = uuid.uuid4()
with langsmith.trace(
name="zee-cm", inputs={"query": query}, run_id=rid
) as run_tree:
run_tree.end(outputs={"query": query})
assert run_tree.id == rid
return query

@traceable
Expand Down Expand Up @@ -848,11 +853,13 @@ def child_fn(a: int, b: int) -> int:

mock_client_ = _get_mock_client()
with tracing_context(enabled=True):
rid = uuid.uuid4()
with langsmith.trace(
name="parent_fn", inputs={"a": 1, "b": 2}, client=mock_client_
name="parent_fn", inputs={"a": 1, "b": 2}, client=mock_client_, run_id=rid
) as run:
result = child_fn(1, 2)
run.end(outputs={"result": result})
assert run.id == rid

assert result == 3
assert run.name == "parent_fn"
Expand Down

0 comments on commit 3bee3a3

Please sign in to comment.