Skip to content

Commit

Permalink
Register the new dataset (#155)
Browse files Browse the repository at this point in the history
Register the new dataset
  • Loading branch information
eyurtsev authored Dec 19, 2023
1 parent e7bac2c commit af9a980
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 40 deletions.
1 change: 1 addition & 0 deletions langchain_benchmarks/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
type_writer.TYPE_WRITER_TASK,
type_writer_26_funcs.TYPE_WRITER_26_FUNCS_TASK,
relational_data.RELATIONAL_DATA_TASK,
multiverse_math.MULTIVERSE_MATH,
multiverse_math.MULTIVERSE_MATH_TINY,
email_task.EMAIL_EXTRACTION_TASK,
chat_extraction.CHAT_EXTRACTION_TASK,
Expand Down
10 changes: 3 additions & 7 deletions langchain_benchmarks/tool_usage/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,13 @@
* Agents must output "intermediate_steps" in their run outputs.
* The dataset must have "expected_steps" in its outputs.
"""
from typing import Literal, Optional, Union

import re
from typing import Any, Optional
from typing import Any, Literal, Optional, Union

from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import StringEvaluator
from langchain.callbacks.manager import collect_runs
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import EvaluatorType, load_evaluator
from langchain.evaluation import EvaluatorType, StringEvaluator, load_evaluator
from langchain.evaluation.schema import StringEvaluator
from langchain.smith import RunEvalConfig
from langchain_core.language_models import BaseChatModel, BaseLanguageModel
Expand Down
100 changes: 67 additions & 33 deletions langchain_benchmarks/tool_usage/tasks/multiverse_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,36 +127,6 @@ def get_environment() -> ToolUsageEnvironment:
)


# Task definition for the tiny variant of the Multiverse Math benchmark.
# NOTE(review): `instructions` says distributivity does not apply, while
# `description` lists distributivity among the retained properties; the two
# texts disagree -- confirm which one is intended.
MULTIVERSE_MATH_TINY = ToolUsageTask(
name="Multiverse Math (Tiny)",
# URL of the public LangSmith dataset backing this task.
dataset_id="https://smith.langchain.com/public/594f9f60-30a0-49bf-b075-f44beabf546a/d",
# The factory is passed by reference; it is not called here.
create_environment=get_environment,
# Adjacent string literals below are concatenated into a single string.
instructions=(
"You are requested to solve math questions in an alternate "
"mathematical universe. The operations have been altered to yield "
"different results than expected. Do not guess the answer or rely on your "
" innate knowledge of math. Use the provided tools to answer the question. "
"While associativity and commutativity apply, distributivity does not. Answer "
"the question using the fewest possible tools. Only include the numeric "
"response without any clarifications."
),
# A trailing backslash inside the literal suppresses the newline at that point.
description=(
"""\
An environment that contains a few basic math operations, but with altered results.
For example, multiplication of 5*3 will be re-interpreted as 5*3*1.1. \
The basic operations retain some basic properties, such as commutativity, \
associativity, and distributivity; however, the results are different than expected.
The objective of this task is to evaluate the ability to use the provided tools to \
solve simple math questions and ignore any innate knowledge about math.
"""
),
# Presumably selects the output evaluator by name -- confirm against the
# evaluators module.
eval_params={
"output_evaluation": "qa_math_without_question",
},
)

# Source dataset used to create the public dataset in LangSmith
DATASET_TINY = [
{
Expand Down Expand Up @@ -275,6 +245,70 @@ def get_environment() -> ToolUsageEnvironment:
},
]

# Task definition for the tiny variant of the Multiverse Math benchmark.
#
# The description previously claimed that distributivity is retained, which
# contradicted the instructions given to the agent ("distributivity does
# not"). The description now agrees with the instructions; the instructions
# text itself is untouched so that prompts stay reproducible.
MULTIVERSE_MATH_TINY = ToolUsageTask(
    name="Multiverse Math (Tiny)",
    # URL of the public LangSmith dataset backing this task.
    dataset_id="https://smith.langchain.com/public/594f9f60-30a0-49bf-b075-f44beabf546a/d",
    # The factory is passed by reference; it is not called here.
    create_environment=get_environment,
    # Adjacent string literals below are concatenated into a single string.
    instructions=(
        "You are requested to solve math questions in an alternate "
        "mathematical universe. The operations have been altered to yield "
        "different results than expected. Do not guess the answer or rely on your "
        " innate knowledge of math. Use the provided tools to answer the question. "
        "While associativity and commutativity apply, distributivity does not. Answer "
        "the question using the fewest possible tools. Only include the numeric "
        "response without any clarifications."
    ),
    # A trailing backslash inside the literal suppresses the newline at that
    # point, so the text content must stay at column 0.
    description=(
        """\
An environment that contains a few basic math operations, but with altered results.
For example, multiplication of 5*3 will be re-interpreted as 5*3*1.1. \
The basic operations retain some basic properties, such as commutativity and \
associativity (but not distributivity); however, the results are different than \
expected.
The objective of this task is to evaluate the ability to use the provided tools to \
solve simple math questions and ignore any innate knowledge about math.
This is a tiny version of the Multiverse Math task, with 10 examples only.
"""
    ),
    # Presumably selects the output evaluator by name -- confirm against the
    # evaluators module.
    eval_params={
        "output_evaluation": "qa_math_without_question",
    },
)

# Task definition for the full Multiverse Math benchmark.
#
# The description previously claimed that distributivity is retained, which
# contradicted the instructions given to the agent ("distributivity does
# not"). The description now agrees with the instructions; the instructions
# text itself is untouched so that prompts stay reproducible.
MULTIVERSE_MATH = ToolUsageTask(
    name="Multiverse Math",
    # URL of the public LangSmith dataset backing this task.
    dataset_id="https://smith.langchain.com/public/47ed57bc-e852-4f84-a23e-cce4793864e9/d",
    # The factory is passed by reference; it is not called here.
    create_environment=get_environment,
    # Adjacent string literals below are concatenated into a single string.
    instructions=(
        "You are requested to solve math questions in an alternate "
        "mathematical universe. The operations have been altered to yield "
        "different results than expected. Do not guess the answer or rely on your "
        " innate knowledge of math. Use the provided tools to answer the question. "
        "While associativity and commutativity apply, distributivity does not. Answer "
        "the question using the fewest possible tools. Only include the numeric "
        "response without any clarifications."
    ),
    # A trailing backslash inside the literal suppresses the newline at that
    # point, so the text content must stay at column 0.
    description=(
        """\
An environment that contains a few basic math operations, but with altered results.
For example, multiplication of 5*3 will be re-interpreted as 5*3*1.1. \
The basic operations retain some basic properties, such as commutativity and \
associativity (but not distributivity); however, the results are different than \
expected.
The objective of this task is to evaluate the ability to use the provided tools to \
solve simple math questions and ignore any innate knowledge about math.
This task is associated with 20 test examples.
"""
    ),
    # Presumably selects the output evaluator by name -- confirm against the
    # evaluators module.
    eval_params={
        "output_evaluation": "qa_math_without_question",
    },
)


def _create_dataset() -> None:
"""Create a dataset with the langsmith client."""
Expand All @@ -283,11 +317,11 @@ def _create_dataset() -> None:
client = Client()

dataset = client.create_dataset(
dataset_name=MULTIVERSE_MATH_TINY.name,
description=MULTIVERSE_MATH_TINY.description,
dataset_name=MULTIVERSE_MATH.name,
description=MULTIVERSE_MATH.description,
)

for example in DATASET_TINY:
for example in DATASET:
client.create_example(
inputs={
"question": example["question"],
Expand Down

0 comments on commit af9a980

Please sign in to comment.