deploy: 9b6c0c6

langchain-ai · Nov 21, 2023 · 91593de · 91593de
commit 91593de
Show file tree

Hide file tree

Showing 176 changed files with 23,507 additions and 0 deletions.
diff --git a/.buildinfo b/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: 01d274e2f7565ff162ac2bf7e824ea45
+tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle
diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree
diff --git a/.doctrees/notebooks/datasets.doctree b/.doctrees/notebooks/datasets.doctree
diff --git a/.doctrees/notebooks/extraction/email.doctree b/.doctrees/notebooks/extraction/email.doctree
diff --git a/.doctrees/notebooks/rag_evaluations.doctree b/.doctrees/notebooks/rag_evaluations.doctree
diff --git a/.doctrees/notebooks/rag_langchain_docs.doctree b/.doctrees/notebooks/rag_langchain_docs.doctree
diff --git a/.doctrees/notebooks/rag_semi_structured.doctree b/.doctrees/notebooks/rag_semi_structured.doctree
diff --git a/.doctrees/notebooks/tool_usage/multiverse_math.doctree b/.doctrees/notebooks/tool_usage/multiverse_math.doctree
diff --git a/.doctrees/notebooks/tool_usage/relational_data.doctree b/.doctrees/notebooks/tool_usage/relational_data.doctree
diff --git a/.doctrees/notebooks/tool_usage/typewriter_1.doctree b/.doctrees/notebooks/tool_usage/typewriter_1.doctree
diff --git a/.doctrees/notebooks/tool_usage/typewriter_26.doctree b/.doctrees/notebooks/tool_usage/typewriter_26.doctree
diff --git a/.nojekyll b/.nojekyll
diff --git a/_sources/index.md b/_sources/index.md
@@ -0,0 +1,54 @@
+🚧 Under Active Development 🚧
+
+# 🦜💪 LangChain Benchmarks
+
+A package to help benchmark various LLM related tasks.
+
+The benchmarks are organized by end-to-end use cases, and
+utilize [LangSmith](https://smith.langchain.com/) heavily.
+
+We have several goals in open sourcing this:
+
+- Showing how we collect our benchmark datasets for each task
+- Showing what benchmark datasets we use for each task
+- Showing how we evaluate each task
+- Encouraging others to benchmark their solutions on these tasks (we are always looking for better ways of doing things!)
+
+We currently include the following tasks:
+- [CSV Question Answering](https://github.com/langchain-ai/langchain-benchmarks/tree/main/csv-qa)
+- [Extraction](https://github.com/langchain-ai/langchain-benchmarks/tree/main/extraction)
+- [Q&A over the LangChain docs](https://github.com/langchain-ai/langchain-benchmarks/tree/main/langchain-docs-benchmarking)
+- [Meta-evaluation of 'correctness' evaluators](https://github.com/langchain-ai/langchain-benchmarks/tree/main/meta-evals)
+```{toctree}
+:maxdepth: 2
+:caption: Introduction
+
+./notebooks/datasets
+```
+
+
+```{toctree}
+:maxdepth: 2
+:caption: Tool Usage
+
+./notebooks/tool_usage/relational_data
+./notebooks/tool_usage/multiverse_math
+./notebooks/tool_usage/typewriter_1
+./notebooks/tool_usage/typewriter_26
+```
+
+```{toctree}
+:maxdepth: 2
+:caption: Extraction
+
+./notebooks/extraction/email
+```
+
+```{toctree}
+:maxdepth: 2
+:caption: RAG
+
+./notebooks/rag_langchain_docs
+./notebooks/rag_semi_structured
+./notebooks/rag_evaluations
+```
diff --git a/_sources/notebooks/datasets.ipynb b/_sources/notebooks/datasets.ipynb
@@ -0,0 +1,203 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "033684fb-65b2-4586-a959-68c614741ca2",
+   "metadata": {},
+   "source": [
+    "# Datasets\n",
+    "\n",
+    "Here, we'll see how to work with LangSmith datasets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6d272fbf-710e-4a49-a0da-67e010541905",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain_benchmarks import clone_public_dataset, download_public_dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "18ee0f96-e5c4-4ae9-aebf-7d8b88c51662",
+   "metadata": {},
+   "source": [
+    "Let's first download the dataset to the local file system"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "58b94f6d-0c91-4361-9b22-f758ffaa150a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fetching examples...\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5a2fad8c0c3549ec96a3b38fe8a002b0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/21 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done fetching examples.\n"
+     ]
+    }
+   ],
+   "source": [
+    "download_public_dataset(\n",
+    "    \"https://api.smith.langchain.com/public/e95d45da-aaa3-44b3-ba2b-7c15ff6e46f5/examples\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "841db832-b0d3-4fd1-8531-1154ec9b3caa",
+   "metadata": {},
+   "source": [
+    "We can take a look at the first two examples:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "664e90fc-af84-4c5f-a3dd-5d9ffe649650",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[\n",
+      "  {\n",
+      "    \"created_at\": \"2023-11-15T15:26:53.511629\",\n",
+      "    \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
+      "    \"id\": \"0703a989-2693-4039-a1f6-7281fc1b4cb0\",\n",
+      "    \"inputs\": {\n",
+      "      \"question\": \"do bob and alice live in the same city?\"\n",
+      "    },\n",
+      "    \"modified_at\": \"2023-11-15T15:26:53.511629\",\n",
+      "    \"outputs\": {\n",
+      "      \"expected_steps\": [\n",
+      "        \"find_users_by_name\",\n",
+      "        \"get_user_location\",\n",
+      "        \"get_city_for_location\",\n",
+      "        \"get_user_location\",\n",
+      "        \"get_city_for_location\"\n",
+      "      ],\n",
+      "      \"order_matters\": false,\n",
+      "      \"reference\": \"no\"\n",
+      "    },\n",
+      "    \"runs\": []\n",
+      "  },\n",
+      "  {\n",
+      "    \"created_at\": \"2023-11-15T15:26:53.491359\",\n",
+      "    \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
+      "    \"id\": \"b258b95a-9524-4da7-b758-c5481109322d\",\n",
+      "    \"inputs\": {\n",
+      "      \"question\": \"Is it likely that Donna is outside with an umbrella at this time?\"\n",
+      "    },\n",
+      "    \"modified_at\": \"2023-11-15T15:26:53.491359\",\n",
+      "    \"outputs\": {\n",
+      "      \"expected_steps\": [\n",
+      "        \"find_users_by_name\",\n",
+      "        \"get_user_location\",\n",
+      "        \"get_current_time_for_location\",\n",
+      "        \"get_current_weather_for_location\"\n",
+      "      ],\n",
+      "      \"order_matters\": false,\n",
+      "      \"reference\": \"yes\"\n",
+      "    },\n",
+      "    \"runs\": []\n",
+      "  }\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "\n",
+    "with open(\"./e95d45da-aaa3-44b3-ba2b-7c15ff6e46f5.json\", \"r\", encoding=\"utf-8\") as f:\n",
+    "    print(json.dumps(json.load(f)[:2], indent=2, sort_keys=True))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2c6cf01f-466b-406d-b4c7-2395747780fd",
+   "metadata": {},
+   "source": [
+    "We can also clone the dataset to our local tenant"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e4dea4df-2f1c-436b-a71c-49ffb2295ccc",
+   "metadata": {},
+   "source": [
+    "Executing this command will clone the dataset to your local tenant. \n",
+    "For this to work, you must have a LangSmith account set up."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18d0b905-2a6a-4752-a7cb-8653bd9049e3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "clone_public_dataset(\n",
+    "    \"https://api.smith.langchain.com/public/e95d45da-aaa3-44b3-ba2b-7c15ff6e46f5/examples\",\n",
+    "    dataset_name=\"Agent Dataset\",\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}