explodinggradients · jjmachan · Dec 7, 2023 · Dec 5, 2023
diff --git a/docs/howtos/integrations/index.md b/docs/howtos/integrations/index.md
@@ -10,4 +10,5 @@ llamaindex.ipynb
 langchain.ipynb
 langsmith.ipynb
 langfuse.ipynb
+zeno.ipynb
 :::
diff --git a/docs/howtos/integrations/zeno.ipynb b/docs/howtos/integrations/zeno.ipynb
@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Visualizing Ragas Results with Zeno\n",
+    "\n",
+    "You can use the [Zeno](https://zenoml.com) evaluation platform to easily visualize and explore the results of your Ragas evaluation.\n",
+    "\n",
+    "> Check out what the result of this tutorial looks like [here](https://hub.zenoml.com/project/b35c83b8-0b22-4b9c-aedb-80964011d7a7/ragas%20FICA%20eval)\n",
+    "\n",
+    "First, install the `zeno-client` package:\n",
+    "\n",
+    "```bash\n",
+    "pip install zeno-client\n",
+    "```\n",
+    "\n",
+    "Next, create an account at [hub.zenoml.com](https://hub.zenoml.com) and generate an API key on your [account page](https://hub.zenoml.com/account).\n",
+    "\n",
+    "We can now pick up the evaluation where we left off at the [Getting Started](../../getstarted/evaluation.md) guide:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "import pandas as pd\n",
+    "from datasets import load_dataset\n",
+    "from ragas import evaluate\n",
+    "from ragas.metrics import (\n",
+    "    answer_relevancy,\n",
+    "    context_precision,\n",
+    "    context_recall,\n",
+    "    faithfulness,\n",
+    ")\n",
+    "from zeno_client import ZenoClient, ZenoMetric"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set API keys\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"your-openai-api-key\"\n",
+    "os.environ[\"ZENO_API_KEY\"] = \"your-zeno-api-key\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fiqa_eval = load_dataset(\"explodinggradients/fiqa\", \"ragas_eval\")\n",
+    "result = evaluate(\n",
+    "    fiqa_eval[\"baseline\"],\n",
+    "    metrics=[\n",
+    "        context_precision,\n",
+    "        faithfulness,\n",
+    "        answer_relevancy,\n",
+    "        context_recall,\n",
+    "    ],\n",
+    ")\n",
+    "\n",
+    "df = result.to_pandas()\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can now take the `df` with our data and results and upload it to Zeno.\n",
+    "\n",
+    "We first create a project with a custom RAG view specification and the metric columns we want to do evaluation across:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client = ZenoClient(os.environ[\"ZENO_API_KEY\"])\n",
+    "\n",
+    "project = client.create_project(\n",
+    "    name=\"Ragas FICA eval\",\n",
+    "    description=\"Evaluation of RAG model using Ragas on the FICA dataset\",\n",
+    "    view={\n",
+    "        \"data\": {\n",
+    "            \"type\": \"vstack\",\n",
+    "            \"keys\": {\n",
+    "                \"question\": {\"type\": \"markdown\"},\n",
+    "                \"texts\": {\n",
+    "                    \"type\": \"list\",\n",
+    "                    \"elements\": {\"type\": \"markdown\"},\n",
+    "                    \"border\": True,\n",
+    "                    \"pad\": True,\n",
+    "                },\n",
+    "            },\n",
+    "        },\n",
+    "        \"label\": {\n",
+    "            \"type\": \"markdown\",\n",
+    "        },\n",
+    "        \"output\": {\n",
+    "            \"type\": \"vstack\",\n",
+    "            \"keys\": {\n",
+    "                \"answer\": {\"type\": \"markdown\"},\n",
+    "                \"ground_truths\": {\n",
+    "                    \"type\": \"list\",\n",
+    "                    \"elements\": {\"type\": \"markdown\"},\n",
+    "                    \"border\": True,\n",
+    "                    \"pad\": True,\n",
+    "                },\n",
+    "            },\n",
+    "        },\n",
+    "        \"size\": \"large\",\n",
+    "    },\n",
+    "    metrics=[\n",
+    "        ZenoMetric(\n",
+    "            name=\"context_precision\", type=\"mean\", columns=[\"context_precision\"]\n",
+    "        ),\n",
+    "        ZenoMetric(name=\"faithfulness\", type=\"mean\", columns=[\"faithfulness\"]),\n",
+    "        ZenoMetric(name=\"answer_relevancy\", type=\"mean\", columns=[\"answer_relevancy\"]),\n",
+    "        ZenoMetric(name=\"context_recall\", type=\"mean\", columns=[\"context_recall\"]),\n",
+    "    ],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we upload the base dataset with the questions and ground truths:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_df = pd.DataFrame(\n",
+    "    {\n",
+    "        \"data\": df.apply(\n",
+    "            lambda x: {\"question\": x[\"question\"], \"texts\": list(x[\"contexts\"])}, axis=1\n",
+    "        ),\n",
+    "        \"label\": df[\"ground_truths\"].apply(lambda x: \"\\n\".join(x)),\n",
+    "    }\n",
+    ")\n",
+    "data_df[\"id\"] = data_df.index\n",
+    "\n",
+    "project.upload_dataset(\n",
+    "    data_df, id_column=\"id\", data_column=\"data\", label_column=\"label\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Lastly, we upload the RAG outputs and Ragas metrics. \n",
+    "\n",
+    "You can run this for any number of models when doing comparison and iteration:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output_df = df[\n",
+    "    [\n",
+    "        \"context_precision\",\n",
+    "        \"faithfulness\",\n",
+    "        \"answer_relevancy\",\n",
+    "        \"context_recall\",\n",
+    "    ]\n",
+    "].copy()\n",
+    "\n",
+    "output_df['output'] = df.apply(\n",
+    "    lambda x: {\"answer\": x[\"answer\"], \"ground_truths\": list(x[\"ground_truths\"])}, axis=1\n",
+    ")\n",
+    "output_df[\"id\"] = output_df.index\n",
+    "\n",
+    "project.upload_system(\n",
+    "    output_df, name=\"Base System\", id_column=\"id\", output_column=\"output\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Reach out to the Zeno team on [Discord](https://discord.gg/km62pDKAkE) or at [hello@zenoml.com](mailto:hello@zenoml.com) if you have any questions!"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "zeno-build",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}