Merge branch 'dev_main' into ms-637

aiverify-foundation · Nov 12, 2024 · 57b38ea · 57b38ea
2 parents a05a092 + b7e48e4
commit 57b38ea
Show file tree

Hide file tree

Showing 4 changed files with 66 additions and 38 deletions.
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 ![Moonshot Logo](https://github.com/aiverify-foundation/moonshot/raw/main/misc/aiverify-moonshot-logo.png)
 
-**Version 0.4.10**
+**Version 0.4.11**
 
 A simple and modular tool to evaluate any LLM application.
 

diff --git a/examples/jupyter-notebook/Tutorial 1 - Basic Workflow - Execute a Benchmark.ipynb b/examples/jupyter-notebook/Tutorial 1 - Basic Workflow - Execute a Benchmark.ipynb
@@ -7,7 +7,7 @@
    "source": [
     "# Tutorial 1 - Basic Workflow - Execute Existing Tests \n",
     "\n",
-    "**Scenario**: You are a model developer and you are told to deploy a system that uses one of the OpenAI models. However, you are uncertain which model performs best for your use case and you want to assess its capabilities using existing list of benchmark in Moonshot. How can you do this? \n",
+    "**Scenario**: You are a model developer and you are told to deploy a system that uses a Large Language Model. However, you are uncertain which model performs best for your use case and you want to assess potential models' capabilities using the pre-built benchmarks in Moonshot. How can you do this? \n",
     "\n",
     "In this tutorial, you will learn how to:\n",
     "\n",
@@ -19,9 +19,7 @@
     "1. Your own copy of `moonshot-data`. You will be setting its path to the `moonshot_path` variable in the first cell\n",
     "2. Your OpenAI key, which you will set to the placeholder `ADD_NEW_TOKEN_HERE` in a cell later\n",
     "\n",
-    "**Before starting this tutorial, please make sure you have already installed `moonshot` and `moonshot-data`.** Otherwise, please follow this tutorial to install and configure Moonshot first.\n",
-    "\n",
-    "You will need just two things for this tutorial:\n",
+    "**Before starting this tutorial, please make sure you have already installed `moonshot` and `moonshot-data`.** Otherwise, please follow [this tutorial](https://aiverify-foundation.github.io/moonshot/getting_started/quick_install) to install and configure Moonshot first.\n",
     "\n"
    ]
   },
@@ -30,9 +28,11 @@
    "id": "9890dfc2-cd4a-405f-b90f-b9284e50dca6",
    "metadata": {},
    "source": [
-    "## Import Moonshot Library API\n",
+    "## Import and configure Moonshot\n",
+    "\n",
+    "In this section, we prepare our Jupyter notebook environment by importing necessary libraries required to execute an existing benchmark.\n",
     "\n",
-    "In this section, we prepare our Jupyter notebook environment by importing necessary libraries required to execute an existing benchmark."
+    "> ⚠️ **Check** that `moonshot_data_path` below matches the location where you installed `moonshot-data`, and edit the code to match your location if needed."
    ]
   },
   {
@@ -42,17 +42,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Moonshot Framework API Imports\n",
-    "# These imports from the Moonshot framework allow us to interact with the API, \n",
-    "# creating and managing various components such as recipes, cookbooks, and endpoints.\n",
+    "# Python built-ins:\n",
     "import os\n",
     "import json\n",
     "import asyncio\n",
     "import sys\n",
     "\n",
-    "# Ensure that the root of the Moonshot framework is in the system path for module importing.\n",
+    "# IF you're running this notebook from the moonshot/examples/jupyter-notebook folder, the below\n",
+    "# line will enable you to import moonshot from the local source code. If you installed moonshot\n",
+    "# from pip, you can remove this:\n",
     "sys.path.insert(0, '../../')\n",
     "\n",
+    "# Import moonshot utilities:\n",
     "from moonshot.api import (\n",
     "    api_create_endpoint,\n",
     "    api_get_all_endpoint,\n",
@@ -62,28 +63,35 @@
     "    api_set_environment_variables,\n",
     ")\n",
     "\n",
-    "# modify moonshot_path to point to your own copy of moonshot-data\n",
-    "moonshot_path = \"./data/\"\n",
+    "# modify moonshot_data_path to point to your own copy of moonshot-data\n",
+    "moonshot_data_path = \"./data\"\n",
     "env = {\n",
-    "    \"ATTACK_MODULES\": os.path.join(moonshot_path, \"attack-modules\"),\n",
-    "    \"BOOKMARKS\": os.path.join(moonshot_path, \"generated-outputs/bookmarks\"),\n",
-    "    \"CONNECTORS\": os.path.join(moonshot_path, \"connectors\"),\n",
-    "    \"CONNECTORS_ENDPOINTS\": os.path.join(moonshot_path, \"connectors-endpoints\"),\n",
-    "    \"CONTEXT_STRATEGY\": os.path.join(moonshot_path, \"context-strategy\"),\n",
-    "    \"COOKBOOKS\": os.path.join(moonshot_path, \"cookbooks\"),\n",
-    "    \"DATABASES\": os.path.join(moonshot_path, \"generated-outputs/databases\"),\n",
-    "    \"DATABASES_MODULES\": os.path.join(moonshot_path, \"databases-modules\"),\n",
-    "    \"DATASETS\": os.path.join(moonshot_path, \"datasets\"),\n",
-    "    \"IO_MODULES\": os.path.join(moonshot_path, \"io-modules\"),\n",
-    "    \"METRICS\": os.path.join(moonshot_path, \"metrics\"),\n",
-    "    \"PROMPT_TEMPLATES\": os.path.join(moonshot_path, \"prompt-templates\"),\n",
-    "    \"RECIPES\": os.path.join(moonshot_path, \"recipes\"),\n",
-    "    \"RESULTS\": os.path.join(moonshot_path, \"generated-outputs/results\"),\n",
-    "    \"RESULTS_MODULES\": os.path.join(moonshot_path, \"results-modules\"),\n",
-    "    \"RUNNERS\": os.path.join(moonshot_path, \"generated-outputs/runners\"),\n",
-    "    \"RUNNERS_MODULES\": os.path.join(moonshot_path, \"runners-modules\"),\n",
+    "    \"ATTACK_MODULES\": os.path.join(moonshot_data_path, \"attack-modules\"),\n",
+    "    \"BOOKMARKS\": os.path.join(moonshot_data_path, \"generated-outputs/bookmarks\"),\n",
+    "    \"CONNECTORS\": os.path.join(moonshot_data_path, \"connectors\"),\n",
+    "    \"CONNECTORS_ENDPOINTS\": os.path.join(moonshot_data_path, \"connectors-endpoints\"),\n",
+    "    \"CONTEXT_STRATEGY\": os.path.join(moonshot_data_path, \"context-strategy\"),\n",
+    "    \"COOKBOOKS\": os.path.join(moonshot_data_path, \"cookbooks\"),\n",
+    "    \"DATABASES\": os.path.join(moonshot_data_path, \"generated-outputs/databases\"),\n",
+    "    \"DATABASES_MODULES\": os.path.join(moonshot_data_path, \"databases-modules\"),\n",
+    "    \"DATASETS\": os.path.join(moonshot_data_path, \"datasets\"),\n",
+    "    \"IO_MODULES\": os.path.join(moonshot_data_path, \"io-modules\"),\n",
+    "    \"METRICS\": os.path.join(moonshot_data_path, \"metrics\"),\n",
+    "    \"PROMPT_TEMPLATES\": os.path.join(moonshot_data_path, \"prompt-templates\"),\n",
+    "    \"RECIPES\": os.path.join(moonshot_data_path, \"recipes\"),\n",
+    "    \"RESULTS\": os.path.join(moonshot_data_path, \"generated-outputs/results\"),\n",
+    "    \"RESULTS_MODULES\": os.path.join(moonshot_data_path, \"results-modules\"),\n",
+    "    \"RUNNERS\": os.path.join(moonshot_data_path, \"generated-outputs/runners\"),\n",
+    "    \"RUNNERS_MODULES\": os.path.join(moonshot_data_path, \"runners-modules\"),\n",
     "}\n",
     "\n",
+    "# Check user has set moonshot_data_path correctly:\n",
+    "if not os.path.isdir(env[\"ATTACK_MODULES\"]):\n",
+    "    raise ValueError(\n",
+    "        \"Configured path %s does not exist. Is moonshot-data installed at %s?\"\n",
+    "        % (env[\"ATTACK_MODULES\"], moonshot_data_path)\n",
+    "    )\n",
+    "\n",
     "# Apply the environment variables to configure the Moonshot framework.\n",
     "api_set_environment_variables(env)\n",
     "\n",
@@ -95,10 +103,20 @@
    "id": "0792527a-ab68-4826-b4c2-3d2f2a0b2a59",
    "metadata": {},
    "source": [
-    "## Run an existing benchmark\n",
-    "In this section, we will teach you how to run a benchmark. You will first learn how to create the endpoint connector with your OpenAI. Then, you will run the benchmark and view the results.\n",
+    "## Define the target model endpoint / API\n",
+    "\n",
+    "Moonshot provides [connectors](https://aiverify-foundation.github.io/moonshot/api_reference/api_connector/) to a range of different LLM hosting providers - such as OpenAI (direct or Azure), Hugging Face, Amazon Bedrock, and Google Gemini.\n",
+    "\n",
+    "There are some [example endpoint configurations](https://github.com/aiverify-foundation/moonshot-data/tree/main/connectors-endpoints) provided in `moonshot-data`, but they don't include API keys or other credentials, so you'll usually need to edit these configurations, or add your own, to connect to your target LLM.\n",
+    "\n",
+    "You can register new Moonshot endpoints directly from Python, as shown below.\n",
+    "\n",
+    "▶️ **TODO: Edit the cell below to configure your own LLM.**\n",
     "\n",
-    "**Replace `ADD_NEW_TOKEN_HERE` with your own OpenAI token below**"
+    "> If you're using OpenAI, you'll just need to replace `ADD_NEW_TOKEN_HERE` below with your own OpenAI token.\n",
+    ">\n",
+    "> If you're using a different provider, check out the [list of connector IDs](https://github.com/aiverify-foundation/moonshot-data/tree/main/connectors) provided by `moonshot-data`. Different connectors have different required parameters. For example, the `amazon-bedrock-connector` can automatically pick up credentials configured in the AWS CLI - so you'll usually leave `token` blank for this connector type.\n",
+    "\n"
    ]
   },
   {
@@ -135,6 +153,16 @@
     "print(f\"The newly created endpoint id: {endpoint_id}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "f1d04074",
+   "metadata": {},
+   "source": [
+    "You'll see that running the cell above creates a new configuration file under your Moonshot data `CONNECTORS_ENDPOINTS` folder.\n",
+    "\n",
+    "These stored endpoint IDs are what we'll reference when running tests in Moonshot."
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "cbdb703b-94ca-4515-85e6-9dde0bfb9c69",
@@ -146,7 +174,7 @@
     "\n",
     "In this tutorial, we will run a `cookbook` called `leaderboard-cookbook`. This cookbook contains a set of popular benchmarks (e.g., `mmlu`) that can be used to assess the capability of the model. \n",
     "\n",
-    "*For the purpose of this tutorial, we will configure our `runner` to run 1 prompt from every recipe in this cookbook*"
+    "*For the purpose of this tutorial, we will configure our `runner` to run 1 prompt from every recipe in this cookbook - on the endpoint we created*"
    ]
   },
   {
@@ -658,7 +686,7 @@
    "source": [
     "## Beautifying the results\n",
     "\n",
-    "The result above is shown in our raw JSON file. To beautify the results, you can use the`rich` library to put them into a nice table."
+    "The result above is shown in our raw JSON file. To beautify the results, you can use the `rich` library to put them into a nice table."
    ]
   },
   {

diff --git a/moonshot/integrations/web_api/app.py b/moonshot/integrations/web_api/app.py
@@ -71,7 +71,7 @@ def create_app(cfg: providers.Configuration) -> CustomFastAPI:
     }
 
     app: CustomFastAPI = CustomFastAPI(
-        title="Project Moonshot", version="0.4.10", **app_kwargs
+        title="Project Moonshot", version="0.4.11", **app_kwargs
     )
 
     if cfg.cors.enabled():

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "aiverify-moonshot"
-version = "0.4.10"
+version = "0.4.11"
 authors = [
     { name="AI Verify Foundation", email="info@aiverify.sg" }
 ]
@@ -97,7 +97,7 @@ allow-direct-references = true
 
 [tool.poetry]
 name = "aiverify-moonshot"
-version = "0.4.10"
+version = "0.4.11"
 description = "A simple and modular tool to evaluate and red-team any LLM application."
 authors = ["The Moonshot Team <our.moonshot.team@gmail.com>"]
 readme = "README.md"