
Commit f6c6cde

Add option for reasoning
1 parent df54ea1 commit f6c6cde

File tree: 10 files changed (+154 additions, −32 deletions)

.github/workflows/release.yml

Lines changed: 2 additions & 5 deletions

```diff
@@ -74,15 +74,12 @@ jobs:
           inputs: >-
             ./dist/*.tar.gz
             ./dist/*.whl
-      - name: Debug Print github.ref_name
-        run: >-
-          echo "github.ref_name: ${{ github.ref_name }}"
       - name: Create GitHub Release
         env:
           GITHUB_TOKEN: ${{ github.token }}
         run: >-
           gh release create
-          'v0.1.7'
+          'v0.1.8'
           --repo '${{ github.repository }}'
           --notes ""
       - name: Upload artifact signatures to GitHub Release
@@ -93,5 +90,5 @@ jobs:
           # sigstore-produced signatures and certificates.
         run: >-
           gh release upload
-          'v0.1.7' dist/**
+          'v0.1.8' dist/**
           --repo '${{ github.repository }}'
```

README.md

Lines changed: 7 additions & 3 deletions

````diff
@@ -25,12 +25,16 @@ pip install weco
 ```
 
 ## Features
+- Synchronous & Asynchronous client.
+- Batch API
+- Multimodality (Language & Vision)
+- Interpretability (view the reasoning behind outputs)
+
+
+## What We Offer
 
 - The **build** function enables quick and easy prototyping of new functions via LLMs through just natural language. We encourage users to do this through our [web console](https://weco-app.vercel.app/function) for maximum control and ease of use, however, you can also do this through our API as shown in [here](examples/cookbook.ipynb).
 - The **query** function allows you to test and use the newly created function in your own code.
-- We offer asynchronous versions of the above clients.
-- We provide a **batch_query** functions that allows users to batch functions for various inputs as well as multiple inputs for the same function in a query. This is helpful to make a large number of queries more efficiently.
-- We also offer multimodality capabilities. You can now query our client with both **language** AND **vision** inputs!
 
 We provide both services in two ways:
 - `weco.WecoAI` client to be used when you want to maintain the same client service across a portion of code. This is better for dense service usage.
````
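The new "Interpretability" bullet corresponds to a `return_reasoning` flag on `query`. A minimal sketch of the response shape the client returns when the flag is set — the keys come from this commit's diff, but the values below are invented for illustration (no real API call is made):

```python
# Illustrative response shape for query(..., return_reasoning=True).
# Only the keys are taken from the commit; the values are made up.
query_response = {
    "output": {"sentiment": "positive", "explanation": "Enthusiastic wording."},
    "reasoning_steps": [
        "Scanned the text for sentiment-bearing phrases.",
        "Found 'love' with no negations; concluded positive.",
    ],
    "in_tokens": 12,
    "out_tokens": 9,
    "latency_ms": 142.0,
}

# Print the output fields, then the reasoning behind them.
for key, value in query_response["output"].items():
    print(f"{key}: {value}")
for i, step in enumerate(query_response["reasoning_steps"]):
    print(f"Step {i + 1}: {step}")
```

Without the flag, `reasoning_steps` is simply absent — the tests in this commit assert exactly that for the plain sync, async, and batch paths.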

examples/cookbook.ipynb

Lines changed: 52 additions & 6 deletions

```diff
@@ -144,7 +144,7 @@
     "with open(\"/path/to/home_exterior.jpeg\", \"rb\") as img_file:\n",
     "    my_home_exterior = base64.b64encode(img_file.read()).decode('utf-8')\n",
     "\n",
-    "response = query(\n",
+    "query_response = query(\n",
     "    fn_name=fn_name,\n",
     "    text_input=request,\n",
     "    images_input=[\n",
@@ -154,7 +154,10 @@
     "    ]\n",
     ")\n",
     "\n",
-    "print(response)"
+    "for key, value in query_response[\"output\"].items(): print(f\"{key}: {value}\")\n",
+    "print(f\"Input Tokens: {query_response['in_tokens']}\")\n",
+    "print(f\"Output Tokens: {query_response['out_tokens']}\")\n",
+    "print(f\"Latency: {query_response['latency_ms']} ms\")"
    ]
   },
   {
@@ -214,7 +217,10 @@
     "    fn_name=fn_name,\n",
     "    text_input=\"I want to train a model to predict house prices using the Boston Housing dataset hosted on Kaggle.\"\n",
     ")\n",
-    "for key, value in query_response.items(): print(f\"{key}: {value}\")"
+    "for key, value in query_response[\"output\"].items(): print(f\"{key}: {value}\")\n",
+    "print(f\"Input Tokens: {query_response['in_tokens']}\")\n",
+    "print(f\"Output Tokens: {query_response['out_tokens']}\")\n",
+    "print(f\"Latency: {query_response['latency_ms']} ms\")"
    ]
   },
   {
@@ -274,7 +280,12 @@
     "query_responses = batch_query(\n",
     "    fn_names=fn_name,\n",
     "    batch_inputs=[input_1, input_2]\n",
-    ")"
+    ")\n",
+    "for i, query_response in enumerate(query_responses):\n",
+    "    print(\"-\"*50)\n",
+    "    print(f\"For input {i + 1}\")\n",
+    "    for key, value in query_response[\"output\"].items(): print(f\"{key}: {value}\")\n",
+    "    print(\"-\"*50)"
    ]
   },
   {
@@ -323,14 +334,49 @@
     "    fn_name=fn_name,\n",
     "    text_input=\"I want to train a model to predict house prices using the Boston Housing dataset hosted on Kaggle.\"\n",
     ")\n",
-    "for key, value in query_response.items(): print(f\"{key}: {value}\")"
+    "for key, value in query_response[\"output\"].items(): print(f\"{key}: {value}\")\n",
+    "print(f\"Input Tokens: {query_response['in_tokens']}\")\n",
+    "print(f\"Output Tokens: {query_response['out_tokens']}\")\n",
+    "print(f\"Latency: {query_response['latency_ms']} ms\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## A/B Testing with Function Versions"
+    "## Interpretability"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can now understand why a model generated an output simply by passing `return_reasoning=True` at query time!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from weco import build, query\n",
+    "\n",
+    "# Describe the task you want the function to perform\n",
+    "fn_name, fn_desc = build(task_description=task_description)\n",
+    "print(f\"AI Function {fn_name} built. This does the following - \\n{fn_desc}.\")\n",
+    "\n",
+    "# Query the function with a specific input\n",
+    "query_response = query(\n",
+    "    fn_name=fn_name,\n",
+    "    text_input=\"I want to train a model to predict house prices using the Boston Housing dataset hosted on Kaggle.\",\n",
+    "    return_reasoning=True\n",
+    ")\n",
+    "for key, value in query_response[\"output\"].items(): print(f\"{key}: {value}\")\n",
+    "for i, step in enumerate(query_response[\"reasoning_steps\"]): print(f\"Step {i+1}: {step}\")\n",
+    "print(f\"Input Tokens: {query_response['in_tokens']}\")\n",
+    "print(f\"Output Tokens: {query_response['out_tokens']}\")\n",
+    "print(f\"Latency: {query_response['latency_ms']} ms\")"
    ]
   },
   {
```

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -10,7 +10,7 @@ authors = [
 ]
 description = "A client facing API for interacting with the WeCo AI function builder service."
 readme = "README.md"
-version = "0.1.7"
+version = "0.1.8"
 license = {text = "MIT"}
 requires-python = ">=3.8"
 dependencies = ["asyncio", "httpx[http2]", "pillow"]
```

tests/test_asynchronous.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -20,6 +20,7 @@ async def assert_query_response(query_response):
     assert isinstance(query_response["in_tokens"], int)
     assert isinstance(query_response["out_tokens"], int)
     assert isinstance(query_response["latency_ms"], float)
+    assert "reasoning_steps" not in query_response
 
 
 @pytest.fixture
```

tests/test_batching.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -76,6 +76,7 @@ def test_batch_query_image(image_evaluator, image_inputs):
         assert isinstance(query_response["in_tokens"], int)
         assert isinstance(query_response["out_tokens"], int)
         assert isinstance(query_response["latency_ms"], float)
+        assert "reasoning_steps" not in query_response
 
         output = query_response["output"]
         assert set(output.keys()) == {"description", "objects"}
```

tests/test_reasoning.py

Lines changed: 54 additions & 0 deletions

```diff
@@ -0,0 +1,54 @@
+import pytest
+
+from weco import build, query
+
+
+def assert_query_response(query_response):
+    assert isinstance(query_response, dict)
+    assert isinstance(query_response["output"], dict)
+    assert isinstance(query_response["reasoning_steps"], list)
+    for step in query_response["reasoning_steps"]: assert isinstance(step, str)
+    assert isinstance(query_response["in_tokens"], int)
+    assert isinstance(query_response["out_tokens"], int)
+    assert isinstance(query_response["latency_ms"], float)
+
+
+@pytest.fixture
+def text_reasoning_evaluator():
+    fn_name, version_number, fn_desc = build(
+        task_description="Evaluate the sentiment of the given text. Provide a json object with 'sentiment' and 'explanation' keys.",
+        multimodal=False,
+    )
+    return fn_name, version_number, fn_desc
+
+
+def test_text_reasoning_query(text_reasoning_evaluator):
+    fn_name, version_number, _ = text_reasoning_evaluator
+    query_response = query(fn_name=fn_name, version_number=version_number, text_input="I love this product!", return_reasoning=True)
+
+    assert_query_response(query_response)
+    assert set(query_response["output"].keys()) == {"sentiment", "explanation"}
+
+@pytest.fixture
+def vision_reasoning_evaluator():
+    fn_name, version_number, fn_desc = build(
+        task_description="Evaluate, solve and arrive at a numerical answer for the image provided. Perform any additional things if instructed. Provide a json object with 'answer' and 'explanation' keys.",
+        multimodal=True,
+    )
+    return fn_name, version_number, fn_desc
+
+
+def test_vision_reasoning_query(vision_reasoning_evaluator):
+    fn_name, version_number, _ = vision_reasoning_evaluator
+    query_response = query(
+        fn_name=fn_name,
+        version_number=version_number,
+        text_input="Find x and y.",
+        images_input=[
+            "https://i.ytimg.com/vi/cblHUeq3bkE/hq720.jpg?sqp=-oaymwEhCK4FEIIDSFryq4qpAxMIARUAAAAAGAElAADIQj0AgKJD&rs=AOn4CLAKn3piY91QRCBzRgnzAPf7MPrjDQ"
+        ],
+        return_reasoning=True,
+    )
+
+    assert_query_response(query_response)
+    assert set(query_response["output"].keys()) == {"answer", "explanation"}
```

tests/test_synchronous.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -19,6 +19,7 @@ def assert_query_response(query_response):
     assert isinstance(query_response["in_tokens"], int)
     assert isinstance(query_response["out_tokens"], int)
     assert isinstance(query_response["latency_ms"], float)
+    assert "reasoning_steps" not in query_response
 
 
 @pytest.fixture
```

weco/client.py

Lines changed: 23 additions & 10 deletions

```diff
@@ -39,7 +39,7 @@ class WecoAI:
         Whether to use HTTP/2 protocol for the HTTP requests. Default is True.
     """
 
-    def __init__(self, api_key: str = None, timeout: float = 120.0, http2: bool = True) -> None:
+    def __init__(self, api_key: Union[str, None] = None, timeout: float = 120.0, http2: bool = True) -> None:
         """Initializes the WecoAI client with the provided API key and base URL.
 
         Parameters
@@ -67,7 +67,8 @@ def __init__(self, api_key: str = None, timeout: float = 120.0, http2: bool = Tr
         self.api_key = api_key
         self.http2 = http2
         self.timeout = timeout
-        self.base_url = "https://function.api.weco.ai"
+        # self.base_url = "https://function.api.weco.ai"
+        self.base_url = "https://function-dev.api.weco.ai"
         # Setup clients
         self.client = httpx.Client(http2=http2, timeout=timeout)
         self.async_client = httpx.AsyncClient(http2=http2, timeout=timeout)
@@ -153,12 +154,15 @@ def _process_query_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
         for _warning in response.get("warnings", []):
             warnings.warn(_warning)
 
-        return {
+        returned_response = {
             "output": response["response"],
             "in_tokens": response["num_input_tokens"],
             "out_tokens": response["num_output_tokens"],
             "latency_ms": response["latency_ms"],
         }
+        if "reasoning_steps" in response:
+            returned_response["reasoning_steps"] = response["reasoning_steps"]
+        return returned_response
 
     def _build(
         self, task_description: str, multimodal: bool, is_async: bool
@@ -393,6 +397,7 @@ def _query(
         version_number: Optional[int],
         text_input: Optional[str],
         images_input: Optional[List[str]],
+        return_reasoning: Optional[bool]
     ) -> Union[Dict[str, Any], Coroutine[Any, Any, Dict[str, Any]]]:
         """Internal method to handle both synchronous and asynchronous query requests.
 
@@ -408,6 +413,8 @@
             The text input to the function.
         images_input : List[str], optional
             A list of image URLs or images encoded in base64 with their metadata to be sent as input to the function.
+        return_reasoning : bool, optional
+            Whether to return reasoning for the output.
 
         Returns
         -------
@@ -434,7 +441,7 @@
 
         # Make the request
         endpoint = "query"
-        data = {"name": fn_name, "text": text_input, "images": image_urls, "version_number": version_number}
+        data = {"name": fn_name, "text": text_input, "images": image_urls, "version_number": version_number, "return_reasoning": return_reasoning}
         request = self._make_request(endpoint=endpoint, data=data, is_async=is_async)
 
         if is_async:
@@ -454,6 +461,7 @@ async def aquery(
         version_number: Optional[int] = -1,
         text_input: Optional[str] = "",
         images_input: Optional[List[str]] = [],
+        return_reasoning: Optional[bool] = False
     ) -> Dict[str, Any]:
         """Asynchronously queries a function with the given function ID and input.
 
@@ -467,6 +475,8 @@
             The text input to the function.
         images_input : List[str], optional
             A list of image URLs or images encoded in base64 with their metadata to be sent as input to the function.
+        return_reasoning : bool, optional
+            Whether to return reasoning for the output. Default is False.
 
         Returns
         -------
@@ -475,7 +485,7 @@
             and the latency in milliseconds.
         """
         return await self._query(
-            fn_name=fn_name, version_number=version_number, text_input=text_input, images_input=images_input, is_async=True
+            fn_name=fn_name, version_number=version_number, text_input=text_input, images_input=images_input, return_reasoning=return_reasoning, is_async=True
         )
 
     def query(
@@ -484,6 +494,7 @@
         version_number: Optional[int] = -1,
         text_input: Optional[str] = "",
         images_input: Optional[List[str]] = [],
+        return_reasoning: Optional[bool] = False
     ) -> Dict[str, Any]:
         """Synchronously queries a function with the given function ID and input.
 
@@ -497,6 +508,8 @@
             The text input to the function.
         images_input : List[str], optional
             A list of image URLs or images encoded in base64 with their metadata to be sent as input to the function.
+        return_reasoning : bool, optional
+            Whether to return reasoning for the output. Default is False.
 
         Returns
         -------
@@ -505,26 +518,26 @@ def query(
             and the latency in milliseconds.
         """
         return self._query(
-            fn_name=fn_name, version_number=version_number, text_input=text_input, images_input=images_input, is_async=False
+            fn_name=fn_name, version_number=version_number, text_input=text_input, images_input=images_input, return_reasoning=return_reasoning, is_async=False
        )
 
     def batch_query(
-        self, fn_name: str, batch_inputs: List[Dict[str, Any]], version_number: Optional[int] = -1
+        self, fn_name: str, batch_inputs: List[Dict[str, Any]], version_number: Optional[int] = -1, return_reasoning: Optional[bool] = False
     ) -> List[Dict[str, Any]]:
         """Batch queries a function version with a list of inputs.
 
         Parameters
         ----------
         fn_name : str
             The name of the function or a list of function names to query.
-
         batch_inputs : List[Dict[str, Any]]
             A list of inputs for the functions to query. The input must be a dictionary containing the data to be processed. e.g.,
             when providing for a text input, the dictionary should be {"text_input": "input text"}, for an image input, the dictionary should be {"images_input": ["url1", "url2", ...]}
             and for a combination of text and image inputs, the dictionary should be {"text_input": "input text", "images_input": ["url1", "url2", ...]}.
-
         version_number : int, optional
             The version number of the function to query. If not provided, the latest version will be used. Pass -1 to use the latest version.
+        return_reasoning : bool, optional
+            Whether to return reasoning for the output. Default is False.
 
         Returns
         -------
@@ -535,7 +548,7 @@ def batch_query(
 
         async def run_queries():
             tasks = list(
-                map(lambda fn_input: self.aquery(fn_name=fn_name, version_number=version_number, **fn_input), batch_inputs)
+                map(lambda fn_input: self.aquery(fn_name=fn_name, version_number=version_number, return_reasoning=return_reasoning, **fn_input), batch_inputs)
             )
             return await asyncio.gather(*tasks)
```
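The `_process_query_response` change is the heart of the feature: `reasoning_steps` is attached only when the backend sends it, so responses without reasoning keep their previous shape and existing callers are unaffected. A standalone sketch of that shaping logic (a hypothetical free function mirroring the method, not part of the `weco` API):

```python
from typing import Any, Dict

def process_query_response(response: Dict[str, Any]) -> Dict[str, Any]:
    """Shape a raw /query payload; include reasoning_steps only when present."""
    shaped = {
        "output": response["response"],
        "in_tokens": response["num_input_tokens"],
        "out_tokens": response["num_output_tokens"],
        "latency_ms": response["latency_ms"],
    }
    if "reasoning_steps" in response:  # absent unless return_reasoning=True was sent
        shaped["reasoning_steps"] = response["reasoning_steps"]
    return shaped

# Without reasoning the key is omitted; with it, it is passed through.
raw = {"response": {"answer": 42}, "num_input_tokens": 3,
       "num_output_tokens": 1, "latency_ms": 10.0}
assert "reasoning_steps" not in process_query_response(raw)
assert process_query_response({**raw, "reasoning_steps": ["step"]})["reasoning_steps"] == ["step"]
```

This is exactly what the updated tests pin down: `test_reasoning.py` requires the key, while the sync, async, and batch tests assert it is absent by default.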

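`batch_query` simply threads the new flag through every `aquery` call before gathering them. The fan-out can be sketched with a stub coroutine standing in for the network call (`fake_aquery` is hypothetical, written only to mirror the gather pattern in the diff):

```python
import asyncio
from typing import Any, Dict, List

async def fake_aquery(fn_name: str, version_number: int,
                      return_reasoning: bool, **fn_input) -> Dict[str, Any]:
    # Stand-in for WecoAI.aquery: echoes its arguments instead of hitting the API.
    return {"fn": fn_name, "version": version_number,
            "reasoning_requested": return_reasoning, **fn_input}

def batch_query(fn_name: str, batch_inputs: List[Dict[str, Any]],
                version_number: int = -1,
                return_reasoning: bool = False) -> List[Dict[str, Any]]:
    async def run_queries():
        # Same fan-out as the client: one aquery task per input, gathered in order.
        tasks = [fake_aquery(fn_name, version_number, return_reasoning, **fn_input)
                 for fn_input in batch_inputs]
        return await asyncio.gather(*tasks)
    return asyncio.run(run_queries())

results = batch_query("demo_fn", [{"text_input": "a"}, {"text_input": "b"}],
                      return_reasoning=True)
print(results[0]["reasoning_requested"])  # prints True
```

Because `asyncio.gather` preserves task order, each result lines up with its input, which is what lets the cookbook's batch cell print "For input {i + 1}" against `query_responses[i]`.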