
Commit

Merge branch 'dev' into dev-tasks
Diwank Tomer committed Jul 14, 2024
2 parents 80c5af9 + 1aacc65 commit ab0ec70
Showing 88 changed files with 2,678 additions and 2,353 deletions.
25 changes: 16 additions & 9 deletions .env.example
@@ -5,24 +5,27 @@ COZO_AUTH_TOKEN=myauthkey
COZO_HOST=http://memory-store:9070
COZO_PORT=9070
COZO_ROCKSDB_DIR=cozo.db
DTYPE=bfloat16
DTYPE=float16
EMBEDDING_SERVICE_URL=http://text-embeddings-inference/embed
GATEWAY_PORT=80
OPENAI_API_KEY=""
GPU_MEMORY_UTILIZATION=0.95
GPU_MEMORY_UTILIZATION=0.90

HF_TOKEN=""
HUGGING_FACE_HUB_TOKEN=""
JWT_SHARED_KEY=this_shared_key_is_32_48_or_64_bytes_long
MAX_MODEL_LEN=1024
JWT_SHARED_KEY=

MAX_MODEL_LEN=8192
MAX_NUM_SEQS=1
MNT_DIR=/data
GF_SECURITY_ADMIN_PASSWORD=changethis
MODEL_API_KEY=myauthkey
MODEL_API_KEY_HEADER_NAME=Authorization
MODEL_API_URL=http://model-serving:8000
MODEL_INFERENCE_URL=http://model-serving:8000/v1
MODEL_ID=BAAI/llm-embedder
MODEL_NAME = "julep-ai/samantha-1-turbo"
MODEL_ID=BAAI/bge-m3

# MODEL_NAME="OpenPipe/Hermes-2-Theta-Llama-3-8B-32k"
MODEL_NAME="julep-ai/Hermes-2-Theta-Llama-3-8B"

SKIP_CHECK_DEVELOPER_HEADERS=true
SUMMARIZATION_TOKENS_THRESHOLD=2048
TEMPERATURE_SCALING_FACTOR=0.9
@@ -33,4 +36,8 @@ TEMPORAL_WORKER_URL=temporal:7233
TP_SIZE=1
TRUNCATE_EMBED_TEXT=true
TRAEFIK_LOG_LEVEL=DEBUG
WORKER_URL=temporal:7233
WORKER_URL=temporal:7233

AGENTS_API_DEBUG=false
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
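
The new keys above are consumed by the agents-api settings module (see the agents_api/env.py hunk later in this diff). Below is a minimal sketch, assuming the environs package that env.py already uses, of how a few of these values are read; the defaults simply mirror the example file and are illustrative only.

```python
# Minimal sketch using the environs package (as in agents_api/env.py):
# read a handful of the keys defined in .env.example with typed accessors.
from environs import Env

env = Env()
env.read_env()  # picks up values from a local .env file if one exists

embedding_service_url: str = env.str(
    "EMBEDDING_SERVICE_URL", default="http://text-embeddings-inference/embed"
)
embedding_model_id: str = env.str("EMBEDDING_MODEL_ID", default="BAAI/bge-m3")
model_inference_url: str = env.str(
    "MODEL_INFERENCE_URL", default="http://model-serving:8000/v1"
)
agents_api_debug: bool = env.bool("AGENTS_API_DEBUG", default=False)

print(embedding_model_id, "->", embedding_service_url)
```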
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/embed_docs.py
@@ -1,6 +1,6 @@
from pydantic import UUID4
from temporalio import activity
from agents_api.env import docs_embedding_model_id
from agents_api.env import embedding_model_id
from agents_api.models.docs.embed_docs import (
embed_docs_snippets_query,
)
@@ -13,7 +13,7 @@
@activity.defn
async def embed_docs(doc_id: UUID4, title: str, content: list[str]) -> None:
indices, snippets = list(zip(*enumerate(content)))
model = EmbeddingModel.from_model_name(docs_embedding_model_id)
model = EmbeddingModel.from_model_name(embedding_model_id)
embeddings = await model.embed(
[
{
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/summarization.py
@@ -11,7 +11,7 @@
entries_summarization_query,
)
from agents_api.common.protocol.entries import Entry
from ..model_registry import JULEP_MODELS
from ..model_registry import LOCAL_MODELS
from ..env import model_inference_url, model_api_key, summarization_model_name
from agents_api.rec_sum.entities import get_entities
from agents_api.rec_sum.summarize import summarize_messages
@@ -135,7 +135,7 @@ async def run_prompt(
) -> str:
api_base = None
api_key = None
if model in JULEP_MODELS:
if model in LOCAL_MODELS:
api_base = model_inference_url
api_key = model_api_key
model = f"openai/{model}"
10 changes: 5 additions & 5 deletions agents-api/agents_api/embed_models_registry.py
@@ -10,7 +10,7 @@
PromptTooBigError,
UnknownTokenizerError,
)
from agents_api.env import docs_embedding_service_url
from agents_api.env import embedding_service_url


def normalize_l2(x):
@@ -83,7 +83,7 @@ async def embed(
embeddings = await embed(
input,
embedding_service_url=self.embedding_service_url
or docs_embedding_service_url,
or embedding_service_url,
embedding_model_name=self.embedding_model_name,
)
elif self.embedding_provider == "openai":
@@ -130,7 +130,7 @@ def normalize(
tokenizer=tiktoken.encoding_for_model("text-embedding-3-large"),
),
"Alibaba-NLP/gte-large-en-v1.5": EmbeddingModel(
embedding_service_url=docs_embedding_service_url,
embedding_service_url=embedding_service_url,
embedding_provider="julep",
embedding_model_name="Alibaba-NLP/gte-large-en-v1.5",
original_embedding_dimensions=1024,
@@ -139,7 +139,7 @@ def normalize(
tokenizer=Tokenizer.from_pretrained("Alibaba-NLP/gte-large-en-v1.5"),
),
"BAAI/bge-m3": EmbeddingModel(
embedding_service_url=docs_embedding_service_url,
embedding_service_url=embedding_service_url,
embedding_provider="julep",
embedding_model_name="BAAI/bge-m3",
original_embedding_dimensions=1024,
@@ -148,7 +148,7 @@ def normalize(
tokenizer=Tokenizer.from_pretrained("BAAI/bge-m3"),
),
"BAAI/llm-embedder": EmbeddingModel(
embedding_service_url=docs_embedding_service_url,
embedding_service_url=embedding_service_url,
embedding_provider="julep",
embedding_model_name="BAAI/llm-embedder",
original_embedding_dimensions=1024,
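
Every registry entry above points at the shared embedding_service_url, and the module's normalize_l2 helper (whose body is elided from this hunk) is presumably what scales returned vectors to unit length. A hedged numpy sketch of what such a helper typically looks like, offered as an assumption rather than the repository's actual implementation:

```python
# Hedged sketch of an L2-normalization helper; the real normalize_l2 body is
# elided from this hunk, so treat this as an illustration, not the actual code.
import numpy as np


def normalize_l2(x) -> np.ndarray:
    """Scale a vector (or batch of vectors) to unit L2 norm."""
    x = np.asarray(x, dtype=np.float32)
    norm = np.linalg.norm(x, axis=-1, keepdims=True)
    return x / np.maximum(norm, 1e-12)  # guard against division by zero


print(normalize_l2([3.0, 4.0]))  # -> [0.6 0.8]
```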
15 changes: 3 additions & 12 deletions agents-api/agents_api/env.py
@@ -44,20 +44,12 @@
"SKIP_CHECK_DEVELOPER_HEADERS", default=False
)

# embedding service URL
embedding_service_url: str = env.str(
"EMBEDDING_SERVICE_URL", default="http://0.0.0.0:8082/embed"
"EMBEDDING_SERVICE_URL", default="http://0.0.0.0:8083/embed"
)

docs_embedding_service_url: str = env.str(
"DOCS_EMBEDDING_SERVICE_URL", default="http://0.0.0.0:8083/embed"
)

embedding_model_id: str = env.str(
"EMBEDDING_MODEL_ID", default="BAAI/bge-large-en-v1.5"
)

docs_embedding_model_id: str = env.str("DOCS_EMBEDDING_MODEL_ID", default="BAAI/bge-m3")
embedding_model_id: str = env.str("EMBEDDING_MODEL_ID", default="BAAI/bge-m3")

truncate_embed_text: bool = env.bool("TRUNCATE_EMBED_TEXT", default=False)

@@ -84,8 +76,7 @@
temporal_worker_url=temporal_worker_url,
temporal_namespace=temporal_namespace,
openai_api_key=openai_api_key,
docs_embedding_model_id=docs_embedding_model_id,
docs_embedding_service_url=docs_embedding_service_url,
docs_embedding_service_url=embedding_service_url,
embedding_model_id=embedding_model_id,
)

105 changes: 99 additions & 6 deletions agents-api/agents_api/model_registry.py
@@ -2,14 +2,19 @@
Model Registry maintains a list of supported models and their configs.
"""

from typing import Dict
import ast
import json
from agents_api.clients.worker.types import ChatML
from agents_api.common.exceptions.agents import (
AgentModelNotValid,
MissingAgentModelAPIKeyError,
)
import litellm
from litellm.utils import get_valid_models
from pydantic import BaseModel
from typing import Dict, Literal, Optional
import xml.etree.ElementTree as ET


GPT4_MODELS: Dict[str, int] = {
# stable model names:
@@ -93,9 +98,18 @@

OPENAI_MODELS = {**GPT4_MODELS, **TURBO_MODELS, **GPT3_5_MODELS, **GPT3_MODELS}

JULEP_MODELS = {
LOCAL_MODELS = {
"julep-ai/samantha-1-turbo": 32768,
"julep-ai/samantha-1-turbo-awq": 32768,
"TinyLlama/TinyLlama_v1.1": 2048,
"casperhansen/llama-3-8b-instruct-awq": 8192,
"julep-ai/Hermes-2-Theta-Llama-3-8B": 8192,
"OpenPipe/Hermes-2-Theta-Llama-3-8B-32k": 32768,
}

LOCAL_MODELS_WITH_TOOL_CALLS = {
"OpenPipe/Hermes-2-Theta-Llama-3-8B-32k": 32768,
"julep-ai/Hermes-2-Theta-Llama-3-8B": 8192,
}

OLLAMA_MODELS = {
@@ -104,9 +118,40 @@

CHAT_MODELS = {**GPT4_MODELS, **TURBO_MODELS, **CLAUDE_MODELS}

ALL_AVAILABLE_MODELS = litellm.model_list + list(LOCAL_MODELS.keys())
VALID_MODELS = get_valid_models() + list(LOCAL_MODELS.keys())


class FunctionCall(BaseModel):
arguments: dict
"""
The arguments to call the function with, as generated by the model in JSON
format. Note that the model does not always generate valid JSON, and may
hallucinate parameters not defined by your function schema. Validate the
arguments in your code before calling your function.
"""

name: str
"""The name of the function to call."""


class FunctionDefinition(BaseModel):
name: str
description: Optional[str] = None
parameters: Optional[Dict[str, object]] = None


ALL_AVAILABLE_MODELS = litellm.model_list + list(JULEP_MODELS.keys())
VALID_MODELS = get_valid_models() + list(JULEP_MODELS.keys())
class FunctionSignature(BaseModel):
function: FunctionDefinition
type: Literal["function"]


class PromptSchema(BaseModel):
Role: str
Objective: str
Tools: str
Schema: str
Instructions: str


def validate_configuration(model: str):
@@ -131,7 +176,7 @@ def load_context(init_context: list[ChatML], model: str):
}
for msg in init_context
]
elif model in JULEP_MODELS:
elif model in LOCAL_MODELS:
init_context = [
{"name": msg.name, "role": msg.role, "content": msg.content}
for msg in init_context
@@ -141,6 +186,54 @@ def load_context(init_context: list[ChatML], model: str):
return init_context


def validate_and_extract_tool_calls(assistant_content):
validation_result = False
tool_calls = []
error_message = None

try:
# wrap content in root element
xml_root_element = f"<root>{assistant_content}</root>"
root = ET.fromstring(xml_root_element)

# extract JSON data
for element in root.findall(".//tool_call"):
json_data = None
try:
if element.text is None:
continue

json_text = element.text.strip()

try:
# Prioritize json.loads for better error handling
json_data = json.loads(json_text)
except json.JSONDecodeError as json_err:
try:
# Fallback to ast.literal_eval if json.loads fails
json_data = ast.literal_eval(json_text)
except (SyntaxError, ValueError) as eval_err:
error_message = (
f"JSON parsing failed with both json.loads and ast.literal_eval:\n"
f"- JSON Decode Error: {json_err}\n"
f"- Fallback Syntax/Value Error: {eval_err}\n"
f"- Problematic JSON text: {json_text}"
)
continue
except BaseException as e:
error_message = f"Cannot strip text: {e}"

if json_data is not None:
tool_calls.append(json_data)
validation_result = True

except ET.ParseError as err:
error_message = f"XML Parse Error: {err}"

# Return default values if no valid data is extracted
return validation_result, tool_calls, error_message


def get_extra_settings(settings):
extra_settings = (
dict(
@@ -151,7 +244,7 @@ def get_extra_settings(settings):
logit_bias=settings.logit_bias,
preset=settings.preset.name if settings.preset else None,
)
if settings.model in JULEP_MODELS
if settings.model in LOCAL_MODELS
else {}
)

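
validate_and_extract_tool_calls (added above) scans assistant output for <tool_call> elements and accepts either strict JSON or Python-literal payloads. A short usage sketch; the sample assistant strings below are invented for illustration:

```python
# Usage sketch for validate_and_extract_tool_calls; the assistant outputs are
# made-up examples of the two payload styles the parser tolerates.
from agents_api.model_registry import validate_and_extract_tool_calls

strict_json = (
    "Let me look that up.\n"
    "<tool_call>\n"
    '{"arguments": {"location": "Berlin"}, "name": "get_weather"}\n'
    "</tool_call>"
)
python_literal = (
    "<tool_call>{'arguments': {'location': 'Berlin'}, 'name': 'get_weather'}</tool_call>"
)

ok, calls, err = validate_and_extract_tool_calls(strict_json)
print(ok, calls, err)  # True [{'arguments': {'location': 'Berlin'}, 'name': 'get_weather'}] None

ok, calls, err = validate_and_extract_tool_calls(python_literal)
print(ok, calls)       # True -- json.loads fails, ast.literal_eval picks it up
```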
35 changes: 35 additions & 0 deletions agents-api/agents_api/prompt_assets/sys_prompt.yml
@@ -0,0 +1,35 @@
Role: |
You are a function calling AI agent with self-recursion.
You can call only one function at a time and analyse the data you get from the function response.
You are provided with function signatures within <tools></tools> XML tags.
The current date is: {date}.
Objective: |
You may use agentic frameworks for reasoning and planning to help with the user query.
Please call a function and wait for function results to be provided to you in the next iteration.
Don't make assumptions about what values to plug into function arguments.
Once you have called a function, results will be fed back to you within <tool_response></tool_response> XML tags.
Don't make assumptions about tool results if <tool_response> XML tags are not present, since the function hasn't been executed yet.
Analyze the data once you get the results and call another function.
At each iteration, please continue adding your analysis to the previous summary.
Your final response should directly answer the user query with an analysis or summary of the results of the function calls.
Tools: |
Here are the available tools:
<tools> {{agent.tools}} </tools>
If the provided function signatures don't include the function you must call, you may write executable Python code in markdown syntax and call the code_interpreter() function as follows:
<tool_call>
{{"arguments": {{"code_markdown": <python-code>, "name": "code_interpreter"}}}}
</tool_call>
Make sure that the JSON object above, including its code markdown block, is parseable with json.loads(), and that the XML block is parseable with XML ElementTree.
Schema: |
Use the following pydantic model json schema for each tool call you will make:
{schema}
Instructions: |
At the very first turn you don't have <tool_results>, so you shouldn't make up the results.
Please keep a running summary with analysis of previous function results and summaries from previous iterations.
Do not stop calling functions until the task has been accomplished or you've reached the maximum of 10 iterations.
Calling multiple functions at once can overload the system and increase cost, so please call one function at a time.
If you plan to continue with analysis, always call another function.
For each function call, return a valid JSON object (using double quotes) with the function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{{"arguments": <args-dict>, "name": <function-name>}}
</tool_call>
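
The top-level keys in sys_prompt.yml (Role, Objective, Tools, Schema, Instructions) line up with the PromptSchema model added to model_registry.py in this commit. Below is a hedged sketch of loading the file into that model and joining the sections into a single system prompt; the diff does not show where the file is actually consumed, so this wiring is an assumption:

```python
# Hedged sketch: parse sys_prompt.yml into the PromptSchema model added above
# and concatenate the sections. How the repo really consumes this file is not
# shown in the diff, so this is illustrative only.
import yaml  # PyYAML

from agents_api.model_registry import PromptSchema

with open("agents-api/agents_api/prompt_assets/sys_prompt.yml") as f:
    raw = yaml.safe_load(f)

prompt = PromptSchema(**raw)
system_prompt = "\n".join(
    [prompt.Role, prompt.Objective, prompt.Tools, prompt.Schema, prompt.Instructions]
)
print(system_prompt[:200])
```

Note that the {date}, {schema}, and {{agent.tools}} placeholders survive as literal text here; filling them in is left to whatever templating step the service applies.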
4 changes: 2 additions & 2 deletions agents-api/agents_api/rec_sum/generate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from tenacity import retry, stop_after_attempt, wait_fixed
from agents_api.env import model_inference_url, model_api_key
from agents_api.model_registry import JULEP_MODELS
from agents_api.model_registry import LOCAL_MODELS
from litellm import acompletion


@@ -11,7 +11,7 @@ async def generate(
**kwargs,
) -> dict:
base_url, api_key = None, None
if model in JULEP_MODELS:
if model in LOCAL_MODELS:
base_url, api_key = model_inference_url, model_api_key
model = f"openai/{model}"

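
For models listed in LOCAL_MODELS, generate() routes litellm to the self-hosted OpenAI-compatible endpoint and prefixes the model name with "openai/". A hedged sketch of the resulting call shape; the endpoint URL and API key below are placeholders taken from the example env file, not real credentials:

```python
# Hedged sketch of the litellm call generate() ends up making for a model in
# LOCAL_MODELS; the api_base and api_key values are placeholders.
import asyncio

from litellm import acompletion


async def main() -> None:
    response = await acompletion(
        model="openai/julep-ai/Hermes-2-Theta-Llama-3-8B",  # "openai/" prefix -> OpenAI-compatible route
        api_base="http://model-serving:8000/v1",  # model_inference_url from the env
        api_key="myauthkey",  # model_api_key from the env
        messages=[{"role": "user", "content": "Say hi in one word."}],
        temperature=0.0,
    )
    print(response.choices[0].message.content)


asyncio.run(main())
```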
9 changes: 8 additions & 1 deletion agents-api/agents_api/routers/agents/__init__.py
@@ -1 +1,8 @@
from .routers import router # noqa: F401
from .router import router # noqa: F401

from .create_agent import create_agent # noqa: F401
from .delete_agent import delete_agent # noqa: F401
from .get_agent_details import get_agent_details # noqa: F401
from .list_agents import list_agents # noqa: F401
from .update_agent import update_agent # noqa: F401
from .patch_agent import patch_agent # noqa: F401
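
The router refactor above splits each agents endpoint into its own module and re-exports them from the package. Assuming the usual FastAPI wiring (not shown in this hunk), the package-level router is what gets mounted on the application:

```python
# Hedged sketch, assuming standard FastAPI wiring that this hunk does not show:
# mount the agents router re-exported by the package onto the application.
from fastapi import FastAPI

from agents_api.routers.agents import router as agents_router

app = FastAPI()
app.include_router(agents_router)
```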
