Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions demos/Agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
## About

This project demonstrates a simple agent implementation built on **Nexa SDK Serve**.

## Setup

### Prerequisites

- Nexa SDK Installed ([Installation Guide](https://github.com/NexaAI/nexa-sdk?tab=readme-ov-file#step-1-download-nexa-cli-with-one-click))
- Python 3.11 – 3.13


### Install Dependencies

```bash
# Navigate to the agent directory
cd Agent

# Create a Python virtual environment
python -m venv .venv

# Activate the virtual environment
.\.venv\Scripts\activate # windows

source .venv/bin/activate # macOS / Linux

# Install all required dependencies
pip install -r requirements.txt
```

### Running the Example

First, open a new terminal window and start the Nexa server:
```bash
# Start Nexa server
nexa serve
```

In a new terminal window, run the demo:

```bash

# Run the Gradio UI version
# This starts a local web server with a chat interface at http://localhost:7860
python gradio_ui.py

```
76 changes: 76 additions & 0 deletions demos/Agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import json
from serve import LLMService
import tools

# Planning prompt sent as the "system" message: instructs the model to emit
# exactly one JSON function call per turn, chosen from the tool list below,
# and to call `finished` when the goal is complete. This string is consumed
# verbatim by the LLM at runtime — do not reformat or "fix" its wording.
SYSTEM_PROMPT = """
You are an expert at breaking down a complex user request into a sequence of function calls. Respect the chronological order of actions described by the user.

Based on the user's request and the history of previously executed functions, decide on the next function to call to achieve the user's goal.

If the goal is complete and you have the result that you need call the finished function.
If the input does not match any supported function call the finished function.
If the input sounds like a conversation or the user just says thanks for the previous request call the finished function.

Here is the list of supported functions:

- timenow(): return the current date and time
- get_weather(city): return the weather for a certain city.
- send_email(to, email_message): send an email to a recipient containing a message.
- finished: call this function with NO parameters when the user's goal is complete.

You must return exactly one JSON object representing a function call per response.

Respond only with a valid JSON. Do not include comments, explanations, tabs, or extra spaces.
{"function":"function_name","describe":"describe your intent in three words","parameter":"parameter_value or Leave empty string '' if no parameters"}`
"""


class AgentRunner:
    """Plan-and-execute loop: ask the LLM for the next function call (as a
    JSON object), dispatch it to the `tools` module, append the result to the
    conversation history, and repeat until the model calls `finished`.
    """

    def __init__(self):
        # Conversation history is shared across run() calls, seeded with the
        # planning system prompt.
        self.history = [
            {"role": "system", "content": SYSTEM_PROMPT}
        ]

    def run(self, base_url, task, model):
        """Generator yielding JSON status strings while executing *task*.

        Parameters:
            base_url: Nexa serve endpoint base URL.
            task: natural-language user request.
            model: LLM model id passed to the chat endpoint.

        Yields JSON objects with a "status" key, one of:
        "proccess", "function", "task", "finished", "error".
        NOTE: the "proccess" spelling is load-bearing — gradio_ui.py matches
        on this exact string; do not correct it here alone.
        """
        self.history.append({"role": "user", "content": task})

        yield json.dumps({"status": "proccess", "message": "Starting analysis task..."})

        max_retries = 3  # loop-invariant: hoisted out of the planning loop

        while True:
            # Ask the model for the next call; retry transport and
            # JSON-decode failures before surfacing the error.
            for attempt in range(1, max_retries + 1):
                try:
                    response = LLMService.chat(
                        base_url=base_url,
                        messages=self.history,
                        model=model
                    )
                    message = response["choices"][0]["message"]["content"]
                    data = json.loads(message)
                    break
                except Exception as e:
                    if attempt < max_retries:
                        continue
                    yield json.dumps({"status": "error", "message": f"{e}"})
                    return

            func = data.get("function")
            param = data.get("parameter")
            describe = data.get("describe")
            yield json.dumps({"status": "function", "message": f"{data}"})

            if func == "finished":
                yield json.dumps({"status": "finished", "message": f"{describe}!"})
                return

            # isinstance guard: if the model omitted "function", func is None
            # and hasattr(tools, None) would raise TypeError.
            if isinstance(func, str) and hasattr(tools, func):
                yield json.dumps({"status": "task", "message": f"{describe}..."})
                result = getattr(tools, func)(param)
                self.history.append({
                    "role": "assistant",
                    "content": f"running `{func}`, result: {result}"
                })
            else:
                yield json.dumps({"status": "error", "message": f"unknown func: {func}"})
                break
110 changes: 110 additions & 0 deletions demos/Agent/gradio_ui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@

import gradio as gr
import json
from serve import (LLMService, ALL_ASR_MODELS, ALL_INFER_MODELS, BASE_URL)
from agent import AgentRunner
from gradio import ChatMessage

agent = AgentRunner()

def run_task(history, audio, base_url, asr_model, llm_model):
    """Gradio event handler: transcribe the recorded audio, feed the
    transcript to the agent, and stream status updates into the chat.

    Parameters:
        history: current Chatbot message list (None on the first call).
        audio: filepath of the recorded WAV clip.
        base_url: Nexa serve endpoint base URL.
        asr_model: speech-to-text model repo id.
        llm_model: LLM model repo id.

    Yields (history, None) pairs; the trailing None clears the audio widget.
    """
    if history is None:
        history = []

    history.append(
        ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "**Process audio...**"}
        ))
    yield history, None

    try:
        task = LLMService.speech_to_text(base_url=base_url, audio=audio, model=asr_model)
    except Exception as e:
        history.append(ChatMessage(
            role="assistant",
            content=f"(Error: {e})",
            metadata={"title": "**Error occurred**"},
        ))
        yield history, None
        return

    for raw in agent.run(base_url=base_url, task=task, model=llm_model):
        # raw is expected to be a JSON string emitted by AgentRunner.run
        parsed = None
        if isinstance(raw, str):
            try:
                parsed = json.loads(raw)
            except Exception:
                # Not JSON: treat as a raw stream chunk and drop it
                parsed = None

        if parsed and isinstance(parsed, dict) and "status" in parsed:
            st = parsed.get("status")
            msg = parsed.get("message", "")

            if st == "error":
                history.append(ChatMessage(
                    role="assistant",
                    content=f"(Error: {msg})",
                    metadata={"title": "**Error occurred**"},
                ))
                yield history, None
                continue

            if st == "function":
                # Build the fenced block without source indentation: an
                # indented triple-quoted literal would make markdown render
                # the ```json fence as a literal indented block instead of
                # a code block.
                history.append(ChatMessage(
                    role="assistant",
                    content=f"```json\n{msg}\n```",
                    metadata={"title": "**Call Tool**"},
                ))
                yield history, None
                continue

            # "proccess" is the (misspelled) status emitted by agent.py;
            # also accept the corrected spelling for forward compatibility.
            if st in ("proccess", "process", "task"):
                history.append(ChatMessage(
                    role="assistant",
                    content="",
                    metadata={"title": f"**{msg}**"}
                ))
                yield history, None
                continue

            if st == "finished":
                history.append(ChatMessage(
                    role="assistant",
                    content="",
                    metadata={"title": f"**{msg}**"}
                ))
                yield history, None
                continue

# UI layout: chat transcript + microphone on the left, server/model
# configuration on the right.
with gr.Blocks() as demo:
    gr.Markdown("## Agent with Nexa serve")
    with gr.Row():
        with gr.Column(scale=2):
            chatbox = gr.Chatbot(height=500)
            # Records from the microphone to a temporary WAV file; the
            # filepath is what run_task forwards to the ASR endpoint.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                format='wav',
                show_label=False
            )

        with gr.Column(scale=1):
            base_url=gr.Textbox(BASE_URL, label="Base URL")
            asr_repo_id = gr.Dropdown(ALL_ASR_MODELS, label="Asr model repo Id", value=ALL_ASR_MODELS[0])
            llm_repo_id = gr.Dropdown(ALL_INFER_MODELS, label="LLM model repo Id", value=ALL_INFER_MODELS[0])

    # Trigger the whole pipeline as soon as the user stops recording; the
    # second output (None) resets the audio widget for the next request.
    audio_input.stop_recording(fn=run_task, inputs=[chatbox, audio_input, base_url, asr_repo_id, llm_repo_id], outputs=[chatbox, audio_input])

if __name__ == "__main__":
    demo.launch()
2 changes: 2 additions & 0 deletions demos/Agent/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
gradio
requests
34 changes: 34 additions & 0 deletions demos/Agent/serve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# serve.py
import requests

# Default local Nexa serve endpoint (shown in the UI, editable there).
BASE_URL = "http://127.0.0.1:18181"
# BASE_URL = "https://api.hyperlinkos.com"

# Model repo ids offered in the UI dropdowns.
ALL_ASR_MODELS = ["NexaAI/parakeet-tdt-0.6b-v2-MLX"]
ALL_INFER_MODELS = ["NexaAI/Qwen3-4B-GGUF"]

class LLMService:
    """Thin HTTP client for the Nexa serve API (ASR + chat completions)."""

    @staticmethod
    def speech_to_text(base_url, audio, model):
        """Transcribe a WAV file via POST /v1/audio/transcriptions.

        Parameters:
            base_url: server base URL, e.g. "http://127.0.0.1:18181".
            audio: path to the WAV file to upload.
            model: ASR model repo id.

        Returns the transcribed text, or "" if the response has no "text".
        """
        data = {
            "model": model,
            "language": "en"
        }
        # Context manager so the upload handle is always closed — the
        # original `open(audio, "rb")` inside the files dict leaked the
        # file object.
        with open(audio, "rb") as audio_file:
            files = {
                "file": (audio, audio_file, "audio/wav")
            }
            resp = requests.post(f"{base_url}/v1/audio/transcriptions", data=data, files=files)
        return resp.json().get("text", "")

    @staticmethod
    def chat(base_url, messages, model, tools=None):
        """Call POST /v1/chat/completions and return the decoded JSON body.

        Parameters:
            base_url: server base URL.
            messages: OpenAI-style chat message list.
            model: LLM model repo id.
            tools: optional tool-schema list; None (the default, avoiding a
                mutable default argument) is sent as [].
        """
        body = {
            "model": model,
            "messages": messages,
            "tools": tools if tools else [],
            "enable_think": False
        }
        resp = requests.post(f"{base_url}/v1/chat/completions", json=body)
        return resp.json()
90 changes: 90 additions & 0 deletions demos/Agent/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import time

# mock tools

def get_weather(location):
    """Mock weather lookup: report a fixed 18℃ reading for *location*."""
    return "location: {} weather: 18℃".format(location)

def send_email(content):
    """Mock email sender: echo back the message that would be sent."""
    return "email send: content: {}".format(content)

def timenow(unuse):
    """Mock clock: return the current local time, e.g. 'Mon Jan 01 12:00:00 2024'.

    Parameters:
        unuse: ignored; present so the agent can invoke every tool with a
            single positional argument.
    """
    # Plain strftime call. The original wrapped it in an f-string reusing
    # the same double quotes for the inner literal, which is a SyntaxError
    # before Python 3.12 (PEP 701) — the README promises 3.11 support.
    return time.strftime("%a %b %d %H:%M:%S %Y", time.localtime())

def finished():
    """Mock terminal tool: signal that the agent's goal is complete."""
    return "done"


# OpenAI-style tool schemas describing the mock functions above. Currently
# unused by agent.py (which lists tools in SYSTEM_PROMPT instead), but
# matches the optional `tools` parameter of LLMService.chat.
TOOL_FUNCTION=[
    {
        "type": "function",
        "function": {
            "name": "timenow",
            "description": "Return the current date and time.",
            "parameters": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Return the weather for a certain city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city to query weather for."
                    }
                },
                "required": [
                    "city"
                ]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "send_email",
            "description": "Send an email to a recipient containing a message.",
            "parameters": {
                "type": "object",
                "properties": {
                    "to": {
                        "type": "string",
                        "description": "The receiver's name or email address."
                    },
                    "email_message": {
                        "type": "string",
                        "description": "The content of the email message."
                    }
                },
                "required": [
                    "to",
                    "email_message"
                ]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "finished",
            "description": "Call this when the user's goal is complete. No parameters.",
            "parameters": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    }
]