Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions demos/Agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
## About

This project demonstrates a simple agent implementation built on **Nexa SDK Serve**.

## Setup

### Prerequisites

- Nexa SDK Installed ([Installation Guide](https://github.com/NexaAI/nexa-sdk?tab=readme-ov-file#step-1-download-nexa-cli-with-one-click))
- Python 3.11 – 3.13


### Install Dependencies

```bash
# Navigate to the agent directory
cd Agent

# Create a Python virtual environment
python -m venv .venv

# Activate the virtual environment
.\.venv\Scripts\activate # windows

source .venv/bin/activate # macOS / Linux

# Install all required dependencies
pip install -r requirements.txt
```

### Running the Example

First, open a new terminal window and start the Nexa server:
```bash
# Start Nexa server
nexa serve
```

In a new terminal window, run the demo:

```bash

# Run the Gradio UI version
# This starts a local web server with a chat interface at http://localhost:7860
python gradio_ui.py

```
76 changes: 76 additions & 0 deletions demos/Agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import json
from serve import LLMService
import tools

# Planning prompt sent as the "system" message: instructs the model to emit
# exactly one JSON function call per turn, chosen from the tool list below,
# and to call `finished` when the goal is complete. This string is consumed
# verbatim by the LLM at runtime — do not reformat or "fix" its wording.
SYSTEM_PROMPT = """
You are an expert at breaking down a complex user request into a sequence of function calls. Respect the chronological order of actions described by the user.

Based on the user's request and the history of previously executed functions, decide on the next function to call to achieve the user's goal.

If the goal is complete and you have the result that you need call the finished function.
If the input does not match any supported function call the finished function.
If the input sounds like a conversation or the user just says thanks for the previous request call the finished function.

Here is the list of supported functions:

- timenow(): return the current date and time
- get_weather(city): return the weather for a certain city.
- send_email(to, email_message): send an email to a recipient containing a message.
- finished: call this function with NO parameters when the user's goal is complete.

You must return exactly one JSON object representing a function call per response.

Respond only with a valid JSON. Do not include comments, explanations, tabs, or extra spaces.
{"function":"function_name","describe":"describe your intent in three words","parameter":"parameter_value or Leave empty string '' if no parameters"}`
"""


class AgentRunner:
    """Plan-and-execute loop: ask the LLM for the next function call (as a
    JSON object), dispatch it to the `tools` module, append the result to the
    conversation history, and repeat until the model calls `finished`.
    """

    def __init__(self):
        # Conversation history is shared across run() calls, seeded with the
        # planning system prompt.
        self.history = [
            {"role": "system", "content": SYSTEM_PROMPT}
        ]

    def run(self, base_url, task, model):
        """Generator yielding JSON status strings while executing *task*.

        Parameters:
            base_url: Nexa serve endpoint base URL.
            task: natural-language user request.
            model: LLM model id passed to the chat endpoint.

        Yields JSON objects with a "status" key, one of:
        "proccess", "function", "task", "finished", "error".
        NOTE: the "proccess" spelling is load-bearing — gradio_ui.py matches
        on this exact string; do not correct it here alone.
        """
        self.history.append({"role": "user", "content": task})

        yield json.dumps({"status": "proccess", "message": "Starting analysis task..."})

        max_retries = 3  # loop-invariant: hoisted out of the planning loop

        while True:
            # Ask the model for the next call; retry transport and
            # JSON-decode failures before surfacing the error.
            for attempt in range(1, max_retries + 1):
                try:
                    response = LLMService.chat(
                        base_url=base_url,
                        messages=self.history,
                        model=model
                    )
                    message = response["choices"][0]["message"]["content"]
                    data = json.loads(message)
                    break
                except Exception as e:
                    if attempt < max_retries:
                        continue
                    yield json.dumps({"status": "error", "message": f"{e}"})
                    return

            func = data.get("function")
            param = data.get("parameter")
            describe = data.get("describe")
            yield json.dumps({"status": "function", "message": f"{data}"})

            if func == "finished":
                yield json.dumps({"status": "finished", "message": f"{describe}!"})
                return

            # isinstance guard: if the model omitted "function", func is None
            # and hasattr(tools, None) would raise TypeError.
            if isinstance(func, str) and hasattr(tools, func):
                yield json.dumps({"status": "task", "message": f"{describe}..."})
                result = getattr(tools, func)(param)
                self.history.append({
                    "role": "assistant",
                    "content": f"running `{func}`, result: {result}"
                })
            else:
                yield json.dumps({"status": "error", "message": f"unknown func: {func}"})
                break
110 changes: 110 additions & 0 deletions demos/Agent/gradio_ui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@

import gradio as gr
import json
from serve import (LLMService, ALL_ASR_MODELS, ALL_INFER_MODELS, BASE_URL)
from agent import AgentRunner
from gradio import ChatMessage

agent = AgentRunner()

def run_task(history, audio, base_url, asr_model, llm_model):
    """Gradio event handler: transcribe the recorded audio, feed the
    transcript to the agent, and stream status updates into the chat.

    Parameters:
        history: current Chatbot message list (None on the first call).
        audio: filepath of the recorded WAV clip.
        base_url: Nexa serve endpoint base URL.
        asr_model: speech-to-text model repo id.
        llm_model: LLM model repo id.

    Yields (history, None) pairs; the trailing None clears the audio widget.
    """
    if history is None:
        history = []

    history.append(
        ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "**Process audio...**"}
        ))
    yield history, None

    try:
        task = LLMService.speech_to_text(base_url=base_url, audio=audio, model=asr_model)
    except Exception as e:
        history.append(ChatMessage(
            role="assistant",
            content=f"(Error: {e})",
            metadata={"title": "**Error occurred**"},
        ))
        yield history, None
        return

    for raw in agent.run(base_url=base_url, task=task, model=llm_model):
        # raw is expected to be a JSON string emitted by AgentRunner.run
        parsed = None
        if isinstance(raw, str):
            try:
                parsed = json.loads(raw)
            except Exception:
                # Not JSON: treat as a raw stream chunk and drop it
                parsed = None

        if parsed and isinstance(parsed, dict) and "status" in parsed:
            st = parsed.get("status")
            msg = parsed.get("message", "")

            if st == "error":
                history.append(ChatMessage(
                    role="assistant",
                    content=f"(Error: {msg})",
                    metadata={"title": "**Error occurred**"},
                ))
                yield history, None
                continue

            if st == "function":
                # Build the fenced block without source indentation: an
                # indented triple-quoted literal would make markdown render
                # the ```json fence as a literal indented block instead of
                # a code block.
                history.append(ChatMessage(
                    role="assistant",
                    content=f"```json\n{msg}\n```",
                    metadata={"title": "**Call Tool**"},
                ))
                yield history, None
                continue

            # "proccess" is the (misspelled) status emitted by agent.py;
            # also accept the corrected spelling for forward compatibility.
            if st in ("proccess", "process", "task"):
                history.append(ChatMessage(
                    role="assistant",
                    content="",
                    metadata={"title": f"**{msg}**"}
                ))
                yield history, None
                continue

            if st == "finished":
                history.append(ChatMessage(
                    role="assistant",
                    content="",
                    metadata={"title": f"**{msg}**"}
                ))
                yield history, None
                continue

# UI layout: chat transcript + microphone on the left, server/model
# configuration on the right.
with gr.Blocks() as demo:
    gr.Markdown("## Agent with Nexa serve")
    with gr.Row():
        with gr.Column(scale=2):
            chatbox = gr.Chatbot(height=500)
            # Records from the microphone to a temporary WAV file; the
            # filepath is what run_task forwards to the ASR endpoint.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                format='wav',
                show_label=False
            )

        with gr.Column(scale=1):
            base_url=gr.Textbox(BASE_URL, label="Base URL")
            asr_repo_id = gr.Dropdown(ALL_ASR_MODELS, label="Asr model repo Id", value=ALL_ASR_MODELS[0])
            llm_repo_id = gr.Dropdown(ALL_INFER_MODELS, label="LLM model repo Id", value=ALL_INFER_MODELS[0])

    # Trigger the whole pipeline as soon as the user stops recording; the
    # second output (None) resets the audio widget for the next request.
    audio_input.stop_recording(fn=run_task, inputs=[chatbox, audio_input, base_url, asr_repo_id, llm_repo_id], outputs=[chatbox, audio_input])

if __name__ == "__main__":
    demo.launch()
2 changes: 2 additions & 0 deletions demos/Agent/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
gradio
requests
34 changes: 34 additions & 0 deletions demos/Agent/serve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# serve.py
import requests

# Default local Nexa serve endpoint (shown in the UI, editable there).
BASE_URL = "http://127.0.0.1:18181"
# BASE_URL = "https://api.hyperlinkos.com"

# Model repo ids offered in the UI dropdowns.
ALL_ASR_MODELS = ["NexaAI/parakeet-tdt-0.6b-v2-MLX"]
ALL_INFER_MODELS = ["NexaAI/Qwen3-4B-GGUF"]

class LLMService:
    """Thin HTTP client for the Nexa serve API (ASR + chat completions)."""

    @staticmethod
    def speech_to_text(base_url, audio, model):
        """Transcribe a WAV file via POST /v1/audio/transcriptions.

        Parameters:
            base_url: server base URL, e.g. "http://127.0.0.1:18181".
            audio: path to the WAV file to upload.
            model: ASR model repo id.

        Returns the transcribed text, or "" if the response has no "text".
        """
        data = {
            "model": model,
            "language": "en"
        }
        # Context manager so the upload handle is always closed — the
        # original `open(audio, "rb")` inside the files dict leaked the
        # file object.
        with open(audio, "rb") as audio_file:
            files = {
                "file": (audio, audio_file, "audio/wav")
            }
            resp = requests.post(f"{base_url}/v1/audio/transcriptions", data=data, files=files)
        return resp.json().get("text", "")

    @staticmethod
    def chat(base_url, messages, model, tools=None):
        """Call POST /v1/chat/completions and return the decoded JSON body.

        Parameters:
            base_url: server base URL.
            messages: OpenAI-style chat message list.
            model: LLM model repo id.
            tools: optional tool-schema list; None (the default, avoiding a
                mutable default argument) is sent as [].
        """
        body = {
            "model": model,
            "messages": messages,
            "tools": tools if tools else [],
            "enable_think": False
        }
        resp = requests.post(f"{base_url}/v1/chat/completions", json=body)
        return resp.json()
90 changes: 90 additions & 0 deletions demos/Agent/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import time

# mock tools

def get_weather(location):
    """Mock weather lookup: report a fixed 18℃ reading for *location*."""
    return "location: {} weather: 18℃".format(location)

def send_email(content):
    """Mock email sender: echo back the message that would be sent."""
    return "email send: content: {}".format(content)

def timenow(unuse):
    """Mock clock: return the current local time, e.g. 'Mon Jan 01 12:00:00 2024'.

    Parameters:
        unuse: ignored; present so the agent can invoke every tool with a
            single positional argument.
    """
    # Plain strftime call. The original wrapped it in an f-string reusing
    # the same double quotes for the inner literal, which is a SyntaxError
    # before Python 3.12 (PEP 701) — the README promises 3.11 support.
    return time.strftime("%a %b %d %H:%M:%S %Y", time.localtime())

def finished():
    """Mock terminal tool: signal that the agent's goal is complete."""
    return "done"


# OpenAI-style tool schemas describing the mock functions above. Currently
# unused by agent.py (which lists tools in SYSTEM_PROMPT instead), but
# matches the optional `tools` parameter of LLMService.chat.
TOOL_FUNCTION=[
    {
        "type": "function",
        "function": {
            "name": "timenow",
            "description": "Return the current date and time.",
            "parameters": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Return the weather for a certain city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city to query weather for."
                    }
                },
                "required": [
                    "city"
                ]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "send_email",
            "description": "Send an email to a recipient containing a message.",
            "parameters": {
                "type": "object",
                "properties": {
                    "to": {
                        "type": "string",
                        "description": "The receiver's name or email address."
                    },
                    "email_message": {
                        "type": "string",
                        "description": "The content of the email message."
                    }
                },
                "required": [
                    "to",
                    "email_message"
                ]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "finished",
            "description": "Call this when the user's goal is complete. No parameters.",
            "parameters": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    }
]