Skip to content

Inconsistent behavior #588

@Maayank-D-S

Description

@Maayank-D-S

Describe the bug

Hey, I am trying to build a voice agent that can be deployed on a website.
The website frontend is built with React, with Python (Flask) as the backend.

The problem is that the agent behaves inconsistently: sometimes it joins the room and sometimes it does not. Because of this inconsistency, I am unable to identify what the problem is.

Relevant log output

No response

Describe your environment

I am running all of this code in a production environment, with the backend on EC2 (OS: Ubuntu) and the frontend on Vercel.

Minimal reproducible example

agent.py

from dotenv import load_dotenv
import asyncio
import os
import logging
import traceback
from livekit import agents
from livekit.agents import AgentSession, Agent, RoomInputOptions, JobRequest
from livekit.plugins import (
    openai,
    cartesia,
    deepgram,
    silero,
    elevenlabs
)
from livekit.agents.voice.agent import ModelSettings
from livekit.agents.llm import ChatMessage
import livekit.agents.llm as livekit_llm
# from livekit.plugins.turn_detector.multilingual import MultilingualModel
# from livekit.agents import ChatContext
# Load variables from a .env file (python-dotenv) into the environment.
load_dotenv()

# Root logger: DEBUG verbosity, "<timestamp> - <level> - <message>" lines.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)





class Assistant(Agent):
    """Voice agent that grounds every LLM turn in retrieved documents.

    The system prompt is read from ``prompt_template.txt`` located next to
    this file. On each turn, ``llm_node`` builds a fresh, compact chat
    context: the prompt plus the index matches for the latest user message,
    followed by the last three non-system conversation items.
    """

    def __init__(self, session: AgentSession, index):
        """Read the prompt template and keep references to session and index.

        Args:
            session: The ``AgentSession`` this agent is created for.
            index: Object exposing ``similarity_search(query, k=...)`` whose
                results carry a ``page_content`` string.

        Raises:
            OSError: If ``prompt_template.txt`` cannot be opened or read.
        """
        base_dir = os.path.dirname(os.path.abspath(__file__))
        prompt_path = os.path.join(base_dir, "prompt_template.txt")
        with open(prompt_path, "r", encoding="utf-8") as f:
            instructions = f.read()
        super().__init__(instructions=instructions)
        self._instructions = instructions
        self.index = index
        self._session = session
        self.interaction_count = 0

    async def llm_node(
        self,
        chat_ctx: livekit_llm.ChatContext,
        tools: list[livekit_llm.FunctionTool],
        model_settings: ModelSettings,
    ):
        """Run the default LLM node on a retrieval-augmented, trimmed context.

        Args:
            chat_ctx: Full conversation context supplied by the framework.
            tools: Function tools forwarded unchanged to the default node.
            model_settings: Model settings forwarded unchanged.

        Yields:
            The chunks produced by ``Agent.default.llm_node``.
        """
        self.interaction_count += 1

        # Use the latest item as the retrieval query only if it is a user
        # message; otherwise no retrieval happens on this turn.
        user_query = ""
        last_item = chat_ctx.items[-1] if chat_ctx.items else None
        if isinstance(last_item, ChatMessage) and last_item.role == "user":
            user_query = last_item.text_content or ""

        context = self._instructions + "\n\n"
        if user_query.strip():
            # similarity_search is a synchronous call; run it in a worker
            # thread so the event loop is not blocked while it executes.
            docs = await asyncio.to_thread(
                self.index.similarity_search, user_query, k=5
            )
            context += "".join(doc.page_content + "\n" for doc in docs)

        chat_ctx_to_use = livekit_llm.ChatContext()
        chat_ctx_to_use.items.append(ChatMessage(role="system", content=[context]))

        # Items are not guaranteed to be ChatMessage instances (hence the
        # isinstance check above), so read ``role`` defensively instead of
        # raising AttributeError on an item that has none.
        conversation_history = [
            item
            for item in chat_ctx.items
            if getattr(item, "role", None) != "system"
        ][-3:]
        chat_ctx_to_use.items.extend(conversation_history)

        print("======== Final chat_ctx_to_use ========")
        for item in chat_ctx_to_use.items:
            role = str(getattr(item, "role", type(item).__name__)).upper()
            content = getattr(item, "content", item)
            print(f"{role}: {content}")

        async for chunk in Agent.default.llm_node(
            self, chat_ctx_to_use, tools, model_settings
        ):
            yield chunk
    

async def request_fnc(req: JobRequest):
    """Accept the incoming job request with a fixed participant profile.

    The agent is accepted with participant name ``"agent"``, identity
    ``"identity"`` and the attribute ``myagent=rocks``.
    """
    participant_profile = {
        # Participant.name shown for the agent.
        "name": "agent",
        # Participant.identity the agent joins with.
        "identity": "identity",
        # Attributes set on the agent participant upon join.
        "attributes": {"myagent": "rocks"},
    }
    await req.accept(**participant_profile)
async def entrypoint(ctx: agents.JobContext):
    """Job entrypoint: load the index, join the room and start the voice session.

    Steps, in order: load the FAISS vector store, connect to the room, build
    the ``AgentSession`` (Deepgram STT, OpenAI LLM, ElevenLabs TTS, Silero
    VAD), create the ``Assistant``, start the session and speak the greeting.

    Every failure is logged with its traceback and ends the job by
    returning; nothing is re-raised to the caller.

    Args:
        ctx: Job context providing ``connect()`` and the ``room`` to join.
    """
    try:
        from rag_utils import load_faiss_vectorstore

        # NOTE(review): the store is loaded before connecting, so a slow load
        # delays the agent joining the room - consider moving it to a worker
        # prewarm function; confirm against the LiveKit Agents docs.
        try:
            vectorstore = load_faiss_vectorstore()
        except Exception:
            print("[ERROR] Failed to load vectorstore", flush=True)
            traceback.print_exc()
            # The Assistant cannot work without the index. Stop here rather
            # than continuing with ``vectorstore`` unbound, which previously
            # surfaced later as a misleading "Failed to initialize Assistant".
            return
        print("[Agent] Vectorstore loaded successfully.", flush=True)

        await ctx.connect()

        try:
            session = AgentSession(
                stt=deepgram.STT(model="nova-3", language="multi"),
                llm=openai.LLM(model="gpt-4o"),
                tts=elevenlabs.TTS(
                    voice_id="VJzrUxHaC52mTyYHMCnK",
                    model="eleven_turbo_v2_5",
                ),
                vad=silero.VAD.load(),
            )
        except Exception as e:
            # logging.exception keeps the message and adds the traceback.
            logging.exception("Failed to initialize session: %s", e)
            return

        try:
            agent = Assistant(session=session, index=vectorstore)
        except Exception as e:
            logging.exception("Failed to initialize Assistant: %s", e)
            return

        try:
            await session.start(
                room=ctx.room,
                agent=agent,
                room_input_options=RoomInputOptions(),
            )
        except Exception as e:
            logging.exception("Failed to start session: %s", e)
            return

        await session.generate_reply(
            instructions="Hi, welcome to Unbroker"
        )

    except Exception as e:
        logging.exception("An unexpected error occurred: %s", e)


if __name__ == "__main__":
    # Hand the worker configuration (job entrypoint + request handler) to the
    # LiveKit agents CLI runner.
    worker_options = agents.WorkerOptions(
        entrypoint_fnc=entrypoint,
        request_fnc=request_fnc,
    )
    agents.cli.run_app(worker_options)

token generation route

@app.route('/getToken', methods=["POST"])
def get_token():
    """Issue a LiveKit access token allowing ``identity`` to join ``room``.

    Expects a JSON body of the form ``{"identity": ..., "room": ...}``.

    Returns:
        ``{"token": <jwt>}`` on success, or ``{"error": "Missing identity or
        room"}`` with status 400 when either field is absent or empty, or
        when the body is not a JSON object.
    """
    # silent=True makes an absent or malformed JSON body come back as None
    # instead of aborting the request, so it is reported through the same
    # JSON 400 response as a missing field.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    identity = data.get("identity")
    room = data.get("room")

    if not identity or not room:
        return jsonify({"error": "Missing identity or room"}), 400

    token = api.AccessToken(
        os.getenv("LIVEKIT_API_KEY"),
        os.getenv("LIVEKIT_API_SECRET"),
    ).with_identity(identity).with_grants(
        api.VideoGrants(
            room_join=True,
            room=room,
            can_publish=True,
            can_subscribe=True,
        )
    )
    # Serialize once. The JWT is a bearer credential, so it is deliberately
    # not printed or logged.
    token_jwt = token.to_jwt()
    return jsonify({"token": token_jwt})

minimalist frontend code:

import React, { useState, useRef } from 'react';
import { Room, createLocalAudioTrack } from 'livekit-client';

// Base URL of the backend that serves /getToken, read from the Vite env var VITE_API_BASE.
const BACKEND_URL = import.meta.env.VITE_API_BASE;
// WebSocket URL of the LiveKit server that the room connects to.
// NOTE(review): presumably this must be the same LiveKit project whose API key/secret
// the token backend and the agent worker use - confirm.
const LIVEKIT_WS_URL = 'wss://ds-nl2qsdc2.livekit.cloud';

export default function VoiceAgent() {
  const [roomName, setRoomName] = useState(`room-${Math.random().toString(36).slice(2, 8)}`);
  const [userId, setUserId] = useState(`frontend-${Math.random().toString(36).slice(2, 8)}`);
  const roomRef = useRef(null);

  const handleJoin = async () => {
    try {
      // 🔌 Disconnect existing room if needed
      if (roomRef.current) {
        console.log('🔌 Disconnecting previous room');
        roomRef.current.disconnect();
        roomRef.current = null;
      }

      // 🔓 iOS fix: request mic access and resume AudioContext
      await navigator.mediaDevices.getUserMedia({ audio: true });
      if (typeof AudioContext !== 'undefined') {
        const audioCtx = new AudioContext();
        if (audioCtx.state === 'suspended') await audioCtx.resume();
      }

      // 🔑 Step 1: Get token
      const tokenRes = await fetch(`${BACKEND_URL}/getToken`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ identity: userId, room: roomName }),
      });
      const { token } = await tokenRes.json();
      if (!token) throw new Error('❌ No token received from backend');
      console.log('🔑 Received token');

      // 🧠 Step 2: Create room
      const room = new Room();
      roomRef.current = room;

      // 📡 Step 3: Set event listeners
      room.on('connected', () => console.log('✅ Connected to LiveKit room'));
      room.on('disconnected', () => console.log('🔌 Disconnected from room'));
      room.on('connectionError', err => console.error('❗ Connection error:', err));

      room.on('participantConnected', p => console.log('👤 Participant joined:', p.identity));
      room.on('participantDisconnected', p => console.log('👋 Participant left:', p.identity));

      // 🔗 Step 4: Connect to server
      await room.connect(LIVEKIT_WS_URL, token);

      // 🎤 Step 5: Create and publish mic track
      const micTrack = await createLocalAudioTrack();
      console.log('🎙️ Mic track created:', micTrack);

      // Optional mic test (hear yourself)
      const micTestEl = micTrack.attach();
      micTestEl.volume = 0.3;
      micTestEl.autoplay = true;
      document.body.appendChild(micTestEl);

      await room.localParticipant.publishTrack(micTrack);
      console.log('📢 Mic published');

      // 🔉 Step 6: Subscribe to remote audio
      room.on('trackSubscribed', (track, publication, participant) => {
        if (track.kind === 'audio') {
          console.log(`🔈 Subscribed to audio from ${participant.identity}`);
          const audioEl = track.attach();
          audioEl.autoplay = true;
          document.body.appendChild(audioEl);
        }
      });

    } catch (err) {
      console.error('❌ Error joining room:', err);
      alert('Join failed. See console.');
    }
  };

  return (
    <div style={{ padding: 20 }}>
      <h2>🎙️ LiveKit Voice Join</h2>
      <label>User ID: <input value={userId} onChange={e => setUserId(e.target.value)} /></label><br />
      <label>Room: <input value={roomName} onChange={e => setRoomName(e.target.value)} /></label><br />
      <button className="text-white" onClick={handleJoin}>Join Room</button>
    </div>
  );
}

Additional information

In the frontend logs, when the agent connects, I get "Participant joined: identity" and "Subscribed to audio from identity" along with 4 other logs. But when it does not connect, I get only the 4 other logs and no "Participant joined: ..." or "Subscribed to audio from ..." log.

This is what the logs look like when the agent connects. When it does not, the last two logs don't show up.
Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions