@@ -4,7 +4,7 @@
 import copy
 import logging
 import asyncio
-from typing import List, Optional, Any, Dict, Union, Literal, TYPE_CHECKING, Callable, Tuple
+from typing import List, Optional, Any, Dict, Union, Literal, TYPE_CHECKING, Callable, Tuple, Generator
 from rich.console import Console
 from rich.live import Live
 from ..llm import (
@@ -1937,7 +1937,55 @@ def run(self): |
 
     def start(self, prompt: str, **kwargs):
         """Start the agent with a prompt. This is a convenience method that wraps chat()."""
-        return self.chat(prompt, **kwargs)
+        # Check if streaming is enabled (either from kwargs or the agent's stream attribute)
+        stream_enabled = kwargs.get('stream', getattr(self, 'stream', False))
+
+        if stream_enabled:
+            # Return a generator for a streaming response
+            return self._start_stream(prompt, **kwargs)
+        else:
+            # Return the regular chat response for backward compatibility
+            return self.chat(prompt, **kwargs)
+
+    def _start_stream(self, prompt: str, **kwargs) -> Generator[str, None, None]:
+        """Stream generator for real-time response chunks."""
+        try:
+            # Reset the final display flag for each new conversation
+            self._final_display_shown = False
+
+            # Temporarily disable verbose mode to prevent console output during
+            # streaming; the try/finally guarantees it is restored even if chat() raises
+            original_verbose = self.verbose
+            self.verbose = False
+            try:
+                # Reuse the existing chat logic so none of it is duplicated
+                response = self.chat(prompt, **kwargs)
+            finally:
+                self.verbose = original_verbose
+
+            if response:
+                # Simulate streaming by yielding the response in word chunks;
+                # this gives a consistent streaming experience regardless of LLM type
+                words = str(response).split()
+                chunk_size = max(1, len(words) // 20)  # split into ~20 chunks
+
+                for i in range(0, len(words), chunk_size):
+                    chunk_words = words[i:i + chunk_size]
+                    chunk = ' '.join(chunk_words)
+
+                    # Add a space after the chunk unless it is the last one
+                    if i + chunk_size < len(words):
+                        chunk += ' '
+
+                    yield chunk
+
+        except Exception as e:
+            # Graceful fallback to a non-streaming response if streaming fails
+            logging.warning(f"Streaming failed, falling back to regular response: {e}")
+            response = self.chat(prompt, **kwargs)
+            if response:
+                # Yield the whole response as a single chunk
+                yield str(response)
 
     def execute(self, task, context=None):
         """Execute a task synchronously - backward compatibility method"""
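
Since `start()` now returns either a plain string or a generator depending on the `stream` flag, callers have to branch on how they consume the result. Below is a minimal, self-contained sketch of that contract; `StubAgent` and its echoing `chat()` are illustrative stand-ins for the real agent class, not code from this diff:

```python
from typing import Generator, Union

class StubAgent:
    """Toy stand-in for the real agent, mirroring the start() dispatch above."""

    stream = False  # class-level default, like the agent's `stream` attribute

    def chat(self, prompt: str, **kwargs) -> str:
        # Stand-in for the real chat() pipeline.
        return f"Echo: {prompt}"

    def start(self, prompt: str, **kwargs) -> Union[str, Generator[str, None, None]]:
        # Same dispatch rule as the diff: the kwarg wins, else the attribute.
        if kwargs.get('stream', getattr(self, 'stream', False)):
            return self._start_stream(prompt, **kwargs)
        return self.chat(prompt, **kwargs)

    def _start_stream(self, prompt: str, **kwargs) -> Generator[str, None, None]:
        # Same word-chunking scheme: ~20 chunks, at least one word each.
        words = self.chat(prompt, **kwargs).split()
        chunk_size = max(1, len(words) // 20)
        for i in range(0, len(words), chunk_size):
            yield ' '.join(words[i:i + chunk_size]) + (
                ' ' if i + chunk_size < len(words) else '')

if __name__ == "__main__":
    agent = StubAgent()
    print(agent.start("hello world"))               # plain string
    for chunk in agent.start("hello world", stream=True):
        print(chunk, end="", flush=True)            # streamed chunks
    print()
```

On the chunking rule itself: with `len(words) // 20`, a 100-word response streams as 20 five-word chunks, while anything under 20 words degrades to one word per chunk; the `max(1, ...)` guard prevents a chunk size of zero, which would make `range()` raise.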
|