Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions agentops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,141 @@ def extract_key_from_attr(attr_value: str) -> str:
return False


def diagnose_session() -> Dict[str, Any]:
    """
    Diagnose the current session status and connectivity.

    Checks run in this order: SDK initialization, client initialization,
    API key presence, JWT availability, active trace count, exporter health.
    If the SDK is not initialized the remaining checks are skipped and the
    partially filled report is returned.

    Returns:
        Dictionary containing diagnostic information about the session status:
        the flags ``sdk_initialized``, ``client_initialized``, ``has_api_key``,
        ``has_auth_token`` and ``exporter_healthy``; the ``active_traces``
        count; the exporter's ``export_stats`` (empty when no exporter
        reported any); and the ``issues`` / ``recommendations`` string lists.
        Unexpected errors are reported as an entry in ``issues`` rather than
        raised.
    """
    diagnosis: Dict[str, Any] = {
        "sdk_initialized": False,
        "client_initialized": False,
        "has_api_key": False,
        "has_auth_token": False,
        "active_traces": 0,
        "exporter_healthy": False,
        "export_stats": {},
        "issues": [],
        "recommendations": [],
    }
    issues = diagnosis["issues"]
    recommendations = diagnosis["recommendations"]

    try:
        # Check SDK initialization; nothing else is meaningful without it.
        diagnosis["sdk_initialized"] = tracer.initialized
        if not tracer.initialized:
            issues.append("AgentOps SDK not initialized")
            recommendations.append("Call agentops.init() to initialize the SDK")
            return diagnosis

        # Check client
        client = get_client()
        diagnosis["client_initialized"] = client.initialized

        # Check API key
        has_api_key = bool(client.config.api_key)
        diagnosis["has_api_key"] = has_api_key
        if not has_api_key:
            issues.append("No API key provided")
            recommendations.append("Set AGENTOPS_API_KEY environment variable or pass api_key to init()")

        # Check auth token. This is the only place the "key present but no
        # token" condition is reported, so it appears once in the report.
        has_auth_token = bool(client.get_current_jwt())
        diagnosis["has_auth_token"] = has_auth_token
        if has_api_key and not has_auth_token:
            issues.append("Authentication failed - no JWT token available")
            recommendations.append("Check if API key is valid and network connectivity is working")

        # Check active traces
        diagnosis["active_traces"] = len(tracer.get_active_traces())

        # Check exporter health. This reaches into private attributes of the
        # tracer's provider, so it is best-effort: a failure here is recorded
        # in the report instead of aborting the rest of the diagnosis.
        try:
            span_processors = tracer._provider._active_span_processor._span_processors
            for processor in span_processors:
                exporter = getattr(processor, "_exporter", None)
                if exporter is not None and hasattr(exporter, "is_healthy"):
                    diagnosis["exporter_healthy"] = exporter.is_healthy()
                    # Fall back to {} so the analysis below can always use .get().
                    diagnosis["export_stats"] = exporter.get_export_stats() or {}
                    break
        except Exception as e:
            issues.append(f"Could not inspect span exporter: {e}")

        # Analyze export statistics: flag when more than half the attempts failed.
        stats = diagnosis["export_stats"]
        failed_exports = stats.get("failed_exports", 0)
        total_attempts = stats.get("total_attempts", 0)
        if failed_exports > 0 and total_attempts > 0 and failed_exports / total_attempts > 0.5:
            issues.append(f"High export failure rate: {failed_exports}/{total_attempts} attempts failed")
            recommendations.append("Check network connectivity and API key validity")

        if not issues:
            recommendations.append("Session appears healthy - data should be reaching backend")

    except Exception as e:
        issues.append(f"Error during diagnosis: {e}")

    return diagnosis


def print_session_status():
    """
    Print a human-readable report of the dictionary returned by
    ``diagnose_session()``.

    The report lists the status flags, the active trace count, export
    statistics (only when present), and any issues and recommendations.
    Intended for debugging sessions that do not reach the backend.
    """
    from termcolor import colored

    report = diagnose_session()
    divider = "=" * 50

    def print_bullets(heading, color, entries):
        # Bold heading followed by one colored bullet per entry; skipped when empty.
        if not entries:
            return
        print(colored(heading, color, attrs=["bold"]))
        for entry in entries:
            print(f" • {colored(entry, color)}")

    print("\n" + divider)
    print(colored("AgentOps Session Diagnostic Report", "cyan", attrs=["bold"]))
    print(divider)

    # Resolve every flag before printing the section header.
    flag_keys = (
        ("SDK Initialized", "sdk_initialized"),
        ("Client Initialized", "client_initialized"),
        ("API Key Present", "has_api_key"),
        ("Authenticated", "has_auth_token"),
        ("Exporter Healthy", "exporter_healthy"),
    )
    flags = [(label, report[key]) for label, key in flag_keys]

    print("\nStatus:")
    for label, value in flags:
        if value:
            mark, tint = "✓", "green"
        else:
            mark, tint = "✗", "red"
        print(f" {colored(mark, tint)} {label}: {colored(str(value), tint)}")

    print(f"\nActive Traces: {report['active_traces']}")

    # Export statistics are shown only when the exporter reported some.
    export_stats = report["export_stats"]
    if export_stats:
        print("\nExport Statistics:")
        stat_rows = (
            ("Total Attempts", "total_attempts", ""),
            ("Successful", "successful_exports", ""),
            ("Failed", "failed_exports", ""),
            ("Success Rate", "success_rate", "%"),
        )
        for label, key, suffix in stat_rows:
            print(f" {label}: {export_stats.get(key, 0)}{suffix}")

    print_bullets("\nIssues Found:", "red", report["issues"])
    print_bullets("\nRecommendations:", "yellow", report["recommendations"])

    print("\n" + divider)


__all__ = [
# Legacy exports
"start_session",
Expand All @@ -466,6 +601,9 @@ def extract_key_from_attr(attr_value: str) -> str:
"update_trace_metadata",
"Client",
"get_client",
# Diagnostics
"diagnose_session",
"print_session_status",
# Decorators
"trace",
"session",
Expand Down
33 changes: 31 additions & 2 deletions agentops/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import asyncio
import threading
from typing import Optional, Any
import time

from agentops.client.api import ApiClient
from agentops.config import Config
Expand Down Expand Up @@ -110,9 +111,10 @@ async def _fetch_auth_async(self, api_key: str) -> Optional[dict]:
logger.debug("Successfully fetched authentication token asynchronously")
return response
else:
logger.debug("Authentication failed - will continue without authentication")
logger.warning("Authentication failed - invalid API key or network issue. Session data will not reach backend.")
return None
except Exception:
except Exception as e:
logger.warning(f"Authentication error: {e}. Session data will not reach backend.")
return None

def _start_auth_task(self, api_key: str):
Expand Down Expand Up @@ -143,6 +145,33 @@ def run_async_auth():

auth_thread = threading.Thread(target=run_async_auth, daemon=True)
auth_thread.start()

def wait_for_auth(self, timeout_seconds: float = 10) -> bool:
    """
    Wait for authentication to complete.

    Polls ``self.get_current_jwt()`` until it returns a truthy token or the
    timeout elapses.

    Args:
        timeout_seconds: Maximum time to wait for authentication. A value
            of zero or less performs a single check without sleeping.

    Returns:
        True if authenticated successfully, False otherwise (no API key
        configured, or no token became available before the timeout).
    """
    if not self.config.api_key:
        return False

    poll_interval = 0.1
    # Use the monotonic clock so wall-clock adjustments (NTP, manual changes)
    # can neither cut the wait short nor extend it.
    deadline = time.monotonic() + timeout_seconds

    while True:
        # Checked before every sleep and once more after the final one, so a
        # token that arrives during the last sleep is still seen.
        if self.get_current_jwt():
            return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))

    logger.warning(f"Authentication timeout after {timeout_seconds}s. Session data may not reach backend.")
    return False

def init(self, **kwargs: Any) -> None: # Return type updated to None
# Recreate the Config object to parse environment variables at the time of initialization
Expand Down
152 changes: 152 additions & 0 deletions agentops/helpers/README_DEBUG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# AgentOps Session Debugging Tools

This document describes the debugging tools available to help diagnose issues where users see session URLs but no data reaches the AgentOps backend.

## The Problem

Some users experience an issue where:
1. They call `agentops.init()` successfully
2. They see a session URL printed to the console
3. However, no session data actually reaches the AgentOps backend

This happens due to a race condition between URL generation and authentication, plus silent export failures.

## Root Causes

1. **Race Condition**: Session URLs are generated immediately when a trace starts, but authentication happens asynchronously in the background
2. **Silent Authentication Failures**: If authentication fails, the JWT token remains `None` and exports fail silently
3. **Export Failures**: The span exporter fails to send data but this doesn't prevent URL generation
4. **Poor Error Visibility**: Export failures are logged as warnings that users might miss

## Debugging Tools

### 1. `agentops.diagnose_session()`

Returns a dictionary with detailed diagnostic information:

```python
import agentops

agentops.init()
diagnosis = agentops.diagnose_session()
print(diagnosis)
```

Returns:
```python
{
"sdk_initialized": True,
"client_initialized": True,
"has_api_key": True,
"has_auth_token": False, # This indicates the issue!
"active_traces": 1,
"exporter_healthy": False,
"export_stats": {
"total_attempts": 5,
"successful_exports": 0,
"failed_exports": 5,
"success_rate": 0.0
},
"issues": ["Authentication failed - no JWT token available"],
"recommendations": ["Check if API key is valid and network connectivity is working"]
}
```

### 2. `agentops.print_session_status()`

Prints a user-friendly diagnostic report:

```python
import agentops

agentops.init()
agentops.print_session_status()
```

Output:
```
==================================================
AgentOps Session Diagnostic Report
==================================================

Status:
✓ SDK Initialized: True
✓ Client Initialized: True
✓ API Key Present: True
✗ Authenticated: False
✗ Exporter Healthy: False

Active Traces: 1

Export Statistics:
Total Attempts: 3
Successful: 0
Failed: 3
Success Rate: 0.0%

Issues Found:
• Authentication failed - no JWT token available

Recommendations:
• Check if API key is valid and network connectivity is working
==================================================
```

### 3. Full Connectivity Test

Use the debug helper module for comprehensive testing:

```python
from agentops.helpers.debug_session import test_session_connectivity, print_connectivity_test_results

# Test with your API key
results = test_session_connectivity(api_key="your-api-key-here")
print_connectivity_test_results(results)
```

Or run the example script:
```bash
python examples/debug_session_connectivity.py your-api-key-here
```

## Enhanced Error Messages

The updated code now provides better error messages:

1. **Session URL Generation**: Now includes status indicators
- 🟢 Normal URL (authenticated)
- 🟡 Local only URL (no API key)
- 🔴 Auth failed URL (invalid API key)

2. **Export Failures**: More explicit error messages
- "Session data will not reach backend"
- "Session data not sent to backend"

3. **Authentication Issues**: Clearer warnings
- "Authentication failed - invalid API key or network issue"
- "Authentication timeout after Xs. Session data may not reach backend"

## Usage in Support

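When users report this issue, ask them to run the snippet below. Because authentication runs asynchronously in the background, a report printed immediately after `init()` can show `Authenticated: False` even with a valid key; if that happens, have them wait a few seconds (or call the client's `wait_for_auth()` first) and run the report again: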

```python
import agentops
agentops.init() # With their normal setup
agentops.print_session_status()
```

This will immediately show:
- Whether they have an API key
- Whether authentication succeeded
- Whether the exporter is healthy
- Export success/failure statistics
- Specific recommendations

## Prevention

The enhanced code also helps prevent the issue, or at least surfaces it earlier, by:
1. Checking authentication status before showing URLs
2. Color-coding URLs based on connectivity status
3. Providing immediate feedback on authentication failures
4. Tracking export statistics for ongoing monitoring
Loading
Loading