-
Notifications
You must be signed in to change notification settings - Fork 4
Error Handling
adham90 edited this page Feb 14, 2026
·
3 revisions
Understanding and handling errors in RubyLLM::Agents.
StandardError
└── RubyLLM::Agents::Error (base class)
    ├── RubyLLM::Agents::BudgetExceededError
    ├── RubyLLM::Agents::CircuitBreakerOpenError
    ├── RubyLLM::Agents::ConfigurationError
    ├── RubyLLM::Agents::TimeoutError
    └── RubyLLM::Agents::ValidationError
Raised when budget limits are exceeded (with :hard enforcement):
begin
result = LLM::ExpensiveAgent.call(query: params[:query])
rescue RubyLLM::Agents::BudgetExceededError => e
e.message # => "Daily budget exceeded: $100.00 limit reached"
e.budget_type # => :daily or :monthly
e.budget_scope # => :global, :per_agent, or :tenant
e.limit # => 100.0
e.current # => 105.50
e.tenant_budget? # => true/false (v0.4.0+)
# Handle gracefully
render json: { error: "Service temporarily unavailable" }, status: 503
endRaised when the circuit breaker is open (too many recent failures):
begin
result = LLM::MyAgent.call(query: params[:query])
rescue RubyLLM::Agents::CircuitBreakerOpenError => e
e.message # => "Circuit breaker open for MyAgent"
e.agent_type # => "MyAgent"
e.cooldown_ends # => Time object when circuit will close
e.remaining_ms # => Milliseconds until retry is allowed
# Suggest retry time
render json: {
error: "Service temporarily unavailable",
retry_after: e.remaining_ms / 1000
}, status: 503
endRaised when agent configuration is invalid:
# An agent that leaves out a required piece of configuration
class BadAgent < ApplicationAgent
  # Deliberately declares no model
  param :query, required: true
end

BadAgent.call(query: "test")
# => Raises ConfigurationError: "Model must be configured"

Raised when total_timeout is exceeded:
begin
result = LLM::SlowAgent.call(query: params[:query])
rescue RubyLLM::Agents::TimeoutError => e
e.message # => "Total timeout of 30s exceeded"
e.timeout # => 30
e.elapsed # => 31.5
e.attempts # => 3 (attempts made before timeout)
render json: { error: "Request timed out" }, status: 504
endRaised when parameter validation fails:
# An agent whose single parameter is declared as a required integer
class TypedAgent < ApplicationAgent
  param :count, type: :integer, required: true
end

# Passing a String where an integer is declared
TypedAgent.call(count: "not a number")
# => Raises ValidationError: "Parameter 'count' must be Integer, got String"

RubyLLM::Agents automatically classifies errors:
- `Faraday::TimeoutError` - Request timeout
- `Faraday::ConnectionFailed` - Connection issues
- `RubyLLM::RateLimitError` - Rate limit exceeded
- `Net::OpenTimeout` - Connection timeout
- `Errno::ECONNREFUSED` - Connection refused

- `RubyLLM::AuthenticationError` - Invalid API key
- `RubyLLM::InvalidRequestError` - Bad request parameters
- `ArgumentError` - Missing required parameters
- `RubyLLM::Agents::BudgetExceededError` - Budget exceeded
- `RubyLLM::Agents::CircuitBreakerOpenError` - Circuit open
result = LLM::MyAgent.call(query: "test")
unless result.success?
if result.retryable?
# Safe to retry later
RetryJob.perform_later(query: "test")
else
# Don't retry, handle differently
notify_admin(result.error)
end
enddef search(query)
result = LLM::SearchAgent.call(query: query)
if result.success?
result.content
else
# Return cached/default response
cached_search(query) || default_response
end
rescue RubyLLM::Agents::BudgetExceededError
{ error: "Search temporarily unavailable", results: [] }
rescue RubyLLM::Agents::CircuitBreakerOpenError => e
{ error: "Service degraded, retry in #{e.remaining_ms / 1000}s", results: [] }
endclass SearchService
# Runs the AI-backed search and falls back to the basic search whenever the
# AI path raises a RubyLLM::Agents::Error (or a subclass of it).
def search(query)
  ai_search(query) # preferred path
rescue RubyLLM::Agents::Error
  basic_search(query) # degraded path
end
private
# Runs the search agent and returns the :results entry of its content.
# A failed result is always raised as a RubyLLM::Agents::Error so that a
# caller rescuing that class (as SearchService#search does) can fall back.
def ai_search(query)
  result = LLM::SearchAgent.call(query: query)
  return result.content[:results] if result.success?

  failure = result.error
  # Agent errors are re-raised untouched. Anything else is wrapped: `raise`
  # on a String produces a RuntimeError, and a lower-level exception keeps
  # its own class, so neither would be caught by `rescue RubyLLM::Agents::Error`.
  raise failure if failure.is_a?(RubyLLM::Agents::Error)

  # NOTE(review): assumes the base error class accepts a plain message - confirm.
  raise RubyLLM::Agents::Error, failure.to_s
end
# Plain database lookup used when the AI search is unavailable; returns at
# most ten matches.
def basic_search(query)
  matches = Product.search(query)
  matches.limit(10)
end
end

class AgentRetryService
MAX_RETRIES = 3
BASE_DELAY = 1

# Calls the agent and retries with a doubling delay while the raised
# RubyLLM::Agents::Error reports itself as retryable. The error is re-raised
# once it is not retryable or MAX_RETRIES retries have been used.
def call(agent_class, **params)
  retry_count ||= 0 # keeps its value when `retry` re-enters the method body
  agent_class.call(**params)
rescue RubyLLM::Agents::Error => error
  raise if !error.retryable? || retry_count >= MAX_RETRIES

  retry_count += 1
  sleep(BASE_DELAY * (2**retry_count))
  retry
end
end

class AgentJob < ApplicationJob
# Re-enqueue the job, waiting polynomially longer each time, when the circuit breaker is open.
retry_on RubyLLM::Agents::CircuitBreakerOpenError, wait: :polynomially_longer
# Drop the job without retrying when the budget has been exceeded.
discard_on RubyLLM::Agents::BudgetExceededError
# Resolves the agent class from its name, runs it with the given params and
# hands the result to ResultHandler on success or to handle_failure otherwise.
def perform(agent_class_name, **params)
  outcome = agent_class_name.constantize.call(**params)
  return handle_failure(outcome) unless outcome.success?

  ResultHandler.process(outcome)
end
private
# Logs the failed result's error and forwards the whole result to notify_admin.
def handle_failure(result)
  message = "Agent failed: #{result.error}"
  Rails.logger.error(message)
  notify_admin(result)
end
end

class ApplicationController < ActionController::Base
# Route agent errors raised during a request to the private handlers defined in this controller.
rescue_from RubyLLM::Agents::BudgetExceededError, with: :handle_budget_exceeded
rescue_from RubyLLM::Agents::CircuitBreakerOpenError, with: :handle_circuit_open
rescue_from RubyLLM::Agents::TimeoutError, with: :handle_timeout
private
# Renders a 503 JSON response for a budget-exceeded error. The error object
# itself is not inspected.
def handle_budget_exceeded(error)
  payload = { error: "Service limit reached", type: "budget_exceeded" }
  render json: payload, status: :service_unavailable
end
# Renders a 503 JSON response for an open circuit breaker and sets the
# Retry-After header to the remaining cooldown in whole seconds.
def handle_circuit_open(error)
  # Round up: with an Integer remaining_ms, `/ 1000` floors, so anything under
  # a second became "0" and invited an immediate retry against an open breaker.
  retry_after = (error.remaining_ms / 1000.0).ceil

  response.headers["Retry-After"] = retry_after.to_s
  render json: {
    error: "Service temporarily unavailable",
    type: "circuit_open",
    retry_after: retry_after
  }, status: :service_unavailable
end
# Renders a 504 JSON response for an agent timeout. The error object itself
# is not inspected.
def handle_timeout(error)
  payload = { error: "Request timed out", type: "timeout" }
  render json: payload, status: :gateway_timeout
end
end

class Api::V1::SearchController < Api::BaseController
# Runs the search agent for params[:q]. On success renders the content with
# token, cost and duration metadata; on failure renders the error and its
# retryable flag with a 422 status.
def search
  outcome = LLM::SearchAgent.call(query: params[:q])

  unless outcome.success?
    failure = { error: outcome.error, retryable: outcome.retryable? }
    return render(json: failure, status: :unprocessable_entity)
  end

  body = {
    data: outcome.content,
    meta: {
      tokens: outcome.total_tokens,
      cost: outcome.total_cost,
      duration_ms: outcome.duration_ms
    }
  }
  render json: body
end
end

# Track error rates
# Share of today's MyAgent executions that failed
executions = RubyLLM::Agents::Execution.today.by_agent("MyAgent")
error_rate = executions.failed.count.to_f / executions.count

# Alert once the rate is above 10%
if error_rate > 0.1
  SlackNotifier.alert("High error rate for MyAgent: #{(error_rate * 100).round}%")
end

# Analyze failure reasons
# Count today's failed executions per error message, then keep the five most frequent
failure_counts = RubyLLM::Agents::Execution.today.failed.group(:error_message).count

failure_counts
  .sort_by { |_message, occurrences| -occurrences }
  .first(5)
# => [["Rate limit exceeded", 45], ["Timeout", 12], ...]

# config/initializers/ruby_llm_agents.rb
RubyLLM::Agents.configure do |config|
config.on_alert = ->(event, payload) {
case event
when :breaker_open
PagerDuty.trigger(
summary: "Circuit breaker open for #{payload[:agent_type]}",
severity: "warning"
)
Slack::Notifier.new(ENV["SLACK_WEBHOOK"]).ping(
"Circuit breaker opened: #{payload[:agent_type]}"
)
when :budget_hard_cap
PagerDuty.trigger(
summary: "Budget exceeded: $#{payload[:total_cost]}",
severity: "critical"
)
when :budget_soft_cap
Rails.logger.warn("Budget warning: #{payload}")
end
}
end-
Always check `result.success?` - Don't assume calls succeed
- Use rescue blocks sparingly - Prefer checking result status
- Log errors with context - Include agent type, parameters, and timing
- Set up monitoring - Track error rates and patterns
- Implement graceful degradation - Have fallback strategies
- Use circuit breakers - Prevent cascade failures
- Configure appropriate timeouts - Balance responsiveness and reliability
- Reliability - Retries and fallbacks
- Circuit Breakers - Failure protection
- Budget Controls - Spending limits
- Execution Tracking - Error logging
- Testing Agents - Testing error paths