-
Notifications
You must be signed in to change notification settings - Fork 4
Production Deployment
adham90 edited this page Feb 20, 2026
·
5 revisions
Best practices for deploying RubyLLM::Agents to production.
RubyLLM::Agents.configure do |config|
config.async_logging = true # Required for production
end

Ensure a job processor is running:
# Solid Queue (Rails 7.1+)
bin/jobs
# Or Sidekiq
bundle exec sidekiq

config.dashboard_auth = ->(controller) {
controller.current_user&.admin?
}

config.budgets = {
global_daily: 500.0,
global_monthly: 10000.0,
enforcement: :hard
}

config.on_alert = ->(event, payload) {
case event
when :budget_hard_cap, :breaker_open
PagerDuty.trigger(summary: "Alert: #{event}")
Slack::Notifier.new(ENV['SLACK_WEBHOOK_URL']).ping("#{event}: #{payload[:agent_type]}")
end
}

# config/initializers/ruby_llm_agents.rb
RubyLLM::Agents.configure do |config|
# API Keys (unified config, v2.1+) — no separate ruby_llm.rb needed
config.openai_api_key = ENV["OPENAI_API_KEY"]
config.anthropic_api_key = ENV["ANTHROPIC_API_KEY"]
config.gemini_api_key = ENV["GOOGLE_API_KEY"]
# Performance
config.async_logging = true
config.cache_store = Rails.cache
# Defaults
config.default_model = "gpt-4o"
config.default_temperature = 0.0
config.default_timeout = 60
# Data Management
config.retention_period = 90.days
config.persist_prompts = true
config.persist_responses = true
# Cost Control
config.budgets = {
global_daily: 500.0,
global_monthly: 10000.0,
per_agent_daily: {
"ExpensiveAgent" => 100.0
},
enforcement: :hard,
soft_cap_percentage: 80
}
# Anomaly Detection
config.anomaly_cost_threshold = 10.00
config.anomaly_duration_threshold = 30_000
# Alerts
config.on_alert = ->(event, payload) {
case event
when :budget_hard_cap, :breaker_open
PagerDuty.trigger(summary: "Critical: #{event}", details: payload)
when :budget_soft_cap, :agent_anomaly
Slack::Notifier.new(ENV['SLACK_WEBHOOK_URL']).ping("Warning: #{event}")
end
}
# Dashboard
config.dashboard_auth = ->(controller) {
controller.current_user&.admin?
}
config.dashboard_per_page = 50
end

# API Keys
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=...
# Alerts
SLACK_WEBHOOK_URL=https://hooks.slack.com/services/...
ALERT_WEBHOOK_URL=https://your-app.com/webhooks/llm
# Redis (for caching)
REDIS_URL=redis://localhost:6379/1
# Database
DATABASE_URL=postgres://...

Verify indexes exist:
rails db:migrate:status | grep ruby_llm

If indexes are missing:
rails generate ruby_llm_agents:upgrade
rails db:migrate

Ensure an adequate database connection pool size for async logging:
# config/database.yml
production:
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 10 } %>

# config/environments/production.rb
config.cache_store = :redis_cache_store, {
url: ENV['REDIS_URL'],
namespace: 'llm_cache',
expires_in: 1.day
}
# config/initializers/ruby_llm_agents.rb
config.cache_store = Rails.cache

Pre-populate the cache for common queries:
# lib/tasks/cache.rake
namespace :llm do
task warm_cache: :environment do
CommonQueries.each do |query|
SearchAgent.call(query: query)
end
end
end

# Add to agents for APM integration
class ApplicationAgent < RubyLLM::Agents::Base
def call
NewRelic::Agent::Tracer.in_transaction(
name: "LLM/#{self.class.name}",
category: :task
) do
super
end
end
end

# Track key metrics
ActiveSupport::Notifications.subscribe("ruby_llm_agents.execution.complete") do |*, payload|
StatsD.timing("llm.duration", payload[:duration_ms])
StatsD.increment("llm.executions", tags: ["agent:#{payload[:agent_type]}", "model:#{payload[:model_used]}"])
StatsD.gauge("llm.cost", payload[:total_cost])
StatsD.histogram("llm.tokens", payload[:total_tokens])
end
ActiveSupport::Notifications.subscribe("ruby_llm_agents.execution.error") do |*, payload|
StatsD.increment("llm.errors", tags: ["agent:#{payload[:agent_type]}", "error:#{payload[:error_class]}"])
end

# app/controllers/health_controller.rb
class HealthController < ApplicationController
def llm
# Check LLM connectivity
result = HealthCheckAgent.call(message: "ping", timeout: 5)
if result.success?
render json: { status: "ok", latency_ms: result.duration_ms }
else
render json: { status: "error" }, status: :service_unavailable
end
rescue => e
render json: { status: "error", message: e.message }, status: :service_unavailable
end
end

Agents are stateless and scale horizontally:
# kubernetes deployment
spec:
replicas: 3
template:
spec:
containers:
- name: rails
resources:
limits:
memory: "2Gi"
cpu: "1000m"

Scale job workers independently:
# Sidekiq deployment
spec:
replicas: 2
template:
spec:
containers:
- name: sidekiq
command: ["bundle", "exec", "sidekiq"]

Configure retry behavior for rate limits:
class ProductionAgent < ApplicationAgent
retries max: 5, backoff: :exponential, max_delay: 60.0
fallback_models "gpt-4o-mini", "claude-3-haiku"
circuit_breaker errors: 10, within: 60, cooldown: 300
end

# lib/tasks/maintenance.rake
namespace :llm do
task cleanup: :environment do
retention = RubyLLM::Agents.configuration.retention_period
deleted = RubyLLM::Agents::Execution
.where("created_at < ?", retention.ago)
.delete_all
Rails.logger.info("Deleted #{deleted} old executions")
end
end

Schedule with cron:
# config/schedule.rb (whenever gem)
every 1.day, at: '3:00 am' do
rake "llm:cleanup"
end

# Archive to S3 before deletion
namespace :llm do
task archive: :environment do
old_executions = RubyLLM::Agents::Execution
.where("created_at < ?", 90.days.ago)
S3Client.upload(
key: "llm-archive/#{Date.today}.json.gz",
body: compress(old_executions.to_json)
)
old_executions.delete_all
end
end

- Dashboard requires authentication
- API keys in environment variables (not code)
- HTTPS enforced
- Rate limiting in place
- Budget limits set
- Alert notifications configured
- Logs don't contain sensitive data
- Database encrypted at rest
- Network traffic encrypted (TLS)
# Backup executions table
pg_dump -t ruby_llm_agents_executions > backup.sql

Configure multiple providers:
class CriticalAgent < ApplicationAgent
model "gpt-4o"
fallback_models "claude-3-5-sonnet", "gemini-2.0-flash"
end

Regularly test:
- Database restore
- Provider failover
- Circuit breaker recovery
- Configuration - Full config reference
- Background Jobs - Job processor setup
- Budget Controls - Cost management
- Troubleshooting - Common issues