Production Deployment

Best practices for deploying RubyLLM::Agents to production.

Configuration Checklist

1. Async Logging

RubyLLM::Agents.configure do |config|
  config.async_logging = true  # Required for production
end

2. Background Job Processor

Ensure a job processor is running:

# Solid Queue (Rails 7.1+)
bin/jobs

# Or Sidekiq
bundle exec sidekiq

3. Dashboard Authentication

config.dashboard_auth = ->(controller) {
  controller.current_user&.admin?
}

4. Budget Limits

config.budgets = {
  global_daily: 500.0,
  global_monthly: 10000.0,
  enforcement: :hard
}

5. Alerts

config.on_alert = ->(event, payload) {
  case event
  when :budget_hard_cap, :breaker_open
    PagerDuty.trigger(summary: "Alert: #{event}")
    Slack::Notifier.new(ENV['SLACK_WEBHOOK']).ping("#{event}: #{payload[:agent_type]}")
  end
}

Complete Production Configuration

# config/initializers/ruby_llm_agents.rb
RubyLLM::Agents.configure do |config|
  # API Keys (unified config, v2.1+) — no separate ruby_llm.rb needed
  config.openai_api_key = ENV["OPENAI_API_KEY"]
  config.anthropic_api_key = ENV["ANTHROPIC_API_KEY"]
  config.gemini_api_key = ENV["GOOGLE_API_KEY"]

  # Performance
  config.async_logging = true
  config.cache_store = Rails.cache

  # Defaults
  config.default_model = "gpt-4o"
  config.default_temperature = 0.0
  config.default_timeout = 60

  # Data Management
  config.retention_period = 90.days
  config.persist_prompts = true
  config.persist_responses = true

  # Cost Control
  config.budgets = {
    global_daily: 500.0,
    global_monthly: 10000.0,
    per_agent_daily: {
      "ExpensiveAgent" => 100.0
    },
    enforcement: :hard,
    soft_cap_percentage: 80
  }

  # Anomaly Detection
  config.anomaly_cost_threshold = 10.00
  config.anomaly_duration_threshold = 30_000

  # Alerts
  config.on_alert = ->(event, payload) {
    case event
    when :budget_hard_cap, :breaker_open
      PagerDuty.trigger(summary: "Critical: #{event}", details: payload)
    when :budget_soft_cap, :agent_anomaly
      Slack::Notifier.new(ENV['SLACK_WEBHOOK']).ping("Warning: #{event}")
    end
  }

  # Dashboard
  config.dashboard_auth = ->(controller) {
    controller.current_user&.admin?
  }
  config.dashboard_per_page = 50
end

Environment Variables

# API Keys
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=...

# Alerts
SLACK_WEBHOOK_URL=https://hooks.slack.com/services/...
ALERT_WEBHOOK_URL=https://your-app.com/webhooks/llm

# Redis (for caching)
REDIS_URL=redis://localhost:6379/1

# Database
DATABASE_URL=postgres://...

Database Setup

Indexes

Verify indexes exist:

rails db:migrate:status | grep ruby_llm

If indexes are missing:

rails generate ruby_llm_agents:upgrade
rails db:migrate

Connection Pool

Ensure adequate pool size for async logging:

# config/database.yml
production:
  pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 10 } %>

Caching

Redis Setup

# config/environments/production.rb
config.cache_store = :redis_cache_store, {
  url: ENV['REDIS_URL'],
  namespace: 'llm_cache',
  expires_in: 1.day
}

# config/initializers/ruby_llm_agents.rb
config.cache_store = Rails.cache

Cache Warming

Pre-populate cache for common queries:

# lib/tasks/cache.rake
namespace :llm do
  task warm_cache: :environment do
    CommonQueries.each do |query|
      SearchAgent.call(query: query)
    end
  end
end

Monitoring

Application Performance Monitoring

# Add to agents for APM integration
class ApplicationAgent < RubyLLM::Agents::Base
  def call
    NewRelic::Agent::Tracer.in_transaction(
      name: "LLM/#{self.class.name}",
      category: :task
    ) do
      super
    end
  end
end

Metrics

# Track key metrics
ActiveSupport::Notifications.subscribe("ruby_llm_agents.execution.complete") do |*, payload|
  StatsD.timing("llm.duration", payload[:duration_ms])
  StatsD.increment("llm.executions", tags: ["agent:#{payload[:agent_type]}", "model:#{payload[:model_used]}"])
  StatsD.gauge("llm.cost", payload[:total_cost])
  StatsD.histogram("llm.tokens", payload[:total_tokens])
end

ActiveSupport::Notifications.subscribe("ruby_llm_agents.execution.error") do |*, payload|
  StatsD.increment("llm.errors", tags: ["agent:#{payload[:agent_type]}", "error:#{payload[:error_class]}"])
end

Health Checks

# app/controllers/health_controller.rb
class HealthController < ApplicationController
  def llm
    # Check LLM connectivity
    result = HealthCheckAgent.call(message: "ping", timeout: 5)

    if result.success?
      render json: { status: "ok", latency_ms: result.duration_ms }
    else
      render json: { status: "error" }, status: :service_unavailable
    end
  rescue => e
    render json: { status: "error", message: e.message }, status: :service_unavailable
  end
end

Scaling

Horizontal Scaling

Agents are stateless and scale horizontally:

# kubernetes deployment
spec:
  replicas: 3
  template:
    spec:
      containers:
        - name: rails
          resources:
            limits:
              memory: "2Gi"
              cpu: "1000m"

Background Job Scaling

Scale job workers independently:

# Sidekiq deployment
spec:
  replicas: 2
  template:
    spec:
      containers:
        - name: sidekiq
          command: ["bundle", "exec", "sidekiq"]

Rate Limit Handling

Configure retry behavior for rate limits:

class ProductionAgent < ApplicationAgent
  retries max: 5, backoff: :exponential, max_delay: 60.0
  fallback_models "gpt-4o-mini", "claude-3-haiku"
  circuit_breaker errors: 10, within: 60, cooldown: 300
end

Data Retention

Automatic Cleanup

# lib/tasks/maintenance.rake
namespace :llm do
  task cleanup: :environment do
    retention = RubyLLM::Agents.configuration.retention_period

    deleted = RubyLLM::Agents::Execution
      .where("created_at < ?", retention.ago)
      .delete_all

    Rails.logger.info("Deleted #{deleted} old executions")
  end
end

Schedule with cron:

# config/schedule.rb (whenever gem)
every 1.day, at: '3:00 am' do
  rake "llm:cleanup"
end

Archiving

# Archive to S3 before deletion
namespace :llm do
  task archive: :environment do
    old_executions = RubyLLM::Agents::Execution
      .where("created_at < ?", 90.days.ago)

    S3Client.upload(
      key: "llm-archive/#{Date.today}.json.gz",
      body: compress(old_executions.to_json)
    )

    old_executions.delete_all
  end
end

Security Checklist

Disaster Recovery

Backup Strategy

# Backup executions table
pg_dump -t ruby_llm_agents_executions > backup.sql

Failover

Configure multiple providers:

class CriticalAgent < ApplicationAgent
  model "gpt-4o"
  fallback_models "claude-3-5-sonnet", "gemini-2.0-flash"
end

Recovery Testing

Regularly test:

Database restore
Provider failover
Circuit breaker recovery

Related Pages

Configuration - Full config reference
Background Jobs - Job processor setup
Budget Controls - Cost management
Troubleshooting - Common issues

Production Deployment

Production Deployment

Configuration Checklist

1. Async Logging

2. Background Job Processor

3. Dashboard Authentication

4. Budget Limits

5. Alerts

Complete Production Configuration

Environment Variables

Database Setup

Indexes

Connection Pool

Caching

Redis Setup

Cache Warming

Monitoring

Application Performance Monitoring

Metrics

Health Checks

Scaling

Horizontal Scaling

Background Job Scaling

Rate Limit Handling

Data Retention

Automatic Cleanup

Archiving

Security Checklist

Disaster Recovery

Backup Strategy

Failover

Recovery Testing

Related Pages

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Clone this wiki locally