WildMeOrg · JasonWildMe · Nov 30, 2025 · Nov 30, 2025 · Jan 23, 2026 · Jan 23, 2026
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
@@ -0,0 +1,10 @@
+{
+  "permissions": {
+    "allow": [
+      "WebSearch",
+      "Bash(tree:*)"
+    ],
+    "deny": [],
+    "ask": []
+  }
+}
diff --git a/devops/deploy/.dockerfiles/db/postgresql.conf b/devops/deploy/.dockerfiles/db/postgresql.conf
@@ -1,17 +1,208 @@
+# =============================================================================
+# PostgreSQL Configuration for Wildbook
+# Optimized for Azure Standard D8s v5 (8 vCPUs, 32 GB RAM, Premium SSD)
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# CONNECTION SETTINGS (NOTE: per-backend memory such as work_mem scales with max_connections)
+# -----------------------------------------------------------------------------
 listen_addresses = '*'
-max_connections = 300
+max_connections = 2000
 
+# -----------------------------------------------------------------------------
+# MEMORY SETTINGS
+# -----------------------------------------------------------------------------
+# shared_buffers: ~25% of RAM is the usual starting point; 8GB on this 32GB host
 shared_buffers = 8GB
-work_mem = 40MB
 
-# WAL
+# effective_cache_size: Planner's estimate of memory available for disk caching (allocates nothing)
+# Set to ~75% of total RAM (OS cache + shared_buffers)
+effective_cache_size = 24GB
+
+# work_mem: Memory per sort/hash operation, per backend. Sized for max_connections = 2000
+# Formula: (RAM - shared_buffers) / (max_connections * 3) = (32-8)GB / 6000 ≈ 4MB
+# Raise per session/role (SET work_mem) for complex Wildbook queries (encounters, annotations)
+work_mem = 4MB
+
+# maintenance_work_mem: Memory for VACUUM, CREATE INDEX, ALTER TABLE
+# Can be higher since these operations are infrequent (each autovacuum worker may use it); 1-2GB recommended
+maintenance_work_mem = 1GB
+
+# huge_pages: Enable if OS supports it (reduces TLB misses for large shared_buffers)
+huge_pages = try
+
+# -----------------------------------------------------------------------------
+# QUERY PLANNER / OPTIMIZER
+# -----------------------------------------------------------------------------
+# random_page_cost: Lower for SSD storage (default 4.0 assumes spinning disk)
+# Azure Premium SSD: use 1.1-1.5
+random_page_cost = 1.1
+
+# seq_page_cost: Cost of a sequential page fetch (PostgreSQL default; baseline for the other costs)
+seq_page_cost = 1.0
+
+# effective_io_concurrency: Concurrent I/O operations for bitmap heap scans
+# For SSD: 200 is recommended (default is 1)
+effective_io_concurrency = 200
+
+# cpu_tuple_cost and cpu_index_tuple_cost: Kept at the PostgreSQL defaults (0.01 / 0.005)
+cpu_tuple_cost = 0.01
+cpu_index_tuple_cost = 0.005
+
+# default_statistics_target: Raised from the default of 100 for better plans on complex tables
+# Wildbook has large tables (Encounter, Annotation) that benefit from more stats
+default_statistics_target = 200
+
+# -----------------------------------------------------------------------------
+# PARALLEL QUERY EXECUTION (Leverage 8 vCPUs)
+# -----------------------------------------------------------------------------
+# max_worker_processes: Background worker pool that parallel workers are drawn from (matches vCPUs)
+max_worker_processes = 8
+
+# max_parallel_workers_per_gather: Workers per query (half of vCPUs is safe)
+max_parallel_workers_per_gather = 4
+
+# max_parallel_workers: Total workers for parallel queries (values above max_worker_processes have no effect)
+max_parallel_workers = 8
+
+# max_parallel_maintenance_workers: Workers for VACUUM, CREATE INDEX
+max_parallel_maintenance_workers = 4
+
+# parallel_tuple_cost: Lowered from the default of 0.1 to encourage parallel execution
+parallel_tuple_cost = 0.01
+
+# parallel_setup_cost: Lowered from the default of 1000 to encourage parallelism for smaller result sets
+parallel_setup_cost = 500
+
+# min_parallel_table_scan_size: Minimum table size to consider parallel scan (PostgreSQL default)
+min_parallel_table_scan_size = 8MB
+
+# min_parallel_index_scan_size: Minimum index size for parallel index scan (PostgreSQL default)
+min_parallel_index_scan_size = 512kB
+
+# -----------------------------------------------------------------------------
+# WAL (Write-Ahead Logging)
+# -----------------------------------------------------------------------------
+# wal_level: 'replica' enables streaming replication if needed later
+wal_level = replica
+
+# max_wal_size: Soft limit on WAL growth between automatic checkpoints
 max_wal_size = 8GB
+
+# min_wal_size: While WAL usage stays below this, old WAL files are recycled rather than removed
+min_wal_size = 2GB
+
+# wal_buffers: Set explicitly; auto (-1) is 1/32 of shared_buffers, capped at one WAL segment (typically 16MB)
+wal_buffers = 64MB
+
+# wal_writer_flush_after: Flush WAL after this much data written
 wal_writer_flush_after = 1MB
 
-# Checkpoints
-checkpoint_timeout = 10min
-checkpoint_completion_target = 0.8
+# wal_compression: Compress full-page images written to WAL (less WAL I/O, slight CPU cost)
+wal_compression = on
+
+# -----------------------------------------------------------------------------
+# CHECKPOINTS
+# -----------------------------------------------------------------------------
+# checkpoint_timeout: Maximum time between automatic checkpoints (longer = less I/O, longer crash recovery)
+checkpoint_timeout = 15min
+
+# checkpoint_completion_target: Spread checkpoint writes over this fraction of the checkpoint interval
+# 0.9 spreads writes more evenly, reducing I/O spikes
+checkpoint_completion_target = 0.9
 
-# Background Writer
+# -----------------------------------------------------------------------------
+# BACKGROUND WRITER
+# -----------------------------------------------------------------------------
+# bgwriter_delay: Delay between background writer rounds (200ms is the PostgreSQL default)
+bgwriter_delay = 200ms
+
+# bgwriter_lru_maxpages: Max pages to write per round
 bgwriter_lru_maxpages = 1000
-bgwriter_lru_multiplier = 3.0
+
+# bgwriter_lru_multiplier: Multiple of recent buffer demand used to estimate buffers needed next round
+bgwriter_lru_multiplier = 4.0
+
+# -----------------------------------------------------------------------------
+# VACUUMING & AUTOVACUUM
+# -----------------------------------------------------------------------------
+# autovacuum: Ensure it's enabled (critical for Wildbook's large tables)
+autovacuum = on
+
+# autovacuum_max_workers: Maximum number of autovacuum workers running at once
+autovacuum_max_workers = 4
+
+# autovacuum_naptime: Minimum delay between autovacuum runs on any given database
+autovacuum_naptime = 30s
+
+# autovacuum_vacuum_threshold: Min row changes before vacuum
+autovacuum_vacuum_threshold = 50
+
+# autovacuum_vacuum_scale_factor: Fraction of table size added to the threshold to trigger vacuum
+# Lower for large tables like Encounter, Annotation
+autovacuum_vacuum_scale_factor = 0.05
+
+# autovacuum_analyze_threshold: Min row changes before analyze
+autovacuum_analyze_threshold = 50
+
+# autovacuum_analyze_scale_factor: Fraction of table size added to the threshold to trigger analyze
+autovacuum_analyze_scale_factor = 0.025
+
+# autovacuum_vacuum_cost_limit: Cost budget shared by all autovacuum workers (higher = faster but more I/O)
+autovacuum_vacuum_cost_limit = 1000
+
+# -----------------------------------------------------------------------------
+# LOGGING (for performance analysis)
+# -----------------------------------------------------------------------------
+# log_min_duration_statement: Log queries taking longer than this (ms)
+# Set to 1000ms to identify slow queries; adjust as needed
+log_min_duration_statement = 1000
+
+# log_checkpoints: Log checkpoint activity
+log_checkpoints = on
+
+# log_connections: Log new connections (useful for debugging connection issues; noisy with many short-lived sessions)
+log_connections = on
+
+# log_disconnections: Log disconnections, including session duration
+log_disconnections = on
+
+# log_lock_waits: Log lock waits longer than deadlock_timeout
+log_lock_waits = on
+
+# log_temp_files: Log temp file usage (indicates work_mem may be too low)
+# Value is in kB: log temp files larger than 10MB
+log_temp_files = 10240
+
+# log_autovacuum_min_duration: Log autovacuum operations taking > 250ms
+log_autovacuum_min_duration = 250
+
+# -----------------------------------------------------------------------------
+# STATEMENT BEHAVIOR (CONSERVATIVE - no timeouts that could kill queries)
+# -----------------------------------------------------------------------------
+# statement_timeout: Disabled (0) to avoid killing long-running legitimate queries
+# Enable only after profiling your workload to find safe timeout values
+statement_timeout = 0
+
+# lock_timeout: Disabled (0 = wait for locks indefinitely) to match original behavior
+lock_timeout = 0
+
+# idle_in_transaction_session_timeout: Disabled (0) to match original behavior
+# WARNING: This means abandoned transactions can hold locks indefinitely
+# Consider enabling (e.g., 300000 = 5 min) after testing
+idle_in_transaction_session_timeout = 0
+
+# -----------------------------------------------------------------------------
+# JIT (Just-In-Time Compilation) - PostgreSQL 11+
+# -----------------------------------------------------------------------------
+# jit: Allow JIT for complex queries (benefits large analytical queries)
+jit = on
+
+# jit_above_cost: Use JIT for queries with cost above this (PostgreSQL default)
+jit_above_cost = 100000
+
+# jit_inline_above_cost: Inline functions for queries above this cost (PostgreSQL default)
+jit_inline_above_cost = 500000
+
+# jit_optimize_above_cost: Apply expensive optimizations above this cost (PostgreSQL default)
+jit_optimize_above_cost = 500000