diff --git a/.env.template b/.env.template index 6bd724b..64b469d 100644 --- a/.env.template +++ b/.env.template @@ -62,4 +62,9 @@ LANGFUSE_SECRET_KEY="" LANGFUSE_HOST="" # Optional, defaults to https://cloud.langfuse.com FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN="" FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME="" -FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME="" \ No newline at end of file +FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME="" + +# CloudFront and Route53 Configuration +USE_ROUTE53="false" +USE_CLOUDFRONT="true" +CLOUDFRONT_PRICE_CLASS="PriceClass_100" diff --git a/README.md b/README.md index 2d9c39a..4988613 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Project ACTIVE as of Feb 15, 2025 - [Project Overview](#project-overview) - [Architecture](#architecture) - [AWS Services in this Guidance](#aws-services-in-this-Guidance) +- [Distribution Options](#distribution-options) - [Cost](#cost) - [Cost Considerations](#cost-considerations) - [Cost Components](#cost-components) @@ -27,7 +28,7 @@ If you are unfamiliar with LiteLLM, it provides a consistent interface to access ## Architecture -![Reference Architecture Diagram ECS EKS](./media/Reference_architecture_ECS_EKS_platform_combined.jpg) +![Reference Architecture Diagram ECS EKS](./media/architecture.png) ### Architecture steps @@ -38,7 +39,154 @@ If you are unfamiliar with LiteLLM, it provides a consistent interface to access 5. External model providers (OpenAI, Anthropic, Vertex AI etc.) are configured using LiteLLM Admin UI to enable additional LLM model access via unified application interface. Pre-existing configurations of third-party providers are integrated into the Gateway using LiteLLM APIs. 6. LiteLLM integrates with [Amazon ElastiCache (Redis OSS)](https://aws.amazon.com/elasticache/), [Amazon Relational Database Service (RDS)](https://aws.amazon.com/rds/), and [AWS Secrets Manager](https://aws.amazon.com/secrets-manager/) services. 
Amazon ElastiCache enables multi-tenant distribution of application settings and prompt caching. Amazon RDS enables persistence of virtual API keys and other configuration settings provided by LiteLLM. AWS Secrets Manager stores external model provider credentials and other sensitive settings securely. 7. LiteLLM and the API/middleware store application logs in the dedicated [Amazon S3](https://aws.amazon.com/s3) storage bucket for troubleshooting and access analysis. - + +## Distribution Options + +Starting with version 1.1.0, this solution supports flexible deployment scenarios to meet various security and accessibility requirements. You can customize how your LiteLLM gateway is accessed based on your specific needs. + +### Deployment Scenarios + +#### Scenario 1: Default - Public with CloudFront (Recommended) +```bash +USE_CLOUDFRONT="true" +USE_ROUTE53="false" +PUBLIC_LOAD_BALANCER="true" +``` + +**Why choose this scenario:** +- Global performance with low-latency access via CloudFront's edge locations +- Enhanced security with AWS Shield Standard DDoS protection +- Simplified HTTPS management with CloudFront's default certificate +- Best option for public-facing AI services with a global user base + +**Security:** +- CloudFront IP filtering and the secret-header check (see CloudFront Authentication) restrict ALB access to only CloudFront traffic +- WAF can be applied at the CloudFront level (requires global WAF) +- Simpler certificate management using CloudFront's default certificate + +**Access URL:** `https://d1234abcdef.cloudfront.net` + +#### Scenario 2: Custom Domain with CloudFront +```bash +USE_CLOUDFRONT="true" +USE_ROUTE53="true" +PUBLIC_LOAD_BALANCER="true" +HOSTED_ZONE_NAME="example.com" +RECORD_NAME="genai" +CERTIFICATE_ARN="arn:aws:acm:region:account:certificate/certificate-id" +``` + +**Why choose this scenario:** +- Brand consistency with your custom domain +- Professional appearance and SEO benefits +- Same global performance and security as Scenario 1 + +**Additional requirements:** +- Route53 hosted 
zone for your domain +- ACM certificate for your domain (must be in us-east-1 for CloudFront) + +**Access URL:** `https://genai.example.com` + +#### Scenario 3: Direct ALB Access (No CloudFront) +```bash +USE_CLOUDFRONT="false" +USE_ROUTE53="true" +PUBLIC_LOAD_BALANCER="true" +HOSTED_ZONE_NAME="example.com" +RECORD_NAME="genai" +CERTIFICATE_ARN="arn:aws:acm:region:account:certificate/certificate-id" +``` + +**Why choose this scenario:** +- Lower latency for single-region deployments +- Simplified architecture without CloudFront +- Regional WAF can be directly applied to the ALB +- Cost savings by eliminating CloudFront distribution + +**Security considerations:** +- No CloudFront layer means direct internet exposure of ALB +- WAF protection becomes particularly important +- ALB security group allows traffic from all IPs (0.0.0.0/0) + +**Access URL:** `https://genai.example.com` (points directly to ALB) + +#### Scenario 4: Private VPC Only +```bash +USE_CLOUDFRONT="false" +USE_ROUTE53="true" +PUBLIC_LOAD_BALANCER="false" +HOSTED_ZONE_NAME="example.internal" # Often a private .internal domain +RECORD_NAME="genai" +CERTIFICATE_ARN="arn:aws:acm:region:account:certificate/certificate-id" +``` + +**Why choose this scenario:** +- Maximum security for internal enterprise applications +- Complete isolation from public internet +- Suitable for processing sensitive or proprietary data + +**Access methods:** +- VPN connection to the VPC +- AWS Direct Connect +- VPC peering with corporate network +- Transit Gateway + +**Security considerations:** +- No public internet access possible +- ALB security group only allows traffic from private subnet CIDRs +- Requires network connectivity to the VPC for access + +**Access URL:** `https://genai.example.internal` (resolves only within VPC or connected networks) + +### Configuration Quick Reference + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `USE_CLOUDFRONT` | `true` | Enables CloudFront 
distribution for global delivery | +| `USE_ROUTE53` | `false` | Enables Route53 for custom domain support | +| `PUBLIC_LOAD_BALANCER` | `true` | Deploys ALB in public subnets | +| `CLOUDFRONT_PRICE_CLASS` | `PriceClass_100` | CloudFront price class (`PriceClass_100`, `PriceClass_200`, or `PriceClass_All`) | +| `HOSTED_ZONE_NAME` | `""` | Route53 hosted zone name for custom domain | +| `RECORD_NAME` | `""` | Record to create in Route53 (subdomain) | +| `CERTIFICATE_ARN` | `""` | ARN of ACM certificate for custom domain | + +### Security Considerations + +Each deployment scenario offers different security characteristics: + +1. **CloudFront with public ALB (Default)**: + - ALB is in public subnets but protected by custom header authentication + - Only traffic with the proper CloudFront secret header is allowed (except health check paths) + - CloudFront provides an additional security layer with AWS Shield Standard DDoS protection + - Best balance of accessibility and security for public services + +2. **Direct ALB access (No CloudFront)**: + - ALB is directly accessible from the internet + - WAF protection is crucial for this deployment + - Consider IP-based restrictions if possible + +3. **Private VPC deployment**: + - Highest security, no direct internet exposure + - Requires VPN or Direct Connect for access + - Consider for sensitive workloads or internal services + +All scenarios maintain security best practices including: +- HTTPS with TLS 1.2+ for all client-facing communications (CloudFront-to-ALB traffic falls back to HTTP with header authentication when no `CERTIFICATE_ARN` is set) +- Security groups following the principle of least privilege +- WAF protection against common attacks +- IAM roles with appropriate permissions + +### CloudFront Authentication + +When using CloudFront, a custom security mechanism is implemented: + +1. CloudFront adds a secret header (`X-CloudFront-Secret`) to all requests sent to the ALB +2. The ALB has listener rules that verify this header before allowing access +3. Health check paths are specifically exempted to allow CloudFront origin health checks +4. 
The secret is stable across deployments (won't change unless explicitly changed) + +This provides a robust defense against direct ALB access even if someone discovers your ALB's domain name. The secret is only displayed once after creation in the Terraform outputs and is marked as sensitive. + ### AWS Services in this Guidance | **AWS Service** | **Role** | **Description** | @@ -51,10 +199,11 @@ If you are unfamiliar with LiteLLM, it provides a consistent interface to access | [Amazon Web Applications Firewall](https://aws.amazon.com/waf/) (WAF) | Core Service | Protect guidance applications from common exploits | | [Amazon Elastic Container Registry](http://aws.amazon.com/ecr/) (ECR) | Supporting service | Stores and manages Docker container images for EKS deployments. | | [Elastic Load Balancer](https://aws.amazon.com/elasticloadbalancing/) (ALB) | Supporting service | Distributes incoming traffic across multiple targets in the EKS cluster. | +| [Amazon CloudFront](https://aws.amazon.com/cloudfront/) | Supporting service | Global content delivery network for improved performance and security. | | [Amazon Simple Storage Service ](https://aws.amazon.com/s3) (S3) | Supporting service | Provides persistent object storage for Applications logs and other related data. | | [Amazon Relational Database Service ](https://aws.amazon.com/rds/) (RDS) | Supporting service | Enables persistence of virtual API keys and other configuration settings provided by LiteLLM. | | [Amazon ElastiCache Service (Redis OSS) ](https://aws.amazon.com/elasticache/) (OSS) | Supporting service | Enables multi-tenant distribution of application settings and prompt caching. 
| -| [AWS Route 53](https://aws.amazon.com/route53/) | Supporting Service | Routes users to the guidance application via DNS records | +| [AWS Route 53](https://aws.amazon.com/route53/) | Supporting Service | Optional DNS service for custom domain management | | [AWS Identity and Access Management](https://aws.amazon.com/iam/) (IAM) | Supporting service | Manages access to AWS services and resources securely, including ECS or EKS cluster access. | | [AWS Certificate Manager](https://aws.amazon.com/certificate-manager/) (ACM) | Security service | Manages SSL/TLS certificates for secure communication within the cluster. | | [Amazon CloudWatch](https://aws.amazon.com/cloudwatch/) | Monitoring service | Collects and tracks metrics, logs, and events from ECS, EKS and other AWS resources provisioned in the guidance | @@ -118,8 +267,8 @@ While this implementation guide provides default configurations, customers are r Customers should regularly review their AWS service usage patterns, adjust configurations as needed, and leverage AWS cost management tools to optimize their spending. -We recommend creating a [budget](https://docs.aws.amazon.com/cost-management/latest/userguide/budgets-create.html)  -through [AWS Cost Explorer](http://aws.amazon.com/aws-cost-management/aws-cost-explorer/) to +We recommend creating a [budget](https://docs.aws.amazon.com/cost-management/latest/userguide/budgets-create.html) +through [AWS Cost Explorer](http://aws.amazon.com/aws-cost-management/aws-cost-explorer/) to help manage costs. Prices are subject to change and also depend on model provider usage patterns/volume of data. For full details, please refer to the pricing webpage for each AWS service used in this guidance. ### Sample Cost tables @@ -268,6 +417,4 @@ For detailed information about the open source libraries used in this applicatio ## Notices -Customers are responsible for making their own independent assessment of the information in this Guidance. 
This Guidance: (a) is for informational purposes only, (b) represents AWS current product offerings and practices, which are subject to change without notice, and (c) does not create any commitments or assurances from AWS and its affiliates, suppliers or licensors. AWS products or services are provided “as is” without warranties, representations, or conditions of any kind, whether express or implied. AWS responsibilities and liabilities to its customers are controlled by AWS agreements, and this Guidance is not part of, nor does it modify, any agreement between AWS and its customers. - - +Customers are responsible for making their own independent assessment of the information in this Guidance. This Guidance: (a) is for informational purposes only, (b) represents AWS current product offerings and practices, which are subject to change without notice, and (c) does not create any commitments or assurances from AWS and its affiliates, suppliers or licensors. AWS products or services are provided "as is" without warranties, representations, or conditions of any kind, whether express or implied. AWS responsibilities and liabilities to its customers are controlled by AWS agreements, and this Guidance is not part of, nor does it modify, any agreement between AWS and its customers. diff --git a/deploy.sh b/deploy.sh index e01c14a..7ce930e 100755 --- a/deploy.sh +++ b/deploy.sh @@ -46,7 +46,6 @@ if [ ! 
-f ".env" ]; then cp .env.template .env fi - SKIP_BUILD=false while [[ $# -gt 0 ]]; do case $1 in @@ -66,10 +65,44 @@ APP_NAME=litellm MIDDLEWARE_APP_NAME=middleware LOG_BUCKET_STACK_NAME="log-bucket-stack" MAIN_STACK_NAME="litellm-stack" - +TRACKING_STACK_NAME="tracking-stack" # Load environment variables from .env file source .env +# Auto-detect existing deployments and set defaults for backward compatibility +if aws cloudformation describe-stacks --stack-name "${TRACKING_STACK_NAME}" &>/dev/null; then + echo "Detected existing deployment - ensuring backward compatibility" + # If variables aren't explicitly set in .env, use existing configuration + if [ -z "$USE_ROUTE53" ]; then + # For existing deployments where HOSTED_ZONE_NAME is set, maintain Route53 usage + if [ -n "$HOSTED_ZONE_NAME" ] && [ -n "$RECORD_NAME" ]; then + USE_ROUTE53="true" + echo "→ Setting USE_ROUTE53=true to maintain existing configuration" + else + USE_ROUTE53="false" + fi + fi +else + echo "New deployment detected - using new defaults" + # For new deployments, set defaults if not explicitly defined + if [ -z "$USE_ROUTE53" ]; then + USE_ROUTE53="false" + echo "→ Setting USE_ROUTE53=false (default for new deployments)" + fi +fi + +# Use CloudFront by default if not explicitly set +if [ -z "$USE_CLOUDFRONT" ]; then + USE_CLOUDFRONT="true" + echo "→ Setting USE_CLOUDFRONT=true (default)" +fi + +# Set CloudFront price class if not defined +if [ -z "$CLOUDFRONT_PRICE_CLASS" ]; then + CLOUDFRONT_PRICE_CLASS="PriceClass_100" + echo "→ Setting CLOUDFRONT_PRICE_CLASS=${CLOUDFRONT_PRICE_CLASS} (default)" +fi + # Check if bucket exists if aws s3api head-bucket --bucket "$TERRAFORM_S3_BUCKET_NAME" 2>/dev/null; then echo "Terraform Bucket $TERRAFORM_S3_BUCKET_NAME already exists, skipping creation" @@ -84,9 +117,21 @@ if [[ (-z "$LITELLM_VERSION") || ("$LITELLM_VERSION" == "placeholder") ]]; then exit 1 fi -if [ -z "$CERTIFICATE_ARN" ] || [ -z "$RECORD_NAME" ]; then - echo "Error: CERTIFICATE_ARN and 
RECORD_NAME must be set in .env file" - exit 1 +# Update validation logic +if [ "$USE_ROUTE53" = "true" ]; then + if [ -z "$HOSTED_ZONE_NAME" ] || [ -z "$RECORD_NAME" ]; then + echo "Error: When USE_ROUTE53=true, both HOSTED_ZONE_NAME and RECORD_NAME must be set in .env file" + exit 1 + fi + + if [ -z "$CERTIFICATE_ARN" ]; then + echo "Warning: No CERTIFICATE_ARN provided. Using CloudFront-to-ALB HTTP communication with header authentication." + echo "Note: Communication between users and CloudFront will still use HTTPS." + fi +else + if [ -n "$HOSTED_ZONE_NAME" ] || [ -n "$RECORD_NAME" ]; then + echo "Warning: HOSTED_ZONE_NAME and/or RECORD_NAME are set but will not be used because USE_ROUTE53=false" + fi fi echo "Certificate Arn: " $CERTIFICATE_ARN @@ -325,6 +370,14 @@ export TF_VAR_disable_swagger_page=$DISABLE_SWAGGER_PAGE export TF_VAR_disable_admin_ui=$DISABLE_ADMIN_UI export TF_VAR_langfuse_public_key=$LANGFUSE_PUBLIC_KEY export TF_VAR_langfuse_secret_key=$LANGFUSE_SECRET_KEY +export TF_VAR_use_route53=$USE_ROUTE53 +export TF_VAR_use_cloudfront=$USE_CLOUDFRONT +export TF_VAR_cloudfront_price_class=$CLOUDFRONT_PRICE_CLASS + +# Display CloudFront and Route53 configuration +echo "USE_ROUTE53: $USE_ROUTE53" +echo "USE_CLOUDFRONT: $USE_CLOUDFRONT" +echo "CLOUDFRONT_PRICE_CLASS: $CLOUDFRONT_PRICE_CLASS" if [ -n "${LANGFUSE_HOST}" ]; then export TF_VAR_langfuse_host=$LANGFUSE_HOST @@ -355,7 +408,6 @@ if [ $? -eq 0 ]; then echo "Deployment successful. Extracting outputs..." if [ "$DEPLOYMENT_PLATFORM" = "ECS" ]; then - LITELLM_ECS_CLUSTER=$(terraform output -raw LitellmEcsCluster) LITELLM_ECS_TASK=$(terraform output -raw LitellmEcsTask) SERVICE_URL=$(terraform output -raw ServiceURL) @@ -378,6 +430,30 @@ if [ $? 
-eq 0 ]; then aws eks update-kubeconfig --region $aws_region --name $EKS_CLUSTER_NAME kubectl rollout restart deployment $EKS_DEPLOYMENT_NAME fi + + # Validate CloudFront if enabled + if [ "$USE_CLOUDFRONT" = "true" ]; then + echo "Validating CloudFront deployment..." + CF_DIST_ID=$(terraform output -raw cloudfront_distribution_id 2>/dev/null || echo "") + if [ -n "$CF_DIST_ID" ]; then + echo "✓ CloudFront distribution created successfully: $CF_DIST_ID" + CF_DOMAIN=$(terraform output -raw cloudfront_domain_name 2>/dev/null || echo "") + echo "✓ CloudFront domain: $CF_DOMAIN" + echo "CloudFrontDomain=$CF_DOMAIN" >> resources.txt + echo "CloudFrontID=$CF_DIST_ID" >> resources.txt + + echo "Note: CloudFront distribution deployment may take 15-30 minutes to complete globally" + else + echo "⚠️ CloudFront distribution ID not found in outputs - this is expected if CloudFront module was not applied" + fi + fi + + # Validate Route53 if used + if [ "$USE_ROUTE53" = "true" ]; then + echo "✓ Route53 configuration applied successfully" + fi + + echo "✓ Deployment completed successfully" else echo "Deployment failed" -fi \ No newline at end of file +fi diff --git a/litellm-terraform-stack/main.tf b/litellm-terraform-stack/main.tf index 8229f36..1724d7d 100644 --- a/litellm-terraform-stack/main.tf +++ b/litellm-terraform-stack/main.tf @@ -6,7 +6,7 @@ resource "aws_cloudformation_stack" "guidance_deployment_metrics" { template_body = <