Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ pg = [
"asyncpg",
"psycopg[binary,pool]",
]
aws = [
"boto3",
]
container = [
"anthropic>=0.49.0",
"google-genai",
Expand Down
142 changes: 133 additions & 9 deletions src/phoenix/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,44 @@
# Name of the environment variable holding the PostgreSQL user name.
ENV_PHOENIX_POSTGRES_USER = "PHOENIX_POSTGRES_USER"
"""
Used with PHOENIX_POSTGRES_HOST to specify the user to use for the PostgreSQL database (required).

When using AWS RDS IAM authentication (PHOENIX_POSTGRES_USE_AWS_IAM_AUTH=true), this should be
set to the IAM-enabled database username configured in your RDS/Aurora instance.
"""
# Name of the environment variable holding the static PostgreSQL password.
ENV_PHOENIX_POSTGRES_PASSWORD = "PHOENIX_POSTGRES_PASSWORD"
"""
Used with PHOENIX_POSTGRES_HOST to specify the password to use for the PostgreSQL database
(required).
(required, unless PHOENIX_POSTGRES_USE_AWS_IAM_AUTH is enabled).

When using AWS RDS IAM authentication (PHOENIX_POSTGRES_USE_AWS_IAM_AUTH=true), this password
is NOT used. Instead, authentication tokens are generated dynamically using AWS IAM credentials.
"""
# Name of the environment variable selecting the PostgreSQL database.
ENV_PHOENIX_POSTGRES_DB = "PHOENIX_POSTGRES_DB"
"""
Used with PHOENIX_POSTGRES_HOST to specify the database to use for the PostgreSQL database.
"""
# Name of the boolean environment variable that switches on AWS RDS IAM authentication.
ENV_PHOENIX_POSTGRES_USE_AWS_IAM_AUTH = "PHOENIX_POSTGRES_USE_AWS_IAM_AUTH"
"""
Enable AWS RDS IAM database authentication. When enabled, Phoenix will use AWS IAM credentials
to generate short-lived authentication tokens instead of using a static password.

This requires:
- boto3 to be installed: pip install 'arize-phoenix[aws]'
- AWS credentials configured (via environment, ~/.aws/credentials, or IAM role)
- AWS region configured via standard AWS methods
- The database user to be configured for IAM authentication in RDS/Aurora
- SSL to be enabled (required by AWS RDS IAM auth)

When enabled, PHOENIX_POSTGRES_PASSWORD should NOT be set.
"""
# Name of the integer environment variable controlling how often pooled
# connections are recycled under IAM authentication (parenthesized only to
# keep the line within the length limit).
ENV_PHOENIX_POSTGRES_AWS_IAM_TOKEN_LIFETIME_SECONDS = (
"PHOENIX_POSTGRES_AWS_IAM_TOKEN_LIFETIME_SECONDS"
)
"""
Token lifetime in seconds for connection pool recycling when using AWS RDS IAM authentication.
AWS RDS auth tokens are valid for 15 minutes. This should be set slightly lower to ensure
tokens are refreshed before expiration. Defaults to 840 seconds (14 minutes).
"""
ENV_PHOENIX_SQL_DATABASE_SCHEMA = "PHOENIX_SQL_DATABASE_SCHEMA"
"""
The schema to use for the PostgreSQL database. (This is ignored for SQLite.)
Expand Down Expand Up @@ -1400,18 +1428,36 @@ def get_env_postgres_connection_str() -> Optional[str]:
"""
Build PostgreSQL connection string from environment variables.
"""
if not (
(pg_host := getenv(ENV_PHOENIX_POSTGRES_HOST, "").rstrip("/"))
and (pg_user := getenv(ENV_PHOENIX_POSTGRES_USER))
and (pg_password := getenv(ENV_PHOENIX_POSTGRES_PASSWORD))
):
pg_host = getenv(ENV_PHOENIX_POSTGRES_HOST, "").rstrip("/")
pg_user = getenv(ENV_PHOENIX_POSTGRES_USER)
pg_password = getenv(ENV_PHOENIX_POSTGRES_PASSWORD)
use_iam_auth = _bool_val(ENV_PHOENIX_POSTGRES_USE_AWS_IAM_AUTH, False)

if not (pg_host and pg_user):
return None

if use_iam_auth:
if pg_password:
raise ValueError(
f"The environment variable {ENV_PHOENIX_POSTGRES_PASSWORD} is set but will be "
"ignored when using AWS RDS IAM authentication "
f"({ENV_PHOENIX_POSTGRES_USE_AWS_IAM_AUTH}=true). Authentication tokens will be "
"generated using AWS credentials."
)
connection_str = f"postgresql://{quote(pg_user)}@{pg_host}"
else:
if not pg_password:
raise ValueError(
f"The environment variable {ENV_PHOENIX_POSTGRES_PASSWORD} is not set. "
"Please set it to the password for the PostgreSQL database."
)
encoded_user = quote(pg_user)
encoded_password = quote(pg_password)
connection_str = f"postgresql://{encoded_user}:{encoded_password}@{pg_host}"

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: PostgreSQL Config Error Breaks Non-PostgreSQL Deployments

The get_env_postgres_connection_str() function now raises a ValueError when PHOENIX_POSTGRES_PASSWORD is missing (and IAM auth is not enabled). Previously, it returned None in this scenario, allowing graceful handling of incomplete PostgreSQL configurations. This change can cause unexpected startup failures for deployments not using PostgreSQL.

Fix in Cursor Fix in Web

pg_port = getenv(ENV_PHOENIX_POSTGRES_PORT)
pg_db = getenv(ENV_PHOENIX_POSTGRES_DB)

encoded_user = quote(pg_user)
encoded_password = quote(pg_password)
connection_str = f"postgresql://{encoded_user}:{encoded_password}@{pg_host}"
if pg_port:
connection_str = f"{connection_str}:{pg_port}"
if pg_db:
Expand Down Expand Up @@ -1968,6 +2014,7 @@ def verify_server_environment_variables() -> None:
get_env_database_usage_insertion_blocking_threshold_percentage()
get_env_max_spans_queue_size()
validate_env_support_email()
_validate_iam_auth_config()

# Notify users about deprecated environment variables if they are being used.
if os.getenv("PHOENIX_ENABLE_WEBSOCKETS") is not None:
Expand Down Expand Up @@ -2026,3 +2073,80 @@ def get_env_allow_external_resources() -> bool:
Defaults to True if not set.
"""
return _bool_val(ENV_PHOENIX_ALLOW_EXTERNAL_RESOURCES, True)


def get_env_postgres_use_iam_auth() -> bool:
    """
    Report whether PostgreSQL connections should authenticate via AWS RDS IAM.

    The flag is read from PHOENIX_POSTGRES_USE_AWS_IAM_AUTH, with False passed
    as the fallback value.

    Returns:
        bool: True when IAM authentication is enabled, otherwise False.
    """
    iam_auth_enabled = _bool_val(ENV_PHOENIX_POSTGRES_USE_AWS_IAM_AUTH, False)
    return iam_auth_enabled


def get_env_postgres_iam_token_lifetime() -> int:
    """
    Read the pool-recycle interval used with AWS RDS IAM authentication.

    The value comes from PHOENIX_POSTGRES_AWS_IAM_TOKEN_LIFETIME_SECONDS and
    falls back to 840 seconds (14 minutes). Values above 900 seconds are
    accepted but logged as a warning, since they exceed the 15-minute validity
    of an RDS IAM token.

    Returns:
        int: Token lifetime in seconds.

    Raises:
        ValueError: If the configured lifetime is zero or negative.
    """
    lifetime = _int_val(ENV_PHOENIX_POSTGRES_AWS_IAM_TOKEN_LIFETIME_SECONDS, 840)

    if lifetime > 900:
        # Still usable, but connections may outlive the token they were opened with.
        too_long_message = (
            f"{ENV_PHOENIX_POSTGRES_AWS_IAM_TOKEN_LIFETIME_SECONDS} is set to {lifetime} seconds, "
            f"which exceeds AWS RDS IAM token validity (900 seconds / 15 minutes). "
            f"Consider setting it to 840 seconds (14 minutes) or less."
        )
        logger.warning(too_long_message)
        return lifetime

    if lifetime > 0:
        return lifetime

    raise ValueError(
        f"{ENV_PHOENIX_POSTGRES_AWS_IAM_TOKEN_LIFETIME_SECONDS} must be a positive integer. "
        f"Got: {lifetime}"
    )


def _validate_iam_auth_config() -> None:
    """
    Validate AWS RDS IAM authentication configuration if enabled.

    Nothing is checked unless IAM authentication is switched on and
    PHOENIX_POSTGRES_HOST is set. When both hold, boto3 must be importable and
    PHOENIX_POSTGRES_USER must be set. AWS credentials are then probed with an
    STS ``get_caller_identity`` call; a failed probe is only logged.

    Raises:
        ImportError: If boto3 is not installed when IAM auth is enabled
        ValueError: If PHOENIX_POSTGRES_USER is not set
    """
    if not get_env_postgres_use_iam_auth():
        return

    # No PostgreSQL host configured, so there is no IAM connection to validate.
    if not getenv(ENV_PHOENIX_POSTGRES_HOST):
        return

    # Import once and reuse below; boto3 is an optional dependency.
    try:
        import boto3  # type: ignore
    except ImportError as err:
        raise ImportError(
            f"boto3 is required when {ENV_PHOENIX_POSTGRES_USE_AWS_IAM_AUTH} is enabled. "
            "Install it with: pip install 'arize-phoenix[aws]'"
        ) from err

    if not getenv(ENV_PHOENIX_POSTGRES_USER):
        raise ValueError(
            f"{ENV_PHOENIX_POSTGRES_USER} must be set when using AWS RDS IAM authentication"
        )

    # NOTE(review): a failed credential probe is logged as a warning rather
    # than raised, so startup continues. A reviewer questioned whether this
    # should be an exception; confirm the best-effort behavior is intended.
    try:
        client = boto3.client("sts")  # pyright: ignore
        client.get_caller_identity()  # pyright: ignore
    except Exception as e:
        logger.warning(
            f"Failed to validate AWS credentials for RDS IAM authentication: {e}. "
            "Ensure AWS credentials are configured via environment variables, "
            "~/.aws/credentials, or IAM role."
        )
    else:
        logger.info("✓ AWS credentials validated for RDS IAM authentication")
Comment on lines +2147 to +2152
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this just a warning and not an exception?

142 changes: 128 additions & 14 deletions src/phoenix/db/engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collections.abc import Callable
from enum import Enum
from sqlite3 import Connection
from typing import Any
from typing import Any, Optional

import aiosqlite
import numpy as np
Expand Down Expand Up @@ -168,29 +168,143 @@ def aio_postgresql_engine(
log_to_stdout: bool = False,
log_migrations_to_stdout: bool = True,
) -> AsyncEngine:
asyncpg_url, asyncpg_args = get_pg_config(url, "asyncpg")
engine = create_async_engine(
url=asyncpg_url,
connect_args=asyncpg_args,
echo=log_to_stdout,
json_serializer=_dumps,
from phoenix.config import (
get_env_postgres_iam_token_lifetime,
get_env_postgres_use_iam_auth,
)

use_iam_auth = get_env_postgres_use_iam_auth()

asyncpg_url, asyncpg_args = get_pg_config(url, "asyncpg", enforce_ssl=use_iam_auth)

iam_config: Optional[dict[str, Any]] = None
token_lifetime: int = 0
if use_iam_auth:
iam_config = _extract_iam_config_from_url(url)
token_lifetime = get_env_postgres_iam_token_lifetime()

async def iam_async_creator() -> Any:
import asyncpg # type: ignore

from phoenix.db.iam_auth import generate_aws_rds_token

assert iam_config is not None
token = generate_aws_rds_token(
host=iam_config["host"],
port=iam_config["port"],
user=iam_config["user"],
)

conn_kwargs = {
"host": iam_config["host"],
"port": iam_config["port"],
"user": iam_config["user"],
"password": token,
"database": iam_config["database"],
}

if asyncpg_args:
conn_kwargs.update(asyncpg_args)

return await asyncpg.connect(**conn_kwargs)

engine = create_async_engine(
url=asyncpg_url,
async_creator=iam_async_creator,
echo=log_to_stdout,
json_serializer=_dumps,
pool_recycle=token_lifetime,
)
else:
engine = create_async_engine(
url=asyncpg_url,
connect_args=asyncpg_args,
echo=log_to_stdout,
json_serializer=_dumps,
)

if not migrate:
return engine

psycopg_url, psycopg_args = get_pg_config(url, "psycopg")
sync_engine = sqlalchemy.create_engine(
url=psycopg_url,
connect_args=psycopg_args,
echo=log_migrations_to_stdout,
json_serializer=_dumps,
)
psycopg_url, psycopg_args = get_pg_config(url, "psycopg", enforce_ssl=use_iam_auth)

if use_iam_auth:
assert iam_config is not None
_iam_config = iam_config

def iam_sync_creator() -> Any:
import psycopg

from phoenix.db.iam_auth import generate_aws_rds_token

token = generate_aws_rds_token(
host=_iam_config["host"],
port=_iam_config["port"],
user=_iam_config["user"],
)

conn_kwargs = {
"host": _iam_config["host"],
"port": _iam_config["port"],
"user": _iam_config["user"],
"password": token,
"dbname": _iam_config["database"],
}

if psycopg_args:
conn_kwargs.update(psycopg_args)

return psycopg.connect(**conn_kwargs)

sync_engine = sqlalchemy.create_engine(
url=psycopg_url,
creator=iam_sync_creator,
echo=log_migrations_to_stdout,
json_serializer=_dumps,
pool_recycle=token_lifetime,
)
else:
sync_engine = sqlalchemy.create_engine(
url=psycopg_url,
connect_args=psycopg_args,
echo=log_migrations_to_stdout,
json_serializer=_dumps,
)

if schema := get_env_database_schema():
event.listen(sync_engine, "connect", set_postgresql_search_path(schema))
migrate_in_thread(sync_engine)
return engine


def _extract_iam_config_from_url(url: URL) -> dict[str, Any]:
    """Collect the connection parameters IAM authentication needs from a SQLAlchemy URL.

    A missing port falls back to 5432 and a missing database name to
    "postgres"; host and user have no fallback.

    Args:
        url: SQLAlchemy database URL

    Returns:
        Dictionary with the keys "host", "port", "user", and "database"

    Raises:
        ValueError: If the URL has no host or no username
    """
    if not (db_host := url.host):
        raise ValueError("Database host is required for IAM authentication")

    db_port = url.port if url.port else 5432

    if not (db_user := url.username):
        raise ValueError("Database user is required for IAM authentication")

    db_name = url.database if url.database else "postgres"

    return dict(host=db_host, port=db_port, user=db_user, database=db_name)


def _dumps(obj: Any) -> str:
    """Serialize ``obj`` with orjson, using ``_default`` as the fallback encoder, and return text."""
    encoded = orjson.dumps(obj, default=_default)
    return encoded.decode()

Expand Down
Loading
Loading