Skip to content
8 changes: 4 additions & 4 deletions configs/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,22 +85,22 @@
"key": "PRIME_API_KEY",
},
"qwen3-vl-30b-i": {
"model": "qwen/qwen3-30b-a3b-instruct-2507",
"model": "qwen/qwen3-vl-30b-a3b-instruct",
"url": "https://api.pinference.ai/api/v1",
"key": "PRIME_API_KEY",
},
"qwen3-vl-30b-t": {
"model": "qwen/qwen3-30b-a3b-thinking-2507",
"model": "qwen/qwen3-vl-30b-a3b-thinking",
"url": "https://api.pinference.ai/api/v1",
"key": "PRIME_API_KEY",
},
"qwen3-vl-235b-i": {
"model": "qwen/qwen3-235b-a22b-instruct-2507",
"model": "qwen/qwen3-vl-235b-a22b-instruct",
"url": "https://api.pinference.ai/api/v1",
"key": "PRIME_API_KEY",
},
"qwen3-vl-235b-t": {
"model": "qwen/qwen3-235b-a22b-thinking-2507",
"model": "qwen/qwen3-vl-235b-a22b-thinking",
"url": "https://api.pinference.ai/api/v1",
"key": "PRIME_API_KEY",
},
Expand Down
215 changes: 215 additions & 0 deletions configs/endpoints.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
[[endpoint]]
endpoint_id = "olmo3-32b-t"
model = "allenai/olmo-3-32b-think"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "olmo3-7b-i"
model = "allenai/olmo-3-7b-instruct"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "olmo3-7b-t"
model = "allenai/olmo-3-7b-think"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "trinity-mini"
model = "arcee/trinity-mini"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "haiku"
model = "anthropic/claude-4.5-haiku"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "sonnet"
model = "anthropic/claude-4.5-sonnet"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "opus"
model = "anthropic/claude-4.5-opus"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "gemini-2.5-flash"
model = "google/gemini-2.5-flash"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "gemini-2.5-pro"
model = "google/gemini-2.5-pro"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "gemini-3-flash"
model = "google/gemini-3-flash"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "gemini-3-pro"
model = "google/gemini-3-pro-preview"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "gemini-3-pro-exp"
model = "google/gemini-3-pro-preview"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "qwen3-30b-i"
model = "qwen/qwen3-30b-a3b-instruct-2507"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "qwen3-30b-t"
model = "qwen/qwen3-30b-a3b-thinking-2507"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "qwen3-235b-i"
model = "qwen/qwen3-235b-a22b-instruct-2507"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "qwen3-235b-t"
model = "qwen/qwen3-235b-a22b-thinking-2507"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "qwen3-vl-30b-i"
model = "qwen/qwen3-vl-30b-a3b-instruct"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "qwen3-vl-30b-t"
model = "qwen/qwen3-vl-30b-a3b-thinking"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "qwen3-vl-235b-i"
model = "qwen/qwen3-vl-235b-a22b-instruct"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "qwen3-vl-235b-t"
model = "qwen/qwen3-vl-235b-a22b-thinking"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "kimi-k2"
model = "moonshotai/kimi-k2-0905"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "kimi-k2-t"
model = "moonshotai/kimi-k2-thinking"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "gpt-oss-120b"
model = "openai/gpt-oss-120b"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "gpt-oss-20b"
model = "openai/gpt-oss-20b"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "gpt-4.1-nano"
model = "gpt-4.1-nano"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"

[[endpoint]]
endpoint_id = "gpt-4.1-mini"
model = "gpt-4.1-mini"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"

[[endpoint]]
endpoint_id = "gpt-4.1"
model = "gpt-4.1"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"

[[endpoint]]
endpoint_id = "gpt-5-nano"
model = "gpt-5-nano"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"

[[endpoint]]
endpoint_id = "gpt-5-mini"
model = "gpt-5-mini"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"

[[endpoint]]
endpoint_id = "gpt-5"
model = "gpt-5"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"

[[endpoint]]
endpoint_id = "gpt-5.1"
model = "gpt-5.1"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"

[[endpoint]]
endpoint_id = "gpt-5.2"
model = "gpt-5.2"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"

[[endpoint]]
endpoint_id = "glm-4.5"
model = "z-ai/glm-4.5"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "glm-4.5-air"
model = "z-ai/glm-4.5-air"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "glm-4.6"
model = "z-ai/glm-4.6"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"

[[endpoint]]
endpoint_id = "glm-4.7"
model = "z-ai/glm-4.7"
url = "https://api.pinference.ai/api/v1"
key = "PRIME_API_KEY"
21 changes: 21 additions & 0 deletions configs/eval/multi-env.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
endpoints_path = "../endpoints.toml"

endpoint_id = "gpt-5-mini"
save_results = true
rollouts_per_example = 3

[[eval]]
env_id = "bfcl-v3"
num_examples = 100

[[eval]]
env_id = "tau2-bench"
num_examples = 100

[[eval]]
env_id = "wiki-search"
num_examples = 100

[[eval]]
env_id = "tool-test"
num_examples = 100
21 changes: 19 additions & 2 deletions docs/evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ prime eval run my-env -x '{"max_turns": 20}'
| `--model` | `-m` | `openai/gpt-4.1-mini` | Model name or endpoint alias |
| `--api-base-url` | `-b` | `https://api.pinference.ai/api/v1` | API base URL |
| `--api-key-var` | `-k` | `PRIME_API_KEY` | Environment variable containing API key |
| `--endpoints-path` | `-e` | `./configs/endpoints.py` | Path to endpoints registry |
| `--endpoints-path` | `-e` | `./configs/endpoints.toml` | Path to endpoints registry (`.toml` preferred, `.py` supported) |
| `--header` | — | — | Extra HTTP header (`Name: Value`), repeatable |

For convenience, define model endpoints in `./configs/endpoints.py` to avoid repeating URL and key flags:
For convenience, define model endpoints in `./configs/endpoints.toml` (or `./configs/endpoints.py`) to avoid repeating URL and key flags:

```python
ENDPOINTS = {
Expand All @@ -86,6 +86,18 @@ ENDPOINTS = {
}
```

Equivalent TOML format:

```toml
[[endpoint]]
endpoint_id = "gpt-4.1-mini"
model = "gpt-4.1-mini"
url = "https://api.openai.com/v1"
key = "OPENAI_API_KEY"
```

To define multiple equivalent replicas of an endpoint, add multiple `[[endpoint]]` entries with the same `endpoint_id`.

Then use the alias directly:

```bash
Expand All @@ -94,6 +106,10 @@ prime eval run my-env -m qwen3-235b-i

If the model name is in the registry, those values are used by default, but you can override them with `--api-base-url` and/or `--api-key-var`. If the model name isn't found, the CLI flags are used (falling back to defaults when omitted).

In other words, `-m/--model` is treated as an endpoint alias lookup when present in the registry, and otherwise treated as a literal model id.

When using eval TOML configs, you can set `endpoint_id` in `[[eval]]` sections to resolve a model from the endpoint registry; this is only supported when `endpoints_path` points to a TOML registry file.

### Sampling Parameters

| Flag | Short | Default | Description |
Expand Down Expand Up @@ -273,6 +289,7 @@ Each `[[eval]]` section must contain an `env_id` field. All other fields are opt
| `rollouts_per_example` | integer | Rollouts per example |
| `extra_env_kwargs` | table | Arguments passed to environment constructor |
| `model` | string | Model to evaluate |
| `endpoint_id` | string | Endpoint registry id (requires TOML `endpoints_path`) |

Example with `env_args`:

Expand Down
11 changes: 10 additions & 1 deletion docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ class GenerateMetadata(TypedDict):
tools: list[ChatCompletionToolParam] | None
```

`base_url` is always serialized as a string. For multi-endpoint runs (e.g., using `ClientConfig.endpoint_configs`), it is stored as a comma-separated list of URLs.

### RolloutScore / RolloutScores

```python
Expand Down Expand Up @@ -567,15 +569,19 @@ Combines rubrics for `EnvGroup`.

```python
class ClientConfig(BaseModel):
client_idx: int = 0
api_key_var: str = "PRIME_API_KEY"
api_base_url: str = "https://api.pinference.ai/api/v1"
endpoint_configs: list[ClientConfig] = []
timeout: float = 3600.0
max_connections: int = 28000
max_keepalive_connections: int = 28000
max_retries: int = 10
extra_headers: dict[str, str] = {}
```

Use `endpoint_configs` for multi-endpoint round-robin. In grouped scoring mode, groups are distributed round-robin across endpoint configs.

When `api_key_var` is `"PRIME_API_KEY"` (the default), credentials are loaded with the following precedence:
- **API key**: `PRIME_API_KEY` env var > `~/.prime/config.json` > `"EMPTY"`
- **Team ID**: `PRIME_TEAM_ID` env var > `~/.prime/config.json` > not set
Expand All @@ -589,6 +595,7 @@ class EvalConfig(BaseModel):
env_id: str
env_args: dict
env_dir_path: str
endpoint_id: str | None = None
model: str
client_config: ClientConfig
sampling_args: SamplingArgs
Expand All @@ -610,9 +617,11 @@ class EvalConfig(BaseModel):

```python
Endpoint = TypedDict("Endpoint", {"key": str, "url": str, "model": str})
Endpoints = dict[str, Endpoint]
Endpoints = dict[str, list[Endpoint]]
```

`Endpoints` maps an endpoint id to one or more endpoint variants. A single variant is represented as a one-item list.

---

## Decorators
Expand Down
Loading
Loading