Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ The following named attributes are available for use by reward functions in your Rub
- `prompt`: sequence of input messages
- `completion`: sequence of messages generated during rollout by model and Environment
- `answer`: primary answer column, optional (defaults to empty string if omitted)
- `state`: can be modified during rollout to accumulate any metadata (`state['responses']` includes full OpenAI response objects by default)
- `state`: can be modified during rollout to accumulate any metadata (`state['trajectory']` includes the full list of `TrajectoryStep` objects by default)
- `info`: auxiliary info needed for reward computation (e.g. test cases), optional (defaults to empty dict if omitted)
- `task`: tag for task type (used by `EnvGroup` and `RubricGroup`)
- `parser`: the parser object declared. Note: `vf.Parser().get_format_reward_func()` is a no-op (always 1.0); use `vf.ThinkParser` or a custom parser if you want a real format adherence reward.
Expand Down Expand Up @@ -275,28 +275,27 @@ For training, or self-hosted endpoints, you'll want to enable auto tool choice i

### MultiTurnEnv

Both `SingleTurnEnv` and `ToolEnv` are subclasses of `MultiTurnEnv`, which exposes an interface for writing custom Environment interaction protocols. Override `is_completed` and `env_response`, and make sure any custom completion logic defers to the base class so turn limits and other shared guards keep working.
Both `SingleTurnEnv` and `ToolEnv` are subclasses of `MultiTurnEnv`, which exposes an interface for writing custom Environment interaction protocols. To implement a custom protocol, define an `env_response` method and use `@vf.stop` decorators for termination conditions.

```python
from typing import Tuple
import verifiers as vf
from verifiers.types import Messages, State

class YourMultiTurnEnv(vf.MultiTurnEnv):
def __init__(self,
dataset: Dataset,
rubric: Rubric,
max_turns: int,
**kwargs):

async def is_completed(self, messages: Messages, state: State, **kwargs) -> bool:
# Always call the base check so max_turns and shared guards are respected
if await super().is_completed(messages, state, **kwargs):
return True
# return whether or not a rollout is completed
return state.get("task_complete", False)

async def env_response(self, messages: Messages, state: State, **kwargs) -> Tuple[Messages, State]:
# return new environment message(s) + updated state

async def env_response(self, messages: Messages, state: State, **kwargs) -> Messages:
# return new environment message(s); state can be updated in-place
return [{"role": "user", "content": "feedback"}]

@vf.stop
async def task_complete(self, state: State) -> bool:
# return whether or not a rollout is completed
return state.get("task_complete", False)
```

If your application requires more fine-grained control than is allowed by `MultiTurnEnv`, you may want to inherit from the base `Environment` functionality directly and override the `rollout` method.
Expand Down
49 changes: 22 additions & 27 deletions docs/source/components.md
Original file line number Diff line number Diff line change
Expand Up @@ -470,49 +470,47 @@ Build a Wordle-like game with multi-turn interaction:

```python
from verifiers.types import Messages, State
from typing import Tuple

class WordleEnv(vf.MultiTurnEnv):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.max_guesses = 6

def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
if state.get("turn", 0) == 0:
async def env_response(self, messages: Messages, state: State) -> Messages:
if len(state["trajectory"]) == 0:
# First turn: initialize
state["turn"] = 1
state["target"] = state["answer"]
state["guesses"] = []
return [{"role": "user", "content": "Guess a 5-letter word. You have 6 attempts."}], state
return [{"role": "user", "content": "Guess a 5-letter word. You have 6 attempts."}]

# Get the last assistant message
last_msg = messages[-1]
if last_msg["role"] != "assistant":
return [], state # No response if not assistant message
return [] # No response if not assistant message

guess = last_msg["content"].strip().upper()
target = state["target"]

# Validate guess
if len(guess) != 5 or not guess.isalpha():
return [{"role": "user", "content": "Please guess a 5-letter word."}], state
return [{"role": "user", "content": "Please guess a 5-letter word."}]

# Generate feedback
feedback = self.get_feedback(guess, target)
state["guesses"].append(guess)
state["turn"] += 1

if guess == target:
state["solved"] = True
return [{"role": "user", "content": f"Correct! The word was {target}."}], state
elif state["turn"] > self.max_guesses:
return [{"role": "user", "content": f"Correct! The word was {target}."}]
elif len(state["guesses"]) >= self.max_guesses:
state["failed"] = True
return [{"role": "user", "content": f"Out of guesses. The word was {target}."}], state
return [{"role": "user", "content": f"Out of guesses. The word was {target}."}]
else:
remaining = self.max_guesses - state["turn"] + 1
return [{"role": "user", "content": f"{feedback}\n{remaining} guesses remaining."}], state
remaining = self.max_guesses - len(state["guesses"])
return [{"role": "user", "content": f"{feedback}\n{remaining} guesses remaining."}]

def is_completed(self, messages: Messages, state: State) -> bool:
@vf.stop
async def game_over(self, state: State) -> bool:
return state.get("solved", False) or state.get("failed", False)
```

Expand All @@ -521,9 +519,12 @@ class WordleEnv(vf.MultiTurnEnv):
Generate training data using environment rollouts:

```python
import asyncio

async def generate_training_data(env, client, model, num_samples=1000):
"""Generate diverse solutions for training."""
results = []
score_sem = asyncio.Semaphore(1) # Semaphore for scoring

for i in range(num_samples):
# Get a random prompt
Expand All @@ -532,25 +533,19 @@ async def generate_training_data(env, client, model, num_samples=1000):

# Generate multiple solutions
for temp in [0.3, 0.7, 1.0]:
completion, state = await env.rollout(
client=client,
model=model,
prompt=prompt,
answer=answer,
sampling_args={"temperature": temp, "max_tokens": 1000}
)
input = {"prompt": prompt, "answer": answer,
"task": "default", "example_id": i}
state = await env.rollout(input=input, client=client, model=model, sampling_args={"temperature": temp, "max_tokens": 1000})

# Score the solution
rewards = await env.rubric.score_rollout(
prompt, completion, answer, state
)
await env.rubric.score_rollout(state, score_sem)

# Save high-quality solutions
if rewards["total"] > 0.8:
if state.get("reward") and state["reward"] > 0.8:
results.append({
"prompt": prompt,
"completion": completion,
"score": rewards["total"]
"completion": state["completion"],
"score": state["reward"]
})

return Dataset.from_list(results)
Expand Down
50 changes: 31 additions & 19 deletions docs/source/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,15 @@ from verifiers.types import Messages, State
from typing import Tuple

class MyProtocol(vf.MultiTurnEnv):
async def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
async def env_response(self, messages: Messages, state: State) -> Messages:
"""Define how environment responds to model"""
response = [{"role": "user", "content": "Environment feedback"}]
state["turn"] = state.get("turn", 0) + 1
return response, state
return response

async def is_completed(self, messages: Messages, state: State) -> bool:
@vf.stop
async def task_complete(self, state: State) -> bool:
"""Define when interaction ends"""
# Always defer to the base implementation so turn limits are respected
if await super().is_completed(messages, state):
return True
return state.get("task_complete", False)
```

Expand Down Expand Up @@ -143,23 +141,36 @@ Environments maintain state throughout interactions:

```python
state = {
# automatically managed
"prompt": prompt, # inputs from dataset
"completion": [], # trajectory so far
"answer": answer, # golden answer (str)
"task": task, # optional environment ID column
"info": info, # evaluation metadata (dict) -- can use answer/info/both
"responses": [], # Raw API responses from OpenAI client
"example_id": example_id, # Source dataset row identifier
"turn": 0,
"timing": {"generation_ms": 0.0, "scoring_ms": 0.0, "total_ms": 0.0},
# Input fields (automatically managed)
"prompt": prompt, # Inputs from dataset (list[ChatMessage] or str)
"answer": answer, # Golden answer (str, optional)
"task": task, # Environment ID column (str, optional)
"info": info, # Evaluation metadata (dict, optional) -- can use answer/info/both
"example_id": example_id, # Source dataset row identifier (int)

# Rollout tracking (automatically managed):
"trajectory": [], # Trajectory steps (list[TrajectoryStep], one per LLM request/response)
"completion": None, # Full conversation except the initial prompt (list[ChatMessage] or str),
# rendered from trajectory when rollout ends
"is_completed": False, # Whether rollout has terminated (bool)
"stop_condition": None, # Name of stop condition that terminated rollout (str, optional)
"timing": {"generation_ms": 0.0, "scoring_ms": 0.0, "total_ms": 0.0}, # Timing info (dict)

# custom user-managed state
"lives_remaining": 2,
"inventory": {"potion": 1, "power-up": 2}
...
}
```

**Trajectory Structure**: Each step in `state["trajectory"]` is a `TrajectoryStep` containing:
- `prompt`: Messages sent to LLM for this request
- `completion`: Messages returned from LLM for this request
- `response`: Raw API response object
- `tokens`: Token IDs, masks, and logprobs (if available)
- `reward`: Reward for this step
- `advantage`: Advantage for this step (for RL training)

A wide variety of complex interaction protocols, reward schemes, and training algorithms can be coordinated by tracking the appropriate data in `state`.

## Design Philosophy
Expand Down Expand Up @@ -224,11 +235,12 @@ results = asyncio.run(env.evaluate(client=async_client, model="llama-3.1-8b"))
```
- `rollouts_per_example > 1` repeats dataset entries internally.
- `max_concurrent` throttles concurrent rollouts.
- `save_every` (when > 0) checkpoints intermediate progress during interleaved rollouts (set `interleave_scoring=True`).
- `save_every` (when > 0) checkpoints intermediate progress during rollouts.

- **Scoring**:
- Each reward function returns a float. Weights applied inside `Rubric` combine them into `results.reward`.
- All individual scores are logged under `results.metrics` keyed by function name (even if weight is 0.0).
- Each reward function returns a float. Weights applied inside `Rubric` combine them into `state["reward"]`.
- All individual scores are logged under `state["metrics"]` keyed by function name (even if weight is 0.0).
- Scoring is performed at the group level by default, parallelizing across rollouts.

- **Outputs** (`GenerateOutputs`):
- `prompt`, `completion`, `answer`, `state`, `info`, `task`, `id`, `reward`, `metrics: dict[str, list[float]]`, plus a `metadata` block summarizing the run.
Expand Down
2 changes: 0 additions & 2 deletions docs/source/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,6 @@ def test_with_mock(mock_client):
4. **Group related tests** in test classes
5. **Keep tests fast** - use mocks instead of real API calls

> **Tip:** When subclassing `MultiTurnEnv`, always call `await super().is_completed(...)` (or `await self.max_turns_reached(state)`) so shared guards—especially max turn limits—remain effective.

## Contributing

### Workflow
Expand Down
14 changes: 7 additions & 7 deletions docs/source/environments.md
Original file line number Diff line number Diff line change
Expand Up @@ -336,24 +336,24 @@ from typing import Tuple

class MyGameEnv(vf.MultiTurnEnv):

async def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
async def env_response(self, messages: Messages, state: State) -> Messages:
"""Define how the environment responds."""
last_msg = messages[-1]
if last_msg["role"] != "assistant":
return [], state
return []

player_action = last_msg["content"]
if self.is_game_over(state):
state["done"] = True
return [{"role": "user", "content": "Game over!"}], state
return [{"role": "user", "content": "Game over!"}]

state = self.update_state(state, player_action)
feedback = self.get_game_feedback(state)
return [{"role": "user", "content": feedback}], state
return [{"role": "user", "content": feedback}]

async def is_completed(self, messages: Messages, state: State) -> bool:
if await super().is_completed(messages, state):
return True
@vf.stop
async def game_over(self, state: State) -> bool:
"""Check if game is complete."""
return state.get("solved", False) or state.get("failed", False)
```

Expand Down
Loading