PrimeIntellect-ai · snimu · Feb 2, 2026 · Feb 1, 2026 · Feb 1, 2026 · Feb 1, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -51,9 +51,6 @@ dependencies = [
     "msgpack>=1.1.2",
 ]
 
-[tool.uv.sources]
-prime-tunnel = { git = "https://github.com/PrimeIntellect-ai/prime.git", branch = "feature/tunnel", subdirectory = "packages/prime-tunnel" }
-
 [dependency-groups]
 dev = [
     "ruff",

diff --git a/verifiers/envs/integrations/browser_env/modes/cua_mode.py b/verifiers/envs/integrations/browser_env/modes/cua_mode.py
@@ -289,8 +289,11 @@ def verify_server_connection(self) -> None:
         if loop is not None:
             import concurrent.futures
 
+            def _run_health_check() -> None:  # submitted to the executor below
+                asyncio.run(self._check_server_health())  # fresh loop in this thread
+
             with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(asyncio.run, self._check_server_health())
+                future = executor.submit(_run_health_check)
                 future.result()
         else:
             asyncio.run(self._check_server_health())

diff --git a/verifiers/utils/eval_display.py b/verifiers/utils/eval_display.py
@@ -58,8 +58,8 @@ def elapsed_time(self) -> float:
         return end - self.start_time
 
 
-def _make_histogram(values: list[float], bins: int = 10, width: int = 20) -> Text:
-    """Create a simple text histogram of values."""
+def _make_histogram(values: list[float], bins: int = 10, height: int = 8) -> Text:
+    """Create a simple vertical text histogram of values."""
     if not values:
         return Text("no data", style="dim")
 
@@ -74,16 +74,51 @@ def _make_histogram(values: list[float], bins: int = 10, width: int = 20) -> Tex
         counts[bin_idx] += 1
 
     max_count = max(counts)
-    out = Text()
+    scaled = [  # bar height per bin, in rows, relative to the fullest bin
+        max(1, round(c / max_count * height)) if c > 0 else 0 for c in counts
+    ]  # non-empty bins get >= 1 row so they never look like empty bins
+
+    label_width = max(
+        4,
+        len(f"{min_val:.2f}"),
+        len(f"{max_val:.2f}"),  # keep labels aligned
+    )
+    count_width = max(len(str(c)) for c in counts)
+    col_width = max(label_width, count_width)  # shared by counts, bars, labels
+    spacer = " "
+    bar_on = "█" * col_width
+    bar_off = "░" * col_width
 
+    out = Text()
+    # Top row: each bin's count, centred over its column
     for i, count in enumerate(counts):
-        bin_start = min_val + i * bin_width
-        bar_len = int((count / max_count) * width) if max_count > 0 else 0
-        bar = "█" * bar_len + "░" * (width - bar_len)
+        out.append(str(count).center(col_width), style="dim")
+        if i < bins - 1:  # no spacer after the last column
+            out.append(spacer)
+    out.append("\n")
+
+    # Bars, drawn one text row at a time from the top row down to row 1
+    for row in range(height, 0, -1):
+        for i, h in enumerate(scaled):
+            if h >= row:  # this bin's bar reaches the current row
+                out.append(bar_on, style="cyan")
+            else:
+                out.append(bar_off, style="dim")
+            if i < bins - 1:
+                out.append(spacer)
+        out.append("\n")
+
+    # Baseline: spans all columns plus the spacers between them
+    out.append("─" * (bins * col_width + (bins - 1)), style="dim")
+    out.append("\n")
 
-        out.append(f"{bin_start:5.2f} ", style="dim")
-        out.append(bar, style="cyan")
-        out.append(f" {count}\n", style="dim")
+    # Bin labels: the start value of each bin, centred under its column
+    for i in range(bins):
+        bin_start = min_val + i * bin_width
+        label = f"{bin_start:.2f}".center(col_width)
+        out.append(label, style="dim")
+        if i < bins - 1:  # no spacer after the last column
+            out.append(spacer)
 
     return out
 
@@ -415,7 +450,44 @@ def print_final_summary(self) -> None:
         """Print a comprehensive summary after the display closes."""
         self.console.print()
 
-        # Summary table with main metrics
+        # Per-environment detail panels; environments without results are skipped
+        for idx, config in enumerate(self.configs):
+            env_state = self.state.envs[idx]
+            results = env_state.results
+
+            if results is None:
+                continue
+
+            self.console.print()
+            self.console.print(
+                Panel(
+                    self._make_env_detail(config, env_state, results),
+                    title=f"[bold blue]{config.env_id}[/bold blue]",
+                    border_style="dim",
+                )
+            )
+
+        # Print save paths if any (only the paths are needed, not the env index)
+        saved_paths = [
+            env_state.save_path
+            for env_state in self.state.envs.values()
+            if env_state.save_path is not None
+        ]
+        if saved_paths:
+            self.console.print()
+            self.console.print("[bold]Results saved to:[/bold]")
+            for save_path in saved_paths:
+                self.console.print(f"  [cyan]•[/cyan] {save_path}")
+
+        # Print errors if any, one entry per environment that recorded an error
+        for idx, config in enumerate(self.configs):
+            env_state = self.state.envs[idx]
+            if env_state.error:
+                self.console.print()
+                self.console.print(f"[red]error in {config.env_id}:[/red]")
+                self.console.print(f"  {env_state.error}")
+
+        # Summary table with main metrics (printed last)
         table = Table(title="Evaluation Summary")
         table.add_column("env_id", style="cyan")
         table.add_column("status", justify="center")
@@ -466,45 +538,8 @@ def print_final_summary(self) -> None:
                 time_str,
             )
 
+        self.console.print()
         self.console.print(table)
-
-        # Per-environment detailed sections
-        for idx, config in enumerate(self.configs):
-            env_state = self.state.envs[idx]
-            results = env_state.results
-
-            if results is None:
-                continue
-
-            self.console.print()
-            self.console.print(
-                Panel(
-                    self._make_env_detail(config, env_state, results),
-                    title=f"[bold blue]{config.env_id}[/bold blue]",
-                    border_style="dim",
-                )
-            )
-
-        # Print save paths if any
-        saved_envs = [
-            (idx, env_state)
-            for idx, env_state in self.state.envs.items()
-            if env_state.save_path is not None
-        ]
-        if saved_envs:
-            self.console.print()
-            self.console.print("[bold]Results saved to:[/bold]")
-            for idx, env_state in saved_envs:
-                self.console.print(f"  [cyan]•[/cyan] {env_state.save_path}")
-
-        # Print errors if any
-        for idx, config in enumerate(self.configs):
-            env_state = self.state.envs[idx]
-            if env_state.error:
-                self.console.print()
-                self.console.print(f"[red]error in {config.env_id}:[/red]")
-                self.console.print(f"  {env_state.error}")
-
         self.console.print()
 
     def _make_env_detail(
@@ -552,7 +587,7 @@ def _make_env_detail(
             # All rollouts histogram
             all_rollouts_content = Group(
                 Text("all rollouts:", style="bold"),
-                _make_histogram(rewards, bins=8, width=25),
+                _make_histogram(rewards, bins=8, height=8),
             )
 
             # Per-example averages if multiple rollouts
@@ -566,7 +601,7 @@ def _make_env_detail(
 
                 per_example_content = Group(
                     Text("per-example avg:", style="bold"),
-                    _make_histogram(example_avgs, bins=8, width=25),
+                    _make_histogram(example_avgs, bins=8, height=8),
                 )
 
                 # Side by side

diff --git a/verifiers/utils/eval_utils.py b/verifiers/utils/eval_utils.py
@@ -6,7 +6,7 @@
 import time
 from collections import Counter, defaultdict
 from collections.abc import Mapping
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
 from pathlib import Path
 from typing import TYPE_CHECKING, cast
 
@@ -493,19 +493,30 @@ def on_log(message: str) -> None:
             display.update_env_state(env_idx, status="failed", error=str(e))
             raise
 
+    async def refresh_loop() -> None:  # redraw until state.all_completed is set
+        while not display.state.all_completed:
+            display.refresh()
+            await asyncio.sleep(1)  # yields to the eval tasks between redraws
+
     try:
         async with display:
-            await asyncio.gather(
-                *[
-                    run_with_progress(env_config, idx)
-                    for idx, env_config in enumerate(config.evals)
-                ],
-                return_exceptions=True,
-            )
+            refresh_task = asyncio.create_task(refresh_loop())  # runs beside the evals
+            try:
+                await asyncio.gather(
+                    *[
+                        run_with_progress(env_config, idx)
+                        for idx, env_config in enumerate(config.evals)
+                    ],
+                    return_exceptions=True,  # one failing env does not abort the rest
+                )
 
-            display.refresh()
-            if tui_mode:
-                await display.wait_for_exit()
+                display.refresh()  # final redraw once gather has returned
+                if tui_mode:
+                    await display.wait_for_exit()
+            finally:  # always stop the refresher, on success or on error
+                refresh_task.cancel()
+                with suppress(asyncio.CancelledError):  # expected after cancel()
+                    await refresh_task  # NOTE(review): re-raises a refresh_loop error
 
     except KeyboardInterrupt:
         pass  # exit on interrupt