wlame
diff --git a/‎.github/workflows/release.yml‎
Lines changed: 28 additions & 12 deletions b/‎.github/workflows/release.yml‎
Lines changed: 28 additions & 12 deletions
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 10 additions & 9 deletions b/‎.github/workflows/tests.yml‎
Lines changed: 10 additions & 9 deletions
diff --git a/‎src/rx/cli/samples.py‎
Lines changed: 29 additions & 4 deletions b/‎src/rx/cli/samples.py‎
Lines changed: 29 additions & 4 deletions
diff --git a/‎src/rx/index.py‎
Lines changed: 165 additions & 1 deletion b/‎src/rx/index.py‎
Lines changed: 165 additions & 1 deletion
diff --git a/‎src/rx/models.py‎
Lines changed: 6 additions & 2 deletions b/‎src/rx/models.py‎
Lines changed: 6 additions & 2 deletions
@@ -38,55 +38,70 @@ jobs:
             artifact_name: rx-macos-x86_64
             binary_path: dist/rx
 
-    # Skip builds based on manual workflow dispatch input
-    if: |
-      github.event_name == 'release' ||
-      github.event.inputs.platform == 'all' ||
-      github.event.inputs.platform == matrix.platform
-
     steps:
+      - name: Check if build should run
+        id: should_build
+        shell: bash
+        env:  # expressions are passed as data so they are never spliced into the script text
+          EVENT_NAME: ${{ github.event_name }}
+          REQUESTED: ${{ github.event.inputs.platform }}
+          TARGET: ${{ matrix.platform }}
+        run: |
+          if [ "$EVENT_NAME" = "release" ] || [ "$REQUESTED" = "all" ] || [ "$REQUESTED" = "$TARGET" ]; then
+            echo "should_run=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "should_run=false" >> "$GITHUB_OUTPUT"
+          fi
+
       - name: Checkout code
+        if: steps.should_build.outputs.should_run == 'true'
         uses: actions/checkout@v4
 
       - name: Set up Python
+        if: steps.should_build.outputs.should_run == 'true'
         uses: actions/setup-python@v5
         with:
           python-version: "3.13"
 
       - name: Install uv
+        if: steps.should_build.outputs.should_run == 'true'
         uses: astral-sh/setup-uv@v4
         with:
           enable-cache: true
 
       - name: Install dependencies
+        if: steps.should_build.outputs.should_run == 'true'
         run: |
           uv sync --group dev --group build
 
       - name: Install ripgrep (Ubuntu)
-        if: matrix.os == 'ubuntu-latest'
+        if: steps.should_build.outputs.should_run == 'true' && matrix.os == 'ubuntu-latest'
         run: |
           sudo apt-get update
           sudo apt-get install -y ripgrep
 
       - name: Install ripgrep (macOS)
-        if: matrix.os == 'macos-latest'
+        if: steps.should_build.outputs.should_run == 'true' && matrix.os == 'macos-latest'
         run: |
           brew install ripgrep
 
       - name: Install ripgrep (Windows)
-        if: matrix.os == 'windows-latest'
+        if: steps.should_build.outputs.should_run == 'true' && matrix.os == 'windows-latest'
         run: |
           choco install ripgrep -y
 
       - name: Run tests
+        if: steps.should_build.outputs.should_run == 'true'
         run: |
           uv run pytest
 
       - name: Build with PyInstaller
+        if: steps.should_build.outputs.should_run == 'true'
         run: |
           uv run pyinstaller rx.spec
 
       - name: Verify binary exists
+        if: steps.should_build.outputs.should_run == 'true'
         shell: bash
         run: |
           ls -lh dist/
@@ -96,25 +111,26 @@ jobs:
           fi
 
       - name: Test binary (Unix)
-        if: matrix.os != 'windows-latest'
+        if: steps.should_build.outputs.should_run == 'true' && matrix.os != 'windows-latest'
         run: |
           chmod +x ${{ matrix.binary_path }}
           ${{ matrix.binary_path }} --version
 
       - name: Test binary (Windows)
-        if: matrix.os == 'windows-latest'
+        if: steps.should_build.outputs.should_run == 'true' && matrix.os == 'windows-latest'
         run: |
           ${{ matrix.binary_path }} --version
 
       - name: Upload artifact
+        if: steps.should_build.outputs.should_run == 'true'
         uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.artifact_name }}
           path: ${{ matrix.binary_path }}
           if-no-files-found: error
 
       - name: Upload to release (on release event)
-        if: github.event_name == 'release'
+        if: steps.should_build.outputs.should_run == 'true' && github.event_name == 'release'
         uses: actions/upload-release-asset@v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
@@ -2,9 +2,9 @@ name: Tests
 
 on:
   push:
-    branches: [ main ]
+    branches: [main]
   pull_request:
-    branches: [ main ]
+    branches: [main]
   workflow_dispatch:
 
 jobs:
@@ -29,10 +29,11 @@ jobs:
       - name: Run tests with coverage
         run: uv run pytest tests/ --cov=rx --cov-report=term-missing --cov-report=xml
 
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
-        with:
-          file: ./coverage.xml
-          fail_ci_if_error: false
-        env:
-          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+      # Disabled until a CODECOV_TOKEN secret is configured.
+      # - name: Upload coverage to Codecov
+      #   uses: codecov/codecov-action@v4
+      #   with:
+      #     file: ./coverage.xml
+      #     fail_ci_if_error: false
+      #   env:
+      #     CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
@@ -5,6 +5,7 @@
 
 import click
 
+from rx.index import calculate_exact_line_for_offset, calculate_exact_offset_for_line, get_index_path, load_index
 from rx.models import SamplesResponse
 from rx.parse import get_context, get_context_by_lines, is_text_file
 
@@ -124,10 +125,22 @@ def samples_command(
             offset_list = list(byte_offset)
             context_data = get_context(path, offset_list, before_context, after_context)
 
+            # Calculate line numbers for each byte offset (only if JSON output)
+            offset_to_line = {}
+            if json_output:
+                index_data = load_index(get_index_path(path))
+                for offset in offset_list:
+                    line_num = calculate_exact_line_for_offset(path, offset, index_data)
+                    offset_to_line[str(offset)] = line_num
+            else:
+                # For CLI mode, populate with offsets as keys (no line mapping needed)
+                for offset in offset_list:
+                    offset_to_line[str(offset)] = -1
+
             response = SamplesResponse(
                 path=path,
-                offsets=offset_list,
-                lines=[],
+                offsets=offset_to_line,
+                lines={},
                 before_context=before_context,
                 after_context=after_context,
                 samples={str(k): v for k, v in context_data.items()},
@@ -137,10 +150,22 @@ def samples_command(
             line_list = list(line_offset)
             context_data = get_context_by_lines(path, line_list, before_context, after_context)
 
+            # Calculate byte offsets for each line number (only if JSON output)
+            line_to_offset = {}
+            if json_output:
+                index_data = load_index(get_index_path(path))
+                for line_num in line_list:
+                    byte_offset = calculate_exact_offset_for_line(path, line_num, index_data)
+                    line_to_offset[str(line_num)] = byte_offset
+            else:
+                # For CLI mode, populate with line numbers as keys (no offset mapping needed)
+                for line_num in line_list:
+                    line_to_offset[str(line_num)] = -1
+
             response = SamplesResponse(
                 path=path,
-                offsets=[],
-                lines=line_list,
+                offsets={},
+                lines=line_to_offset,
                 before_context=before_context,
                 after_context=after_context,
                 samples={str(k): v for k, v in context_data.items()},
 
@@ -50,7 +50,7 @@ def get_large_file_threshold_bytes() -> int:
     Controlled by RX_LARGE_TEXT_FILE_MB environment variable.
     Default: 100MB
     """
-    threshold_mb = get_int_env("RX_LARGE_TEXT_FILE_MB")
+    threshold_mb = get_int_env("RX_LARGE_TEXT_FILE_MB")  # non-positive results use DEFAULT_LARGE_FILE_MB below
     if threshold_mb <= 0:
         threshold_mb = DEFAULT_LARGE_FILE_MB
     return threshold_mb * 1024 * 1024
@@ -418,6 +418,170 @@ def find_line_offset(line_index: list[list[int]], target_line: int) -> tuple[int
     return (line_index[idx][0], line_index[idx][1])
 
 
+def calculate_exact_offset_for_line(filename: str, target_line: int, index_data: dict | None = None) -> int:
+    """Calculate the exact byte offset at which a given line starts.
+
+    Args:
+        filename: Path to the file
+        target_line: Line number (1-based) to find offset for
+        index_data: Optional index data. If None, the index is loaded from get_index_path(filename)
+
+    Returns:
+        Byte offset of the line start, or -1 if it cannot be determined: line < 1 or past EOF,
+        index without "line_index" entries, read error, or large file without index
+    """
+    # Line numbers are 1-based: nothing below 1 can match, so skip loading and scanning
+    if target_line < 1:
+        return -1
+
+    # If no index provided, try to load it
+    if index_data is None:
+        index_data = load_index(get_index_path(filename))
+
+    # If we have an index, use it
+    if index_data:
+        line_index = index_data.get("line_index", [])
+        if not line_index:
+            return -1
+
+        # Find closest indexed line
+        indexed_line, indexed_offset = find_line_offset(line_index, target_line)
+
+        # If exact match, return it
+        if indexed_line == target_line:
+            return indexed_offset
+
+        # Scan forward from the indexed position, tracking the offset at which each line starts
+        try:
+            with open(filename, "rb") as f:
+                f.seek(indexed_offset)
+                current_line = indexed_line
+                current_offset = indexed_offset
+
+                for line_bytes in f:
+                    if current_line == target_line:
+                        return current_offset
+                    current_offset += len(line_bytes)
+                    current_line += 1
+
+                # Reached EOF before finding target line
+                return -1
+        except (IOError, OSError) as e:
+            logger.error(f"Failed to read file {filename}: {e}")
+            return -1
+
+    # No index - check if file is small enough to read
+    try:
+        if os.path.getsize(filename) > get_large_file_threshold_bytes():
+            # Large file without index - cannot determine
+            return -1
+
+        # Small file - read from beginning
+        with open(filename, "rb") as f:
+            current_line = 0
+            current_offset = 0
+
+            for line_bytes in f:
+                current_line += 1
+                if current_line == target_line:
+                    return current_offset
+                current_offset += len(line_bytes)
+
+            # Target line beyond EOF
+            return -1
+    except (IOError, OSError) as e:
+        logger.error(f"Failed to process file {filename}: {e}")
+        return -1
+
+
+def calculate_exact_line_for_offset(filename: str, target_offset: int, index_data: dict | None = None) -> int:
+    """Calculate the exact line number containing a given byte offset.
+
+    Args:
+        filename: Path to the file
+        target_offset: Byte offset to find line number for
+        index_data: Optional index data. If None, the index is loaded from get_index_path(filename);
+            when no index exists, files up to the large-file threshold are scanned from the start
+
+    Returns:
+        Line number (1-based) containing the offset, or -1 if it cannot be determined: negative or
+        past-EOF offset, index without "line_index" entries, read error, or large file without index
+    """
+    # A negative offset lies in no line; without this guard the scans below would report line 1
+    if target_offset < 0:
+        return -1
+
+    # If no index provided, try to load it
+    if index_data is None:
+        index_data = load_index(get_index_path(filename))
+
+    # If we have an index, use it
+    if index_data:
+        line_index = index_data.get("line_index", [])
+        if not line_index:
+            return -1
+
+        # Find closest indexed line at or before target offset
+        # Binary search by offset
+        offsets = [entry[1] for entry in line_index]
+        idx = bisect.bisect_right(offsets, target_offset) - 1
+
+        # No entry at or before the target: the file start (line 1, offset 0) is always a valid anchor
+        indexed_line, indexed_offset = line_index[idx] if idx >= 0 else (1, 0)
+
+        # If exact match, return it
+        if indexed_offset == target_offset:
+            return indexed_line
+
+        # Read from indexed position and count lines to target offset
+        # Sequential reading is fast due to OS buffering and disk read-ahead
+        try:
+            with open(filename, "rb") as f:
+                f.seek(indexed_offset)
+                current_line = indexed_line
+                current_offset = indexed_offset
+
+                for line_bytes in f:
+                    # Lines are never empty, so this also covers an offset exactly at the line start
+                    if current_offset + len(line_bytes) > target_offset:
+                        return current_line
+                    current_offset += len(line_bytes)
+                    current_line += 1
+
+                # Reached EOF
+                return -1
+        except (IOError, OSError) as e:
+            logger.error(f"Failed to read file {filename}: {e}")
+            return -1
+
+    # No index - check if file is small enough to read
+    try:
+        file_size = os.path.getsize(filename)
+        threshold = get_large_file_threshold_bytes()
+
+        if file_size > threshold:
+            # Large file without index - cannot determine
+            return -1
+
+        # Small file - read from beginning
+        with open(filename, "rb") as f:
+            current_line = 0
+            current_offset = 0
+
+            for line_bytes in f:
+                current_line += 1
+                # Lines are never empty, so this also covers an offset exactly at the line start
+                if current_offset + len(line_bytes) > target_offset:
+                    return current_line
+                current_offset += len(line_bytes)
+
+            # EOF
+            return -1
+    except (IOError, OSError) as e:
+        logger.error(f"Failed to process file {filename}: {e}")
+        return -1
+
+
 def get_index_info(source_path: str) -> dict | None:
     """Get information about an existing index.
 
 
@@ -582,8 +582,12 @@ class SamplesResponse(BaseModel):
     """Response from samples endpoint"""
 
     path: str = Field(..., example="/path/to/file.txt")
-    offsets: list[int] = Field(default_factory=list, example=[123, 456])
-    lines: list[int] = Field(default_factory=list, example=[100, 200])
+    offsets: dict[str, int] = Field(
+        default_factory=dict, example={"123": 1, "456": 2}, description="Mapping of byte offset to line number"
+    )
+    lines: dict[str, int] = Field(
+        default_factory=dict, example={"1": 0, "2": 123}, description="Mapping of line number to byte offset"
+    )
     before_context: int = Field(..., example=3)
     after_context: int = Field(..., example=3)
     samples: dict[str, list[str]] = Field(