12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -10,10 +10,12 @@ You must index your code base with the Semantic Code Search Indexer found here:

This MCP server expects the **locations-first** index model from indexer PR `elastic/semantic-code-search-indexer#135`:

-- `<index>` stores **content-deduplicated chunk documents** (semantic search + metadata).
-- `<index>_locations` stores **one document per chunk occurrence** (file path + line ranges + directory/git metadata) and references chunks by `chunk_id`.
+- `<alias>` stores **content-deduplicated chunk documents** (semantic search + metadata).
+- `<alias>_locations` stores **one document per chunk occurrence** (file path + line ranges + directory/git metadata) and references chunks by `chunk_id`.

-Several tools query `<index>_locations` and join back to `<index>` via `chunk_id` (typically using `mget`).
+Several tools query `<alias>_locations` and join back to `<alias>` via `chunk_id` (typically using `mget`).

+The indexer also maintains a stable settings index, `<alias>_settings`, for commit state and maintenance locking. This MCP server does not query `<alias>_settings` directly, but you should expect it to exist for any indexed alias.

## Running with Docker

@@ -92,7 +94,7 @@ For agents that connect over STDIO, you need to configure them to run the Docker
"-i",
"-e", "ELASTICSEARCH_CLOUD_ID=<your_cloud_id>",
"-e", "ELASTICSEARCH_API_KEY=<your_api_key>",
-"-e", "ELASTICSEARCH_INDEX=<your_index>",
+"-e", "ELASTICSEARCH_INDEX=<your_alias>",
"simianhacker/semantic-code-search-mcp-server",
"node", "dist/src/mcp_server/bin.js", "stdio"
]
@@ -204,4 +206,4 @@ Configuration is managed via environment variables in a `.env` file.
| --- | --- | --- |
| `ELASTICSEARCH_CLOUD_ID` | The Cloud ID for your Elastic Cloud instance. | |
| `ELASTICSEARCH_API_KEY` | An API key for Elasticsearch authentication. | |
-| `ELASTICSEARCH_INDEX` | The name of the Elasticsearch index to use. | `semantic-code-search` |
+| `ELASTICSEARCH_INDEX` | The **alias name** to query (stable public name). The MCP server will also query `<alias>_locations`. | `semantic-code-search` |
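The two-step join described above (`<alias>_locations` → `mget` against `<alias>`) can be sketched in TypeScript. This is a minimal illustration, not the server's implementation: the alias value and the wildcard query are made up, and only the field names (`chunk_id`, `filePath`) and the locations-then-mget shape come from this README.

```typescript
// Shape of a hit from `<alias>_locations` (field names from the README).
interface LocationHit {
  chunk_id: string;
  filePath: string;
}

// Pure step: dedupe chunk ids from location hits, preserving first-seen
// order, so a single mget fetches each deduplicated chunk exactly once.
function collectChunkIds(hits: LocationHit[]): string[] {
  const seen = new Set<string>();
  const ids: string[] = [];
  for (const hit of hits) {
    if (!seen.has(hit.chunk_id)) {
      seen.add(hit.chunk_id);
      ids.push(hit.chunk_id);
    }
  }
  return ids;
}

const alias = "semantic-code-search"; // illustrative alias value

// Step 1: find occurrences in `<alias>_locations` (hypothetical query).
const locationsQuery = {
  index: `${alias}_locations`,
  query: { wildcard: { filePath: "*test*" } },
};

// Step 2: join back to `<alias>` with mget on the collected chunk ids.
// Sample hits: the same chunk_id can occur at several file paths, because
// chunk documents are content-deduplicated.
const exampleHits: LocationHit[] = [
  { chunk_id: "c1", filePath: "src/a_test.ts" },
  { chunk_id: "c2", filePath: "src/b_test.ts" },
  { chunk_id: "c1", filePath: "vendored/a_test.ts" },
];
const mgetRequest = { index: alias, ids: collectChunkIds(exampleHits) };
```

The dedupe step matters: without it, duplicated content would be fetched once per occurrence rather than once per chunk.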
28 changes: 28 additions & 0 deletions docker-compose.integration.yml
@@ -0,0 +1,28 @@
version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:9.2.0
    container_name: semantic-code-search-mcp-test-es
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=true
      - ELASTIC_PASSWORD=testpassword
      - xpack.security.http.ssl.enabled=false
      - xpack.ml.max_machine_memory_percent=90
      - xpack.license.self_generated.type=trial
      - ES_JAVA_OPTS=-Xms2g -Xmx2g
    ports:
      - "9200:9200"
    healthcheck:
      test: ["CMD-SHELL", "curl -u elastic:testpassword -f http://localhost:9200/_cluster/health || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 30
    volumes:
      - es-mcp-test-data:/usr/share/elasticsearch/data

volumes:
  es-mcp-test-data:
    driver: local

2 changes: 1 addition & 1 deletion docs/prompts/mcp-live-debug-harness.md
@@ -29,7 +29,7 @@ Crucially, the Harness will use a file watcher for live reloading. When a change
"content": [
{
"type": "text",
-"text": "Index: beats-repo\n- Files: 10,460 total..."
+"text": "Index: beats\n- Files: 10,460 total..."
}
]
},
2 changes: 1 addition & 1 deletion docs/prompts/optimize-list_symbols_by_query-response.md
@@ -25,7 +25,7 @@ Output is a JSON object keyed by `filePath`. Each file contains grouped `symbols

### Note (locations-first indices)

-Per-file association is computed from `<index>_locations` (by aggregating `filePath` → `chunk_id`). Symbols/imports/exports are read from `<index>` and joined via `chunk_id`.
+Per-file association is computed from `<alias>_locations` (by aggregating `filePath` → `chunk_id`). Symbols/imports/exports are read from `<alias>` and joined via `chunk_id`.

## Historical Behavior (pre-grouped output)

1 change: 1 addition & 0 deletions jest.config.js
@@ -4,5 +4,6 @@ module.exports = {
  testEnvironment: 'node',
  watchman: false,
  testMatch: ['**/tests/**/*.test.ts'],
+  testPathIgnorePatterns: ['<rootDir>/tests/integration/'],
  modulePathIgnorePatterns: ['<rootDir>/.repos/'],
};
9 changes: 9 additions & 0 deletions jest.integration.config.js
@@ -0,0 +1,9 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
  preset: 'ts-jest',
  testEnvironment: 'node',
  watchman: false,
  testMatch: ['**/tests/integration/**/*.test.ts'],
  modulePathIgnorePatterns: ['<rootDir>/.repos/'],
  testTimeout: 180000,
};
4 changes: 4 additions & 0 deletions package.json
@@ -14,6 +14,10 @@
    "format": "prettier --write \"src/**/*.ts\" \"tests/**/*.ts\"",
    "format:check": "prettier --check \"src/**/*.ts\" \"tests/**/*.ts\"",
    "test": "jest",
+    "test:integration": "npm run test:integration:setup && npm run test:integration:run; TEST_EXIT_CODE=$?; npm run test:integration:teardown; exit $TEST_EXIT_CODE",
+    "test:integration:run": "jest --config jest.integration.config.js --runInBand",
+    "test:integration:setup": "bash scripts/setup-integration-tests.sh",
+    "test:integration:teardown": "bash scripts/teardown-integration-tests.sh",
    "prepare": "npm run build",
    "mcp-server": "ts-node src/mcp_server/bin.ts",
    "mcp-server:http": "ts-node src/mcp_server/bin.ts http"
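The `;`-chaining in the `test:integration` script is deliberate: it captures the test run's exit code before teardown, so cleanup always runs yet the script still fails when tests fail. A stand-alone sketch of the pattern, with `false` standing in for a failing test run:

```shell
#!/bin/bash
# Run the (possibly failing) main command, capture its status immediately,
# run cleanup unconditionally, and keep the captured status for the caller.
false; TEST_EXIT_CODE=$?   # `false` stands in for `npm run test:integration:run`
echo "teardown would run here"
echo "captured exit code: $TEST_EXIT_CODE"
```

Using `&&` instead would skip teardown on failure; using plain `;` without capturing `$?` would report teardown's status instead of the tests'.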
36 changes: 36 additions & 0 deletions scripts/setup-integration-tests.sh
@@ -0,0 +1,36 @@
#!/bin/bash
set -e

# Check for Docker Compose v2
if ! docker compose version &> /dev/null; then
  echo "ERROR: Docker Compose v2 not found."
  echo "Please install Docker Desktop (Mac/Windows) or Docker Engine with Compose plugin (Linux)."
  echo "See: https://docs.docker.com/compose/install/"
  exit 1
fi

echo "Starting Elasticsearch for integration tests..."
docker compose -f docker-compose.integration.yml up -d

echo "Waiting for Elasticsearch to be ready..."
timeout=180
elapsed=0
while ! curl -s -u elastic:testpassword -f http://localhost:9200/_cluster/health >/dev/null; do
  if [ $elapsed -ge $timeout ]; then
    echo "ERROR: Elasticsearch did not start within $timeout seconds"
    docker compose -f docker-compose.integration.yml logs
    exit 1
  fi
  echo "Waiting for Elasticsearch... ($elapsed/$timeout seconds)"
  sleep 5
  elapsed=$((elapsed + 5))
done

echo "Elasticsearch is ready!"
echo ""
echo "✅ Integration test environment is ready!"
echo " Elasticsearch: http://localhost:9200"
echo " Username: elastic"
echo " Password: testpassword"
echo ""

15 changes: 15 additions & 0 deletions scripts/teardown-integration-tests.sh
@@ -0,0 +1,15 @@
#!/bin/bash
set -e

# Check for Docker Compose v2
if ! docker compose version &> /dev/null; then
  echo "ERROR: Docker Compose v2 not found."
  echo "Please install Docker Desktop (Mac/Windows) or Docker Engine with Compose plugin (Linux)."
  exit 1
fi

echo "Stopping Elasticsearch integration test environment..."
docker compose -f docker-compose.integration.yml down -v

echo "✅ Integration test environment cleaned up!"

2 changes: 1 addition & 1 deletion src/elasticsearch/directory_discovery.ts
@@ -26,7 +26,7 @@ interface DirectoryAggregationResponse {
}

/**
- * Discovers significant directories via `<index>_locations` (one document per chunk occurrence).
+ * Discovers significant directories via `<alias>_locations` (one document per chunk occurrence).
*/
export async function discoverSignificantDirectories(
client: Client,
6 changes: 3 additions & 3 deletions src/mcp_server/prompts/chain_of_investigation.workflow.md
@@ -18,10 +18,10 @@

This MCP server uses a locations-first Elasticsearch model:

-- Chunk-level fields (e.g. `language`, `kind`, `content`, `symbols`) live in `<index>`.
-- File-level fields (e.g. `filePath`, `directoryPath`, `startLine`, `endLine`) live in `<index>_locations`.
+- Chunk-level fields (e.g. `language`, `kind`, `content`, `symbols`) live in `<alias>`.
+- File-level fields (e.g. `filePath`, `directoryPath`, `startLine`, `endLine`) live in `<alias>_locations`.

-Implication: a KQL predicate like `filePath: *test*` is evaluated via `<index>_locations` and then joined back to `<index>` via `chunk_id`.
+Implication: a KQL predicate like `filePath: *test*` is evaluated via `<alias>_locations` and then joined back to `<alias>` via `chunk_id`.

### semantic_code_search
**For discovering symbols**
2 changes: 1 addition & 1 deletion src/mcp_server/tools/discover_directories.md
@@ -53,7 +53,7 @@ Returns a ranked list of directories with:
- **Languages**: Programming languages used
- **Score**: Average score of matches

-**Note (locations-first indices):** Directory and file-level information is derived from `<index>_locations` (one document per chunk occurrence). The tool may join to `<index>` via `chunk_id` to enrich results (e.g. languages / symbol counts).
+**Note (locations-first indices):** Directory and file-level information is derived from `<alias>_locations` (one document per chunk occurrence). The tool may join to `<alias>` via `chunk_id` to enrich results (e.g. languages / symbol counts).
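Directory discovery of this kind reduces to a terms aggregation over `<alias>_locations` plus a ranking step. A minimal TypeScript sketch, assuming a hypothetical alias name and standard Elasticsearch terms-aggregation bucket shape (the tool's real request and enrichment differ):

```typescript
// Standard terms-aggregation bucket: one entry per directoryPath value.
interface DirBucket {
  key: string;       // directoryPath
  doc_count: number; // chunk occurrences under that directory
}

// Assumed request body: terms aggregation on `directoryPath` (field name from
// this doc), returning no hits, only buckets.
const aggRequest = {
  index: "semantic-code-search_locations", // hypothetical alias name
  size: 0,
  aggs: { directories: { terms: { field: "directoryPath", size: 25 } } },
};

// Rank directories by occurrence count, descending.
function rankDirectories(
  buckets: DirBucket[],
): { path: string; occurrences: number }[] {
  return buckets
    .map((b) => ({ path: b.key, occurrences: b.doc_count }))
    .sort((a, b) => b.occurrences - a.occurrences);
}

const ranked = rankDirectories([
  { key: "src/util", doc_count: 3 },
  { key: "src/core", doc_count: 12 },
]);
```

Aggregating the locations index (rather than the chunk index) is what makes per-directory counts meaningful, since each occurrence of a deduplicated chunk contributes separately.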

## Example Output
```
2 changes: 1 addition & 1 deletion src/mcp_server/tools/document_symbols.md
@@ -10,7 +10,7 @@ An AI coding agent can use this tool to get a focused list of symbols to documen

## Notes (locations-first indices)

-Per-file symbol listings are resolved via `<index>_locations` (mapping `filePath` → `chunk_id`) and then joined to `<index>` by `chunk_id` to read chunk-level symbol metadata.
+Per-file symbol listings are resolved via `<alias>_locations` (mapping `filePath` → `chunk_id`) and then joined to `<alias>` by `chunk_id` to read chunk-level symbol metadata.

## Parameters

12 changes: 6 additions & 6 deletions src/mcp_server/tools/list_indices.md
@@ -10,15 +10,15 @@ This tool allows LLMs to query for available indices and get a summary of their

This MCP server expects the locations-first model:

-- `<index>`: content-deduplicated chunk documents
-- `<index>_locations`: one document per chunk occurrence, including `filePath`
+- `<alias>`: content-deduplicated chunk documents
+- `<alias>_locations`: one document per chunk occurrence, including `filePath`

-For this tool, **file counts are computed from `<index>_locations`** using a `cardinality(filePath)` aggregation, because chunk documents do not store per-file metadata.
+For this tool, **file counts are computed from `<alias>_locations`** using a `cardinality(filePath)` aggregation, because chunk documents do not store per-file metadata.

## Output

Returns a human-readable list of indices with:

-- Files: approximate count of unique file paths in `<index>_locations`
-- Symbols: approximate unique symbol count from `<index>` (nested `symbols`)
-- Languages / Content: rough breakdowns from `<index>`
+- Files: approximate count of unique file paths in `<alias>_locations`
+- Symbols: approximate unique symbol count from `<alias>` (nested `symbols`)
+- Languages / Content: rough breakdowns from `<alias>`
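The `cardinality(filePath)` file count described above can be sketched as a request builder. Only the field name (`filePath`), the `_locations` suffix, and the cardinality aggregation come from this doc; the alias value is illustrative, and the tool's exact request body may differ:

```typescript
// Build the file-count request against `<alias>_locations`. Chunk documents
// are deduplicated across files, so counting must happen on occurrences.
function fileCountRequest(alias: string) {
  return {
    index: `${alias}_locations`,
    size: 0, // no hits needed, only the aggregation
    aggs: { file_count: { cardinality: { field: "filePath" } } },
  };
}

const req = fileCountRequest("semantic-code-search");
```

Note that `cardinality` is an approximate (HyperLogLog-based) count, which matches the "approximate count of unique file paths" wording above.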