2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
invocation_reasons.yaml

24 changes: 23 additions & 1 deletion README.md
@@ -34,13 +34,35 @@ docker buildx build --platform linux/arm64,linux/amd64 -f mcp-local/Dockerfile -
For a single-platform build (faster):

```bash
docker buildx build -f mcp-local/Dockerfile -t arm-mcp mcp-local
# NOTE: building locally generates the Knowledge Base semantic embeddings and can take up to 20 minutes
docker buildx build -f mcp-local/Dockerfile -t arm-mcp mcp-local --load
```

### 2. Configure Your MCP Client

Choose the configuration that matches your MCP client:

#### Claude Code

Add to `.mcp.json` in your project:

```json
{
"mcpServers": {
"arm-mcp": {
"command": "docker",
"args": [
"run",
"--rm",
"-i",
"-v", "/path/to/your/workspace:/workspace",
"arm-mcp"
]
}
}
}
```

#### GitHub Copilot (VS Code)

Add to `.vscode/mcp.json` in your project, or globally at `~/Library/Application Support/Code/User/mcp.json` (macOS):
29 changes: 13 additions & 16 deletions embedding-generation/generate-chunks.py
@@ -12,19 +12,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from bs4 import BeautifulSoup
import argparse
import sys, os
import sys
import os
import re
import uuid
import yaml
import csv
import datetime
import json

import boto3
from botocore.exceptions import NoCredentialsError, ClientError
from bs4 import BeautifulSoup
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


# Create a session with retry logic for resilient HTTP requests
def create_retry_session(retries=5, backoff_factor=1, status_forcelist=(500, 502, 503, 504)):
"""Create a requests session with automatic retry on failures."""
@@ -45,10 +50,6 @@ def create_retry_session(retries=5, backoff_factor=1, status_forcelist=(500, 502
# Global session for all HTTP requests
http_session = create_retry_session()
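
The body of `create_retry_session` is collapsed in this view. For context, a minimal sketch of the standard `requests` + `urllib3` retry pattern the signature and imports point to; this is an illustration under those assumptions, not the file's exact code:

```python
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def create_retry_session(retries=5, backoff_factor=1,
                         status_forcelist=(500, 502, 503, 504)):
    """Sketch: session that retries transient server errors with backoff."""
    retry = Retry(
        total=retries,
        backoff_factor=backoff_factor,      # exponential backoff between attempts
        status_forcelist=status_forcelist,  # retry only these HTTP statuses
    )
    session = requests.Session()
    adapter = HTTPAdapter(max_retries=retry)
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    return session
```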

# Boto3 for S3 operations
import boto3
from botocore.exceptions import NoCredentialsError, ClientError


def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks',
s3_bucket='arm-github-copilot-extension',
@@ -57,7 +58,6 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks',
Ensure the local 'intrinsic_chunks' folder exists and is populated with files from S3.
If the folder does not exist, create it and download all files from the S3 prefix.
"""
import os
if not os.path.exists(local_folder):
os.makedirs(local_folder, exist_ok=True)
print(f"Created local folder: {local_folder}")
@@ -86,10 +86,8 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks',
To fix:
1. Prevent multiple learning paths from being used (compare URLs to existing chunks OR delete overlaps)
2. Learning Path titles must come from the index page; send them through the function along with Graviton.

'''


yaml_dir = 'yaml_data'
details_file = 'info/chunk_details.csv'

@@ -98,7 +96,6 @@ def ensure_intrinsic_chunks_from_s3(local_folder='intrinsic_chunks',
# Global var to prevent duplicate entries from cross-platform learning paths
cross_platform_lps_dont_duplicate = []


# Increase the CSV field size limit, which defaults to 131,072
csv.field_size_limit(10**9)  # 1,000,000,000 (1 billion): smaller than the 64-bit limit, so it is accepted without a Python OverflowError

@@ -196,7 +193,6 @@ def createTextSnippet(main_row):
return



def createIntrinsicsDatabaseChunks():
def htmlToMarkdown(html_string):
# Step 0: Remove '<h4>Operation</h4>' as it isn't needed
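
Step 0 is collapsed in this view. One way to drop that heading with BeautifulSoup, as a sketch of the assumed approach (not necessarily the file's implementation):

```python
from bs4 import BeautifulSoup

def strip_operation_heading(html_string):
    """Sketch: remove the unneeded '<h4>Operation</h4>' before conversion."""
    soup = BeautifulSoup(html_string, "html.parser")
    for h4 in soup.find_all("h4"):
        if h4.get_text(strip=True) == "Operation":
            h4.decompose()  # delete the tag and its children from the tree
    return str(soup)
```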
@@ -315,7 +311,6 @@ def htmlToMarkdown(html_string):
'''



def processLearningPath(url,type):
github_raw_link = "https://raw.githubusercontent.com/ArmDeveloperEcosystem/arm-learning-paths/refs/heads/production/content"
site_link = "https://learn.arm.com"
@@ -462,6 +457,7 @@ def readInCSV(csv_file):

return csv_dict, csv_length


def getMarkdownGitHubURLsFromPage(url):
GH_urls = []
SITE_urls = []
@@ -523,6 +519,7 @@ def obtainMarkdownContentFromGitHubMDFile(gh_url):

return md_content
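
The body of `obtainMarkdownContentFromGitHubMDFile` is collapsed; presumably it fetches the raw file over HTTP. A sketch using the module-level retry session defined earlier (the timeout value is an assumption):

```python
def obtain_markdown(gh_url):
    """Sketch: fetch a raw .md file from GitHub with the retry session."""
    response = http_session.get(gh_url, timeout=30)
    response.raise_for_status()  # surface 4xx/5xx responses as exceptions
    return response.text
```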


def obtainTextSnippets__Markdown(content, min_words=300, max_words=500, min_final_words=200):
"""Split content into chunks based on headers and word count constraints."""

@@ -620,6 +617,7 @@ def createChunk(text_snippet,WEBSITE_url,keywords,title):

return chunk
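
`createChunk`'s body is collapsed; given the `uuid` import, a chunk is presumably a small metadata record keyed by a generated ID. Every field name below is an illustrative guess, not the file's actual schema:

```python
import uuid

def make_chunk(text_snippet, website_url, keywords, title):
    """Sketch: bundle a snippet with its metadata (field names are guesses)."""
    return {
        "id": str(uuid.uuid4()),   # unique key for the vector store
        "title": title,
        "url": website_url,
        "keywords": keywords,
        "content": text_snippet,
    }
```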


def printChunks(chunks):
for chunk_dict in chunks:
print('='*100)
@@ -690,7 +688,6 @@ def recordChunk():
print(f"{file_name} === {chunk.title}")



def main():


4 changes: 4 additions & 0 deletions embedding-generation/local_vectorstore_creation.py
@@ -24,6 +24,7 @@
from sentence_transformers import SentenceTransformer
from usearch.index import Index


def load_local_yaml_files() -> List[Dict]:
"""Load locally stored YAML files and return their contents as a list of dictionaries."""
print("Loading local YAML files")
@@ -63,6 +64,7 @@ def load_local_yaml_files() -> List[Dict]:
print(f"Successfully loaded {len(yaml_contents)} YAML files")
return yaml_contents
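
The loading loop is collapsed. A sketch assuming one record per file in a `yaml_data/` folder (the folder name is borrowed from generate-chunks.py and may not match this script's actual path):

```python
import glob
import yaml

def load_yaml_files(folder="yaml_data"):
    """Sketch: parse every YAML file in the folder into a dict."""
    contents = []
    for path in sorted(glob.glob(f"{folder}/*.yaml")):
        with open(path, "r", encoding="utf-8") as f:
            contents.append(yaml.safe_load(f))
    return contents
```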


def create_embeddings(contents: List[str], model_name: str = 'all-MiniLM-L6-v2') -> np.ndarray:
"""Create embeddings for the given contents using SentenceTransformers."""
print(f"Creating embeddings using model: {model_name}")
@@ -71,6 +73,7 @@ def create_embeddings(contents: List[str], model_name: str = 'all-MiniLM-L6-v2')
print(f"Created embeddings with shape: {embeddings.shape}")
return embeddings
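
The collapsed middle of `create_embeddings` is presumably a single `encode` call; a minimal sketch with the standard sentence-transformers API:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

def create_embeddings(contents, model_name="all-MiniLM-L6-v2"):
    """Sketch: encode each string into a 384-dim vector (for MiniLM-L6)."""
    model = SentenceTransformer(model_name)
    return model.encode(contents, convert_to_numpy=True, show_progress_bar=True)
```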


def create_usearch_index(embeddings: np.ndarray, metadata: List[Dict]) -> Tuple[Index, List[Dict]]:
"""Create a USearch index with the given embeddings and metadata."""
print("Creating USearch index")
@@ -100,6 +103,7 @@ def create_usearch_index(embeddings: np.ndarray, metadata: List[Dict]) -> Tuple[
print(f"Added {len(index)} vectors to the index")
return index, metadata
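
The index construction between those prints is collapsed. A sketch using the USearch Python API, where the cosine metric and the positional key scheme are assumptions:

```python
import numpy as np
from usearch.index import Index

def build_index(embeddings: np.ndarray) -> Index:
    """Sketch: cosine-metric index sized to the embedding width."""
    index = Index(ndim=embeddings.shape[1], metric="cos")
    keys = np.arange(len(embeddings), dtype=np.uint64)  # row i -> key i
    index.add(keys, embeddings)  # bulk-add all vectors
    return index
```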


def main():
print("Starting the USearch datastore creation process")
