wip

brendan-kellam · brendan-kellam · commit bf3720d920ef · 2025-05-05T22:42:52.000-07:00
diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts
@@ -0,0 +1,55 @@
+import { env } from './env.js';
+import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema } from './schemas.js';
+import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError } from './types.js';
+import { isServiceError } from './utils.js';
+
+export const search = async (request: SearchRequest): Promise<SearchResponse | ServiceError> => {
+    console.error(`Executing search request: ${JSON.stringify(request, null, 2)}`);
+    const result = await fetch(`${env.SOURCEBOT_HOST}/api/search`, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            'X-Org-Domain': '~'
+        },
+        body: JSON.stringify(request)
+    }).then(response => response.json());
+
+    if (isServiceError(result)) {
+        return result;
+    }
+
+    return searchResponseSchema.parse(result);
+}
+
+export const listRepos = async (): Promise<ListRepositoriesResponse | ServiceError> => {
+    const result = await fetch(`${env.SOURCEBOT_HOST}/api/repos`, {
+        method: 'GET',
+        headers: {
+            'Content-Type': 'application/json',
+            'X-Org-Domain': '~'
+        },
+    }).then(response => response.json());
+
+    if (isServiceError(result)) {
+        return result;
+    }
+
+    return listRepositoriesResponseSchema.parse(result);
+}
+
+export const getFileSource = async (request: FileSourceRequest): Promise<FileSourceResponse | ServiceError> => {
+    const result = await fetch(`${env.SOURCEBOT_HOST}/api/source`, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            'X-Org-Domain': '~'
+        },
+        body: JSON.stringify(request)
+    }).then(response => response.json());
+
+    if (isServiceError(result)) {
+        return result;
+    }
+
+    return fileSourceResponseSchema.parse(result);
+}
diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts
@@ -1,50 +1,61 @@
 // Entry point for the MCP server
 import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import escapeStringRegexp from 'escape-string-regexp';
 import { z } from 'zod';
+import { listRepos, search, getFileSource } from './client.js';
 import { env, numberSchema } from './env.js';
-import { listRepositoriesResponseSchema, searchResponseSchema } from './schemas.js';
-import { ListRepositoriesResponse, SearchRequest, SearchResponse, TextContent, ServiceError } from './types.js';
+import { TextContent } from './types.js';
 import { base64Decode, isServiceError } from './utils.js';
-import escapeStringRegexp from 'escape-string-regexp';
 
 // Create MCP server
 const server = new McpServer({
     name: 'sourcebot-mcp-server',
     version: '0.1.0',
 });
 
+
 server.tool(
     "search_code",
-    `Fetches code snippets that match the given keywords exactly. This is not a semantic search. Results are returned as an array of files, where each file contains a list of code snippets, as well as the file's URL, repository, and language. ALWAYS include the file's external URL when referencing a file.`,
+    `Fetches code that matches the provided regex pattern in \`query\`. This is NOT a semantic search.
+    Results are returned as an array of matching files, with the file's URL, repository, and language.
+    If the \`includeCodeSnippets\` property is true, code snippets containing the matches will be included in the response. Only set this to true if the request requires code snippets (e.g., show me examples where library X is used).
+    When referencing a file in your response, **ALWAYS** include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out.
+    **ONLY USE** the \`filterByRepoIds\` property if the request requires searching a specific repo(s). Otherwise, leave it empty.`,
     {
         query: z
             .string()
             .describe(`The regex pattern to search for. RULES:
-            1. When a regex special character needs to be escaped, ALWAYS use a single backslash (\) (e.g., 'console\.log')
-            2. ALWAYS escape spaces with a single backslash (\) (e.g., 'console\ log')
+        1. When a regex special character needs to be escaped, ALWAYS use a single backslash (\) (e.g., 'console\.log')
+        2. **ALWAYS** escape spaces with a single backslash (\) (e.g., 'console\ log')
         `),
-        repoIds: z
+        filterByRepoIds: z
             .array(z.string())
-            .describe(`Scope the search to the provided repositories to the Sourcebot compatible repository IDs. Do not use this property if you want to search all repositories. You must call 'list_repos' first to obtain the exact repository ID.`)
+            .describe(`Scope the search to the provided repositories to the Sourcebot compatible repository IDs. **DO NOT** use this property if you want to search all repositories. **YOU MUST** call 'list_repos' first to obtain the exact repository ID.`)
             .optional(),
-        languages: z
+        filterByLanguages: z
             .array(z.string())
             .describe(`Scope the search to the provided languages. The language MUST be formatted as a GitHub linguist language. Examples: Python, JavaScript, TypeScript, Java, C#, C++, PHP, Go, Rust, Ruby, Swift, Kotlin, Shell, C, Dart, HTML, CSS, PowerShell, SQL, R`)
             .optional(),
+        caseSensitive: z
+            .boolean()
+            .describe(`Whether the search should be case sensitive (default: false).`)
+            .optional(),
+        includeCodeSnippets: z
+            .boolean()
+            .describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`)
+            .optional(),
         maxTokens: numberSchema
             .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`)
             .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val))
             .optional(),
-        caseSensitive: z.boolean()
-            .describe(`Whether the search should be case sensitive (default: false).`)
-            .optional(),
     },
     async ({
         query,
-        repoIds = [],
-        languages = [],
+        filterByRepoIds: repoIds = [],
+        filterByLanguages: languages = [],
         maxTokens = env.DEFAULT_MINIMUM_TOKENS,
+        includeCodeSnippets = false,
         caseSensitive = false,
     }) => {
         if (repoIds.length > 0) {
@@ -96,12 +107,16 @@ server.tool(
                 const content = base64Decode(chunk.content);
                 return `\`\`\`\n${content}\n\`\`\``
             }).join('\n');
-            const text = `file: ${file.url}\nrepository: ${file.repository}\nlanguage: ${file.language}\n${snippets}`;
+            const numMatches = file.chunks.reduce(
+                (acc, chunk) => acc + chunk.matchRanges.length,
+                0,
+            );
+            const text = `file: ${file.url}\nnum_matches: ${numMatches}\nrepository: ${file.repository}\nlanguage: ${file.language}\n${includeCodeSnippets ? `snippets:\n${snippets}` : ''}`;
             // Rough estimate of the number of tokens in the text
             // @see: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
             const tokens = text.length / 4;
 
-            if ((totalTokens + tokens) > (maxTokens ?? env.DEFAULT_MINIMUM_TOKENS)) {
+            if ((totalTokens + tokens) > maxTokens) {
                 isResponseTruncated = true;
                 break;
             }
@@ -153,39 +168,40 @@ server.tool(
     }
 );
 
-const search = async (request: SearchRequest): Promise<SearchResponse | ServiceError> => {
-    console.error(`Executing search request: ${JSON.stringify(request, null, 2)}`);
-    const result = await fetch(`${env.SOURCEBOT_HOST}/api/search`, {
-        method: 'POST',
-        headers: {
-            'Content-Type': 'application/json',
-            'X-Org-Domain': '~'
-        },
-        body: JSON.stringify(request)
-    }).then(response => response.json());
-
-    if (isServiceError(result)) {
-        return result;
-    }
+server.tool(
+    "get_file_source",
+    "Fetches the source code for a given file.",
+    {
+        fileName: z.string().describe("The file to fetch the source code for."),
+        repository: z.string().describe("The repository to fetch the source code for. This is the Sourcebot compatible repository ID."),
+    },
+    async ({ fileName, repository }) => {
+        const response = await getFileSource({
+            fileName,
+            repository,
+        });
 
-    return searchResponseSchema.parse(result);
-}
+        if (isServiceError(response)) {
+            return {
+                content: [{
+                    type: "text",
+                    text: `Error fetching file source: ${response.message}`,
+                }],
+            };
+        }
 
-const listRepos = async (): Promise<ListRepositoriesResponse | ServiceError> => {
-    const result = await fetch(`${env.SOURCEBOT_HOST}/api/repos`, {
-        method: 'GET',
-        headers: {
-            'Content-Type': 'application/json',
-            'X-Org-Domain': '~'
-        },
-    }).then(response => response.json());
-
-    if (isServiceError(result)) {
-        return result;
+        const content: TextContent[] = [{
+            type: "text",
+            text: `file: ${fileName}\nrepository: ${repository}\nlanguage: ${response.language}\nsource:\n${base64Decode(response.source)}`,
+        }]
+
+        return {
+            content,
+        };
     }
+);
+
 
-    return listRepositoriesResponseSchema.parse(result);
-}
 
 const runServer = async () => {
     const transport = new StdioServerTransport();