Skip to content

Commit bf3720d

Browse files
wip
1 parent 9ab1c1b commit bf3720d

File tree

2 files changed

+116
-45
lines changed

2 files changed

+116
-45
lines changed

packages/mcp/src/client.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import { env } from './env.js';
2+
import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema } from './schemas.js';
3+
import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError } from './types.js';
4+
import { isServiceError } from './utils.js';
5+
6+
/**
 * Sends a search request to the `/api/search` endpoint of the host
 * configured in `env.SOURCEBOT_HOST`.
 *
 * The request is logged via `console.error` before it is sent, then POSTed
 * as a JSON body.
 *
 * @param request - The search request; serialized with `JSON.stringify`.
 * @returns The decoded payload unchanged when `isServiceError` recognizes it,
 * otherwise the result of `searchResponseSchema.parse` on the payload.
 * NOTE(review): `parse` presumably throws when the payload does not match
 * the schema — confirm against `./schemas.js`.
 */
export const search = async (request: SearchRequest): Promise<SearchResponse | ServiceError> => {
    console.error(`Executing search request: ${JSON.stringify(request, null, 2)}`);

    const endpoint = `${env.SOURCEBOT_HOST}/api/search`;
    const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-Org-Domain': '~'
        },
        body: JSON.stringify(request)
    });
    const payload = await response.json();

    // Service errors are handed back to the caller as-is; everything else
    // goes through the response schema.
    return isServiceError(payload)
        ? payload
        : searchResponseSchema.parse(payload);
}
23+
24+
/**
 * Requests the repository list from the `/api/repos` endpoint of the host
 * configured in `env.SOURCEBOT_HOST`.
 *
 * @returns The decoded payload unchanged when `isServiceError` recognizes it,
 * otherwise the result of `listRepositoriesResponseSchema.parse` on the
 * payload.
 * NOTE(review): `parse` presumably throws when the payload does not match
 * the schema — confirm against `./schemas.js`.
 */
export const listRepos = async (): Promise<ListRepositoriesResponse | ServiceError> => {
    const endpoint = `${env.SOURCEBOT_HOST}/api/repos`;
    const response = await fetch(endpoint, {
        method: 'GET',
        headers: {
            'Content-Type': 'application/json',
            'X-Org-Domain': '~'
        },
    });
    const payload = await response.json();

    // Service errors are handed back to the caller as-is; everything else
    // goes through the response schema.
    return isServiceError(payload)
        ? payload
        : listRepositoriesResponseSchema.parse(payload);
}
39+
40+
/**
 * Requests a file's source from the `/api/source` endpoint of the host
 * configured in `env.SOURCEBOT_HOST`.
 *
 * The request is POSTed as a JSON body.
 *
 * @param request - The file source request; serialized with `JSON.stringify`.
 * @returns The decoded payload unchanged when `isServiceError` recognizes it,
 * otherwise the result of `fileSourceResponseSchema.parse` on the payload.
 * NOTE(review): `parse` presumably throws when the payload does not match
 * the schema — confirm against `./schemas.js`.
 */
export const getFileSource = async (request: FileSourceRequest): Promise<FileSourceResponse | ServiceError> => {
    const endpoint = `${env.SOURCEBOT_HOST}/api/source`;
    const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-Org-Domain': '~'
        },
        body: JSON.stringify(request)
    });
    const payload = await response.json();

    // Service errors are handed back to the caller as-is; everything else
    // goes through the response schema.
    return isServiceError(payload)
        ? payload
        : fileSourceResponseSchema.parse(payload);
}

packages/mcp/src/index.ts

Lines changed: 61 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,61 @@
11
// Entry point for the MCP server
22
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
33
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4+
import escapeStringRegexp from 'escape-string-regexp';
45
import { z } from 'zod';
6+
import { listRepos, search, getFileSource } from './client.js';
57
import { env, numberSchema } from './env.js';
6-
import { listRepositoriesResponseSchema, searchResponseSchema } from './schemas.js';
7-
import { ListRepositoriesResponse, SearchRequest, SearchResponse, TextContent, ServiceError } from './types.js';
8+
import { TextContent } from './types.js';
89
import { base64Decode, isServiceError } from './utils.js';
9-
import escapeStringRegexp from 'escape-string-regexp';
1010

1111
// Create MCP server
1212
const server = new McpServer({
1313
name: 'sourcebot-mcp-server',
1414
version: '0.1.0',
1515
});
1616

17+
1718
server.tool(
1819
"search_code",
19-
`Fetches code snippets that match the given keywords exactly. This is not a semantic search. Results are returned as an array of files, where each file contains a list of code snippets, as well as the file's URL, repository, and language. ALWAYS include the file's external URL when referencing a file.`,
20+
`Fetches code that matches the provided regex pattern in \`query\`. This is NOT a semantic search.
21+
Results are returned as an array of matching files, with the file's URL, repository, and language.
22+
If the \`includeCodeSnippets\` property is true, code snippets containing the matches will be included in the response. Only set this to true if the request requires code snippets (e.g., show me examples where library X is used).
23+
When referencing a file in your response, **ALWAYS** include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out.
24+
**ONLY USE** the \`filterByRepoIds\` property if the request requires searching a specific repo(s). Otherwise, leave it empty.`,
2025
{
2126
query: z
2227
.string()
2328
.describe(`The regex pattern to search for. RULES:
24-
1. When a regex special character needs to be escaped, ALWAYS use a single backslash (\) (e.g., 'console\.log')
25-
2. ALWAYS escape spaces with a single backslash (\) (e.g., 'console\ log')
29+
1. When a regex special character needs to be escaped, ALWAYS use a single backslash (\) (e.g., 'console\.log')
30+
2. **ALWAYS** escape spaces with a single backslash (\) (e.g., 'console\ log')
2631
`),
27-
repoIds: z
32+
filterByRepoIds: z
2833
.array(z.string())
29-
.describe(`Scope the search to the provided repositories to the Sourcebot compatible repository IDs. Do not use this property if you want to search all repositories. You must call 'list_repos' first to obtain the exact repository ID.`)
34+
.describe(`Scope the search to the provided repositories to the Sourcebot compatible repository IDs. **DO NOT** use this property if you want to search all repositories. **YOU MUST** call 'list_repos' first to obtain the exact repository ID.`)
3035
.optional(),
31-
languages: z
36+
filterByLanguages: z
3237
.array(z.string())
3338
.describe(`Scope the search to the provided languages. The language MUST be formatted as a GitHub linguist language. Examples: Python, JavaScript, TypeScript, Java, C#, C++, PHP, Go, Rust, Ruby, Swift, Kotlin, Shell, C, Dart, HTML, CSS, PowerShell, SQL, R`)
3439
.optional(),
40+
caseSensitive: z
41+
.boolean()
42+
.describe(`Whether the search should be case sensitive (default: false).`)
43+
.optional(),
44+
includeCodeSnippets: z
45+
.boolean()
46+
.describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`)
47+
.optional(),
3548
maxTokens: numberSchema
3649
.describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`)
3750
.transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val))
3851
.optional(),
39-
caseSensitive: z.boolean()
40-
.describe(`Whether the search should be case sensitive (default: false).`)
41-
.optional(),
4252
},
4353
async ({
4454
query,
45-
repoIds = [],
46-
languages = [],
55+
filterByRepoIds: repoIds = [],
56+
filterByLanguages: languages = [],
4757
maxTokens = env.DEFAULT_MINIMUM_TOKENS,
58+
includeCodeSnippets = false,
4859
caseSensitive = false,
4960
}) => {
5061
if (repoIds.length > 0) {
@@ -96,12 +107,16 @@ server.tool(
96107
const content = base64Decode(chunk.content);
97108
return `\`\`\`\n${content}\n\`\`\``
98109
}).join('\n');
99-
const text = `file: ${file.url}\nrepository: ${file.repository}\nlanguage: ${file.language}\n${snippets}`;
110+
const numMatches = file.chunks.reduce(
111+
(acc, chunk) => acc + chunk.matchRanges.length,
112+
0,
113+
);
114+
const text = `file: ${file.url}\nnum_matches: ${numMatches}\nrepository: ${file.repository}\nlanguage: ${file.language}\n${includeCodeSnippets ? `snippets:\n${snippets}` : ''}`;
100115
// Rough estimate of the number of tokens in the text
101116
// @see: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
102117
const tokens = text.length / 4;
103118

104-
if ((totalTokens + tokens) > (maxTokens ?? env.DEFAULT_MINIMUM_TOKENS)) {
119+
if ((totalTokens + tokens) > maxTokens) {
105120
isResponseTruncated = true;
106121
break;
107122
}
@@ -153,39 +168,40 @@ server.tool(
153168
}
154169
);
155170

156-
const search = async (request: SearchRequest): Promise<SearchResponse | ServiceError> => {
157-
console.error(`Executing search request: ${JSON.stringify(request, null, 2)}`);
158-
const result = await fetch(`${env.SOURCEBOT_HOST}/api/search`, {
159-
method: 'POST',
160-
headers: {
161-
'Content-Type': 'application/json',
162-
'X-Org-Domain': '~'
163-
},
164-
body: JSON.stringify(request)
165-
}).then(response => response.json());
166-
167-
if (isServiceError(result)) {
168-
return result;
169-
}
171+
server.tool(
172+
"get_file_source",
173+
"Fetches the source code for a given file.",
174+
{
175+
fileName: z.string().describe("The file to fetch the source code for."),
176+
repository: z.string().describe("The repository to fetch the source code for. This is the Sourcebot compatible repository ID."),
177+
},
178+
async ({ fileName, repository }) => {
179+
const response = await getFileSource({
180+
fileName,
181+
repository,
182+
});
170183

171-
return searchResponseSchema.parse(result);
172-
}
184+
if (isServiceError(response)) {
185+
return {
186+
content: [{
187+
type: "text",
188+
text: `Error fetching file source: ${response.message}`,
189+
}],
190+
};
191+
}
173192

174-
const listRepos = async (): Promise<ListRepositoriesResponse | ServiceError> => {
175-
const result = await fetch(`${env.SOURCEBOT_HOST}/api/repos`, {
176-
method: 'GET',
177-
headers: {
178-
'Content-Type': 'application/json',
179-
'X-Org-Domain': '~'
180-
},
181-
}).then(response => response.json());
182-
183-
if (isServiceError(result)) {
184-
return result;
193+
const content: TextContent[] = [{
194+
type: "text",
195+
text: `file: ${fileName}\nrepository: ${repository}\nlanguage: ${response.language}\nsource:\n${base64Decode(response.source)}`,
196+
}]
197+
198+
return {
199+
content,
200+
};
185201
}
202+
);
203+
186204

187-
return listRepositoriesResponseSchema.parse(result);
188-
}
189205

190206
const runServer = async () => {
191207
const transport = new StdioServerTransport();

0 commit comments

Comments
 (0)