
Commit 40ac835

Revert "Add context limit support for MCP compatibility (#102)"

This reverts commit 0e2d053.
1 parent b141d90

File tree: 4 files changed, +24 −115 lines

README.md

Lines changed: 0 additions & 19 deletions

@@ -21,25 +21,6 @@ A Model Context Protocol (MCP) server implementation that integrates with [Firec
 - Automatic retries and rate limiting
 - Cloud and self-hosted support
 - SSE support
-- **Context limit support for MCP compatibility**
-
-## Context Limiting for MCP
-
-All tools now support the `maxResponseSize` parameter to limit response size for better MCP compatibility. This is especially useful for large responses that may exceed MCP context limits.
-
-**Example Usage:**
-```json
-{
-  "name": "firecrawl_scrape",
-  "arguments": {
-    "url": "https://example.com",
-    "formats": ["markdown"],
-    "maxResponseSize": 50000
-  }
-}
-```
-
-When the response exceeds the specified limit, content will be truncated with a clear message indicating truncation occurred. This parameter is optional and preserves full backward compatibility.
 
 > Play around with [our MCP Server on MCP.so's playground](https://mcp.so/playground?server=firecrawl-mcp-server) or on [Klavis AI](https://www.klavis.ai/mcp-servers).
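For reference, the behavior this revert removes is visible in the src/index.ts hunk further down: the pre-revert `asText` helper truncated oversized responses. Below is a minimal standalone reconstruction of that logic, renamed `asTextWithLimit` here to keep it distinct from the post-revert `asText`; the sample payload and the 50000-character limit come from the deleted usage example, and the snippet is illustrative only, not part of the codebase after this commit:

```typescript
// Reconstruction of the reverted truncation logic from src/index.ts (illustrative only).
function asTextWithLimit(data: unknown, maxResponseSize?: number): string {
  const text = JSON.stringify(data, null, 2);
  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
    // Reserve ~100 characters for the truncation notice appended below.
    const truncated = text.substring(0, maxResponseSize - 100);
    return (
      truncated +
      '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]'
    );
  }
  return text;
}

// With the 50000-character limit from the deleted example, an oversized payload
// comes back capped: 49,900 characters of content plus the 96-character notice.
const big = { markdown: 'x'.repeat(200_000) };
console.log(asTextWithLimit(big, 50_000).length); // 49996
```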

package-lock.json

Lines changed: 2 additions & 45 deletions
Some generated files are not rendered by default.

package.json

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@
     "@mendable/firecrawl-js": "^4.3.6",
     "dotenv": "^17.2.2",
     "firecrawl-fastmcp": "^1.0.2",
-    "node-fetch": "^2.7.0",
     "typescript": "^5.9.2",
     "zod": "^4.1.5"
   },

src/index.ts

Lines changed: 22 additions & 50 deletions
@@ -153,15 +153,8 @@ function getClient(session?: SessionData): FirecrawlApp {
   return createClient(session?.firecrawlApiKey);
 }
 
-function asText(data: unknown, maxResponseSize?: number): string {
-  const text = JSON.stringify(data, null, 2);
-
-  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
-    const truncatedText = text.substring(0, maxResponseSize - 100); // Reserve space for truncation message
-    return truncatedText + '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]';
-  }
-
-  return text;
+function asText(data: unknown): string {
+  return JSON.stringify(data, null, 2);
 }
 
 // scrape tool (v2 semantics, minimal args)
@@ -236,13 +229,12 @@ const scrapeParamsSchema = z.object({
     .optional(),
   storeInCache: z.boolean().optional(),
   maxAge: z.number().optional(),
-  maxResponseSize: z.number().optional(),
 });
 
 server.addTool({
   name: 'firecrawl_scrape',
   description: `
-Scrape content from a single URL with advanced options.
+Scrape content from a single URL with advanced options.
 This is the most powerful, fastest and most reliable scraper tool, if available you should always default to using this tool for any web scraping needs.
 
 **Best for:** Single page content extraction, when you know exactly which page contains the information.
@@ -256,13 +248,11 @@ This is the most powerful, fastest and most reliable scraper tool, if available
   "arguments": {
     "url": "https://example.com",
     "formats": ["markdown"],
-    "maxAge": 172800000,
-    "maxResponseSize": 50000
+    "maxAge": 172800000
   }
 }
 \`\`\`
 **Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility (e.g., 50000 characters).
 **Returns:** Markdown, HTML, or other formats as specified.
 ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
 `,
@@ -271,12 +261,12 @@ ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
-    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
+    const { url, ...options } = args as { url: string } & Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Scraping URL', { url: String(url) });
     const res = await client.scrape(String(url), { ...cleaned, origin: ORIGIN } as any);
-    return asText(res, maxResponseSize);
+    return asText(res);
   },
 });
 
@@ -288,15 +278,13 @@ Map a website to discover all indexed URLs on the site.
 **Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.
 **Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).
 **Common mistakes:** Using crawl to discover URLs instead of map.
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Prompt Example:** "List all URLs on example.com."
 **Usage Example:**
 \`\`\`json
 {
   "name": "firecrawl_map",
   "arguments": {
-    "url": "https://example.com",
-    "maxResponseSize": 50000
+    "url": "https://example.com"
   }
 }
 \`\`\`
@@ -309,18 +297,17 @@ Map a website to discover all indexed URLs on the site.
     includeSubdomains: z.boolean().optional(),
     limit: z.number().optional(),
     ignoreQueryParameters: z.boolean().optional(),
-    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
-    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
+    const { url, ...options } = args as { url: string } & Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Mapping URL', { url: String(url) });
     const res = await client.map(String(url), { ...cleaned, origin: ORIGIN } as any);
-    return asText(res, maxResponseSize);
+    return asText(res);
   },
 });
 
@@ -379,12 +366,10 @@ The query also supports search operators, that you can use if needed to refine t
     "scrapeOptions": {
       "formats": ["markdown"],
       "onlyMainContent": true
-    },
-    "maxResponseSize": 50000
+    }
   }
 }
 \`\`\`
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Array of search results (with optional scraped content).
 `,
   parameters: z.object({
@@ -397,21 +382,20 @@ The query also supports search operators, that you can use if needed to refine t
       .array(z.object({ type: z.enum(['web', 'images', 'news']) }))
       .optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
-    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
     const client = getClient(session);
-    const { query, maxResponseSize, ...opts } = args as { query: string; maxResponseSize?: number } & Record<string, unknown>;
+    const { query, ...opts } = args as Record<string, unknown>;
     const cleaned = removeEmptyTopLevel(opts as Record<string, unknown>);
     log.info('Searching', { query: String(query) });
     const res = await client.search(query as string, {
       ...(cleaned as any),
       origin: ORIGIN,
     });
-    return asText(res, maxResponseSize);
+    return asText(res);
   },
 });
 
@@ -435,12 +419,10 @@ server.addTool({
     "limit": 20,
     "allowExternalLinks": false,
     "deduplicateSimilarURLs": true,
-    "sitemap": "include",
-    "maxResponseSize": 50000
+    "sitemap": "include"
   }
 }
 \`\`\`
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
 ${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
 `,
@@ -471,18 +453,17 @@ server.addTool({
     deduplicateSimilarURLs: z.boolean().optional(),
     ignoreQueryParameters: z.boolean().optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
-    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
-    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
+    const { url, ...options } = args as Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Starting crawl', { url: String(url) });
     const res = await client.crawl(String(url), {
       ...(cleaned as any),
       origin: ORIGIN,
     });
-    return asText(res, maxResponseSize);
+    return asText(res);
   },
 });
 
@@ -496,26 +477,20 @@ Check the status of a crawl job.
 {
   "name": "firecrawl_check_crawl_status",
   "arguments": {
-    "id": "550e8400-e29b-41d4-a716-446655440000",
-    "maxResponseSize": 50000
+    "id": "550e8400-e29b-41d4-a716-446655440000"
   }
 }
 \`\`\`
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Status and progress of the crawl job, including results if available.
 `,
-  parameters: z.object({
-    id: z.string(),
-    maxResponseSize: z.number().optional(),
-  }),
+  parameters: z.object({ id: z.string() }),
   execute: async (
     args: unknown,
     { session }: { session?: SessionData }
   ): Promise<string> => {
-    const { id, maxResponseSize } = args as { id: string; maxResponseSize?: number };
     const client = getClient(session);
-    const res = await client.getCrawlStatus(id);
-    return asText(res, maxResponseSize);
+    const res = await client.getCrawlStatus((args as any).id as string);
+    return asText(res);
   },
 });
 
@@ -552,12 +527,10 @@ Extract structured information from web pages using LLM capabilities. Supports b
     },
     "allowExternalLinks": false,
     "enableWebSearch": false,
-    "includeSubdomains": false,
-    "maxResponseSize": 50000
+    "includeSubdomains": false
   }
 }
 \`\`\`
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Extracted structured data as defined by your schema.
 `,
   parameters: z.object({
@@ -567,14 +540,13 @@ Extract structured information from web pages using LLM capabilities. Supports b
     allowExternalLinks: z.boolean().optional(),
     enableWebSearch: z.boolean().optional(),
     includeSubdomains: z.boolean().optional(),
-    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
     const client = getClient(session);
-    const a = args as { maxResponseSize?: number } & Record<string, unknown>;
+    const a = args as Record<string, unknown>;
     log.info('Extracting from URLs', {
       count: Array.isArray(a.urls) ? a.urls.length : 0,
     });
@@ -588,7 +560,7 @@ Extract structured information from web pages using LLM capabilities. Supports b
       origin: ORIGIN,
     });
     const res = await client.extract(extractBody as any);
-    return asText(res, a.maxResponseSize);
+    return asText(res);
   },
 });
 const PORT = Number(process.env.PORT || 3000);
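After this revert, no tool accepts `maxResponseSize`, so callers that must stay within an MCP context budget have to cap responses themselves. A minimal sketch, assuming the official `@modelcontextprotocol/sdk` TypeScript client; the `npx -y firecrawl-mcp` launch command and `FIRECRAWL_API_KEY` variable follow this project's README, while the client name and the 50,000-character cap are illustrative:

```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

async function main(): Promise<void> {
  // Launch the Firecrawl MCP server over stdio (command per the project README).
  const transport = new StdioClientTransport({
    command: 'npx',
    args: ['-y', 'firecrawl-mcp'],
    env: { FIRECRAWL_API_KEY: 'fc-YOUR_API_KEY' },
  });
  const client = new Client({ name: 'example-client', version: '1.0.0' });
  await client.connect(transport);

  // Call the scrape tool with the same arguments as the usage example above.
  const result = await client.callTool({
    name: 'firecrawl_scrape',
    arguments: { url: 'https://example.com', formats: ['markdown'], maxAge: 172800000 },
  });

  // Post-revert, any size cap is the caller's job (50_000 here is illustrative).
  const MAX_CHARS = 50_000;
  for (const item of (result.content ?? []) as Array<{ type: string; text?: string }>) {
    if (item.type === 'text' && item.text && item.text.length > MAX_CHARS) {
      item.text = item.text.slice(0, MAX_CHARS) + '\n[truncated client-side]';
    }
  }

  await client.close();
}

main().catch(console.error);
```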

0 commit comments
