diff --git a/src/core/metrics/calculateMetrics.ts b/src/core/metrics/calculateMetrics.ts index 29b8a622..5cc5b0dc 100644 --- a/src/core/metrics/calculateMetrics.ts +++ b/src/core/metrics/calculateMetrics.ts @@ -26,7 +26,7 @@ export const calculateMetrics = async ( const [fileMetrics, totalTokens] = await Promise.all([ deps.calculateAllFileMetrics(processedFiles, config.tokenCount.encoding, progressCallback), - deps.calculateOutputMetrics(output, config.tokenCount.encoding), + deps.calculateOutputMetrics(output, config.tokenCount.encoding, config.output.filePath), ]); const totalFiles = processedFiles.length; diff --git a/src/core/metrics/calculateOutputMetrics.ts b/src/core/metrics/calculateOutputMetrics.ts index 0ff7b1a9..323a310a 100644 --- a/src/core/metrics/calculateOutputMetrics.ts +++ b/src/core/metrics/calculateOutputMetrics.ts @@ -3,8 +3,11 @@ import { logger } from '../../shared/logger.js'; import { initPiscina } from '../../shared/processConcurrency.js'; import type { OutputMetricsTask } from './workers/outputMetricsWorker.js'; -const initTaskRunner = () => { - const pool = initPiscina(1, new URL('./workers/outputMetricsWorker.js', import.meta.url).href); +const CHUNK_SIZE = 1000; +const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1000KB + +const initTaskRunner = (numOfTasks: number) => { + const pool = initPiscina(numOfTasks, new URL('./workers/outputMetricsWorker.js', import.meta.url).href); return (task: OutputMetricsTask) => pool.run(task); }; @@ -16,13 +19,42 @@ export const calculateOutputMetrics = async ( initTaskRunner, }, ): Promise => { - const runTask = deps.initTaskRunner(); + const shouldRunInParallel = content.length > MIN_CONTENT_LENGTH_FOR_PARALLEL; + const numOfTasks = shouldRunInParallel ? CHUNK_SIZE : 1; + const runTask = deps.initTaskRunner(numOfTasks); try { - logger.trace(`Starting output token count for ${path}`); + logger.trace(`Starting output token count for ${path || 'output'}`); const startTime = process.hrtime.bigint(); - const result = await runTask({ content, encoding, path }); + let result: number; + + if (shouldRunInParallel) { + // Split content into chunks for parallel processing + const chunkSize = Math.ceil(content.length / CHUNK_SIZE); + const chunks: string[] = []; + + for (let i = 0; i < content.length; i += chunkSize) { + chunks.push(content.slice(i, i + chunkSize)); + } + + // Process chunks in parallel + const chunkResults = await Promise.all( + chunks.map((chunk, index) => + runTask({ + content: chunk, + encoding, + path: path ? `${path}-chunk-${index}` : undefined, + }), + ), + ); + + // Sum up the results + result = chunkResults.reduce((sum, count) => sum + count, 0); + } else { + // Process small content directly + result = await runTask({ content, encoding, path }); + } const endTime = process.hrtime.bigint(); const duration = Number(endTime - startTime) / 1e6; diff --git a/src/core/metrics/workers/outputMetricsWorker.ts b/src/core/metrics/workers/outputMetricsWorker.ts index 59c64fac..52e97aef 100644 --- a/src/core/metrics/workers/outputMetricsWorker.ts +++ b/src/core/metrics/workers/outputMetricsWorker.ts @@ -24,11 +24,9 @@ export default async ({ content, encoding, path }: OutputMetricsTask): Promise