Skip to content

Commit

Permalink
feat(pack): Count output tokens in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
yamadashy committed Jan 25, 2025
1 parent 6c9a149 commit 3b2e45e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 11 deletions.
2 changes: 1 addition & 1 deletion src/core/metrics/calculateMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export const calculateMetrics = async (

const [fileMetrics, totalTokens] = await Promise.all([
deps.calculateAllFileMetrics(processedFiles, config.tokenCount.encoding, progressCallback),
deps.calculateOutputMetrics(output, config.tokenCount.encoding),
deps.calculateOutputMetrics(output, config.tokenCount.encoding, config.output.filePath),
]);

const totalFiles = processedFiles.length;
Expand Down
42 changes: 37 additions & 5 deletions src/core/metrics/calculateOutputMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ import { logger } from '../../shared/logger.js';
import { initPiscina } from '../../shared/processConcurrency.js';
import type { OutputMetricsTask } from './workers/outputMetricsWorker.js';

const initTaskRunner = () => {
const pool = initPiscina(1, new URL('./workers/outputMetricsWorker.js', import.meta.url).href);
const CHUNK_SIZE = 1000;
const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1000KB

const initTaskRunner = (numOfTasks: number) => {
const pool = initPiscina(numOfTasks, new URL('./workers/outputMetricsWorker.js', import.meta.url).href);
return (task: OutputMetricsTask) => pool.run(task);
};

Check warning on line 12 in src/core/metrics/calculateOutputMetrics.ts

View check run for this annotation

Codecov / codecov/patch

src/core/metrics/calculateOutputMetrics.ts#L10-L12

Added lines #L10 - L12 were not covered by tests

Expand All @@ -16,13 +19,42 @@ export const calculateOutputMetrics = async (
initTaskRunner,
},
): Promise<number> => {
const runTask = deps.initTaskRunner();
const shouldRunInParallel = content.length > MIN_CONTENT_LENGTH_FOR_PARALLEL;
const numOfTasks = shouldRunInParallel ? CHUNK_SIZE : 1;
const runTask = deps.initTaskRunner(numOfTasks);

try {
logger.trace(`Starting output token count for ${path}`);
logger.trace(`Starting output token count for ${path || 'output'}`);
const startTime = process.hrtime.bigint();

const result = await runTask({ content, encoding, path });
let result: number;

if (shouldRunInParallel) {
// Split content into chunks for parallel processing
const chunkSize = Math.ceil(content.length / CHUNK_SIZE);
const chunks: string[] = [];

Check warning on line 35 in src/core/metrics/calculateOutputMetrics.ts

View check run for this annotation

Codecov / codecov/patch

src/core/metrics/calculateOutputMetrics.ts#L34-L35

Added lines #L34 - L35 were not covered by tests

for (let i = 0; i < content.length; i += chunkSize) {
chunks.push(content.slice(i, i + chunkSize));
}

Check warning on line 39 in src/core/metrics/calculateOutputMetrics.ts

View check run for this annotation

Codecov / codecov/patch

src/core/metrics/calculateOutputMetrics.ts#L37-L39

Added lines #L37 - L39 were not covered by tests

// Process chunks in parallel
const chunkResults = await Promise.all(
chunks.map((chunk, index) =>
runTask({
content: chunk,
encoding,
path: path ? `${path}-chunk-${index}` : undefined,
}),
),
);

Check warning on line 50 in src/core/metrics/calculateOutputMetrics.ts

View check run for this annotation

Codecov / codecov/patch

src/core/metrics/calculateOutputMetrics.ts#L42-L50

Added lines #L42 - L50 were not covered by tests

// Sum up the results
result = chunkResults.reduce((sum, count) => sum + count, 0);

Check warning on line 53 in src/core/metrics/calculateOutputMetrics.ts

View check run for this annotation

Codecov / codecov/patch

src/core/metrics/calculateOutputMetrics.ts#L53

Added line #L53 was not covered by tests
} else {
// Process small content directly
result = await runTask({ content, encoding, path });
}

const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1e6;
Expand Down
8 changes: 3 additions & 5 deletions src/core/metrics/workers/outputMetricsWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@ export default async ({ content, encoding, path }: OutputMetricsTask): Promise<n
const tokenCount = counter.countTokens(content, path);

const processEndAt = process.hrtime.bigint();
if (path) {
logger.trace(
`Counted tokens for ${path}. Count: ${tokenCount}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`,
);
}
logger.trace(
`Counted output tokens. Count: ${tokenCount}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`,
);

return tokenCount;
};
Expand Down

0 comments on commit 3b2e45e

Please sign in to comment.