Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import crypto from 'node:crypto';
import fs from 'node:fs';
import fsp from 'node:fs/promises';
import os from 'node:os';
Expand Down Expand Up @@ -35,13 +34,20 @@ async function listDirectoryEntriesSafe(

/**
* Session indexer for Cursor transcript artifacts.
*
* Recent cursor-agent versions write JSONL transcripts under
* ~/.cursor/projects/<project-dir>/agent-transcripts/<chatId>/<chatId>.jsonl
* (sometimes nested one level deeper). The legacy
* ~/.cursor/chats/<projectHash>/
* directory still exists but now holds SQLite `store.db` files used by the
* loader (cursor-sessions.provider.ts), not JSONL the indexer can parse.
*/
export class CursorSessionSynchronizer implements IProviderSessionSynchronizer {
private readonly provider = 'cursor' as const;
private readonly cursorHome = path.join(os.homedir(), '.cursor');

/**
* Scans Cursor chats and upserts discovered sessions into DB.
* Scans Cursor transcripts and upserts discovered sessions into DB.
*/
async synchronize(since?: Date): Promise<number> {
const projectsDir = path.join(this.cursorHome, 'projects');
Expand All @@ -54,19 +60,19 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer {
continue;
}

const workerLogPath = path.join(projectsDir, entry.name, 'worker.log');
const projectDir = path.join(projectsDir, entry.name);
const workerLogPath = path.join(projectDir, 'worker.log');
const projectPath = await this.extractProjectPathFromWorkerLog(workerLogPath);
if (!projectPath || seenProjectPaths.has(projectPath)) {
continue;
}

seenProjectPaths.add(projectPath);
const projectHash = this.md5(projectPath);
const chatsDir = path.join(this.cursorHome, 'chats', projectHash);
const files = await findFilesRecursivelyCreatedAfter(chatsDir, '.jsonl', since ?? null);

const transcriptsDir = path.join(projectDir, 'agent-transcripts');
const files = await findFilesRecursivelyCreatedAfter(transcriptsDir, '.jsonl', since ?? null);

for (const filePath of files) {
const parsed = await this.processSessionFile(filePath);
const parsed = await this.processSessionFile(filePath, projectPath);
if (!parsed) {
continue;
}
Expand All @@ -89,7 +95,7 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer {
}

/**
* Parses and upserts one Cursor session JSONL file.
* Parses and upserts one Cursor session JSONL file (called by the file watcher).
*/
async synchronizeFile(filePath: string): Promise<string | null> {
if (!filePath.endsWith('.jsonl')) {
Expand All @@ -114,10 +120,30 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer {
}

/**
* Produces the same project hash Cursor uses in chat directory names.
* Walks up from a transcript file looking for the project's worker.log.
*
* Cursor has nested transcripts at varying depths over time
* (`agent-transcripts/<chatId>/<file>.jsonl` and
* `agent-transcripts/<chatId>/<sub>/<file>.jsonl` both occur in the wild),
* so a fixed `dirname()` count silently skipped the deeper variant.
*/
private md5(input: string): string {
return crypto.createHash('md5').update(input).digest('hex');
private async findProjectDirForTranscript(filePath: string): Promise<string | null> {
  const projectsRoot = path.join(this.cursorHome, 'projects');
  const rootPrefix = projectsRoot + path.sep;

  // Resolve first so `..` segments (or a relative path) cannot produce a path
  // that merely *starts with* the projects root textually while actually
  // pointing outside of it.
  let current = path.dirname(path.resolve(filePath));

  // Only directories strictly below the projects root are candidates. The
  // prefix check also ends the walk: once we climb to the projects root itself
  // it no longer carries the trailing separator and the loop stops.
  while (current.startsWith(rootPrefix)) {
    try {
      await fsp.access(path.join(current, 'worker.log'));
      return current;
    } catch {
      // worker.log is not accessible at this level — keep walking up.
    }
    current = path.dirname(current);
  }

  // No ancestor below the projects root contains a worker.log.
  return null;
}

/**
Expand Down Expand Up @@ -147,16 +173,25 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer {
/**
* Extracts session metadata from one Cursor JSONL session file.
*/
private async processSessionFile(filePath: string): Promise<ParsedSession | null> {
private async processSessionFile(
filePath: string,
projectPathHint?: string
): Promise<ParsedSession | null> {
const sessionId = path.basename(filePath, '.jsonl');
const grandparentDir = path.dirname(path.dirname(filePath));
const workerLogPath = path.join(grandparentDir, 'worker.log');
const projectPath = await this.extractProjectPathFromWorkerLog(workerLogPath);

let projectPath = projectPathHint ?? null;
if (!projectPath) {
return null;
const projectDir = await this.findProjectDirForTranscript(filePath);
if (!projectDir) {
return null;
}
projectPath = await this.extractProjectPathFromWorkerLog(path.join(projectDir, 'worker.log'));
if (!projectPath) {
return null;
}
}

const resolvedProjectPath = projectPath;
return extractFirstValidJsonlData(filePath, (rawData) => {
const data = rawData as Record<string, any>;
if (data.role !== 'user') {
Expand All @@ -168,7 +203,7 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer {

return {
sessionId,
projectPath,
projectPath: resolvedProjectPath,
sessionName: normalizeSessionName(firstLine, 'Untitled Cursor Session'),
};
});
Expand Down
120 changes: 120 additions & 0 deletions server/modules/providers/tests/cursor-session-synchronizer.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import assert from 'node:assert/strict';
import fs from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import test from 'node:test';

// Scratch directory for this test file. It hosts the database file and is also
// used as the fake home directory; the test's `finally` block removes it.
const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'cursor-sync-'));
// Set before the dynamic imports below so the database lives inside the scratch
// directory. NOTE(review): presumably the database module reads DATABASE_PATH
// when it is loaded or initialized — confirm against the module.
process.env.DATABASE_PATH = path.join(tempRoot, 'auth.db');

// Dynamic `import()` (instead of static imports) keeps these modules from being
// loaded until after the environment variable above has been assigned.
const { initializeDatabase, sessionsDb, scanStateDb } = await import(
'@/modules/database/index.js'
);
const { CursorSessionSynchronizer } = await import(
'@/modules/providers/list/cursor/cursor-session-synchronizer.provider.js'
);
const { closeConnection } = await import('@/modules/database/connection.js');

/**
 * Temporarily replaces `os.homedir` with a stub that returns `nextHomeDir`.
 *
 * @returns A function that puts the previous `os.homedir` implementation back.
 */
const patchHomeDir = (nextHomeDir: string) => {
  // Writable view of the module object so the function can be swapped out.
  const writableOs = os as { homedir: () => string };
  const previousHomeDir = writableOs.homedir;

  writableOs.homedir = () => nextHomeDir;

  return () => {
    writableOs.homedir = previousHomeDir;
  };
};

/**
 * Writes `rows` to `filePath` as JSON Lines: one `JSON.stringify`-ed row per
 * line, followed by a trailing newline. Missing parent directories are created.
 */
const writeJsonl = async (filePath: string, rows: unknown[]) => {
  const parentDir = path.dirname(filePath);
  await fs.mkdir(parentDir, { recursive: true });

  const serializedRows: string[] = [];
  for (const row of rows) {
    serializedRows.push(JSON.stringify(row));
  }

  const payload = `${serializedRows.join('\n')}\n`;
  await fs.writeFile(filePath, payload);
};

/**
 * Builds a transcript row with role `user` whose single text part wraps
 * `text` in `<user_query>` tags.
 */
const userQueryRow = (text: string) => {
  const wrappedQuery = `<user_query>\n${text}\n</user_query>`;
  const textPart = { type: 'text', text: wrappedQuery };

  return {
    role: 'user',
    message: { content: [textPart] },
  };
};

/**
* Cursor's transcript layout has shifted over time:
* - jsonl at agent-transcripts/<chatId>/<chatId>.jsonl (current)
* - jsonl at agent-transcripts/<chatId>/<sub>/<chatId>.jsonl (older)
* Both must be picked up. The legacy ~/.cursor/chats/<projectHash>/
* directory now holds only SQLite store.db files used by the loader.
*/
test('CursorSessionSynchronizer indexes transcripts at both nested depths', { concurrency: false }, async () => {
  // Redirect os.homedir() so the synchronizer resolves ~/.cursor inside the scratch directory.
  const restoreHomeDir = patchHomeDir(tempRoot);
  try {
    await initializeDatabase();

    const expectedProjectPath = '/home/coder/cc-backend';
    const cursorHome = path.join(tempRoot, '.cursor');
    const projectsDir = path.join(cursorHome, 'projects');
    const projectDir = path.join(projectsDir, 'home-coder-cc-backend');
    const transcriptsDir = path.join(projectDir, 'agent-transcripts');

    // worker.log is where the synchronizer reads the workspace path from.
    await fs.mkdir(projectDir, { recursive: true });
    await fs.writeFile(
      path.join(projectDir, 'worker.log'),
      [
        '[info] starting worker',
        '[info] Getting tree structure for workspacePath=/home/coder/cc-backend',
      ].join('\n')
    );

    // Layout 1: agent-transcripts/<chatId>/<chatId>.jsonl
    const shallowChatId = '11111111-2222-3333-4444-555555555555';
    const shallowJsonl = path.join(transcriptsDir, shallowChatId, `${shallowChatId}.jsonl`);
    await writeJsonl(shallowJsonl, [userQueryRow('refactor the watchtower analytics route')]);

    // Layout 2: agent-transcripts/<chatId>/<sub>/<chatId>.jsonl
    const deepChatId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee';
    const deepJsonl = path.join(transcriptsDir, deepChatId, 'turn-001', `${deepChatId}.jsonl`);
    await writeJsonl(deepJsonl, [userQueryRow('add a parity test for the bun build')]);

    // Project that lacks worker.log — must be ignored, not crash the scan.
    await fs.mkdir(path.join(projectsDir, 'tmp-orphan'), { recursive: true });
    await writeJsonl(
      path.join(projectsDir, 'tmp-orphan', 'agent-transcripts', 'orphan', 'orphan.jsonl'),
      [userQueryRow('orphan')]
    );

    // Legacy ~/.cursor/chats SQLite presence must NOT cause indexer to claim sessions.
    await fs.mkdir(path.join(cursorHome, 'chats', 'deadbeef'), { recursive: true });
    await fs.writeFile(path.join(cursorHome, 'chats', 'deadbeef', 'store.db'), '');

    const sync = new CursorSessionSynchronizer();
    const processed = await sync.synchronize();
    assert.equal(processed, 2, 'should index both shallow and deep transcripts');

    const shallow = sessionsDb.getSessionById(shallowChatId);
    assert.ok(shallow, 'shallow session indexed');
    assert.equal(shallow!.provider, 'cursor');
    assert.equal(shallow!.project_path, expectedProjectPath);
    assert.match(shallow!.custom_name ?? '', /watchtower analytics/);

    const deep = sessionsDb.getSessionById(deepChatId);
    assert.ok(deep, 'deep session indexed');
    assert.equal(deep!.project_path, expectedProjectPath);
    assert.match(deep!.custom_name ?? '', /parity test/);

    // Per-file path used by the watcher must also resolve project_path
    // for transcripts at both depths without a hint. The full scan above hands
    // the project path to the parser, so these calls are what actually exercise
    // the walk-up search for worker.log.
    const shallowAdHocId = 'cccccccc-cccc-cccc-cccc-cccccccccccc';
    const shallowAdHocJsonl = path.join(transcriptsDir, shallowAdHocId, `${shallowAdHocId}.jsonl`);
    await writeJsonl(shallowAdHocJsonl, [userQueryRow('hot-added by watcher')]);
    assert.equal(await sync.synchronizeFile(shallowAdHocJsonl), shallowAdHocId);
    const shallowAdHoc = sessionsDb.getSessionById(shallowAdHocId);
    assert.ok(shallowAdHoc, 'shallow watcher session indexed');
    assert.equal(shallowAdHoc!.project_path, expectedProjectPath);

    const deepAdHocId = 'eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee';
    const deepAdHocJsonl = path.join(
      transcriptsDir,
      deepAdHocId,
      'turn-001',
      `${deepAdHocId}.jsonl`
    );
    await writeJsonl(deepAdHocJsonl, [userQueryRow('hot-added by watcher at nested depth')]);
    assert.equal(await sync.synchronizeFile(deepAdHocJsonl), deepAdHocId);
    const deepAdHoc = sessionsDb.getSessionById(deepAdHocId);
    assert.ok(deepAdHoc, 'deep watcher session indexed');
    assert.equal(deepAdHoc!.project_path, expectedProjectPath);

    // Transcripts outside ~/.cursor/projects must be rejected.
    const outsideJsonl = path.join(tempRoot, 'random', 'dddddddd.jsonl');
    await writeJsonl(outsideJsonl, [userQueryRow('outside cursor home')]);
    assert.equal(await sync.synchronizeFile(outsideJsonl), null);

    // Incremental rescan with `since` set one minute in the future finds nothing new.
    scanStateDb.updateLastScannedAt(new Date(Date.now() + 60_000));
    const reprocessed = await sync.synchronize(scanStateDb.getLastScannedAt() ?? undefined);
    assert.equal(reprocessed, 0, 'incremental scan should skip files older than `since`');
  } finally {
    // Release the database handle before deleting the directory that contains it.
    closeConnection();
    restoreHomeDir();
    await fs.rm(tempRoot, { recursive: true, force: true });
  }
});