Skip to content

Commit 2b176d5

Browse files
cursoragent and msukkari committed
feat: Resolve exact repo matches in search parser
This change allows the search parser to resolve exact repository names, including display names, when they are anchored with '^' and '$'. This improves search accuracy by directly mapping literal repository identifiers to their corresponding entries in the database. If the pattern is not an exact literal, it falls back to regex handling. Co-authored-by: michael <michael@sourcebot.dev>
1 parent 0b436bb commit 2b176d5

File tree

2 files changed

+133
-0
lines changed

2 files changed

+133
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import { describe, expect, it, vi } from 'vitest';
2+
import type { PrismaClient } from '@sourcebot/db';
3+
import { parseQuerySyntaxIntoIR } from './parser';
4+
5+
describe('parseQuerySyntaxIntoIR', () => {
    // Wraps a `findMany` mock in the only Prisma shape these tests touch (`prisma.repo.findMany`).
    const buildPrismaStub = (findMany: unknown) =>
        ({ repo: { findMany } }) as unknown as PrismaClient;

    it('resolves anchored repo display names to repo_set queries', async () => {
        // The database knows the repo by its full name; the user searches by display name.
        const storedRepoName = 'gerrit.example.com:29418/zximgw/rcsiap2001';
        const findMany = vi.fn().mockResolvedValue([{ name: storedRepoName }]);

        const query = await parseQuerySyntaxIntoIR({
            query: 'repo:"^zximgw/rcsiap2001$"',
            options: {},
            prisma: buildPrismaStub(findMany),
        });

        // The anchors are stripped and the literal is looked up against both name columns.
        const expectedLookup = {
            where: {
                orgId: expect.any(Number),
                OR: [
                    { name: 'zximgw/rcsiap2001' },
                    { displayName: 'zximgw/rcsiap2001' },
                ],
            },
            select: { name: true },
        };
        expect(findMany).toHaveBeenCalledWith(expectedLookup);

        // The resulting IR targets the stored repo name, not the display name.
        expect(query.repo_set).toBeDefined();
        expect(query.repo_set?.set).toEqual({ [storedRepoName]: true });
    });

    it('falls back to regex handling when pattern is not a literal string', async () => {
        const findMany = vi.fn();

        const query = await parseQuerySyntaxIntoIR({
            query: 'repo:^gerrit.*$',
            options: {},
            prisma: buildPrismaStub(findMany),
        });

        // `.*` makes this a real regex, so no exact-match lookup may be attempted.
        expect(findMany).not.toHaveBeenCalled();
        expect(query.repo?.regexp).toEqual('^gerrit.*$');
    });
});

packages/web/src/features/search/parser.ts

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import { SINGLE_TENANT_ORG_ID } from '@/lib/constants';
2828
import { ServiceErrorException } from '@/lib/serviceError';
2929
import { StatusCodes } from 'http-status-codes';
3030
import { ErrorCode } from '@/lib/errorCodes';
31+
import escapeStringRegexp from 'escape-string-regexp';
3132

3233
// Configure the parser to throw errors when encountering invalid syntax.
3334
const parser = _parser.configure({
@@ -95,6 +96,26 @@ export const parseQuerySyntaxIntoIR = async ({
9596

9697
return context.repos.map((repo) => repo.name);
9798
},
99+
// Looks up repos in the single-tenant org whose `name` or `displayName` equals the
// given literal exactly. Resolves to the matching repo names, or `undefined` when
// no repo matches.
onResolveRepoExactMatch: async (literalRepoName: string) => {
    const matches = await prisma.repo.findMany({
        where: {
            orgId: SINGLE_TENANT_ORG_ID,
            OR: [
                { name: literalRepoName },
                { displayName: literalRepoName },
            ],
        },
        // Only the name is needed to build the result.
        select: { name: true },
    });

    return matches.length > 0
        ? matches.map(({ name }) => name)
        : undefined;
},
98119
});
99120
} catch (error) {
100121
if (error instanceof SyntaxError) {
@@ -117,12 +138,14 @@ const transformTreeToIR = async ({
117138
isCaseSensitivityEnabled,
118139
isRegexEnabled,
119140
onExpandSearchContext,
141+
onResolveRepoExactMatch,
120142
}: {
121143
tree: Tree;
122144
input: string;
123145
isCaseSensitivityEnabled: boolean;
124146
isRegexEnabled: boolean;
125147
onExpandSearchContext: (contextName: string) => Promise<string[]>;
148+
onResolveRepoExactMatch?: (literalRepoName: string) => Promise<string[] | undefined>;
126149
}): Promise<QueryIR> => {
127150
const transformNode = async (node: SyntaxNode): Promise<QueryIR> => {
128151
switch (node.type.id) {
@@ -239,6 +262,16 @@ const transformTreeToIR = async ({
239262
};
240263

241264
case RepoExpr:
265+
if (onResolveRepoExactMatch) {
266+
const repoSet = await resolveRepoLiteralIfPossible({
267+
value,
268+
onResolveRepoExactMatch,
269+
});
270+
if (repoSet) {
271+
return repoSet;
272+
}
273+
}
274+
242275
return {
243276
repo: {
244277
regexp: value
@@ -409,3 +442,46 @@ const getChildren = (node: SyntaxNode): SyntaxNode[] => {
409442
}
410443
return children;
411444
}
445+
446+
/**
 * Attempts to turn the value of a `repo:` filter into an exact `repo_set` query.
 *
 * The value is treated as a literal only when the *entire* value has the form
 * `^<inner>$` and re-escaping the unescaped `<inner>` with `escapeStringRegexp`
 * reproduces `<inner>` byte-for-byte. Any other value is left untouched so the
 * caller can handle it as a regular expression.
 *
 * @param value - Raw value of the repo expression.
 * @param onResolveRepoExactMatch - Resolves a literal repo name to the repo names
 *   it refers to, or `undefined` when nothing matches.
 * @returns A `repo_set` query, or `undefined` when the value is not an anchored
 *   literal or the literal did not resolve to any repo.
 */
const resolveRepoLiteralIfPossible = async ({
    value,
    onResolveRepoExactMatch,
}: {
    value: string;
    onResolveRepoExactMatch: (literalRepoName: string) => Promise<string[] | undefined>;
}): Promise<QueryIR | undefined> => {
    // The final `$` anchors the match to the end of the value. Without it, a value
    // such as `^foo$|bar` would be matched as just `^foo$`, silently dropping the
    // `|bar` alternative and rewriting a real regex into an exact lookup for `foo`.
    const literalMatch = value.match(/^\^(.*)\$$/);
    if (!literalMatch) {
        return undefined;
    }

    const innerPattern = literalMatch[1];
    const unescaped = unescapeRegexLiteral(innerPattern);

    // Round-trip check: if escaping the unescaped text does not give back the
    // original pattern, the pattern contains something other than an escaped
    // literal, so it must be handled as a regex.
    if (escapeStringRegexp(unescaped) !== innerPattern) {
        return undefined;
    }

    const repoNames = await onResolveRepoExactMatch(unescaped);
    if (!repoNames || repoNames.length === 0) {
        return undefined;
    }

    const set: Record<string, boolean> = {};
    for (const name of repoNames) {
        set[name.trim()] = true;
    }

    return {
        repo_set: { set },
        query: "repo_set"
    };
}
480+
481+
/**
 * Reverses regex escaping on a pattern, producing the literal text it stands for.
 *
 * Two escape forms are decoded:
 * - `\xHH` (two hex digits) becomes the character with that code.
 * - A backslash followed by one of `\ . ^ $ | ? * + ( ) [ ] { }` becomes that character.
 *
 * All other text, including any unrecognized escape, is kept as-is.
 *
 * @param pattern - Regex source text to unescape.
 * @returns The pattern with the recognized escapes decoded.
 */
const unescapeRegexLiteral = (pattern: string): string => {
    // Decode both escape forms in a single left-to-right pass. Running them as two
    // separate passes mis-decodes input: an escaped backslash followed by `x41`
    // (`\\x41`) would be read as a hex escape, and a backslash produced by `\x5c`
    // would be consumed again as the start of a metacharacter escape.
    return pattern.replace(
        /\\(?:x([0-9a-fA-F]{2})|([\\.^$|?*+()[\]{}]))/g,
        (match: string, hex?: string, char?: string): string => {
            if (hex !== undefined) {
                return String.fromCharCode(parseInt(hex, 16));
            }
            // Exactly one of the two groups participates in any match.
            return char ?? match;
        },
    );
}

0 commit comments

Comments
 (0)