Skip to content

Commit 98ded71

Browse files
committed
refactor(search): clean up hybrid ranking logic and apply review feedback
1 parent 62cc8c8 commit 98ded71

File tree

3 files changed

+279
-754
lines changed

3 files changed

+279
-754
lines changed

apps/backend/src/utils/AI/askDocQuestion/askDocQuestion.ts

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ type VectorStoreEl = {
2323
docName: string;
2424
};
2525

26+
type VectorStoreElWithSimilarity = VectorStoreEl & { similarity: number };
27+
28+
2629
/**
2730
* Simple in-memory vector store to hold document embeddings and their content.
2831
* Each entry contains:
@@ -274,19 +277,17 @@ export const searchChunkReference = async (
274277
query: string,
275278
maxResults: number = MAX_RELEVANT_CHUNKS_NB,
276279
minSimilarity: number = MIN_RELEVANT_CHUNKS_SIMILARITY
277-
): Promise<VectorStoreEl[]> => {
278-
// Generate an embedding for the user's query
280+
): Promise<VectorStoreElWithSimilarity[]> => {
279281
const queryEmbedding = await generateEmbedding(query);
280282

281-
// Calculate similarity scores between the query embedding and each document's embedding
282283
const selection = vectorStore
283284
.map((chunk) => ({
284285
...chunk,
285-
similarity: cosineSimilarity(queryEmbedding, chunk.embedding), // Add similarity score to each doc
286+
similarity: cosineSimilarity(queryEmbedding, chunk.embedding),
286287
}))
287-
.filter((chunk) => chunk.similarity > minSimilarity) // Filter out documents with low similarity scores
288-
.sort((a, b) => b.similarity - a.similarity) // Sort documents by highest similarity first
289-
.slice(0, maxResults); // Select the top 6 most similar documents
288+
.filter((chunk) => chunk.similarity > minSimilarity)
289+
.sort((a, b) => b.similarity - a.similarity)
290+
.slice(0, maxResults);
290291

291292
const orderedDocKeys = new Set(selection.map((chunk) => chunk.fileKey));
292293

@@ -300,8 +301,13 @@ export const searchChunkReference = async (
300301
)
301302
);
302303

303-
// Return the content of the top matching documents
304-
return results;
304+
return results.map((r) => ({
305+
...r,
306+
similarity:
307+
selection.find(
308+
(s) => s.fileKey === r.fileKey && s.chunkNumber === r.chunkNumber
309+
)?.similarity ?? 0,
310+
}));
305311
};
306312

307313
const CHAT_GPT_PROMPT = readAsset('./PROMPT.md');
@@ -401,3 +407,7 @@ export const askDocQuestion = async (
401407
relatedFiles,
402408
};
403409
};
410+
411+
export function getEmbedding(input: string) {
412+
throw new Error('Function not implemented.');
413+
}

apps/website/src/components/DocPage/Search/SearchView.tsx

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,26 @@ import { useSearchParams } from 'next/navigation';
1616
import { useIntlayer, useLocale } from 'next-intlayer';
1717
import { type FC, useEffect, useRef, useState } from 'react';
1818

19+
1920
const fuseOptions: IFuseOptions<DocMetadata> = {
2021
includeScore: true,
2122
shouldSort: true,
22-
threshold: 0.25, // More flexible fuzzy matching
23-
ignoreLocation: true, // Word order insensitive
23+
threshold: 0.25,
24+
ignoreLocation: true,
2425
distance: 100,
2526
minMatchCharLength: 2,
2627
findAllMatches: true,
2728
keys: [
2829
{ name: 'title', weight: 0.7 },
2930
{ name: 'description', weight: 0.15 },
3031
{ name: 'keywords', weight: 0.1 },
31-
{ name: 'excerpt', weight: 0.05 }, // Optional short snippet per doc
32+
{ name: 'excerpt', weight: 0.05 },
3233
],
3334
};
3435

35-
// Debounce utility
36+
const FUSE_WEIGHT = 0.3;
37+
const BACKEND_WEIGHT = 0.7;
38+
3639
const debounce = <T extends (...args: any[]) => void>(
3740
func: T,
3841
delay: number,
@@ -48,16 +51,19 @@ const debounce = <T extends (...args: any[]) => void>(
4851
};
4952
};
5053

54+
type BackendDocResult = { fileKey: string; similarityScore: number };
55+
5156
function mergeHybridResults(
5257
fuseResults: Fuse.FuseResult<DocMetadata>[],
53-
backendResults: { docKey: string; score: number }[],
58+
backendResults: BackendDocResult[],
5459
allDocs: DocMetadata[]
5560
): DocMetadata[] {
56-
const normalizeFuse = (score?: number) => 1 - Math.min((score ?? 1) / 0.5, 1); // invert Fuse score
57-
const normalizeBackend = (score: number) => Math.min(score / 1.0, 1); // already cosine-like
61+
const normalizeFuse = (score?: number) =>
62+
1 - Math.min((score ?? 1) / 0.5, 1); // invert Fuse score
63+
const normalizeBackend = (score: number) => Math.min(score, 1); // no need to divide by 1
5864

5965
const backendMap = new Map(
60-
backendResults.map((r) => [r.docKey, normalizeBackend(r.score)])
66+
backendResults.map((r) => [r.fileKey, normalizeBackend(r.similarityScore)])
6167
);
6268
const combinedMap = new Map<string, { doc: DocMetadata; score: number }>();
6369

@@ -66,15 +72,15 @@ function mergeHybridResults(
6672
const fuseScore = normalizeFuse(fuseItem.score);
6773
const backendScore = backendMap.get(doc.docKey);
6874
const combinedScore = backendScore
69-
? 0.7 * backendScore + 0.3 * fuseScore
75+
? BACKEND_WEIGHT * backendScore + FUSE_WEIGHT * fuseScore
7076
: fuseScore;
7177
combinedMap.set(doc.docKey, { doc, score: combinedScore });
7278
}
7379

74-
for (const [docKey, backendScore] of backendMap) {
75-
if (!combinedMap.has(docKey)) {
76-
const doc = allDocs.find((d) => d.docKey === docKey);
77-
if (doc) combinedMap.set(docKey, { doc, score: 0.7 * backendScore });
80+
for (const [fileKey, backendScore] of backendMap) {
81+
if (!combinedMap.has(fileKey)) {
82+
const doc = allDocs.find((d) => d.docKey === fileKey);
83+
if (doc) combinedMap.set(fileKey, { doc, score: BACKEND_WEIGHT * backendScore });
7884
}
7985
}
8086

@@ -83,6 +89,7 @@ function mergeHybridResults(
8389
.map((entry) => entry.doc);
8490
}
8591

92+
8693
const SearchResultItem: FC<{ doc: DocMetadata; onClickLink: () => void }> = ({
8794
doc,
8895
onClickLink,
@@ -115,24 +122,28 @@ const SearchResultItem: FC<{ doc: DocMetadata; onClickLink: () => void }> = ({
115122
);
116123
};
117124

125+
126+
118127
export const SearchView: FC<{
119128
onClickLink?: () => void;
120129
isOpen?: boolean;
121130
}> = ({ onClickLink = () => {}, isOpen = false }) => {
122131
const inputRef = useRef<HTMLInputElement>(null);
123132
const searchQueryParam = useSearchParams().get('search');
124133
const [results, setResults] = useState<DocMetadata[]>([]);
134+
const [currentSearchQuery, setCurrentSearchQuery] = useState<string | null>(
135+
searchQueryParam
136+
);
137+
125138
const { search, setSearch } = useSearch({
126139
defaultValue: searchQueryParam,
127140
onClear: () => setResults([]),
128-
onSearch: (searchQuery: string) => {
129-
const fuseSearchResults = fuse
130-
.search(searchQuery)
131-
.map((result) => result.item);
132-
133-
setResults(fuseSearchResults);
141+
onSearch: (query) => {
142+
const fuseResults = fuse.search(query).map((r) => r.item);
143+
setResults(fuseResults);
134144
},
135145
});
146+
136147
const { data: searchDocData, isFetching } = useSearchDoc({
137148
input: search,
138149
});
@@ -142,8 +153,8 @@ export const SearchView: FC<{
142153

143154
const docMetadata = getIntlayer('doc-metadata', locale) as DocMetadata[];
144155
const blogMetadata = getIntlayer('blog-metadata', locale) as BlogMetadata[];
145-
146156
const filesData = [...docMetadata, ...blogMetadata];
157+
147158
const fuse = new Fuse(filesData, fuseOptions);
148159

149160
const handleSearch = async (query: string) => {
@@ -166,16 +177,17 @@ export const SearchView: FC<{
166177

167178
useEffect(() => {
168179
if (searchDocData?.data && currentSearchQuery) {
169-
const backendDocsWithScore =
170-
(searchDocData?.data ?? []).map((d: any) => ({
171-
docKey: d.fileKey,
172-
score: d.similarityScore ?? 0.5,
173-
})) ?? [];
180+
const backendDocumentsWithScore: BackendDocResult[] = searchDocData.data.map(
181+
(doc) => ({
182+
fileKey: doc.fileKey,
183+
similarityScore: doc.similarityScore ?? 0.5,
184+
})
185+
);
174186

175-
const fuseSearchResults = fuse.search(currentSearchQuery);
187+
const fuseResults = fuse.search(currentSearchQuery);
176188
const mergedResults = mergeHybridResults(
177-
fuseSearchResults,
178-
backendDocsWithScore,
189+
fuseResults,
190+
backendDocumentsWithScore,
179191
filesData
180192
);
181193

@@ -184,24 +196,13 @@ export const SearchView: FC<{
184196
}, [searchDocData, currentSearchQuery, filesData, fuse]);
185197

186198
useEffect(() => {
187-
if (searchQuery) handleSearch(searchQuery);
188-
}, [searchQuery]);
199+
if (searchQueryParam) handleSearch(searchQueryParam);
200+
}, [searchQueryParam]);
189201

190-
// Focus input when modal opens using setTimeout
191-
// This waits for the browser's paint cycle and the modal animation
192202
useEffect(() => {
193203
if (isOpen) {
194-
setTimeout(() => {
195-
inputRef.current?.focus();
196-
}, 50);
204+
setTimeout(() => inputRef.current?.focus(), 50);
197205
}
198-
return () => {
199-
if (timeoutRef.current) clearTimeout(timeoutRef.current);
200-
};
201-
}, [isOpen]);
202-
203-
const isNoResult =
204-
!isFetching && results.length === 0 && inputRef.current?.value !== '';
205206
}, [isOpen]);
206207

207208
const isNoResult = !isFetching && results.length === 0 && search.length > 0;
@@ -228,10 +229,9 @@ export const SearchView: FC<{
228229

229230
{results.length > 0 && (
230231
<ul className="flex flex-col gap-10">
231-
{results.map((result, i) => (
232+
{results.map((result) => (
232233
<li key={result.url}>
233234
<SearchResultItem doc={result} onClickLink={onClickLink} />
234-
<p className="text-gray-400 text-xs">Rank #{i + 1}</p>
235235
</li>
236236
))}
237237
</ul>

0 commit comments

Comments
 (0)