From 295304582682d6bd27839454f609dc0e88d029a2 Mon Sep 17 00:00:00 2001 From: Christian Flach Date: Sun, 5 Dec 2021 11:00:01 +0100 Subject: [PATCH] feat: split indexes by plugin and documentation versions --- .vscode/settings.json | 3 +- package.json | 2 +- .../src/client/theme/SearchBar/index.tsx | 192 +++++++++--------- .../src/server/index.ts | 163 ++++++++------- .../docusaurus-search-local/tsconfig.json | 1 + 5 files changed, 185 insertions(+), 176 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index f6f553b..1996b54 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,5 +2,6 @@ "search.exclude": { "lib/**": true, "example-docs/**": true - } + }, + "jest.jestCommandLine": "yarn test" } \ No newline at end of file diff --git a/package.json b/package.json index 886d69d..bd62754 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "prepare": "husky install", "lint": "prettier --check **/*.{js,jsx,ts,tsx,json,css,scss,md,html}", "format": "prettier --write **/*.{js,jsx,ts,tsx,json,css,scss,md,html}", - "test": "jest packages/docusaurus-search-local", + "test": "jest packages/docusaurus-search-local/src", "test:e2e": "playwright test e2e-tests" }, "lint-staged": { diff --git a/packages/docusaurus-search-local/src/client/theme/SearchBar/index.tsx b/packages/docusaurus-search-local/src/client/theme/SearchBar/index.tsx index cf39e31..dbf968f 100644 --- a/packages/docusaurus-search-local/src/client/theme/SearchBar/index.tsx +++ b/packages/docusaurus-search-local/src/client/theme/SearchBar/index.tsx @@ -25,6 +25,7 @@ import { usePluginData } from "@docusaurus/useGlobalData"; import type { DSLAPluginData, MyDocument } from "../../../types"; const SEARCH_INDEX_AVAILABLE = process.env.NODE_ENV === "production"; +const MAX_SEARCH_RESULTS = 8; type MyItem = { document: MyDocument; @@ -32,29 +33,27 @@ type MyItem = { terms: string[]; }; -function getItemUrl({ document }: MyItem) { +function getItemUrl({ document }: MyItem): string { const [path, hash] = document.sectionRoute.split("#"); - let url = path; + let url = path!; if (hash) { url += "#" + hash; } return url; } -function fetchIndex(baseUrl: string) { +function fetchIndex(baseUrl: string, tag: string): Promise { if (SEARCH_INDEX_AVAILABLE) { - return fetch(`${baseUrl}search-index.json`) + return fetch(`${baseUrl}search-index-${tag}.json`) .then((content) => content.json()) .then((json) => ({ documents: json.documents as MyDocument[], - allTags: json.allTags as string[], index: mylunr.Index.load(json.index), })); } else { // The index does not exist in development, therefore load a dummy index here. return Promise.resolve({ documents: [], - allTags: [DEFAULT_SEARCH_TAG], index: mylunr(function () { this.ref("id"); this.field("title"); @@ -80,7 +79,9 @@ function useContextualSearchFilters() { const preferredVersion = docsPreferredVersionByPluginId[pluginId]; - const latestVersion = allDocsData[pluginId].versions.find((v) => v.isLast)!; + const latestVersion = allDocsData[pluginId]!.versions.find( + (v) => v.isLast + )!; const version = activeVersion ?? preferredVersion ?? latestVersion; @@ -98,6 +99,11 @@ function useContextualSearchFilters() { }; } +type IndexWithDocuments = { + documents: MyDocument[]; + index: lunr.Index; +}; + const SearchBar = () => { const { siteConfig: { baseUrl }, @@ -117,23 +123,41 @@ const SearchBar = () => { tagsRef.current = tags; }, [tags]); - const index = useRef< - | null - | "loading" - | { - documents: MyDocument[]; - allTags: string[]; - index: lunr.Index; + const indexes = useRef< + Record< + string, + | { + state: "loading"; + callbacks: Array<(index: IndexWithDocuments) => void>; + } + | ({ state: "ready" } & IndexWithDocuments) + > + >({}); + + const getIndex = async (tag: string): Promise => { + const index = indexes.current[tag]; + switch (index?.state) { + case "ready": + return index; + case undefined: { + const callbacks: Array<(index: IndexWithDocuments) => void> = []; + indexes.current[tag] = { + state: "loading", + callbacks, + }; + const index = await fetchIndex(baseUrl, tag); + callbacks.forEach((cb) => cb(index)); + + return (indexes.current[tag] = { + state: "ready", + ...index, + }); } - >(null); - - const getIndex = async () => { - if (index.current !== null && index.current !== "loading") { - // Do not load the index (again) if its already loaded or in the process of being loaded. - return index.current; + case "loading": + return new Promise((resolve) => { + index.callbacks.push(resolve); + }); } - index.current = "loading"; - return (index.current = await fetchIndex(baseUrl)); }; const placeholder = translate({ @@ -279,81 +303,59 @@ const SearchBar = () => { return getItemUrl(item); }, async getItems() { - const { documents, allTags, index } = await getIndex(); + const tags = tagsRef.current; + const indexes = await Promise.all( + tags.map((tag) => getIndex(tag)) + ); + const terms = tokenize(input); - const results = index - .query((query) => { - query.term(terms, { fields: ["title"], boost: titleBoost }); - query.term(terms, { - fields: ["title"], - boost: titleBoost, - wildcard: mylunr.Query.wildcard.TRAILING, - }); - query.term(terms, { - fields: ["content"], - boost: contentBoost, - }); - query.term(terms, { - fields: ["content"], - boost: contentBoost, - wildcard: mylunr.Query.wildcard.TRAILING, - }); - - if (indexDocSidebarParentCategories) { - query.term(terms, { - fields: ["sidebarParentCategories"], - boost: parentCategoriesBoost, - }); - query.term(terms, { - fields: ["sidebarParentCategories"], - boost: parentCategoriesBoost, - wildcard: mylunr.Query.wildcard.TRAILING, - }); - } - - // We want to search all documents with whose tag is included in `searchTags`. - // Since lunr.js does not allow OR queries, we instead prohibit all other tags. - // - // https://github.com/cmfcmf/docusaurus-search-local/issues/19 - const searchTags = tagsRef.current; - allTags.forEach((tag) => { - if (!searchTags.includes(tag)) { - query.term(tag, { - fields: ["tag"], - boost: 0, - presence: mylunr.Query.presence.PROHIBITED, - // Disable stemmer for tags. - usePipeline: false, + + return indexes + .flatMap(({ index, documents }) => + index + .query((query) => { + query.term(terms, { + fields: ["title"], + boost: titleBoost, }); - } - }); - }) - // We need to remove results with a score of 0 that occur - // when the docs are versioned and just the version matches. - .filter((result) => result.score > 0) - .slice(0, 8) - .map((result) => ({ - document: documents.find( - (document) => document.id.toString() === result.ref - )!, - score: result.score, - terms, - })); - - // if (!SEARCH_INDEX_AVAILABLE) { - // results.push({ - // score: 0.5, - // document: { - // id: 1, - // pageTitle: "BLOG POST TITLE", - // sectionTitle: "BLOG POST TITLE", - // sectionRoute: "/blog/d-s-l-test", - // }, - // terms: ["a", "b"], - // }); - // } - - return results; + query.term(terms, { + fields: ["title"], + boost: titleBoost, + wildcard: mylunr.Query.wildcard.TRAILING, + }); + query.term(terms, { + fields: ["content"], + boost: contentBoost, + }); + query.term(terms, { + fields: ["content"], + boost: contentBoost, + wildcard: mylunr.Query.wildcard.TRAILING, + }); + + if (indexDocSidebarParentCategories) { + query.term(terms, { + fields: ["sidebarParentCategories"], + boost: parentCategoriesBoost, + }); + query.term(terms, { + fields: ["sidebarParentCategories"], + boost: parentCategoriesBoost, + wildcard: mylunr.Query.wildcard.TRAILING, + }); + } + }) + .slice(0, MAX_SEARCH_RESULTS) + .map((result) => ({ + document: documents.find( + (document) => document.id.toString() === result.ref + )!, + score: result.score, + terms, + })) + ) + .sort((a, b) => b.score - a.score) + .slice(0, MAX_SEARCH_RESULTS); }, }, ]; diff --git a/packages/docusaurus-search-local/src/server/index.ts b/packages/docusaurus-search-local/src/server/index.ts index 5b23715..3b336a0 100644 --- a/packages/docusaurus-search-local/src/server/index.ts +++ b/packages/docusaurus-search-local/src/server/index.ts @@ -154,7 +154,7 @@ export default function cmfcmfDocusaurusSearchLocal( } if (Array.isArray(language) && language.length === 1) { - language = language[0]; + language = language[0]!; } let generated = @@ -455,91 +455,96 @@ export const tokenize = (input) => lunr.tokenizer(input) ) ).flat(); - logger.info(`Building index (${documents.length} documents)`); + const documentsByTag = documents.reduce((acc, doc) => { + acc[doc.tag] = acc[doc.tag] ?? []; + acc[doc.tag]!.push(doc); + return acc; + }, {} as Record); - const allTags = new Set(); + logger.info( + `${Object.keys(documentsByTag).length} indexes will be created.` + ); - const index = lunr(function () { - if (language !== "en") { - if (Array.isArray(language)) { - // @ts-expect-error - this.use(lunr.multiLanguage(...language)); - } else { - // @ts-expect-error - this.use(lunr[language]); - } - } - - this.k1(k1); - this.b(b); - - this.ref("id"); - this.field("title"); - this.field("content"); - // @ts-expect-error - this.field("tag", { isLiteral: true }); - - if (indexDocSidebarParentCategories > 0) { - this.field("sidebarParentCategories"); - } - const that = this; - documents.forEach(function ({ - id, - sectionTitle, - sectionContent, - tag, - docSidebarParentCategories, - }) { - let sidebarParentCategories; - if ( - indexDocSidebarParentCategories > 0 && - docSidebarParentCategories - ) { - sidebarParentCategories = docSidebarParentCategories - .reverse() - .slice(0, indexDocSidebarParentCategories) - .join(" "); - } + await Promise.all( + Object.entries(documentsByTag).map(async ([tag, documents]) => { + logger.info(`Building index ${tag} (${documents.length} documents)`); + + const index = lunr(function () { + if (language !== "en") { + if (Array.isArray(language)) { + // @ts-expect-error + this.use(lunr.multiLanguage(...language)); + } else { + // @ts-expect-error + this.use(lunr[language]); + } + } - allTags.add(tag); + this.k1(k1); + this.b(b); - that.add({ - id: id.toString(), // the ref must be a string - title: sectionTitle, - content: sectionContent, - tag, - sidebarParentCategories, - }); - }); - }); + this.ref("id"); + this.field("title"); + this.field("content"); + + if (indexDocSidebarParentCategories > 0) { + this.field("sidebarParentCategories"); + } + const that = this; + documents.forEach( + ({ + id, + sectionTitle, + sectionContent, + docSidebarParentCategories, + }) => { + let sidebarParentCategories; + if ( + indexDocSidebarParentCategories > 0 && + docSidebarParentCategories + ) { + sidebarParentCategories = docSidebarParentCategories + .reverse() + .slice(0, indexDocSidebarParentCategories) + .join(" "); + } - logger.info("Writing index to disk"); + that.add({ + id: id.toString(), // the ref must be a string + title: sectionTitle, + content: sectionContent, + sidebarParentCategories, + }); + } + ); + }); - await writeFileAsync( - path.join(outDir, "search-index.json"), - JSON.stringify({ - documents: documents.map( - ({ - id, - pageTitle, - sectionTitle, - sectionRoute, - type, - }): MyDocument => ({ - id, - pageTitle, - sectionTitle, - sectionRoute, - type, - }) - ), - allTags: [...allTags.values()], - index, - }), - { encoding: "utf8" } + await writeFileAsync( + path.join(outDir, `search-index-${tag}.json`), + JSON.stringify({ + documents: documents.map( + ({ + id, + pageTitle, + sectionTitle, + sectionRoute, + type, + }): MyDocument => ({ + id, + pageTitle, + sectionTitle, + sectionRoute, + type, + }) + ), + index, + }), + { encoding: "utf8" } + ); + + logger.info(`Index ${tag} written to disk`); + }) ); - - logger.info("Index written to disk, success!"); }, }; } diff --git a/packages/docusaurus-search-local/tsconfig.json b/packages/docusaurus-search-local/tsconfig.json index 93ae439..b7baec7 100644 --- a/packages/docusaurus-search-local/tsconfig.json +++ b/packages/docusaurus-search-local/tsconfig.json @@ -16,6 +16,7 @@ "strictPropertyInitialization": true, "noImplicitThis": true, "alwaysStrict": true, + // "noUncheckedIndexedAccess": true, /* Additional Checks */ "noUnusedLocals": true,