From dcb8a18dbd95b80aaadeceb4751b29040daa6df4 Mon Sep 17 00:00:00 2001 From: erhant Date: Sat, 10 Feb 2024 04:15:25 +0300 Subject: [PATCH] community[minor]: Add Dria retriever (#4302) * initial builds and setups * examples, integration & docs * small doc fix --- .../docs/integrations/retrievers/dria.mdx | 39 ++++++ examples/src/retrievers/dria.ts | 14 ++ libs/langchain-community/.gitignore | 4 + libs/langchain-community/langchain.config.js | 7 +- libs/langchain-community/package.json | 18 +++ .../src/retrievers/dria.ts | 122 ++++++++++++++++++ .../src/retrievers/tests/dria.int.test.ts | 16 +++ yarn.lock | 35 +++++ 8 files changed, 252 insertions(+), 3 deletions(-) create mode 100644 docs/core_docs/docs/integrations/retrievers/dria.mdx create mode 100644 examples/src/retrievers/dria.ts create mode 100644 libs/langchain-community/src/retrievers/dria.ts create mode 100644 libs/langchain-community/src/retrievers/tests/dria.int.test.ts diff --git a/docs/core_docs/docs/integrations/retrievers/dria.mdx b/docs/core_docs/docs/integrations/retrievers/dria.mdx new file mode 100644 index 000000000000..761c4df44b97 --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/dria.mdx @@ -0,0 +1,39 @@ +--- +hide_table_of_contents: true +--- + +# Dria Retriever + +The [Dria](https://dria.co/profile) retriever allows an agent to perform a text-based search across a comprehensive knowledge hub. + +## Setup + +To use Dria retriever, first install Dria JS client: + +```bash npm2yarn +npm install dria +``` + +You need to provide two things to the retriever: + +- **API Key**: you can get yours at your [profile page](https://dria.co/profile) when you create an account. +- **Contract ID**: accessible at the top of the page when viewing a knowledge or in its URL. + For example, the Bitcoin whitepaper is uploaded on Dria at https://dria.co/knowledge/2KxNbEb040GKQ1DSDNDsA-Fsj_BlQIEAlzBNuiapBR0, so its contract ID is `2KxNbEb040GKQ1DSDNDsA-Fsj_BlQIEAlzBNuiapBR0`. 
+ Contract ID can be omitted during instantiation, and later be set via `dria.contractId = "your-contract"` + +Dria retriever exposes the underlying [Dria client](https://npmjs.com/package/dria) as well, refer to the [Dria documentation](https://github.com/firstbatchxyz/dria-js-client?tab=readme-ov-file#usage) to learn more about the client. + +## Usage + +import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; + +<IntegrationInstallTooltip></IntegrationInstallTooltip> + +```bash npm2yarn +npm install dria @langchain/community +``` + +import CodeBlock from "@theme/CodeBlock"; +import Example from "@examples/retrievers/dria.ts"; + +<CodeBlock language="typescript">{Example}</CodeBlock> diff --git a/examples/src/retrievers/dria.ts b/examples/src/retrievers/dria.ts new file mode 100644 index 000000000000..d66a60d03e81 --- /dev/null +++ b/examples/src/retrievers/dria.ts @@ -0,0 +1,14 @@ +import { DriaRetriever } from "@langchain/community/retrievers/dria"; + +// contract of TypeScript Handbook v4.9 uploaded to Dria +// https://dria.co/knowledge/-B64DjhUtCwBdXSpsRytlRQCu-bie-vSTvTIT8Ap3g0 +const contractId = "-B64DjhUtCwBdXSpsRytlRQCu-bie-vSTvTIT8Ap3g0"; + +const retriever = new DriaRetriever({ + contractId, // a knowledge to connect to + apiKey: "DRIA_API_KEY", // if not provided, will check env for `DRIA_API_KEY` + topK: 15, // optional: default value is 10 +}); + +const docs = await retriever.getRelevantDocuments("What is a union type?"); +console.log(docs); diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index 416c6d5382c5..58d504d6a553 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -506,6 +506,10 @@ retrievers/databerry.cjs retrievers/databerry.js retrievers/databerry.d.ts retrievers/databerry.d.cts +retrievers/dria.cjs +retrievers/dria.js +retrievers/dria.d.ts +retrievers/dria.d.cts retrievers/metal.cjs retrievers/metal.js retrievers/metal.d.ts diff --git a/libs/langchain-community/langchain.config.js 
b/libs/langchain-community/langchain.config.js index e0d347a64606..f0de1f18f721 100644 --- a/libs/langchain-community/langchain.config.js +++ b/libs/langchain-community/langchain.config.js @@ -9,9 +9,8 @@ function abs(relativePath) { return resolve(dirname(fileURLToPath(import.meta.url)), relativePath); } - export const config = { - internals:[ + internals: [ /node\:/, /@langchain\/core\//, "convex", @@ -161,6 +160,7 @@ export const config = { "retrievers/amazon_knowledge_base": "retrievers/amazon_knowledge_base", "retrievers/chaindesk": "retrievers/chaindesk", "retrievers/databerry": "retrievers/databerry", + "retrievers/dria": "retrievers/dria", "retrievers/metal": "retrievers/metal", "retrievers/remote": "retrievers/remote/index", "retrievers/supabase": "retrievers/supabase", @@ -299,6 +299,7 @@ export const config = { "chat_models/iflytek_xinghuo/web", "retrievers/amazon_kendra", "retrievers/amazon_knowledge_base", + "retrievers/dria", "retrievers/metal", "retrievers/supabase", "retrievers/vectara_summary", @@ -342,4 +343,4 @@ export const config = { cjsSource: "./dist-cjs", cjsDestination: "./dist", abs, -} \ No newline at end of file +}; diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 920076c332f8..0049f9538520 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -123,6 +123,7 @@ "discord.js": "^14.14.1", "dotenv": "^16.0.3", "dpdm": "^3.12.0", + "dria": "^0.0.3", "eslint": "^8.33.0", "eslint-config-airbnb-base": "^15.0.0", "eslint-config-prettier": "^8.6.0", @@ -221,6 +222,7 @@ "cohere-ai": "*", "convex": "^1.3.1", "discord.js": "^14.14.1", + "dria": "^0.0.3", "faiss-node": "^0.5.1", "firebase-admin": "^11.9.0", "google-auth-library": "^8.9.0", @@ -411,6 +413,9 @@ "discord.js": { "optional": true }, + "dria": { + "optional": true + }, "faiss-node": { "optional": true }, @@ -1646,6 +1651,15 @@ "import": "./retrievers/databerry.js", "require": 
"./retrievers/databerry.cjs" }, + "./retrievers/dria": { + "types": { + "import": "./retrievers/dria.d.ts", + "require": "./retrievers/dria.d.cts", + "default": "./retrievers/dria.d.ts" + }, + "import": "./retrievers/dria.js", + "require": "./retrievers/dria.cjs" + }, "./retrievers/metal": { "types": { "import": "./retrievers/metal.d.ts", @@ -2554,6 +2568,10 @@ "retrievers/databerry.js", "retrievers/databerry.d.ts", "retrievers/databerry.d.cts", + "retrievers/dria.cjs", + "retrievers/dria.js", + "retrievers/dria.d.ts", + "retrievers/dria.d.cts", "retrievers/metal.cjs", "retrievers/metal.js", "retrievers/metal.d.ts", diff --git a/libs/langchain-community/src/retrievers/dria.ts b/libs/langchain-community/src/retrievers/dria.ts new file mode 100644 index 000000000000..60caec99553b --- /dev/null +++ b/libs/langchain-community/src/retrievers/dria.ts @@ -0,0 +1,122 @@ +import { + BaseRetriever, + type BaseRetrieverInput, +} from "@langchain/core/retrievers"; +import { Document } from "@langchain/core/documents"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import type { DriaParams, SearchOptions as DriaSearchOptions } from "dria"; +import { Dria } from "dria"; + +/** + * Configurations for Dria retriever. + * + * - `contractId`: a Dria knowledge's contract ID. + * - `apiKey`: a Dria API key; if omitted, the retriever will check for `DRIA_API_KEY` environment variable. + * + * The retrieval can be configured with the following options: + * + * - `topK`: number of results to return, max 20. (default: 10) + * - `rerank`: re-rank the results from most to least semantically relevant to the given search query. (default: true) + * - `level`: level of detail for the search, must be an integer from 0 to 5 (inclusive). (default: 1) + * - `field`: CSV field name, only relevant for the CSV files. 
+ */ +export interface DriaRetrieverArgs + extends DriaParams, + BaseRetrieverInput, + DriaSearchOptions {} + +/** + * Class for retrieving documents from knowledge uploaded to Dria. + * + * @example + * ```typescript + * // contract of TypeScript Handbook v4.9 uploaded to Dria + * const contractId = "-B64DjhUtCwBdXSpsRytlRQCu-bie-vSTvTIT8Ap3g0"; + * const retriever = new DriaRetriever({ contractId }); + * + * const docs = await retriever.getRelevantDocuments("What is a union type?"); + * console.log(docs); + * ``` + */ +export class DriaRetriever extends BaseRetriever { + static lc_name() { + return "DriaRetriever"; + } + + lc_namespace = ["langchain", "retrievers", "dria"]; + + get lc_secrets() { + return { apiKey: "DRIA_API_KEY" }; + } + + get lc_aliases() { + return { apiKey: "api_key" }; + } + + apiKey: string; + + public driaClient: Dria; + + private searchOptions: DriaSearchOptions; + + constructor(fields: DriaRetrieverArgs) { + super(fields); + + const apiKey = fields.apiKey ?? getEnvironmentVariable("DRIA_API_KEY"); + if (!apiKey) throw new Error("Missing DRIA_API_KEY."); + this.apiKey = apiKey; + + this.searchOptions = { + topK: fields.topK, + field: fields.field, + rerank: fields.rerank, + level: fields.level, + }; + + this.driaClient = new Dria({ + contractId: fields.contractId, + apiKey: this.apiKey, + }); + } + + /** + * Currently connected knowledge on Dria. + * + * Retriever will use this contract ID while retrieving documents, + * and will throw an error if `undefined`. + * + * In the case that this is `undefined`, the user is expected to + * set contract ID manually, such as after creating a new knowledge & inserting + * data there with the Dria client. + */ + get contractId(): string | undefined { + return this.driaClient.contractId; + } + + set contractId(value: string) { + this.driaClient.contractId = value; + } + + /** + * Retrieves documents from Dria with respect to the configured contract ID, based on + * the given query string. 
+ * + * @param query The query string + * @returns A promise that resolves to an array of documents, with page content as text, + * along with `id` and the relevance `score` within the metadata. + */ + async _getRelevantDocuments(query: string): Promise<Document[]> { + const docs = await this.driaClient.search(query, this.searchOptions); + return docs.map( + (d) => + new Document({ + // dria.search returns a string within the metadata as the content + pageContent: d.metadata, + metadata: { + id: d.id, + score: d.score, + }, + }) + ); + } +} diff --git a/libs/langchain-community/src/retrievers/tests/dria.int.test.ts b/libs/langchain-community/src/retrievers/tests/dria.int.test.ts new file mode 100644 index 000000000000..5b6c5350a64e --- /dev/null +++ b/libs/langchain-community/src/retrievers/tests/dria.int.test.ts @@ -0,0 +1,16 @@ +import { test, expect } from "@jest/globals"; +import { DriaRetriever } from "../dria.js"; + +test.skip("DriaRetriever", async () => { + // contract of TypeScript Handbook v4.9 uploaded to Dria + // https://dria.co/knowledge/-B64DjhUtCwBdXSpsRytlRQCu-bie-vSTvTIT8Ap3g0 + const contractId = "-B64DjhUtCwBdXSpsRytlRQCu-bie-vSTvTIT8Ap3g0"; + const topK = 10; + + const retriever = new DriaRetriever({ contractId, topK }); + + const docs = await retriever.getRelevantDocuments("What is a union type?"); + expect(docs.length).toBe(topK); + + console.log(docs[0].pageContent); +}); diff --git a/yarn.lock b/yarn.lock index cb5d2110618a..a48925e5da64 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8941,6 +8941,7 @@ __metadata: discord.js: ^14.14.1 dotenv: ^16.0.3 dpdm: ^3.12.0 + dria: ^0.0.3 eslint: ^8.33.0 eslint-config-airbnb-base: ^15.0.0 eslint-config-prettier: ^8.6.0 @@ -9042,6 +9043,7 @@ __metadata: cohere-ai: "*" convex: ^1.3.1 discord.js: ^14.14.1 + dria: ^0.0.3 faiss-node: ^0.5.1 firebase-admin: ^11.9.0 google-auth-library: ^8.9.0 @@ -9178,6 +9180,8 @@ __metadata: optional: true discord.js: optional: true + dria: + optional: true faiss-node: optional: true 
firebase-admin: @@ -16351,6 +16355,17 @@ __metadata: languageName: node linkType: hard +"axios@npm:^1.6.5": + version: 1.6.7 + resolution: "axios@npm:1.6.7" + dependencies: + follow-redirects: ^1.15.4 + form-data: ^4.0.0 + proxy-from-env: ^1.1.0 + checksum: 87d4d429927d09942771f3b3a6c13580c183e31d7be0ee12f09be6d5655304996bb033d85e54be81606f4e89684df43be7bf52d14becb73a12727bf33298a082 + languageName: node + linkType: hard + "axobject-query@npm:^3.1.1, axobject-query@npm:^3.2.1": version: 3.2.1 resolution: "axobject-query@npm:3.2.1" @@ -19500,6 +19515,16 @@ __metadata: languageName: node linkType: hard +"dria@npm:^0.0.3": + version: 0.0.3 + resolution: "dria@npm:0.0.3" + dependencies: + axios: ^1.6.5 + zod: ^3.22.4 + checksum: 69d66479cb015e87425fba7f1741e4d895b4f43844e6b8897d3e7fe38e579097f2c4673d534141419d15a72866adf4db12acb9b59b42681ef2c4ee2d301b9267 + languageName: node + linkType: hard + "duck@npm:^0.1.12": version: 0.1.12 resolution: "duck@npm:0.1.12" @@ -21727,6 +21752,16 @@ __metadata: languageName: node linkType: hard +"follow-redirects@npm:^1.15.4": + version: 1.15.5 + resolution: "follow-redirects@npm:1.15.5" + peerDependenciesMeta: + debug: + optional: true + checksum: 5ca49b5ce6f44338cbfc3546823357e7a70813cecc9b7b768158a1d32c1e62e7407c944402a918ea8c38ae2e78266312d617dc68783fac502cbb55e1047b34ec + languageName: node + linkType: hard + "for-each@npm:^0.3.3": version: 0.3.3 resolution: "for-each@npm:0.3.3"