Skip to content

Commit 2fe6217

Browse files
TheSonOfThomptsck
andcommitted
Update ingest for Azure (#2843)
* Add initial package.json for chatbot-server * feat(ui): initialize chatbot UI with React and Vite (#2841) * feat(ui): initialize chatbot UI with React and Vite - Added package.json for UI package with scripts and dependencies. - Created App.css for styling the main application layout. - Implemented App.tsx as the main component integrating the MongoDB Chatbot UI. - Added a React SVG asset for branding. - Created index.css for global styles and theming. - Set up main.tsx as the entry point for the React application. - Added vite-env.d.ts for Vite type definitions. - Configured TypeScript with tsconfig.json and tsconfig.node.json for the UI package. - Created vite.config.ts for Vite configuration with React plugin. * fix: update license to Apache-2.0 and refactor import statements for consistency * feat(ui): initialize chatbot UI with React and Vite - Added package.json for UI package with scripts and dependencies. - Created App.css for styling the main application layout. - Implemented App.tsx as the main component integrating the MongoDB Chatbot UI. - Added a React SVG asset for branding. - Created index.css for global styles and theming. - Set up main.tsx as the entry point for the React application. - Added vite-env.d.ts for Vite type definitions. - Configured TypeScript with tsconfig.json and tsconfig.node.json for the UI package. - Created vite.config.ts for Vite configuration with React plugin. 
* fix: update license to Apache-2.0 and refactor import statements for consistency * feat(ingest): add initial configuration and data sources for chatbot ingestion * update configs * feat(ingest): implement data sources for LeafyGreen UI and MongoDB Chatbot Framework * chore: update pnpm workspace configuration to include 'apps/*' directory * feat(ingest): update ingest configuration to use Azure OpenAI and add loadEnv utility * feat(ingest): enhance environment variable loading and add MongoDB chatbot framework data source * feat(ingest): add data sources for LeafyGreen UI and MongoDB Chatbot Framework, and refactor Azure OpenAI embedder * feat(ingest): remove MongoDB connection URI from example environment file * feat: remove ts-node from devDependencies and update package versions in pnpm-lock.yaml * feat(ingest): add MongoDB Design website data source and integrate into ingest configuration * Update pnpm-lock.yaml * Delete package-lock.json * reset ui * Update .env.example * rm wip web loader --------- Co-authored-by: Terrence Keane <terrence.keane@mongodb.com>
1 parent f0fb59a commit 2fe6217

File tree

13 files changed

+179
-211
lines changed

13 files changed

+179
-211
lines changed

apps/chatbot-server/.env.example

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
11
# MongoDB config
2-
MONGODB_CONNECTION_URI="<MongoDB connection URI with Atlas Vector Search configured on 'embedded_content'>"
2+
MONGODB_USER=<YOUR_MONGODB_USER>
3+
MONGODB_PASSWORD=<YOUR_MONGODB_PASSWORD>
4+
MONGODB_PROJECT_URL=<YOUR_PROJECT_URL>
5+
MONGODB_APP_NAME=LeafyGreenAI
6+
37
VECTOR_SEARCH_INDEX_NAME="vector_index" # or whatever your index name is
48
MONGODB_DATABASE_NAME="mongodb-chatbot-framework-chatbot" # or whatever your database name is. must contain vector search index.
59

610
# OpenAI config
711
OPENAI_API_KEY=<OpenAI API key>
812
OPENAI_EMBEDDING_MODEL="text-embedding-ada-002" # or other model
913
OPENAI_CHAT_COMPLETION_MODEL="gpt-3.5-turbo" # or other model
14+
15+
# Azure OpenAI config
16+
AZURE_API_KEY1=<YOUR_AZURE_API_KEY1>
17+
AZURE_API_KEY2=<YOUR_AZURE_API_KEY2>
18+
AZURE_OPENAI_ENDPOINT=https://<your-instance>.openai.azure.com/
19+
AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
20+
AZURE_OPENAI_CHAT_COMPLETION_MODEL=gpt-3.5-turbo
Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,32 @@
11
import {
22
makeMongoDbEmbeddedContentStore,
33
makeMongoDbPageStore,
4-
makeOpenAiEmbedder,
54
} from 'mongodb-rag-core';
6-
import { OpenAI } from 'mongodb-rag-core/openai';
5+
import { AzureOpenAI } from 'mongodb-rag-core/openai';
76
import { Config, makeIngestMetaStore } from 'mongodb-rag-ingest';
8-
import path from 'path';
97

10-
import { leafygreenGithubSourceConstructor } from './LGGithubDataSource';
11-
import { loadEnvVars } from './loadEnvVars';
12-
import { mongoDbChatbotFrameworkDocsDataSourceConstructor } from './mongodbChatbotFrameworkDataSource';
8+
import { leafygreenGithubSourceConstructor } from './sources/github-leafygreen-ui';
9+
import { mongoDbChatbotFrameworkDocsDataSourceConstructor } from './sources/github-mdb-chatbot-framework';
10+
import { createAzureEmbedderConstructor } from './utils/createAzureEmbedderConstructor';
11+
import { loadEnvVars } from './utils/loadEnv';
1312

1413
// Load project environment variables
15-
const dotenvPath = path.join(__dirname, '..', '..', '..', '.env'); // .env at project root
1614
const {
1715
MONGODB_CONNECTION_URI,
1816
MONGODB_DATABASE_NAME,
19-
OPENAI_API_KEY,
2017
OPENAI_EMBEDDING_MODEL,
21-
} = loadEnvVars(dotenvPath);
18+
} = loadEnvVars();
2219

2320
export default {
24-
embedder: async () => {
25-
return makeOpenAiEmbedder({
26-
openAiClient: new OpenAI({ apiKey: OPENAI_API_KEY }),
27-
deployment: OPENAI_EMBEDDING_MODEL,
28-
backoffOptions: {
29-
numOfAttempts: 25,
30-
startingDelay: 1000,
31-
},
32-
});
33-
},
21+
embedder: createAzureEmbedderConstructor({
22+
azureClient: new AzureOpenAI({
23+
endpoint: process.env.AZURE_OPENAI_ENDPOINT,
24+
apiKey: process.env.AZURE_API_KEY1,
25+
apiVersion: '2024-04-01-preview',
26+
deployment: process.env.AZURE_OPENAI_DEPLOYMENT,
27+
}),
28+
model: OPENAI_EMBEDDING_MODEL,
29+
}),
3430
embeddedContentStore: () =>
3531
makeMongoDbEmbeddedContentStore({
3632
connectionUri: MONGODB_CONNECTION_URI,
@@ -52,11 +48,9 @@ export default {
5248
}),
5349
// Add data sources here
5450
dataSources: async () => {
55-
const mongodbChatbotFrameworkSource =
56-
await mongoDbChatbotFrameworkDocsDataSourceConstructor();
57-
58-
const leafyGreenGithubSource = await leafygreenGithubSourceConstructor();
59-
60-
return [mongodbChatbotFrameworkSource, leafyGreenGithubSource];
51+
return Promise.all([
52+
mongoDbChatbotFrameworkDocsDataSourceConstructor(),
53+
leafygreenGithubSourceConstructor(),
54+
]);
6155
},
6256
} satisfies Config;

apps/chatbot-server/packages/ingest/src/LGGithubDataSource.ts renamed to apps/chatbot-server/ingest/sources/github-leafygreen-ui.ts

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,25 @@ import {
44
makeGitDataSource,
55
} from 'mongodb-rag-core/dataSources';
66

7+
/**
 * Builds the git-backed data source for the LeafyGreen UI repository.
 *
 * The repository is cloned with `--depth 1` from the `main` branch. Only files
 * whose path ends in `.md` or contains `types` are ingested, and every
 * ingested file is converted to a page by `handleHtmlDocument` using the
 * module-level `htmlParserOptions`.
 *
 * NOTE(review): Markdown paths are routed through the HTML document handler;
 * confirm that this is the intended parser for `.md` content.
 *
 * @returns The data source produced by `makeGitDataSource`.
 */
export const leafygreenGithubSourceConstructor = async () => {
  // Selects which repository paths are turned into pages.
  const shouldIngest = (filePath: string) =>
    filePath.endsWith('.md') || filePath.includes('types');

  // Options forwarded to the underlying git clone.
  const cloneOptions = {
    '--depth': 1,
    '--branch': 'main',
  };

  // Metadata attached to the data source.
  const sourceMetadata = {
    productName: 'LeafyGreen UI',
    version: '1.0.0',
    tags: ['leafygreen', 'docs'],
  };

  const dataSource = await makeGitDataSource({
    name: 'leafygreen-ui',
    repoUri: 'https://github.com/mongodb/leafygreen-ui.git',
    repoOptions: cloneOptions,
    metadata: sourceMetadata,
    filter: shouldIngest,
    handlePage: async (pagePath, pageContent) => {
      const page = await handleHtmlDocument(
        pagePath,
        pageContent,
        htmlParserOptions,
      );
      return page;
    },
  });

  return dataSource;
};
25+
726
const removeElements = (domDoc: Document) => [
827
...Array.from(domDoc.querySelectorAll('head')),
928
...Array.from(domDoc.querySelectorAll('script')),
@@ -38,22 +57,3 @@ const htmlParserOptions: Omit<HandleHtmlPageFuncOptions, 'sourceName'> = {
3857
extractTitle,
3958
extractMetadata,
4059
};
41-
42-
export const leafygreenGithubSourceConstructor = async () => {
43-
return await makeGitDataSource({
44-
name: 'leafygreen-ui',
45-
repoUri: 'https://github.com/mongodb/leafygreen-ui.git',
46-
repoOptions: {
47-
'--depth': 1,
48-
'--branch': 'main',
49-
},
50-
metadata: {
51-
productName: 'LeafyGreen UI',
52-
version: '1.0.0',
53-
tags: ['leafygreen', 'docs'],
54-
},
55-
filter: (path: string) => path.endsWith('.md') || path.includes('types'),
56-
handlePage: async (path, content) =>
57-
await handleHtmlDocument(path, content, htmlParserOptions),
58-
});
59-
};

apps/chatbot-server/packages/ingest/src/mongodbChatbotFrameworkDataSource.ts renamed to apps/chatbot-server/ingest/sources/github-mdb-chatbot-framework.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
/**
2+
@fileoverview Data source for the MongoDB Chatbot Framework docs.
3+
*/
14
import {
25
makeMdOnGithubDataSource,
36
MakeMdOnGithubDataSourceParams,
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"extends": "@lg-tools/build/config/package.tsconfig.json",
3+
"compilerOptions": {
4+
"module": "NodeNext",
5+
"moduleResolution": "nodenext",
6+
"target": "ES2017",
7+
"rootDir": ".",
8+
"outDir": "../dist",
9+
"emitDeclarationOnly": false,
10+
"composite": false,
11+
"declaration": false,
12+
"declarationMap": false,
13+
"noEmit": false,
14+
},
15+
"files": [
16+
"./ingest.config.ts"
17+
],
18+
"include": ["./**/*.ts"],
19+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import { EmbedArgs, Embedder } from 'mongodb-rag-core';
2+
import { AzureOpenAI } from 'mongodb-rag-core/openai';
3+
import { Constructor } from 'mongodb-rag-ingest';
4+
5+
/**
 * Returns a constructor function for an embedder that uses Azure OpenAI
 * to create embeddings.
 *
 * @param options - Embedder configuration.
 * @param options.azureClient - Azure OpenAI client used to call the embeddings endpoint.
 * @param options.model - Model name sent with every embeddings request; also
 *   exposed as the embedder's `modelName`.
 * @returns A zero-argument constructor that produces the `Embedder`.
 * @throws Error from `embed` when the embeddings response contains no vector.
 */
export const createAzureEmbedderConstructor =
  ({
    azureClient,
    model,
  }: {
    azureClient: AzureOpenAI;
    model: string;
  }): Constructor<Embedder> =>
  () => {
    const embed: Embedder['embed'] = async (args: EmbedArgs) => {
      const result = await azureClient.embeddings.create({
        input: args.text,
        model,
      });

      // `result.data` is the list of embedding objects (one per input string),
      // not the vector itself. A single input is sent, so the vector is the
      // `embedding` field of the first entry.
      const vector = result.data[0]?.embedding;

      if (!Array.isArray(vector)) {
        throw new Error(
          `Azure OpenAI returned no embedding for model "${model}"`,
        );
      }

      return {
        embedding: vector,
      };
    };

    return {
      embed,
      modelName: model,
    };
  };
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { strict as assert } from 'assert';
2+
import dotenv from 'dotenv';
3+
4+
/**
  Load and validate the environment variables used by the ingest configuration.

  Calls `dotenv.config()` with no arguments (so dotenv's default `.env` lookup
  applies; this function does not accept a path), then reads each required
  variable from `process.env` and builds the MongoDB connection URI from the
  user, password, project URL and app name.

  The user, password and app name are percent-encoded when placed in the URI so
  that reserved characters (for example `@`, `:`, `/`, `%`) cannot corrupt it.
  The individual values in the returned record are the raw, unencoded values.

  Note that if you change the environment variable names,
  you need to update this function to support those environment variables.

  @returns A record containing `MONGODB_CONNECTION_URI` plus every required variable.
  @throws AssertionError with the message `<NAME> is required` for the first
    required variable that is missing or empty.
*/
export function loadEnvVars(): Record<string, string> {
  dotenv.config();

  // Reads one variable and narrows it to `string`, failing fast when unset or empty.
  const requireEnv = (name: string): string => {
    const value = process.env[name];
    assert(value, `${name} is required`);
    return value;
  };

  // Validation order is part of the observable behavior: the first missing
  // variable in this order is the one reported.
  const MONGODB_USER = requireEnv('MONGODB_USER');
  const MONGODB_PASSWORD = requireEnv('MONGODB_PASSWORD');
  const MONGODB_PROJECT_URL = requireEnv('MONGODB_PROJECT_URL');
  const MONGODB_APP_NAME = requireEnv('MONGODB_APP_NAME');
  const MONGODB_DATABASE_NAME = requireEnv('MONGODB_DATABASE_NAME');
  const VECTOR_SEARCH_INDEX_NAME = requireEnv('VECTOR_SEARCH_INDEX_NAME');
  const OPENAI_API_KEY = requireEnv('OPENAI_API_KEY');
  const OPENAI_EMBEDDING_MODEL = requireEnv('OPENAI_EMBEDDING_MODEL');

  // Credentials and the app name are user-supplied text; encode them so the
  // URI stays well-formed. The host is inserted as-is.
  const encodedUser = encodeURIComponent(MONGODB_USER);
  const encodedPassword = encodeURIComponent(MONGODB_PASSWORD);
  const encodedAppName = encodeURIComponent(MONGODB_APP_NAME);

  const MONGODB_CONNECTION_URI = `mongodb+srv://${encodedUser}:${encodedPassword}@${MONGODB_PROJECT_URL}/?retryWrites=true&w=majority&appName=${encodedAppName}`;

  return {
    MONGODB_CONNECTION_URI,
    MONGODB_USER,
    MONGODB_PASSWORD,
    MONGODB_PROJECT_URL,
    MONGODB_APP_NAME,
    MONGODB_DATABASE_NAME,
    VECTOR_SEARCH_INDEX_NAME,
    OPENAI_API_KEY,
    OPENAI_EMBEDDING_MODEL,
  };
}

apps/chatbot-server/package.json

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,25 @@
33
"version": "0.0.1",
44
"description": "",
55
"main": "index.js",
6-
"workspaces": [
7-
"packages/*"
8-
],
96
"publishConfig": {
107
"access": "restricted"
118
},
129
"scripts": {
13-
"ingest:all": "npm run ingest:all --workspace=ingest",
10+
"build:ingest": "tsc -p ./ingest",
11+
"ingest": "pnpm build:ingest && ingest all --config ./dist/ingest.config.js",
1412
"dev": "npm run dev --workspace=server & npm run dev --workspace=ui"
1513
},
1614
"keywords": [],
1715
"author": "",
1816
"license": "Apache-2.0",
19-
"devDependencies": {
20-
"ts-node": "^10.9.2"
21-
},
2217
"dependencies": {
18+
"@emotion/css": "^11.13.5",
19+
"dotenv": "^16.5.0",
2320
"jsdom": "^26.1.0"
21+
},
22+
"devDependencies": {
23+
"mongodb-rag-core": "^0.6.3",
24+
"mongodb-rag-ingest": "^0.3.1",
25+
"tsx": "^4.19.4"
2426
}
2527
}

apps/chatbot-server/packages/ingest/.gitignore

Lines changed: 0 additions & 3 deletions
This file was deleted.

apps/chatbot-server/packages/ingest/package.json

Lines changed: 0 additions & 19 deletions
This file was deleted.

0 commit comments

Comments
 (0)