Skip to content

Commit 17f24b3

Browse files
committed
fix ingest - pull from config file
1 parent 29304d7 · commit 17f24b3

File tree

1 file changed

+21 -57 lines changed

1 file changed

+21 -57 lines changed

bin/commands/data/ingest.js

Lines changed: 21 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,6 @@ export async function ingestData(config, options) {
1515

1616
const isDevelopment = process.env.NODE_ENV === 'development' || process.env.NODE_ENV === 'test';
1717

18-
// Validate MongoDB URI
19-
if (!config.mongoUrl || typeof config.mongoUrl !== 'string') {
20-
throw new Error('Invalid MongoDB URI: URI must be a non-empty string');
21-
}
22-
23-
// Ensure URI starts with mongodb:// or mongodb+srv://
24-
if (!config.mongoUrl.startsWith('mongodb://') && !config.mongoUrl.startsWith('mongodb+srv://')) {
25-
throw new Error('Invalid MongoDB URI: URI must start with mongodb:// or mongodb+srv://');
26-
}
27-
2818
// Test MongoDB connection before proceeding
2919
try {
3020
if (isDevelopment) {
@@ -40,62 +30,31 @@ export async function ingestData(config, options) {
4030
throw new Error(`MongoDB connection test failed: ${error.message}`);
4131
}
4232

43-
// Restructure the config to match expected format
33+
// Create the RAG configuration using the exact structure expected by MongoRAG
4434
const ragConfig = {
45-
// MongoDB connection details at top level
46-
connectionString: config.mongoUrl, // Try this instead of nested mongodb object
47-
databaseName: config.database, // Use full names at top level
48-
collectionName: config.collection,
49-
50-
// Keep the rest of the config
35+
mongoUrl: config.mongoUrl,
36+
database: config.database,
37+
collection: config.collection,
5138
embedding: {
52-
provider: config.embedding?.provider || config.provider,
39+
provider: config.embedding.provider,
5340
apiKey: config.apiKey,
54-
model: config.embedding?.model || config.model,
55-
dimensions: config.embedding?.dimensions || config.dimensions,
56-
baseUrl: config.embedding?.baseUrl || config.baseUrl,
57-
batchSize: config.embedding?.batchSize || 100
58-
},
59-
search: {
60-
maxResults: config.search?.maxResults || 5,
61-
minScore: config.search?.minScore || 0.7
41+
model: config.embedding.model,
42+
dimensions: config.embedding.dimensions,
43+
baseUrl: config.embedding.baseUrl,
44+
batchSize: config.embedding.batchSize
6245
},
63-
indexName: config.indexName,
64-
65-
// Add standard MongoDB options
66-
mongodbOptions: {
67-
useNewUrlParser: true,
68-
useUnifiedTopology: true
69-
}
46+
indexName: config.indexName
7047
};
7148

72-
// Remove the mongodb nested object structure
73-
if (isDevelopment) {
74-
console.log('Attempting to connect to MongoDB...');
75-
console.log('MongoDB URI:', ragConfig.connectionString);
76-
}
77-
78-
// Set environment variables from config if they're not already set
79-
if (!process.env.EMBEDDING_API_KEY && config.apiKey) {
80-
process.env.EMBEDDING_API_KEY = config.apiKey;
81-
}
82-
if (!process.env.EMBEDDING_PROVIDER && (config.embedding?.provider || config.provider)) {
83-
process.env.EMBEDDING_PROVIDER = config.embedding?.provider || config.provider;
84-
}
85-
if (!process.env.EMBEDDING_MODEL && (config.embedding?.model || config.model)) {
86-
process.env.EMBEDDING_MODEL = config.embedding?.model || config.model;
87-
}
88-
8949
try {
9050
if (isDevelopment) {
91-
console.log('Creating MongoRAG instance with config...');
51+
console.log('Creating MongoRAG instance...');
9252
}
9353

9454
const rag = new MongoRAG(ragConfig);
9555

9656
if (isDevelopment) {
97-
console.log('Attempting to connect to MongoDB...');
98-
console.log('MongoDB URI:', ragConfig.connectionString);
57+
console.log('Connecting to MongoDB...');
9958
}
10059

10160
await rag.connect();
@@ -128,7 +87,7 @@ export async function ingestData(config, options) {
12887
const chunkedDocs = [];
12988
for (const doc of documents) {
13089
if (isDevelopment) {
131-
console.log(chalk.blue(`📄 Chunking document: ${doc.metadata.filename}`));
90+
console.log(chalk.blue(`📄 Chunking document: ${doc.metadata?.filename}`));
13291
}
13392
const chunks = chunker.chunkDocument(doc);
13493
chunkedDocs.push(...chunks);
@@ -150,6 +109,7 @@ export async function ingestData(config, options) {
150109
});
151110

152111
console.log(chalk.green(`✅ Successfully ingested ${result.processed} documents!`));
112+
await rag.close();
153113
return result;
154114
} catch (error) {
155115
console.error(chalk.red('❌ Ingestion failed:'), error.message);
@@ -169,8 +129,12 @@ async function processDirectory(dirPath, options) {
169129
const subDocs = await processDirectory(filePath, options);
170130
documents.push(...subDocs);
171131
} else if (stat.isFile()) {
172-
const docs = await processFile(filePath, options);
173-
documents.push(...docs);
132+
try {
133+
const docs = await processFile(filePath, options);
134+
documents.push(...docs);
135+
} catch (error) {
136+
console.warn(chalk.yellow(`⚠️ Warning: Failed to process ${filePath}: ${error.message}`));
137+
}
174138
}
175139
}
176140

@@ -194,7 +158,7 @@ async function processFile(filePath, options) {
194158

195159
if (isDevelopment) {
196160
console.log(chalk.blue(`📄 Processed ${filePath}`));
197-
if (doc.metadata.processingFailed) {
161+
if (doc.metadata?.processingFailed) {
198162
console.warn(chalk.yellow(`⚠️ Warning: ${doc.metadata.error}`));
199163
}
200164
}

0 commit comments

Comments (0)