1
+ // bin/commands/data/ask.js
2
+ import chalk from 'chalk' ;
3
+ import { isConfigValid } from '../../utils/validation.js' ;
4
+ import MongoRAG from '../../../src/core/MongoRAG.js' ;
5
+ import OpenAI from 'openai' ;
6
+ import fetch from 'node-fetch' ;
7
+
8
// Helper for controlled logging
/**
 * Emits a debug line (plus an optional payload) to the console, but only
 * when NODE_ENV is 'development' or the DEBUG env var is set.
 *
 * @param {string} message - Label printed after the DEBUG prefix.
 * @param {*} [data] - Optional payload; falsy payloads print as ''.
 */
const debug = (message, data) => {
  const enabled = process.env.NODE_ENV === 'development' || process.env.DEBUG;
  if (!enabled) {
    return;
  }
  console.log(chalk.blue(`🔍 DEBUG: ${message}`), data ? data : '');
};
14
+
15
/**
 * Answers a natural-language question by vector-searching the configured
 * MongoDB collection and feeding the matching documents to a chat model.
 *
 * @param {object} config - RAG configuration (database, collection, indexName,
 *   embedding provider settings, optional search defaults).
 * @param {string} query - The user's question.
 * @param {object} [options] - Overrides: maxResults, minScore, model,
 *   showSources, citeSources, fallbackToGeneral.
 * @returns {Promise<{answer: string, model?: string}>} The generated response.
 * @throws {Error} If the config is invalid, the search fails (in production),
 *   or the chat provider errors.
 */
export async function askQuestion(config, query, options = {}) {
  if (!isConfigValid(config)) {
    throw new Error("Configuration missing. Run 'npx mongodb-rag init' first.");
  }

  // Initialize RAG
  const rag = new MongoRAG(config);

  try {
    // Step 1: Connect to MongoDB
    await rag.connect();

    console.log(chalk.cyan(`🔍 Searching for relevant information about: "${query}"`));

    // Debug info in development only
    debug('Using configuration', {
      database: config.database,
      collection: config.collection,
      indexName: config.indexName,
      embeddingFieldPath: config.embeddingFieldPath || 'embedding'
    });

    // Step 2: Search for relevant documents.
    // Use ?? instead of || so an explicit 0 (e.g. minScore: 0) is honored
    // rather than silently replaced by the default.
    const searchOptions = {
      maxResults: options.maxResults ?? config.search?.maxResults ?? 5,
      minScore: options.minScore ?? config.search?.minScore ?? 0.7,
      indexName: config.indexName, // Explicitly include the index name
      skipIndexCreation: false // Allow index creation if needed
    };

    debug('Search options', searchOptions);

    // Try direct search approach if configured
    let searchResults;

    try {
      // Standard search approach
      searchResults = await rag.search(query, searchOptions);
    } catch (error) {
      // If the standard search fails, try a direct approach in development
      if (process.env.NODE_ENV === 'development' || process.env.DEBUG) {
        console.log(chalk.yellow(`⚠️ Standard search failed: ${error.message}`));
        console.log(chalk.yellow("Attempting direct search approach..."));

        // Get collection and query embedding directly from the RAG instance
        const col = await rag._getCollection();
        const embedding = await rag.getEmbedding(query);

        // Build a raw $vectorSearch pipeline mirroring searchOptions
        const searchPipeline = [
          {
            $vectorSearch: {
              index: config.indexName,
              path: config.embeddingFieldPath || "embedding",
              queryVector: embedding,
              numCandidates: 100,
              limit: searchOptions.maxResults || 5
            }
          },
          {
            $project: {
              _id: 0,
              documentId: 1,
              content: 1,
              metadata: 1,
              score: { $meta: "vectorSearchScore" }
            }
          }
        ];

        debug('Direct search pipeline', searchPipeline);

        // Execute direct search
        searchResults = await col.aggregate(searchPipeline).toArray();
      } else {
        // In production, just re-throw the error
        throw error;
      }
    }

    if (searchResults.length === 0) {
      console.log(chalk.yellow("⚠️ No relevant information found."));
      if (!options.fallbackToGeneral) {
        return { answer: "I couldn't find any relevant information to answer your question." };
      }
      console.log(chalk.blue("Attempting to answer based on general knowledge..."));
    } else {
      console.log(chalk.green(`✅ Found ${searchResults.length} relevant documents.`));
    }

    // Step 3: Format context
    const formattedContext = formatContext(searchResults);

    // Step 4: Generate response using the embedding provider
    console.log(chalk.cyan("🧠 Generating response..."));

    // Get chat response based on provider
    const response = await generateResponse(config, formattedContext, query, options);

    // Display the response
    console.log(chalk.bold("\n🤖 Response:"));
    console.log(response.answer);

    // Show sources if requested
    if (options.showSources && searchResults.length > 0) {
      console.log(chalk.bold("\n📚 Sources:"));
      searchResults.forEach((doc, i) => {
        const sourceText = doc.metadata && doc.metadata.source
          ? doc.metadata.source
          : `Document ${i + 1}`;
        console.log(chalk.yellow(`${i + 1}. ${sourceText} (Score: ${doc.score.toFixed(3)})`));
      });
    }

    return response;
  } catch (error) {
    console.error(chalk.red(`❌ Error: ${error.message}`));
    if (process.env.NODE_ENV === 'development' || process.env.DEBUG) {
      console.error(chalk.gray(error.stack));
    }
    throw error;
  } finally {
    // BUG FIX: the original only closed the connection on the success path,
    // leaking the MongoDB client whenever search or generation threw.
    try {
      await rag.close();
    } catch {
      // Ignore close failures (e.g. connect() itself failed) so they don't
      // mask the real error being propagated.
    }
  }
}
147
+
148
+
149
/**
 * Joins search-result documents into a single context string: each document
 * becomes a numbered section with its content, an optional "Source:" line,
 * and a "---" terminator, separated by blank lines.
 *
 * @param {Array<{content: string, metadata?: {source?: string}}>} documents
 * @returns {string} The formatted context block.
 */
function formatContext(documents) {
  const sections = [];
  for (const [index, doc] of documents.entries()) {
    const sourceInfo = doc.metadata?.source ? `Source: ${doc.metadata.source}` : '';
    sections.push(`[Document ${index + 1}]\n${doc.content}\n${sourceInfo}\n---`);
  }
  return sections.join('\n\n');
}
155
+
156
/**
 * Routes chat-completion generation to the configured provider.
 *
 * @param {object} config - RAG configuration; config.embedding selects the provider.
 * @param {string} context - Formatted retrieval context for the system prompt.
 * @param {string} query - The user's question.
 * @param {object} options - May carry `model` (OpenAI) and `citeSources`.
 * @returns {Promise<{answer: string, model: string}>}
 * @throws {Error} If the provider is unsupported or the provider call fails.
 */
async function generateResponse(config, context, query, options) {
  const provider = config.embedding?.provider?.toLowerCase() || 'openai';
  const systemPrompt = createSystemPrompt(context, options);

  switch (provider) {
    case 'openai':
      // BUG FIX: use optional chaining throughout — the provider lookup above
      // already tolerates a missing config.embedding, but these accesses threw
      // an opaque TypeError when it was absent.
      return await generateOpenAIResponse(
        config.embedding?.apiKey,
        systemPrompt,
        query,
        options.model || 'gpt-4o'
      );
    case 'ollama':
      return await generateOllamaResponse(
        config.embedding?.baseUrl || 'http://localhost:11434',
        config.embedding?.model || 'llama3',
        systemPrompt,
        query
      );
    default:
      throw new Error(`Provider ${provider} is not supported for chat responses.`);
  }
}
179
+
180
/**
 * Generates a chat completion via the OpenAI API.
 *
 * @param {string} apiKey - OpenAI API key.
 * @param {string} systemPrompt - System message containing the retrieval context.
 * @param {string} query - The user's question.
 * @param {string} model - Chat model name (e.g. 'gpt-4o').
 * @returns {Promise<{answer: string, model: string}>}
 * @throws {Error} Wrapped provider error with the original attached as `cause`.
 */
async function generateOpenAIResponse(apiKey, systemPrompt, query, model) {
  try {
    const openai = new OpenAI({ apiKey });

    const response = await openai.chat.completions.create({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: query }
      ],
      temperature: 0.7
    });

    // Guard against an empty choices array instead of letting
    // `choices[0].message.content` throw an opaque TypeError.
    const answer = response.choices?.[0]?.message?.content;
    if (answer == null) {
      throw new Error('OpenAI returned no completion choices.');
    }

    return { answer, model };
  } catch (error) {
    // BUG FIX: preserve the original stack/details via `cause` while still
    // presenting a provider-labeled message to the caller.
    throw new Error(`OpenAI API error: ${error.message}`, { cause: error });
  }
}
201
+
202
/**
 * Generates a chat completion via a local Ollama server's /api/chat endpoint.
 *
 * @param {string} baseUrl - Ollama base URL (e.g. 'http://localhost:11434').
 * @param {string} model - Ollama model name (e.g. 'llama3').
 * @param {string} systemPrompt - System message containing the retrieval context.
 * @param {string} query - The user's question.
 * @returns {Promise<{answer: string, model: string}>}
 * @throws {Error} Wrapped provider error with the original attached as `cause`.
 */
async function generateOllamaResponse(baseUrl, model, systemPrompt, query) {
  try {
    const response = await fetch(`${baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: query }
        ],
        stream: false // request a single JSON body rather than a token stream
      })
    });

    if (!response.ok) {
      throw new Error(`Ollama API returned ${response.status}: ${response.statusText}`);
    }

    const data = await response.json();

    return {
      answer: data.message?.content || "Failed to generate a response.",
      model
    };
  } catch (error) {
    // BUG FIX: preserve the original stack/details via `cause` while still
    // presenting a provider-labeled message to the caller.
    throw new Error(`Ollama API error: ${error.message}`, { cause: error });
  }
}
231
+
232
/**
 * Builds the system prompt: the retrieval context (or a placeholder when
 * empty) followed by fixed grounding instructions, with an optional
 * source-citation rule when options.citeSources is exactly true.
 *
 * @param {string} context - Formatted context from formatContext(), may be ''.
 * @param {object} options - Reads only `citeSources`.
 * @returns {string} The complete system prompt.
 */
function createSystemPrompt(context, options) {
  const contextBlock = context || "No specific context available for this query.";
  const citationRule = options.citeSources === true
    ? '6. Cite your sources by referring to the document numbers ([Document X]).'
    : '';

  return `You are a helpful assistant that answers questions based on the provided context.

CONTEXT:
${contextBlock}

INSTRUCTIONS:
1. Use ONLY the information from the provided documents to answer the user's question.
2. If the context doesn't contain enough information to provide a complete answer, state what you know from the context and indicate where information is missing.
3. Do not make up information or use your own knowledge beyond what's in the context.
4. If the answer can be found in multiple documents, synthesize the information.
5. Keep your answer concise but thorough.
${citationRule}

If the provided context doesn't help with the user's question at all, respond with: "I don't have enough information to answer that question."`;
}
0 commit comments