1+ // bin/commands/data/ask.js
2+ import chalk from 'chalk' ;
3+ import { isConfigValid } from '../../utils/validation.js' ;
4+ import MongoRAG from '../../../src/core/MongoRAG.js' ;
5+ import OpenAI from 'openai' ;
6+ import fetch from 'node-fetch' ;
7+
8+ // Helper for controlled logging
// Gated debug logger: prints only in development or when DEBUG is set.
const debug = (message, data) => {
  const enabled = process.env.NODE_ENV === 'development' || process.env.DEBUG;
  if (!enabled) return;
  console.log(chalk.blue(`🔍 DEBUG: ${message}`), data || '');
};
14+
/**
 * Answer a natural-language question against the configured MongoDB vector
 * store: retrieve relevant documents, then generate an LLM response (RAG).
 *
 * @param {object} config - Validated CLI configuration (database, collection,
 *   indexName, embedding provider settings, optional search defaults).
 * @param {string} query - The user's question.
 * @param {object} [options] - Per-invocation overrides: maxResults, minScore,
 *   model, showSources, citeSources, fallbackToGeneral.
 * @returns {Promise<{answer: string, model?: string}>} The generated response.
 * @throws {Error} If configuration is invalid, the search fails (outside the
 *   development fallback), or the chat provider returns an error.
 */
export async function askQuestion(config, query, options = {}) {
  if (!isConfigValid(config)) {
    throw new Error("Configuration missing. Run 'npx mongodb-rag init' first.");
  }

  let rag;
  let connected = false;

  try {
    // Initialize RAG
    rag = new MongoRAG(config);

    // Step 1: Connect to MongoDB
    await rag.connect();
    connected = true;

    console.log(chalk.cyan(`🔍 Searching for relevant information about: "${query}"`));

    // Debug info in development only
    debug('Using configuration', {
      database: config.database,
      collection: config.collection,
      indexName: config.indexName,
      embeddingFieldPath: config.embeddingFieldPath || 'embedding'
    });

    // Step 2: Search for relevant documents.
    // Use ?? (not ||) so an explicit 0 for maxResults/minScore is honored
    // instead of being silently replaced by the defaults.
    const searchOptions = {
      maxResults: options.maxResults ?? config.search?.maxResults ?? 5,
      minScore: options.minScore ?? config.search?.minScore ?? 0.7,
      indexName: config.indexName, // Explicitly include the index name
      skipIndexCreation: false // Allow index creation if needed
    };

    debug('Search options', searchOptions);

    // Try direct search approach if configured
    let searchResults;

    try {
      // Standard search approach
      searchResults = await rag.search(query, searchOptions);
    } catch (error) {
      // If the standard search fails, try a direct approach in development
      if (process.env.NODE_ENV === 'development' || process.env.DEBUG) {
        console.log(chalk.yellow(`⚠️ Standard search failed: ${error.message}`));
        console.log(chalk.yellow("Attempting direct search approach..."));

        // Get collection
        const col = await rag._getCollection();

        // Get embedding
        const embedding = await rag.getEmbedding(query);

        // Build search pipeline (raw $vectorSearch aggregation)
        const searchPipeline = [
          {
            $vectorSearch: {
              index: config.indexName,
              path: config.embeddingFieldPath || "embedding",
              queryVector: embedding,
              numCandidates: 100,
              limit: searchOptions.maxResults ?? 5
            }
          },
          {
            $project: {
              _id: 0,
              documentId: 1,
              content: 1,
              metadata: 1,
              score: { $meta: "vectorSearchScore" }
            }
          }
        ];

        debug('Direct search pipeline', searchPipeline);

        // Execute direct search
        searchResults = await col.aggregate(searchPipeline).toArray();
      } else {
        // In production, just re-throw the error
        throw error;
      }
    }

    if (searchResults.length === 0) {
      console.log(chalk.yellow("⚠️ No relevant information found."));
      if (!options.fallbackToGeneral) {
        return { answer: "I couldn't find any relevant information to answer your question." };
      }
      console.log(chalk.blue("Attempting to answer based on general knowledge..."));
    } else {
      console.log(chalk.green(`✅ Found ${searchResults.length} relevant documents.`));
    }

    // Step 3: Format context for the LLM
    const formattedContext = formatContext(searchResults);

    // Step 4: Generate response using the configured provider
    console.log(chalk.cyan("🧠 Generating response..."));

    const response = await generateResponse(
      config,
      formattedContext,
      query,
      options
    );

    // Display the response
    console.log(chalk.bold("\n🤖 Response:"));
    console.log(response.answer);

    // Show sources if requested
    if (options.showSources && searchResults.length > 0) {
      console.log(chalk.bold("\n📚 Sources:"));
      searchResults.forEach((doc, i) => {
        const sourceText = doc.metadata && doc.metadata.source
          ? doc.metadata.source
          : `Document ${i + 1}`;
        console.log(chalk.yellow(`${i + 1}. ${sourceText} (Score: ${doc.score.toFixed(3)})`));
      });
    }

    return response;
  } catch (error) {
    console.error(chalk.red(`❌ Error: ${error.message}`));
    if (process.env.NODE_ENV === 'development' || process.env.DEBUG) {
      console.error(chalk.gray(error.stack));
    }
    throw error;
  } finally {
    // Fix: the original closed the connection only on the success path;
    // the early "no results" return and any thrown error leaked the
    // MongoDB connection. Always close once connected.
    if (connected) {
      await rag.close();
    }
  }
}
147+
148+
/**
 * Render retrieved documents as a single numbered context string,
 * separating entries with a blank line and a "---" divider.
 */
function formatContext(documents) {
  const sections = [];
  for (const [i, doc] of documents.entries()) {
    const source = doc.metadata?.source ? `Source: ${doc.metadata.source}` : '';
    sections.push(`[Document ${i + 1}]\n${doc.content}\n${source}\n---`);
  }
  return sections.join('\n\n');
}
155+
/**
 * Dispatch chat generation to the configured embedding provider.
 *
 * @param {object} config - CLI configuration; config.embedding selects the provider.
 * @param {string} context - Formatted retrieval context for the system prompt.
 * @param {string} query - The user's question.
 * @param {object} options - May carry a model override and prompt flags.
 * @returns {Promise<{answer: string, model: string}>}
 * @throws {Error} For providers without chat support.
 */
async function generateResponse(config, context, query, options) {
  const provider = config.embedding?.provider?.toLowerCase() || 'openai';
  const systemPrompt = createSystemPrompt(context, options);

  switch (provider) {
    case 'openai':
      // Fix: use optional chaining — when config.embedding is absent the
      // provider still defaults to 'openai', and the original threw an
      // unhelpful TypeError here instead of reaching the SDK's clear
      // "missing API key" error.
      return await generateOpenAIResponse(
        config.embedding?.apiKey,
        systemPrompt,
        query,
        options.model || 'gpt-4o'
      );
    case 'ollama':
      return await generateOllamaResponse(
        config.embedding?.baseUrl || 'http://localhost:11434',
        config.embedding?.model || 'llama3',
        systemPrompt,
        query
      );
    default:
      throw new Error(`Provider ${provider} is not supported for chat responses.`);
  }
}
179+
/**
 * Generate a chat completion via the OpenAI API.
 *
 * @param {string} apiKey - OpenAI API key.
 * @param {string} systemPrompt - Context-constrained system message.
 * @param {string} query - The user's question.
 * @param {string} model - Chat model name (e.g. 'gpt-4o').
 * @returns {Promise<{answer: string, model: string}>}
 * @throws {Error} Wrapping any SDK failure, with the original error as cause.
 */
async function generateOpenAIResponse(apiKey, systemPrompt, query, model) {
  try {
    const openai = new OpenAI({ apiKey });

    const response = await openai.chat.completions.create({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: query }
      ],
      temperature: 0.7
    });

    return {
      answer: response.choices[0].message.content,
      model
    };
  } catch (error) {
    // Fix: preserve the original error (stack, HTTP status, error code)
    // instead of discarding it when wrapping.
    throw new Error(`OpenAI API error: ${error.message}`, { cause: error });
  }
}
201+
/**
 * Generate a chat completion via a local Ollama server's /api/chat endpoint.
 *
 * @param {string} baseUrl - Ollama server base URL (e.g. http://localhost:11434).
 * @param {string} model - Ollama model name (e.g. 'llama3').
 * @param {string} systemPrompt - Context-constrained system message.
 * @param {string} query - The user's question.
 * @returns {Promise<{answer: string, model: string}>}
 * @throws {Error} On network failure (with cause), non-2xx HTTP status,
 *   or an unparseable response body.
 */
async function generateOllamaResponse(baseUrl, model, systemPrompt, query) {
  let response;
  try {
    response = await fetch(`${baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: query }
        ],
        stream: false // request a single complete message, not a token stream
      })
    });
  } catch (error) {
    // Network-level failure (server unreachable, DNS, etc.).
    // Fix: preserve the original error as cause instead of discarding it.
    throw new Error(`Ollama API error: ${error.message}`, { cause: error });
  }

  // Fix: the original threw this inside its own try and immediately
  // re-caught it, double-wrapping the message as
  // "Ollama API error: Ollama API returned ...". Throw it cleanly here.
  if (!response.ok) {
    throw new Error(`Ollama API returned ${response.status}: ${response.statusText}`);
  }

  let data;
  try {
    data = await response.json();
  } catch (error) {
    throw new Error(`Ollama API error: ${error.message}`, { cause: error });
  }

  return {
    answer: data.message?.content || "Failed to generate a response.",
    model
  };
}
231+
/**
 * Build the system prompt that constrains the model to the retrieved context.
 * A citation instruction is appended only when options.citeSources is strictly true.
 */
function createSystemPrompt(context, options) {
  const contextBlock = context || "No specific context available for this query.";
  const citationRule = options.citeSources === true
    ? '6. Cite your sources by referring to the document numbers ([Document X]).'
    : '';

  return `You are a helpful assistant that answers questions based on the provided context.

CONTEXT:
${contextBlock}

INSTRUCTIONS:
1. Use ONLY the information from the provided documents to answer the user's question.
2. If the context doesn't contain enough information to provide a complete answer, state what you know from the context and indicate where information is missing.
3. Do not make up information or use your own knowledge beyond what's in the context.
4. If the answer can be found in multiple documents, synthesize the information.
5. Keep your answer concise but thorough.
${citationRule}

If the provided context doesn't help with the user's question at all, respond with: "I don't have enough information to answer that question."`;
}