Skip to content

Commit c9b1540

Browse files
authored
Merge pull request #1 from Pash10g/main
New Voyage AI
2 parents b005bd8 + d366273 commit c9b1540

File tree

12 files changed

+428
-24
lines changed

12 files changed

+428
-24
lines changed

bin/commands/init.js

+8-5
Original file line numberDiff line numberDiff line change
@@ -63,23 +63,26 @@ export async function init(configPath) {
6363
type: 'select',
6464
name: 'provider',
6565
message: 'Select an Embedding Provider:',
66-
choices: ['openai', 'deepseek', 'ollama'],
66+
choices: ['voyage','openai', 'deepseek', 'ollama'],
6767
helpMessage: "Available embedding providers:\n" +
68+
"- Voyage: Best MongoDB compatible, requires API key\n" +
6869
"- OpenAI: Most popular, requires API key\n" +
6970
"- DeepSeek: Alternative provider, requires API key\n" +
7071
"- Ollama: Local deployment, no API key needed"
7172
});
7273

7374
// Provider-specific configuration
74-
if (responses.provider === 'openai' || responses.provider === 'deepseek') {
75+
if (responses.provider === 'openai' || responses.provider === 'deepseek' || responses.provider === 'voyage') {
7576
responses.apiKey = await promptWithValidation({
7677
type: 'password',
7778
name: 'apiKey',
78-
message: `Enter your ${responses.provider === 'openai' ? 'OpenAI' : 'DeepSeek'} API Key:`,
79+
message: `Enter your ${responses.provider === 'openai' ? 'OpenAI' : responses.provider === 'voyage' ? 'Voyage' : 'DeepSeek'} API Key:`,
7980
validate: (input) => input && input.length > 0 ? true : 'API key is required',
8081
helpMessage: responses.provider === 'openai'
8182
? "OpenAI API key format: sk-....\n- Get your key from: https://platform.openai.com/api-keys"
82-
: "DeepSeek API key format: dk-....\n- Get your key from DeepSeek's platform"
83+
: (responses.provider === 'voyage'
84+
? "VoyageAI API key format: pa-....\n- Get your key from VoyageAI's platform"
85+
: "DeepSeek API key format: dk-....\n- Get your key from DeepSeek's platform")
8386
});
8487
} else if (responses.provider === 'ollama') {
8588
const availableModels = getOllamaModels();
@@ -104,7 +107,7 @@ export async function init(configPath) {
104107
provider: responses.provider,
105108
...(responses.apiKey && { apiKey: responses.apiKey }),
106109
...(responses.model && { model: responses.model }),
107-
dimensions: 1536,
110+
dimensions: responses.provider === 'voyage' ? 1024 : 1536,
108111
batchSize: 100
109112
},
110113
search: {

bin/utils/prompts.js

+12-5
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ export async function promptForConfigEdits(currentConfig) {
7171
type: 'select',
7272
name: 'provider',
7373
message: 'Select an Embedding Provider:',
74-
choices: ['openai', 'deepseek', 'ollama'],
74+
choices: ['voyage','openai', 'deepseek', 'ollama'],
7575
initial: currentConfig.embedding.provider
7676
}
7777
]);
@@ -140,7 +140,7 @@ export async function promptForProviderConfig() {
140140
type: 'select',
141141
name: 'provider',
142142
message: 'Select an embedding provider:',
143-
choices: ['openai', 'ollama', 'anthropic', 'deepseek'],
143+
choices: ['voyage','openai', 'ollama', 'anthropic', 'deepseek'],
144144
initial: 'openai'
145145
});
146146

@@ -165,7 +165,8 @@ export async function promptForProviderConfig() {
165165
modelChoices = {
166166
'openai': ['text-embedding-3-small', 'text-embedding-3-large'],
167167
'anthropic': ['claude-3-opus-20240229', 'claude-3-sonnet-20240229'],
168-
'deepseek': ['deepseek-coder', 'deepseek-chat']
168+
'deepseek': ['deepseek-coder', 'deepseek-chat'],
169+
'voyage': ['voyage-3', 'voyage-3-large', 'voyage-3-lite', 'voyage-code-3', 'voyage-finance-2', 'voyage-law-2']
169170
}[providerResponse.provider] || [];
170171
defaultModel = modelChoices[0];
171172
}
@@ -204,7 +205,13 @@ export async function promptForProviderConfig() {
204205
'llama2': '4096',
205206
'mistral': '4096',
206207
'mixtral': '4096',
207-
'codellama': '4096'
208+
'codellama': '4096',
209+
'voyage-3': '1024',
210+
'voyage-3-large': '1024',
211+
'voyage-3-lite': '1024',
212+
'voyage-code-3': '1024',
213+
'voyage-finance-2': '1024',
214+
'voyage-law-2': '1024'
208215
}[answers.model] || '4096';
209216
return dims.toString();
210217
},
@@ -227,4 +234,4 @@ export async function promptForProviderConfig() {
227234
baseUrl: config.baseUrl,
228235
dimensions: config.dimensions
229236
};
230-
}
237+
}

bin/utils/providers.js

+6-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export async function testProvider(config) {
2020
return await testOllamaProvider(config);
2121
case 'openai':
2222
case 'deepseek':
23+
case 'voyage':
2324
return await testApiProvider(config);
2425
default:
2526
throw new Error(`Unknown provider: ${config.embedding.provider}`);
@@ -80,6 +81,8 @@ export function getDefaultDimensions(provider) {
8081
return 1536; // For text-embedding-3-small
8182
case 'deepseek':
8283
return 1024;
84+
case 'voyage':
85+
return 1024; // For voyage models
8386
case 'ollama':
8487
return 4096; // For llama2 models
8588
default:
@@ -93,9 +96,11 @@ export function getProviderModels(provider) {
9396
return ['text-embedding-3-small', 'text-embedding-3-large'];
9497
case 'deepseek':
9598
return ['deepseek-embedding'];
99+
case 'voyage':
100+
return ['voyage-3', 'voyage-3-large', 'voyage-3-lite', 'voyage-code-3', 'voyage-finance-2', 'voyage-law-2'];
96101
case 'ollama':
97102
return getOllamaModels();
98103
default:
99104
return [];
100105
}
101-
}
106+
}

examples/basic-usage.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ async function runExample() {
3535
embedding: {
3636
provider: process.env.EMBEDDING_PROVIDER || 'openai',
3737
apiKey: process.env.EMBEDDING_API_KEY,
38-
dimensions: 1536,
38+
dimensions: 1024,
3939
model: process.env.EMBEDDING_MODEL || 'text-embedding-3-small'
4040
}
4141
});
@@ -65,7 +65,7 @@ async function runExample() {
6565

6666
console.log('\nSearch results:');
6767
results.forEach((result, i) => {
68-
console.log(`\n${i + 1}. Score: ${result.score.toFixed(3)}`);
68+
//console.log(`\n${i + 1}. Score: ${result.score.toFixed(3)}`);
6969
console.log(`Content: ${result.content}`);
7070
console.log(`Metadata: ${JSON.stringify(result.metadata)}`);
7171
});

mongodb-rag-docs/docs/api-reference.md

+8-4
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,13 @@ const rag = new MongoRAG({
3838
- `config.database` (string, required): Default MongoDB database name.
3939
- `config.collection` (string, required): Default MongoDB collection name.
4040
- `config.embedding` (object, required):
41-
- `provider` (string, required): Embedding provider (`openai` is supported).
42-
- `apiKey` (string, required): API key for the embedding provider.
43-
- `model` (string, optional): Model name (default: `'text-embedding-3-small'`).
41+
- `provider` (string, required): Embedding provider. Supported values: `openai`, `ollama`, and `voyage`.
42+
- `apiKey` (string, required): API key for the embedding provider (not required for `ollama`).
43+
- `model` (string, optional): Model name. Defaults depend on provider:
44+
- OpenAI: `'text-embedding-3-small'`
45+
- Voyage: `'voyage-3'` (other options: `voyage-3-large`, `voyage-3-lite`, `voyage-code-3`, `voyage-finance-2`, `voyage-law-2`)
46+
- Ollama: no default; a model name must be specified
47+
- `baseUrl` (string, optional): Base URL for Ollama API (default: `'http://localhost:11434'`).
4448
- `batchSize` (number, optional): Batch size for embedding generation (default: `100`).
4549
- `dimensions` (number, optional): Number of dimensions in the embedding space (default: `1536`; the CLI `init` command uses `1024` for the `voyage` provider).
4650
- `config.search` (object, optional):
@@ -171,4 +175,4 @@ try {
171175

172176
For more detailed examples and use cases, refer to:
173177
- [Basic Example](./examples/basic-example.md)
174-
- [Advanced Example](./examples/advanced-example.md)
178+
- [Advanced Example](./examples/advanced-example.md)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
---
2+
id: voyage-example
3+
title: Using VoyageAI Embeddings
4+
sidebar_position: 3
5+
---
6+
7+
# Using VoyageAI Embeddings with MongoDB-RAG
8+
9+
This example demonstrates how to use VoyageAI's embedding models with MongoDB-RAG for vector search.
10+
11+
## Prerequisites
12+
13+
1. Install the `mongodb-rag` and `voyageai` packages:
14+
```bash
15+
npm install mongodb-rag voyageai
16+
```
17+
18+
2. Get a VoyageAI API key from [VoyageAI's website](https://www.voyageai.com/).
19+
20+
3. Set up your environment variables:
21+
```bash
22+
export VOYAGE_API_KEY=your_api_key_here
23+
```
24+
25+
## Basic Usage
26+
27+
```javascript
28+
import MongoRAG from 'mongodb-rag';
29+
import dotenv from 'dotenv';
30+
31+
// Load environment variables
32+
dotenv.config();
33+
34+
async function main() {
35+
// Initialize MongoRAG with VoyageAI provider
36+
const rag = new MongoRAG({
37+
mongoUrl: 'mongodb+srv://your-connection-string',
38+
database: 'ragdb',
39+
collection: 'documents',
40+
embedding: {
41+
provider: 'voyage',
42+
apiKey: process.env.VOYAGE_API_KEY,
43+
model: 'voyage-3' // This is the default model
44+
}
45+
});
46+
47+
// Connect to MongoDB
48+
await rag.connect();
49+
50+
// Ingest some documents
51+
await rag.ingestBatch([
52+
{
53+
documentId: 'doc1',
54+
content: 'MongoDB is a document database with the scalability and flexibility that you want with the querying and indexing that you need.',
55+
metadata: { source: 'MongoDB Website', category: 'Database' }
56+
},
57+
{
58+
documentId: 'doc2',
59+
content: 'Vector search in MongoDB allows you to search for documents based on semantic similarity using vector embeddings.',
60+
metadata: { source: 'MongoDB Documentation', category: 'Search' }
61+
}
62+
]);
63+
64+
// Perform a search
65+
const results = await rag.search('How does vector search work?');
66+
console.log(results);
67+
68+
// Close the connection
69+
await rag.close();
70+
}
71+
72+
main().catch(console.error);
73+
```
74+
75+
## Using Different VoyageAI Models
76+
77+
VoyageAI offers several embedding models optimized for different use cases:
78+
79+
```javascript
80+
// For general purpose (default)
81+
const rag = new MongoRAG({
82+
// MongoDB configuration...
83+
embedding: {
84+
provider: 'voyage',
85+
apiKey: process.env.VOYAGE_API_KEY,
86+
model: 'voyage-3' // Default model
87+
}
88+
});
89+
90+
// For higher quality embeddings
91+
const ragLarge = new MongoRAG({
92+
// MongoDB configuration...
93+
embedding: {
94+
provider: 'voyage',
95+
apiKey: process.env.VOYAGE_API_KEY,
96+
model: 'voyage-3-large' // Higher quality, larger model
97+
}
98+
});
99+
100+
// For faster, more efficient embeddings
101+
const ragLite = new MongoRAG({
102+
// MongoDB configuration...
103+
embedding: {
104+
provider: 'voyage',
105+
apiKey: process.env.VOYAGE_API_KEY,
106+
model: 'voyage-3-lite' // Faster, more efficient
107+
}
108+
});
109+
110+
// For code-specific embeddings
111+
const ragCode = new MongoRAG({
112+
// MongoDB configuration...
113+
embedding: {
114+
provider: 'voyage',
115+
apiKey: process.env.VOYAGE_API_KEY,
116+
model: 'voyage-code-3' // Optimized for code
117+
}
118+
});
119+
120+
// For finance-specific embeddings
121+
const ragFinance = new MongoRAG({
122+
// MongoDB configuration...
123+
embedding: {
124+
provider: 'voyage',
125+
apiKey: process.env.VOYAGE_API_KEY,
126+
model: 'voyage-finance-2' // Optimized for finance
127+
}
128+
});
129+
130+
// For legal-specific embeddings
131+
const ragLaw = new MongoRAG({
132+
// MongoDB configuration...
133+
embedding: {
134+
provider: 'voyage',
135+
apiKey: process.env.VOYAGE_API_KEY,
136+
model: 'voyage-law-2' // Optimized for legal content
137+
}
138+
});
139+
```
140+
141+
## Advanced Configuration
142+
143+
You can combine VoyageAI embeddings with advanced search options:
144+
145+
```javascript
146+
const rag = new MongoRAG({
147+
mongoUrl: 'mongodb+srv://your-connection-string',
148+
database: 'ragdb',
149+
collection: 'documents',
150+
embedding: {
151+
provider: 'voyage',
152+
apiKey: process.env.VOYAGE_API_KEY,
153+
model: 'voyage-3',
154+
batchSize: 50 // Process 50 documents at a time
155+
},
156+
search: {
157+
maxResults: 10,
158+
minScore: 0.75,
159+
similarityMetric: 'cosine'
160+
}
161+
});
162+
163+
// Search with metadata filtering
164+
const results = await rag.search('vector search techniques', {
165+
filter: { category: 'Search' }
166+
});
167+
```
168+
169+
## Error Handling
170+
171+
```javascript
172+
try {
173+
const rag = new MongoRAG({
174+
// MongoDB configuration...
175+
embedding: {
176+
provider: 'voyage',
177+
apiKey: process.env.VOYAGE_API_KEY,
178+
model: 'voyage-3'
179+
}
180+
});
181+
182+
await rag.connect();
183+
const results = await rag.search('vector search');
184+
} catch (error) {
185+
if (error.message.includes('VoyageAI API error')) {
186+
console.error('Error with VoyageAI API:', error.message);
187+
// Handle VoyageAI specific errors
188+
} else {
189+
console.error('General error:', error);
190+
}
191+
}

0 commit comments

Comments
 (0)