Skip to content

Commit

Permalink
Issue Hunting (#21)
Browse files Browse the repository at this point in the history
* issue hunting + better parcel

* add signal return type & badges
  • Loading branch information
erhant authored Dec 29, 2023
1 parent 3780042 commit 7695fba
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 36 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Firstbatch SDK

[![License: MIT](https://img.shields.io/badge/license-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![NPM](https://img.shields.io/npm/v/firstbatch?logo=npm&color=CB3837)](https://www.npmjs.com/package/firstbatch)

The FirstBatch SDK provides an interface for integrating vector databases and powering personalized AI experiences in your application.

## Key Features
Expand Down
Empty file removed examples/pinecone.ts
Empty file.
16 changes: 11 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "firstbatch",
"version": "0.1.5",
"version": "0.1.6",
"author": "FirstBatch Team <[email protected]>",
"license": "MIT",
"contributors": [
Expand Down Expand Up @@ -30,13 +30,19 @@
"targets": {
"cjs": {
"outputFormat": "commonjs",
"isLibrary": true,
"context": "node"
"isLibrary": false,
"context": "node",
"scopeHoist": false,
"includeNodeModules": false,
"optimize": false
},
"mjs": {
"outputFormat": "esmodule",
"isLibrary": true,
"context": "node"
"isLibrary": false,
"context": "node",
"scopeHoist": false,
"includeNodeModules": false,
"optimize": false
}
},
"files": [
Expand Down
3 changes: 3 additions & 0 deletions src/algorithm/blueprint/signal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ const presetSignals = {
CLICK_AD: new Signal('CLICK_AD', 6.0),
};

/** A union of preset signal names.
* Each signal here has a corresponding `Signal` object that is prepared by FirstBatch.
*/
export type PresetSignalNames = keyof typeof presetSignals;
/**
* Preset set of signals, you can use these or add your own signals to this object.
Expand Down
20 changes: 10 additions & 10 deletions src/client/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ export class FirstBatchClient {
return axiosResponse.data;
}

/** Initializes vectorDB, returns error message as a response if there was one. */
protected async initVectordbScalar(vdbid: string, vecs: number[][], quantiles: number[]) {
// TODO: type of `data`?
return await this.post<any>('embeddings/init_vdb', {
return await this.post<string>('embeddings/init_vdb', {
key: crypto.createHash('md5').update(this.apiKey).digest('hex'),
vdbid: vdbid,
mode: 'scalar',
Expand All @@ -54,6 +54,7 @@ export class FirstBatchClient {
});
}

/** Initializes vectorDB, returns error message as a response if there was one. */
protected async initVectordbProduct(
vdbid: string,
vecs: number[][],
Expand All @@ -64,8 +65,7 @@ export class FirstBatchClient {
Ks: number,
Ds: number
) {
// TODO: type of `data`?
return await this.post<any>('embeddings/init_vdb', {
return await this.post<string>('embeddings/init_vdb', {
key: crypto.createHash('md5').update(this.apiKey).digest('hex'),
vdbid: vdbid,
mode: 'product',
Expand All @@ -80,9 +80,9 @@ export class FirstBatchClient {
});
}

/** Updates history, returns error message as a response if there was one. */
protected async addHistory(session: SessionObject, ids: string[]) {
// TODO: type of data?
return await this.post<any>('embeddings/update_history', {
return await this.post<string>('embeddings/update_history', {
id: session.id,
ids,
});
Expand All @@ -108,24 +108,24 @@ export class FirstBatchClient {
});
}

/** Updates state, returns error message as a response if there was one. */
protected async updateState(session: SessionObject, state: string, batchType: Vertex['batchType']) {
// TODO: type of data?
return await this.post<any>('embeddings/update_state', {
return await this.post<string>('embeddings/update_state', {
id: session.id,
state: state,
batch_type: batchType.toUpperCase(), // NOTE: api expects uppercased values for this field
});
}

/** Adds a signal, returns error message as a response if there was one. */
protected async signal(
session: SessionObject,
vector: number[],
stateName: string,
signal: number,
signalLabel: string
) {
// TODO: type of data?
return this.post<any>('embeddings/signal', {
return this.post<string>('embeddings/signal', {
id: session.id,
state: stateName,
signal: signal,
Expand Down
69 changes: 50 additions & 19 deletions src/client/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@ export class FirstBatch extends FirstBatchClient {
}

/** Add a vector store to the container.
*
* Behind the scenes, this function makes an API call to the Embedding API
* to see if the vector store exists; if it doesn't exist, it will be "sketched"
* with respect to the `quantizerType`, which may take some time.
*
* If you are CERTAIN that the vector store exists & would like to skip this
* existence-check API call, you can simply do:
*
* ```ts
* sdk.store[vdbid] = vectorStore
* ```
*
* Most of the time you will not need to do so, but it may save a few milliseconds in
* a serverless setting where the SDK is created on each function invocation.
*
* @param vdbid vectorDB ID of your choice
* @param vectorStore a `VectorStore` instance
Expand All @@ -65,7 +79,12 @@ export class FirstBatch extends FirstBatchClient {
} else {
this.logger.info(`VectorDB with id ${vdbid} not found, sketching a new VectorDB.`);
if (this.quantizerType === 'scalar') {
// TODO: THIS IS DANGEROUS, it is a side effect on vector store and may cause problems
// FIXME: THIS IS DANGEROUS, it is a side effect on vector store and may cause problems
// in particular, if the same vector store is used for different `vdbid`'s, it will cause
// the quantizer to be overwritten in the same process.
//
// on the other hand, this quantizer is not used outside this function, so perhaps we
// can have the quantizer as a separate object?
vectorStore.quantizer = new ScalarQuantizer(256);

const trainSize = Math.min(
Expand Down Expand Up @@ -143,32 +162,41 @@ export class FirstBatch extends FirstBatchClient {
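* @returns an object whose `success` field is `true` if the signal was added successfully, along with the `source` and `destination` states, `batchType` and `params`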
*/
async addSignal(session: SessionObject, userAction: UserAction, contentId: string) {
const response = await this.getSession(session);
const vectorStore = this.store[response.vdbid];
const sessionResponse = await this.getSession(session);
const vectorStore = this.store[sessionResponse.vdbid];
if (vectorStore === undefined) {
throw new Error('Vector Store is undefined, have you called `addVdb` function?');
}

const query = new FetchQuery(contentId);
const result = await this.store[response.vdbid].fetch(query);
const result = await this.store[sessionResponse.vdbid].fetch(query);

const algoInstance = await this.getAlgorithm(vectorStore.embeddingSize, this.batchSize, response.algorithm, {
factoryId: response.factory_id,
customId: response.custom_id,
const algoInstance = await this.getAlgorithm(vectorStore.embeddingSize, this.batchSize, sessionResponse.algorithm, {
factoryId: sessionResponse.factory_id,
customId: sessionResponse.custom_id,
});

const [nextState] = algoInstance.blueprintStep(response.state, userAction);
const [nextState, batchType, params] = algoInstance.blueprintStep(sessionResponse.state, userAction);

const resp = await this.signal(
const signalResponse = await this.signal(
session,
result.vector.vector,
nextState.name,
userAction.actionType.weight,
userAction.actionType.label
);

if (resp.success && this.enableHistory) {
if (signalResponse.success && this.enableHistory) {
await this.addHistory(session, [contentId]);
}

return resp.success;
return {
success: signalResponse.success,
source: sessionResponse.state,
destination: nextState.name,
batchType,
params,
};
}

/**
Expand All @@ -189,10 +217,13 @@ export class FirstBatch extends FirstBatchClient {
}
): Promise<[string[], QueryMetadata[]]> {
const response = await this.getSession(session);
const vs = this.store[response.vdbid];
const vectorStore = this.store[response.vdbid];
if (vectorStore === undefined) {
throw new Error('Vector Store is undefined, have you called `addVdb` function?');
}
const batchSize = options?.batchSize || this.batchSize;

const algoInstance = await this.getAlgorithm(vs.embeddingSize, batchSize, response.algorithm, {
const algoInstance = await this.getAlgorithm(vectorStore.embeddingSize, batchSize, response.algorithm, {
factoryId: response.factory_id,
customId: response.custom_id,
});
Expand All @@ -211,12 +242,12 @@ export class FirstBatch extends FirstBatchClient {
if (batchType === 'random') {
const batchQuery = generateBatch(
batchSize,
vs.embeddingSize,
vectorStore.embeddingSize,
constants.MIN_TOPK * 2, // TODO: 2 is related to MMR factor here?
params.apply_mmr || params.apply_threshold[0]
);
this.updateState(session, nextState.name, 'random'); // TODO: await?
const batchQueryResult = await vs.multiSearch(batchQuery);
const batchQueryResult = await vectorStore.multiSearch(batchQuery);

[ids, batch] = algoInstance.randomBatch(batchQueryResult, batchQuery, {
applyMMR: params.apply_mmr,
Expand All @@ -234,12 +265,12 @@ export class FirstBatch extends FirstBatchClient {
this.logger.warn('No embeddings found for personalized batch, switching to random batch.');
const batchQuery = generateBatch(
batchSize,
vs.embeddingSize,
vectorStore.embeddingSize,
constants.MIN_TOPK * 2, // TODO: 2 is related to MMR factor here?
true // apply_mmr: true
);
this.updateState(session, nextState.name, 'personalized'); // TODO: await?
const batchQueryResult = await vs.multiSearch(batchQuery);
const batchQueryResult = await vectorStore.multiSearch(batchQuery);
[ids, batch] = algoInstance.randomBatch(batchQueryResult, batchQuery, {
applyMMR: params.apply_mmr, // TODO: this is supposed to be always true above?
applyThreshold: params.apply_threshold,
Expand All @@ -256,7 +287,7 @@ export class FirstBatch extends FirstBatchClient {
applyMMR: params.apply_mmr,
applyThreshold: params.apply_threshold[1],
});
const batchQueryResult = await vs.multiSearch(batchQuery);
const batchQueryResult = await vectorStore.multiSearch(batchQuery);

[ids, batch] = algoInstance.biasedBatch(batchQueryResult, batchQuery, {
applyMMR: params.apply_mmr,
Expand All @@ -270,7 +301,7 @@ export class FirstBatch extends FirstBatchClient {
applyMMR: params.apply_mmr,
applyThreshold: params.apply_threshold[1],
});
const batchQueryResult = await vs.multiSearch(batchQuery);
const batchQueryResult = await vectorStore.multiSearch(batchQuery);

[ids, batch] = algoInstance.sampledBatch(batchQueryResult, batchQuery, {
applyMMR: params.apply_mmr,
Expand Down
2 changes: 1 addition & 1 deletion src/client/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export * from './client';
export * from './core';
export * from './types';
export type * from './types';
2 changes: 1 addition & 1 deletion src/vector/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
export * from './integrations';
export * from './fetch';
export * from './query';
export * from './types';
export type * from './types';
export * from './utils';
export * from './metadata';

0 comments on commit 7695fba

Please sign in to comment.