fix: ranking empty inputs (#415)
* fix: ranking empty inputs
* fix: adapt to breaking `llama.cpp` changes
giladgd authored Jan 23, 2025
1 parent d1b4416 commit 86e1bee
Showing 17 changed files with 128 additions and 78 deletions.
18 changes: 17 additions & 1 deletion .releaserc.ts
@@ -52,7 +52,23 @@ const config: Omit<GlobalConfig, "repositoryUrl" | "tagFormat"> = {
}],
["@semantic-release/release-notes-generator", {
writerOpts: {
footerPartial: newFooterTemplate
footerPartial: newFooterTemplate,

// ensure that the "Features" group comes before the "Bug Fixes" group
commitGroupsSort(a: {title: string}, b: {title: string}) {
const order = ["Features", "Bug Fixes"];
const aIndex = order.indexOf(a?.title);
const bIndex = order.indexOf(b?.title);

if (aIndex >= 0 && bIndex >= 0)
return aIndex - bIndex;
else if (aIndex >= 0)
return -1;
else if (bIndex >= 0)
return 1;

return (a?.title || "").localeCompare(b?.title || "");
}
}
}],
["@semantic-release/exec", {
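
As a standalone illustration of the `commitGroupsSort` comparator added above (this sketch is not part of the diff, and the group titles other than "Features" and "Bug Fixes" are hypothetical), sorting a few commit groups puts "Features" first, "Bug Fixes" second, and orders everything else alphabetically:

```ts
// Hypothetical commit groups, for illustration only
const groups = [{title: "Performance Improvements"}, {title: "Bug Fixes"}, {title: "Features"}];

// Same comparator as in the .releaserc.ts change above
function commitGroupsSort(a: {title: string}, b: {title: string}) {
    const order = ["Features", "Bug Fixes"];
    const aIndex = order.indexOf(a?.title);
    const bIndex = order.indexOf(b?.title);

    if (aIndex >= 0 && bIndex >= 0)
        return aIndex - bIndex;
    else if (aIndex >= 0)
        return -1;
    else if (bIndex >= 0)
        return 1;

    return (a?.title || "").localeCompare(b?.title || "");
}

groups.sort(commitGroupsSort);
// => [{title: "Features"}, {title: "Bug Fixes"}, {title: "Performance Improvements"}]
```
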
2 changes: 1 addition & 1 deletion README.md
@@ -27,7 +27,7 @@
* Up-to-date with the latest `llama.cpp`. Download and compile the latest release with a [single CLI command](https://node-llama-cpp.withcat.ai//guide/building-from-source#downloading-a-release)
* Enforce a model to generate output in a parseable format, [like JSON](https://node-llama-cpp.withcat.ai/guide/chat-session#json-response), or even force it to [follow a specific JSON schema](https://node-llama-cpp.withcat.ai/guide/chat-session#response-json-schema)
* [Provide a model with functions it can call on demand](https://node-llama-cpp.withcat.ai/guide/chat-session#function-calling) to retrieve information or perform actions
* [Embedding support](https://node-llama-cpp.withcat.ai/guide/embedding)
* [Embedding and reranking support](https://node-llama-cpp.withcat.ai/guide/embedding)
* [Safe against special token injection attacks](https://node-llama-cpp.withcat.ai/guide/llama-text#input-safety-in-node-llama-cpp)
* Great developer experience with full TypeScript support, and [complete documentation](https://node-llama-cpp.withcat.ai/guide/)
* Much more
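
Since the commit fixes ranking of empty inputs, here is a minimal sketch of how the reranking API is typically used (the method names `createRankingContext` and `rankAndSort`, and the model path, are assumptions based on the linked guide rather than on this diff):

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();
// Hypothetical model path, for illustration only
const model = await llama.loadModel({modelPath: "path/to/reranking-model.gguf"});

const rankingContext = await model.createRankingContext();

// Rank documents against a query; an empty document is the kind of input
// the "ranking empty inputs" fix in this commit addresses
const rankedDocuments = await rankingContext.rankAndSort("What is the capital of France?", [
    "Paris is the capital of France.",
    "The Eiffel Tower is in Paris.",
    ""
]);

console.log(rankedDocuments); // assumed to resolve to the documents ordered by relevance
```
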
7 changes: 7 additions & 0 deletions docs/guide/Vulkan.md
@@ -59,6 +59,13 @@ If you see `Vulkan used VRAM` in the output, it means that Vulkan support is wor

:::

* :::details Windows only: enable long paths support
Open cmd as Administrator and run this command:
```shell
reg add "HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d "1" /f
```
:::

### Building From Source
When you use the [`getLlama`](../api/functions/getLlama) method, if there's no binary that matches the provided options, it'll automatically build `llama.cpp` from source.

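A minimal sketch of requesting the Vulkan backend through `getLlama` (the `gpu: "vulkan"` option and the `llama.gpu` property are assumptions based on the guide, not shown in this diff); per the paragraph above, when no prebuilt binary matches the requested options, `llama.cpp` is built from source automatically:

```ts
import {getLlama} from "node-llama-cpp";

// Request the Vulkan backend explicitly; if no prebuilt Vulkan binary matches
// the current environment, llama.cpp is built from source automatically
const llama = await getLlama({gpu: "vulkan"});

console.log("GPU backend:", llama.gpu); // expected to report "vulkan" when the backend loaded
```
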
32 changes: 17 additions & 15 deletions llama/addon/AddonContext.cpp
@@ -105,13 +105,13 @@ class AddonContextLoadContextWorker : public Napi::AsyncWorker {

void Execute() {
try {
context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
context->ctx = llama_init_from_model(context->model->model, context->context_params);

context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_new_context_with_model\"");
SetError("Unknown error when calling \"llama_init_from_model\"");
}
}
void OnOK() {
@@ -252,7 +252,7 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
sampler->rebuildChainIfNeeded();

const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
const int n_vocab = llama_n_vocab(ctx->model->model);
const int n_vocab = llama_vocab_n_tokens(ctx->model->vocab);

auto & candidates = sampler->tokenCandidates;
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
@@ -525,7 +525,7 @@ Napi::Value AddonContext::InitBatch(const Napi::CallbackInfo& info) {
has_batch = true;
batch_n_tokens = n_tokens;

uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_model_n_embd(model->model), context_params.n_batch);
if (newBatchMemorySize > batchMemorySize) {
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
batchMemorySize = newBatchMemorySize;
@@ -645,7 +645,7 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
return info.Env().Undefined();
}

const int n_embd = llama_n_embd(model->model);
const int n_embd = llama_model_n_embd(model->model);
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
if (embeddings == NULL) {
@@ -716,23 +716,25 @@ Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Nap
const auto draftCtx = draftContext->ctx;
const auto currentModel = model->model;
const auto draftModel = draftContext->model->model;
const auto currentVocab = model->vocab;
const auto draftVocab = draftContext->model->vocab;

if (llama_vocab_type(currentModel) != llama_vocab_type(draftModel)) {
if (llama_vocab_type(currentVocab) != llama_vocab_type(draftVocab)) {
Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

if (llama_add_bos_token(currentModel) != llama_add_bos_token(draftModel) ||
llama_add_eos_token(currentModel) != llama_add_eos_token(draftModel) ||
llama_token_bos(currentModel) != llama_token_bos(draftModel) ||
llama_token_eos(currentModel) != llama_token_eos(draftModel)
if (llama_vocab_get_add_bos(currentVocab) != llama_vocab_get_add_bos(draftVocab) ||
llama_vocab_get_add_eos(currentVocab) != llama_vocab_get_add_eos(draftVocab) ||
llama_vocab_bos(currentVocab) != llama_vocab_bos(draftVocab) ||
llama_vocab_eos(currentVocab) != llama_vocab_eos(draftVocab)
) {
Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

const int currentModelVocabSize = llama_n_vocab(currentModel);
const int draftModelVocabSize = llama_n_vocab(draftModel);
const int currentModelVocabSize = llama_vocab_n_tokens(currentVocab);
const int draftModelVocabSize = llama_vocab_n_tokens(draftVocab);

const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);

@@ -747,8 +749,8 @@

const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
const char * currentTokenText = llama_token_get_text(currentModel, i);
const char * draftTokenText = llama_token_get_text(draftModel, i);
const char * currentTokenText = llama_vocab_get_text(currentVocab, i);
const char * draftTokenText = llama_vocab_get_text(draftVocab, i);
if (std::strcmp(currentTokenText, draftTokenText) != 0) {
Napi::Error::New(
info.Env(),
@@ -767,7 +769,7 @@ Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
float scale = info[1].As<Napi::Number>().FloatValue();

llama_lora_adapter_set(ctx, lora->lora_adapter, scale);
llama_set_adapter_lora(ctx, lora->lora_adapter, scale);

return info.Env().Undefined();
}
2 changes: 1 addition & 1 deletion llama/addon/AddonGrammarEvaluationState.cpp
@@ -22,7 +22,7 @@ AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInf
grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
grammarDef->Ref();

sampler = llama_sampler_init_grammar(model->model, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
sampler = llama_sampler_init_grammar(model->vocab, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
}
}
AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
56 changes: 26 additions & 30 deletions llama/addon/AddonModel.cpp
@@ -8,12 +8,12 @@
#include "AddonModelData.h"
#include "AddonModelLora.h"

static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
if (token < 0 || token == LLAMA_TOKEN_NULL) {
return Napi::Number::From(info.Env(), -1);
}

auto tokenAttributes = llama_token_get_attr(model, token);
auto tokenAttributes = llama_vocab_get_attr(vocab, token);

if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
return Napi::Number::From(info.Env(), -1);
@@ -22,12 +22,12 @@ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* mod
return Napi::Number::From(info.Env(), token);
}

static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
if (token < 0) {
return Napi::Number::From(info.Env(), -1);
}

auto tokenAttributes = llama_token_get_attr(model, token);
auto tokenAttributes = llama_vocab_get_attr(vocab, token);

if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
return Napi::Number::From(info.Env(), -1);
@@ -93,6 +93,7 @@ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
void Execute() {
try {
model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
model->vocab = llama_model_get_vocab(model->model);

model->modelLoaded = model->model != nullptr && model->model != NULL;
} catch (const std::exception& e) {
@@ -190,7 +191,7 @@ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {

void Execute() {
try {
const auto loraAdapter = llama_lora_adapter_init(modelLora->model->model, modelLora->loraFilePath.c_str());
const auto loraAdapter = llama_adapter_lora_init(modelLora->model->model, modelLora->loraFilePath.c_str());

if (loraAdapter == nullptr) {
SetError(
@@ -213,7 +214,7 @@
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_lora_adapter_init\"");
SetError("Unknown error when calling \"llama_adapter_lora_init\"");
}
}
void OnOK() {
@@ -426,7 +427,7 @@ Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
std::string text = info[0].As<Napi::String>().Utf8Value();
bool specialTokens = info[1].As<Napi::Boolean>().Value();

std::vector<llama_token> tokens = common_tokenize(model, text, false, specialTokens);
std::vector<llama_token> tokens = common_tokenize(vocab, text, false, specialTokens);

Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
for (size_t i = 0; i < tokens.size(); ++i) {
@@ -449,10 +450,10 @@ Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
std::string result;
result.resize(std::max(result.capacity(), tokens.ElementLength()));

int n_chars = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
int n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
if (n_chars < 0) {
result.resize(-n_chars);
n_chars = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
GGML_ASSERT(n_chars <= result.size()); // whitespace trimming is performed after per-token detokenization
}

@@ -467,7 +468,7 @@ Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
return info.Env().Undefined();
}

return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
return Napi::Number::From(info.Env(), llama_model_n_ctx_train(model));
}

Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
@@ -476,7 +477,7 @@ Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
return info.Env().Undefined();
}

return Napi::Number::From(info.Env(), llama_n_embd(model));
return Napi::Number::From(info.Env(), llama_model_n_embd(model));
}

Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
@@ -515,68 +516,63 @@ Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
return info.Env().Undefined();
}

auto token = llama_token_bos(model);
if (token == LLAMA_TOKEN_NULL) {
token = llama_token_cls(model);
}

return getNapiControlToken(info, model, token);
return getNapiControlToken(info, vocab, llama_vocab_bos(vocab));
}
Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiControlToken(info, model, llama_token_eos(model));
return getNapiControlToken(info, vocab, llama_vocab_eos(vocab));
}
Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, model, llama_token_nl(model));
return getNapiToken(info, vocab, llama_vocab_nl(vocab));
}
Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, model, llama_token_fim_pre(model));
return getNapiToken(info, vocab, llama_vocab_fim_pre(vocab));
}
Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, model, llama_token_fim_mid(model));
return getNapiToken(info, vocab, llama_vocab_fim_mid(vocab));
}
Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, model, llama_token_fim_suf(model));
return getNapiToken(info, vocab, llama_vocab_fim_suf(vocab));
}
Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, model, llama_token_eot(model));
return getNapiToken(info, vocab, llama_vocab_eot(vocab));
}
Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, model, llama_token_sep(model));
return getNapiToken(info, vocab, llama_vocab_sep(vocab));
}
Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
if (disposed) {
@@ -587,7 +583,7 @@ Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
int token = info[0].As<Napi::Number>().Int32Value();
std::stringstream ss;

const char* str = llama_token_get_text(model, token);
const char* str = llama_vocab_get_text(vocab, token);
if (str == nullptr) {
return info.Env().Undefined();
}
@@ -608,7 +604,7 @@ Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
}

int token = info[0].As<Napi::Number>().Int32Value();
auto tokenAttributes = llama_token_get_attr(model, token);
auto tokenAttributes = llama_vocab_get_attr(vocab, token);

return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
}
@@ -624,25 +620,25 @@ Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {

int token = info[0].As<Napi::Number>().Int32Value();

return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
return Napi::Boolean::New(info.Env(), llama_vocab_is_eog(vocab, token));
}
Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

auto vocabularyType = llama_vocab_type(model);
auto vocabularyType = llama_vocab_type(vocab);

return Napi::Number::From(info.Env(), int32_t(vocabularyType));
}
Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
const bool addBos = llama_add_bos_token(model);
const bool addBos = llama_vocab_get_add_bos(vocab);

return Napi::Boolean::New(info.Env(), addBos);
}
Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
const bool addEos = llama_add_eos_token(model);
const bool addEos = llama_vocab_get_add_eos(vocab);

return Napi::Boolean::New(info.Env(), addEos);
}
1 change: 1 addition & 0 deletions llama/addon/AddonModel.h
@@ -9,6 +9,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
llama_model_params model_params;
std::vector<llama_model_kv_override> kv_overrides;
llama_model* model;
const llama_vocab* vocab;
uint64_t loadedModelSize = 0;
Napi::Reference<Napi::Object> addonExportsRef;
bool hasAddonExportsRef = false;
4 changes: 2 additions & 2 deletions llama/addon/AddonModelLora.cpp
@@ -30,7 +30,7 @@ class AddonModelLoraUnloadLoraWorker : public Napi::AsyncWorker {
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_lora_adapter_free\"");
SetError("Unknown error when calling \"llama_adapter_lora_free\"");
}
}
void OnOK() {
@@ -55,7 +55,7 @@ void AddonModelLora::dispose(bool skipErase) {
if (lora_adapter != nullptr) {
auto loraAdapterToDispose = lora_adapter;
lora_adapter = nullptr;
llama_lora_adapter_free(loraAdapterToDispose);
llama_adapter_lora_free(loraAdapterToDispose);

if (!skipErase && model->data != nullptr) {
model->data->removeLora(this);