From f3fb90b114835cc50c4816787d56bac2fe1180c3 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Thu, 28 Nov 2024 18:27:55 -0500 Subject: [PATCH] feat: Update llama.cpp --- llama_cpp/llama_cpp.py | 22 ++++++++++++---------- vendor/llama.cpp | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index 457c6dddb..bb2ba1993 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -615,6 +615,9 @@ class llama_model_kv_override(ctypes.Structure): # struct llama_model_params { +# // NULL-terminated list of devices to use for offloading (if NULL, all available devices are used) +# ggml_backend_dev_t * devices; + # int32_t n_gpu_layers; // number of layers to store in VRAM # enum llama_split_mode split_mode; // how to split the model across multiple GPUs @@ -680,6 +683,7 @@ class llama_model_params(ctypes.Structure): check_tensors: bool _fields_ = [ + ("devices", ctypes.c_void_p), # NOTE: unused ("n_gpu_layers", ctypes.c_int32), ("split_mode", ctypes.c_int), ("main_gpu", ctypes.c_int32), @@ -1898,6 +1902,14 @@ def llama_kv_cache_update(ctx: llama_context_p, /): ... +# // Check if the context supports KV cache shifting +# LLAMA_API bool llama_kv_cache_can_shift(struct llama_context * ctx); +@ctypes_function("llama_kv_cache_can_shift", [llama_context_p_ctypes], ctypes.c_bool) +def llama_kv_cache_can_shift(ctx: llama_context_p, /) -> bool: + """Check if the context supports KV cache shifting""" + ... + + # // # // State / sessions # // @@ -3621,13 +3633,3 @@ def llama_perf_sampler_reset(chain: llama_sampler_p, /): ... -# LLAMA_API void llama_perf_dump_yaml(FILE * stream, const struct llama_context * ctx); -@ctypes_function( - "llama_perf_dump_yaml", - [ctypes.POINTER(ctypes.c_void_p), llama_context_p_ctypes], - None, -) -def llama_perf_dump_yaml( - stream: ctypes.POINTER(ctypes.c_void_p), ctx: llama_context_p, / -): - ... 
diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 74d73dc85..dc2234408 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 74d73dc85cc2057446bf63cc37ff649ae7cebd80 +Subproject commit dc22344088a7ee81a1e4f096459b03a72f24ccdc