@@ -615,6 +615,9 @@ class llama_model_kv_override(ctypes.Structure):
 
 
 # struct llama_model_params {
+#     // NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
+#     ggml_backend_dev_t * devices;
+
 #     int32_t n_gpu_layers; // number of layers to store in VRAM
 #     enum llama_split_mode split_mode; // how to split the model across multiple GPUs
 
@@ -680,6 +683,7 @@ class llama_model_params(ctypes.Structure):
         check_tensors: bool
 
     _fields_ = [
+        ("devices", ctypes.c_void_p),  # NOTE: unused
         ("n_gpu_layers", ctypes.c_int32),
         ("split_mode", ctypes.c_int),
         ("main_gpu", ctypes.c_int32),
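
Note (annotation, not part of the diff): the new `devices` slot only pads the ctypes layout to match the updated `llama_model_params` in llama.h; the binding itself leaves it unused. A minimal sketch of what that means in practice, assuming the usual `llama_model_default_params()` entry point from these bindings:

    import llama_cpp

    params = llama_cpp.llama_model_default_params()
    # c_void_p fields default to NULL, which llama.cpp reads as
    # "use all available devices" per the header comment above
    assert params.devices is None
    params.n_gpu_layers = 32  # offloading is still controlled by n_gpu_layers
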
@@ -1898,6 +1902,14 @@ def llama_kv_cache_update(ctx: llama_context_p, /):
     ...
 
 
+# // Check if the context supports KV cache shifting
+# LLAMA_API bool llama_kv_cache_can_shift(struct llama_context * ctx);
+@ctypes_function("llama_kv_cache_can_shift", [llama_context_p_ctypes], ctypes.c_bool)
+def llama_kv_cache_can_shift(ctx: llama_context_p, /) -> bool:
+    """Check if the context supports KV cache shifting"""
+    ...
+
+
 # //
 # // State / sessions
 # //
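
Note (annotation, not part of the diff): the new predicate lets callers guard a context shift before mutating cache positions. A hedged usage sketch, reusing the `llama_kv_cache_seq_rm`/`llama_kv_cache_seq_add` bindings already in this file; `ctx`, `n_keep`, and `n_discard` are illustrative names:

    if llama_cpp.llama_kv_cache_can_shift(ctx):
        # drop n_discard tokens after the n_keep prefix of sequence 0,
        # then slide the remaining entries left to close the gap
        llama_cpp.llama_kv_cache_seq_rm(ctx, 0, n_keep, n_keep + n_discard)
        llama_cpp.llama_kv_cache_seq_add(ctx, 0, n_keep + n_discard, -1, -n_discard)
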
@@ -3621,13 +3633,3 @@ def llama_perf_sampler_reset(chain: llama_sampler_p, /):
     ...
 
 
-# LLAMA_API void llama_perf_dump_yaml(FILE * stream, const struct llama_context * ctx);
-@ctypes_function(
-    "llama_perf_dump_yaml",
-    [ctypes.POINTER(ctypes.c_void_p), llama_context_p_ctypes],
-    None,
-)
-def llama_perf_dump_yaml(
-    stream: ctypes.POINTER(ctypes.c_void_p), ctx: llama_context_p, /
-):
-    ...
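
Note (annotation, not part of the diff): `llama_perf_dump_yaml` is dropped because the symbol was removed upstream in llama.cpp, and resolving a missing symbol would fail when these bindings load. For timings, the `llama_perf_context_print(ctx)` and `llama_perf_sampler_print(chain)` bindings in this section remain available.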