Commit a132897

Merge branch 'main' into patch-6

2 parents 9837a81 + 4192210

File tree

13 files changed (+309, −141 lines)

.github/workflows/build-and-release.yaml

Lines changed: 2 additions & 2 deletions
@@ -42,7 +42,7 @@ jobs:
         shell: cmd
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.21.1
+        uses: pypa/cibuildwheel@v2.22.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""

@@ -69,7 +69,7 @@ jobs:
         platforms: linux/arm64
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.21.1
+        uses: pypa/cibuildwheel@v2.22.0
         env:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/build-wheels-cuda.yaml

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ jobs:
         cache: 'pip'
 
       - name: Setup Mamba
-        uses: conda-incubator/setup-miniconda@v3.0.4
+        uses: conda-incubator/setup-miniconda@v3.1.0
         with:
           activate-environment: "build"
           python-version: ${{ matrix.pyver }}

.github/workflows/build-wheels-metal.yaml

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ jobs:
         shell: cmd
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.21.1
+        uses: pypa/cibuildwheel@v2.22.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/generate-index-from-release.yaml

Lines changed: 0 additions & 1 deletion
@@ -44,7 +44,6 @@ jobs:
         ./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
         ./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
         ./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
-        ./scripts/releases-to-pep-503.sh index/whl/cu125 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu125$'
         ./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
     - name: Upload artifact
       uses: actions/upload-pages-artifact@v3
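
Each releases-to-pep-503.sh call builds a PEP 503 package index from the release tags matching the given pattern; this change drops the cu125 index. To sanity-check what one of these patterns accepts, a minimal sketch (the tags below are hypothetical examples, not release data):

    import re

    # Pattern for the cu124 index, copied from the workflow above.
    pattern = re.compile(r'^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$')

    # Hypothetical release tags, for illustration only.
    for tag in ["v0.3.2-cu124", "0.3.2-cu124", "v0.3.2-cu125", "v0.3.2-metal", "v0.3.2"]:
        print(f"{tag!r:20} -> {bool(pattern.match(tag))}")
    # Only the first two match; cu125 tags no longer get an index at all.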

.github/workflows/test.yaml

Lines changed: 24 additions & 44 deletions
@@ -51,19 +51,11 @@ jobs:
           path: ~/.cache/huggingface/hub
           key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
       - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
         run: |
           python -m pip install --upgrade pip
           python -m pip install uv
           python -m uv pip install -e .[all] --verbose
         shell: bash
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: cmd
       - name: Test with pytest
         run: |
           python -m pytest

@@ -90,30 +82,21 @@ jobs:
         with:
           path: ~/.cache/huggingface/hub
           key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
-
-      - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: bash
 
       - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
         run: |
           python -m pip install --upgrade pip
           python -m pip install uv
           python -m uv pip install -e .[all] --verbose
-        shell: cmd
+        shell: cmd
 
       - name: Test with pytest
         run: |
           python -m pytest
 
   build-macos:
     needs: download-model
-    runs-on: macos-latest
+    runs-on: macos-13
     strategy:
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12"]

@@ -128,35 +111,33 @@ jobs:
         python-version: ${{ matrix.python-version }}
         cache: 'pip'
 
+      - name: System Info
+        run: |
+          uname -a
+          sysctl -n machdep.cpu.brand_string
+          python3 -c "import platform; print(platform.machine(), platform.architecture())"
+
       - name: Restore model cache
         uses: actions/cache@v4
         with:
           path: ~/.cache/huggingface/hub
           key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
 
       - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
+          python3 -m pip install --upgrade pip
+          python3 -m pip install uv
+          python3 -m uv pip install -e .[all] --verbose
+          CMAKE_ARGS="-DLLAMA_METAL=off" python3 -m uv pip install .[all] --verbose
         shell: bash
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: cmd
-
       - name: Test with pytest
         run: |
-          python -m pytest
+          python3 -m pytest
 
   build-macos-metal:
     needs: download-model
-    runs-on: macos-latest
+    runs-on: macos-13
     steps:
       - uses: actions/checkout@v4
         with:

@@ -167,25 +148,24 @@ jobs:
       with:
         python-version: "3.9"
 
+      - name: System Info
+        run: |
+          uname -a
+          sysctl -n machdep.cpu.brand_string
+          python3 -c "import platform; print(platform.machine(), platform.architecture())"
+
       - name: Restore model cache
         uses: actions/cache@v4
         with:
           path: ~/.cache/huggingface/hub
           key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
 
-      - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
+      - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          CMAKE_ARGS="-DLLAMA_METAL=on" python -m uv pip install .[all] --verbose
+          python3 -m pip install --upgrade pip
+          CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install .[all] --verbose
         shell: bash
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        run: |
-          python -m pip install --upgrade pip
-          CMAKE_ARGS="-DGGML_METAL=on" python -m pip install .[all] --verbose
       - name: Test with pytest
         run: |
-          python -m pytest
+          python3 -m pytest

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.2]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@74d73dc85cc2057446bf63cc37ff649ae7cebd80
+
 ## [0.3.1]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@c919d5db39c8a7fcb64737f008e4b105ee0acd20

CMakeLists.txt

Lines changed: 64 additions & 12 deletions
@@ -6,6 +6,10 @@ option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python
 option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON)
 
 function(llama_cpp_python_install_target target)
+    if(NOT TARGET ${target})
+        return()
+    endif()
+
     install(
         TARGETS ${target}
         LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib

@@ -55,24 +59,59 @@ if (LLAMA_BUILD)
     set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
     set(CMAKE_SKIP_RPATH FALSE)
 
-    # Building llama
-    if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
-        # Need to disable these llama.cpp flags on Apple x86_64,
-        # otherwise users may encounter invalid instruction errors
-        set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
-        set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
-        set(GGML_FMA "Off" CACHE BOOL "gml: enable FMA" FORCE)
-        set(GGML_F16C "Off" CACHE BOOL "gml: enable F16C" FORCE)
-    endif()
+    # Enable building of the common library
+    set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build llama.cpp common library" FORCE)
 
+    # Architecture detection and settings for Apple platforms
     if (APPLE)
-        set(GGML_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
+        # Get the target architecture
+        execute_process(
+            COMMAND uname -m
+            OUTPUT_VARIABLE HOST_ARCH
+            OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+
+        # If CMAKE_OSX_ARCHITECTURES is not set, use the host architecture
+        if(NOT CMAKE_OSX_ARCHITECTURES)
+            set(CMAKE_OSX_ARCHITECTURES ${HOST_ARCH} CACHE STRING "Build architecture for macOS" FORCE)
+        endif()
+
+        message(STATUS "Host architecture: ${HOST_ARCH}")
+        message(STATUS "Target architecture: ${CMAKE_OSX_ARCHITECTURES}")
+
+        # Configure based on target architecture
+        if(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
+            # Intel Mac settings
+            set(GGML_AVX "OFF" CACHE BOOL "ggml: enable AVX" FORCE)
+            set(GGML_AVX2 "OFF" CACHE BOOL "ggml: enable AVX2" FORCE)
+            set(GGML_FMA "OFF" CACHE BOOL "ggml: enable FMA" FORCE)
+            set(GGML_F16C "OFF" CACHE BOOL "ggml: enable F16C" FORCE)
+        endif()
+
+        # Metal settings (enable for both architectures)
+        set(GGML_METAL "ON" CACHE BOOL "ggml: enable Metal" FORCE)
+        set(GGML_METAL_EMBED_LIBRARY "ON" CACHE BOOL "ggml: embed metal library" FORCE)
     endif()
 
     add_subdirectory(vendor/llama.cpp)
     llama_cpp_python_install_target(llama)
     llama_cpp_python_install_target(ggml)
-
+
+    llama_cpp_python_install_target(ggml-base)
+
+    llama_cpp_python_install_target(ggml-amx)
+    llama_cpp_python_install_target(ggml-blas)
+    llama_cpp_python_install_target(ggml-can)
+    llama_cpp_python_install_target(ggml-cpu)
+    llama_cpp_python_install_target(ggml-cuda)
+    llama_cpp_python_install_target(ggml-hip)
+    llama_cpp_python_install_target(ggml-kompute)
+    llama_cpp_python_install_target(ggml-metal)
+    llama_cpp_python_install_target(ggml-musa)
+    llama_cpp_python_install_target(ggml-rpc)
+    llama_cpp_python_install_target(ggml-sycl)
+    llama_cpp_python_install_target(ggml-vulkan)
+
     # Workaround for Windows + CUDA https://github.com/abetlen/llama-cpp-python/issues/563
     if (WIN32)
         install(

@@ -106,7 +145,7 @@ if (LLAMA_BUILD)
     # Building llava
     add_subdirectory(vendor/llama.cpp/examples/llava)
     set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
-    # Set CUDA_ARCHITECTURES to OFF on windows
+
     if (WIN32)
         set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
     endif()

@@ -121,5 +160,18 @@ if (LLAMA_BUILD)
             DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
         )
     endif()
+
+    # Fix for llava build: Add include directory for llama.h
+    # Move these commands after the add_subdirectory call
+    target_include_directories(llava PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+    target_include_directories(llava PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
+
+    if (BUILD_SHARED_LIBS)
+        target_include_directories(llava_shared PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+        target_include_directories(llava_shared PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
+    endif()
+
+    target_include_directories(llama-llava-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+    target_include_directories(llama-minicpmv-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
     endif()
 endif()

llama_cpp/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.3.1"
+__version__ = "0.3.2"
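
The package version is exposed at runtime, so a quick check after upgrading looks like this (a trivial sketch):

    import llama_cpp

    print(llama_cpp.__version__)  # expect "0.3.2" after this release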

llama_cpp/_internals.py

Lines changed: 0 additions & 14 deletions
@@ -362,13 +362,6 @@ def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: i
             self.ctx, llama_cpp.byref(candidates.candidates), p, min_keep
         )
 
-    def sample_tail_free(
-        self, candidates: "_LlamaTokenDataArray", z: float, min_keep: int
-    ):
-        llama_cpp.llama_sample_tail_free(
-            self.ctx, llama_cpp.byref(candidates.candidates), z, min_keep
-        )
-
     def sample_typical(
         self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int
     ):

@@ -685,9 +678,6 @@ def sample(
             ctx_main.sample_top_k(
                 token_data_array, self.params.top_k, min_keep=min_keep
             )
-            ctx_main.sample_tail_free(
-                token_data_array, self.params.tfs_z, min_keep=min_keep
-            )
             ctx_main.sample_typical(
                 token_data_array, self.params.typical_p, min_keep=min_keep
             )

@@ -776,10 +766,6 @@ def add_min_p(self, p: float, min_keep: int):
         sampler = llama_cpp.llama_sampler_init_min_p(p, min_keep)
         self._add_sampler(sampler)
 
-    def add_tail_free(self, z: float, min_keep: int):
-        sampler = llama_cpp.llama_sampler_init_tail_free(z, min_keep)
-        self._add_sampler(sampler)
-
     def add_typical(self, p: float, min_keep: int):
         sampler = llama_cpp.llama_sampler_init_typical(p, min_keep)
         self._add_sampler(sampler)
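
Tail-free sampling was removed upstream in llama.cpp, so the bindings delete sample_tail_free/add_tail_free rather than wrap a symbol that no longer exists; the chain now runs top-k, then typical, then top-p, then min-p. For intuition, here is a minimal pure-Python sketch of two of those pruning stages (illustrative only; the real sampling happens inside the C library):

    import math

    def top_k(logits: dict, k: int) -> dict:
        # Keep only the k highest-logit tokens.
        keep = sorted(logits, key=logits.get, reverse=True)[:k]
        return {t: logits[t] for t in keep}

    def top_p(logits: dict, p: float) -> dict:
        # Keep the smallest high-probability set whose mass reaches p.
        z = sum(math.exp(v) for v in logits.values())
        probs = {t: math.exp(v) / z for t, v in logits.items()}
        kept, mass = {}, 0.0
        for t in sorted(probs, key=probs.get, reverse=True):
            kept[t] = logits[t]
            mass += probs[t]
            if mass >= p:
                break
        return kept

    # Same order as the chain above, minus the deleted tail-free stage.
    toy_logits = {0: 2.0, 1: 1.5, 2: 0.1, 3: -1.0}
    print(top_p(top_k(toy_logits, 3), 0.9))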

llama_cpp/llama.py

Lines changed: 5 additions & 6 deletions
@@ -745,7 +745,6 @@ def apply_func(token_data_array: llama_cpp.llama_token_data_array_p):
             n_probs = 0
             min_keep = max(1, n_probs)
             sampler.add_top_k(top_k)
-            sampler.add_tail_free(tfs_z, min_keep)
             sampler.add_typical(typical_p, min_keep)
             sampler.add_top_p(top_p, min_keep)
             sampler.add_min_p(min_p, min_keep)
@@ -1142,7 +1141,7 @@ def _create_completion(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-        logit_bias: Optional[Dict[str, float]] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Union[
         Iterator[CreateCompletionResponse], Iterator[CreateCompletionStreamResponse]
     ]:

@@ -1762,7 +1761,7 @@ def create_completion(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-        logit_bias: Optional[Dict[str, float]] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
         """Generate text from a prompt.

@@ -1859,7 +1858,7 @@ def __call__(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-        logit_bias: Optional[Dict[str, float]] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
         """Generate text from a prompt.

@@ -1952,7 +1951,7 @@ def create_chat_completion(
         model: Optional[str] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-        logit_bias: Optional[Dict[str, float]] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
         logprobs: Optional[bool] = None,
         top_logprobs: Optional[int] = None,
     ) -> Union[
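
These signature changes make the annotations match what the code expects: logit_bias keys are integer token IDs, not strings. A hedged usage sketch (the model path, prompt, and bias value are placeholders, not from this commit):

    from llama_cpp import Llama

    llm = Llama(model_path="./models/model.gguf")  # hypothetical path

    # Pick a token ID to bias against, e.g. the first token of "Hello".
    token_id = llm.tokenize(b"Hello", add_bos=False)[0]

    out = llm.create_completion(
        "Say hello:",
        max_tokens=16,
        logit_bias={token_id: -10.0},  # int key, additive bias on that token's logit
    )
    print(out["choices"][0]["text"])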
@@ -2074,7 +2073,7 @@ def __getstate__(self):
             use_mlock=self.model_params.use_mlock,
             kv_overrides=self.kv_overrides,
             # Context Params
-            seed=self.context_params.seed,
+            seed=self._seed,
             n_ctx=self.context_params.n_ctx,
             n_batch=self.n_batch,
             n_ubatch=self.context_params.n_ubatch,
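
After the llama.cpp sampler refactor the seed is tracked on the wrapper (self._seed) rather than in the context params, so pickling must read the tracked attribute or the restored object would get a stale seed. A generic sketch of the pattern (class and field names here are illustrative, not the library's):

    import pickle

    class Model:
        def __init__(self, seed: int):
            self._seed = seed        # what the caller actually asked for
            self.internal_seed = 0   # internal state; may not reflect the request

        def __getstate__(self):
            return {"seed": self._seed}  # persist the tracked seed

        def __setstate__(self, state):
            self.__init__(seed=state["seed"])

    restored = pickle.loads(pickle.dumps(Model(seed=42)))
    assert restored._seed == 42  # the original seed survives the round trip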
