Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -705,8 +705,13 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
struct ggml_tensor * leaf = graph->leafs[i];
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
galloc->leaf_allocs[i].leaf.offset = hn->offset;
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
if (leaf->view_src || leaf->data) {
galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
galloc->leaf_allocs[i].leaf.size_max = 0;
} else {
galloc->leaf_allocs[i].leaf.offset = hn->offset;
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
}
}

// reallocate buffers if needed
Expand Down
7 changes: 4 additions & 3 deletions ggml-cuda/common.cuh
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#pragma once

#include "../ggml.h"
#include "../ggml-cuda.h"
#include "ggml.h"
#include "ggml-cuda.h"

#include <memory>

#if defined(GGML_USE_HIPBLAS)
Expand All @@ -11,7 +12,7 @@
#define GGML_COMMON_DECL_CUDA
#define GGML_COMMON_IMPL_CUDA
#endif
#include "../ggml-common.h"
#include "ggml-common.h"

#include <cstdio>
#include <array>
Expand Down
8 changes: 0 additions & 8 deletions ggml-cuda/dmmv.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,6 @@
#include "dequantize.cuh"
#include "convert.cuh"

// dmmv = dequantize_mul_mat_vec
#ifndef GGML_CUDA_DMMV_X
#define GGML_CUDA_DMMV_X 32
#endif
#ifndef GGML_CUDA_MMV_Y
#define GGML_CUDA_MMV_Y 1
#endif

#ifndef K_QUANTS_PER_ITERATION
#define K_QUANTS_PER_ITERATION 2
#else
Expand Down
11 changes: 11 additions & 0 deletions ggml-cuda/dmmv.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
#include "common.cuh"

// dmmv = dequantize_mul_mat_vec

// TODO: remove this?
#ifndef GGML_CUDA_DMMV_X
#define GGML_CUDA_DMMV_X 32
#endif

#ifndef GGML_CUDA_MMV_Y
#define GGML_CUDA_MMV_Y 1
#endif

void ggml_cuda_op_dequantize_mul_mat_vec(
ggml_backend_cuda_context & ctx,
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
Expand Down
2 changes: 2 additions & 0 deletions scripts/sync-ggml-am.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
# src/ggml-backend-impl.h -> ggml-backend-impl.h
# src/ggml-backend.c -> ggml-backend.c
# src/ggml-common.h -> ggml-common.h
# src/ggml-cuda/* -> ggml-cuda/
# src/ggml-cuda.cu -> ggml-cuda.cu
# src/ggml-cuda.h -> ggml-cuda.h
# src/ggml-impl.h -> ggml-impl.h
Expand Down Expand Up @@ -128,6 +129,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
-e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
-e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
-e 's/src\/ggml-common\.h/ggml-common.h/g' \
-e 's/src\/ggml-cuda\//ggml-cuda\//g' \
-e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
-e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
-e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
Expand Down
1 change: 1 addition & 0 deletions scripts/sync-ggml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c
cp -rpv ../ggml/src/ggml-common.h ./ggml-common.h
cp -rpv ../ggml/src/ggml-cuda/* ./ggml-cuda/
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h
Expand Down