From d3690068de63f7cc324e43269e29f2311a7f47d6 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 24 Mar 2024 08:58:38 +0200
Subject: [PATCH 1/2] imatrix : fix wname for mul_mat_id ops

---
 examples/imatrix/imatrix.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index ea79b9062ddad..931c24a444d01 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -112,6 +112,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         // this is necessary to guarantee equal number of "ncall" for each tensor
         for (int ex = 0; ex < n_as; ++ex) {
             src0 = t->src[2 + ex];
+            wname = src0->name;
             auto& e = m_stats[wname];
             if (e.values.empty()) {
                 e.values.resize(src1->ne[0], 0);

From 901f9f58d7ac579fdea05073efa98b36fd024fe2 Mon Sep 17 00:00:00 2001
From: slaren
Date: Sun, 24 Mar 2024 13:35:28 +0100
Subject: [PATCH 2/2] also filter tensor names in mul_mat_id ops

---
 examples/imatrix/imatrix.cpp | 40 +++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 931c24a444d01..264e73f4e66f9 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -50,29 +50,31 @@ class IMatrixCollector {
     void keep_imatrix(int ncall) const;
 };
 
+// remove any prefix and suffixes from the name
+// CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
+static std::string filter_tensor_name(const char * name) {
+    std::string wname;
+    const char * p = strchr(name, '#');
+    if (p != NULL) {
+        p = p + 1;
+        const char * q = strchr(p, '#');
+        if (q != NULL) {
+            wname = std::string(p, q - p);
+        } else {
+            wname = p;
+        }
+    } else {
+        wname = name;
+    }
+    return wname;
+}
+
 bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
     GGML_UNUSED(user_data);
 
     const struct ggml_tensor * src0 = t->src[0];
     const struct ggml_tensor * src1 = t->src[1];
-
-    std::string wname;
-    {
-        // remove any prefix and suffixes from the name
-        // CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
-        const char * p = strchr(src0->name, '#');
-        if (p != NULL) {
-            p = p + 1;
-            const char * q = strchr(p, '#');
-            if (q != NULL) {
-                wname = std::string(p, q - p);
-            } else {
-                wname = p;
-            }
-        } else {
-            wname = src0->name;
-        }
-    }
+    std::string wname = filter_tensor_name(src0->name);
 
     // when ask is true, the scheduler wants to know if we are interested in data from this tensor
     // if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
@@ -112,7 +114,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         // this is necessary to guarantee equal number of "ncall" for each tensor
         for (int ex = 0; ex < n_as; ++ex) {
             src0 = t->src[2 + ex];
-            wname = src0->name;
+            wname = filter_tensor_name(src0->name);
             auto& e = m_stats[wname];
             if (e.values.empty()) {
                 e.values.resize(src1->ne[0], 0);