From d3690068de63f7cc324e43269e29f2311a7f47d6 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sun, 24 Mar 2024 08:58:38 +0200
Subject: [PATCH 1/2] imatrix : fix wname for mul_mat_id ops

---
 examples/imatrix/imatrix.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index ea79b9062ddad..931c24a444d01 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -112,6 +112,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         // this is necessary to guarantee equal number of "ncall" for each tensor
         for (int ex = 0; ex < n_as; ++ex) {
             src0 = t->src[2 + ex];
+            wname = src0->name;
             auto& e = m_stats[wname];
             if (e.values.empty()) {
                 e.values.resize(src1->ne[0], 0);

From 901f9f58d7ac579fdea05073efa98b36fd024fe2 Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Sun, 24 Mar 2024 13:35:28 +0100
Subject: [PATCH 2/2] also filter tensor names in mul_mat_id ops

---
 examples/imatrix/imatrix.cpp | 40 +++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 931c24a444d01..264e73f4e66f9 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -50,29 +50,31 @@ class IMatrixCollector {
     void keep_imatrix(int ncall) const;
 };
 
+// strip everything up to and including the first '#', and everything from the second '#' onward
+// e.g. CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight (a name containing no '#' is returned unchanged)
+static std::string filter_tensor_name(const char * name) {
+    std::string wname;
+    const char * p = strchr(name, '#'); // first '#', if any: marks the end of the prefix
+    if (p != NULL) {
+        p = p + 1; // step past the '#' so p points at the start of the kept text
+        const char * q = strchr(p, '#'); // next '#', if any: marks the start of the suffix
+        if (q != NULL) {
+            wname = std::string(p, q - p); // keep only the characters between the two '#'
+        } else {
+            wname = p; // no second '#': keep everything after the first one
+        }
+    } else {
+        wname = name; // no '#' at all: nothing to strip
+    }
+    return wname;
+}
+
 bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
     GGML_UNUSED(user_data);
 
     const struct ggml_tensor * src0 = t->src[0];
     const struct ggml_tensor * src1 = t->src[1];
-
-    std::string wname;
-    {
-        // remove any prefix and suffixes from the name
-        // CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
-        const char * p = strchr(src0->name, '#');
-        if (p != NULL) {
-            p = p + 1;
-            const char * q = strchr(p, '#');
-            if (q != NULL) {
-                wname = std::string(p, q - p);
-            } else {
-                wname = p;
-            }
-        } else {
-            wname = src0->name;
-        }
-    }
+    std::string wname = filter_tensor_name(src0->name);
 
     // when ask is true, the scheduler wants to know if we are interested in data from this tensor
     // if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
@@ -112,7 +114,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         // this is necessary to guarantee equal number of "ncall" for each tensor
         for (int ex = 0; ex < n_as; ++ex) {
             src0 = t->src[2 + ex];
-            wname = src0->name;
+            wname = filter_tensor_name(src0->name);
             auto& e = m_stats[wname];
             if (e.values.empty()) {
                 e.values.resize(src1->ne[0], 0);