balisujohn · balisujohn · Jul 27, 2024 · Jul 14, 2024 · Jul 27, 2024 · Jul 27, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -14,6 +14,11 @@ if (GGML_CUBLAS)
     add_definitions(-DGGML_USE_CUBLAS)
 endif()
 
+if (GGML_METAL)  # opt-in Metal build, selected with `cmake .. -DGGML_METAL=ON`
+    add_definitions(-DGGML_USE_METAL)  # compiles in the `#ifdef GGML_USE_METAL` sections of main.cpp
+    set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "Embed Metal library")  # set ahead of add_subdirectory(ggml); presumably read by ggml's own CMake - confirm
+endif()
+
 add_subdirectory(ggml)
 
 target_link_libraries(tortoise PUBLIC ggml)

diff --git a/README.md b/README.md
@@ -10,13 +10,14 @@ git clone --recursive https://github.com/balisujohn/tortoise.cpp.git
 # Compiling
 For now, CUDA and CPU only. To compile:
 
-## Compile for CPU
+## Compile for CPU (works on Linux x86 and Mac ARM)
 ````
 mkdir build
 cd build
 cmake .. 
 make
 ````
+This is tested on macOS (ARM).
 
 ## Compile for CUDA
 ````
@@ -27,6 +28,14 @@ make
 ````
 This is tested with Ubuntu 22.04 and cuda 12.0 and a 1070ti
 
+## Compile for macOS with Metal (work in progress)
+````
+mkdir build
+cd build
+cmake .. -DGGML_METAL=ON
+make
+````
+
 # Running
 
 **Only lowercase letters, spaces, and punctuation are supported in the prompt.**

diff --git a/main.cpp b/main.cpp
@@ -656,7 +656,7 @@ bool autoregressive_model_load(const std::string &fname,
 
 #ifdef GGML_USE_METAL
   fprintf(stderr, "%s: using Metal backend\n", __func__);
-  ggml_metal_log_set_callback(ggml_log_callback_default, nullptr);
+  // ggml_metal_log_set_callback(ggml_log_callback_default, nullptr);
   model.backend = ggml_backend_metal_init();
   if (!model.backend) {
     fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
@@ -822,7 +822,7 @@ bool autoregressive_model_load(const std::string &fname,
       }
 
       int32_t nelements = 1;
-      int32_t ne[2] = {1, 1};
+      int32_t ne[4] = {1, 1, 1, 1};
       for (int i = 0; i < n_dims; ++i) {
         fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
         nelements *= ne[i];
@@ -1218,7 +1218,7 @@ bool diffusion_model_load(const std::string &fname, diffusion_model &model) {
 
 #ifdef GGML_USE_METAL
   fprintf(stderr, "%s: using Metal backend\n", __func__);
-  ggml_metal_log_set_callback(ggml_log_callback_default, nullptr);
+  // ggml_metal_log_set_callback(ggml_log_callback_default, nullptr);
   model.backend = ggml_backend_metal_init();
   if (!model.backend) {
     fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
@@ -1559,7 +1559,7 @@ bool diffusion_model_load(const std::string &fname, diffusion_model &model) {
       }
 
       int32_t nelements = 1;
-      int32_t ne[2] = {1, 1};
+      int32_t ne[4] = {1, 1, 1, 1};
       for (int i = 0; i < n_dims; ++i) {
         fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
         nelements *= ne[i];
@@ -1782,7 +1782,7 @@ bool vocoder_model_load(const std::string &fname, vocoder_model &model) {
 
 #ifdef GGML_USE_METAL
   fprintf(stderr, "%s: using Metal backend\n", __func__);
-  ggml_metal_log_set_callback(ggml_log_callback_default, nullptr);
+  // ggml_metal_log_set_callback(ggml_log_callback_default, nullptr);
   model.backend = ggml_backend_metal_init();
   if (!model.backend) {
     fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
@@ -1946,7 +1946,7 @@ bool vocoder_model_load(const std::string &fname, vocoder_model &model) {
       }
 
       int32_t nelements = 1;
-      int32_t ne[2] = {1, 1};
+      int32_t ne[4] = {1, 1, 1, 1};
       for (int i = 0; i < n_dims; ++i) {
         fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
         nelements *= ne[i];