@@ -6059,29 +6059,29 @@ void ggml_compute_forward_im2col_back_f32(
     }
 }
 
-static void ggml_call_mul_mat(
-        const ggml_compute_params * params,
-        int64_t m, int64_t n, int64_t k,
-        void * a, void * b, void * c) {
-
+static void ggml_call_mul_mat(ggml_type T, const ggml_compute_params * params, int64_t m, int64_t n, int64_t k,
+                              void * a, void * b, void * c) {
+    const ggml_type_traits * traits = ggml_get_type_traits(T);
     struct ggml_tensor src1 = {};
+    src1.type = T;
     src1.ne[0] = k;
     src1.ne[1] = m;
     src1.ne[2] = 1;
     src1.ne[3] = 1;
-    src1.nb[0] = sizeof(float);
-    src1.nb[1] = k * sizeof(float);
+    src1.nb[0] = traits->type_size;
+    src1.nb[1] = k * traits->type_size;
     src1.nb[2] = src1.nb[1];
     src1.nb[3] = src1.nb[2];
     src1.data = a;
 
     struct ggml_tensor src0 = {};
+    src0.type = T;
     src0.ne[0] = k;
     src0.ne[1] = n;
     src0.ne[2] = 1;
     src0.ne[3] = 1;
-    src0.nb[0] = sizeof(float);
-    src0.nb[1] = k * sizeof(float);
+    src0.nb[0] = traits->type_size;
+    src0.nb[1] = k * traits->type_size;
     src0.nb[2] = src0.nb[1];
     src0.nb[3] = src0.nb[2];
     src0.data = b;
@@ -6102,17 +6102,18 @@ static void ggml_call_mul_mat(
     ggml_compute_forward_mul_mat(params, &dst);
 }
 
-
 // ggml_compute_forward_conv_2d
 
-static void ggml_compute_forward_conv_2d_f32(
-        const ggml_compute_params * params,
-        const ggml_tensor *         kernel,  // [KW, KH, IC, OC] - fp32
-        const ggml_tensor *         src,     // [W, H, C, N]
-        ggml_tensor *               dst) {   // [OW, OH, OC, N]
+static void ggml_compute_forward_conv_2d_impl(const ggml_compute_params * params,
+                                              const ggml_tensor *         kernel,       // [KW, KH, IC, OC]
+                                              const ggml_tensor *         src,          // [W, H, C, N]
+                                              ggml_tensor *               dst,          // [OW, OH, OC, N]
+                                              ggml_type                   kernel_type) {
 
     GGML_ASSERT(ggml_is_contiguous(kernel));
-    GGML_ASSERT(kernel->type == GGML_TYPE_F32);
+    GGML_ASSERT(kernel->type == kernel_type);
+
+    const ggml_type_traits * traits = ggml_get_type_traits(kernel_type);
 
     const int32_t stride_x = dst->op_params[0];
     const int32_t stride_y = dst->op_params[1];
@@ -6133,20 +6134,20 @@ static void ggml_compute_forward_conv_2d_f32(
     const int64_t dst_h = dst->ne[1];
 
     float * src_data = (float *) src->data;
-    float * knl_data = (float *) kernel->data;
+    void  * knl_data = kernel->data;
     float * dst_data = (float *) dst->data;
 
     const int64_t knl_n       = knl_w * knl_h * c_in;
     const int64_t patch_total = dst->ne[3] * dst_w * dst_h;
 
-    const int64_t space_per_patch   = knl_n * sizeof(float) + c_out * sizeof(float);
+    const int64_t space_per_patch   = knl_n * traits->type_size + c_out * sizeof(float);
     const int64_t batch_size        = params->wsize / space_per_patch;
     const int64_t patches_per_batch = batch_size > 8 ? (batch_size / 8) * 8 : batch_size;
     const int64_t batch_n           = (patch_total + patches_per_batch - 1) / patches_per_batch;
 
     GGML_ASSERT(patches_per_batch > 0 && batch_size >= 1);
 
-    float * tmp = (float *) params->wdata;
+    void * tmp = params->wdata;
 
     for (int64_t batch_i = 0; batch_i < batch_n; ++batch_i) {
 
@@ -6166,7 +6167,7 @@ static void ggml_compute_forward_conv_2d_f32(
             const int64_t src_y = p % dst_w;
 
             float * src_base = (float *)((char *)src_data + batch_n * src->nb[3]);
-            float * dst_row  = tmp + (p % patches_per_batch) * knl_n;
+            char  * dst_row  = (char *) tmp + (p % patches_per_batch) * knl_n * traits->type_size;
 
             for (int64_t ic = 0; ic < c_in; ++ic) {
                 for (int64_t ky = 0; ky < knl_h; ++ky) {
@@ -6176,11 +6177,19 @@ static void ggml_compute_forward_conv_2d_f32(
 
                         int64_t dst_idx = ic * (knl_h * knl_w) + ky * knl_w + kx;
 
+                        float src_val;
                         if (sy < 0 || sy >= src_h || sx < 0 || sx >= src_w) {
-                            dst_row[dst_idx] = 0.0f;
+                            src_val = 0.0f;
                         } else {
                             float * src_ptr = (float *)((char *)src_base + sx * src->nb[0] + sy * src->nb[1] + ic * src->nb[2]);
-                            dst_row[dst_idx] = *src_ptr;
+                            src_val = *src_ptr;
+                        }
+
+                        char * element_ptr = dst_row + dst_idx * traits->type_size;
+                        if (kernel_type == GGML_TYPE_F32) {
+                            *(float *) element_ptr = src_val;
+                        } else if (kernel_type == GGML_TYPE_F16) {
+                            *(ggml_fp16_t *) element_ptr = GGML_FP32_TO_FP16(src_val);
                         }
                     }
                 }
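With an F16 kernel, the hunk above rounds each F32 activation to half precision when it is written into the im2col patch buffer, so the GEMM runs entirely on F16 inputs. A minimal standalone sketch of that conversion using ggml's public ggml_fp32_to_fp16 / ggml_fp16_to_fp32 helpers; the include, the sample value, and the main() scaffold are illustrative assumptions, not part of this change:

#include <stdio.h>
#include "ggml.h"   // declares ggml_fp16_t, ggml_fp32_to_fp16, ggml_fp16_to_fp32

int main(void) {
    // Round-trip one activation through the F16 storage used by the patch
    // buffer when kernel_type == GGML_TYPE_F16.
    float       src_val = 0.1234567f;
    ggml_fp16_t packed  = ggml_fp32_to_fp16(src_val);  // what the patch buffer stores
    float       seen    = ggml_fp16_to_fp32(packed);   // value the GEMM effectively sees
    printf("f32 %.7f -> f16 -> f32 %.7f\n", src_val, seen);
    return 0;
}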
@@ -6189,11 +6198,10 @@ static void ggml_compute_forward_conv_2d_f32(
 
         ggml_barrier(params->threadpool);
 
-        float * gemm_output = tmp + patches_per_batch * knl_n;
+        float * gemm_output = (float *) ((char *) tmp + patches_per_batch * knl_n * traits->type_size);
 
         // GEMM: patches[patch_n, knl_n] × kernel[knl_n, c_out] = output[patch_n, c_out]
-        ggml_call_mul_mat(params, patch_n, c_out, knl_n,
-                          tmp, knl_data, gemm_output);
+        ggml_call_mul_mat(kernel_type, params, patch_n, c_out, knl_n, tmp, knl_data, gemm_output);
 
         ggml_barrier(params->threadpool);
 
@@ -6211,7 +6219,6 @@ static void ggml_compute_forward_conv_2d_f32(
 
             for (int64_t oc = 0; oc < c_out; ++oc) {
                 const float value = gemm_output[i * c_out + oc];
-                // Write to WHCN layout: dst[w, h, c, n]
                 float * dst_ptr = (float *)((char *)dst_data + dst_x * dst->nb[0] + dst_y * dst->nb[1] + oc * dst->nb[2] + batch_n * dst->nb[3]);
                 *dst_ptr = value;
             }
@@ -6226,11 +6233,7 @@ void ggml_compute_forward_conv_2d(
     const ggml_tensor * src0 = dst->src[0];
     const ggml_tensor * src1 = dst->src[1];
 
-    if (src0->type == GGML_TYPE_F16) {
-        GGML_ASSERT(false && "F16 not supported yet");
-    } else {
-        ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
-    }
+    ggml_compute_forward_conv_2d_impl(params, src0, src1, dst, src0->type);
 }
 
 // ggml_compute_forward_conv_transpose_2d
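The revised space_per_patch keeps the two halves of the per-batch scratch buffer consistent: patches_per_batch rows of knl_n elements stored in the kernel's own type, followed by the F32 GEMM output that gemm_output points into. A rough, self-contained sketch of that sizing arithmetic; the helper name, the 1 MiB scratch figure, and the example dimensions are illustrative assumptions, not values taken from this change:

#include <stdint.h>
#include <stdio.h>

// Mirrors space_per_patch = knl_n * type_size + c_out * sizeof(float):
// bytes of scratch one im2col patch plus its GEMM output row consume.
static int64_t space_per_patch(int64_t knl_n, int64_t c_out, size_t type_size) {
    return knl_n * (int64_t) type_size + c_out * (int64_t) sizeof(float);
}

int main(void) {
    // Example: 3x3 kernel, 64 input channels, 128 output channels, F16 kernel (2-byte elements).
    const int64_t knl_n    = 3 * 3 * 64;
    const int64_t c_out    = 128;
    const size_t  f16_size = 2;

    const int64_t per_patch = space_per_patch(knl_n, c_out, f16_size);
    const int64_t wsize     = 1 << 20;            // pretend 1 MiB of scratch (params->wsize)
    const int64_t batch     = wsize / per_patch;  // patches that fit in one batch
    printf("bytes/patch = %lld, patches/batch = %lld\n",
           (long long) per_patch, (long long) batch);
    return 0;
}

With an F32 kernel the same formula reduces to the old sizing, since type_size is then sizeof(float).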