Skip to content

Commit 9d3e4c8

Browse files
authored
initial vulkan implement, porting from ncnn (#424)
1 parent bf6d961 commit 9d3e4c8

File tree

213 files changed

+47100
-17
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

213 files changed

+47100
-17
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ option(TENGINE_ARCH_ARM_82 "build armv8.2 for arm" OFF)
7171

7272
# some plugin options
7373
option(TENGINE_ENABLE_ACL "Build with Arm Compute Library(ACL) support" OFF)
74+
option(TENGINE_ENABLE_VULKAN "Build with Vulkan GPU compute support" OFF)
7475

7576
# add_definitions(-DCONFIG_DISABLE_PARAM_ACCESS)
7677
# add_definitions(-DCONFIG_INTERN_ALLOCATOR)

cmake/generate_shader_spv_header.cmake

Lines changed: 585 additions & 0 deletions
Large diffs are not rendered by default.

examples/CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,17 @@ macro (tengine_example name file)
1212
install (TARGETS ${name} DESTINATION bin)
1313
endmacro()
1414

15-
# add examples
15+
# add c++ api examples
16+
if (TENGINE_BUILD_CPP_API)
17+
tengine_example(cpp_tm_classification cpp_tm_classification.cpp)
18+
tengine_example(cpp_tm_mobilenet_ssd cpp_tm_mobilenet_ssd.cpp)
19+
endif()
20+
21+
# add c api examples
1622
tengine_example(tm_classification tm_classification.c)
1723
tengine_example(tm_classification_fp16 tm_classification_fp16.c)
1824
tengine_example(tm_classification_uint8 tm_classification_uint8.c)
25+
tengine_example(tm_classification_vulkan tm_classification_vulkan.c)
1926
tengine_example(tm_mobilenet_ssd tm_mobilenet_ssd.c)
2027
tengine_example(tm_mobilenet_ssd_uint8 tm_mobilenet_ssd_uint8.cpp)
2128
tengine_example(tm_retinaface tm_retinaface.cpp)
@@ -40,6 +47,7 @@ if (${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
4047
"${CMAKE_CURRENT_SOURCE_DIR}/${file}"
4148
"${CMAKE_CURRENT_SOURCE_DIR}/common/tengine_operations.c")
4249
target_link_libraries(${name} ${CMAKE_PROJECT_NAME} ${OpenCV_LIBS})
50+
install (TARGETS ${name} DESTINATION bin)
4351
endmacro()
4452
tengine_example_cv(tm_openpose tm_openpose.cpp)
4553
tengine_example_cv(tm_yolact tm_yolact.cpp)

examples/tm_classification.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
4747
/* set runtime options */
4848
struct options opt;
4949
opt.num_thread = num_thread;
50-
opt.cluster = TENGINE_CLUSTER_LITTLE;
50+
opt.cluster = TENGINE_CLUSTER_ALL;
5151
opt.precision = TENGINE_MODE_FP32;
5252

5353
/* initialize tengine */
@@ -67,7 +67,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
6767
return -1;
6868
}
6969

70-
/* set the input shape to initial the graph, and prerun graph to infer shape */
70+
/* set the shape, data buffer of input_tensor of the graph */
7171
int img_size = img_h * img_w * 3;
7272
int dims[] = {1, 3, img_h, img_w}; // nchw
7373
float* input_data = ( float* )malloc(img_size * sizeof(float));
@@ -85,6 +85,13 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
8585
return -1;
8686
}
8787

88+
if (set_tensor_buffer(input_tensor, input_data, img_size * 4) < 0)
89+
{
90+
fprintf(stderr, "Set input tensor buffer failed\n");
91+
return -1;
92+
}
93+
94+
/* prerun graph, set work options(num_thread, cluster, precision) */
8895
if (prerun_graph_multithread(graph, opt) < 0)
8996
{
9097
fprintf(stderr, "Prerun multithread graph failed.\n");
@@ -93,11 +100,6 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
93100

94101
/* prepare process input data, set the data mem to input tensor */
95102
get_input_data(image_file, input_data, img_h, img_w, mean, scale);
96-
if (set_tensor_buffer(input_tensor, input_data, img_size * 4) < 0)
97-
{
98-
fprintf(stderr, "Set input tensor buffer failed\n");
99-
return -1;
100-
}
101103

102104
/* run graph */
103105
double min_time = __DBL_MAX__;
@@ -137,8 +139,6 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
137139

138140
/* release tengine */
139141
free(input_data);
140-
release_graph_tensor(input_tensor);
141-
release_graph_tensor(output_tensor);
142142
postrun_graph(graph);
143143
destroy_graph(graph);
144144
release_tengine();

examples/tm_classification_vulkan.c

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* License); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
/*
21+
* Copyright (c) 2020, OPEN AI LAB
22+
23+
*/
24+
25+
#include <unistd.h>
26+
#include <stdlib.h>
27+
#include <stdio.h>
28+
29+
#include "common.h"
30+
#include "tengine_c_api.h"
31+
#include "tengine_operations.h"
32+
33+
#define DEFAULT_IMG_H 227
34+
#define DEFAULT_IMG_W 227
35+
#define DEFAULT_SCALE1 1.f
36+
#define DEFAULT_SCALE2 1.f
37+
#define DEFAULT_SCALE3 1.f
38+
#define DEFAULT_MEAN1 104.007
39+
#define DEFAULT_MEAN2 116.669
40+
#define DEFAULT_MEAN3 122.679
41+
#define DEFAULT_LOOP_COUNT 1
42+
#define DEFAULT_THREAD_COUNT 1
43+
44+
int tengine_classify(const char* model_file, const char* image_file, int img_h, int img_w, const float* mean,
45+
const float* scale, int loop_count, int num_thread)
46+
{
47+
/* set runtime options */
48+
struct options opt;
49+
opt.num_thread = num_thread;
50+
opt.cluster = TENGINE_CLUSTER_ALL;
51+
opt.precision = TENGINE_MODE_FP32;
52+
53+
/* inital tengine */
54+
if (init_tengine() != 0)
55+
{
56+
fprintf(stderr, "Initial tengine failed.\n");
57+
return -1;
58+
}
59+
fprintf(stderr, "tengine-lite library version: %s\n", get_tengine_version());
60+
61+
/* create graph, load tengine model xxx.tmfile */
62+
context_t vk_context = create_context("VK", 1);
63+
add_context_device(vk_context, "VK");
64+
graph_t graph = create_graph(vk_context, "tengine", model_file);
65+
set_graph_device(graph, "VK");
66+
67+
if (NULL == graph)
68+
{
69+
fprintf(stderr, "Create graph failed.\n");
70+
fprintf(stderr, "errno: %d \n", get_tengine_errno());
71+
return -1;
72+
}
73+
74+
/* set the input shape to initial the graph, and prerun graph to infer shape */
75+
int img_size = img_h * img_w * 3;
76+
int dims[] = {1, 3, img_h, img_w}; // nchw
77+
float* input_data = ( float* )malloc(img_size * sizeof(float));
78+
79+
tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
80+
if (input_tensor == NULL)
81+
{
82+
fprintf(stderr, "Get input tensor failed\n");
83+
return -1;
84+
}
85+
86+
if (set_tensor_shape(input_tensor, dims, 4) < 0)
87+
{
88+
fprintf(stderr, "Set input tensor shape failed\n");
89+
return -1;
90+
}
91+
92+
if (prerun_graph_multithread(graph, opt) < 0)
93+
{
94+
fprintf(stderr, "Prerun multithread graph failed.\n");
95+
return -1;
96+
}
97+
98+
/* prepare process input data, set the data mem to input tensor */
99+
get_input_data(image_file, input_data, img_h, img_w, mean, scale);
100+
if (set_tensor_buffer(input_tensor, input_data, img_size * 4) < 0)
101+
{
102+
fprintf(stderr, "Set input tensor buffer failed\n");
103+
return -1;
104+
}
105+
106+
/* run graph */
107+
double min_time = __DBL_MAX__;
108+
double max_time = -__DBL_MAX__;
109+
double total_time = 0.;
110+
for (int i = 0; i < loop_count; i++)
111+
{
112+
double start = get_current_time();
113+
if (run_graph(graph, 1) < 0)
114+
{
115+
fprintf(stderr, "Run graph failed\n");
116+
return -1;
117+
}
118+
double end = get_current_time();
119+
double cur = end - start;
120+
total_time += cur;
121+
if (min_time > cur)
122+
min_time = cur;
123+
if (max_time < cur)
124+
max_time = cur;
125+
}
126+
fprintf(stderr, "\nmodel file : %s\n", model_file);
127+
fprintf(stderr, "image file : %s\n", image_file);
128+
fprintf(stderr, "img_h, img_w, scale[3], mean[3] : %d %d , %.3f %.3f %.3f, %.1f %.1f %.1f\n", img_h, img_w,
129+
scale[0], scale[1], scale[2], mean[0], mean[1], mean[2]);
130+
fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", loop_count,
131+
num_thread, total_time / loop_count, max_time, min_time);
132+
fprintf(stderr, "--------------------------------------\n");
133+
134+
/* get the result of classification */
135+
tensor_t output_tensor = get_graph_output_tensor(graph, 0, 0);
136+
float* output_data = ( float* )get_tensor_buffer(output_tensor);
137+
int output_size = get_tensor_buffer_size(output_tensor) / sizeof(float);
138+
139+
print_topk(output_data, output_size, 5);
140+
fprintf(stderr, "--------------------------------------\n");
141+
142+
/* release tengine */
143+
free(input_data);
144+
release_graph_tensor(input_tensor);
145+
release_graph_tensor(output_tensor);
146+
postrun_graph(graph);
147+
destroy_graph(graph);
148+
release_tengine();
149+
150+
return 0;
151+
}
152+
153+
/* Print the command-line synopsis followed by a sample mobilenet invocation to stderr. */
void show_usage()
{
    /* option synopsis */
    static const char synopsis[] =
        "[Usage]: [-h]\n"
        " [-m model_file] [-i image_file]\n"
        " [-g img_h,img_w] [-s scale[0],scale[1],scale[2]] [-w mean[0],mean[1],mean[2]]"
        " [-r loop_count] [-t thread_count]\n";

    /* worked example */
    static const char sample[] =
        "\nmobilenet example: \n"
        " ./classification -m /path/to/mobilenet.tmfile -i /path/to/img.jpg -g 224,224"
        " -s 0.017,0.017,0.017 -w 104.007,116.669,122.679\n";

    /* neither text contains a conversion specifier, so fputs emits exactly what fprintf would */
    fputs(synopsis, stderr);
    fputs(sample, stderr);
}
164+
165+
int main(int argc, char* argv[])
166+
{
167+
int loop_count = DEFAULT_LOOP_COUNT;
168+
int num_thread = DEFAULT_THREAD_COUNT;
169+
char* model_file = NULL;
170+
char* image_file = NULL;
171+
float img_hw[2] = {0.f};
172+
int img_h = 0;
173+
int img_w = 0;
174+
float mean[3] = {-1.f, -1.f, -1.f};
175+
float scale[3] = {0.f, 0.f, 0.f};
176+
177+
int res;
178+
while ((res = getopt(argc, argv, "m:i:l:g:s:w:r:t:h")) != -1)
179+
{
180+
switch (res)
181+
{
182+
case 'm':
183+
model_file = optarg;
184+
break;
185+
case 'i':
186+
image_file = optarg;
187+
break;
188+
case 'g':
189+
split(img_hw, optarg, ",");
190+
img_h = ( int )img_hw[0];
191+
img_w = ( int )img_hw[1];
192+
break;
193+
case 's':
194+
split(scale, optarg, ",");
195+
break;
196+
case 'w':
197+
split(mean, optarg, ",");
198+
break;
199+
case 'r':
200+
loop_count = atoi(optarg);
201+
break;
202+
case 't':
203+
num_thread = atoi(optarg);
204+
break;
205+
case 'h':
206+
show_usage();
207+
return 0;
208+
default:
209+
break;
210+
}
211+
}
212+
213+
/* check files */
214+
if (model_file == NULL)
215+
{
216+
fprintf(stderr, "Error: Tengine model file not specified!\n");
217+
show_usage();
218+
return -1;
219+
}
220+
221+
if (image_file == NULL)
222+
{
223+
fprintf(stderr, "Error: Image file not specified!\n");
224+
show_usage();
225+
return -1;
226+
}
227+
228+
if (!check_file_exist(model_file) || !check_file_exist(image_file))
229+
return -1;
230+
231+
if (img_h == 0)
232+
{
233+
img_h = DEFAULT_IMG_H;
234+
fprintf(stderr, "Image height not specified, use default %d\n", img_h);
235+
}
236+
237+
if (img_w == 0)
238+
{
239+
img_w = DEFAULT_IMG_W;
240+
fprintf(stderr, "Image width not specified, use default %d\n", img_w);
241+
}
242+
243+
if (scale[0] == 0.f || scale[1] == 0.f || scale[2] == 0.f)
244+
{
245+
scale[0] = DEFAULT_SCALE1;
246+
scale[1] = DEFAULT_SCALE2;
247+
scale[2] = DEFAULT_SCALE3;
248+
fprintf(stderr, "Scale value not specified, use default %.1f, %.1f, %.1f\n", scale[0], scale[1], scale[2]);
249+
}
250+
251+
if (mean[0] == -1.0 || mean[1] == -1.0 || mean[2] == -1.0)
252+
{
253+
mean[0] = DEFAULT_MEAN1;
254+
mean[1] = DEFAULT_MEAN2;
255+
mean[2] = DEFAULT_MEAN3;
256+
fprintf(stderr, "Mean value not specified, use default %.1f, %.1f, %.1f\n", mean[0], mean[1], mean[2]);
257+
}
258+
259+
if (tengine_classify(model_file, image_file, img_h, img_w, mean, scale, loop_count, num_thread) < 0)
260+
return -1;
261+
262+
return 0;
263+
}

include/tengine_op_name.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
#define OP_CONV_NAME "Convolution"
4343
#define OP_CONST_NAME "Const"
4444
#define OP_CROP_NAME "Crop"
45-
#define OP_DECONV_NAME "DeConv"
45+
#define OP_DECONV_NAME "Deconvolution"
4646
#define OP_DEPTHTOSPACE_NAME "Depthtospace"
4747
#define OP_DETECTION_OUTPUT_NAME "DetectionOutput"
4848
#define OP_DETECTION_POSTPROCESS_NAME "DetectionPostProcess"
@@ -100,9 +100,9 @@
100100
#define OP_SOFTMAX_NAME "Softmax"
101101
#define OP_SPACETOBATCHND_NAME "Spacetobatchnd"
102102
#define OP_SPACETODEPTH_NAME "Spacetodepth"
103-
#define OP_SPARSETODENSE_NAME "Sparsetodense"
103+
#define OP_SPARSETODENSE_NAME "SparseToDense"
104104
#define OP_SPLIT_NAME "Split"
105-
#define OP_SQUAREDDIFFERENCE_NAME "Squareddifference"
105+
#define OP_SQUAREDDIFFERENCE_NAME "SquaredDifference"
106106
#define OP_SQUEEZE_NAME "Squeeze"
107107
#define OP_STRIDEDSLICE_NAME "StridedSlice"
108108
#define OP_SWAP_AXIS_NAME "SwapAxis"

0 commit comments

Comments
 (0)