Skip to content

Commit bbd4583

Browse files
Abid KarumannilAbid Karumannil
authored and committed
AKS update to 3.5 (#1164)
Co-authored-by: Abid Karumannil <[email protected]>
1 parent b56bcce commit bbd4583

File tree

36 files changed

+1072
-130
lines changed

36 files changed

+1072
-130
lines changed

src/AKS/.gitattributes

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
*.caffemodel filter=lfs diff=lfs merge=lfs -text
2+
*.pb filter=lfs diff=lfs merge=lfs -text
3+
*.npz filter=lfs diff=lfs merge=lfs -text
4+
*.mdb filter=lfs diff=lfs merge=lfs -text
5+
*.weights filter=lfs diff=lfs merge=lfs -text
6+
*.JPEG filter=lfs diff=lfs merge=lfs -text
7+
*.xclbin filter=lfs diff=lfs merge=lfs -text
8+
graph_zoo/meta_*/** filter=lfs diff=lfs merge=lfs -text

src/AKS/.gitignore

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
*.py.swp
2+
*.pyc
3+
*.so
4+
*.so.*
5+
*.obj
6+
*.o
7+
*.exe
8+
*.tfevents*
9+
*.nfs*
10+
*.pyc
11+
*.events*
12+
*.swp
13+
*.bak
14+
*~
15+
kernel_src/*/build
16+
examples/build
17+
\#*\#

src/AKS/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.12)
1616

1717
set(CMAKE_CXX_STANDARD 17)
1818
# Project Name and Version
19-
project(aks VERSION 2.5.0 LANGUAGES C CXX)
19+
project(aks VERSION 3.5.0 LANGUAGES C CXX)
2020

2121
include(${CMAKE_SOURCE_DIR}/cmake/VitisCommon.cmake)
2222

src/AKS/README.md

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,3 @@
1-
<table class="sphinxhide">
2-
<tr>
3-
<td align="center"><img src="https://raw.githubusercontent.com/Xilinx/Image-Collateral/main/xilinx-logo.png" width="30%"/><h1>Vitis AI</h1><h0>Adaptable & Real-Time AI Inference Acceleration</h0>
4-
</td>
5-
</tr>
6-
</table>
7-
81
# AI Kernel Scheduler
92

103
## Table of Contents
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
/*
2+
* Copyright 2019 Xilinx Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>

#include <opencv2/core/core.hpp>
#include <opencv2/imgcodecs.hpp>

#include <aks/AksSysManagerExt.h>
#include <aks/AksNodeParams.h>
#include <aks/AksTensorBuffer.h>
#include <aks/AksBatchTensorBuffer.h>

using namespace AKS;
32+
33+
// Print the command-line help of this example to stdout.
//   exename : name of the executable, echoed in the usage line
void usage (const char* exename) {
  std::cout << "[INFO] Usage: " << std::endl
            << "[INFO] ---------------------- " << std::endl
            << "[INFO] " << exename << " <Image Directory Path>" << std::endl
            << std::endl;
}
39+
40+
// Leave this empty to stop writing o/p to disk
41+
std::string _output_dir ("facedetect_outputs");
42+
43+
// Queue and Mutex to push/pop futures
44+
using aks_future_t = std::future<std::vector<std::unique_ptr<vart::TensorBuffer>>>;
45+
using aks_batch_img_t = std::vector<std::string>;
46+
std::queue<std::pair<aks_future_t, aks_batch_img_t>> future_q;
47+
std::mutex g_mtx;
48+
std::atomic<bool> thread_continue {true};
49+
50+
void write_output (
51+
std::vector<std::unique_ptr<vart::TensorBuffer>> results,
52+
aks_batch_img_t img_paths)
53+
{
54+
auto nboxes = results.front()->get_tensor()->get_element_num() / 6;
55+
if(!_output_dir.empty()) {
56+
if(!boost::filesystem::exists(_output_dir))
57+
boost::filesystem::create_directory(_output_dir);
58+
59+
int boxcnt = 0;
60+
for(int b = 0; b < img_paths.size(); ++b) {
61+
std::vector<std::string> tokens;
62+
boost::split(tokens, img_paths[b], boost::is_any_of("/,."));
63+
auto& imgFile = tokens[tokens.size()-2];
64+
65+
// Append output_dir and .txt to get output file
66+
std::string output_file = _output_dir + "/" + imgFile + ".txt";
67+
ofstream f(output_file);
68+
if(!f) {
69+
std::cerr << "[WARNING] : Couldn't open " << output_file << std::endl;
70+
std::cerr << "[WARNING] : Check if path is correct" << std::endl;
71+
return;
72+
}
73+
74+
auto* boxptr = reinterpret_cast<float*>(results.front()->data().first);
75+
for (int box = 0; box < nboxes; ++box) {
76+
if (boxptr[box*6] == b) {
77+
float score = boxptr[(box*6)+5];
78+
float x = boxptr[(box*6)+1];
79+
float y = boxptr[(box*6)+2];
80+
float w = boxptr[(box*6)+3];
81+
float h = boxptr[(box*6)+4];
82+
83+
f << score << " ";
84+
f << x << " " << y << " ";
85+
f << w << " " << h << '\n';
86+
}
87+
}
88+
f.close();
89+
}
90+
}
91+
}
92+
93+
// Wait for jobs and get results
94+
void get_results (void)
95+
{
96+
while (true) {
97+
g_mtx.lock();
98+
if (future_q.empty()) {
99+
g_mtx.unlock();
100+
if (!thread_continue) break;
101+
} else {
102+
auto element = std::move(future_q.front());
103+
future_q.pop();
104+
g_mtx.unlock();
105+
// Get data from future
106+
auto results = element.first.get();
107+
auto img_paths = element.second;
108+
// Write to disk
109+
write_output (std::move(results), std::move(img_paths));
110+
}
111+
}
112+
}
113+
114+
// Face-Detection Inference using
115+
// DPUCADF8H on Alveo-u200/u250 FPGAs
116+
int main(int argc, char **argv)
117+
{
118+
int ret = 0;
119+
if (argc != 2) {
120+
std::cout << "[ERROR] Usage invalid!" << std::endl;
121+
usage(argv[0]);
122+
return -1;
123+
}
124+
125+
// Get image directory path
126+
std::string imgDirPath (argv[1]);
127+
128+
// Get AKS System Manager instance
129+
AKS::SysManagerExt * sysMan = AKS::SysManagerExt::getGlobal();
130+
131+
// Load all kernels
132+
sysMan->loadKernels("kernel_zoo");
133+
134+
// Load graph
135+
sysMan->loadGraphs("graph_zoo/graph_facedetect_u200_u250_proteus.json");
136+
137+
// Get graph instance
138+
AKS::AIGraph *graph = sysMan->getGraph("facedetect");
139+
140+
if(!graph){
141+
cout<<"[ERROR] Couldn't find requested graph"<<endl;
142+
AKS::SysManagerExt::deleteGlobal();
143+
return -1;
144+
}
145+
146+
std::vector<std::string> images;
147+
int i = 0;
148+
// Load Dataset
149+
for (boost::filesystem::directory_iterator it {imgDirPath};
150+
it != boost::filesystem::directory_iterator{}; it++) {
151+
std::string fileExtension = it->path().extension().string();
152+
if(fileExtension == ".jpg" || fileExtension == ".JPEG" || fileExtension == ".png")
153+
images.push_back((*it).path().string());
154+
}
155+
156+
constexpr int bt = 4; // DPU batch size
157+
int left_out = images.size() % bt;
158+
if (left_out) { // Make a batch complete
159+
for (int b = 0; b < (bt-left_out); ++b) {
160+
std::string s = images.back();
161+
images.push_back(s);
162+
}
163+
}
164+
165+
int nImages = images.size();
166+
std::cout << "[INFO] Running " << nImages << " Images" << std::endl;
167+
168+
// Start a thread to wait for results
169+
std::thread wait_thread (get_results);
170+
171+
sysMan->resetTimer();
172+
auto t1 = std::chrono::steady_clock::now();
173+
174+
// User input
175+
std::cout << "[INFO] Starting enqueue ... " << std::endl;
176+
for (int i = 0; i < images.size(); i+=bt) {
177+
// Create input tensors
178+
std::vector<std::unique_ptr<xir::Tensor>> tensors;
179+
180+
// Create batch of images
181+
std::vector<cv::Mat> batchimgs;
182+
std::vector<std::string> image_paths;
183+
184+
for (int b = 0; b < bt; ++b) {
185+
cv::Mat img = cv::imread(images[i+b]);
186+
std::vector<int> shape = { 1, img.rows, img.cols, img.channels() };
187+
auto tensorOut =
188+
xir::Tensor::create("imread_output", shape,
189+
xir::create_data_type<unsigned char>());
190+
191+
tensors.push_back(std::move(tensorOut));
192+
batchimgs.push_back(std::move(img));
193+
image_paths.push_back(images[i+b]);
194+
}
195+
196+
// Create input buffer & fill image data
197+
std::unique_ptr<AKS::AksBatchTensorBuffer> tb =
198+
std::make_unique<AKS::AksBatchTensorBuffer>(std::move(tensors));
199+
200+
for (int b = 0; b < bt; ++b) {
201+
auto* imgptr = batchimgs[b].data;
202+
auto size = tb->get_tensors()[b]->get_data_size();
203+
auto* bufptr = reinterpret_cast<uint8_t*>(tb.get()->data({b}).first);
204+
// Copy image data
205+
memcpy(bufptr, imgptr, size);
206+
}
207+
208+
// Fill input vector with input buffer
209+
std::vector<std::unique_ptr<vart::TensorBuffer>> inputs;
210+
inputs.push_back(std::move(tb));
211+
212+
// Enqueue input & push future to queue
213+
{
214+
std::unique_lock<std::mutex> lock(g_mtx);
215+
future_q.push({
216+
std::move(sysMan->enqueueJob (graph, "", std::move(inputs), nullptr)),
217+
std::move(image_paths)
218+
});
219+
}
220+
batchimgs.clear();
221+
}
222+
223+
// Wait for results
224+
std::cout << "[INFO] Waiting for results ... " << std::endl;
225+
226+
thread_continue = false;
227+
wait_thread.join();
228+
229+
std::cout << "[INFO] Waiting for results ... Done!" << std::endl;
230+
231+
auto t2 = std::chrono::steady_clock::now();
232+
233+
auto time_taken = std::chrono::duration<double>(t2-t1).count();
234+
auto throughput = static_cast<double>(nImages)/time_taken;
235+
236+
// Print Stats
237+
std::cout << "[INFO] Total Images : " << nImages << std::endl;
238+
std::cout << "[INFO] Total Time (s): " << time_taken << std::endl;
239+
std::cout << "[INFO] Overall FPS : " << throughput << std::endl;
240+
241+
sysMan->printPerfStats();
242+
243+
// Clean-up
244+
AKS::SysManagerExt::deleteGlobal();
245+
return ret;
246+
}
247+

0 commit comments

Comments
 (0)