|
| 1 | +/* |
| 2 | + * Copyright 2019 Xilinx Inc. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>

#include <opencv2/core/core.hpp>
#include <opencv2/imgcodecs.hpp>

#include <aks/AksSysManagerExt.h>
#include <aks/AksNodeParams.h>
#include <aks/AksTensorBuffer.h>
#include <aks/AksBatchTensorBuffer.h>

using namespace AKS;
| 32 | + |
// Prints the command-line usage banner for this example to stdout.
//
// exename: program name shown in the usage line (main() passes argv[0]).
void usage (const char* exename) {
  // One chained expression; every std::endl is kept so the output is flushed
  // line by line.
  std::cout << "[INFO] Usage: " << std::endl
            << "[INFO] ---------------------- " << std::endl
            << "[INFO] " << exename << " <Image Directory Path>" << std::endl
            << std::endl;
}
| 39 | + |
| 40 | +// Leave this empty to stop writing o/p to disk |
| 41 | +std::string _output_dir ("facedetect_outputs"); |
| 42 | + |
| 43 | +// Queue and Mutex to push/pop futures |
| 44 | +using aks_future_t = std::future<std::vector<std::unique_ptr<vart::TensorBuffer>>>; |
| 45 | +using aks_batch_img_t = std::vector<std::string>; |
| 46 | +std::queue<std::pair<aks_future_t, aks_batch_img_t>> future_q; |
| 47 | +std::mutex g_mtx; |
| 48 | +std::atomic<bool> thread_continue {true}; |
| 49 | + |
| 50 | +void write_output ( |
| 51 | + std::vector<std::unique_ptr<vart::TensorBuffer>> results, |
| 52 | + aks_batch_img_t img_paths) |
| 53 | +{ |
| 54 | + auto nboxes = results.front()->get_tensor()->get_element_num() / 6; |
| 55 | + if(!_output_dir.empty()) { |
| 56 | + if(!boost::filesystem::exists(_output_dir)) |
| 57 | + boost::filesystem::create_directory(_output_dir); |
| 58 | + |
| 59 | + int boxcnt = 0; |
| 60 | + for(int b = 0; b < img_paths.size(); ++b) { |
| 61 | + std::vector<std::string> tokens; |
| 62 | + boost::split(tokens, img_paths[b], boost::is_any_of("/,.")); |
| 63 | + auto& imgFile = tokens[tokens.size()-2]; |
| 64 | + |
| 65 | + // Append output_dir and .txt to get output file |
| 66 | + std::string output_file = _output_dir + "/" + imgFile + ".txt"; |
| 67 | + ofstream f(output_file); |
| 68 | + if(!f) { |
| 69 | + std::cerr << "[WARNING] : Couldn't open " << output_file << std::endl; |
| 70 | + std::cerr << "[WARNING] : Check if path is correct" << std::endl; |
| 71 | + return; |
| 72 | + } |
| 73 | + |
| 74 | + auto* boxptr = reinterpret_cast<float*>(results.front()->data().first); |
| 75 | + for (int box = 0; box < nboxes; ++box) { |
| 76 | + if (boxptr[box*6] == b) { |
| 77 | + float score = boxptr[(box*6)+5]; |
| 78 | + float x = boxptr[(box*6)+1]; |
| 79 | + float y = boxptr[(box*6)+2]; |
| 80 | + float w = boxptr[(box*6)+3]; |
| 81 | + float h = boxptr[(box*6)+4]; |
| 82 | + |
| 83 | + f << score << " "; |
| 84 | + f << x << " " << y << " "; |
| 85 | + f << w << " " << h << '\n'; |
| 86 | + } |
| 87 | + } |
| 88 | + f.close(); |
| 89 | + } |
| 90 | + } |
| 91 | +} |
| 92 | + |
| 93 | +// Wait for jobs and get results |
| 94 | +void get_results (void) |
| 95 | +{ |
| 96 | + while (true) { |
| 97 | + g_mtx.lock(); |
| 98 | + if (future_q.empty()) { |
| 99 | + g_mtx.unlock(); |
| 100 | + if (!thread_continue) break; |
| 101 | + } else { |
| 102 | + auto element = std::move(future_q.front()); |
| 103 | + future_q.pop(); |
| 104 | + g_mtx.unlock(); |
| 105 | + // Get data from future |
| 106 | + auto results = element.first.get(); |
| 107 | + auto img_paths = element.second; |
| 108 | + // Write to disk |
| 109 | + write_output (std::move(results), std::move(img_paths)); |
| 110 | + } |
| 111 | + } |
| 112 | +} |
| 113 | + |
| 114 | +// Face-Detection Inference using |
| 115 | +// DPUCADF8H on Alveo-u200/u250 FPGAs |
| 116 | +int main(int argc, char **argv) |
| 117 | +{ |
| 118 | + int ret = 0; |
| 119 | + if (argc != 2) { |
| 120 | + std::cout << "[ERROR] Usage invalid!" << std::endl; |
| 121 | + usage(argv[0]); |
| 122 | + return -1; |
| 123 | + } |
| 124 | + |
| 125 | + // Get image directory path |
| 126 | + std::string imgDirPath (argv[1]); |
| 127 | + |
| 128 | + // Get AKS System Manager instance |
| 129 | + AKS::SysManagerExt * sysMan = AKS::SysManagerExt::getGlobal(); |
| 130 | + |
| 131 | + // Load all kernels |
| 132 | + sysMan->loadKernels("kernel_zoo"); |
| 133 | + |
| 134 | + // Load graph |
| 135 | + sysMan->loadGraphs("graph_zoo/graph_facedetect_u200_u250_proteus.json"); |
| 136 | + |
| 137 | + // Get graph instance |
| 138 | + AKS::AIGraph *graph = sysMan->getGraph("facedetect"); |
| 139 | + |
| 140 | + if(!graph){ |
| 141 | + cout<<"[ERROR] Couldn't find requested graph"<<endl; |
| 142 | + AKS::SysManagerExt::deleteGlobal(); |
| 143 | + return -1; |
| 144 | + } |
| 145 | + |
| 146 | + std::vector<std::string> images; |
| 147 | + int i = 0; |
| 148 | + // Load Dataset |
| 149 | + for (boost::filesystem::directory_iterator it {imgDirPath}; |
| 150 | + it != boost::filesystem::directory_iterator{}; it++) { |
| 151 | + std::string fileExtension = it->path().extension().string(); |
| 152 | + if(fileExtension == ".jpg" || fileExtension == ".JPEG" || fileExtension == ".png") |
| 153 | + images.push_back((*it).path().string()); |
| 154 | + } |
| 155 | + |
| 156 | + constexpr int bt = 4; // DPU batch size |
| 157 | + int left_out = images.size() % bt; |
| 158 | + if (left_out) { // Make a batch complete |
| 159 | + for (int b = 0; b < (bt-left_out); ++b) { |
| 160 | + std::string s = images.back(); |
| 161 | + images.push_back(s); |
| 162 | + } |
| 163 | + } |
| 164 | + |
| 165 | + int nImages = images.size(); |
| 166 | + std::cout << "[INFO] Running " << nImages << " Images" << std::endl; |
| 167 | + |
| 168 | + // Start a thread to wait for results |
| 169 | + std::thread wait_thread (get_results); |
| 170 | + |
| 171 | + sysMan->resetTimer(); |
| 172 | + auto t1 = std::chrono::steady_clock::now(); |
| 173 | + |
| 174 | + // User input |
| 175 | + std::cout << "[INFO] Starting enqueue ... " << std::endl; |
| 176 | + for (int i = 0; i < images.size(); i+=bt) { |
| 177 | + // Create input tensors |
| 178 | + std::vector<std::unique_ptr<xir::Tensor>> tensors; |
| 179 | + |
| 180 | + // Create batch of images |
| 181 | + std::vector<cv::Mat> batchimgs; |
| 182 | + std::vector<std::string> image_paths; |
| 183 | + |
| 184 | + for (int b = 0; b < bt; ++b) { |
| 185 | + cv::Mat img = cv::imread(images[i+b]); |
| 186 | + std::vector<int> shape = { 1, img.rows, img.cols, img.channels() }; |
| 187 | + auto tensorOut = |
| 188 | + xir::Tensor::create("imread_output", shape, |
| 189 | + xir::create_data_type<unsigned char>()); |
| 190 | + |
| 191 | + tensors.push_back(std::move(tensorOut)); |
| 192 | + batchimgs.push_back(std::move(img)); |
| 193 | + image_paths.push_back(images[i+b]); |
| 194 | + } |
| 195 | + |
| 196 | + // Create input buffer & fill image data |
| 197 | + std::unique_ptr<AKS::AksBatchTensorBuffer> tb = |
| 198 | + std::make_unique<AKS::AksBatchTensorBuffer>(std::move(tensors)); |
| 199 | + |
| 200 | + for (int b = 0; b < bt; ++b) { |
| 201 | + auto* imgptr = batchimgs[b].data; |
| 202 | + auto size = tb->get_tensors()[b]->get_data_size(); |
| 203 | + auto* bufptr = reinterpret_cast<uint8_t*>(tb.get()->data({b}).first); |
| 204 | + // Copy image data |
| 205 | + memcpy(bufptr, imgptr, size); |
| 206 | + } |
| 207 | + |
| 208 | + // Fill input vector with input buffer |
| 209 | + std::vector<std::unique_ptr<vart::TensorBuffer>> inputs; |
| 210 | + inputs.push_back(std::move(tb)); |
| 211 | + |
| 212 | + // Enqueue input & push future to queue |
| 213 | + { |
| 214 | + std::unique_lock<std::mutex> lock(g_mtx); |
| 215 | + future_q.push({ |
| 216 | + std::move(sysMan->enqueueJob (graph, "", std::move(inputs), nullptr)), |
| 217 | + std::move(image_paths) |
| 218 | + }); |
| 219 | + } |
| 220 | + batchimgs.clear(); |
| 221 | + } |
| 222 | + |
| 223 | + // Wait for results |
| 224 | + std::cout << "[INFO] Waiting for results ... " << std::endl; |
| 225 | + |
| 226 | + thread_continue = false; |
| 227 | + wait_thread.join(); |
| 228 | + |
| 229 | + std::cout << "[INFO] Waiting for results ... Done!" << std::endl; |
| 230 | + |
| 231 | + auto t2 = std::chrono::steady_clock::now(); |
| 232 | + |
| 233 | + auto time_taken = std::chrono::duration<double>(t2-t1).count(); |
| 234 | + auto throughput = static_cast<double>(nImages)/time_taken; |
| 235 | + |
| 236 | + // Print Stats |
| 237 | + std::cout << "[INFO] Total Images : " << nImages << std::endl; |
| 238 | + std::cout << "[INFO] Total Time (s): " << time_taken << std::endl; |
| 239 | + std::cout << "[INFO] Overall FPS : " << throughput << std::endl; |
| 240 | + |
| 241 | + sysMan->printPerfStats(); |
| 242 | + |
| 243 | + // Clean-up |
| 244 | + AKS::SysManagerExt::deleteGlobal(); |
| 245 | + return ret; |
| 246 | +} |
| 247 | + |
0 commit comments