-
Notifications
You must be signed in to change notification settings - Fork 201
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
C++ Demo for person_reid_youtureid (#277)
* add demo.cpp * add CMakeLists.txt * Update README.md * turn standard to c++11 --------- Co-authored-by: Gongjunzhe12210401 <[email protected]>
- Loading branch information
1 parent
779ffc8
commit 807f45b
Showing
3 changed files
with
332 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Build script for the YoutuReID person re-identification C++ demo.
cmake_minimum_required(VERSION 3.24.0)
project(opencv_zoo_person_reid_youtureid)

# demo.cpp relies on C++11 features (lambdas, range-for, brace initialization),
# so request that standard explicitly instead of depending on the compiler default.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(OPENCV_VERSION "4.10.0")
set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")

# Find OpenCV
find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})

add_executable(demo demo.cpp)
# PRIVATE: the executable has no dependents that need to inherit the link interface.
target_link_libraries(demo PRIVATE ${OpenCV_LIBS})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,308 @@ | ||
#include <algorithm>
#include <iostream>
#include <map>
#include <numeric>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/opencv.hpp>
#include "opencv2/dnn.hpp"
|
||
|
||
// YoutuReID class for person re-identification | ||
class YoutuReID { | ||
public: | ||
YoutuReID(const std::string& model_path, | ||
const cv::Size& input_size = cv::Size(128, 256), | ||
int output_dim = 768, | ||
const cv::Scalar& mean = cv::Scalar(0.485, 0.456, 0.406), | ||
const cv::Scalar& std = cv::Scalar(0.229, 0.224, 0.225), | ||
int backend_id = 0, | ||
int target_id = 0) | ||
: model_path_(model_path), input_size_(input_size), | ||
output_dim_(output_dim), mean_(mean), std_(std), | ||
backend_id_(backend_id), target_id_(target_id) | ||
{ | ||
|
||
model_ = cv::dnn::readNet(model_path_); | ||
model_.setPreferableBackend(backend_id_); | ||
model_.setPreferableTarget(target_id_); | ||
} | ||
|
||
void setBackendAndTarget(int backend_id, int target_id) { | ||
backend_id_ = backend_id; | ||
target_id_ = target_id; | ||
model_.setPreferableBackend(backend_id_); | ||
model_.setPreferableTarget(target_id_); | ||
} | ||
|
||
void setInputSize(const cv::Size& input_size) { | ||
input_size_ = input_size; | ||
} | ||
|
||
// Preprocess image by resizing, normalizing, and creating a blob | ||
cv::Mat preprocess(const cv::Mat& image) { | ||
cv::Mat img; | ||
cv::cvtColor(image, img, cv::COLOR_BGR2RGB); | ||
img.convertTo(img, CV_32F, 1.0 / 255.0); | ||
|
||
// Normalize each channel separately | ||
std::vector<cv::Mat> channels(3); | ||
cv::split(img, channels); | ||
channels[0] = (channels[0] - mean_[0]) / std_[0]; | ||
channels[1] = (channels[1] - mean_[1]) / std_[1]; | ||
channels[2] = (channels[2] - mean_[2]) / std_[2]; | ||
cv::merge(channels, img); | ||
|
||
return cv::dnn::blobFromImage(img); | ||
} | ||
|
||
// Run inference to extract feature vector | ||
cv::Mat infer(const cv::Mat& image) { | ||
cv::Mat input_blob = preprocess(image); | ||
model_.setInput(input_blob); | ||
cv::Mat features = model_.forward(); | ||
|
||
if (features.dims == 4 && features.size[2] == 1 && features.size[3] == 1) { | ||
features = features.reshape(1, {1, features.size[1]}); | ||
} | ||
|
||
return features; | ||
} | ||
|
||
// Perform query, comparing each query image to each gallery image | ||
std::vector<std::vector<int>> query(const std::vector<cv::Mat>& query_img_list, | ||
const std::vector<cv::Mat>& gallery_img_list, | ||
int topK = 5) { | ||
std::vector<cv::Mat> query_features_list, gallery_features_list; | ||
cv::Mat query_features, gallery_features; | ||
|
||
for (size_t i = 0; i < query_img_list.size(); ++i) { | ||
cv::Mat feature = infer(query_img_list[i]); | ||
query_features_list.push_back(feature.clone()); | ||
} | ||
cv::vconcat(query_features_list, query_features); | ||
normalizeFeatures(query_features); | ||
|
||
for (size_t i = 0; i < gallery_img_list.size(); ++i) { | ||
cv::Mat feature = infer(gallery_img_list[i]); | ||
gallery_features_list.push_back(feature.clone()); | ||
} | ||
cv::vconcat(gallery_features_list, gallery_features); | ||
normalizeFeatures(gallery_features); | ||
|
||
cv::Mat dist = query_features * gallery_features.t(); | ||
return getTopK(dist, topK); | ||
} | ||
|
||
private: | ||
// Normalize feature vectors row-wise to unit length | ||
void normalizeFeatures(cv::Mat& features) { | ||
const float epsilon = 1e-6; | ||
for (int i = 0; i < features.rows; ++i) { | ||
cv::Mat featureRow = features.row(i); | ||
float norm = cv::norm(featureRow, cv::NORM_L2); | ||
if (norm < epsilon) { | ||
norm = epsilon; | ||
} | ||
featureRow /= norm; | ||
} | ||
} | ||
|
||
// Retrieve Top-K indices from similarity matrix | ||
std::vector<std::vector<int>> getTopK(const cv::Mat& dist, int topK) { | ||
std::vector<std::vector<int>> indices(dist.rows); | ||
|
||
for (int i = 0; i < dist.rows; ++i) { | ||
std::vector<std::pair<float, int>> sim_index_pairs; | ||
for (int j = 0; j < dist.cols; ++j) { | ||
sim_index_pairs.emplace_back(dist.at<float>(i, j), j); | ||
} | ||
std::sort(sim_index_pairs.begin(), sim_index_pairs.end(), | ||
[](const std::pair<float, int>& a, const std::pair<float, int>& b) { | ||
return a.first > b.first; | ||
}); | ||
|
||
for (int k = 0; k < topK && k < sim_index_pairs.size(); ++k) { | ||
indices[i].push_back(sim_index_pairs[k].second); | ||
} | ||
} | ||
return indices; | ||
} | ||
|
||
std::string model_path_; | ||
cv::Size input_size_; | ||
int output_dim_; | ||
cv::Scalar mean_, std_; | ||
int backend_id_; | ||
int target_id_; | ||
cv::dnn::Net model_; | ||
}; | ||
|
||
// Read images from directory and return a pair of image list and file list | ||
std::pair<std::vector<cv::Mat>, std::vector<std::string>> readImagesFromDirectory(const std::string& img_dir, int w = 128, int h = 256) { | ||
std::vector<cv::Mat> img_list; | ||
std::vector<std::string> file_list; | ||
|
||
std::vector<std::string> file_names; | ||
cv::glob(img_dir + "/*", file_names, false); | ||
|
||
for (size_t i = 0; i < file_names.size(); ++i) { | ||
std::string file_name = file_names[i].substr(file_names[i].find_last_of("/\\") + 1); | ||
cv::Mat img = cv::imread(file_names[i]); | ||
if (!img.empty()) { | ||
cv::resize(img, img, cv::Size(w, h)); | ||
img_list.push_back(img); | ||
file_list.push_back(file_name); | ||
} | ||
} | ||
return std::make_pair(img_list, file_list); | ||
} | ||
|
||
// Visualize query and gallery results by creating concatenated images | ||
std::map<std::string, cv::Mat> visualize( | ||
const std::map<std::string, std::vector<std::string>>& results, | ||
const std::string& query_dir, | ||
const std::string& gallery_dir, | ||
const cv::Size& output_size = cv::Size(128, 384)) { | ||
|
||
std::map<std::string, cv::Mat> results_vis; | ||
|
||
for (std::map<std::string, std::vector<std::string>>::const_iterator it = results.begin(); it != results.end(); ++it) { | ||
const std::string& query_file = it->first; | ||
const std::vector<std::string>& top_matches = it->second; | ||
|
||
cv::Mat query_img = cv::imread(query_dir + "/" + query_file); | ||
if (query_img.empty()) continue; | ||
|
||
cv::resize(query_img, query_img, output_size); | ||
cv::copyMakeBorder(query_img, query_img, 5, 5, 5, 5, | ||
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); | ||
cv::putText(query_img, "Query", cv::Point(10, 30), | ||
cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2); | ||
|
||
cv::Mat concat_img = query_img; | ||
|
||
for (size_t i = 0; i < top_matches.size(); ++i) { | ||
cv::Mat gallery_img = cv::imread(gallery_dir + "/" + top_matches[i]); | ||
if (gallery_img.empty()) continue; | ||
|
||
cv::resize(gallery_img, gallery_img, output_size); | ||
cv::copyMakeBorder(gallery_img, gallery_img, 5, 5, 5, 5, | ||
cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255)); | ||
cv::putText(gallery_img, "G" + std::to_string(i), cv::Point(10, 30), | ||
cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2); | ||
|
||
cv::hconcat(concat_img, gallery_img, concat_img); | ||
} | ||
results_vis[query_file] = concat_img; | ||
} | ||
return results_vis; | ||
} | ||
|
||
// Prints the command-line usage text to standard output.
// The listed options mirror the keys registered with cv::CommandLineParser in
// main(), including the short alias of every option.
void printHelpMessage() {
    std::cout << "usage: demo.cpp [-h] [--query_dir QUERY_DIR] [--gallery_dir GALLERY_DIR] "
              << "[--backend_target BACKEND_TARGET] [--topk TOPK] [--model MODEL] [--save] [--vis]\n\n"
              << "ReID baseline models from Tencent Youtu Lab\n\n"
              << "optional arguments:\n"
              << "  -h, --help            show this help message and exit\n"
              << "  --query_dir QUERY_DIR, -q QUERY_DIR\n"
              << "                        Query directory.\n"
              << "  --gallery_dir GALLERY_DIR, -g GALLERY_DIR\n"
              << "                        Gallery directory.\n"
              << "  --backend_target BACKEND_TARGET, -bt BACKEND_TARGET\n"
              << "                        Choose one of the backend-target pair to run this demo: 0: (default) OpenCV implementation + "
                 "CPU, 1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU\n"
              << "  --topk TOPK, -k TOPK\n"
              << "                        Top-K closest from gallery for each query.\n"
              << "  --model MODEL, -m MODEL\n"
              << "                        Path to the model.\n"
              << "  --save, -s            Usage: Specify to save each visualized result as an image file named "
                 "result-<query file name>.\n"
              << "  --vis, -v             Usage: Specify to open a new window to show each visualized result.\n";
}
|
||
int main(int argc, char** argv) { | ||
// CommandLineParser setup | ||
cv::CommandLineParser parser(argc, argv, | ||
"{help h | | Show help message.}" | ||
"{query_dir q | | Query directory.}" | ||
"{gallery_dir g | | Gallery directory.}" | ||
"{backend_target bt | 0 | Choose one of the backend-target pair to run this demo: 0: (default) OpenCV implementation + CPU, " | ||
"1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU}" | ||
"{topk k | 10 | Top-K closest from gallery for each query.}" | ||
"{model m | person_reid_youtu_2021nov.onnx | Path to the model.}" | ||
"{save s | false | Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.}" | ||
"{vis v | false | Usage: Specify to open a new window to show results. Invalid in case of camera input.}"); | ||
|
||
if (parser.has("help")) { | ||
printHelpMessage(); | ||
return 0; | ||
} | ||
|
||
std::string query_dir = parser.get<std::string>("query_dir"); | ||
std::string gallery_dir = parser.get<std::string>("gallery_dir"); | ||
int backend_target = parser.get<int>("backend_target"); | ||
int topK = parser.get<int>("topk"); | ||
std::string model_path = parser.get<std::string>("model"); | ||
bool save_flag = parser.get<bool>("save"); | ||
bool vis_flag = parser.get<bool>("vis"); | ||
|
||
if (!parser.check()) { | ||
parser.printErrors(); | ||
return 1; | ||
} | ||
|
||
const std::vector<std::pair<int, int>> backend_target_pairs = { | ||
{cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU}, | ||
{cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA}, | ||
{cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16}, | ||
{cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU}, | ||
{cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU} | ||
}; | ||
|
||
int backend_id = backend_target_pairs[backend_target].first; | ||
int target_id = backend_target_pairs[backend_target].second; | ||
|
||
YoutuReID reid(model_path, cv::Size(128, 256), 768, | ||
cv::Scalar(0.485, 0.456, 0.406), | ||
cv::Scalar(0.229, 0.224, 0.225), | ||
backend_id, target_id); | ||
|
||
std::pair<std::vector<cv::Mat>, std::vector<std::string>> query_data = readImagesFromDirectory(query_dir); | ||
std::pair<std::vector<cv::Mat>, std::vector<std::string>> gallery_data = readImagesFromDirectory(gallery_dir); | ||
|
||
std::vector<std::vector<int>> indices = reid.query(query_data.first, gallery_data.first, topK); | ||
|
||
std::map<std::string, std::vector<std::string>> results; | ||
for (size_t i = 0; i < query_data.second.size(); ++i) { | ||
std::vector<std::string> top_matches; | ||
for (int idx : indices[i]) { | ||
top_matches.push_back(gallery_data.second[idx]); | ||
} | ||
results[query_data.second[i]] = top_matches; | ||
std::cout << "Query: " << query_data.second[i] << "\n"; | ||
std::cout << "\tTop-" << topK << " from gallery: "; | ||
for (size_t j = 0; j < top_matches.size(); ++j) { | ||
std::cout << top_matches[j] << " "; | ||
} | ||
std::cout << std::endl; | ||
} | ||
|
||
std::map<std::string, cv::Mat> results_vis = visualize(results, query_dir, gallery_dir); | ||
|
||
if (save_flag) { | ||
for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) { | ||
std::string save_path = "result-" + it->first; | ||
cv::imwrite(save_path, it->second); | ||
} | ||
} | ||
|
||
if (vis_flag) { | ||
for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) { | ||
cv::namedWindow("result-" + it->first, cv::WINDOW_AUTOSIZE); | ||
cv::imshow("result-" + it->first, it->second); | ||
cv::waitKey(0); | ||
cv::destroyAllWindows(); | ||
} | ||
} | ||
|
||
return 0; | ||
} |