302 lines
9.9 KiB
C++
302 lines
9.9 KiB
C++
|
|
#include "onnxDetector.h"
|
|||
|
|
#include <opencv2/opencv.hpp>
|
|||
|
|
|
|||
|
|
using namespace std;
|
|||
|
|
using namespace cv;
|
|||
|
|
using namespace cv::dnn;
|
|||
|
|
|
|||
|
|
class OnnxDetectorImpl :
|
|||
|
|
public OnnxDetector
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
OnnxDetectorImpl() :
|
|||
|
|
classNum_(1), imgStride_(32), confidence_(0.25f),
|
|||
|
|
scoreThreshold_(0.45f), nmsThreshold_(0.3f){};
|
|||
|
|
~OnnxDetectorImpl() {};
|
|||
|
|
|
|||
|
|
public:
|
|||
|
|
virtual int loadOnnxModel(const char* filename, cv::Size2f inferSize) override;
|
|||
|
|
virtual std::vector<Detection2d> detect(cv::Mat image) override;
|
|||
|
|
|
|||
|
|
private:
|
|||
|
|
vector<Mat> preProcess(Mat& input_image);
|
|||
|
|
vector<Detection2d> postProcess(Mat& input_image, vector<Mat>& outputs);
|
|||
|
|
|
|||
|
|
private:
|
|||
|
|
int classNum_;
|
|||
|
|
int imgStride_;
|
|||
|
|
float confidence_;
|
|||
|
|
float scoreThreshold_;
|
|||
|
|
float nmsThreshold_;
|
|||
|
|
cv::Size2f size_;
|
|||
|
|
cv::dnn::Net network_;
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
/// Resizes `image` so that it fits inside `newShape` while keeping its aspect
/// ratio, then pads the borders with `color`. The result is written to `outImage`.
///
/// @param image     Source image. If it is empty, `outImage` is released and the
///                  function returns without doing anything else.
/// @param outImage  Receives the resized and padded image.
/// @param newShape  Target size the image is fitted into.
/// @param color     Constant colour used for the padding border.
/// @param auto_     When true, the padding on each axis is reduced to its
///                  remainder modulo `stride`.
/// @param scaleFill Only consulted when `auto_` is false: stretch the image to
///                  exactly `newShape` with no padding.
/// @param scaleUp   When false, the image is never enlarged (scale capped at 1).
/// @param stride    Modulus used by `auto_`; a value of 0 leaves the padding untouched.
void letterbox(const cv::Mat& image, cv::Mat& outImage,
               const cv::Size& newShape = cv::Size(640, 640),
               const cv::Scalar& color = cv::Scalar(114, 114, 114),
               bool auto_ = true,
               bool scaleFill = false,
               bool scaleUp = true,
               int stride = 32)
{
    // An empty image has zero width/height and would make the scale below a
    // division by zero.
    if (image.empty())
    {
        outImage.release();
        return;
    }

    const cv::Size shape = image.size();

    // Uniform scale that makes the image fit inside newShape.
    float r = std::min((float)newShape.height / (float)shape.height,
                       (float)newShape.width / (float)shape.width);
    if (!scaleUp)
        r = std::min(r, 1.0f);

    // Image size after scaling, before any padding: {width, height}.
    int newUnpad[2]{ (int)std::round((float)shape.width * r),
                     (int)std::round((float)shape.height * r) };

    // Total padding still needed on each axis.
    auto dw = (float)(newShape.width - newUnpad[0]);
    auto dh = (float)(newShape.height - newUnpad[1]);

    if (auto_)
    {
        // Guard the modulo: `% 0` is undefined behaviour.
        if (stride != 0)
        {
            dw = (float)((int)dw % stride);
            dh = (float)((int)dh % stride);
        }
    }
    else if (scaleFill)
    {
        dw = 0.0f;
        dh = 0.0f;
        newUnpad[0] = newShape.width;
        newUnpad[1] = newShape.height;
    }

    // Split the padding between the two sides of each axis.
    dw /= 2.0f;
    dh /= 2.0f;

    // Resize when EITHER dimension changes; otherwise still populate outImage
    // so that the border step below always operates on the source pixels.
    if (shape.width != newUnpad[0] || shape.height != newUnpad[1])
    {
        cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
    }
    else
    {
        image.copyTo(outImage);
    }

    // The -/+ 0.1 offsets send an odd padding pixel to the bottom/right side.
    const int top = int(std::round(dh - 0.1f));
    const int bottom = int(std::round(dh + 0.1f));
    const int left = int(std::round(dw - 0.1f));
    const int right = int(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
|
|||
|
|
|
|||
|
|
// Pre-processing
|
|||
|
|
vector<Mat> OnnxDetectorImpl::preProcess(Mat& input_image)
|
|||
|
|
{
|
|||
|
|
Mat blob;
|
|||
|
|
blobFromImage(input_image, blob, 1. / 255., Size(size_), Scalar(), false, false);
|
|||
|
|
|
|||
|
|
network_.setInput(blob);
|
|||
|
|
|
|||
|
|
vector<std::string> names = network_.getUnconnectedOutLayersNames();
|
|||
|
|
|
|||
|
|
vector<Mat> outputs;
|
|||
|
|
network_.forward(outputs, names);
|
|||
|
|
|
|||
|
|
return outputs;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Post-processing
|
|||
|
|
#if 1
|
|||
|
|
/// Decodes the first network output into detections expressed in the
/// coordinate system of `input_image`.
///
/// The output is read channel-major: for `rows` candidates, channel 0..3 hold
/// centre-x, centre-y, width and height, and channel 4 holds the confidence,
/// i.e. value `c` of candidate `i` lives at `data[rows * c + i]`.
/// Only that single confidence channel is read, so every detection is
/// reported with class index 0.
///
/// @param input_image Image the detections are mapped back onto (only its
///                    dimensions are used).
/// @param outputs     Raw network outputs; only `outputs[0]` is read.
/// @return Detections that survive confidence filtering and NMS. Empty when
///         `outputs` is empty or `outputs[0]` is not a float blob holding at
///         least `5 * rows` values.
vector<Detection2d> OnnxDetectorImpl::postProcess(Mat& input_image, vector<Mat>& outputs)
{
    vector<Detection2d> res2d;

    // Values read per candidate: cx, cy, w, h, confidence.
    const int kChannels = 5;

    // Number of candidates expected for the configured inference size.
    const int rows = (1 + 4 + 16) * (size_.width / imgStride_) * (size_.height / imgStride_);
    if (outputs.empty() || rows <= 0)
        return res2d;

    // Refuse to index past the end of the blob if the model's output does not
    // match the expected layout.
    const Mat& out = outputs[0];
    if (out.depth() != CV_32F ||
        out.total() < static_cast<size_t>(rows) * static_cast<size_t>(kChannels))
        return res2d;

    vector<int> class_ids;
    vector<float> confidences;
    vector<Rect> boxes;

    // Scale from network-input coordinates back to the source image.
    const float x_factor = input_image.cols / size_.width;
    const float y_factor = input_image.rows / size_.height;

    const float* data = reinterpret_cast<const float*>(out.data);

    for (int i = 0; i < rows; ++i)
    {
        const float confidence = data[rows * 4 + i];
        if (confidence > confidence_)
        {
            const int centerX = (int)(data[rows * 0 + i] * x_factor);
            const int centerY = (int)(data[rows * 1 + i] * y_factor);
            const int width = (int)(data[rows * 2 + i] * x_factor);
            const int height = (int)(data[rows * 3 + i] * y_factor);
            const int left = centerX - width / 2;
            const int top = centerY - height / 2;

            boxes.emplace_back(left, top, width, height);
            confidences.emplace_back(confidence);
            class_ids.emplace_back(0);
        }
    }

    // Non-maximum suppression; `indices` refers to positions in `boxes`.
    vector<int> indices;
    NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices);

    res2d.reserve(indices.size());
    for (const int idx : indices)
    {
        Detection2d d2d;
        d2d.bbox = boxes[idx];
        // Report the class of the box, not its position in the candidate list.
        d2d.classIdx = class_ids[idx];
        d2d.confidence = confidences[idx];

        res2d.push_back(d2d);
    }
    return res2d;
}
|
|||
|
|
#else
|
|||
|
|
/// Logistic function: returns 1 / (1 + e^(-x)).
inline float sigmoid(float x)
{
    const float expNeg = std::exp(-x);
    return 1.f / (1.f + expNeg);
}
|
|||
|
|
|
|||
|
|
/// Alternative decoder for anchor-based outputs: one 5-D blob per detection
/// scale, indexed as [*, anchor, grid-y, grid-x, record], where a record is
/// {x, y, w, h, objectness, class scores...} stored as raw (pre-sigmoid) values.
///
/// @param cv_src Image the detections are mapped back onto (only its
///               dimensions are used).
/// @param outs   One output blob per scale; at most the first three are read,
///               matching the three stride/anchor sets below. Blobs that are
///               not 5-dimensional are skipped.
/// @return Detections that survive score filtering and NMS, clamped to the
///         bounds of `cv_src`.
vector<Detection2d> OnnxDetectorImpl::postProcess(Mat& cv_src, vector<Mat>& outs)
{
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // One stride and three (width, height) anchor pairs per scale.
    const size_t kNumLevels = 3;
    const int kAnchorsPerLevel = 3;
    const int strides[] = { 8, 16, 32 };
    const std::vector<std::vector<int> > anchors = {
        { 10,13, 16,30, 33,23 },
        { 30,61, 62,45, 59,119 },
        { 116,90, 156,198, 373,326 }
    };

    // Map network-input coordinates to source-image pixels, clamped to the
    // image. X is scaled by the input width and Y by the input height.
    auto toSrcX = [&](float v) {
        return std::max(0, std::min(cv_src.cols, int(v * (float)cv_src.cols / (float)size_.width)));
    };
    auto toSrcY = [&](float v) {
        return std::max(0, std::min(cv_src.rows, int(v * (float)cv_src.rows / (float)size_.height)));
    };

    // Never index past the stride/anchor tables.
    const size_t levels = std::min(outs.size(), kNumLevels);
    for (size_t k = 0; k < levels; k++) {
        const Mat& out = outs[k];
        if (out.dims != 5)
            continue; // size[1..4] below require a 5-D blob

        const float* data = out.ptr<float>();
        const int stride = strides[k];
        const int numAnchors = std::min(out.size[1], kAnchorsPerLevel);
        const int gridH = out.size[2];
        const int gridW = out.size[3];
        const int recordLen = out.size[4];
        const int num_classes = recordLen - 5;

        for (int i = 0; i < gridH; i++) {
            for (int j = 0; j < gridW; j++) {
                for (int a = 0; a < numAnchors; ++a) {
                    const float* record = data + ((a * gridH + i) * gridW + j) * recordLen;
                    const float* cls_ptr = record + 5;
                    for (int cls = 0; cls < num_classes; cls++) {
                        // Final score = class probability * objectness.
                        const float score = sigmoid(cls_ptr[cls]) * sigmoid(record[4]);
                        if (score > scoreThreshold_) {
                            const float cx = (sigmoid(record[0]) * 2.f - 0.5f + (float)j) * (float)stride;
                            const float cy = (sigmoid(record[1]) * 2.f - 0.5f + (float)i) * (float)stride;
                            const float sw = sigmoid(record[2]) * 2.f;
                            const float sh = sigmoid(record[3]) * 2.f;
                            const float w = sw * sw * anchors[k][2 * a];
                            const float h = sh * sh * anchors[k][2 * a + 1];

                            const int x1 = toSrcX(cx - w / 2.f);
                            const int y1 = toSrcY(cy - h / 2.f);
                            const int x2 = toSrcX(cx + w / 2.f);
                            const int y2 = toSrcY(cy + h / 2.f);

                            classIds.push_back(cls);
                            confidences.push_back(score);
                            boxes.push_back(cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)));
                        }
                    }
                }
            }
        }
    }

    // Non-maximum suppression; `indices` refers to positions in `boxes`.
    vector<Detection2d> res2d;
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices);
    res2d.reserve(indices.size());
    for (const int idx : indices) {
        Detection2d d2d;
        d2d.classIdx = classIds[idx];
        d2d.bbox = boxes[idx];
        d2d.confidence = confidences[idx];
        res2d.push_back(d2d);
    }

    return res2d;
}
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
int OnnxDetectorImpl::loadOnnxModel(const char* filename, cv::Size2f inferSize) {
|
|||
|
|
|
|||
|
|
std::vector< std::pair<cv::dnn::Backend, cv::dnn::Target> > backends = cv::dnn::getAvailableBackends();
|
|||
|
|
for (std::pair<cv::dnn::Backend, cv::dnn::Target>& det : backends) {
|
|||
|
|
std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
size_ = inferSize;
|
|||
|
|
network_ = cv::dnn::readNet(filename);
|
|||
|
|
|
|||
|
|
//std::vector< std::pair<cv::dnn::Backend, cv::dnn::Target> > backends = cv::dnn::getAvailableBackends();
|
|||
|
|
//for (std::pair<cv::dnn::Backend, cv::dnn::Target>& det : backends) {
|
|||
|
|
// std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl;
|
|||
|
|
//}
|
|||
|
|
//network_.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
|||
|
|
//network_.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
|||
|
|
|
|||
|
|
return 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/// Runs the loaded network on `image` and returns the resulting detections.
///
/// @param image Input frame.
/// @return Detections in `image` coordinates; empty when `image` is empty or
///         no network has been loaded.
std::vector<Detection2d> OnnxDetectorImpl::detect(cv::Mat image) {

    if (image.empty() || network_.empty())
        return {};

    // preProcess/postProcess only read the image, so it is passed through
    // directly instead of being deep-copied into a scratch matrix first.
    std::vector<cv::Mat> detections = preProcess(image);
    return postProcess(image, detections);
}
|
|||
|
|
|
|||
|
|
/// Factory: returns a new heap-allocated detector implementation.
/// NOTE(review): the object is created with `new` and nothing in this file
/// frees it — presumably the caller must delete it; confirm against the header.
OnnxDetector* OnnxDetector::CreateInstance() {
    OnnxDetector* instance = new OnnxDetectorImpl();
    return instance;
}
|