302 lines
9.9 KiB
C++
302 lines
9.9 KiB
C++
#include "onnxDetector.h"
|
||
#include <opencv2/opencv.hpp>
|
||
|
||
using namespace std;
|
||
using namespace cv;
|
||
using namespace cv::dnn;
|
||
|
||
class OnnxDetectorImpl :
|
||
public OnnxDetector
|
||
{
|
||
public:
|
||
OnnxDetectorImpl() :
|
||
classNum_(1), imgStride_(32), confidence_(0.25f),
|
||
scoreThreshold_(0.45f), nmsThreshold_(0.3f){};
|
||
~OnnxDetectorImpl() {};
|
||
|
||
public:
|
||
virtual int loadOnnxModel(const char* filename, cv::Size2f inferSize) override;
|
||
virtual std::vector<Detection2d> detect(cv::Mat image) override;
|
||
|
||
private:
|
||
vector<Mat> preProcess(Mat& input_image);
|
||
vector<Detection2d> postProcess(Mat& input_image, vector<Mat>& outputs);
|
||
|
||
private:
|
||
int classNum_;
|
||
int imgStride_;
|
||
float confidence_;
|
||
float scoreThreshold_;
|
||
float nmsThreshold_;
|
||
cv::Size2f size_;
|
||
cv::dnn::Net network_;
|
||
};
|
||
|
||
/// Scales `image` to fit inside `newShape` without changing its aspect ratio
/// and pads the remaining area with `color`, writing the result to `outImage`.
///
/// @param image      Source image; never modified.
/// @param outImage   Receives the resized and padded image. Released (left
///                   empty) when `image` is empty.
/// @param newShape   Target size the image is fitted into.
/// @param color      Constant border colour used for the padding.
/// @param auto_      When true, each padding amount is reduced to its
///                   remainder modulo `stride`.
/// @param scaleFill  Only consulted when `auto_` is false: stretch the image to
///                   exactly `newShape` and add no padding.
/// @param scaleUp    When false, the scale factor is capped at 1 so the image
///                   is never enlarged.
/// @param stride     Modulus used by `auto_`; ignored when not positive.
void letterbox(const cv::Mat& image, cv::Mat& outImage,
               const cv::Size& newShape = cv::Size(640, 640),
               const cv::Scalar& color = cv::Scalar(114, 114, 114),
               bool auto_ = true,
               bool scaleFill = false,
               bool scaleUp = true,
               int stride = 32)
{
    const cv::Size shape = image.size();

    // An empty image would make the scale computation divide by zero.
    if (shape.width <= 0 || shape.height <= 0)
    {
        outImage.release();
        return;
    }

    // Largest uniform scale that keeps the whole image inside newShape.
    float r = std::min(static_cast<float>(newShape.height) / static_cast<float>(shape.height),
                       static_cast<float>(newShape.width) / static_cast<float>(shape.width));
    if (!scaleUp)
        r = std::min(r, 1.0f);

    // Size of the image after scaling, before any padding: {width, height}.
    int newUnpad[2]{ static_cast<int>(std::round(static_cast<float>(shape.width) * r)),
                     static_cast<int>(std::round(static_cast<float>(shape.height) * r)) };

    // Total padding needed along each axis.
    float dw = static_cast<float>(newShape.width - newUnpad[0]);
    float dh = static_cast<float>(newShape.height - newUnpad[1]);

    if (auto_)
    {
        // Keep only the remainder modulo stride; a non-positive stride would
        // make the modulo undefined, so it is skipped in that case.
        if (stride > 0)
        {
            dw = static_cast<float>(static_cast<int>(dw) % stride);
            dh = static_cast<float>(static_cast<int>(dh) % stride);
        }
    }
    else if (scaleFill)
    {
        dw = 0.0f;
        dh = 0.0f;
        newUnpad[0] = newShape.width;
        newUnpad[1] = newShape.height;
    }

    // Split the padding evenly between the two sides of each axis.
    dw /= 2.0f;
    dh /= 2.0f;

    // Resize when EITHER dimension changes; otherwise still hand the pixels to
    // outImage so the border call below never operates on an empty matrix.
    if (shape.width != newUnpad[0] || shape.height != newUnpad[1])
    {
        cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
    }
    else
    {
        image.copyTo(outImage);
    }

    // The -0.1 / +0.1 offsets make an odd total padding split as n and n + 1
    // instead of rounding both halves the same way.
    const int top = static_cast<int>(std::round(dh - 0.1f));
    const int bottom = static_cast<int>(std::round(dh + 0.1f));
    const int left = static_cast<int>(std::round(dw - 0.1f));
    const int right = static_cast<int>(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
|
||
|
||
// Pre-processing
|
||
vector<Mat> OnnxDetectorImpl::preProcess(Mat& input_image)
|
||
{
|
||
Mat blob;
|
||
blobFromImage(input_image, blob, 1. / 255., Size(size_), Scalar(), false, false);
|
||
|
||
network_.setInput(blob);
|
||
|
||
vector<std::string> names = network_.getUnconnectedOutLayersNames();
|
||
|
||
vector<Mat> outputs;
|
||
network_.forward(outputs, names);
|
||
|
||
return outputs;
|
||
}
|
||
|
||
// Post-processing
|
||
#if 1
|
||
vector<Detection2d> OnnxDetectorImpl::postProcess(Mat& input_image, vector<Mat>& outputs)
|
||
{
|
||
vector<int> class_ids;
|
||
vector<float> confidences;
|
||
vector<Rect> boxes;
|
||
|
||
float x_factor = input_image.cols / size_.width;
|
||
float y_factor = input_image.rows / size_.height;
|
||
|
||
float* data = (float*)outputs[0].data;
|
||
|
||
const int dimensions = 5 + classNum_;
|
||
//const int rows = 25200;
|
||
const int rows =(1 + 4 + 16) * (size_.width / imgStride_) * (size_.height / imgStride_);
|
||
for (int i = 0; i < rows; ++i)
|
||
{
|
||
#if 1
|
||
float confidence = data[rows * 4 + i];
|
||
if (confidence > confidence_)
|
||
{
|
||
int centerX = (int)(data[rows * 0 + i] * x_factor);
|
||
int centerY = (int)(data[rows * 1 + i] * y_factor);
|
||
int width = (int)(data[rows * 2 + i] * x_factor);
|
||
int height = (int)(data[rows * 3 + i] * y_factor);
|
||
int left = centerX - width / 2;
|
||
int top = centerY - height / 2;
|
||
|
||
boxes.emplace_back(left, top, width, height);
|
||
confidences.emplace_back(confidence);
|
||
class_ids.emplace_back(0);
|
||
}
|
||
#else
|
||
float confidence = data[4];
|
||
if (confidence >= confidence_)
|
||
{
|
||
float* classes_scores = data + 5;
|
||
Mat scores(1, classNum_, CV_32FC1, classes_scores);
|
||
Point class_id;
|
||
double max_class_score;
|
||
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
|
||
if (max_class_score > scoreThreshold_)
|
||
{
|
||
confidences.push_back(confidence);
|
||
class_ids.push_back(class_id.x);
|
||
|
||
float cx = data[0];
|
||
float cy = data[1];
|
||
float w = data[2];
|
||
float h = data[3];
|
||
int left = int((cx - 0.5 * w) * x_factor);
|
||
int top = int((cy - 0.5 * h) * y_factor);
|
||
int width = int(w * x_factor);
|
||
int height = int(h * y_factor);
|
||
boxes.push_back(Rect(left, top, width, height));
|
||
}
|
||
}
|
||
#endif
|
||
//data += dimensions;
|
||
}
|
||
|
||
vector<Detection2d> res2d;
|
||
vector<int> indices;
|
||
NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices);
|
||
for (int i = 0; i < indices.size(); i++)
|
||
{
|
||
int idx = indices[i];
|
||
Rect box = boxes[idx];
|
||
|
||
int left = box.x;
|
||
int top = box.y;
|
||
int width = box.width;
|
||
int height = box.height;
|
||
//rectangle(output_image, Point(left, top), Point(left + width, top + height), BLUE, 3 * THICKNESS);
|
||
|
||
//string label = format("%.2f", confidences[idx]);
|
||
//label = class_name[class_ids[idx]] + ":" + label;
|
||
//draw_label(output_image, label, left, top);
|
||
|
||
Detection2d d2d;
|
||
d2d.bbox = box;
|
||
d2d.classIdx = idx;
|
||
d2d.confidence = confidences[idx];
|
||
|
||
res2d.push_back(d2d);
|
||
}
|
||
return res2d;
|
||
}
|
||
#else
|
||
/// Logistic function: maps any real `x` into the open interval (0, 1).
inline float sigmoid(float x)
{
    const float decay = std::exp(-x);
    return 1.f / (1.f + decay);
}
|
||
|
||
vector<Detection2d> OnnxDetectorImpl::postProcess(Mat& cv_src, vector<Mat>& outs)
|
||
{
|
||
std::vector<int> classIds;
|
||
std::vector<float> confidences;
|
||
std::vector<cv::Rect> boxes;
|
||
|
||
int strides[] = { 8, 16, 32 };
|
||
std::vector<std::vector<int> > anchors = {
|
||
{ 10,13, 16,30, 33,23 },
|
||
{ 30,61, 62,45, 59,119 },
|
||
{ 116,90, 156,198, 373,326 }
|
||
};
|
||
|
||
for (size_t k = 0; k < outs.size(); k++) {
|
||
|
||
float* data = outs[k].ptr<float>();
|
||
int stride = strides[k];
|
||
int num_classes = outs[k].size[4] - 5;
|
||
for (int i = 0; i < outs[k].size[2]; i++) {
|
||
for (int j = 0; j < outs[k].size[3]; j++) {
|
||
for (int a = 0; a < outs[k].size[1]; ++a) {
|
||
|
||
float* record = data + a * outs[k].size[2] * outs[k].size[3] * outs[k].size[4] +
|
||
i * outs[k].size[3] * outs[k].size[4] + j * outs[k].size[4];
|
||
float* cls_ptr = record + 5;
|
||
for (int cls = 0; cls < num_classes; cls++) {
|
||
|
||
float score = sigmoid(cls_ptr[cls]) * sigmoid(record[4]);
|
||
if (score > scoreThreshold_) {
|
||
|
||
float cx = (sigmoid(record[0]) * 2.f - 0.5f + (float)j) * (float)stride;
|
||
float cy = (sigmoid(record[1]) * 2.f - 0.5f + (float)i) * (float)stride;
|
||
float w = pow(sigmoid(record[2]) * 2.f, 2) * anchors[k][2 * a];
|
||
float h = pow(sigmoid(record[3]) * 2.f, 2) * anchors[k][2 * a + 1];
|
||
float x1 = std::max(0, std::min(cv_src.cols, int((cx - w / 2.f) * (float)cv_src.cols / (float)size_.width)));
|
||
float y1 = std::max(0, std::min(cv_src.rows, int((cy - h / 2.f) * (float)cv_src.rows / (float)size_.width)));
|
||
float x2 = std::max(0, std::min(cv_src.cols, int((cx + w / 2.f) * (float)cv_src.cols / (float)size_.width)));
|
||
float y2 = std::max(0, std::min(cv_src.rows, int((cy + h / 2.f) * (float)cv_src.rows / (float)size_.width)));
|
||
classIds.push_back(cls);
|
||
confidences.push_back(score);
|
||
boxes.push_back(cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)));
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
vector<Detection2d> res2d;
|
||
std::vector<int> indices;
|
||
cv::dnn::NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices);
|
||
for (size_t i = 0; i < indices.size(); i++){
|
||
|
||
int idx = indices[i];
|
||
cv::Rect box = boxes[idx];
|
||
//drawPred(classIds[idx], confidences[idx], box.x, box.y,
|
||
// box.x + box.width, box.y + box.height, cv_src, classes);
|
||
|
||
Detection2d d2d;
|
||
d2d.classIdx = classIds[idx];
|
||
d2d.bbox = box;
|
||
d2d.confidence = confidences[idx];
|
||
res2d.push_back(d2d);
|
||
}
|
||
|
||
return res2d;
|
||
}
|
||
#endif
|
||
|
||
int OnnxDetectorImpl::loadOnnxModel(const char* filename, cv::Size2f inferSize) {
|
||
|
||
std::vector< std::pair<cv::dnn::Backend, cv::dnn::Target> > backends = cv::dnn::getAvailableBackends();
|
||
for (std::pair<cv::dnn::Backend, cv::dnn::Target>& det : backends) {
|
||
std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl;
|
||
}
|
||
|
||
size_ = inferSize;
|
||
network_ = cv::dnn::readNet(filename);
|
||
|
||
//std::vector< std::pair<cv::dnn::Backend, cv::dnn::Target> > backends = cv::dnn::getAvailableBackends();
|
||
//for (std::pair<cv::dnn::Backend, cv::dnn::Target>& det : backends) {
|
||
// std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl;
|
||
//}
|
||
//network_.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||
//network_.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||
|
||
return 0;
|
||
}
|
||
|
||
/// Runs the loaded network on `image` and returns the decoded detections.
///
/// @param image  Image to analyse.
/// @return Detections in `image` pixel coordinates; empty when `image` is
///         empty or no network has been loaded.
std::vector<Detection2d> OnnxDetectorImpl::detect(cv::Mat image) {

    if (image.empty() || network_.empty())
        return std::vector<Detection2d>();

    // preProcess() only reads the pixels to build the input blob and
    // postProcess() only reads the image size, so the frame is passed straight
    // through instead of being deep-copied into a scratch buffer first.
    std::vector<cv::Mat> detections = preProcess(image);
    return postProcess(image, detections);
}
|
||
|
||
/// Factory for the default detector implementation.
///
/// @return A detector allocated with `new`; nothing in this function retains
///         the pointer, so releasing it is left to the caller.
OnnxDetector* OnnxDetector::CreateInstance() {
    OnnxDetector* const detector = new OnnxDetectorImpl();
    return detector;
}