camAlgo/camCalib/onnxDetector.cpp

302 lines
9.9 KiB
C++
Raw Normal View History

#include "onnxDetector.h"
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
using namespace cv::dnn;
class OnnxDetectorImpl :
public OnnxDetector
{
public:
OnnxDetectorImpl() :
classNum_(1), imgStride_(32), confidence_(0.25f),
scoreThreshold_(0.45f), nmsThreshold_(0.3f){};
~OnnxDetectorImpl() {};
public:
virtual int loadOnnxModel(const char* filename, cv::Size2f inferSize) override;
virtual std::vector<Detection2d> detect(cv::Mat image) override;
private:
vector<Mat> preProcess(Mat& input_image);
vector<Detection2d> postProcess(Mat& input_image, vector<Mat>& outputs);
private:
int classNum_;
int imgStride_;
float confidence_;
float scoreThreshold_;
float nmsThreshold_;
cv::Size2f size_;
cv::dnn::Net network_;
};
void letterbox(const cv::Mat& image, cv::Mat& outImage,
const cv::Size& newShape = cv::Size(640, 640),
const cv::Scalar& color = cv::Scalar(114, 114, 114),
bool auto_ = true,
bool scaleFill = false,
bool scaleUp = true,
int stride = 32)
{
cv::Size shape = image.size();
float r = std::min((float)newShape.height / (float)shape.height,
(float)newShape.width / (float)shape.width);
if (!scaleUp)
r = std::min(r, 1.0f);
float ratio[2]{ r, r };
int newUnpad[2]{ (int)std::round((float)shape.width * r),
(int)std::round((float)shape.height * r) };
auto dw = (float)(newShape.width - newUnpad[0]);
auto dh = (float)(newShape.height - newUnpad[1]);
if (auto_)
{
dw = (float)((int)dw % stride);
dh = (float)((int)dh % stride);
}
else if (scaleFill)
{
dw = 0.0f;
dh = 0.0f;
newUnpad[0] = newShape.width;
newUnpad[1] = newShape.height;
ratio[0] = (float)newShape.width / (float)shape.width;
ratio[1] = (float)newShape.height / (float)shape.height;
}
dw /= 2.0f;
dh /= 2.0f;
if (shape.width != newUnpad[0] && shape.height != newUnpad[1])
{
cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
}
int top = int(std::round(dh - 0.1f));
int bottom = int(std::round(dh + 0.1f));
int left = int(std::round(dw - 0.1f));
int right = int(std::round(dw + 0.1f));
cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
// Ԥ<><D4A4><EFBFBD><EFBFBD>
vector<Mat> OnnxDetectorImpl::preProcess(Mat& input_image)
{
Mat blob;
blobFromImage(input_image, blob, 1. / 255., Size(size_), Scalar(), false, false);
network_.setInput(blob);
vector<std::string> names = network_.getUnconnectedOutLayersNames();
vector<Mat> outputs;
network_.forward(outputs, names);
return outputs;
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
#if 1
vector<Detection2d> OnnxDetectorImpl::postProcess(Mat& input_image, vector<Mat>& outputs)
{
vector<int> class_ids;
vector<float> confidences;
vector<Rect> boxes;
float x_factor = input_image.cols / size_.width;
float y_factor = input_image.rows / size_.height;
float* data = (float*)outputs[0].data;
const int dimensions = 5 + classNum_;
//const int rows = 25200;
const int rows =(1 + 4 + 16) * (size_.width / imgStride_) * (size_.height / imgStride_);
for (int i = 0; i < rows; ++i)
{
#if 1
float confidence = data[rows * 4 + i];
if (confidence > confidence_)
{
int centerX = (int)(data[rows * 0 + i] * x_factor);
int centerY = (int)(data[rows * 1 + i] * y_factor);
int width = (int)(data[rows * 2 + i] * x_factor);
int height = (int)(data[rows * 3 + i] * y_factor);
int left = centerX - width / 2;
int top = centerY - height / 2;
boxes.emplace_back(left, top, width, height);
confidences.emplace_back(confidence);
class_ids.emplace_back(0);
}
#else
float confidence = data[4];
if (confidence >= confidence_)
{
float* classes_scores = data + 5;
Mat scores(1, classNum_, CV_32FC1, classes_scores);
Point class_id;
double max_class_score;
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > scoreThreshold_)
{
confidences.push_back(confidence);
class_ids.push_back(class_id.x);
float cx = data[0];
float cy = data[1];
float w = data[2];
float h = data[3];
int left = int((cx - 0.5 * w) * x_factor);
int top = int((cy - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(Rect(left, top, width, height));
}
}
#endif
//data += dimensions;
}
vector<Detection2d> res2d;
vector<int> indices;
NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices);
for (int i = 0; i < indices.size(); i++)
{
int idx = indices[i];
Rect box = boxes[idx];
int left = box.x;
int top = box.y;
int width = box.width;
int height = box.height;
//rectangle(output_image, Point(left, top), Point(left + width, top + height), BLUE, 3 * THICKNESS);
//string label = format("%.2f", confidences[idx]);
//label = class_name[class_ids[idx]] + ":" + label;
//draw_label(output_image, label, left, top);
Detection2d d2d;
d2d.bbox = box;
d2d.classIdx = idx;
d2d.confidence = confidences[idx];
res2d.push_back(d2d);
}
return res2d;
}
#else
inline float sigmoid(float x)
{
return 1.f / (1.f + exp(-x));
}
vector<Detection2d> OnnxDetectorImpl::postProcess(Mat& cv_src, vector<Mat>& outs)
{
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
int strides[] = { 8, 16, 32 };
std::vector<std::vector<int> > anchors = {
{ 10,13, 16,30, 33,23 },
{ 30,61, 62,45, 59,119 },
{ 116,90, 156,198, 373,326 }
};
for (size_t k = 0; k < outs.size(); k++) {
float* data = outs[k].ptr<float>();
int stride = strides[k];
int num_classes = outs[k].size[4] - 5;
for (int i = 0; i < outs[k].size[2]; i++) {
for (int j = 0; j < outs[k].size[3]; j++) {
for (int a = 0; a < outs[k].size[1]; ++a) {
float* record = data + a * outs[k].size[2] * outs[k].size[3] * outs[k].size[4] +
i * outs[k].size[3] * outs[k].size[4] + j * outs[k].size[4];
float* cls_ptr = record + 5;
for (int cls = 0; cls < num_classes; cls++) {
float score = sigmoid(cls_ptr[cls]) * sigmoid(record[4]);
if (score > scoreThreshold_) {
float cx = (sigmoid(record[0]) * 2.f - 0.5f + (float)j) * (float)stride;
float cy = (sigmoid(record[1]) * 2.f - 0.5f + (float)i) * (float)stride;
float w = pow(sigmoid(record[2]) * 2.f, 2) * anchors[k][2 * a];
float h = pow(sigmoid(record[3]) * 2.f, 2) * anchors[k][2 * a + 1];
float x1 = std::max(0, std::min(cv_src.cols, int((cx - w / 2.f) * (float)cv_src.cols / (float)size_.width)));
float y1 = std::max(0, std::min(cv_src.rows, int((cy - h / 2.f) * (float)cv_src.rows / (float)size_.width)));
float x2 = std::max(0, std::min(cv_src.cols, int((cx + w / 2.f) * (float)cv_src.cols / (float)size_.width)));
float y2 = std::max(0, std::min(cv_src.rows, int((cy + h / 2.f) * (float)cv_src.rows / (float)size_.width)));
classIds.push_back(cls);
confidences.push_back(score);
boxes.push_back(cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)));
}
}
}
}
}
}
vector<Detection2d> res2d;
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices);
for (size_t i = 0; i < indices.size(); i++){
int idx = indices[i];
cv::Rect box = boxes[idx];
//drawPred(classIds[idx], confidences[idx], box.x, box.y,
// box.x + box.width, box.y + box.height, cv_src, classes);
Detection2d d2d;
d2d.classIdx = classIds[idx];
d2d.bbox = box;
d2d.confidence = confidences[idx];
res2d.push_back(d2d);
}
return res2d;
}
#endif
int OnnxDetectorImpl::loadOnnxModel(const char* filename, cv::Size2f inferSize) {
std::vector< std::pair<cv::dnn::Backend, cv::dnn::Target> > backends = cv::dnn::getAvailableBackends();
for (std::pair<cv::dnn::Backend, cv::dnn::Target>& det : backends) {
std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl;
}
size_ = inferSize;
network_ = cv::dnn::readNet(filename);
//std::vector< std::pair<cv::dnn::Backend, cv::dnn::Target> > backends = cv::dnn::getAvailableBackends();
//for (std::pair<cv::dnn::Backend, cv::dnn::Target>& det : backends) {
// std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl;
//}
//network_.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
//network_.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
return 0;
}
std::vector<Detection2d> OnnxDetectorImpl::detect(cv::Mat image) {
std::vector<Detection2d> res2d;
if (image.empty() || network_.empty())
return res2d;
const int width = std::max(image.rows, image.cols);
cv::Mat img = cv::Mat::zeros(image.rows, image.cols, image.type());
image.copyTo(img(cv::Rect(0, 0, image.cols, image.rows)));
vector<Mat> detections = preProcess(img);
return postProcess(img, detections);
}
OnnxDetector* OnnxDetector::CreateInstance() {
return new OnnxDetectorImpl();
}