#include "onnxDetector.h" #include using namespace std; using namespace cv; using namespace cv::dnn; class OnnxDetectorImpl : public OnnxDetector { public: OnnxDetectorImpl() : classNum_(1), imgStride_(32), confidence_(0.25f), scoreThreshold_(0.45f), nmsThreshold_(0.3f){}; ~OnnxDetectorImpl() {}; public: virtual int loadOnnxModel(const char* filename, cv::Size2f inferSize) override; virtual std::vector detect(cv::Mat image) override; private: vector preProcess(Mat& input_image); vector postProcess(Mat& input_image, vector& outputs); private: int classNum_; int imgStride_; float confidence_; float scoreThreshold_; float nmsThreshold_; cv::Size2f size_; cv::dnn::Net network_; }; void letterbox(const cv::Mat& image, cv::Mat& outImage, const cv::Size& newShape = cv::Size(640, 640), const cv::Scalar& color = cv::Scalar(114, 114, 114), bool auto_ = true, bool scaleFill = false, bool scaleUp = true, int stride = 32) { cv::Size shape = image.size(); float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width); if (!scaleUp) r = std::min(r, 1.0f); float ratio[2]{ r, r }; int newUnpad[2]{ (int)std::round((float)shape.width * r), (int)std::round((float)shape.height * r) }; auto dw = (float)(newShape.width - newUnpad[0]); auto dh = (float)(newShape.height - newUnpad[1]); if (auto_) { dw = (float)((int)dw % stride); dh = (float)((int)dh % stride); } else if (scaleFill) { dw = 0.0f; dh = 0.0f; newUnpad[0] = newShape.width; newUnpad[1] = newShape.height; ratio[0] = (float)newShape.width / (float)shape.width; ratio[1] = (float)newShape.height / (float)shape.height; } dw /= 2.0f; dh /= 2.0f; if (shape.width != newUnpad[0] && shape.height != newUnpad[1]) { cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1])); } int top = int(std::round(dh - 0.1f)); int bottom = int(std::round(dh + 0.1f)); int left = int(std::round(dw - 0.1f)); int right = int(std::round(dw + 0.1f)); cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color); } // 预处理 vector OnnxDetectorImpl::preProcess(Mat& input_image) { Mat blob; blobFromImage(input_image, blob, 1. / 255., Size(size_), Scalar(), false, false); network_.setInput(blob); vector names = network_.getUnconnectedOutLayersNames(); vector outputs; network_.forward(outputs, names); return outputs; } // 后处理 #if 1 vector OnnxDetectorImpl::postProcess(Mat& input_image, vector& outputs) { vector class_ids; vector confidences; vector boxes; float x_factor = input_image.cols / size_.width; float y_factor = input_image.rows / size_.height; float* data = (float*)outputs[0].data; const int dimensions = 5 + classNum_; //const int rows = 25200; const int rows =(1 + 4 + 16) * (size_.width / imgStride_) * (size_.height / imgStride_); for (int i = 0; i < rows; ++i) { #if 1 float confidence = data[rows * 4 + i]; if (confidence > confidence_) { int centerX = (int)(data[rows * 0 + i] * x_factor); int centerY = (int)(data[rows * 1 + i] * y_factor); int width = (int)(data[rows * 2 + i] * x_factor); int height = (int)(data[rows * 3 + i] * y_factor); int left = centerX - width / 2; int top = centerY - height / 2; boxes.emplace_back(left, top, width, height); confidences.emplace_back(confidence); class_ids.emplace_back(0); } #else float confidence = data[4]; if (confidence >= confidence_) { float* classes_scores = data + 5; Mat scores(1, classNum_, CV_32FC1, classes_scores); Point class_id; double max_class_score; minMaxLoc(scores, 0, &max_class_score, 0, &class_id); if (max_class_score > scoreThreshold_) { confidences.push_back(confidence); class_ids.push_back(class_id.x); float cx = data[0]; float cy = data[1]; float w = data[2]; float h = data[3]; int left = int((cx - 0.5 * w) * x_factor); int top = int((cy - 0.5 * h) * y_factor); int width = int(w * x_factor); int height = int(h * y_factor); boxes.push_back(Rect(left, top, width, height)); } } #endif //data += dimensions; } vector res2d; vector indices; NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices); for (int i = 0; i < indices.size(); i++) { int idx = indices[i]; Rect box = boxes[idx]; int left = box.x; int top = box.y; int width = box.width; int height = box.height; //rectangle(output_image, Point(left, top), Point(left + width, top + height), BLUE, 3 * THICKNESS); //string label = format("%.2f", confidences[idx]); //label = class_name[class_ids[idx]] + ":" + label; //draw_label(output_image, label, left, top); Detection2d d2d; d2d.bbox = box; d2d.classIdx = idx; d2d.confidence = confidences[idx]; res2d.push_back(d2d); } return res2d; } #else inline float sigmoid(float x) { return 1.f / (1.f + exp(-x)); } vector OnnxDetectorImpl::postProcess(Mat& cv_src, vector& outs) { std::vector classIds; std::vector confidences; std::vector boxes; int strides[] = { 8, 16, 32 }; std::vector > anchors = { { 10,13, 16,30, 33,23 }, { 30,61, 62,45, 59,119 }, { 116,90, 156,198, 373,326 } }; for (size_t k = 0; k < outs.size(); k++) { float* data = outs[k].ptr(); int stride = strides[k]; int num_classes = outs[k].size[4] - 5; for (int i = 0; i < outs[k].size[2]; i++) { for (int j = 0; j < outs[k].size[3]; j++) { for (int a = 0; a < outs[k].size[1]; ++a) { float* record = data + a * outs[k].size[2] * outs[k].size[3] * outs[k].size[4] + i * outs[k].size[3] * outs[k].size[4] + j * outs[k].size[4]; float* cls_ptr = record + 5; for (int cls = 0; cls < num_classes; cls++) { float score = sigmoid(cls_ptr[cls]) * sigmoid(record[4]); if (score > scoreThreshold_) { float cx = (sigmoid(record[0]) * 2.f - 0.5f + (float)j) * (float)stride; float cy = (sigmoid(record[1]) * 2.f - 0.5f + (float)i) * (float)stride; float w = pow(sigmoid(record[2]) * 2.f, 2) * anchors[k][2 * a]; float h = pow(sigmoid(record[3]) * 2.f, 2) * anchors[k][2 * a + 1]; float x1 = std::max(0, std::min(cv_src.cols, int((cx - w / 2.f) * (float)cv_src.cols / (float)size_.width))); float y1 = std::max(0, std::min(cv_src.rows, int((cy - h / 2.f) * (float)cv_src.rows / (float)size_.width))); float x2 = std::max(0, std::min(cv_src.cols, int((cx + w / 2.f) * (float)cv_src.cols / (float)size_.width))); float y2 = std::max(0, std::min(cv_src.rows, int((cy + h / 2.f) * (float)cv_src.rows / (float)size_.width))); classIds.push_back(cls); confidences.push_back(score); boxes.push_back(cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2))); } } } } } } vector res2d; std::vector indices; cv::dnn::NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices); for (size_t i = 0; i < indices.size(); i++){ int idx = indices[i]; cv::Rect box = boxes[idx]; //drawPred(classIds[idx], confidences[idx], box.x, box.y, // box.x + box.width, box.y + box.height, cv_src, classes); Detection2d d2d; d2d.classIdx = classIds[idx]; d2d.bbox = box; d2d.confidence = confidences[idx]; res2d.push_back(d2d); } return res2d; } #endif int OnnxDetectorImpl::loadOnnxModel(const char* filename, cv::Size2f inferSize) { std::vector< std::pair > backends = cv::dnn::getAvailableBackends(); for (std::pair& det : backends) { std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl; } size_ = inferSize; network_ = cv::dnn::readNet(filename); //std::vector< std::pair > backends = cv::dnn::getAvailableBackends(); //for (std::pair& det : backends) { // std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl; //} //network_.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); //network_.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); return 0; } std::vector OnnxDetectorImpl::detect(cv::Mat image) { std::vector res2d; if (image.empty() || network_.empty()) return res2d; const int width = std::max(image.rows, image.cols); cv::Mat img = cv::Mat::zeros(image.rows, image.cols, image.type()); image.copyTo(img(cv::Rect(0, 0, image.cols, image.rows))); vector detections = preProcess(img); return postProcess(img, detections); } OnnxDetector* OnnxDetector::CreateInstance() { return new OnnxDetectorImpl(); }