camAlgo/camCalib/onnxDetector.cpp

#include "onnxDetector.h"
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;
using namespace cv::dnn;

class OnnxDetectorImpl :
	public OnnxDetector
{
public:
	OnnxDetectorImpl() :
        classNum_(1), imgStride_(32), confidence_(0.25f),
        scoreThreshold_(0.45f), nmsThreshold_(0.3f){};
	~OnnxDetectorImpl() {};

public:
	virtual int loadOnnxModel(const char* filename, cv::Size2f inferSize) override;
	virtual std::vector<Detection2d> detect(cv::Mat image) override;

private:
    vector<Mat> preProcess(Mat& input_image);
    vector<Detection2d> postProcess(Mat& input_image, vector<Mat>& outputs);

private:
    int classNum_;
    int imgStride_;
    float confidence_;
    float scoreThreshold_;
    float nmsThreshold_;
    cv::Size2f size_;
	cv::dnn::Net network_;
};

void letterbox(const cv::Mat& image, cv::Mat& outImage,
    const cv::Size& newShape = cv::Size(640, 640),
    const cv::Scalar& color = cv::Scalar(114, 114, 114),
    bool auto_ = true,
    bool scaleFill = false,
    bool scaleUp = true,
    int stride = 32)
{
    cv::Size shape = image.size();
    float r = std::min((float)newShape.height / (float)shape.height,
        (float)newShape.width / (float)shape.width);
    if (!scaleUp)
        r = std::min(r, 1.0f);

    float ratio[2]{ r, r };
    int newUnpad[2]{ (int)std::round((float)shape.width * r),
                     (int)std::round((float)shape.height * r) };

    auto dw = (float)(newShape.width - newUnpad[0]);
    auto dh = (float)(newShape.height - newUnpad[1]);

    if (auto_)
    {
        dw = (float)((int)dw % stride);
        dh = (float)((int)dh % stride);
    }
    else if (scaleFill)
    {
        dw = 0.0f;
        dh = 0.0f;
        newUnpad[0] = newShape.width;
        newUnpad[1] = newShape.height;
        ratio[0] = (float)newShape.width / (float)shape.width;
        ratio[1] = (float)newShape.height / (float)shape.height;
    }

    dw /= 2.0f;
    dh /= 2.0f;

    if (shape.width != newUnpad[0] && shape.height != newUnpad[1])
    {
        cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
    }

    int top = int(std::round(dh - 0.1f));
    int bottom = int(std::round(dh + 0.1f));
    int left = int(std::round(dw - 0.1f));
    int right = int(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}

// Ԥ<><D4A4><EFBFBD><EFBFBD>
vector<Mat> OnnxDetectorImpl::preProcess(Mat& input_image)
{
    Mat blob;
    blobFromImage(input_image, blob, 1. / 255., Size(size_), Scalar(), false, false);

    network_.setInput(blob);

    vector<std::string> names = network_.getUnconnectedOutLayersNames();

    vector<Mat> outputs;
    network_.forward(outputs, names);

    return outputs;
}

// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
#if 1
vector<Detection2d> OnnxDetectorImpl::postProcess(Mat& input_image, vector<Mat>& outputs)
{
    vector<int> class_ids;
    vector<float> confidences;
    vector<Rect> boxes;

    float x_factor = input_image.cols / size_.width;
    float y_factor = input_image.rows / size_.height;

    float* data = (float*)outputs[0].data;

    const int dimensions = 5 + classNum_;
    //const int rows = 25200;
    const int rows =(1 + 4 + 16) * (size_.width / imgStride_) * (size_.height / imgStride_);
    for (int i = 0; i < rows; ++i)
    {
#if 1
        float confidence = data[rows * 4 + i];
        if (confidence > confidence_)
        {
            int centerX = (int)(data[rows * 0 + i] * x_factor);
            int centerY = (int)(data[rows * 1 + i] * y_factor);
            int width = (int)(data[rows * 2 + i] * x_factor);
            int height = (int)(data[rows * 3 + i] * y_factor);
            int left = centerX - width / 2;
            int top = centerY - height / 2;

            boxes.emplace_back(left, top, width, height);
            confidences.emplace_back(confidence);
            class_ids.emplace_back(0);
        }
#else
        float confidence = data[4];
        if (confidence >= confidence_)
        {
            float* classes_scores = data + 5;
            Mat scores(1, classNum_, CV_32FC1, classes_scores);
            Point class_id;
            double max_class_score;
            minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
            if (max_class_score > scoreThreshold_)
            {
                confidences.push_back(confidence);
                class_ids.push_back(class_id.x);

                float cx = data[0];
                float cy = data[1];
                float w = data[2];
                float h = data[3];
                int left = int((cx - 0.5 * w) * x_factor);
                int top = int((cy - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                boxes.push_back(Rect(left, top, width, height));
            }
        }
#endif
        //data += dimensions;
    }

    vector<Detection2d> res2d;
    vector<int> indices;
    NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices);
    for (int i = 0; i < indices.size(); i++)
    {
        int idx = indices[i];
        Rect box = boxes[idx];

        int left = box.x;
        int top = box.y;
        int width = box.width;
        int height = box.height;
        //rectangle(output_image, Point(left, top), Point(left + width, top + height), BLUE, 3 * THICKNESS);

        //string label = format("%.2f", confidences[idx]);
        //label = class_name[class_ids[idx]] + ":" + label;
        //draw_label(output_image, label, left, top);

        Detection2d d2d;
        d2d.bbox = box;
        d2d.classIdx = idx;
        d2d.confidence = confidences[idx];

        res2d.push_back(d2d);
    }
    return res2d;
}
#else
inline float sigmoid(float x)
{
    return 1.f / (1.f + exp(-x));
}

vector<Detection2d> OnnxDetectorImpl::postProcess(Mat& cv_src, vector<Mat>& outs)
{
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    int strides[] = { 8, 16, 32 };
    std::vector<std::vector<int> > anchors = {
        { 10,13, 16,30, 33,23 },
        { 30,61, 62,45, 59,119 },
        { 116,90, 156,198, 373,326 }
    };

    for (size_t k = 0; k < outs.size(); k++) {

        float* data = outs[k].ptr<float>();
        int stride = strides[k];
        int num_classes = outs[k].size[4] - 5;
        for (int i = 0; i < outs[k].size[2]; i++) {
            for (int j = 0; j < outs[k].size[3]; j++) {
                for (int a = 0; a < outs[k].size[1]; ++a) {

                    float* record = data + a * outs[k].size[2] * outs[k].size[3] * outs[k].size[4] +
                        i * outs[k].size[3] * outs[k].size[4] + j * outs[k].size[4];
                    float* cls_ptr = record + 5;
                    for (int cls = 0; cls < num_classes; cls++) {

                        float score = sigmoid(cls_ptr[cls]) * sigmoid(record[4]);
                        if (score > scoreThreshold_) {

                            float cx = (sigmoid(record[0]) * 2.f - 0.5f + (float)j) * (float)stride;
                            float cy = (sigmoid(record[1]) * 2.f - 0.5f + (float)i) * (float)stride;
                            float w = pow(sigmoid(record[2]) * 2.f, 2) * anchors[k][2 * a];
                            float h = pow(sigmoid(record[3]) * 2.f, 2) * anchors[k][2 * a + 1];
                            float x1 = std::max(0, std::min(cv_src.cols, int((cx - w / 2.f) * (float)cv_src.cols / (float)size_.width)));
                            float y1 = std::max(0, std::min(cv_src.rows, int((cy - h / 2.f) * (float)cv_src.rows / (float)size_.width)));
                            float x2 = std::max(0, std::min(cv_src.cols, int((cx + w / 2.f) * (float)cv_src.cols / (float)size_.width)));
                            float y2 = std::max(0, std::min(cv_src.rows, int((cy + h / 2.f) * (float)cv_src.rows / (float)size_.width)));
                            classIds.push_back(cls);
                            confidences.push_back(score);
                            boxes.push_back(cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)));
                        }
                    }
                }
            }
        }
    }

    vector<Detection2d> res2d;
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, scoreThreshold_, nmsThreshold_, indices);
    for (size_t i = 0; i < indices.size(); i++){

        int idx = indices[i];
        cv::Rect box = boxes[idx];
        //drawPred(classIds[idx], confidences[idx], box.x, box.y,
        //    box.x + box.width, box.y + box.height, cv_src, classes);

        Detection2d d2d;
        d2d.classIdx = classIds[idx];
        d2d.bbox = box;
        d2d.confidence = confidences[idx];
        res2d.push_back(d2d);
    }

    return res2d;
}
#endif

int OnnxDetectorImpl::loadOnnxModel(const char* filename, cv::Size2f inferSize) {

    std::vector< std::pair<cv::dnn::Backend, cv::dnn::Target> > backends = cv::dnn::getAvailableBackends();
    for (std::pair<cv::dnn::Backend, cv::dnn::Target>& det : backends) {
        std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl;
    }

    size_ = inferSize;
	network_ = cv::dnn::readNet(filename);

    //std::vector< std::pair<cv::dnn::Backend, cv::dnn::Target> > backends = cv::dnn::getAvailableBackends();
    //for (std::pair<cv::dnn::Backend, cv::dnn::Target>& det : backends) {
    //    std::cout << "Detected Valid Backends: " << det.first << ", " << det.second << std::endl;
    //}
    //network_.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    //network_.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);

	return 0;
}

std::vector<Detection2d> OnnxDetectorImpl::detect(cv::Mat image) {

    std::vector<Detection2d> res2d;
    if (image.empty() || network_.empty())
        return res2d;

    const int width = std::max(image.rows, image.cols);
    cv::Mat img = cv::Mat::zeros(image.rows, image.cols, image.type());
    image.copyTo(img(cv::Rect(0, 0, image.cols, image.rows)));

    vector<Mat> detections = preProcess(img);
    return postProcess(img, detections);
}

OnnxDetector* OnnxDetector::CreateInstance() {
    return new OnnxDetectorImpl();
}