Mask R-CNN：在 Windows 上用 C++ 做预测

it2022-05-05 126

我自己做下记录

keras 训练代码

https://github.com/matterport/Mask_RCNN

1. 将 Keras 模型转换为 .pb（冻结图）文件

# Export the Keras Mask R-CNN inference model as a frozen TensorFlow graph (.pb).
#
# Prerequisite: `model` must already be defined before this snippet runs; the
# code only relies on it exposing a `keras_model` attribute.
# NOTE(review): presumably a matterport `MaskRCNN` built in inference mode with
# trained weights loaded -- confirm against the training script.
# This snippet uses the TensorFlow 1.x API (tf.gfile, graph_util).
import os

import tensorflow as tf
from keras import backend as K
from tensorflow.python.framework import graph_util

model_keras = model.keras_model

# All new operations will be in test mode from now on.
K.set_learning_phase(0)

# Create output layer with customized names: each model output is wrapped in
# an identity op called "output_<name>" so it can be fetched by name later.
pred_node_names = ["detections", "mrcnn_class", "mrcnn_bbox", "mrcnn_mask",
                   "rois", "rpn_class", "rpn_bbox"]
pred_node_names = ["output_" + name for name in pred_node_names]
pred = [tf.identity(output, name=node_name)
        for output, node_name in zip(model_keras.outputs, pred_node_names)]

sess = K.get_session()

# Get the object detection graph with all variables folded into constants.
od_graph_def = graph_util.convert_variables_to_constants(
    sess, sess.graph.as_graph_def(), pred_node_names)

model_dirpath = os.path.dirname("model/")
if not os.path.exists(model_dirpath):
    os.mkdir(model_dirpath)

filename = 'seg_model.pb'
pb_filepath = os.path.join(model_dirpath, filename)
print('Saving frozen graph {} ...'.format(os.path.basename(pb_filepath)))

frozen_graph_path = pb_filepath
with tf.gfile.GFile(frozen_graph_path, 'wb') as f:
    f.write(od_graph_def.SerializeToString())

2. Windows 下的 C++ 调用代码

#include "pch.h" #include <iostream> #include <tchar.h> #define COMPILER_MSVC #define NOMINMA //#include "stdafx.h" #include <iostream> //#include <Eigen\\Dense> #include "tensorflow/core/public/session.h" #include "tensorflow/cc/ops/standard_ops.h" using namespace tensorflow; #define COMPILER_MSVC #define NOMINMAX #define _SCL_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS #include <fstream> #include <utility> #include <vector> #include <iostream> #include <sstream> #include <string> #include <tensorflow/cc/ops/array_ops.h> #include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/graph/default_device.h" #include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session.h" #include "tensorflow/core/util/command_line_flags.h" #include <opencv2/opencv.hpp> #include <opencv2/highgui/highgui.hpp> #include <opencv2/imgproc/imgproc.hpp> #include<vector> using namespace cv; // These are all common classes it's handy to reference with no namespace. 
using tensorflow::Flag; using tensorflow::Tensor; using tensorflow::Status; using tensorflow::string; using tensorflow::int32; using namespace std; // ensure TensorFlow C++ build OK //int main() { // printf("Hello World from Tensorflow C libnrary version %s\n", TF_Version()); // tensorflow::Session* session = tensorflow::NewSession(tensorflow::SessionOptions()); // return 0; //} struct maskBox { float fScore; int x1; int x2; int y1; int y2; int area; vector<cv::Point> vecContourPt; int iClass; }; //升序排列 bool cmpScore(maskBox lsh, maskBox rsh) { if (lsh.fScore < rsh.fScore) return true; else return false; } void nms(vector<maskBox> &boundingBox_, const float overlap_threshold, string modelname = "Union") { if (boundingBox_.empty()) { return; } //对各个候选框根据score的大小进行升序排列 sort(boundingBox_.begin(), boundingBox_.end(), cmpScore); float IOU = 0; float maxX = 0; float maxY = 0; float minX = 0; float minY = 0; vector<int> vPick; int nPick = 0; multimap<float, int> vScores; //存放升序排列后的score和对应的序号 const int num_boxes = boundingBox_.size(); vPick.resize(num_boxes); for (int i = 0; i < num_boxes; ++i) { vScores.insert(pair<float, int>(boundingBox_[i].fScore, i)); } while (vScores.size() > 0) { int last = vScores.rbegin()->second; //反向迭代器，获得vScores序列的最后那个序列号 vPick[nPick] = last; nPick += 1; auto iter = vScores.end(); iter--; vScores.erase(iter); for (multimap<float, int>::iterator it = vScores.begin(); it != vScores.end();) { int it_idx = it->second; maxX = max(boundingBox_.at(it_idx).x1, boundingBox_.at(last).x1); maxY = max(boundingBox_.at(it_idx).y1, boundingBox_.at(last).y1); minX = min(boundingBox_.at(it_idx).x2, boundingBox_.at(last).x2); minY = min(boundingBox_.at(it_idx).y2, boundingBox_.at(last).y2); //转换成了两个边界框相交区域的边长 maxX = ((minX - maxX + 1) > 0) ? (minX - maxX + 1) : 0; maxY = ((minY - maxY + 1) > 0) ? 
(minY - maxY + 1) : 0; //求交并比IOU IOU = (maxX * maxY) / (boundingBox_.at(it_idx).area + boundingBox_.at(last).area - IOU); if (IOU > overlap_threshold) { it = vScores.erase(it++); //删除交并比大于阈值的候选框,erase返回删除元素的下一个元素 } else { it++; } } } vPick.resize(nPick); vector<maskBox> tmp_; tmp_.resize(nPick); for (int i = 0; i < nPick; i++) { tmp_[i] = boundingBox_[vPick[i]]; } boundingBox_ = tmp_; } int main(int argc, char* argv[]) { cv::Mat inputMat; inputMat = cv::imread("F:\\data\\segdata\\test\\16378\\16378.jpg", CV_LOAD_IMAGE_COLOR); // cvtColor(inputMat, inputMat, CV_BGR2GRAY); int TF_MASKRCNN_IMG_WIDTHHEIGHT = 768; cv::Scalar TF_MASKRCNN_MEAN_PIXEL(123.7, 116.8, 103.9); // float TF_MASKRCNN_IMAGE_METADATA[38] = { 0, TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT, 3, TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT, 3, 0, TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT,1, 0, 0 }; float TF_MASKRCNN_IMAGE_METADATA[38] = { 0, inputMat.rows, inputMat.cols, 3, TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT, 3, 17, 0, TF_MASKRCNN_IMG_WIDTHHEIGHT,TF_MASKRCNN_IMG_WIDTHHEIGHT, 0.627, 0 }; cv::Mat dest = cv::Mat(inputMat.size(), CV_8UC3); dest = inputMat.clone(); //Resizr to square with max dim, so we can resize it to 256x256 int largestDim = inputMat.size().height > inputMat.size().width ? 
inputMat.size().height : inputMat.size().width; cv::Mat squareInputMat(cv::Size(largestDim, largestDim), CV_8UC3); int leftBorder = (largestDim - inputMat.size().width) / 2; int topBorder = (largestDim - inputMat.size().height) / 2; cv::copyMakeBorder(inputMat, squareInputMat, topBorder, largestDim - (inputMat.size().height + topBorder), leftBorder, largestDim - (inputMat.size().width + leftBorder), cv::BORDER_CONSTANT, cv::Scalar(0)); cv::Mat resizedInputMat(cv::Size(TF_MASKRCNN_IMG_WIDTHHEIGHT, TF_MASKRCNN_IMG_WIDTHHEIGHT), CV_8UC3); cv::resize(squareInputMat, resizedInputMat, resizedInputMat.size(), 0, 0); cv::Mat dst = resizedInputMat.clone(); // Need to "mold_image" like in mask rcnn cv::Mat moldedInput(resizedInputMat.size(), CV_32FC3); resizedInputMat.convertTo(moldedInput, CV_32FC3); cv::subtract(moldedInput, TF_MASKRCNN_MEAN_PIXEL, moldedInput); tensorflow::Tensor inputTensor(tensorflow::DT_FLOAT, { 1, moldedInput.size().height, moldedInput.size().width, 3 }); // single image instance with 3 channels float_t *p = inputTensor.flat<float_t>().data(); cv::Mat inputTensorMat(moldedInput.size(), CV_32FC3, p); moldedInput.convertTo(inputTensorMat, CV_32FC3); int TF_MASKRCNN_IMAGE_METADATA_LENGTH = 38; // Copy the TF_MASKRCNN_IMAGE_METADATA data into a tensor tensorflow::Tensor inputMetadataTensor(tensorflow::DT_FLOAT, { 1, TF_MASKRCNN_IMAGE_METADATA_LENGTH }); auto inputMetadataTensorMap = inputMetadataTensor.tensor<float, 2>(); for (int i = 0; i < TF_MASKRCNN_IMAGE_METADATA_LENGTH; ++i) { inputMetadataTensorMap(0, i) = TF_MASKRCNN_IMAGE_METADATA[i]; } // for specific 1920x1280 images auto input_anchors = tensorflow::Tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ 1,147312,4 })); auto anchors_API = input_anchors.tensor<float, 3>(); //input_anchors.flat<float_t>()(0, 0, 0) = 1.111111; string fileName = "F:\\gc\\maskrcnntest2017\\maskrcnntest\\x64\\Release\\model\\anchors.txt"; fstream in; in.open(fileName.c_str(), ios::in); if (!in.is_open()) { cout << 
"Can not find " << fileName << endl; system("pause"); } string buff; int i = 0; //line i while (getline(in, buff)) { vector<float> nums; // string->char * char *s_input = (char *)buff.c_str(); const char * split = ","; char *p2 = strtok(s_input, split); double a; while (p2 != NULL) { // char * -> int a = atof(p2); //cout << a << endl; nums.push_back(a); p2 = strtok(NULL, split); }//end while for (int b = 0; b < nums.size(); b++) { anchors_API(0, i, b) = nums[b]; }//end for i++; }//end while in.close(); string root_dir = ""; string graph = "F:\\gc\\maskrcnntest2017\\maskrcnntest\\x64\\Release\\model\\seg_model.pb"; // First we load and initialize the model. string graph_path = tensorflow::io::JoinPath(root_dir, graph); tensorflow::GraphDef graph_def; tensorflow::SessionOptions options; std::unique_ptr<tensorflow::Session> session(tensorflow::NewSession(options)); Status load_graph_status = ReadBinaryProto(tensorflow::Env::Default(), graph_path, &graph_def); //for (int n = 0; n < graph_def.node_size(); ++n) { // graph_def.mutable_node(n)->clear_device(); //} //tfSession.reset(tensorflow::NewSession(tensorflow::SessionOptions())); TF_CHECK_OK(session->Create(graph_def)); //Status session_create_status = session->Create(graph_def); //Status load_graph_status = LoadGraph(graph_path, &session); if (!load_graph_status.ok()) { LOG(ERROR) << "LoadGraph ERROR!!!!" << load_graph_status; cout << load_graph_status << endl; return -1; } // Actually run the image through the model. 
std::vector<Tensor> outputs; tensorflow::Status run_status = session->Run({ { "input_image", inputTensor },{ "input_image_meta", inputMetadataTensor },{ "input_anchors",input_anchors } }, { "output_detections", "output_mrcnn_class", "output_mrcnn_bbox", "output_mrcnn_mask", "output_rois", "output_rpn_class", "output_rpn_bbox" }, {}, &outputs); if (!run_status.ok()) { LOG(ERROR) << "Running model failed: " << run_status; return -1; } //if (outputs[3].shape().dims() != 5 || outputs[3].shape().dim_size(4) != 2) //{ // throw std::runtime_error("Expected mask dimensions to be [1,100,28,28,2] but got: " + outputs[3].shape().DebugString()); //} vector<maskBox> vecBox; auto detectionsMap = outputs[0].tensor<float, 3>(); auto mask = outputs[3].tensor<float, 5>(); for (int i = 0; i < outputs[3].shape().dim_size(1); ++i) { auto y1 = detectionsMap(0, i, 0) * TF_MASKRCNN_IMG_WIDTHHEIGHT; float x1 = detectionsMap(0, i, 1) * TF_MASKRCNN_IMG_WIDTHHEIGHT; auto y2 = detectionsMap(0, i, 2) * TF_MASKRCNN_IMG_WIDTHHEIGHT; float x2 = detectionsMap(0, i, 3) * TF_MASKRCNN_IMG_WIDTHHEIGHT; auto scoreAtI = detectionsMap(0, i, 5); // detectionsMap(0, i, 1) 0.8862123; detectionsMap(0, i, 3) 0.91774625 auto detectedClass = detectionsMap(0, i, 4); cout << x1 << " " << x2 << " " << y1 << " " << y2 << " " << scoreAtI << endl; maskBox stMaskBox; stMaskBox.fScore = scoreAtI; stMaskBox.iClass = detectedClass; auto walala = detectionsMap(0, i, 6); auto maskHeight = (y2 - y1), maskWidth = (x2 - x1); if (maskHeight != 0 && maskWidth != 0) { // Pointer arithmetic const int i0 = 0, /* size0 = (int)outputs[3].shape().dim_size(1), */ i1 = i, size1 = (int)outputs[3].shape().dim_size(1), h = (int)outputs[3].shape().dim_size(2), w = (int)outputs[3].shape().dim_size(3); int iClassNum = (int)outputs[3].shape().dim_size(4); // int pointerLocationOfI = (i0*size1 + i1)*size2; int pointerLocationOfI = h * w * iClassNum * i; float_t *maskPointer = outputs[3].flat<float_t>().data(); // The shape of the detection is 
[28,28,2], where the last index is the class of interest. // We'll extract index 1 because it's the toilet seat. cv::Mat initialMask(cv::Size(h, w), CV_32FC(iClassNum), &maskPointer[pointerLocationOfI]); // CV_32FC2 because I know size4 is 2 cv::Mat detectedMask(initialMask.size(), CV_32FC1); cv::extractChannel(initialMask, detectedMask, (int)detectedClass); // Convert to B&W cv::Mat binaryMask(detectedMask.size(), CV_8UC1); cv::threshold(detectedMask, binaryMask, 0.5, 255, cv::THRESH_BINARY); // First scale and offset in relation to TF_MASKRCNN_IMG_WIDTHHEIGHT cv::Mat scaledDetectionMat(maskHeight, maskWidth, CV_8UC1); cv::resize(binaryMask, scaledDetectionMat, scaledDetectionMat.size(), 0, 0); vector<vector<cv::Point>> contours; scaledDetectionMat.convertTo(scaledDetectionMat, CV_8UC1); findContours(scaledDetectionMat, contours, CV_RETR_TREE, CHAIN_APPROX_NONE); int iMaxArea = 0; int iNum = 0; for (int c = 0; c < contours.size(); c++) { if (contours[c].size() == 0) continue; double area = contourArea(contours[c]); // printf("area:%f \n", area); if (iMaxArea > area) { iNum = c; } } cv::Mat scaledOffsetMat(moldedInput.size(), CV_8UC1, cv::Scalar(0)); scaledDetectionMat.copyTo(scaledOffsetMat(cv::Rect(x1, y1, maskWidth, maskHeight))); cvtColor(scaledDetectionMat, scaledDetectionMat, CV_GRAY2BGR); int ilen = contours[iNum].size(); for (int k = 0; k < ilen; k++) { Point pt = contours[iNum][k]; Point org(x1, y1); pt = org+pt; contours[iNum][k] = pt; } //Scalar color(rand() / 255, rand() / 255, rand() / 255, rand() / 255); //drawContours(dst, contours, iNum, color); //Rect rect(x1, y1, x2 - x1, y2 - y1); //rectangle(dst, rect, color, 1); // string strText = to_string(stBox.iClass) + string(" ") + to_string(stBox.fScore); // putText(dst, strText, Point(stBox.x1, stBox.y1), 1, 1, color); stMaskBox.x1 = x1; stMaskBox.x2 = x2; stMaskBox.y1 = y1; stMaskBox.y2 = y2; stMaskBox.area = (x2 - x1)*(y2 - y1); stMaskBox.vecContourPt = contours[iNum]; vecBox.push_back(stMaskBox); } 
/**/ } nms(vecBox, 0.3, "Union"); for (int i = 0; i < vecBox.size(); i++) { maskBox stBox; stBox = vecBox[i]; vector<vector<cv::Point>> contours; contours.push_back(stBox.vecContourPt); Scalar color(rand() / 255, rand() / 255, rand() / 255, rand() / 255); drawContours(dst, contours, 0, color); Rect rect(stBox.x1, stBox.y1, stBox.x2-stBox.x1, stBox.y2-stBox.y1); rectangle(dst, rect, color, 1); string strText = to_string(stBox.iClass) + string(" ") + to_string(stBox.fScore); putText(dst, strText, Point(stBox.x1, stBox.y1), 2, 0.5, color); } cv::imshow("Detection Result", dst); cv::waitKey(0); //cv::imwrite("C:\\", dest); return 0; }

专利

最新回复(0)