Using TensorRT to Run Inference on a Paddle Model

1 Add softmax and argmax operators at the end of the model
2 Convert the Paddle model to an ONNX model
3 Convert the ONNX model to a TensorRT model
  3.1 Install TensorRT-8.5.3.1
  3.2 Use trtexec to compile, optimize, and export the ONNX model as an engine
4 TensorRT model inference test
5 Complete code
6 Test results

1 Add softmax and argmax operators at the end of the model
In the previous post on deploying the PaddleSeg OCRNet (HRNet) model in C++, the semantic segmentation model output raw float32 values with no softmax or argmax step, so post-processing in the application was expensive. Adding softmax and argmax operators at the end of the network via PaddleSeg/tools/export.py removes this post-processing bottleneck.
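To make the intent concrete, here is a minimal Paddle sketch of what the two appended operators do (the class count of 15 mirrors the commented-out num_classes_ in the deployment code below; the input size is arbitrary):

import paddle

logits = paddle.rand([1, 15, 720, 1280])              # raw N x C x H x W network output
probs = paddle.nn.functional.softmax(logits, axis=1)  # per-pixel class probabilities
labels = paddle.argmax(probs, axis=1)                 # N x H x W integer label map
print(labels.dtype, labels.shape)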
Export the inference model by following the reference document PaddleSeg/docs/model_export_cn.md. The exported files are saved in the output/inference_model folder, as shown below. The model output type is int32.
./output/inference_model
  ├── deploy.yaml            # deployment config file, mainly describing data preprocessing
  ├── model.pdmodel          # topology file of the inference model
  ├── model.pdiparams        # weight file of the inference model
  └── model.pdiparams.info   # extra parameter info, usually safe to ignore

python tools/export.py \
    --config configs/ocrnet/ocrnet_hrnetw18_cityscapes_1024x512_160k_lovasz_softmax.yml \
    --model_path output/iter_12000/model.pdparams \
    --save_dir output/inference_model \
    --output_op argmax

Before PaddleSeg v2.0, export.py had no argmax or softmax option; the following code can be used instead to append softmax and argmax operators to the end of the model.
import argparse
import os
import paddle
import yaml
from paddleseg.cvlibs import Config
from paddleseg.utils import logger


def parse_args():
    parser = argparse.ArgumentParser(description='Model export.')
    # params of training
    parser.add_argument(
        '--config',
        dest='cfg',
        help='The config file.',
        default=None,
        type=str,
        required=True)
    parser.add_argument(
        '--save_dir',
        dest='save_dir',
        help='The directory for saving the model snapshot',
        type=str,
        default='./output')
    parser.add_argument(
        '--model_path',
        dest='model_path',
        help='The path of model for evaluation',
        type=str,
        default=None)
    return parser.parse_args()


class SavedSegmentationNet(paddle.nn.Layer):
    def __init__(self, net, without_argmax=False, with_softmax=False):
        super().__init__()
        self.net = net
        self.post_processor = PostProcessor(without_argmax, with_softmax)

    def forward(self, x):
        outs = self.net(x)
        outs = self.post_processor(outs)
        return outs


class PostProcessor(paddle.nn.Layer):
    def __init__(self, without_argmax, with_softmax):
        super().__init__()
        self.without_argmax = without_argmax
        self.with_softmax = with_softmax

    def forward(self, outs):
        new_outs = []
        for out in outs:
            if self.with_softmax:
                out = paddle.nn.functional.softmax(out, axis=1)
            if not self.without_argmax:
                out = paddle.argmax(out, axis=1)
            new_outs.append(out)
        return new_outs


def main(args):
    os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'
    cfg = Config(args.cfg)
    net = cfg.model

    if args.model_path:
        para_state_dict = paddle.load(args.model_path)
        net.set_dict(para_state_dict)
        logger.info('Loaded trained params of model successfully.')

    # append softmax and argmax to the network
    # (without_argmax=False keeps the argmax op, with_softmax=True adds softmax)
    new_net = SavedSegmentationNet(net, without_argmax=False, with_softmax=True)
    new_net.eval()
    new_net = paddle.jit.to_static(
        new_net,
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, 3, None, None], dtype='float32')
        ])
    save_path = os.path.join(args.save_dir, 'model')
    paddle.jit.save(new_net, save_path)

    yml_file = os.path.join(args.save_dir, 'deploy.yaml')
    with open(yml_file, 'w') as file:
        transforms = cfg.export_config.get('transforms',
                                           [{'type': 'Normalize'}])
        data = {
            'Deploy': {
                'transforms': transforms,
                'model': 'model.pdmodel',
                'params': 'model.pdiparams'
            }
        }
        yaml.dump(data, file)

    logger.info(f'Model is saved in {args.save_dir}.')


if __name__ == '__main__':
    args = parse_args()
    main(args)
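Before moving on, it is worth confirming that the exported model really ends in argmax. A quick sanity check with the Paddle Inference Python API (a sketch; the 720x1280 dummy input is arbitrary within the [None, 3, None, None] spec):

import numpy as np
import paddle.inference as paddle_infer

config = paddle_infer.Config('output/inference_model/model.pdmodel',
                             'output/inference_model/model.pdiparams')
predictor = paddle_infer.create_predictor(config)

# feed a dummy image of any size allowed by the dynamic input spec
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
x = np.random.rand(1, 3, 720, 1280).astype('float32')
input_handle.reshape(x.shape)
input_handle.copy_from_cpu(x)
predictor.run()

out = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
print(out.dtype, out.shape)  # an integer dtype and shape (1, 720, 1280) indicate argmax ran in-graph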
2 Convert the Paddle model to an ONNX model
Reference documents: PaddleSeg/docs/model_export_onnx_cn.md and Paddle2ONNX.
(1) Install Paddle2ONNX
pip install paddle2onnx

(2) Model conversion. Run the following command to export the inference model in the output/inference_model folder to ONNX format with Paddle2ONNX. The exported model file is saved as model.onnx.
paddle2onnx --model_dir output/inference_model \
    --model_filename model.pdmodel \
    --params_filename model.pdiparams \
    --opset_version 12 \
    --save_file model.onnx \
    --enable_dev_version True
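Optionally, the resulting model.onnx can be sanity-checked before engine building, for example with onnxruntime (an extra dependency, not part of the original workflow):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('model.onnx', providers=['CPUExecutionProvider'])
input_name = sess.get_inputs()[0].name

x = np.random.rand(1, 3, 720, 1280).astype(np.float32)
outputs = sess.run(None, {input_name: x})
print(outputs[0].dtype, outputs[0].shape)  # should match the Paddle-side check above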
3 Convert the ONNX model to a TensorRT model
3.1 Install TensorRT-8.5.3.1
See the earlier post on TensorRT installation.
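A quick way to confirm the installation is visible to Python (a trivial check, assuming the TensorRT Python wheel was installed alongside):

import tensorrt as trt

print(trt.__version__)  # expect 8.5.3.1 to match the installed TensorRT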
3.2 Use trtexec to compile, optimize, and export the ONNX model as an engine
Because the model takes dynamic-shape input, the command specifies the allowed input shape range and the optimal shape. The exported engine file is saved as model.trt.
trtexec.exe --onnx=model.onnx --explicitBatch --fp16 --minShapes=x:1x3x540x960 --optShapes=x:1x3x720x1280 --maxShapes=x:1x3x1080x1920 --saveEngine=model.trt
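To verify what was baked into the engine, the serialized file can be deserialized and inspected with the TensorRT Python API; a sketch against the TensorRT 8.5 binding API (deprecated in newer releases):

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
with open('model.trt', 'rb') as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

for i in range(engine.num_bindings):
    print(engine.get_binding_name(i), engine.get_binding_dtype(i),
          engine.get_binding_shape(i))  # dynamic dims show up as -1

# min / opt / max shapes registered for binding 0 in profile 0
print(engine.get_profile_shape(0, 0))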
4 TensorRT model inference test
See the earlier post on testing a TensorRT segmentation model with dynamic-shape input.
5 Complete code
The code consists of the class declaration (LaneSegInferTRT.hpp) and its implementation (LaneSegInferTRT.cpp).
// LaneSegInferTRT.hpp
namespace TRTSegmentation {

class Logger : public nvinfer1::ILogger {
public:
    Logger(Severity severity = Severity::kWARNING) : severity_(severity) {}

    virtual void log(Severity severity, const char* msg) noexcept override {
        // suppress messages below the configured severity (info level by default)
        if (severity <= severity_) {
            //std::cout << msg << std::endl;
        }
    }

    nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }

private:
    Severity severity_;
};

struct InferDeleter {
    template <typename T>
    void operator()(T* obj) const {
        delete obj;
    }
};

template <typename T>
using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;

class LaneSegInferTRT {
public:
    LaneSegInferTRT(const std::string& seg_model_dir = "") {
        this->seg_model_dir_ = seg_model_dir;
        InitPredictor();
    }

    ~LaneSegInferTRT() {
        cudaFree(bindings_[0]);
        cudaFree(bindings_[1]);
    }

    void PredictSeg(const cv::Mat& image_mat,
                    std::vector<PaddleSegmentation::DataLane>& solLanes /* solid lanes */,
                    std::vector<PaddleSegmentation::DataLane>& dasLanes /* dashed lanes */,
                    std::vector<double>* times = nullptr);

private:
    void InitPredictor();

    // Preprocess image and copy data to input buffer
    cv::Mat Preprocess(const cv::Mat& image_mat);

    // Postprocess image
    void Postprocess(int rows, int cols, std::vector<int>& out_data,
                     std::vector<PaddleSegmentation::DataLane>& solLanes,
                     std::vector<PaddleSegmentation::DataLane>& dasLanes);

private:
    //static const int num_classes_ = 15;
    std::shared_ptr<nvinfer1::ICudaEngine> mEngine_;
    SampleUniquePtr<nvinfer1::IExecutionContext> context_seg_lane_;
    std::vector<void*> bindings_;
    std::string seg_model_dir_;
    int gpuMaxBufSize = 1280 * 720;  // maximum H*W of one output plane
};

}  // namespace TRTSegmentation

// LaneSegInferTRT.cpp
#include "LaneSegInferTRT.hpp"

namespace {

// local logger; note the header's TRTSegmentation::Logger shadows this one inside that namespace
class Logger : public nvinfer1::ILogger {
public:
    Logger(Severity severity = Severity::kWARNING) : severity_(severity) {}

    virtual void log(Severity severity, const char* msg) noexcept override {
        // suppress messages below the configured severity
        if (severity <= severity_) {
            //std::cout << msg << std::endl;
        }
    }

    nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }

private:
    Severity severity_;
};

}  // namespace

namespace TRTSegmentation {

#define CHECK(status)                                              \
    do                                                             \
    {                                                              \
        auto ret = (status);                                       \
        if (ret != 0)                                              \
        {                                                          \
            std::cerr << "Cuda failure: " << ret << std::endl;     \
        }                                                          \
    } while (0)

void LaneSegInferTRT::InitPredictor() {
    if (seg_model_dir_.empty()) {
        throw "Predictor must receive seg_model!";
    }

    // read the serialized engine from disk
    std::ifstream ifs(seg_model_dir_, std::ifstream::binary);
    if (!ifs) {
        throw "seg_model_dir error!";
    }
    ifs.seekg(0, std::ios_base::end);
    int size = ifs.tellg();
    ifs.seekg(0, std::ios_base::beg);
    std::unique_ptr<char[]> pData(new char[size]);
    ifs.read(pData.get(), size);
    ifs.close();

    // deserialize the engine
    Logger logger(nvinfer1::ILogger::Severity::kVERBOSE);
    SampleUniquePtr<nvinfer1::IRuntime> runtime{
        nvinfer1::createInferRuntime(logger.getTRTLogger())};
    mEngine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(pData.get(), size), InferDeleter());
    this->context_seg_lane_ = SampleUniquePtr<nvinfer1::IExecutionContext>(
        mEngine_->createExecutionContext());

    // allocate device buffers sized for the largest expected input/output
    bindings_.resize(mEngine_->getNbBindings());
    CHECK(cudaMalloc(&bindings_[0], sizeof(float) * 3 * gpuMaxBufSize));  // n*3*h*w
    CHECK(cudaMalloc(&bindings_[1], sizeof(int) * 1 * gpuMaxBufSize));    // n*1*h*w
}

cv::Mat LaneSegInferTRT::Preprocess(const cv::Mat& image_mat) {
    cv::Mat img;
    cv::cvtColor(image_mat, img, cv::COLOR_BGR2RGB);
    if (true /*is_normalize*/) {
        img.convertTo(img, CV_32F, 1.0 / 255, 0);
        img = (img - 0.5) / 0.5;
    }
    return img;
}

void LaneSegInferTRT::PredictSeg(const cv::Mat& image_mat,
                                 std::vector<PaddleSegmentation::DataLane>& solLanes,
                                 std::vector<PaddleSegmentation::DataLane>& dasLanes,
                                 std::vector<double>* times) {
    // Preprocess image
    cv::Mat img = Preprocess(image_mat);
    int rows = img.rows;
    int cols = img.cols;
    // bind the actual input size for this inference (dynamic shape)
    this->context_seg_lane_->setBindingDimensions(0, nvinfer1::Dims4{1, 3, rows, cols});
    int chs = img.channels();
    std::vector<float> input_data(1 * chs * rows * cols, 0.0f);
    // helper (defined elsewhere) that converts HWC image data to CHW layout
    hwc_img_2_chw_data(img, input_data.data());
    CHECK(cudaMemcpy(bindings_[0], static_cast<const void*>(input_data.data()),
                     3 * img.rows * img.cols * sizeof(float), cudaMemcpyHostToDevice));

    // Run predictor
    context_seg_lane_->executeV2(bindings_.data());

    // Get output tensor
    std::vector<int> out_data(1 * 1 * rows * cols);
    CHECK(cudaMemcpy(static_cast<void*>(out_data.data()), bindings_[1],
                     out_data.size() * sizeof(int), cudaMemcpyDeviceToHost));

    // Postprocessing
    Postprocess(rows, cols, out_data, solLanes, dasLanes);
}

void LaneSegInferTRT::Postprocess(int rows, int cols, std::vector<int>& out_data,
                                  std::vector<PaddleSegmentation::DataLane>& solLanes,
                                  std::vector<PaddleSegmentation::DataLane>& dasLanes) {
    PaddleSegmentation::LanePostProcess laneNet(rows, cols);
    laneNet.lanePostprocessForTRT(out_data, solLanes, dasLanes);
}

}  // namespace TRTSegmentation

6 Test results