目录
1--Tensor算子API
1-1--卷积算子
1-2--激活算子
1-3--池化算子
1-4--FC层算子
2--代码实例
3--编译运行 1--Tensor算子API
TensorRT提供了卷积层、激活函数和池化层三种最常用算子的API：
// 创建一个空的网络
nvinfer1::INetworkDefinition* network …目录
1--Tensor算子API
1-1--卷积算子
1-2--激活算子
1-3--池化算子
1-4--FC层算子
2--代码实例
3--编译运行 1--Tensor算子API
TensorRT提供了卷积层、激活函数和池化层三种最常用算子的API
// 创建一个空的网络
nvinfer1::INetworkDefinition* network builder-createNetworkV2(0U); // 添加卷积层算子
nvinfer1::IConvolutionLayer* conv1 network-addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap[features.0.weight], weightMap[features.0.bias]);// 添加激活算子
nvinfer1::IActivationLayer* relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);// 添加池化算子
nvinfer1::IPoolingLayer* pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});
1-1--卷积算子
IConvolutionLayer* addConvolutionNd(ITensor& input, int32_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights)
第一个参数表示输入的Tensor数据；第二个参数表示卷积层输出的特征图数，即通道数channel；第三个参数表示使用的卷积核大小；第四个参数和第五个参数表示加载的权重。
1-2--激活算子
IActivationLayer* addActivation(ITensor& input, ActivationType type)
第一个参数表示输入的Tensor数据；第二个参数表示使用的激活函数类型，包括以下激活函数：
enum class ActivationType : int32_t
{
    kRELU = 0,             //!< Rectified linear activation.
    kSIGMOID = 1,          //!< Sigmoid activation.
    kTANH = 2,             //!< TanH activation.
    kLEAKY_RELU = 3,       //!< LeakyRelu activation: x>=0 ? x : alpha * x.
    kELU = 4,              //!< Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
    kSELU = 5,             //!< Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
    kSOFTSIGN = 6,         //!< Softsign activation: x / (1+|x|)
    kSOFTPLUS = 7,         //!< Parametric softplus activation: alpha*log(exp(beta*x)+1)
    kCLIP = 8,             //!< Clip activation: max(alpha, min(beta, x))
    kHARD_SIGMOID = 9,     //!< Hard sigmoid activation: max(0, min(1, alpha*x+beta))
    kSCALED_TANH = 10,     //!< Scaled tanh activation: alpha*tanh(beta*x)
    kTHRESHOLDED_RELU = 11 //!< Thresholded ReLU activation: x>alpha ? x : 0
};
1-3--池化算子
IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims windowSize)
第一个参数表示输入的Tensor数据；第二个参数表示使用的池化类型；第三个参数表示池化窗口的大小。提供的池化类型包括：
enum class PoolingType : int32_t
{
    kMAX = 0,              // Maximum over elements
    kAVERAGE = 1,          // Average over elements. If the tensor is padded, the count includes the padding
    kMAX_AVERAGE_BLEND = 2 // Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
};
1-4--FC层算子
IFullyConnectedLayer* addFullyConnected(ITensor& input, int32_t nbOutputs, Weights kernelWeights, Weights biasWeights)
第一个参数表示输入的Tensor数据；第二个参数表示输出的通道数；第三个参数和第四个参数表示加载的权重。
2--代码实例
基于算子 API 搭建 VGG11:完整可运行的代码参考liujf69/TensorRT-Demo
核心程序代码
// 创建builder和config
nvinfer1::IBuilder* builder nvinfer1::createInferBuilder(gLogger);
nvinfer1::IBuilderConfig* config builder-createBuilderConfig();// 基于builder创建network
nvinfer1::INetworkDefinition* network builder-createNetworkV2(0U); // 一开始是空的// 调用API搭建Network
// 创建输入
nvinfer1::ITensor* data network-addInput(this-INPUT_BLOB_NAME, dt, nvinfer1::Dims3{3, this-INPUT_H, this-INPUT_W});
// 搭建卷积层
nvinfer1::IConvolutionLayer* conv1 network-addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap[features.0.weight], weightMap[features.0.bias]);
conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});
// 搭建激活层
nvinfer1::IActivationLayer* relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);
// 搭建池化层
nvinfer1::IPoolingLayer* pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});
pool1-setStrideNd(nvinfer1::DimsHW{2, 2});
...
// 搭建FC层
nvinfer1::IFullyConnectedLayer* fc1 network-addFullyConnected(*pool1-getOutput(0), 4096, weightMap[classifier.0.weight], weightMap[classifier.0.bias]);
...// 基于config和network生成engine
builder-setMaxBatchSize(maxBatchSize);
config-setMaxWorkspaceSize(1 20);
nvinfer1::ICudaEngine* engine builder-buildEngineWithConfig(*network, *config);
...
主程序代码
#include NvInfer.h
#include cuda_runtime_api.h
#include fstream
#include iostream
#include map
#include sstream
#include vector
#include chrono
#include logging.h
#include iostream#define CHECK(status) \do\{\auto ret (status);\if (ret ! 0)\{\std::cerr Cuda failure: ret std::endl;\abort();\}\} while (0)static Logger gLogger; // 日志class VGG_Demo{
public:VGG_Demo(){this-prob new float[OUTPUT_SIZE];}~VGG_Demo(){delete[] prob;}int serialize();void APIToModel(unsigned int maxBatchSize, nvinfer1::IHostMemory** modelStream);nvinfer1::ICudaEngine* createEngine(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt);std::mapstd::string, nvinfer1::Weights loadWeights(const std::string file);void doInference(nvinfer1::IExecutionContext context, float* input, float* output, int batchSize);void deserialize(float* data);void load_engine();const char* INPUT_BLOB_NAME data; // 输入名称const char* OUTPUT_BLOB_NAME prob; // 输出名称const int INPUT_H 224; // 输入数据高度const int INPUT_W 224; // 输入数据宽度const int OUTPUT_SIZE 1000; // 输出大小std::string engine_file ./vgg.engine;char* trtModelStream nullptr;float* prob nullptr;size_t size 0;
};int VGG_Demo::serialize(){nvinfer1::IHostMemory* modelStream nullptr;this-APIToModel(1, modelStream); // 调用API构建networkassert(modelStream ! nullptr);// 保存std::ofstream p(./vgg.engine, std::ios::binary);if (!p) {std::cerr could not open plan output file std::endl;return -1;}p.write(reinterpret_castconst char*(modelStream-data()), modelStream-size());modelStream-destroy();return 1;
}void VGG_Demo::APIToModel(unsigned int maxBatchSize, nvinfer1::IHostMemory** modelStream){// 创建builder和confignvinfer1::IBuilder* builder nvinfer1::createInferBuilder(gLogger);nvinfer1::IBuilderConfig* config builder-createBuilderConfig();nvinfer1::ICudaEngine* engine this-createEngine(maxBatchSize, builder, config, nvinfer1::DataType::kFLOAT);assert(engine ! nullptr);// 序列化*modelStream engine-serialize();// 销毁engine-destroy();builder-destroy();config-destroy();
}nvinfer1::ICudaEngine* VGG_Demo::createEngine(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt){// 加载权重std::mapstd::string, nvinfer1::Weights weightMap loadWeights(../weights/vgg.wts);nvinfer1::Weights emptywts{nvinfer1::DataType::kFLOAT, nullptr, 0};nvinfer1::INetworkDefinition* network builder-createNetworkV2(0U); // 创建一个空的networknvinfer1::ITensor* data network-addInput(this-INPUT_BLOB_NAME, dt, nvinfer1::Dims3{3, this-INPUT_H, this-INPUT_W}); // 创建输入assert(data);// 使用卷积、激活和池化三种算子按顺序连接三种算子并用对应的权重初始化nvinfer1::IConvolutionLayer* conv1 network-addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap[features.0.weight], weightMap[features.0.bias]);assert(conv1);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});nvinfer1::IActivationLayer* relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);assert(relu1);nvinfer1::IPoolingLayer* pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});assert(pool1);pool1-setStrideNd(nvinfer1::DimsHW{2, 2});conv1 network-addConvolutionNd(*pool1-getOutput(0), 128, nvinfer1::DimsHW{3, 3}, weightMap[features.3.weight], weightMap[features.3.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});pool1-setStrideNd(nvinfer1::DimsHW{2, 2});conv1 network-addConvolutionNd(*pool1-getOutput(0), 256, nvinfer1::DimsHW{3, 3}, weightMap[features.6.weight], weightMap[features.6.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);conv1 network-addConvolutionNd(*relu1-getOutput(0), 256, nvinfer1::DimsHW{3, 3}, weightMap[features.8.weight], weightMap[features.8.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), 
nvinfer1::ActivationType::kRELU);pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});pool1-setStrideNd(nvinfer1::DimsHW{2, 2});conv1 network-addConvolutionNd(*pool1-getOutput(0), 512, nvinfer1::DimsHW{3, 3}, weightMap[features.11.weight], weightMap[features.11.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);conv1 network-addConvolutionNd(*relu1-getOutput(0), 512, nvinfer1::DimsHW{3, 3}, weightMap[features.13.weight], weightMap[features.13.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});pool1-setStrideNd(nvinfer1::DimsHW{2, 2});conv1 network-addConvolutionNd(*pool1-getOutput(0), 512, nvinfer1::DimsHW{3, 3}, weightMap[features.16.weight], weightMap[features.16.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);conv1 network-addConvolutionNd(*relu1-getOutput(0), 512, nvinfer1::DimsHW{3, 3}, weightMap[features.18.weight], weightMap[features.18.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});pool1-setStrideNd(nvinfer1::DimsHW{2, 2});// 使用全连接层算子nvinfer1::IFullyConnectedLayer* fc1 network-addFullyConnected(*pool1-getOutput(0), 4096, weightMap[classifier.0.weight], weightMap[classifier.0.bias]);assert(fc1);relu1 network-addActivation(*fc1-getOutput(0), nvinfer1::ActivationType::kRELU);fc1 network-addFullyConnected(*relu1-getOutput(0), 4096, weightMap[classifier.3.weight], weightMap[classifier.3.bias]);relu1 network-addActivation(*fc1-getOutput(0), nvinfer1::ActivationType::kRELU);fc1 
network-addFullyConnected(*relu1-getOutput(0), 1000, weightMap[classifier.6.weight], weightMap[classifier.6.bias]);fc1-getOutput(0)-setName(OUTPUT_BLOB_NAME); // 设置输出名称network-markOutput(*fc1-getOutput(0)); // 标记输出// 生成enginebuilder-setMaxBatchSize(maxBatchSize);config-setMaxWorkspaceSize(1 20);nvinfer1::ICudaEngine* engine builder-buildEngineWithConfig(*network, *config);std::cout build out std::endl;// 生成engine后释放networknetwork-destroy();// 释放权重内存for (auto mem : weightMap) free((void*) (mem.second.values)); return engine;
}std::mapstd::string, nvinfer1::Weights VGG_Demo::loadWeights(const std::string file){std::cout Loading weights: file std::endl;std::mapstd::string, nvinfer1::Weights weightMap; // 权重名称和权重类的哈希表std::ifstream input(file);assert(input.is_open() Unable to load weight file.);// 首先读取权重block的个数int32_t count;input count;assert(count 0 Invalid weight map file.);// 遍历权重blockwhile (count--){nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0}; // 初始化一个权重对象uint32_t size;// Read name and type of blobstd::string name;input name std::dec size; // std::dec表示使用十进制表示权重的sizewt.type nvinfer1::DataType::kFLOAT; // 设置权重的类型// 拷贝权重值uint32_t* val reinterpret_castuint32_t*(malloc(sizeof(val) * size));for (uint32_t x 0, y size; x y; x){ // 拷贝size大小input std::hex val[x];}// 完成哈希映射wt.values val;wt.count size;weightMap[name] wt;}return weightMap;
}void VGG_Demo::deserialize(float* data){load_engine(); // 加载enginenvinfer1::IRuntime* runtime nvinfer1::createInferRuntime(gLogger);assert(runtime ! nullptr);nvinfer1::ICudaEngine* engine runtime-deserializeCudaEngine(this-trtModelStream, this-size);assert(engine ! nullptr);nvinfer1::IExecutionContext* context engine-createExecutionContext();assert(context ! nullptr);delete[] this-trtModelStream; // 手动释放trtModelStream// 执行推理for (int i 0; i 10; i){ // 记录推理10次的时间auto start std::chrono::system_clock::now();doInference(*context, data, this-prob, 1);auto end std::chrono::system_clock::now();std::cout std::chrono::duration_caststd::chrono::milliseconds(end - start).count() ms std::endl;}// 销毁context-destroy();engine-destroy();runtime-destroy();// 打印推理结果std::cout \nOutput:\n\n;for (unsigned int i 0; i 10; i){ // 打印10个std::cout this-prob[i] , ;if (i % 10 0) std::cout i / 10 std::endl;}std::cout std::endl;
}void VGG_Demo::load_engine(){std::ifstream file(this-engine_file, std::ios::binary);if(file.good()){file.seekg(0, file.end);this-size file.tellg();file.seekg(0, file.beg);this-trtModelStream new char[size];assert(this-trtModelStream);file.read(this-trtModelStream, size);file.close();}
}void VGG_Demo::doInference(nvinfer1::IExecutionContext context, float* input, float* output, int batchSize){const nvinfer1::ICudaEngine engine context.getEngine();assert(engine.getNbBindings() 2);void* buffers[2];const int inputIndex engine.getBindingIndex(this-INPUT_BLOB_NAME);const int outputIndex engine.getBindingIndex(this-OUTPUT_BLOB_NAME);CHECK(cudaMalloc(buffers[inputIndex], batchSize * 3 * this-INPUT_H * this-INPUT_W * sizeof(float)));CHECK(cudaMalloc(buffers[outputIndex], batchSize * this-OUTPUT_SIZE * sizeof(float)));// 创建streamcudaStream_t stream;CHECK(cudaStreamCreate(stream));// Host to deviceCHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));context.enqueue(batchSize, buffers, stream, nullptr);// device to hostCHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));cudaStreamSynchronize(stream);// 释放cudaStreamDestroy(stream);CHECK(cudaFree(buffers[inputIndex]));CHECK(cudaFree(buffers[outputIndex]));
}int main(int argc, char** argv){// 判断参数是否准确if(argc ! 2){std::cerr arguments not right! std::endl;std::cerr ./vgg_demo -s // serialize model to plan file std::endl;std::cerr ./vgg_demo -d // deserialize plan file and run inference std::endl;return -1;}VGG_Demo vgg_demo1;if(std::string(argv[1]) -s){ // 序列化vgg_demo1.serialize();}else if(std::string(argv[1]) -d){ // 反序列化并推理// 生成测试数据float data[3 * 224 * 224];for (int i 0; i 3 * 224 * 224; i) data[i] 1;vgg_demo1.deserialize(data);}else{std::cerr wrong arguments! std::endl;;return -1;}return 0;
}
3--编译运行
mkdir build && cd build
cmake ..
make
./vgg_demo -s
./vgg_demo -d