当前位置：首页 > news >正文

wap手机网站模版学做旗袍衣服的网站

news 2025/11/14 13:02:02

wap手机网站模版,学做旗袍衣服的网站,app制作平台下载,杭州网站设计网站

目录 1--Tensor算子API 1-1--卷积算子 1-2--激活算子 1-3--池化算子 1-4--FC层算子 2--代码实例 3--编译运行 1--Tensor算子API TensorRT提供了卷积层、激活函数和池化层三种最常用算子的API： // 创建一个空的网络 nvinfer1::INetworkDefinition* network = …

目录

1--Tensor算子API
1-1--卷积算子
1-2--激活算子
1-3--池化算子
1-4--FC层算子
2--代码实例
3--编译运行

1--Tensor算子API

TensorRT提供了卷积层、激活函数和池化层三种最常用算子的API：

// 创建一个空的网络
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);

// 添加卷积层算子
nvinfer1::IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap["features.0.weight"], weightMap["features.0.bias"]);

// 添加激活算子
nvinfer1::IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), nvinfer1::ActivationType::kRELU);

// 添加池化算子
nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});

1-1--卷积算子

IConvolutionLayer* addConvolutionNd(ITensor& input, int32_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights)

第一个参数表示输入的Tensor数据；第二个参数表示卷积层输出的特征图数，即通道数channel；第三个参数表示使用的卷积核大小；第四个参数和第五个参数表示加载的权重。

1-2--激活算子

IActivationLayer* addActivation(ITensor& input, ActivationType type)

第一个参数表示输入的Tensor数据；第二个参数表示使用的激活函数类型，包括以下激活函数：

enum class ActivationType : int32_t
{
    kRELU = 0,             //!< Rectified linear activation.
    kSIGMOID = 1,          //!< Sigmoid activation.
    kTANH = 2,             //!< TanH activation.
    kLEAKY_RELU = 3,       //!< LeakyRelu activation: x>=0 ? x : alpha * x.
    kELU = 4,              //!< Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
    kSELU = 5,             //!< Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
    kSOFTSIGN = 6,         //!< Softsign activation: x / (1+|x|)
    kSOFTPLUS = 7,         //!< Parametric softplus activation: alpha*log(exp(beta*x)+1)
    kCLIP = 8,             //!< Clip activation: max(alpha, min(beta, x))
    kHARD_SIGMOID = 9,     //!< Hard sigmoid activation: max(0, min(1, alpha*x+beta))
    kSCALED_TANH = 10,     //!< Scaled tanh activation: alpha*tanh(beta*x)
    kTHRESHOLDED_RELU = 11 //!< Thresholded ReLU activation: x>alpha ? x : 0
};

1-3--池化算子

IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims windowSize)

第一个参数表示输入的Tensor数据；第二个参数表示使用的池化类型；第三个参数表示池化窗口的大小。提供的池化类型包括：

enum class PoolingType : int32_t
{
    kMAX = 0,              // Maximum over elements
    kAVERAGE = 1,          // Average over elements. If the tensor is padded, the count includes the padding
    kMAX_AVERAGE_BLEND = 2 // Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
};

1-4--FC层算子

IFullyConnectedLayer* addFullyConnected(ITensor& input, int32_t nbOutputs, Weights kernelWeights, Weights biasWeights)

第一个参数表示输入的Tensor数据；第二个参数表示输出的通道数；第三个参数和第四个参数表示加载的权重。

2--代码实例

基于算子 API 搭建 VGG11：完整可运行的代码参考 liujf69/TensorRT-Demo

核心程序代码

// 创建builder和config
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();

// 基于builder创建network
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); // 一开始是空的

// 调用API搭建Network
// 创建输入
nvinfer1::ITensor* data = network->addInput(this->INPUT_BLOB_NAME, dt, nvinfer1::Dims3{3, this->INPUT_H, this->INPUT_W});
// 搭建卷积层
nvinfer1::IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap["features.0.weight"], weightMap["features.0.bias"]);
conv1->setPaddingNd(nvinfer1::DimsHW{1, 1});
// 搭建激活层
nvinfer1::IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), nvinfer1::ActivationType::kRELU);
// 搭建池化层
nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});
pool1->setStrideNd(nvinfer1::DimsHW{2, 2});
...
// 搭建FC层
nvinfer1::IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool1->getOutput(0), 4096, weightMap["classifier.0.weight"], weightMap["classifier.0.bias"]);
...

// 基于config和network生成engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 20);
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
...
主程序代码 #include NvInfer.h #include cuda_runtime_api.h #include fstream #include iostream #include map #include sstream #include vector #include chrono #include logging.h #include iostream#define CHECK(status) \do\{\auto ret (status);\if (ret ! 0)\{\std::cerr Cuda failure: ret std::endl;\abort();\}\} while (0)static Logger gLogger; // 日志class VGG_Demo{ public:VGG_Demo(){this-prob new float[OUTPUT_SIZE];}~VGG_Demo(){delete[] prob;}int serialize();void APIToModel(unsigned int maxBatchSize, nvinfer1::IHostMemory** modelStream);nvinfer1::ICudaEngine* createEngine(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt);std::mapstd::string, nvinfer1::Weights loadWeights(const std::string file);void doInference(nvinfer1::IExecutionContext context, float* input, float* output, int batchSize);void deserialize(float* data);void load_engine();const char* INPUT_BLOB_NAME data; // 输入名称const char* OUTPUT_BLOB_NAME prob; // 输出名称const int INPUT_H 224; // 输入数据高度const int INPUT_W 224; // 输入数据宽度const int OUTPUT_SIZE 1000; // 输出大小std::string engine_file ./vgg.engine;char* trtModelStream nullptr;float* prob nullptr;size_t size 0; };int VGG_Demo::serialize(){nvinfer1::IHostMemory* modelStream nullptr;this-APIToModel(1, modelStream); // 调用API构建networkassert(modelStream ! nullptr);// 保存std::ofstream p(./vgg.engine, std::ios::binary);if (!p) {std::cerr could not open plan output file std::endl;return -1;}p.write(reinterpret_castconst char*(modelStream-data()), modelStream-size());modelStream-destroy();return 1; }void VGG_Demo::APIToModel(unsigned int maxBatchSize, nvinfer1::IHostMemory** modelStream){// 创建builder和confignvinfer1::IBuilder* builder nvinfer1::createInferBuilder(gLogger);nvinfer1::IBuilderConfig* config builder-createBuilderConfig();nvinfer1::ICudaEngine* engine this-createEngine(maxBatchSize, builder, config, nvinfer1::DataType::kFLOAT);assert(engine ! 
nullptr);// 序列化*modelStream engine-serialize();// 销毁engine-destroy();builder-destroy();config-destroy(); }nvinfer1::ICudaEngine* VGG_Demo::createEngine(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt){// 加载权重std::mapstd::string, nvinfer1::Weights weightMap loadWeights(../weights/vgg.wts);nvinfer1::Weights emptywts{nvinfer1::DataType::kFLOAT, nullptr, 0};nvinfer1::INetworkDefinition* network builder-createNetworkV2(0U); // 创建一个空的networknvinfer1::ITensor* data network-addInput(this-INPUT_BLOB_NAME, dt, nvinfer1::Dims3{3, this-INPUT_H, this-INPUT_W}); // 创建输入assert(data);// 使用卷积、激活和池化三种算子按顺序连接三种算子并用对应的权重初始化nvinfer1::IConvolutionLayer* conv1 network-addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap[features.0.weight], weightMap[features.0.bias]);assert(conv1);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});nvinfer1::IActivationLayer* relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);assert(relu1);nvinfer1::IPoolingLayer* pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});assert(pool1);pool1-setStrideNd(nvinfer1::DimsHW{2, 2});conv1 network-addConvolutionNd(*pool1-getOutput(0), 128, nvinfer1::DimsHW{3, 3}, weightMap[features.3.weight], weightMap[features.3.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});pool1-setStrideNd(nvinfer1::DimsHW{2, 2});conv1 network-addConvolutionNd(*pool1-getOutput(0), 256, nvinfer1::DimsHW{3, 3}, weightMap[features.6.weight], weightMap[features.6.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);conv1 network-addConvolutionNd(*relu1-getOutput(0), 256, nvinfer1::DimsHW{3, 3}, weightMap[features.8.weight], 
weightMap[features.8.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});pool1-setStrideNd(nvinfer1::DimsHW{2, 2});conv1 network-addConvolutionNd(*pool1-getOutput(0), 512, nvinfer1::DimsHW{3, 3}, weightMap[features.11.weight], weightMap[features.11.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);conv1 network-addConvolutionNd(*relu1-getOutput(0), 512, nvinfer1::DimsHW{3, 3}, weightMap[features.13.weight], weightMap[features.13.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});pool1-setStrideNd(nvinfer1::DimsHW{2, 2});conv1 network-addConvolutionNd(*pool1-getOutput(0), 512, nvinfer1::DimsHW{3, 3}, weightMap[features.16.weight], weightMap[features.16.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);conv1 network-addConvolutionNd(*relu1-getOutput(0), 512, nvinfer1::DimsHW{3, 3}, weightMap[features.18.weight], weightMap[features.18.bias]);conv1-setPaddingNd(nvinfer1::DimsHW{1, 1});relu1 network-addActivation(*conv1-getOutput(0), nvinfer1::ActivationType::kRELU);pool1 network-addPoolingNd(*relu1-getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});pool1-setStrideNd(nvinfer1::DimsHW{2, 2});// 使用全连接层算子nvinfer1::IFullyConnectedLayer* fc1 network-addFullyConnected(*pool1-getOutput(0), 4096, weightMap[classifier.0.weight], weightMap[classifier.0.bias]);assert(fc1);relu1 network-addActivation(*fc1-getOutput(0), nvinfer1::ActivationType::kRELU);fc1 network-addFullyConnected(*relu1-getOutput(0), 4096, weightMap[classifier.3.weight], 
weightMap[classifier.3.bias]);relu1 network-addActivation(*fc1-getOutput(0), nvinfer1::ActivationType::kRELU);fc1 network-addFullyConnected(*relu1-getOutput(0), 1000, weightMap[classifier.6.weight], weightMap[classifier.6.bias]);fc1-getOutput(0)-setName(OUTPUT_BLOB_NAME); // 设置输出名称network-markOutput(*fc1-getOutput(0)); // 标记输出// 生成enginebuilder-setMaxBatchSize(maxBatchSize);config-setMaxWorkspaceSize(1 20);nvinfer1::ICudaEngine* engine builder-buildEngineWithConfig(*network, *config);std::cout build out std::endl;// 生成engine后释放networknetwork-destroy();// 释放权重内存for (auto mem : weightMap) free((void*) (mem.second.values)); return engine; }std::mapstd::string, nvinfer1::Weights VGG_Demo::loadWeights(const std::string file){std::cout Loading weights: file std::endl;std::mapstd::string, nvinfer1::Weights weightMap; // 权重名称和权重类的哈希表std::ifstream input(file);assert(input.is_open() Unable to load weight file.);// 首先读取权重block的个数int32_t count;input count;assert(count 0 Invalid weight map file.);// 遍历权重blockwhile (count--){nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0}; // 初始化一个权重对象uint32_t size;// Read name and type of blobstd::string name;input name std::dec size; // std::dec表示使用十进制表示权重的sizewt.type nvinfer1::DataType::kFLOAT; // 设置权重的类型// 拷贝权重值uint32_t* val reinterpret_castuint32_t*(malloc(sizeof(val) * size));for (uint32_t x 0, y size; x y; x){ // 拷贝size大小input std::hex val[x];}// 完成哈希映射wt.values val;wt.count size;weightMap[name] wt;}return weightMap; }void VGG_Demo::deserialize(float* data){load_engine(); // 加载enginenvinfer1::IRuntime* runtime nvinfer1::createInferRuntime(gLogger);assert(runtime ! nullptr);nvinfer1::ICudaEngine* engine runtime-deserializeCudaEngine(this-trtModelStream, this-size);assert(engine ! nullptr);nvinfer1::IExecutionContext* context engine-createExecutionContext();assert(context ! 
nullptr);delete[] this-trtModelStream; // 手动释放trtModelStream// 执行推理for (int i 0; i 10; i){ // 记录推理10次的时间auto start std::chrono::system_clock::now();doInference(*context, data, this-prob, 1);auto end std::chrono::system_clock::now();std::cout std::chrono::duration_caststd::chrono::milliseconds(end - start).count() ms std::endl;}// 销毁context-destroy();engine-destroy();runtime-destroy();// 打印推理结果std::cout \nOutput:\n\n;for (unsigned int i 0; i 10; i){ // 打印10个std::cout this-prob[i] , ;if (i % 10 0) std::cout i / 10 std::endl;}std::cout std::endl; }void VGG_Demo::load_engine(){std::ifstream file(this-engine_file, std::ios::binary);if(file.good()){file.seekg(0, file.end);this-size file.tellg();file.seekg(0, file.beg);this-trtModelStream new char[size];assert(this-trtModelStream);file.read(this-trtModelStream, size);file.close();} }void VGG_Demo::doInference(nvinfer1::IExecutionContext context, float* input, float* output, int batchSize){const nvinfer1::ICudaEngine engine context.getEngine();assert(engine.getNbBindings() 2);void* buffers[2];const int inputIndex engine.getBindingIndex(this-INPUT_BLOB_NAME);const int outputIndex engine.getBindingIndex(this-OUTPUT_BLOB_NAME);CHECK(cudaMalloc(buffers[inputIndex], batchSize * 3 * this-INPUT_H * this-INPUT_W * sizeof(float)));CHECK(cudaMalloc(buffers[outputIndex], batchSize * this-OUTPUT_SIZE * sizeof(float)));// 创建streamcudaStream_t stream;CHECK(cudaStreamCreate(stream));// Host to deviceCHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));context.enqueue(batchSize, buffers, stream, nullptr);// device to hostCHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));cudaStreamSynchronize(stream);// 释放cudaStreamDestroy(stream);CHECK(cudaFree(buffers[inputIndex]));CHECK(cudaFree(buffers[outputIndex])); }int main(int argc, char** argv){// 判断参数是否准确if(argc ! 
2){std::cerr arguments not right! std::endl;std::cerr ./vgg_demo -s // serialize model to plan file std::endl;std::cerr ./vgg_demo -d // deserialize plan file and run inference std::endl;return -1;}VGG_Demo vgg_demo1;if(std::string(argv[1]) -s){ // 序列化vgg_demo1.serialize();}else if(std::string(argv[1]) -d){ // 反序列化并推理// 生成测试数据float data[3 * 224 * 224];for (int i 0; i 3 * 224 * 224; i) data[i] 1;vgg_demo1.deserialize(data);}else{std::cerr wrong arguments! std::endl;;return -1;}return 0; } 3--编译运行 mkdir build cd build cmake .. make ./vgg_demo -s ./vgg_demo -d

查看全文

http://www.zqtcl.cn/news/70999/