This post was first published on my personal blog at https://kezunlin.me/post/bcdfb73c/; read the latest version there!

tensorrt fp32 fp16 tutorial with caffe pytorch mnist model

Code Example

include headers

#include <assert.h>
#include <sys/stat.h>
#include <time.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <cmath>
#include <algorithm>

#include <cuda_runtime_api.h>
#include "NvCaffeParser.h"
#include "NvOnnxConfig.h"
#include "NvOnnxParser.h"
#include "NvInfer.h"
#include "common.h"

using namespace nvinfer1;
using namespace nvcaffeparser1;

static Logger gLogger;

// Attributes of the MNIST Caffe model
static const int INPUT_H = 28;
static const int INPUT_W = 28;
static const int OUTPUT_SIZE = 10;
//const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";

const std::string mnist_data_dir = "data/mnist/";

// Simple PGM (portable greyscale map) reader; forwards to the
// four-argument overload provided by the samples' common.h
void readPGMFile(const std::string& fileName, uint8_t buffer[INPUT_H * INPUT_W])
{
    readPGMFile(fileName, buffer, INPUT_H, INPUT_W);
}
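If you build this outside the TensorRT samples tree, common.h and its four-argument readPGMFile are not available. A minimal stand-in, under the assumption that the digit files are binary "P5" PGMs like the ones shipped with the samples, could look like this:

// Minimal stand-in for the samples' readPGMFile (an assumption, not the
// official implementation). Expects a binary "P5" PGM: magic, width,
// height and max value in the header, then raw 8-bit pixels.
inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, int inW)
{
    std::ifstream infile(fileName, std::ifstream::binary);
    assert(infile.is_open() && "failed to open PGM file");
    std::string magic, w, h, maxval;
    infile >> magic >> w >> h >> maxval;     // header fields
    infile.seekg(1, infile.cur);             // skip the whitespace after the header
    infile.read(reinterpret_cast<char*>(buffer), inH * inW);
}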

caffe model to tensorrt

void caffeToTRTModel(const std::string& deployFilepath,       // Path of Caffe prototxt file
                     const std::string& modelFilepath,        // Path of Caffe model file
                     const std::vector<std::string>& outputs, // Names of network outputs
                     unsigned int maxBatchSize,               // Note: Must be at least as large as the batch we want to run with
                     IHostMemory*& trtModelStream)            // Output buffer for the TRT model
{
    // Create builder
    IBuilder* builder = createInferBuilder(gLogger);

    // Parse caffe model to populate network, then set the outputs
    std::cout << "Reading Caffe prototxt: " << deployFilepath << "\n";
    std::cout << "Reading Caffe model: " << modelFilepath << "\n";
    INetworkDefinition* network = builder->createNetwork();
    ICaffeParser* parser = createCaffeParser();

    bool useFp16 = builder->platformHasFastFp16();
    std::cout << "platformHasFastFp16: " << useFp16 << "\n";
    bool useInt8 = builder->platformHasFastInt8();
    std::cout << "platformHasFastInt8: " << useInt8 << "\n";

    // Create a 16-bit model if it's natively supported
    DataType modelDataType = useFp16 ? DataType::kHALF : DataType::kFLOAT;
    const IBlobNameToTensor* blobNameToTensor = parser->parse(deployFilepath.c_str(),
                                                              modelFilepath.c_str(),
                                                              *network,
                                                              modelDataType);

    // Specify output tensors of the network; without this the builder
    // fails with "Network must have at least one output"
    for (auto& s : outputs)
    {
        std::cout << "output = " << s << std::endl;
        network->markOutput(*blobNameToTensor->find(s.c_str())); // prob
    }

    builder->setMaxBatchSize(maxBatchSize);
    builder->setMaxWorkspaceSize(1 << 20);

    // Set up the network for paired-fp16 format if available
    if (useFp16)
        builder->setFp16Mode(true);

    // Build engine
    ICudaEngine* engine = builder->buildCudaEngine(*network);
    assert(engine);

    // Destroy parser and network
    network->destroy();
    parser->destroy();

    // Serialize engine and destroy it
    trtModelStream = engine->serialize();
    engine->destroy();
    builder->destroy();
    //shutdownProtobufLibrary();
}
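Two notes on the FP16 path. First, even though the parser stores weights as kHALF when the platform supports it, it is setFp16Mode(true) that actually lets the builder pick FP16 kernels. Second, this post targets the TensorRT 4/5 builder API; on TensorRT 6 and later the same request goes through an IBuilderConfig instead. A hedged sketch (verify the calls against the headers of your installed version):

// Sketch for TensorRT 6+ only (an assumption about your target version):
// FP16 is requested on an IBuilderConfig rather than via setFp16Mode().
IBuilderConfig* config = builder->createBuilderConfig();
config->setMaxWorkspaceSize(1 << 20);
if (builder->platformHasFastFp16())
    config->setFlag(BuilderFlag::kFP16);
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
config->destroy();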

pytorch onnx to tensorrt

void onnxToTRTModel(const std::string& modelFilepath, // Name of the onnx model file
                    unsigned int maxBatchSize,        // Note: Must be at least as large as the batch we want to run with
                    IHostMemory*& trtModelStream)     // Output buffer for the TensorRT model
{
    // Create the builder
    IBuilder* builder = createInferBuilder(gLogger);

    nvonnxparser::IOnnxConfig* config = nvonnxparser::createONNXConfig();
    config->setModelFileName(modelFilepath.c_str());
    nvonnxparser::IONNXParser* parser = nvonnxparser::createONNXParser(*config);

    // Optional - uncomment below lines to view network layer information
    //config->setPrintLayerInfo(true);
    //parser->reportParsingInfo();

    if (!parser->parse(modelFilepath.c_str(), DataType::kFLOAT))
    {
        std::string msg("failed to parse onnx file");
        gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
        exit(EXIT_FAILURE);
    }

    if (!parser->convertToTRTNetwork())
    {
        std::string msg("ERROR, failed to convert onnx network into TRT network");
        gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
        exit(EXIT_FAILURE);
    }
    nvinfer1::INetworkDefinition* network = parser->getTRTNetwork();

    // Build the engine
    builder->setMaxBatchSize(maxBatchSize);
    builder->setMaxWorkspaceSize(1 << 20);
    ICudaEngine* engine = builder->buildCudaEngine(*network);
    assert(engine);

    // We don't need the network any more, and we can destroy the parser
    network->destroy();
    parser->destroy();

    // Serialize the engine, then close everything down
    trtModelStream = engine->serialize();
    engine->destroy();
    builder->destroy();
    //shutdownProtobufLibrary();
}
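The IOnnxConfig / IONNXParser combination used here is the legacy ONNX front end and was later removed. On newer releases (roughly TensorRT 6 and later; treat the exact calls as an assumption to check against your version) the parser populates a network you create yourself:

// Sketch of the newer ONNX front end (TensorRT 6+; an assumption to
// verify against your installed version). The parser fills a network
// created with the explicit-batch flag.
IBuilder* builder = createInferBuilder(gLogger);
const auto flags = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
INetworkDefinition* network = builder->createNetworkV2(flags);
nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);
if (!parser->parseFromFile(modelFilepath.c_str(), static_cast<int>(ILogger::Severity::kWARNING)))
{
    gLogger.log(ILogger::Severity::kERROR, "failed to parse onnx file");
    exit(EXIT_FAILURE);
}
// network is now populated; build the engine as before.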

do inference

void doInference(IExecutionContext& context, float* input, float* output, int batchSize)
{
    const ICudaEngine& engine = context.getEngine();

    // Pointers to input and output device buffers to pass to engine.
    // Engine requires exactly IEngine::getNbBindings() number of buffers.
    assert(engine.getNbBindings() == 2);
    void* buffers[2];

    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    int inputIndex = 0, outputIndex = 0;
    printf("Bindings after deserializing:\n");
    for (int bi = 0; bi < engine.getNbBindings(); bi++)
    {
        if (engine.bindingIsInput(bi))
        {
            inputIndex = bi;
            printf("Binding %d (%s): Input.\n", bi, engine.getBindingName(bi));
        }
        else
        {
            outputIndex = bi;
            printf("Binding %d (%s): Output.\n", bi, engine.getBindingName(bi));
        }
    }
    //const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
    //const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
    std::cout << "inputIndex = " << inputIndex << std::endl;   // 0 data
    std::cout << "outputIndex = " << outputIndex << std::endl; // 1 prob

    // Create GPU buffers on device
    CHECK(cudaMalloc(&buffers[inputIndex], batchSize * INPUT_H * INPUT_W * sizeof(float)));
    CHECK(cudaMalloc(&buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float)));

    // Create stream
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
    context.enqueue(batchSize, buffers, stream, nullptr);
    CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    cudaStreamSynchronize(stream);

    // Release stream and buffers
    cudaStreamDestroy(stream);
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));
}
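doInference uses the asynchronous enqueue plus an explicit CUDA stream. If you do not need to overlap transfers and compute, the middle of the function can be replaced by a synchronous variant; a sketch under the assumption that blocking calls are acceptable:

// Synchronous sketch: blocking copies and execute() instead of a stream.
// Drop-in replacement for the cudaMemcpyAsync/enqueue block above.
CHECK(cudaMemcpy(buffers[inputIndex], input,
                 batchSize * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice));
context.execute(batchSize, buffers);
CHECK(cudaMemcpy(output, buffers[outputIndex],
                 batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost));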

save and load engine

void SaveEngine(const nvinfer1::IHostMemory& trtModelStream, const std::string& engine_filepath)
{
    std::ofstream file;
    file.open(engine_filepath, std::ios::binary | std::ios::out);
    if (!file.is_open())
    {
        std::cout << "create engine file " << engine_filepath << " failed" << std::endl;
        return;
    }
    file.write((const char*)trtModelStream.data(), trtModelStream.size());
    file.close();
}

ICudaEngine* LoadEngine(IRuntime& runtime, const std::string& engine_filepath)
{
    std::ifstream file;
    file.open(engine_filepath, std::ios::binary | std::ios::in);
    if (!file.is_open())
    {
        std::cout << "read engine file " << engine_filepath << " failed" << std::endl;
        return nullptr;
    }
    file.seekg(0, std::ios::end);
    int length = file.tellg();
    file.seekg(0, std::ios::beg);

    std::shared_ptr<char> data(new char[length], std::default_delete<char[]>());
    file.read(data.get(), length);
    file.close();

    // runtime.deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), nullptr);
    ICudaEngine* engine = runtime.deserializeCudaEngine(data.get(), length, nullptr);
    assert(engine != nullptr);
    return engine;
}
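A quick round-trip check of the two helpers, assuming the cfg/mnist directory already exists (the paths are the ones used later in this post):

// Round-trip sketch: serialize an engine to disk, then load it back.
IHostMemory* trtModelStream{nullptr};
caffeToTRTModel("data/mnist/mnist.prototxt", "data/mnist/mnist.caffemodel",
                std::vector<std::string>{OUTPUT_BLOB_NAME}, 1, trtModelStream);
SaveEngine(*trtModelStream, "cfg/mnist/caffe_minist_fp32.trt");
trtModelStream->destroy();

IRuntime* runtime = createInferRuntime(gLogger);
ICudaEngine* engine = LoadEngine(*runtime, "cfg/mnist/caffe_minist_fp32.trt");
// ... use the engine, then clean up
engine->destroy();
runtime->destroy();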

example

void demo_save_caffe_to_trt(const std::string& engine_filepath)
{
    std::string deploy_filepath = mnist_data_dir + "mnist.prototxt";
    std::string model_filepath = mnist_data_dir + "mnist.caffemodel";

    // Create TRT model from caffe model and serialize it to a stream
    IHostMemory* trtModelStream{nullptr};
    caffeToTRTModel(deploy_filepath, model_filepath, std::vector<std::string>{OUTPUT_BLOB_NAME}, 1, trtModelStream);
    assert(trtModelStream != nullptr);

    SaveEngine(*trtModelStream, engine_filepath);

    // Destroy stream
    trtModelStream->destroy();
}

void demo_save_onnx_to_trt(const std::string& engine_filepath)
{
    std::string onnx_filepath = mnist_data_dir + "mnist.onnx";

    // Create TRT model from onnx model and serialize it to a stream
    IHostMemory* trtModelStream{nullptr};
    onnxToTRTModel(onnx_filepath, 1, trtModelStream);
    assert(trtModelStream != nullptr);

    SaveEngine(*trtModelStream, engine_filepath);

    // Destroy stream
    trtModelStream->destroy();
}

int mnist_demo()
{
    bool use_caffe = false;
    std::string engine_filepath;
    if (use_caffe) {
        engine_filepath = "cfg/mnist/caffe_minist_fp32.trt";
        demo_save_caffe_to_trt(engine_filepath);
    } else {
        engine_filepath = "cfg/mnist/onnx_minist_fp32.trt";
        demo_save_onnx_to_trt(engine_filepath);
    }
    std::cout << "[API] Save engine to " << engine_filepath << std::endl;

    const int num = 6;
    std::string digit_filepath = mnist_data_dir + std::to_string(num) + ".pgm";

    // Read a digit file
    uint8_t fileData[INPUT_H * INPUT_W];
    readPGMFile(digit_filepath, fileData);

    float data[INPUT_H * INPUT_W];
    if (use_caffe) {
        // Parse mean file
        std::string mean_filepath = mnist_data_dir + "mnist_mean.binaryproto";
        ICaffeParser* parser = createCaffeParser();
        IBinaryProtoBlob* meanBlob = parser->parseBinaryProto(mean_filepath.c_str());
        parser->destroy();

        // Subtract mean from image
        const float* meanData = reinterpret_cast<const float*>(meanBlob->getData()); // size 784
        for (int i = 0; i < INPUT_H * INPUT_W; i++)
            data[i] = float(fileData[i]) - meanData[i];
        meanBlob->destroy();
    } else {
        // The onnx model expects inputs normalized to [0,1]
        for (int i = 0; i < INPUT_H * INPUT_W; i++)
            data[i] = 1.0 - float(fileData[i] / 255.0);
    }

    // Deserialize the engine we serialized earlier
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    std::cout << "[API] Load engine from " << engine_filepath << std::endl;
    ICudaEngine* engine = LoadEngine(*runtime, engine_filepath);
    assert(engine != nullptr);

    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);

    // Run inference on input data
    float prob[OUTPUT_SIZE];
    doInference(*context, data, prob, 1);

    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();

    // Print histogram of the output distribution
    std::cout << "\nOutput:\n\n";

    // For onnx we get raw logits as output, so apply softmax to get probabilities
    if (!use_caffe) {
        // Calculate softmax
        float sum{0.0f};
        for (int i = 0; i < OUTPUT_SIZE; i++)
        {
            prob[i] = exp(prob[i]);
            sum += prob[i];
        }
        for (int i = 0; i < OUTPUT_SIZE; i++)
        {
            prob[i] /= sum;
        }
    }

    // Find the class with the highest probability
    float val{0.0f};
    int idx{0};
    for (int i = 0; i < OUTPUT_SIZE; i++)
    {
        val = std::max(val, prob[i]);
        if (val == prob[i]) {
            idx = i;
        }
        std::cout << " Prob " << i << " " << std::fixed << std::setw(5) << std::setprecision(4) << prob[i];
        std::cout << i << ": " << std::string(int(std::floor(prob[i] * 10 + 0.5f)), '*') << "\n";
    }
    std::cout << std::endl;

    return (idx == num && val > 0.9f) ? EXIT_SUCCESS : EXIT_FAILURE;
}

int main(int argc, char** argv)
{
    mnist_demo();
    return 0;
}
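One caveat about the softmax step above: exp(prob[i]) can overflow for large logits. Subtracting the maximum logit first yields the same probabilities (the shift cancels out in the ratio) and keeps exp() in range; a drop-in sketch:

// Numerically stable softmax sketch: shift by the max logit before exp.
float maxLogit = *std::max_element(prob, prob + OUTPUT_SIZE);
float sum{0.0f};
for (int i = 0; i < OUTPUT_SIZE; i++)
{
    prob[i] = std::exp(prob[i] - maxLogit); // exp(x - max) <= 1
    sum += prob[i];
}
for (int i = 0; i < OUTPUT_SIZE; i++)
    prob[i] /= sum;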

results

./bin/sample_mnist
[API] Save engine to cfg/mnist/onnx_minist_fp32.trt
[API] Load engine from cfg/mnist/onnx_minist_fp32.trt
Bindings after deserializing:
Binding 0 (Input3): Input.
Binding 1 (Plus214_Output_0): Output.
inputIndex = 0
outputIndex = 1

Output:

Prob 0 0.00000:
Prob 1 0.00001:
Prob 2 0.00002:
Prob 3 0.00003:
Prob 4 0.00004:
Prob 5 0.00005:
Prob 6 1.00006: **********
Prob 7 0.00007:
Prob 8 0.00008:
Prob 9 0.00009:

History

  • 20190422 created.
