// create_engine.cpp
//
// This program creates a TensorRT engine for the GoogLeNet model.
//
// Inputs:
//   deploy.prototxt
//   deploy.caffemodel
//
// Outputs:
//   deploy.engine

#include <assert.h>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <iostream>
#include <string>
#include <vector>

#include <cuda_runtime_api.h>

#include "NvInfer.h"
#include "NvCaffeParser.h"
#include "common.h"

using namespace nvinfer1;
using namespace nvcaffeparser1;

//static Logger gLogger(ILogger::Severity::kINFO);
static Logger gLogger(ILogger::Severity::kWARNING);

// Wraps a serialized engine file in the IHostMemory interface, so it can
// be handed to the TensorRT runtime like an in-memory stream.
class IHostMemoryFromFile : public IHostMemory
{
    public:
        IHostMemoryFromFile(std::string filename);
#if NV_TENSORRT_MAJOR >= 6
        void* data() const noexcept { return mem; }
        std::size_t size() const noexcept { return s; }
        DataType type() const noexcept { return DataType::kFLOAT; }  // not used
        void destroy() noexcept { free(mem); }
#else   // NV_TENSORRT_MAJOR < 6
        void* data() const { return mem; }
        std::size_t size() const { return s; }
        DataType type() const { return DataType::kFLOAT; }  // not used
        void destroy() { free(mem); }
#endif  // NV_TENSORRT_MAJOR
    private:
        void *mem{nullptr};
        std::size_t s;
};

IHostMemoryFromFile::IHostMemoryFromFile(std::string filename)
{
    std::ifstream infile(filename, std::ifstream::binary | std::ifstream::ate);
    assert(!infile.fail());
    s = infile.tellg();
    infile.seekg(0, std::ios::beg);
    mem = malloc(s);
    infile.read(reinterpret_cast<char*>(mem), s);
}

std::string locateFile(const std::string& input)
{
    std::vector<std::string> dirs{"./"};
    return locateFile(input, dirs);
}

void caffeToTRTModel(const std::string& deployFile,            // name for caffe prototxt
                     const std::string& modelFile,             // name for model
                     const std::vector<std::string>& outputs,  // network outputs
                     unsigned int maxBatchSize,                // batch size - NB must be at least as large as the batch we want to run with
                     IHostMemory *&trtModelStream)
{
    // create API root class - must span the lifetime of the engine usage
    IBuilder* builder = createInferBuilder(gLogger);
#if NV_TENSORRT_MAJOR >= 7
    INetworkDefinition* network = builder->createNetworkV2(0);  // no kEXPLICIT_BATCH
#else   // NV_TENSORRT_MAJOR < 7
    INetworkDefinition* network = builder->createNetwork();
#endif

    // parse the caffe model to populate the network, then set the outputs
    ICaffeParser* parser = createCaffeParser();

    // create a 16-bit model if it's natively supported
    bool useFp16 = builder->platformHasFastFp16();
    DataType modelDataType = useFp16 ? DataType::kHALF : DataType::kFLOAT;
    const IBlobNameToTensor *blobNameToTensor =
        parser->parse(locateFile(deployFile).c_str(),  // caffe deploy file
                      locateFile(modelFile).c_str(),   // caffe model file
                      *network,                        // network definition that the parser will populate
                      modelDataType);
    assert(blobNameToTensor != nullptr);

    // the caffe file has no notion of outputs, so we need to manually say
    // which tensors the engine should generate
    for (auto& s : outputs)
        network->markOutput(*blobNameToTensor->find(s.c_str()));

#if NV_TENSORRT_MAJOR >= 7
    auto config = builder->createBuilderConfig();
    assert(config != nullptr);
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(64_MB);
    if (useFp16) {
        config->setFlag(BuilderFlag::kFP16);
        std::cout << "Building TensorRT engine in FP16 mode..." << std::endl;
    } else {
        std::cout << "Building TensorRT engine in FP32 mode..." << std::endl;
    }
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    config->destroy();
#else   // NV_TENSORRT_MAJOR < 7
    // build the engine
    builder->setMaxBatchSize(maxBatchSize);
    builder->setMaxWorkspaceSize(64_MB);

    // set up the network for paired-fp16 format if available
    if (useFp16) {
#if NV_TENSORRT_MAJOR >= 4
        builder->setFp16Mode(true);
#else   // NV_TENSORRT_MAJOR < 4
        builder->setHalf2Mode(true);
#endif
    }
    ICudaEngine* engine = builder->buildCudaEngine(*network);
#endif  // NV_TENSORRT_MAJOR >= 7
    assert(engine != nullptr);

    // we don't need the network any more, and we can destroy the parser
    parser->destroy();
    network->destroy();

    // serialize the engine, then close everything down
    trtModelStream = engine->serialize();
    engine->destroy();
    builder->destroy();
}

// Writes the serialized engine to a file.
void giestream_to_file(IHostMemory *trtModelStream, const std::string filename)
{
    assert(trtModelStream != nullptr);
    std::ofstream outfile(filename, std::ofstream::binary);
    assert(!outfile.fail());
    outfile.write(reinterpret_cast<const char*>(trtModelStream->data()),
                  trtModelStream->size());
    outfile.close();
}

// Reads a serialized engine back from a file.
void file_to_giestream(const std::string filename, IHostMemoryFromFile *&trtModelStream)
{
    trtModelStream = new IHostMemoryFromFile(filename);
}

// Deserializes <det_name>.engine and prints its input/output bindings
// as a sanity check.
void verify_engine(std::string det_name)
{
    std::stringstream ss;
    ss << det_name << ".engine";
    IHostMemoryFromFile *trtModelStream{nullptr};
    file_to_giestream(ss.str(), trtModelStream);

    // create an engine
    IRuntime* infer = createInferRuntime(gLogger);
    assert(infer != nullptr);
    ICudaEngine* engine = infer->deserializeCudaEngine(
        trtModelStream->data(), trtModelStream->size(), nullptr);
    assert(engine != nullptr);

    assert(engine->getNbBindings() == 2);
    std::cout << "Bindings for " << det_name << " after deserializing:"
              << std::endl;
    for (int bi = 0; bi < 2; bi++) {
#if NV_TENSORRT_MAJOR >= 4
        Dims3 dim = static_cast<Dims3&&>(engine->getBindingDimensions(bi));
        if (engine->bindingIsInput(bi) == true) {
            std::cout << "  Input  ";
        } else {
            std::cout << "  Output ";
        }
        std::cout << bi << ": " << engine->getBindingName(bi) << ", "
                  << dim.d[0] << "x" << dim.d[1] << "x" << dim.d[2]
                  << std::endl;
#else   // NV_TENSORRT_MAJOR < 4
        DimsCHW dim = static_cast<DimsCHW&&>(engine->getBindingDimensions(bi));
        if (engine->bindingIsInput(bi) == true) {
            std::cout << "  Input  ";
        } else {
            std::cout << "  Output ";
        }
        std::cout << bi << ": " << engine->getBindingName(bi) << ", "
                  << dim.c() << "x" << dim.h() << "x" << dim.w()
                  << std::endl;
#endif  // NV_TENSORRT_MAJOR
    }
    engine->destroy();
    infer->destroy();
    trtModelStream->destroy();
}

int main(int argc, char** argv)
{
    IHostMemory *trtModelStream{nullptr};

    std::cout << "Building deploy.engine, maxBatchSize = 1" << std::endl;
    caffeToTRTModel("deploy.prototxt",
                    "deploy.caffemodel",
                    std::vector<std::string>{ "prob" },
                    1,  // batch size
                    trtModelStream);
    giestream_to_file(trtModelStream, "deploy.engine");
    trtModelStream->destroy();
    //delete trtModelStream;
    shutdownProtobufLibrary();

    std::cout << std::endl << "Verifying engine..." << std::endl;
    verify_engine("deploy");
    std::cout << "Done." << std::endl;
    return 0;
}
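// Example build and run. This is a sketch, not part of the original file:
// the include/library paths below are assumptions that depend on where CUDA
// and TensorRT are installed on the target system, and the Caffe parser
// library is named either nvparsers or nvcaffe_parser depending on the
// TensorRT release. common.h (and its Logger/locateFile helpers) must be
// on the include path as well.
//
//   g++ -std=c++11 -o create_engine create_engine.cpp \
//       -I/usr/local/cuda/include -L/usr/local/cuda/lib64 \
//       -lnvinfer -lnvparsers -lcudart
//   ./create_engine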