TensorRT-Demo/mtcnn/create_engines.cpp

252 lines
8.4 KiB
C++

// create_engines.cpp
//
// This program creates TensorRT engines for MTCNN models.
//
// Inputs:
// det1_relu.prototxt
// det1_relu.caffemodel
// det2_relu.prototxt
// det2_relu.caffemodel
// det3_relu.prototxt
// det3_relu.caffemodel
//
// Outputs:
// det1.engine
// det2.engine
// det3.engine
#include <assert.h>
#include <fstream>
#include <sstream>
#include <iostream>
#include <cmath>
#include <algorithm>
#include <sys/stat.h>
#include <cmath>
#include <time.h>
#include <cuda_runtime_api.h>
#include "NvInfer.h"
#include "NvCaffeParser.h"
#include "common.h"
// Bring the TensorRT core and Caffe-parser namespaces into file scope so the
// types used below (IBuilder, ICudaEngine, ICaffeParser, ...) can be written
// unqualified.
using namespace nvinfer1;
using namespace nvcaffeparser1;
// File-wide logger instance passed to createInferBuilder() and
// createInferRuntime() below.  It is constructed with severity kWARNING; the
// commented-out line is the kINFO alternative (presumably more verbose --
// Logger is declared outside this file, confirm in common.h).
//static Logger gLogger(ILogger::Severity::kINFO);
static Logger gLogger(ILogger::Severity::kWARNING);
// IHostMemory implementation whose bytes come from a file on disk.
//
// The constructor (defined out of line) loads the file into a malloc()'d
// buffer.  data() and size() expose that buffer, and destroy() releases it.
// The destructor does NOT free the buffer, so callers must call destroy().
class IHostMemoryFromFile : public IHostMemory
{
public:
    // Loads the contents of `filename` into memory.
    IHostMemoryFromFile(std::string filename);

    // The object owns a raw malloc()'d buffer; a copy would hand the same
    // pointer to two owners and free it twice, so copying is disabled.
    IHostMemoryFromFile(const IHostMemoryFromFile&) = delete;
    IHostMemoryFromFile& operator=(const IHostMemoryFromFile&) = delete;

#if NV_TENSORRT_MAJOR >= 6
    // Start of the loaded buffer (nullptr once destroyed).
    void* data() const noexcept { return mem; }
    // Number of bytes held in the buffer.
    std::size_t size() const noexcept { return s; }
    DataType type() const noexcept { return DataType::kFLOAT; } // not used
    // Frees the buffer and resets the state so that a repeated call is a
    // harmless free(nullptr) instead of a double free.
    void destroy() noexcept { free(mem); mem = nullptr; s = 0; }
#else // NV_TENSORRT_MAJOR < 6
    // Start of the loaded buffer (nullptr once destroyed).
    void* data() const { return mem; }
    // Number of bytes held in the buffer.
    std::size_t size() const { return s; }
    DataType type() const { return DataType::kFLOAT; } // not used
    // Frees the buffer and resets the state so that a repeated call is a
    // harmless free(nullptr) instead of a double free.
    void destroy() { free(mem); mem = nullptr; s = 0; }
#endif // NV_TENSORRT_MAJOR
private:
    void *mem{nullptr};   // malloc()'d buffer holding the file contents
    std::size_t s{0};     // size of `mem` in bytes
};
// Reads the whole of `filename` into a malloc()'d buffer.
//
// On any failure (file cannot be opened, size cannot be determined, allocation
// fails, or the read comes up short) an error is written to std::cerr and the
// object is left empty: data() == nullptr and size() == 0.
IHostMemoryFromFile::IHostMemoryFromFile(std::string filename)
{
    // Start from a well-defined empty state so every early return below
    // leaves the object consistent.
    mem = nullptr;
    s = 0;

    // Open positioned at the end so tellg() yields the file size.
    std::ifstream infile(filename, std::ifstream::binary | std::ifstream::ate);
    if (!infile.is_open()) {
        std::cerr << "ERROR: cannot open " << filename << std::endl;
        return;
    }

    // tellg() reports -1 on failure; converting that to size_t would request
    // an enormous allocation, so check it while it is still signed.
    const std::streamoff length = infile.tellg();
    if (length <= 0) {
        std::cerr << "ERROR: " << filename << " is empty or its size cannot be determined" << std::endl;
        return;
    }
    infile.seekg(0, std::ios::beg);

    void *buffer = malloc(static_cast<std::size_t>(length));
    if (buffer == nullptr) {
        std::cerr << "ERROR: out of memory while loading " << filename << std::endl;
        return;
    }

    infile.read(reinterpret_cast<char*>(buffer), static_cast<std::streamsize>(length));
    if (infile.gcount() != static_cast<std::streamsize>(length)) {
        std::cerr << "ERROR: short read from " << filename << std::endl;
        free(buffer);
        return;
    }

    mem = buffer;
    s = static_cast<std::size_t>(length);
}
// Single-argument convenience overload: forwards to the two-argument
// locateFile() (declared outside this file) with "./" as the only directory
// in the search list.
std::string locateFile(const std::string& input)
{
    std::vector<std::string> searchDirs;
    searchDirs.push_back("./");
    return locateFile(input, searchDirs);
}
void caffeToTRTModel(const std::string& deployFile, // name for caffe prototxt
const std::string& modelFile, // name for model
const std::vector<std::string>& outputs, // network outputs
unsigned int maxBatchSize, // batch size - NB must be at least as large as the batch we want to run with)
IHostMemory *&trtModelStream)
{
// create API root class - must span the lifetime of the engine usage
IBuilder* builder = createInferBuilder(gLogger);
#if NV_TENSORRT_MAJOR >= 7
INetworkDefinition* network = builder->createNetworkV2(0); // no kEXPLICIT_BATCH
#else // NV_TENSORRT_MAJOR < 7
INetworkDefinition* network = builder->createNetwork();
#endif
// parse the caffe model to populate the network, then set the outputs
ICaffeParser* parser = createCaffeParser();
bool useFp16 = builder->platformHasFastFp16();
// create a 16-bit model if it's natively supported
DataType modelDataType = useFp16 ? DataType::kHALF : DataType::kFLOAT;
const IBlobNameToTensor *blobNameToTensor =
parser->parse(locateFile(deployFile).c_str(), // caffe deploy file
locateFile(modelFile).c_str(), // caffe model file
*network, // network definition that the parser will populate
modelDataType);
assert(blobNameToTensor != nullptr);
// the caffe file has no notion of outputs, so we need to manually say which tensors the engine should generate
for (auto& s : outputs)
network->markOutput(*blobNameToTensor->find(s.c_str()));
#if NV_TENSORRT_MAJOR >= 7
auto config = builder->createBuilderConfig();
assert(config != nullptr);
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(64_MB);
if (useFp16) {
config->setFlag(BuilderFlag::kFP16);
cout << "Building TensorRT engine in FP16 mode..." << endl;
} else {
cout << "Building TensorRT engine in FP32 mode..." << endl;
}
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
config->destroy();
#else // NV_TENSORRT_MAJOR < 7
// Build the engine
builder->setMaxBatchSize(maxBatchSize);
builder->setMaxWorkspaceSize(64_MB);
// set up the network for paired-fp16 format if available
if (useFp16) {
#if NV_TENSORRT_MAJOR >= 4
builder->setFp16Mode(true);
#else // NV_TENSORRT_MAJOR < 4
builder->setHalf2Mode(true);
#endif
}
ICudaEngine* engine = builder->buildCudaEngine(*network);
#endif // NV_TENSORRT_MAJOR >= 7
assert(engine != nullptr);
// we don't need the network any more, and we can destroy the parser
parser->destroy();
network->destroy();
// serialize the engine, then close everything down
trtModelStream = engine->serialize();
engine->destroy();
builder->destroy();
}
// Writes the serialized engine held by `trtModelStream` to `filename` as a
// binary file.
//
// A null stream or a file that cannot be opened trips an assert().  A failed
// write or close is reported on std::cerr and then asserted, so a truncated
// engine file no longer goes unnoticed.
void giestream_to_file(IHostMemory *trtModelStream, const std::string filename)
{
    assert(trtModelStream != nullptr);

    std::ofstream outfile(filename, std::ofstream::binary);
    assert(!outfile.fail());

    outfile.write(static_cast<const char*>(trtModelStream->data()),
                  static_cast<std::streamsize>(trtModelStream->size()));
    outfile.close();

    // close() flushes; either a failed write or a failed flush sets failbit.
    if (outfile.fail()) {
        std::cerr << "ERROR: failed to write " << filename << std::endl;
    }
    assert(!outfile.fail());
}
void file_to_giestream(const std::string filename, IHostMemoryFromFile *&trtModelStream)
{
trtModelStream = new IHostMemoryFromFile(filename);
}
void verify_engine(std::string det_name, int num_bindings)
{
std::stringstream ss;
ss << det_name << ".engine";
IHostMemoryFromFile *trtModelStream{nullptr};
file_to_giestream(ss.str(), trtModelStream);
// create an engine
IRuntime* infer = createInferRuntime(gLogger);
assert(infer != nullptr);
ICudaEngine* engine = infer->deserializeCudaEngine(
trtModelStream->data(),
trtModelStream->size(),
nullptr);
assert(engine != nullptr);
assert(engine->getNbBindings() == num_bindings);
std::cout << "Bindings for " << det_name << " after deserializing:"
<< std::endl;
for (int bi = 0; bi < num_bindings; bi++) {
#if NV_TENSORRT_MAJOR >= 4
Dims3 dim = static_cast<Dims3&&>(engine->getBindingDimensions(bi));
if (engine->bindingIsInput(bi) == true) {
std::cout << " Input ";
} else {
std::cout << " Output ";
}
std::cout << bi << ": " << engine->getBindingName(bi) << ", "
<< dim.d[0] << "x" << dim.d[1] << "x" << dim.d[2]
<< std::endl;
#else // NV_TENSORRT_MAJOR < 4
DimsCHW dim = static_cast<DimsCHW&&>(engine->getBindingDimensions(bi));
if (engine->bindingIsInput(bi) == true) {
std::cout << " Input ";
} else {
std::cout << " Output ";
}
std::cout << bi << ": " << engine->getBindingName(bi) << ", "
<< dim.c() << "x" << dim.h() << "x" << dim.w()
<< std::endl;
#endif // NV_TENSORRT_MAJOR
}
engine->destroy();
infer->destroy();
trtModelStream->destroy();
}
// Program entry point.  Builds the PNet, RNet and ONet engines one after
// another, writes each to its .engine file, shuts down the protobuf library,
// then reloads every engine file and checks its binding count.
// The command-line arguments are not used.
int main(int argc, char** argv)
{
    // Everything that differs between the three MTCNN stages, one row each.
    struct Stage {
        const char* banner;                // progress line printed before building
        const char* deployFile;            // Caffe prototxt
        const char* modelFile;             // Caffe weights
        std::vector<std::string> outputs;  // blobs to mark as outputs
        unsigned int maxBatchSize;         // max batch size
        const char* engineFile;            // serialized engine destination
        const char* detName;               // name handed to verify_engine()
        int numBindings;                   // expected binding count
    };

    const Stage stages[] = {
        { "Building det1.engine (PNet), maxBatchSize = 1",
          "det1_relu.prototxt", "det1_relu.caffemodel",
          { "prob1", "conv4-2" },
          1, "det1.engine", "det1", 3 },
        { "Building det2.engine (RNet), maxBatchSize = 256",
          "det2_relu.prototxt", "det2_relu.caffemodel",
          { "prob1", "conv5-2" },
          256, "det2.engine", "det2", 3 },
        { "Building det3.engine (ONet), maxBatchSize = 64",
          "det3_relu.prototxt", "det3_relu.caffemodel",
          { "prob1", "conv6-2", "conv6-3" },
          64, "det3.engine", "det3", 4 },
    };

    // Build, save and release each engine in turn.
    for (const Stage& stage : stages) {
        IHostMemory *trtModelStream{nullptr};
        std::cout << stage.banner
                  << std::endl;
        caffeToTRTModel(stage.deployFile,
                        stage.modelFile,
                        stage.outputs,
                        stage.maxBatchSize,
                        trtModelStream);
        giestream_to_file(trtModelStream, stage.engineFile);
        trtModelStream->destroy();
    }

    shutdownProtobufLibrary();

    std::cout << std::endl << "Verifying engines..." << std::endl;
    for (const Stage& stage : stages) {
        verify_engine(stage.detName, stage.numBindings);
    }
    std::cout << "Done." << std::endl;
    return 0;
}