304 lines
13 KiB
C++
304 lines
13 KiB
C++
|
// trtNet.cpp
|
||
|
|
||
|
#include "trtNet.h"
|
||
|
|
||
|
using namespace nvinfer1;
|
||
|
using namespace nvcaffeparser1;
|
||
|
|
||
|
#define CHECK(status) \
|
||
|
do { \
|
||
|
auto ret = status; \
|
||
|
if (ret != 0) { \
|
||
|
std::cerr << "Cuda failure in file '" << __FILE__ \
|
||
|
<< "' line " << __LINE__ \
|
||
|
<< ": " << ret << std::endl; \
|
||
|
abort(); \
|
||
|
} \
|
||
|
} while (0)
|
||
|
|
||
|
#define my_assert(EXP, MSG) \
|
||
|
do { \
|
||
|
if (!(EXP)) { \
|
||
|
std::cerr << "Assertion fail in file '" << __FILE__ \
|
||
|
<< "' line " << __LINE__ \
|
||
|
<< ": " << (MSG) << std:: endl; \
|
||
|
throw std::exception(); \
|
||
|
} \
|
||
|
} while (0)
|
||
|
|
||
|
|
||
|
namespace trtnet {
|
||
|
|
||
|
//
|
||
|
// TrtGooglenet stuffs
|
||
|
//
|
||
|
|
||
|
TrtGooglenet::TrtGooglenet()
|
||
|
{
|
||
|
for (int i = 0; i < 2; i++) {
|
||
|
_gpu_buffers[i] = nullptr;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void TrtGooglenet::_initEngine(std::string filePath)
|
||
|
{
|
||
|
_gieModelStream = new IHostMemoryFromFile(filePath);
|
||
|
_runtime = createInferRuntime(_gLogger);
|
||
|
my_assert(_runtime != nullptr, "_runtime is null");
|
||
|
_engine = _runtime->deserializeCudaEngine(
|
||
|
_gieModelStream->data(),
|
||
|
_gieModelStream->size(),
|
||
|
nullptr);
|
||
|
my_assert(_engine != nullptr, "_engine is null");
|
||
|
my_assert(_engine->getNbBindings() == 2, "wrong number of bindings");
|
||
|
_binding_data = _engine->getBindingIndex("data");
|
||
|
my_assert(_engine->bindingIsInput(_binding_data) == true, "bad type of binding 'data'");
|
||
|
_binding_prob = _engine->getBindingIndex("prob");
|
||
|
my_assert(_engine->bindingIsInput(_binding_prob) == false, "bad type of binding 'prob'");
|
||
|
_context = _engine->createExecutionContext();
|
||
|
my_assert(_context != nullptr, "_context is null");
|
||
|
_gieModelStream->destroy();
|
||
|
CHECK(cudaStreamCreate(&_stream));
|
||
|
}
|
||
|
|
||
|
void TrtGooglenet::initEngine(std::string filePath, int dataDims[3], int probDims[3])
|
||
|
{
|
||
|
_initEngine(filePath);
|
||
|
#if NV_TENSORRT_MAJOR >= 4
|
||
|
Dims3 d;
|
||
|
d = static_cast<Dims3&&>(_engine->getBindingDimensions(_binding_data));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'data'");
|
||
|
my_assert(d.d[0] == dataDims[0] && d.d[1] == dataDims[1] && d.d[2] == dataDims[2], "bad dims for 'data'");
|
||
|
_blob_sizes[_binding_data] = d.d[0] * d.d[1] * d.d[2];
|
||
|
|
||
|
d = static_cast<Dims3&&>(_engine->getBindingDimensions(_binding_prob));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'prob'");
|
||
|
my_assert(d.d[0] == probDims[0] && d.d[1] == probDims[1] && d.d[2] == probDims[2], "bad dims for 'prob'");
|
||
|
_blob_sizes[_binding_prob] = d.d[0] * d.d[1] * d.d[2];
|
||
|
#else // NV_TENSORRT_MAJOR < 4
|
||
|
DimsCHW d;
|
||
|
d = static_cast<DimsCHW&&>(_engine->getBindingDimensions(_binding_data));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'data'");
|
||
|
my_assert(d.c() == dataDims[0] && d.h() == dataDims[1] && d.w() == dataDims[2], "bad dims for 'data'");
|
||
|
_blob_sizes[_binding_data] = d.c() * d.h() * d.w();
|
||
|
|
||
|
d = static_cast<DimsCHW&&>(_engine->getBindingDimensions(_binding_prob));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'prob'");
|
||
|
my_assert(d.c() == probDims[0] && d.h() == probDims[1] && d.w() == probDims[2], "bad dims for 'prob'");
|
||
|
_blob_sizes[_binding_prob] = d.c() * d.h() * d.w();
|
||
|
#endif // NV_TENSORRT_MAJOR
|
||
|
|
||
|
for (int i = 0; i < 2; i++) {
|
||
|
CHECK(cudaMalloc(&_gpu_buffers[i], _blob_sizes[i] * sizeof(float)));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void TrtGooglenet::forward(float *imgs, float *prob)
|
||
|
{
|
||
|
CHECK(cudaMemcpyAsync(_gpu_buffers[_binding_data],
|
||
|
imgs,
|
||
|
_blob_sizes[_binding_data] * sizeof(float),
|
||
|
cudaMemcpyHostToDevice,
|
||
|
_stream));
|
||
|
_context->enqueue(1, _gpu_buffers, _stream, nullptr);
|
||
|
CHECK(cudaMemcpyAsync(prob,
|
||
|
_gpu_buffers[_binding_prob],
|
||
|
_blob_sizes[_binding_prob] * sizeof(float),
|
||
|
cudaMemcpyDeviceToHost,
|
||
|
_stream));
|
||
|
cudaStreamSynchronize(_stream);
|
||
|
}
|
||
|
|
||
|
void TrtGooglenet::destroy()
|
||
|
{
|
||
|
for (int i = 0; i < 2; i++) {
|
||
|
if (_gpu_buffers[i] != nullptr) {
|
||
|
CHECK(cudaFree(_gpu_buffers[i]));
|
||
|
_gpu_buffers[i] = nullptr;
|
||
|
}
|
||
|
}
|
||
|
cudaStreamDestroy(_stream);
|
||
|
_context->destroy();
|
||
|
_engine->destroy();
|
||
|
_runtime->destroy();
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// TrtMtcnnDet stuffs
|
||
|
//
|
||
|
|
||
|
TrtMtcnnDet::TrtMtcnnDet()
|
||
|
{
|
||
|
for (int i = 0; i < 4; i++) {
|
||
|
_gpu_buffers[i] = nullptr;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void TrtMtcnnDet::_initEngine(std::string filePath, const char *dataName, const char *prob1Name, const char *boxesName, const char *marksName="unspecified")
|
||
|
{
|
||
|
_gieModelStream = new IHostMemoryFromFile(filePath);
|
||
|
_runtime = createInferRuntime(_gLogger);
|
||
|
my_assert(_runtime != nullptr, "_runtime is null");
|
||
|
_engine = _runtime->deserializeCudaEngine(
|
||
|
_gieModelStream->data(),
|
||
|
_gieModelStream->size(),
|
||
|
nullptr);
|
||
|
my_assert(_engine != nullptr, "_engine is null");
|
||
|
my_assert(_engine->getNbBindings() == _num_bindings, "wrong number of bindings");
|
||
|
_binding_data = _engine->getBindingIndex(dataName);
|
||
|
my_assert(_engine->bindingIsInput(_binding_data) == true, "bad type of binding 'data'");
|
||
|
_binding_prob1 = _engine->getBindingIndex(prob1Name);
|
||
|
my_assert(_engine->bindingIsInput(_binding_prob1) == false, "bad type of binding 'prob1'");
|
||
|
_binding_boxes = _engine->getBindingIndex(boxesName);
|
||
|
my_assert(_engine->bindingIsInput(_binding_boxes) == false, "bad type of binding 'boxes'");
|
||
|
if (_num_bindings == 4) {
|
||
|
_binding_marks = _engine->getBindingIndex(marksName);
|
||
|
my_assert(_engine->bindingIsInput(_binding_marks) == false, "bad type of binding 'marks'");
|
||
|
}
|
||
|
_context = _engine->createExecutionContext();
|
||
|
my_assert(_context != nullptr, "_context is null");
|
||
|
_gieModelStream->destroy();
|
||
|
CHECK(cudaStreamCreate(&_stream));
|
||
|
}
|
||
|
|
||
|
void TrtMtcnnDet::_setBlobSizes(int dataDims[3], int prob1Dims[3], int boxesDims[3])
|
||
|
{
|
||
|
#if NV_TENSORRT_MAJOR >= 4
|
||
|
Dims3 d;
|
||
|
d = static_cast<Dims3&&>(_engine->getBindingDimensions(_binding_data));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'data'");
|
||
|
my_assert(d.d[0] == dataDims[0] && d.d[1] == dataDims[1] && d.d[2] == dataDims[2], "bad dims for 'data'");
|
||
|
_blob_sizes[_binding_data] = d.d[0] * d.d[1] * d.d[2];
|
||
|
|
||
|
d = static_cast<Dims3&&>(_engine->getBindingDimensions(_binding_prob1));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'prob1'");
|
||
|
my_assert(d.d[0] == prob1Dims[0] && d.d[1] == prob1Dims[1] && d.d[2] == prob1Dims[2], "bad dims for 'prob1'");
|
||
|
_blob_sizes[_binding_prob1] = d.d[0] * d.d[1] * d.d[2];
|
||
|
|
||
|
d = static_cast<Dims3&&>(_engine->getBindingDimensions(_binding_boxes));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'boxes'");
|
||
|
my_assert(d.d[0] == boxesDims[0] && d.d[1] == boxesDims[1] && d.d[2] == boxesDims[2], "bad dims for 'boxes'");
|
||
|
_blob_sizes[_binding_boxes] = d.d[0] * d.d[1] * d.d[2];
|
||
|
#else // NV_TENSORRT_MAJOR < 4
|
||
|
DimsCHW d;
|
||
|
d = static_cast<DimsCHW&&>(_engine->getBindingDimensions(_binding_data));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'data'");
|
||
|
my_assert(d.c() == dataDims[0] && d.h() == dataDims[1] && d.w() == dataDims[2], "bad dims for 'data'");
|
||
|
_blob_sizes[_binding_data] = d.c() * d.h() * d.w();
|
||
|
|
||
|
d = static_cast<DimsCHW&&>(_engine->getBindingDimensions(_binding_prob1));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'prob1'");
|
||
|
my_assert(d.c() == prob1Dims[0] && d.h() == prob1Dims[1] && d.w() == prob1Dims[2], "bad dims for 'prob1'");
|
||
|
_blob_sizes[_binding_prob1] = d.c() * d.h() * d.w();
|
||
|
|
||
|
d = static_cast<DimsCHW&&>(_engine->getBindingDimensions(_binding_boxes));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'boxes'");
|
||
|
my_assert(d.c() == boxesDims[0] && d.h() == boxesDims[1] && d.w() == boxesDims[2], "bad dims for 'boxes'");
|
||
|
_blob_sizes[_binding_boxes] = d.c() * d.h() * d.w();
|
||
|
#endif // NV_TENSORRT_MAJOR
|
||
|
}
|
||
|
|
||
|
void TrtMtcnnDet::initDet1(std::string filePath, int dataDims[3], int prob1Dims[3], int boxesDims[3])
|
||
|
{
|
||
|
_num_bindings = 3;
|
||
|
_initEngine(filePath, "data", "prob1", "conv4-2");
|
||
|
_setBlobSizes(dataDims, prob1Dims, boxesDims);
|
||
|
}
|
||
|
|
||
|
void TrtMtcnnDet::initDet2(std::string filePath, int dataDims[3], int prob1Dims[3], int boxesDims[3])
|
||
|
{
|
||
|
_num_bindings = 3;
|
||
|
_initEngine(filePath, "data", "prob1", "conv5-2");
|
||
|
_setBlobSizes(dataDims, prob1Dims, boxesDims);
|
||
|
}
|
||
|
|
||
|
void TrtMtcnnDet::initDet3(std::string filePath, int dataDims[3], int prob1Dims[3], int boxesDims[3], int marksDims[3])
|
||
|
{
|
||
|
_num_bindings = 4;
|
||
|
_initEngine(filePath, "data", "prob1", "conv6-2", "conv6-3");
|
||
|
_setBlobSizes(dataDims, prob1Dims, boxesDims);
|
||
|
|
||
|
#if NV_TENSORRT_MAJOR >= 4
|
||
|
Dims3 d;
|
||
|
d = static_cast<Dims3&&>(_engine->getBindingDimensions(_binding_marks));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'marks'");
|
||
|
my_assert(d.d[0] == marksDims[0] && d.d[1] == marksDims[1] && d.d[2] == marksDims[2], "bad dims for 'marks'");
|
||
|
_blob_sizes[_binding_marks] = d.d[0] * d.d[1] * d.d[2];
|
||
|
#else // NV_TENSORRT_MAJOR < 4
|
||
|
DimsCHW d;
|
||
|
d = static_cast<DimsCHW&&>(_engine->getBindingDimensions(_binding_marks));
|
||
|
my_assert(d.nbDims == 3, "bad nbDims for 'marks'");
|
||
|
my_assert(d.c() == marksDims[0] && d.h() == marksDims[1] && d.w() == marksDims[2], "bad dims for 'marks'");
|
||
|
_blob_sizes[_binding_marks] = d.c() * d.h() * d.w();
|
||
|
#endif // NV_TENSORRT_MAJOR
|
||
|
}
|
||
|
|
||
|
void TrtMtcnnDet::setBatchSize(int value)
|
||
|
{
|
||
|
my_assert(value > 0 && value <= 1024, "bad batch_size");
|
||
|
if (value == _batchsize || _engine == nullptr)
|
||
|
return; // do nothing
|
||
|
_batchsize = value;
|
||
|
for (int i = 0; i < _num_bindings; i++) {
|
||
|
if (_gpu_buffers[i] != nullptr) {
|
||
|
CHECK(cudaFree(_gpu_buffers[i]));
|
||
|
_gpu_buffers[i] = nullptr;
|
||
|
}
|
||
|
}
|
||
|
for (int i = 0; i < _num_bindings; i++) {
|
||
|
CHECK(cudaMalloc(&_gpu_buffers[i],
|
||
|
_batchsize * _blob_sizes[i] * sizeof(float)));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int TrtMtcnnDet::getBatchSize()
|
||
|
{
|
||
|
return _batchsize;
|
||
|
}
|
||
|
|
||
|
void TrtMtcnnDet::forward(float *imgs, float *probs, float *boxes, float *marks=nullptr)
|
||
|
{
|
||
|
my_assert(_batchsize > 0, "_batchsize is not set");
|
||
|
CHECK(cudaMemcpyAsync(_gpu_buffers[_binding_data],
|
||
|
imgs,
|
||
|
_batchsize * _blob_sizes[_binding_data] * sizeof(float),
|
||
|
cudaMemcpyHostToDevice,
|
||
|
_stream));
|
||
|
_context->enqueue(_batchsize, _gpu_buffers, _stream, nullptr);
|
||
|
CHECK(cudaMemcpyAsync(probs,
|
||
|
_gpu_buffers[_binding_prob1],
|
||
|
_batchsize * _blob_sizes[_binding_prob1] * sizeof(float),
|
||
|
cudaMemcpyDeviceToHost,
|
||
|
_stream));
|
||
|
CHECK(cudaMemcpyAsync(boxes,
|
||
|
_gpu_buffers[_binding_boxes],
|
||
|
_batchsize * _blob_sizes[_binding_boxes] * sizeof(float),
|
||
|
cudaMemcpyDeviceToHost,
|
||
|
_stream));
|
||
|
if (_num_bindings == 4) {
|
||
|
my_assert(marks != nullptr, "pointer 'marks' is null");
|
||
|
CHECK(cudaMemcpyAsync(marks,
|
||
|
_gpu_buffers[_binding_marks],
|
||
|
_batchsize * _blob_sizes[_binding_marks] * sizeof(float),
|
||
|
cudaMemcpyDeviceToHost,
|
||
|
_stream));
|
||
|
}
|
||
|
cudaStreamSynchronize(_stream);
|
||
|
}
|
||
|
|
||
|
void TrtMtcnnDet::destroy()
|
||
|
{
|
||
|
for (int i = 0; i < _num_bindings; i++) {
|
||
|
if (_gpu_buffers[i] != nullptr) {
|
||
|
CHECK(cudaFree(_gpu_buffers[i]));
|
||
|
_gpu_buffers[i] = nullptr;
|
||
|
}
|
||
|
}
|
||
|
cudaStreamDestroy(_stream);
|
||
|
_context->destroy();
|
||
|
_engine->destroy();
|
||
|
_runtime->destroy();
|
||
|
}
|
||
|
|
||
|
} // namespace trtnet
|