305 lines
11 KiB
Python
305 lines
11 KiB
Python
"""build_engine.py
|
|
|
|
This script converts a SSD model (pb) to UFF and subsequently builds
|
|
the TensorRT engine.
|
|
|
|
Input : ssd_mobilenet_v[1|2]_[coco|egohands].pb
|
|
Output: TRT_ssd_mobilenet_v[1|2]_[coco|egohands].bin
|
|
"""
|
|
|
|
|
|
import os
|
|
import ctypes
|
|
import argparse
|
|
|
|
import numpy as np
|
|
import uff
|
|
import tensorrt as trt
|
|
import graphsurgeon as gs
|
|
|
|
|
|
# Directory containing this script; every model file is looked up here.
DIR_NAME = os.path.dirname(__file__)
LIB_FILE = os.path.abspath(os.path.join(DIR_NAME, 'libflattenconcat.so'))


def _in_script_dir(filename):
    """Return the absolute path of `filename` inside DIR_NAME."""
    return os.path.abspath(os.path.join(DIR_NAME, filename))


def _make_spec(model, num_classes, min_size, input_order):
    """Build the MODEL_SPECS entry for `model`.

    The pb/uff/bin file names are derived from the model name, and
    'max_size' is 0.95 for every model.
    """
    return {
        'input_pb': _in_script_dir(model + '.pb'),
        'tmp_uff': _in_script_dir(model + '.uff'),
        'output_bin': _in_script_dir('TRT_' + model + '.bin'),
        'num_classes': num_classes,
        'min_size': min_size,
        'max_size': 0.95,
        # order of loc_data, conf_data, priorbox_data
        'input_order': list(input_order),
    }


# (model name, num_classes, min_size, input_order)
_SPEC_TABLE = (
    ('ssd_mobilenet_v1_coco', 91, 0.2, (0, 2, 1)),
    ('ssd_mobilenet_v1_egohands', 2, 0.05, (0, 2, 1)),
    ('ssd_mobilenet_v2_coco', 91, 0.2, (1, 0, 2)),
    ('ssd_mobilenet_v2_egohands', 2, 0.05, (0, 2, 1)),
    ('ssd_inception_v2_coco', 91, 0.2, (0, 2, 1)),
    ('ssdlite_mobilenet_v2_coco', 91, 0.2, (0, 2, 1)),
)

MODEL_SPECS = {
    name: _make_spec(name, num_classes, min_size, input_order)
    for name, num_classes, min_size, input_order in _SPEC_TABLE
}

INPUT_DIMS = (3, 300, 300)
DEBUG_UFF = False
|
|
|
|
|
|
def replace_addv2(graph):
    """Rewrite every 'AddV2' node in the graph as an 'Add' node.

    The UFF parser does not support 'AddV2'.  The graph is modified
    in place and also returned.

    Reference:
    1. https://github.com/jkjung-avt/tensorrt_demos/issues/113#issuecomment-629900809
    """
    addv2_nodes = graph.find_nodes_by_op('AddV2')
    for addv2_node in addv2_nodes:
        gs.update_node(addv2_node, op='Add')
    return graph
|
|
|
|
|
|
def replace_fusedbnv3(graph):
    """Rewrite every 'FusedBatchNormV3' node as 'FusedBatchNorm'.

    The UFF parser does not support 'FusedBatchNormV3'.  The graph is
    modified in place and also returned.

    Reference:
    1. https://devtalk.nvidia.com/default/topic/1066445/tensorrt/tensorrt-6-0-1-tensorflow-1-14-no-conversion-function-registered-for-layer-fusedbatchnormv3-yet/post/5403567/#5403567
    2. https://github.com/jkjung-avt/tensorrt_demos/issues/76#issuecomment-607879831
    """
    bnv3_nodes = graph.find_nodes_by_op('FusedBatchNormV3')
    for bnv3_node in bnv3_nodes:
        gs.update_node(bnv3_node, op='FusedBatchNorm')
    return graph
|
|
|
|
|
|
def add_anchor_input(graph):
    """Give the GridAnchor node the const input it is missing.

    A 'Const' node named 'AnchorInput' holding float32 [1, 1] is
    appended to the graph, and its name is inserted as the first input
    of the first 'GridAnchor_TRT' node.  The graph is returned.

    Reference:
    1. https://www.minds.ai/post/deploying-ssd-mobilenet-v2-on-the-nvidia-jetson-and-nano-platforms
    """
    anchor_const = gs.create_node(
        'AnchorInput', 'Const', value=np.array([1, 1], dtype=np.float32))
    graph.append(anchor_const)
    grid_anchor = graph.find_nodes_by_op('GridAnchor_TRT')[0]
    grid_anchor.input.insert(0, 'AnchorInput')
    return graph
|
|
|
|
def add_plugin(graph, model, spec):
    """Rewrite the TensorFlow SSD graph so it can be converted to UFF.

    Assert nodes are removed, inputs of Identity nodes are forwarded,
    the pre/post-processing namespaces are collapsed into plugin nodes
    (Input placeholder, GridAnchor_TRT, NMS_TRT, FlattenConcat_TRT and
    a ConcatV2 for the prior boxes), 'AddV2'/'FusedBatchNormV3' ops are
    replaced, and leftover inputs/outputs are cleaned up so that 'NMS'
    is a graph output.

    Args:
        graph: the graph to modify (main() passes a gs.DynamicGraph).
        model: model name; not used here, kept for the callers.
        spec: dict providing 'num_classes', 'min_size', 'max_size' and
            'input_order' for the model.

    Returns:
        The modified graph.

    Raises:
        RuntimeError: if 'NMS' is not among the graph outputs after
            the rewrite.

    Reference:
    1. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v1_coco_2018_01_28.py
    2. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v2_coco_2018_03_29.py
    3. https://devtalk.nvidia.com/default/topic/1050465/jetson-nano/how-to-write-config-py-for-converting-ssd-mobilenetv2-to-uff-format/post/5333033/#5333033
    """
    num_classes = spec['num_classes']
    min_size = spec['min_size']
    max_size = spec['max_size']
    input_order = spec['input_order']

    all_assert_nodes = graph.find_nodes_by_op('Assert')
    graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)

    all_identity_nodes = graph.find_nodes_by_op('Identity')
    graph.forward_inputs(all_identity_nodes)

    Input = gs.create_plugin_node(
        name='Input',
        op='Placeholder',
        shape=(1,) + INPUT_DIMS
    )

    PriorBox = gs.create_plugin_node(
        name='MultipleGridAnchorGenerator',
        op='GridAnchor_TRT',
        minSize=min_size,  # was 0.2
        maxSize=max_size,  # was 0.95
        aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
        variance=[0.1, 0.1, 0.2, 0.2],
        featureMapShapes=[19, 10, 5, 3, 2, 1],
        numLayers=6
    )

    NMS = gs.create_plugin_node(
        name='NMS',
        op='NMS_TRT',
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=0.3,  # was 1e-8
        nmsThreshold=0.6,
        topK=100,
        keepTopK=100,
        numClasses=num_classes,  # was 91
        inputOrder=input_order,
        confSigmoid=1,
        isNormalized=1
    )

    concat_priorbox = gs.create_node(
        'concat_priorbox',
        op='ConcatV2',
        axis=2
    )

    # Compare the major version as an integer.  Comparing only the first
    # character as a string misclassifies TensorRT 10+ ('1' < '7').
    trt_major = int(trt.__version__.split('.')[0])
    if trt_major >= 7:
        # NOTE(review): presumably the FlattenConcat_TRT plugin bundled
        # with TensorRT 7+ needs these two parameters -- confirm.
        flatten_kwargs = {'axis': 1, 'ignoreBatch': 0}
    else:
        flatten_kwargs = {}
    concat_box_loc = gs.create_plugin_node(
        'concat_box_loc',
        op='FlattenConcat_TRT',
        **flatten_kwargs
    )
    concat_box_conf = gs.create_plugin_node(
        'concat_box_conf',
        op='FlattenConcat_TRT',
        **flatten_kwargs
    )

    namespace_plugin_map = {
        'MultipleGridAnchorGenerator': PriorBox,
        'Postprocessor': NMS,
        'Preprocessor': Input,
        'ToFloat': Input,
        'Cast': Input,  # added for models trained with tf 1.15+
        'image_tensor': Input,
        'MultipleGridAnchorGenerator/Concatenate': concat_priorbox,  # for 'ssd_mobilenet_v1_coco'
        'Concatenate': concat_priorbox,  # for other models
        'concat': concat_box_loc,
        'concat_1': concat_box_conf
    }

    # for 'ssd_inception_v2_coco'
    graph.remove(
        graph.find_nodes_by_path(
            ['Preprocessor/map/TensorArrayStack_1/TensorArrayGatherV3']),
        remove_exclusive_dependencies=False)

    graph.collapse_namespaces(namespace_plugin_map)
    graph = replace_addv2(graph)
    graph = replace_fusedbnv3(graph)

    # Drop inputs left dangling on the collapsed plugin nodes.
    input_node = graph.find_nodes_by_name('Input')[0]
    if 'image_tensor:0' in input_node.input:
        input_node.input.remove('image_tensor:0')
    nms_node = graph.find_nodes_by_name('NMS')[0]
    if 'Input' in nms_node.input:
        nms_node.input.remove('Input')

    # Remove the Squeeze to avoid "Assertion 'isPlugin(layerName)' failed"
    graph.forward_inputs(
        graph.find_node_inputs_by_name(graph.graph_outputs[0], 'Squeeze'))
    if 'anchors' in [node.name for node in graph.graph_outputs]:
        graph.remove('anchors', remove_exclusive_dependencies=False)
    if len(graph.find_nodes_by_op('GridAnchor_TRT')[0].input) < 1:
        graph = add_anchor_input(graph)
    if 'NMS' not in [node.name for node in graph.graph_outputs]:
        graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
        if 'NMS' not in [node.name for node in graph.graph_outputs]:
            # We expect 'NMS' to be one of the outputs
            raise RuntimeError('bad graph_outputs')

    return graph
|
|
|
|
|
|
def main():
    """Build the TensorRT engine for the model named on the command line.

    The frozen graph spec['input_pb'] is rewritten by add_plugin(),
    converted to UFF (written to spec['tmp_uff']), parsed into a
    TensorRT network, built with FP16 enabled and batch size 1, and the
    serialized engine is written to spec['output_bin'].

    Raises:
        RuntimeError: if the UFF file cannot be parsed or the engine
            cannot be built.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('model', type=str, choices=list(MODEL_SPECS))
    args = arg_parser.parse_args()

    # initialize
    # Compare the major version as an integer.  Comparing only the first
    # character as a string misclassifies TensorRT 10+ ('1' < '7').
    trt_major = int(trt.__version__.split('.')[0])
    if trt_major < 7:
        # Older TensorRT needs the custom FlattenConcat plugin library.
        ctypes.CDLL(LIB_FILE)
    trt_logger = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(trt_logger, '')

    # compile the model into TensorRT engine
    model = args.model
    spec = MODEL_SPECS[model]
    dynamic_graph = add_plugin(
        gs.DynamicGraph(spec['input_pb']),
        model,
        spec)
    _ = uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        output_nodes=['NMS'],
        output_filename=spec['tmp_uff'],
        text=True,
        debug_mode=DEBUG_UFF)
    with trt.Builder(trt_logger) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as uff_parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        uff_parser.register_input('Input', INPUT_DIMS)
        uff_parser.register_output('MarkOutput_0')
        # parse() reports failure through its return value; stop here
        # rather than building an engine from an incomplete network.
        if not uff_parser.parse(spec['tmp_uff'], network):
            raise RuntimeError(
                'failed to parse UFF file: %s' % spec['tmp_uff'])
        engine = builder.build_cuda_engine(network)
        if engine is None:
            # build_cuda_engine() returns None when the build fails.
            raise RuntimeError(
                'failed to build TensorRT engine for %s' % model)

        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf)
|
|
|
|
|
|
# Build the engine only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|