"""build_engine.py

This script converts a SSD model (pb) to UFF and subsequently builds
the TensorRT engine.

Input : ssd_mobilenet_v[1|2]_[coco|egohands].pb
Output: TRT_ssd_mobilenet_v[1|2]_[coco|egohands].bin
"""


import os
import ctypes
import argparse

import numpy as np
import uff
import tensorrt as trt
import graphsurgeon as gs


DIR_NAME = os.path.dirname(__file__)
# FlattenConcat plugin shared library; only needed for TensorRT < 7,
# where FlattenConcat_TRT is not a built-in plugin.
LIB_FILE = os.path.abspath(os.path.join(DIR_NAME, 'libflattenconcat.so'))


def _make_spec(name, num_classes, min_size, input_order):
    """Build one MODEL_SPECS entry for the model named *name*.

    All file paths are derived from the model name; 'max_size' is 0.95
    for every supported model.

    # Args
        name: base name of the model, e.g. 'ssd_mobilenet_v1_coco'
        num_classes: number of detection classes (incl. background)
        min_size: GridAnchor minimum box scale
        input_order: order of loc_data, conf_data, priorbox_data for NMS
    """
    return {
        'input_pb': os.path.abspath(os.path.join(DIR_NAME, name + '.pb')),
        'tmp_uff': os.path.abspath(os.path.join(DIR_NAME, name + '.uff')),
        'output_bin': os.path.abspath(
            os.path.join(DIR_NAME, 'TRT_' + name + '.bin')),
        'num_classes': num_classes,
        'min_size': min_size,
        'max_size': 0.95,
        'input_order': input_order,
    }


MODEL_SPECS = {
    'ssd_mobilenet_v1_coco':
        _make_spec('ssd_mobilenet_v1_coco', 91, 0.2, [0, 2, 1]),
    'ssd_mobilenet_v1_egohands':
        _make_spec('ssd_mobilenet_v1_egohands', 2, 0.05, [0, 2, 1]),
    'ssd_mobilenet_v2_coco':
        _make_spec('ssd_mobilenet_v2_coco', 91, 0.2, [1, 0, 2]),
    'ssd_mobilenet_v2_egohands':
        _make_spec('ssd_mobilenet_v2_egohands', 2, 0.05, [0, 2, 1]),
    'ssd_inception_v2_coco':
        _make_spec('ssd_inception_v2_coco', 91, 0.2, [0, 2, 1]),
    'ssdlite_mobilenet_v2_coco':
        _make_spec('ssdlite_mobilenet_v2_coco', 91, 0.2, [0, 2, 1]),
}
INPUT_DIMS = (3, 300, 300)
DEBUG_UFF = False


def _trt_version_major():
    """Return TensorRT's major version number as an int.

    NOTE: comparing trt.__version__[0] (a single character) against '7'
    breaks for TensorRT >= 10, since '1' < '7' lexicographically; the
    integer comparison used here is safe for all versions.
    """
    return int(trt.__version__.split('.')[0])


def replace_addv2(graph):
    """Replace all 'AddV2' in the graph with 'Add'.

    'AddV2' is not supported by UFF parser.

    Reference:
    1. https://github.com/jkjung-avt/tensorrt_demos/issues/113#issuecomment-629900809
    """
    for node in graph.find_nodes_by_op('AddV2'):
        gs.update_node(node, op='Add')
    return graph


def replace_fusedbnv3(graph):
    """Replace all 'FusedBatchNormV3' in the graph with 'FusedBatchNorm'.

    'FusedBatchNormV3' is not supported by UFF parser.

    Reference:
    1. https://devtalk.nvidia.com/default/topic/1066445/tensorrt/tensorrt-6-0-1-tensorflow-1-14-no-conversion-function-registered-for-layer-fusedbatchnormv3-yet/post/5403567/#5403567
    2. https://github.com/jkjung-avt/tensorrt_demos/issues/76#issuecomment-607879831
    """
    for node in graph.find_nodes_by_op('FusedBatchNormV3'):
        gs.update_node(node, op='FusedBatchNorm')
    return graph


def add_anchor_input(graph):
    """Add the missing const input for the GridAnchor node.

    Reference:
    1. https://www.minds.ai/post/deploying-ssd-mobilenet-v2-on-the-nvidia-jetson-and-nano-platforms
    """
    data = np.array([1, 1], dtype=np.float32)
    anchor_input = gs.create_node('AnchorInput', 'Const', value=data)
    graph.append(anchor_input)
    graph.find_nodes_by_op('GridAnchor_TRT')[0].input.insert(0, 'AnchorInput')
    return graph


def add_plugin(graph, model, spec):
    """Map TensorFlow SSD ops onto TensorRT plugin nodes in *graph*.

    Collapses the preprocessor, anchor generator and postprocessor
    namespaces into TensorRT plugin nodes (Placeholder, GridAnchor_TRT,
    NMS_TRT, FlattenConcat_TRT), then cleans up the graph so that 'NMS'
    is the sole output.

    # Args
        graph: a graphsurgeon DynamicGraph loaded from the frozen pb
        model: model name (unused here; kept for interface stability)
        spec: the MODEL_SPECS entry for this model

    # Returns
        The modified graph.

    # Raises
        RuntimeError: if 'NMS' does not end up among graph outputs.

    Reference:
    1. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v1_coco_2018_01_28.py
    2. https://github.com/AastaNV/TRT_object_detection/blob/master/config/model_ssd_mobilenet_v2_coco_2018_03_29.py
    3. https://devtalk.nvidia.com/default/topic/1050465/jetson-nano/how-to-write-config-py-for-converting-ssd-mobilenetv2-to-uff-format/post/5333033/#5333033
    """
    numClasses = spec['num_classes']
    minSize = spec['min_size']
    maxSize = spec['max_size']
    inputOrder = spec['input_order']

    # Assert nodes (and everything feeding only them) are irrelevant for
    # inference; Identity nodes are no-ops and can be forwarded through.
    all_assert_nodes = graph.find_nodes_by_op('Assert')
    graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
    all_identity_nodes = graph.find_nodes_by_op('Identity')
    graph.forward_inputs(all_identity_nodes)

    Input = gs.create_plugin_node(
        name='Input',
        op='Placeholder',
        shape=(1,) + INPUT_DIMS
    )
    PriorBox = gs.create_plugin_node(
        name='MultipleGridAnchorGenerator',
        op='GridAnchor_TRT',
        minSize=minSize,  # was 0.2
        maxSize=maxSize,  # was 0.95
        aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
        variance=[0.1, 0.1, 0.2, 0.2],
        featureMapShapes=[19, 10, 5, 3, 2, 1],
        numLayers=6
    )
    NMS = gs.create_plugin_node(
        name='NMS',
        op='NMS_TRT',
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=0.3,  # was 1e-8
        nmsThreshold=0.6,
        topK=100,
        keepTopK=100,
        numClasses=numClasses,  # was 91
        inputOrder=inputOrder,
        confSigmoid=1,
        isNormalized=1
    )
    concat_priorbox = gs.create_node(
        'concat_priorbox',
        op='ConcatV2',
        axis=2
    )
    # TensorRT >= 7's built-in FlattenConcat_TRT plugin takes explicit
    # axis/ignoreBatch attributes; the custom pre-7 plugin does not.
    if _trt_version_major() >= 7:
        concat_box_loc = gs.create_plugin_node(
            'concat_box_loc',
            op='FlattenConcat_TRT',
            axis=1,
            ignoreBatch=0
        )
        concat_box_conf = gs.create_plugin_node(
            'concat_box_conf',
            op='FlattenConcat_TRT',
            axis=1,
            ignoreBatch=0
        )
    else:
        concat_box_loc = gs.create_plugin_node(
            'concat_box_loc',
            op='FlattenConcat_TRT'
        )
        concat_box_conf = gs.create_plugin_node(
            'concat_box_conf',
            op='FlattenConcat_TRT'
        )

    namespace_plugin_map = {
        'MultipleGridAnchorGenerator': PriorBox,
        'Postprocessor': NMS,
        'Preprocessor': Input,
        'ToFloat': Input,
        'Cast': Input,  # added for models trained with tf 1.15+
        'image_tensor': Input,
        'MultipleGridAnchorGenerator/Concatenate': concat_priorbox,  # for 'ssd_mobilenet_v1_coco'
        'Concatenate': concat_priorbox,  # for other models
        'concat': concat_box_loc,
        'concat_1': concat_box_conf
    }

    graph.remove(
        graph.find_nodes_by_path(
            ['Preprocessor/map/TensorArrayStack_1/TensorArrayGatherV3']),
        remove_exclusive_dependencies=False)  # for 'ssd_inception_v2_coco'
    graph.collapse_namespaces(namespace_plugin_map)
    graph = replace_addv2(graph)
    graph = replace_fusedbnv3(graph)

    # After namespace collapsing, stale inputs may remain; drop them.
    if 'image_tensor:0' in graph.find_nodes_by_name('Input')[0].input:
        graph.find_nodes_by_name('Input')[0].input.remove('image_tensor:0')
    if 'Input' in graph.find_nodes_by_name('NMS')[0].input:
        graph.find_nodes_by_name('NMS')[0].input.remove('Input')
    # Remove the Squeeze to avoid "Assertion 'isPlugin(layerName)' failed"
    graph.forward_inputs(
        graph.find_node_inputs_by_name(graph.graph_outputs[0], 'Squeeze'))
    if 'anchors' in [node.name for node in graph.graph_outputs]:
        graph.remove('anchors', remove_exclusive_dependencies=False)
    if len(graph.find_nodes_by_op('GridAnchor_TRT')[0].input) < 1:
        graph = add_anchor_input(graph)
    if 'NMS' not in [node.name for node in graph.graph_outputs]:
        graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
    if 'NMS' not in [node.name for node in graph.graph_outputs]:
        # We expect 'NMS' to be one of the outputs
        raise RuntimeError('bad graph_outputs')
    return graph


def main():
    """Convert the selected SSD pb model to UFF and build the TRT engine."""
    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str, choices=list(MODEL_SPECS.keys()))
    args = parser.parse_args()

    # initialize
    if _trt_version_major() < 7:
        # Pre-7 needs the custom FlattenConcat plugin library loaded
        # before plugin registration.
        ctypes.CDLL(LIB_FILE)
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into TensorRT engine
    model = args.model
    spec = MODEL_SPECS[model]
    dynamic_graph = add_plugin(
        gs.DynamicGraph(spec['input_pb']),
        model,
        spec)
    _ = uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        output_nodes=['NMS'],
        output_filename=spec['tmp_uff'],
        text=True,
        debug_mode=DEBUG_UFF)
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', INPUT_DIMS)
        parser.register_output('MarkOutput_0')
        # parse()/build_cuda_engine() report failure via False/None; the
        # original code would then crash with an opaque AttributeError.
        if not parser.parse(spec['tmp_uff'], network):
            raise RuntimeError('failed to parse the UFF file: %s'
                               % spec['tmp_uff'])
        engine = builder.build_cuda_engine(network)
        if engine is None:
            raise RuntimeError('failed to build the TensorRT engine')
        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf)


if __name__ == '__main__':
    main()