# onnx_to_tensorrt.py
#
# Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
#
# NOTICE TO LICENSEE:
#
# This source code and/or documentation ("Licensed Deliverables") are
# subject to NVIDIA intellectual property rights under U.S. and
# international Copyright laws.
#
# These Licensed Deliverables contained herein is PROPRIETARY and
# CONFIDENTIAL to NVIDIA and is being provided under the terms and
# conditions of a form of NVIDIA software license agreement by and
# between NVIDIA and Licensee ("License Agreement") or electronically
# accepted by Licensee. Notwithstanding any terms or conditions to
# the contrary in the License Agreement, reproduction or disclosure
# of the Licensed Deliverables to any third party without the express
# written consent of NVIDIA is prohibited.
#
# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
# LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
# SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
# PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
# NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
# DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
# NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
# LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
# SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THESE LICENSED DELIVERABLES.
#
# U.S. Government End Users. These Licensed Deliverables are a
# "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
# 1995), consisting of "commercial computer software" and "commercial
# computer software documentation" as such terms are used in 48
# C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
# only as a commercial end item. Consistent with 48 C.F.R.12.212 and
# 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
# U.S. Government End Users acquire the Licensed Deliverables with
# only those rights set forth herein.
#
# Any use of the Licensed Deliverables in individual and commercial
# software must include, in the user documentation and internal
# comments to the code, the above Disclaimer and U.S. Government End
# Users Notice.
#
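# Example usage (the model name "yolov4-416" below is illustrative):
#
#     $ python3 onnx_to_tensorrt.py -m yolov4-416
#
# This expects "yolov4-416.cfg" and "yolov4-416.onnx" to exist in the
# current directory (the latter generated by yolo_to_onnx.py), and writes
# the serialized engine to "yolov4-416.trt".
#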
from __future__ import print_function

import os
import argparse

import tensorrt as trt

from yolo_to_onnx import DarkNetParser, get_h_and_w
from plugins import add_yolo_plugins, add_concat


MAX_BATCH_SIZE = 1
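# (The optimization profile in build_engine() below uses this fixed batch
# size for its min/opt/max shapes, i.e. the demo targets batch-1 inference.)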


def get_c(layer_configs):
    """Find input channels of the yolo model from layer configs."""
    net_config = layer_configs['000_net']
    return net_config.get('channels', 3)


def load_onnx(model_name):
    """Read the ONNX file."""
    onnx_path = '%s.onnx' % model_name
    if not os.path.isfile(onnx_path):
        print('ERROR: file (%s) not found!  You might want to run '
              'yolo_to_onnx.py first to generate it.' % onnx_path)
        return None
    with open(onnx_path, 'rb') as f:
        return f.read()


def set_net_batch(network, batch_size):
    """Set network input batch size.

    The ONNX file might have been generated with a different batch size,
    say, 64.
    """
    if trt.__version__[0] >= '7':
        # Only explicit-batch (TensorRT 7+) networks carry the batch
        # dimension in the input tensor shape.
        shape = list(network.get_input(0).shape)
        shape[0] = batch_size
        network.get_input(0).shape = shape
    return network
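# For example (an illustrative shape, not from this repo): an ONNX model
# exported with batch size 64 and a 416x416 input has input shape
# (64, 3, 416, 416); set_net_batch() rewrites it to (1, 3, 416, 416)
# before the engine is built.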


def build_engine(model_name, do_int8, dla_core, verbose=False):
    """Build a TensorRT engine from ONNX, using either the old (pre-7.0)
    or the new builder API depending on the installed TensorRT version.
    """
    cfg_file_path = model_name + '.cfg'
    parser = DarkNetParser()
    layer_configs = parser.parse_cfg_file(cfg_file_path)
    net_c = get_c(layer_configs)
    net_h, net_w = get_h_and_w(layer_configs)

    print('Loading the ONNX file...')
    onnx_data = load_onnx(model_name)
    if onnx_data is None:
        return None

    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    EXPLICIT_BATCH = [] if trt.__version__[0] < '7' else \
        [1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(*EXPLICIT_BATCH) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        if do_int8 and not builder.platform_has_fast_int8:
            raise RuntimeError('INT8 not supported on this platform')
        if not parser.parse(onnx_data):
            print('ERROR: Failed to parse the ONNX file.')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None
        network = set_net_batch(network, MAX_BATCH_SIZE)

        print('Adding yolo_layer plugins.')
        network = add_yolo_plugins(network, model_name, TRT_LOGGER)

        print('Adding a concatenated output as "detections".')
        network = add_concat(network, model_name, TRT_LOGGER)

        print('Naming the input tensor as "input".')
        network.get_input(0).name = 'input'

        print('Building the TensorRT engine.  This could take a while...')
        print('(Use "--verbose" or "-v" to enable verbose logging.)')
        if trt.__version__[0] < '7':  # old API: build_cuda_engine()
            if dla_core >= 0:
                raise RuntimeError('DLA core not supported by old API')
            builder.max_batch_size = MAX_BATCH_SIZE
            builder.max_workspace_size = 1 << 30
            builder.fp16_mode = True  # alternative: builder.platform_has_fast_fp16
            if do_int8:
                from calibrator import YOLOEntropyCalibrator
                builder.int8_mode = True
                builder.int8_calibrator = YOLOEntropyCalibrator(
                    'calib_images', (net_h, net_w),
                    'calib_%s.bin' % model_name)
            engine = builder.build_cuda_engine(network)
        else:  # new API: build_engine() with builder config
            builder.max_batch_size = MAX_BATCH_SIZE
            config = builder.create_builder_config()
            config.max_workspace_size = 1 << 30
            config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
            config.set_flag(trt.BuilderFlag.FP16)
            profile = builder.create_optimization_profile()
            profile.set_shape(
                'input',                                # input tensor name
                (MAX_BATCH_SIZE, net_c, net_h, net_w),  # min shape
                (MAX_BATCH_SIZE, net_c, net_h, net_w),  # opt shape
                (MAX_BATCH_SIZE, net_c, net_h, net_w))  # max shape
            config.add_optimization_profile(profile)
            if do_int8:
                from calibrator import YOLOEntropyCalibrator
                config.set_flag(trt.BuilderFlag.INT8)
                config.int8_calibrator = YOLOEntropyCalibrator(
                    'calib_images', (net_h, net_w),
                    'calib_%s.bin' % model_name)
                config.set_calibration_profile(profile)
            if dla_core >= 0:
                config.default_device_type = trt.DeviceType.DLA
                config.DLA_core = dla_core
                config.set_flag(trt.BuilderFlag.STRICT_TYPES)
                print('Using DLA core %d.' % dla_core)
            engine = builder.build_engine(network, config)

        if engine is not None:
            print('Completed creating engine.')
        return engine
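# Note on INT8: YOLOEntropyCalibrator (imported from calibrator.py above)
# reads sample images from the local "calib_images" directory and caches
# the calibration result in "calib_<model>.bin", as wired up in
# build_engine().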


def main():
    """Create a TensorRT engine for ONNX-based YOLO."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='enable verbose output (for debugging)')
    parser.add_argument(
        '-c', '--category_num', type=int,
        help='number of object categories (obsolete)')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help=('[yolov3-tiny|yolov3|yolov3-spp|yolov4-tiny|yolov4|'
              'yolov4-csp|yolov4x-mish|yolov4-p5]-[{dimension}], where '
              '{dimension} could be either a single number (e.g. '
              '288, 416, 608) or 2 numbers, WxH (e.g. 416x256)'))
    parser.add_argument(
        '--int8', action='store_true',
        help='build INT8 TensorRT engine')
    parser.add_argument(
        '--dla_core', type=int, default=-1,
        help='id of DLA core for inference (0 ~ N-1)')
    args = parser.parse_args()

    engine = build_engine(
        args.model, args.int8, args.dla_core, args.verbose)
    if engine is None:
        raise SystemExit('ERROR: failed to build the TensorRT engine!')
    engine_path = '%s.trt' % args.model
    with open(engine_path, 'wb') as f:
        f.write(engine.serialize())
    print('Serialized the TensorRT engine to file: %s' % engine_path)


if __name__ == '__main__':
    main()
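

# A minimal sketch (not part of the original script) of how the serialized
# engine could be deserialized for inference elsewhere; the engine file
# name is an example:
#
#     import tensorrt as trt
#     TRT_LOGGER = trt.Logger()
#     with open('yolov4-416.trt', 'rb') as f:
#         runtime = trt.Runtime(TRT_LOGGER)
#         engine = runtime.deserialize_cuda_engine(f.read())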