Add comments;

parent 4e9a5c2ba1
commit 7f0e021118
@@ -66,7 +66,9 @@ def loop_and_detect(cam, trt_yolo, conf_th, vis):
         img = cam.read()
         if img is None:
             break
+        # get the TensorRT inference results
         boxes, confs, clss = trt_yolo.detect(img, conf_th)
+        # draw the detection boxes
         img = vis.draw_bboxes(img, boxes, confs, clss)
         img = show_fps(img, fps)
         cv2.imshow(WINDOW_NAME, img)
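For context, the `fps` value overlaid by `show_fps()` is typically an exponentially smoothed frame rate measured around the loop body. A minimal sketch with illustrative names and smoothing factor (not this repo's exact implementation):

```python
import time

class FpsMeter:
    """Exponentially smoothed frames-per-second estimate."""

    def __init__(self, alpha=0.05):
        self.alpha = alpha      # weight given to the newest measurement
        self.fps = 0.0
        self.tic = time.time()

    def update(self):
        toc = time.time()
        curr_fps = 1.0 / max(toc - self.tic, 1e-6)
        # first frame: take the raw value; afterwards blend smoothly
        self.fps = curr_fps if self.fps == 0.0 else \
            (1.0 - self.alpha) * self.fps + self.alpha * curr_fps
        self.tic = toc
        return self.fps
```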
@@ -93,14 +95,17 @@ def main():
     cam = Camera(args)
     if not cam.isOpened():
         raise SystemExit('ERROR: failed to open camera!')
+    # load the COCO class labels
     cls_dict = get_cls_dict(args.category_num)
+    # create the bounding-box drawer
     vis = BBoxVisualization(cls_dict)
+    # create the TensorRT inference engine
     trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)
+    # open the display window
     open_window(
         WINDOW_NAME, 'Camera TensorRT YOLO Demo',
         cam.img_width, cam.img_height)
+    # start the detection loop
     loop_and_detect(cam, trt_yolo, args.conf_thresh, vis=vis)

     cam.release()
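The same pipeline can be exercised on a single image instead of a camera. A minimal sketch that assumes this repo's module layout; the image path and model name ('dog.jpg', 'yolov4-416') are placeholders:

```python
import cv2
from utils.yolo_classes import get_cls_dict
from utils.visualization import BBoxVisualization
from utils.yolo_with_plugins import TrtYOLO

img = cv2.imread('dog.jpg')                      # any test image
cls_dict = get_cls_dict(80)                      # COCO class labels
vis = BBoxVisualization(cls_dict)                # bounding-box drawer
trt_yolo = TrtYOLO('yolov4-416', 80)             # loads the serialized engine for this model
boxes, confs, clss = trt_yolo.detect(img, 0.3)   # run TensorRT inference
img = vis.draw_bboxes(img, boxes, confs, clss)   # draw the detections
cv2.imwrite('result.jpg', img)
```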
@@ -112,8 +112,11 @@ def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold,
         boxes, scores, classes (after NMS)
     """
     # filter low-conf detections and concatenate results of all yolo layers
+    # the raw output should be [3*(80 + c + w + h + x + y), (13+26+52), (13+26+52)]
+    # presumably the 80 class scores are reduced to one class id plus one confidence; this is likely why the yolo output layers are swapped out, and the output format is indeed changed inside the plugin
     detections = []
     for o in trt_outputs:
+        # x, y, w, h, conf, class_id, class_score
         dets = o.reshape((-1, 7))
         dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
         detections.append(dets)
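The filter keeps rows whose overall score (box confidence times class score) reaches the threshold. A small self-contained sketch of the same indexing on made-up data:

```python
import numpy as np

conf_th = 0.3
o = np.array([
    # x,    y,    w,    h,    conf, class_id, class_score
    [0.50, 0.50, 0.20, 0.30, 0.90, 16.0,     0.80],   # kept:    0.9 * 0.8  = 0.72
    [0.10, 0.20, 0.05, 0.05, 0.60,  0.0,     0.30],   # dropped: 0.6 * 0.3  = 0.18
], dtype=np.float32)

dets = o.reshape((-1, 7))
dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]   # overall score = conf * class_score
print(dets.shape)   # (1, 7)
```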
@@ -278,24 +281,31 @@ class TrtYOLO(object):

     def __init__(self, model, category_num=80, letter_box=False, cuda_ctx=None):
         """Initialize TensorRT plugins, engine and conetxt."""
+        # save the engine model name
         self.model = model
+        # save the number of classes
         self.category_num = category_num
+        # whether to letterbox the input to a uniform size
         self.letter_box = letter_box
+        # By default a CUDA context can only be accessed from the CPU thread that created it; to use it elsewhere it must be popped from the creating thread and pushed onto another thread's current-context stack, after which subsequent CUDA calls on that thread refer to this context.
         self.cuda_ctx = cuda_ctx
         if self.cuda_ctx:
             self.cuda_ctx.push()
+        # select the inference function
         self.inference_fn = do_inference if trt.__version__[0] < '7' \
                             else do_inference_v2
+        # create the logger; at INFO severity it reports informational messages as well as warnings and errors
         self.trt_logger = trt.Logger(trt.Logger.INFO)
+        # load (deserialize) the serialized engine
         self.engine = self._load_engine()
+        # get the input shape from the engine
         self.input_shape = get_input_shape(self.engine)

         try:
-            self.context = self.engine.create_execution_context()
+            # create an execution context to hold intermediate activation values; the engine holds the network definition and trained weights, so execution needs this extra memory
+            self.context = self.engine.create_execution_context()  # create_execution_context() is a (closed-source) ICudaEngine method that creates an IExecutionContext object
             self.inputs, self.outputs, self.bindings, self.stream = \
-                allocate_buffers(self.engine)
+                allocate_buffers(self.engine)  # allocate host and device buffers for the inputs/outputs (host = CPU memory, device = GPU memory)
         except Exception as e:
             raise RuntimeError('fail to allocate CUDA resources') from e
         finally:
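The push/pop pattern the `cuda_ctx` comment describes can be shown in isolation with PyCUDA: a context created on one thread is made current on another thread by pushing it onto that thread's context stack. A sketch under that assumption (the worker body is left empty on purpose):

```python
import threading
import pycuda.driver as cuda

cuda.init()
ctx = cuda.Device(0).make_context()   # created (and made current) on the main thread
ctx.pop()                             # detach it from the main thread's stack

def worker():
    ctx.push()                        # make the shared context current on this thread
    try:
        pass                          # CUDA/TensorRT calls here refer to ctx, e.g. TrtYOLO(..., cuda_ctx=ctx)
    finally:
        ctx.pop()                     # release it so other threads can push it

t = threading.Thread(target=worker)
t.start()
t.join()
ctx.detach()                          # destroy the context once nobody needs it
```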
@@ -311,22 +321,26 @@ class TrtYOLO(object):
     def detect(self, img, conf_th=0.3, letter_box=None):
         """Detect objects in the input image."""
         letter_box = self.letter_box if letter_box is None else letter_box
+        # resize the input to the fixed size expected by the inference model
         img_resized = _preprocess_yolo(img, self.input_shape, letter_box)

         # Set host input to the image. The do_inference() function
         # will copy the input to the GPU before executing.
+        # copy the input image into a contiguous block of host memory
         self.inputs[0].host = np.ascontiguousarray(img_resized)
         if self.cuda_ctx:
             self.cuda_ctx.push()
+        # run inference
         trt_outputs = self.inference_fn(
-            context=self.context,
-            bindings=self.bindings,
-            inputs=self.inputs,
-            outputs=self.outputs,
-            stream=self.stream)
+            context=self.context,    # the GPU execution context
+            bindings=self.bindings,  # roughly, the bindings between host memory and device memory
+            inputs=self.inputs,      # input buffers
+            outputs=self.outputs,    # output buffers
+            stream=self.stream)      # the CUDA stream that orders the operations
         if self.cuda_ctx:
             self.cuda_ctx.pop()

+        # post-process the outputs returned from the GPU
         boxes, scores, classes = _postprocess_yolo(
             trt_outputs, img.shape[1], img.shape[0], conf_th,
             nms_threshold=0.5, input_shape=self.input_shape,
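A rough sketch of what a letterbox preprocess does before inference: keep the aspect ratio, pad the image to the fixed network input size, then convert HWC/BGR uint8 to CHW/RGB float32. This mirrors the idea behind `_preprocess_yolo(..., letter_box=True)` but is not the repo's exact code; the pad value and 416x416 size are illustrative:

```python
import cv2
import numpy as np

def letterbox_preprocess(img, input_shape=(416, 416), pad_value=127):
    net_h, net_w = input_shape
    img_h, img_w = img.shape[:2]
    scale = min(net_w / img_w, net_h / img_h)        # resize without distorting the aspect ratio
    new_w, new_h = int(img_w * scale), int(img_h * scale)
    resized = cv2.resize(img, (new_w, new_h))
    canvas = np.full((net_h, net_w, 3), pad_value, dtype=np.uint8)
    top = (net_h - new_h) // 2
    left = (net_w - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized
    canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    chw = canvas.transpose((2, 0, 1)).astype(np.float32) / 255.0
    return np.ascontiguousarray(chw)                  # contiguous, ready for inputs[0].host

blob = letterbox_preprocess(np.zeros((480, 640, 3), dtype=np.uint8))
print(blob.shape)   # (3, 416, 416)
```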
@@ -96,19 +96,27 @@ def set_net_batch(network, batch_size):
 def build_engine(model_name, do_int8, dla_core, verbose=False):
     """Build a TensorRT engine from ONNX using the older API."""
     cfg_file_path = model_name + '.cfg'
+    # initialize the cfg parser
     parser = DarkNetParser()
+    # load the layer configurations
     layer_configs = parser.parse_cfg_file(cfg_file_path)
+    # get the number of output layers (how many sets of candidate boxes)
     net_c = get_c(layer_configs)
+    # get the input height and width
     net_h, net_w = get_h_and_w(layer_configs)

+    # read the ONNX model
     print('Loading the ONNX file...')
     onnx_data = load_onnx(model_name)
     if onnx_data is None:
         return None

+    # create the logger object, used for debugging and error reporting
     TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
+    # use an explicit input batch dimension (required by the ONNX parser on TensorRT 7+)
     EXPLICIT_BATCH = [] if trt.__version__[0] < '7' else \
         [1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
+    # parse the ONNX file and build the TensorRT network
     with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
         if do_int8 and not builder.platform_has_fast_int8:
             raise RuntimeError('INT8 not supported on this platform')
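The EXPLICIT_BATCH flag can be confusing: each network-creation flag is a bit position, and `create_network()` takes the OR-ed bits. A small sketch of just that logic, kept separate from the builder code above:

```python
import tensorrt as trt

def network_creation_flags():
    # note: comparing only the first character of the version string works for
    # single-digit majors (6/7/8) but would misclassify TensorRT 10 and later
    if trt.__version__[0] < '7':
        return []                     # TensorRT 6: implicit batch, no flags needed
    # ONNX-parsed networks on TensorRT 7+ must carry an explicit batch dimension
    return [1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]

# usage (mirrors the diff): builder.create_network(*network_creation_flags())
```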
@@ -117,14 +125,15 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
             for error in range(parser.num_errors):
                 print(parser.get_error(error))
             return None
+        # set the network batch size
         network = set_net_batch(network, MAX_BATCH_SIZE)
+        # replace the yolo output layers with the yolo_layer plugin
         print('Adding yolo_layer plugins.')
         network = add_yolo_plugins(network, model_name, TRT_LOGGER)
+        # concatenate the three yolo output layers into one
         print('Adding a concatenated output as "detections".')
         network = add_concat(network, model_name, TRT_LOGGER)
+        # name the input tensor
         print('Naming the input tensort as "input".')
         network.get_input(0).name = 'input'

@@ -144,6 +153,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
             engine = builder.build_cuda_engine(network)
         else:  # new API: build_engine() with builder config
             builder.max_batch_size = MAX_BATCH_SIZE
+            # set the builder configuration parameters
             config = builder.create_builder_config()
             config.max_workspace_size = 1 << 30
             config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
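The "builder configuration" this comment refers to is an IBuilderConfig: workspace size, precision flags and optional DLA placement are all set on it before building. A sketch using the older TensorRT 7.x-style API that this repo targets; the values and the helper name are illustrative:

```python
import tensorrt as trt

def make_config(builder, use_fp16=True, dla_core=-1):
    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 30              # 1 GiB of scratch space for tactic selection
    if use_fp16:
        config.set_flag(trt.BuilderFlag.FP16)        # allow FP16 kernels where available
    if dla_core >= 0:                                # optional: offload supported layers to a DLA core (Jetson)
        config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
        config.default_device_type = trt.DeviceType.DLA
        config.DLA_core = dla_core
    return config

# usage (mirrors the diff): engine = builder.build_engine(network, make_config(builder))
```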
@@ -167,6 +177,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
                 config.DLA_core = dla_core
                 config.set_flag(trt.BuilderFlag.STRICT_TYPES)
                 print('Using DLA core %d.' % dla_core)
+            # build (compile) the engine from the network and config
             engine = builder.build_engine(network, config)

         if engine is not None:
@@ -80,6 +80,7 @@ def add_yolo_plugins(network, model_name, logger):
         raise TypeError('bad number of outputs: %d' % len(output_tensor_names))
     if is_pan_arch(cfg_file_path):
         yolo_whs.reverse()
+    # get the anchor sizes
     anchors = get_anchors(cfg_file_path)
     if len(anchors) != len(yolo_whs):
         raise ValueError('bad number of yolo layers: %d vs. %d' %
@@ -87,14 +88,16 @@ def add_yolo_plugins(network, model_name, logger):
     if network.num_outputs != len(anchors):
         raise ValueError('bad number of network outputs: %d vs. %d' %
                          (network.num_outputs, len(anchors)))
+    # get the scale_x_y values ??
     scales = get_scales(cfg_file_path)
     if any([s < 1.0 for s in scales]):
         raise ValueError('bad scale_x_y: %s' % str(scales))
     if len(scales) != len(anchors):
         raise ValueError('bad number of scales: %d vs. %d' %
                          (len(scales), len(anchors)))
+    # ?? a parameter introduced in yolov4
     new_coords = get_new_coords(cfg_file_path)
+    # get the TensorRT plugin creator
     plugin_creator = get_plugin_creator('YoloLayer_TRT', logger)
    if not plugin_creator:
         raise RuntimeError('cannot get YoloLayer_TRT plugin creator')
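The anchors, scale_x_y and new_coords values that these helpers return are plain key=value entries in each [yolo] section of the darknet .cfg file. A simplified illustration of where they come from; this is not the repo's get_anchors()/get_scales()/get_new_coords() implementation:

```python
yolo_section = """
[yolo]
anchors = 12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401
scale_x_y = 1.2
new_coords = 0
"""

cfg = {}
for line in yolo_section.splitlines():
    if '=' in line:
        key, value = (part.strip() for part in line.split('=', 1))
        cfg[key] = value

anchors = [int(a) for a in cfg['anchors'].split(',')]   # flat list of (w, h) pairs
scale_x_y = float(cfg.get('scale_x_y', 1.0))            # must be >= 1.0 (checked in the diff)
new_coords = int(cfg.get('new_coords', 0))              # 1 for scaled-yolov4 style box decoding
print(len(anchors) // 2, scale_x_y, new_coords)         # 9 anchor pairs, 1.2, 0
```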
@@ -115,7 +118,7 @@ def add_yolo_plugins(network, model_name, logger):
                 trt.PluginField("scaleXY", np.array(scales[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
             ]))
         ).get_output(0)
-
+    # swap the yolo output layers in the TensorRT network
     for new_tensor in new_tensors:
         network.mark_output(new_tensor)
     for old_tensor in old_tensors:
@@ -191,8 +191,13 @@ class DarkNetParser(object):
         self.layer_configs = OrderedDict()
         # supported layer (node) types
         self.supported_layers = supported_layers if supported_layers else \
-            ['net', 'convolutional', 'maxpool', 'shortcut',
-             'route', 'upsample', 'yolo']
+            ['net',            # hyper-parameter section, no operation
+             'convolutional',  # convolution layer
+             'maxpool',        # max-pooling layer
+             'shortcut',       # shortcut (residual) layer
+             'route',          # route layer
+             'upsample',       # upsample layer
+             'yolo']           # output layer
         self.layer_counter = 0

         # load the network model file (.cfg)
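What a darknet .cfg parser has to do with these section names is split the file into [section] blocks, keep a counter so layers get unique names, and reject unsupported types. A sketch of that idea only; the naming scheme and helper are illustrative, not the repo's DarkNetParser:

```python
from collections import OrderedDict

SUPPORTED = ['net', 'convolutional', 'maxpool', 'shortcut',
             'route', 'upsample', 'yolo']

def split_cfg_sections(cfg_text):
    layer_configs = OrderedDict()
    counter = 0
    current = None
    for line in cfg_text.splitlines():
        line = line.split('#')[0].strip()        # drop comments and whitespace
        if not line:
            continue
        if line.startswith('[') and line.endswith(']'):
            layer_type = line[1:-1]
            if layer_type not in SUPPORTED:
                raise ValueError('unsupported layer type: %s' % layer_type)
            current = '%03d_%s' % (counter, layer_type)   # e.g. '001_convolutional'
            layer_configs[current] = {'type': layer_type}
            counter += 1
        elif current is not None:
            key, value = (p.strip() for p in line.split('=', 1))
            layer_configs[current][key] = value
    return layer_configs

sections = split_cfg_sections("[net]\nbatch=1\n[convolutional]\nfilters=32\n")
print(list(sections))   # ['000_net', '001_convolutional']
```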