From 7f0e021118b41050fc6dc81cec36699dabe5e64c Mon Sep 17 00:00:00 2001
From: 12345qiupeng <qpeng0504@163.com>
Date: Tue, 7 Mar 2023 13:56:20 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=B3=A8=E9=87=8A=EF=BC=9B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 trt_yolo.py                |  9 +++++++--
 utils/yolo_with_plugins.py | 32 +++++++++++++++++++++++---------
 yolo/onnx_to_tensorrt.py   | 17 ++++++++++++++---
 yolo/plugins.py            |  7 +++++--
 yolo/yolo_to_onnx.py       |  9 +++++++--
 5 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/trt_yolo.py b/trt_yolo.py
index 88cc4ac..35f4dfd 100644
--- a/trt_yolo.py
+++ b/trt_yolo.py
@@ -66,7 +66,9 @@ def loop_and_detect(cam, trt_yolo, conf_th, vis):
         img = cam.read()
         if img is None:
             break
+        # 获取trt推理结果
         boxes, confs, clss = trt_yolo.detect(img, conf_th)
+        # 绘制识别框
         img = vis.draw_bboxes(img, boxes, confs, clss)
         img = show_fps(img, fps)
         cv2.imshow(WINDOW_NAME, img)
@@ -93,14 +95,17 @@ def main():
     cam = Camera(args)
     if not cam.isOpened():
         raise SystemExit('ERROR: failed to open camera!')
-
+    # 加载COCO数据集标签
     cls_dict = get_cls_dict(args.category_num)
+    # 加载框绘制器
     vis = BBoxVisualization(cls_dict)
+    # 创建推理引擎
     trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)
-
+    # 打开窗口
     open_window(
         WINDOW_NAME, 'Camera TensorRT YOLO Demo',
         cam.img_width, cam.img_height)
+    # 开始推理
     loop_and_detect(cam, trt_yolo, args.conf_thresh, vis=vis)
 
     cam.release()
diff --git a/utils/yolo_with_plugins.py b/utils/yolo_with_plugins.py
index 42704be..b449cd1 100644
--- a/utils/yolo_with_plugins.py
+++ b/utils/yolo_with_plugins.py
@@ -112,8 +112,11 @@ def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold,
         boxes, scores, classes (after NMS)
     """
     # filter low-conf detections and concatenate results of all yolo layers
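+    # NOTE(review): the raw YOLO heads would presumably carry 3*(80+5) values per cell on the 13/26/52 grids.
+    # The outputs here are reshaped to 7 values per detection instead; presumably the yolo plugin has already collapsed the 80 class scores into one class id and one score - confirm.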
     detections = []
     for o in trt_outputs:
+        # x, y, w, h , c , id , score
         dets = o.reshape((-1, 7))
         dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
         detections.append(dets)
@@ -278,24 +281,31 @@ class TrtYOLO(object):
 
     def __init__(self, model, category_num=80, letter_box=False, cuda_ctx=None):
         """Initialize TensorRT plugins, engine and conetxt."""
+        # 保存engine模型
         self.model = model
+        # 保存分类数
         self.category_num = category_num
+        # Default letter_box flag; detect() forwards it to _preprocess_yolo() unless the caller overrides it
         self.letter_box = letter_box
+        # 默认CUDA上下文只能从创建它的CPU线程访问，其他线程访问需push/pop从创建它的线程中弹出它，这样context可以被推送到任何其他CPU线程的当前上下文栈，并且随后的CUDA调用将引用该上下文。
         self.cuda_ctx = cuda_ctx
         if self.cuda_ctx:
             self.cuda_ctx.push()
-
+        # 设置推理函数
         self.inference_fn = do_inference if trt.__version__[0] < '7' \
                                          else do_inference_v2
+        # TensorRT logger at INFO severity: reports INFO, WARNING and ERROR messages; only VERBOSE messages are suppressed.
         self.trt_logger = trt.Logger(trt.Logger.INFO)
+        # 加载模型deserialize
         self.engine = self._load_engine()
-
+        # 从模型中获取输入大小
         self.input_shape = get_input_shape(self.engine)
 
         try:
-            self.context = self.engine.create_execution_context()
+            # 创建一个上下文，储存中间值，因为engine包含network定义和训练参数，因此需要额外的空间。
+            self.context = self.engine.create_execution_context() # create_execution_context() is a method of ICudaEngine; it returns an IExecutionContext object.
             self.inputs, self.outputs, self.bindings, self.stream = \
-                allocate_buffers(self.engine)
+                allocate_buffers(self.engine)   # 为输入输出分配host和device的buffers。host指的是CPU内存，device指的是GPU显存
         except Exception as e:
             raise RuntimeError('fail to allocate CUDA resources') from e
         finally:
@@ -311,22 +321,26 @@ class TrtYOLO(object):
     def detect(self, img, conf_th=0.3, letter_box=None):
         """Detect objects in the input image."""
         letter_box = self.letter_box if letter_box is None else letter_box
+        # 保证输入源统一大小，符合推理模型使用
         img_resized = _preprocess_yolo(img, self.input_shape, letter_box)
 
         # Set host input to the image. The do_inference() function
         # will copy the input to the GPU before executing.
+        # Use a contiguous copy/view of the preprocessed image as the host-side input buffer
         self.inputs[0].host = np.ascontiguousarray(img_resized)
         if self.cuda_ctx:
             self.cuda_ctx.push()
+        # 开始推理
         trt_outputs = self.inference_fn(
-            context=self.context,
-            bindings=self.bindings,
-            inputs=self.inputs,
-            outputs=self.outputs,
-            stream=self.stream)
+            context=self.context,   # TensorRT execution context created in __init__
+            bindings=self.bindings, # bindings returned by allocate_buffers() (presumably the device buffer addresses - confirm)
+            inputs=self.inputs,     # 输入数据
+            outputs=self.outputs,   # 输出数据
+            stream=self.stream)     # cuda的操作顺序流
         if self.cuda_ctx:
             self.cuda_ctx.pop()
 
+        # 后处理GPU返回的输出结果
         boxes, scores, classes = _postprocess_yolo(
             trt_outputs, img.shape[1], img.shape[0], conf_th,
             nms_threshold=0.5, input_shape=self.input_shape,
diff --git a/yolo/onnx_to_tensorrt.py b/yolo/onnx_to_tensorrt.py
index 01366b6..13986f6 100644
--- a/yolo/onnx_to_tensorrt.py
+++ b/yolo/onnx_to_tensorrt.py
@@ -96,19 +96,27 @@ def set_net_batch(network, batch_size):
 def build_engine(model_name, do_int8, dla_core, verbose=False):
     """Build a TensorRT engine from ONNX using the older API."""
     cfg_file_path = model_name + '.cfg'
+    # 初始化解释器
     parser = DarkNetParser()
+    # 加载网络层信息
     layer_configs = parser.parse_cfg_file(cfg_file_path)
+    # Get the network input channel count (NOTE(review): inferred from the net_c/net_h/net_w naming - confirm in get_c)
     net_c = get_c(layer_configs)
+    # 获得输入的宽和高
     net_h, net_w = get_h_and_w(layer_configs)
 
+    # 读取onnx模型
     print('Loading the ONNX file...')
     onnx_data = load_onnx(model_name)
     if onnx_data is None:
         return None
 
+    # 创建记录对象，用于调试和报错
     TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
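+    # Network-creation flags: TensorRT 7+ creates the network with the EXPLICIT_BATCH flag; older versions pass no flags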
     EXPLICIT_BATCH = [] if trt.__version__[0] < '7' else \
         [1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
+    # 解释onnx文件，并生成trt网络
     with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
         if do_int8 and not builder.platform_has_fast_int8:
             raise RuntimeError('INT8 not supported on this platform')
@@ -117,14 +125,15 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
             for error in range(parser.num_errors):
                 print(parser.get_error(error))
             return None
+        # 设置网络Batch
         network = set_net_batch(network, MAX_BATCH_SIZE)
-
+        # 更换Yolo插件
         print('Adding yolo_layer plugins.')
         network = add_yolo_plugins(network, model_name, TRT_LOGGER)
-
+        # 将三个yolo输出层Concat到一起
         print('Adding a concatenated output as "detections".')
         network = add_concat(network, model_name, TRT_LOGGER)
-
+        # 命名输入层名称
         print('Naming the input tensort as "input".')
         network.get_input(0).name = 'input'
 
@@ -144,6 +153,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
             engine = builder.build_cuda_engine(network)
         else:  # new API: build_engine() with builder config
             builder.max_batch_size = MAX_BATCH_SIZE
+            # 设置builder参数
             config = builder.create_builder_config()
             config.max_workspace_size = 1 << 30
             config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
@@ -167,6 +177,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
                 config.DLA_core = dla_core
                 config.set_flag(trt.BuilderFlag.STRICT_TYPES)
                 print('Using DLA core %d.' % dla_core)
+            # 开始编译模型文件
             engine = builder.build_engine(network, config)
 
         if engine is not None:
diff --git a/yolo/plugins.py b/yolo/plugins.py
index b48d8ff..8daed16 100644
--- a/yolo/plugins.py
+++ b/yolo/plugins.py
@@ -80,6 +80,7 @@ def add_yolo_plugins(network, model_name, logger):
         raise TypeError('bad number of outputs: %d' % len(output_tensor_names))
     if is_pan_arch(cfg_file_path):
         yolo_whs.reverse()
+    # 获取Anchor大小
     anchors = get_anchors(cfg_file_path)
     if len(anchors) != len(yolo_whs):
         raise ValueError('bad number of yolo layers: %d vs. %d' %
@@ -87,14 +88,16 @@ def add_yolo_plugins(network, model_name, logger):
     if network.num_outputs != len(anchors):
         raise ValueError('bad number of network outputs: %d vs. %d' %
                          (network.num_outputs, len(anchors)))
+    # Read the scale_x_y values from the cfg; each must be >= 1.0 and there must be one per anchor set
     scales = get_scales(cfg_file_path)
     if any([s < 1.0 for s in scales]):
         raise ValueError('bad scale_x_y: %s' % str(scales))
     if len(scales) != len(anchors):
         raise ValueError('bad number of scales: %d vs. %d' %
                          (len(scales), len(anchors)))
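+    # Read the new_coords setting from the cfg (NOTE(review): presumably the yolo-layer option used by newer YOLOv4 variants - confirm)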
     new_coords = get_new_coords(cfg_file_path)
-
+    # 获取TRT插件
     plugin_creator = get_plugin_creator('YoloLayer_TRT', logger)
     if not plugin_creator:
         raise RuntimeError('cannot get YoloLayer_TRT plugin creator')
@@ -115,7 +118,7 @@ def add_yolo_plugins(network, model_name, logger):
                 trt.PluginField("scaleXY", np.array(scales[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
             ]))
         ).get_output(0)
-
+    # 更换trt模型中的yolo输出层
     for new_tensor in new_tensors:
         network.mark_output(new_tensor)
     for old_tensor in old_tensors:
diff --git a/yolo/yolo_to_onnx.py b/yolo/yolo_to_onnx.py
index 4994e83..8dbb05e 100644
--- a/yolo/yolo_to_onnx.py
+++ b/yolo/yolo_to_onnx.py
@@ -191,8 +191,13 @@ class DarkNetParser(object):
         self.layer_configs = OrderedDict()
         # 支持的节点类型
         self.supported_layers = supported_layers if supported_layers else \
-                                ['net', 'convolutional', 'maxpool', 'shortcut',
-                                 'route', 'upsample', 'yolo']
+                                ['net',             # 超参数层，无操作
+                                 'convolutional',   # 卷积层
+                                 'maxpool',         # 池化层
+                                 'shortcut',        # 捷径层
+                                 'route',           # 路由层
+                                 'upsample',        # 上采样层
+                                 'yolo']            # 输出层
         self.layer_counter = 0
 
     # 加载网络模型文件.cfg