From 7f0e021118b41050fc6dc81cec36699dabe5e64c Mon Sep 17 00:00:00 2001 From: 12345qiupeng Date: Tue, 7 Mar 2023 13:56:20 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=B3=A8=E9=87=8A=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- trt_yolo.py | 9 +++++++-- utils/yolo_with_plugins.py | 32 +++++++++++++++++++++++--------- yolo/onnx_to_tensorrt.py | 17 ++++++++++++++--- yolo/plugins.py | 7 +++++-- yolo/yolo_to_onnx.py | 9 +++++++-- 5 files changed, 56 insertions(+), 18 deletions(-) diff --git a/trt_yolo.py b/trt_yolo.py index 88cc4ac..35f4dfd 100644 --- a/trt_yolo.py +++ b/trt_yolo.py @@ -66,7 +66,9 @@ def loop_and_detect(cam, trt_yolo, conf_th, vis): img = cam.read() if img is None: break + # 获取trt推理结果 boxes, confs, clss = trt_yolo.detect(img, conf_th) + # 绘制识别框 img = vis.draw_bboxes(img, boxes, confs, clss) img = show_fps(img, fps) cv2.imshow(WINDOW_NAME, img) @@ -93,14 +95,17 @@ def main(): cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') - + # 加载COCO数据集标签 cls_dict = get_cls_dict(args.category_num) + # 加载框绘制器 vis = BBoxVisualization(cls_dict) + # 创建推理引擎 trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box) - + # 打开窗口 open_window( WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width, cam.img_height) + # 开始推理 loop_and_detect(cam, trt_yolo, args.conf_thresh, vis=vis) cam.release() diff --git a/utils/yolo_with_plugins.py b/utils/yolo_with_plugins.py index 42704be..b449cd1 100644 --- a/utils/yolo_with_plugins.py +++ b/utils/yolo_with_plugins.py @@ -112,8 +112,11 @@ def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold, boxes, scores, classes (after NMS) """ # filter low-conf detections and concatenate results of all yolo layers + # 输出应该是[3*(80 + c + w + h + x + y)),(13+26+52),(13+26+52)] + # 猜测是应该将80分类换算成了1个id号和1个置信度,这可能就是yolo更换输出的原因,确实是在插件中更换的输出类型 detections = [] for o in trt_outputs: + # x, y, w, h , c , id , score dets = 
o.reshape((-1, 7)) dets = dets[dets[:, 4] * dets[:, 6] >= conf_th] detections.append(dets) @@ -278,24 +281,31 @@ class TrtYOLO(object): def __init__(self, model, category_num=80, letter_box=False, cuda_ctx=None): """Initialize TensorRT plugins, engine and conetxt.""" + # Store the engine model name self.model = model + # Store the number of categories self.category_num = category_num + # Store the letter_box preprocessing flag self.letter_box = letter_box + # By default a CUDA context can only be accessed from the CPU thread that created it; for another thread to use it, the context must be popped from the creating thread so it can be pushed onto that thread's current context stack (push/pop), after which subsequent CUDA calls refer to this context. self.cuda_ctx = cuda_ctx if self.cuda_ctx: self.cuda_ctx.push() - + # Select the inference function by TensorRT major version self.inference_fn = do_inference if trt.__version__[0] < '7' \ else do_inference_v2 + # Create the TensorRT logger with minimum severity INFO: messages at INFO severity or higher (warnings and errors included) are reported; only VERBOSE messages are suppressed. self.trt_logger = trt.Logger(trt.Logger.INFO) + # Load (deserialize) the engine self.engine = self._load_engine() - + # Get the input shape from the engine self.input_shape = get_input_shape(self.engine) try: - self.context = self.engine.create_execution_context() + # Create an execution context to hold intermediate values; the engine holds the network definition and trained parameters, so separate space is needed for them. + self.context = self.engine.create_execution_context() # create_execution_context is a method of TensorRT's ICudaEngine class (closed-source implementation); it creates an IExecutionContext object. self.inputs, self.outputs, self.bindings, self.stream = \ - allocate_buffers(self.engine) + allocate_buffers(self.engine) # Allocate host and device buffers for the inputs and outputs; host is CPU memory, device is GPU memory except Exception as e: raise RuntimeError('fail to allocate CUDA resources') from e finally: @@ -311,22 +321,26 @@ class TrtYOLO(object): def detect(self, img, conf_th=0.3, letter_box=None): """Detect objects in the input image.""" letter_box = self.letter_box if letter_box is None else letter_box + # Resize the input image to the model's input shape img_resized = _preprocess_yolo(img, self.input_shape, letter_box) # Set host input to the image. The do_inference() function # will copy the input to the GPU before executing. 
+ # Place the preprocessed image in the host input buffer as a contiguous array self.inputs[0].host = np.ascontiguousarray(img_resized) if self.cuda_ctx: self.cuda_ctx.push() + # Run inference trt_outputs = self.inference_fn( - context=self.context, - bindings=self.bindings, - inputs=self.inputs, - outputs=self.outputs, - stream=self.stream) + context=self.context, # TensorRT execution context to run with + bindings=self.bindings, # presumably the bindings between host and device buffers + inputs=self.inputs, # input data + outputs=self.outputs, # output data + stream=self.stream) # CUDA stream that orders the operations if self.cuda_ctx: self.cuda_ctx.pop() + # Post-process the outputs returned from the GPU boxes, scores, classes = _postprocess_yolo( trt_outputs, img.shape[1], img.shape[0], conf_th, nms_threshold=0.5, input_shape=self.input_shape, diff --git a/yolo/onnx_to_tensorrt.py b/yolo/onnx_to_tensorrt.py index 01366b6..13986f6 100644 --- a/yolo/onnx_to_tensorrt.py +++ b/yolo/onnx_to_tensorrt.py @@ -96,19 +96,27 @@ def set_net_batch(network, batch_size): def build_engine(model_name, do_int8, dla_core, verbose=False): """Build a TensorRT engine from ONNX using the older API.""" cfg_file_path = model_name + '.cfg' + # Create the DarkNet cfg parser parser = DarkNetParser() + # Parse the layer configs from the cfg file layer_configs = parser.parse_cfg_file(cfg_file_path) + # Get net_c from the layer configs (presumably the input channel count, to go with net_h and net_w; confirm against get_c) net_c = get_c(layer_configs) + # Get the input height and width net_h, net_w = get_h_and_w(layer_configs) + # Load the ONNX model print('Loading the ONNX file...') onnx_data = load_onnx(model_name) if onnx_data is None: return None + # Create the TensorRT logger (verbose when requested), used for debugging and error reporting TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger() + # Network creation flags: EXPLICIT_BATCH for TensorRT 7 and later, none for older versions EXPLICIT_BATCH = [] if trt.__version__[0] < '7' else \ [1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)] + # Parse the ONNX data and build the TensorRT network with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser: if do_int8 and not builder.platform_has_fast_int8: raise RuntimeError('INT8 not supported on this platform') @@ -117,14 +125,15 @@ def build_engine(model_name, do_int8, dla_core, verbose=False): for error in range(parser.num_errors): 
print(parser.get_error(error)) return None + # Set the network batch size network = set_net_batch(network, MAX_BATCH_SIZE) - + # Replace the yolo layers with the yolo_layer plugins print('Adding yolo_layer plugins.') network = add_yolo_plugins(network, model_name, TRT_LOGGER) - + # Concatenate the yolo outputs into a single output print('Adding a concatenated output as "detections".') network = add_concat(network, model_name, TRT_LOGGER) - + # Name the input tensor print('Naming the input tensort as "input".') network.get_input(0).name = 'input' @@ -144,6 +153,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False): engine = builder.build_cuda_engine(network) else: # new API: build_engine() with builder config builder.max_batch_size = MAX_BATCH_SIZE + # Configure the builder config = builder.create_builder_config() config.max_workspace_size = 1 << 30 config.set_flag(trt.BuilderFlag.GPU_FALLBACK) @@ -167,6 +177,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False): config.DLA_core = dla_core config.set_flag(trt.BuilderFlag.STRICT_TYPES) print('Using DLA core %d.' % dla_core) + # Build the engine engine = builder.build_engine(network, config) if engine is not None: diff --git a/yolo/plugins.py b/yolo/plugins.py index b48d8ff..8daed16 100644 --- a/yolo/plugins.py +++ b/yolo/plugins.py @@ -80,6 +80,7 @@ def add_yolo_plugins(network, model_name, logger): raise TypeError('bad number of outputs: %d' % len(output_tensor_names)) if is_pan_arch(cfg_file_path): yolo_whs.reverse() + # Read the anchors from the cfg file anchors = get_anchors(cfg_file_path) if len(anchors) != len(yolo_whs): raise ValueError('bad number of yolo layers: %d vs. %d' % @@ -87,14 +88,16 @@ def add_yolo_plugins(network, model_name, logger): if network.num_outputs != len(anchors): raise ValueError('bad number of network outputs: %d vs. %d' % (network.num_outputs, len(anchors))) + # Read the per-layer scale_x_y values from the cfg file (each must be >= 1.0) scales = get_scales(cfg_file_path) if any([s < 1.0 for s in scales]): raise ValueError('bad scale_x_y: %s' % str(scales)) if len(scales) != len(anchors): raise ValueError('bad number of scales: %d vs. 
%d' % (len(scales), len(anchors))) + # Read the new_coords setting from the cfg file (a YOLOv4-family parameter) new_coords = get_new_coords(cfg_file_path) - + # Get the creator for the YoloLayer_TRT plugin plugin_creator = get_plugin_creator('YoloLayer_TRT', logger) if not plugin_creator: raise RuntimeError('cannot get YoloLayer_TRT plugin creator') @@ -115,7 +118,7 @@ def add_yolo_plugins(network, model_name, logger): trt.PluginField("scaleXY", np.array(scales[i], dtype=np.float32), trt.PluginFieldType.FLOAT32), ])) ).get_output(0) - + # Replace the yolo output layers in the TensorRT network for new_tensor in new_tensors: network.mark_output(new_tensor) for old_tensor in old_tensors: diff --git a/yolo/yolo_to_onnx.py b/yolo/yolo_to_onnx.py index 4994e83..8dbb05e 100644 --- a/yolo/yolo_to_onnx.py +++ b/yolo/yolo_to_onnx.py @@ -191,8 +191,13 @@ class DarkNetParser(object): self.layer_configs = OrderedDict() # 支持的节点类型 self.supported_layers = supported_layers if supported_layers else \ - ['net', 'convolutional', 'maxpool', 'shortcut', - 'route', 'upsample', 'yolo'] + ['net', # hyper-parameter section, no operation + 'convolutional', # convolution layer + 'maxpool', # max-pooling layer + 'shortcut', # shortcut layer + 'route', # route layer + 'upsample', # upsampling layer + 'yolo'] # output layer self.layer_counter = 0 # 加载网络模型文件.cfg