添加注释;

main
邱棚 2023-03-07 13:56:20 +08:00
parent 4e9a5c2ba1
commit 7f0e021118
5 changed files with 56 additions and 18 deletions

View File

@ -66,7 +66,9 @@ def loop_and_detect(cam, trt_yolo, conf_th, vis):
img = cam.read()
if img is None:
break
# 获取trt推理结果
boxes, confs, clss = trt_yolo.detect(img, conf_th)
# 绘制识别框
img = vis.draw_bboxes(img, boxes, confs, clss)
img = show_fps(img, fps)
cv2.imshow(WINDOW_NAME, img)
@ -93,14 +95,17 @@ def main():
cam = Camera(args)
if not cam.isOpened():
raise SystemExit('ERROR: failed to open camera!')
# 加载COCO数据集标签
cls_dict = get_cls_dict(args.category_num)
# 加载框绘制器
vis = BBoxVisualization(cls_dict)
# 创建推理引擎
trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)
# 打开窗口
open_window(
WINDOW_NAME, 'Camera TensorRT YOLO Demo',
cam.img_width, cam.img_height)
# 开始推理
loop_and_detect(cam, trt_yolo, args.conf_thresh, vis=vis)
cam.release()

View File

@ -112,8 +112,11 @@ def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold,
boxes, scores, classes (after NMS)
"""
# filter low-conf detections and concatenate results of all yolo layers
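# The raw YOLO output would presumably be [3*(80 + c + w + h + x + y), (13+26+52), (13+26+52)].
# Guess: the 80 per-class scores have been reduced to a single class id plus a single score.
# That is probably why the yolo output layers are replaced; the output type is indeed changed inside the plugin.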
detections = []
for o in trt_outputs:
# x, y, w, h , c , id , score
dets = o.reshape((-1, 7))
dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
detections.append(dets)
@ -278,24 +281,31 @@ class TrtYOLO(object):
def __init__(self, model, category_num=80, letter_box=False, cuda_ctx=None):
"""Initialize TensorRT plugins, engine and context."""
# 保存engine模型
self.model = model
# 保存分类数
self.category_num = category_num
# 统一输入大小到letterbox
self.letter_box = letter_box
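# By default a CUDA context can only be accessed from the CPU thread that created it.
# For another thread to use it, the context must first be popped from the creating thread;
# it can then be pushed onto any other CPU thread's current context stack, and subsequent
# CUDA calls will refer to that context.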
self.cuda_ctx = cuda_ctx
if self.cuda_ctx:
self.cuda_ctx.push()
# 设置推理函数
self.inference_fn = do_inference if trt.__version__[0] < '7' \
else do_inference_v2
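# Create the TensorRT logger. At INFO severity it reports informational messages as well as
# the more severe ones (warnings and errors); only more verbose output is suppressed.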
self.trt_logger = trt.Logger(trt.Logger.INFO)
# 加载模型deserialize
self.engine = self._load_engine()
# 从模型中获取输入大小
self.input_shape = get_input_shape(self.engine)
try:
self.context = self.engine.create_execution_context()
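# Create an execution context to hold intermediate values: the engine itself only contains
# the network definition and the trained parameters, so extra space is needed for them.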
self.context = self.engine.create_execution_context() # create_execution_context是写在ICudaEngine.py的一个闭源方法这个方法是创建立一个IExecutionContext类型的对象。
self.inputs, self.outputs, self.bindings, self.stream = \
allocate_buffers(self.engine)
allocate_buffers(self.engine) # 为输入输出分配host和device的buffers。host指的是CPU内存device指的是GPU显存
except Exception as e:
raise RuntimeError('fail to allocate CUDA resources') from e
finally:
@ -311,22 +321,26 @@ class TrtYOLO(object):
def detect(self, img, conf_th=0.3, letter_box=None):
"""Detect objects in the input image."""
letter_box = self.letter_box if letter_box is None else letter_box
# 保证输入源统一大小,符合推理模型使用
img_resized = _preprocess_yolo(img, self.input_shape, letter_box)
# Set host input to the image. The do_inference() function
# will copy the input to the GPU before executing.
# 开辟一块内存空间,用于放入输入图像
self.inputs[0].host = np.ascontiguousarray(img_resized)
if self.cuda_ctx:
self.cuda_ctx.push()
# 开始推理
trt_outputs = self.inference_fn(
context=self.context,
bindings=self.bindings,
inputs=self.inputs,
outputs=self.outputs,
stream=self.stream)
context=self.context, # specifies the execution context used to run inference
bindings=self.bindings, # 大概指的是内存到显存之间的绑定关系
inputs=self.inputs, # 输入数据
outputs=self.outputs, # 输出数据
stream=self.stream) # cuda的操作顺序流
if self.cuda_ctx:
self.cuda_ctx.pop()
# 后处理GPU返回的输出结果
boxes, scores, classes = _postprocess_yolo(
trt_outputs, img.shape[1], img.shape[0], conf_th,
nms_threshold=0.5, input_shape=self.input_shape,

View File

@ -96,19 +96,27 @@ def set_net_batch(network, batch_size):
def build_engine(model_name, do_int8, dla_core, verbose=False):
"""Build a TensorRT engine from ONNX using the older API."""
cfg_file_path = model_name + '.cfg'
# 初始化解释器
parser = DarkNetParser()
# 加载网络层信息
layer_configs = parser.parse_cfg_file(cfg_file_path)
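# Get net_c from the layer configs.
# NOTE(review): presumably the network's input channel count (it is paired with net_h/net_w below),
# not the number of output layers or candidate boxes -- confirm against get_c().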
net_c = get_c(layer_configs)
# 获得输入的宽和高
net_h, net_w = get_h_and_w(layer_configs)
# 读取onnx模型
print('Loading the ONNX file...')
onnx_data = load_onnx(model_name)
if onnx_data is None:
return None
# 创建记录对象,用于调试和报错
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
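# Network creation flags: none when the TensorRT version string starts with a digit below '7',
# otherwise the EXPLICIT_BATCH flag (explicit batch dimension).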
EXPLICIT_BATCH = [] if trt.__version__[0] < '7' else \
[1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
# 解释onnx文件并生成trt网络
with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
if do_int8 and not builder.platform_has_fast_int8:
raise RuntimeError('INT8 not supported on this platform')
@ -117,14 +125,15 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
for error in range(parser.num_errors):
print(parser.get_error(error))
return None
# 设置网络Batch
network = set_net_batch(network, MAX_BATCH_SIZE)
# 更换Yolo插件
print('Adding yolo_layer plugins.')
network = add_yolo_plugins(network, model_name, TRT_LOGGER)
# 将三个yolo输出层Concat到一起
print('Adding a concatenated output as "detections".')
network = add_concat(network, model_name, TRT_LOGGER)
# 命名输入层名称
print('Naming the input tensort as "input".')
network.get_input(0).name = 'input'
@ -144,6 +153,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
engine = builder.build_cuda_engine(network)
else: # new API: build_engine() with builder config
builder.max_batch_size = MAX_BATCH_SIZE
# 设置builder参数
config = builder.create_builder_config()
config.max_workspace_size = 1 << 30
config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
@ -167,6 +177,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
config.DLA_core = dla_core
config.set_flag(trt.BuilderFlag.STRICT_TYPES)
print('Using DLA core %d.' % dla_core)
# 开始编译模型文件
engine = builder.build_engine(network, config)
if engine is not None:

View File

@ -80,6 +80,7 @@ def add_yolo_plugins(network, model_name, logger):
raise TypeError('bad number of outputs: %d' % len(output_tensor_names))
if is_pan_arch(cfg_file_path):
yolo_whs.reverse()
# 获取Anchor大小
anchors = get_anchors(cfg_file_path)
if len(anchors) != len(yolo_whs):
raise ValueError('bad number of yolo layers: %d vs. %d' %
@ -87,14 +88,16 @@ def add_yolo_plugins(network, model_name, logger):
if network.num_outputs != len(anchors):
raise ValueError('bad number of network outputs: %d vs. %d' %
(network.num_outputs, len(anchors)))
# 获取Scale大小
scales = get_scales(cfg_file_path)
if any([s < 1.0 for s in scales]):
raise ValueError('bad scale_x_y: %s' % str(scales))
if len(scales) != len(anchors):
raise ValueError('bad number of scales: %d vs. %d' %
(len(scales), len(anchors)))
# yolov4中的参数
new_coords = get_new_coords(cfg_file_path)
# 获取TRT插件
plugin_creator = get_plugin_creator('YoloLayer_TRT', logger)
if not plugin_creator:
raise RuntimeError('cannot get YoloLayer_TRT plugin creator')
@ -115,7 +118,7 @@ def add_yolo_plugins(network, model_name, logger):
trt.PluginField("scaleXY", np.array(scales[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
]))
).get_output(0)
# 更换trt模型中的yolo输出层
for new_tensor in new_tensors:
network.mark_output(new_tensor)
for old_tensor in old_tensors:

View File

@ -191,8 +191,13 @@ class DarkNetParser(object):
self.layer_configs = OrderedDict()
# 支持的节点类型
self.supported_layers = supported_layers if supported_layers else \
['net', 'convolutional', 'maxpool', 'shortcut',
'route', 'upsample', 'yolo']
['net', # 超参数层,无操作
'convolutional', # 卷积层
'maxpool', # 池化层
'shortcut', # 捷径层
'route', # 路由层
'upsample', # 上采样层
'yolo'] # 输出层
self.layer_counter = 0
# 加载网络模型文件.cfg