添加注释;
parent
4e9a5c2ba1
commit
7f0e021118
|
@ -66,7 +66,9 @@ def loop_and_detect(cam, trt_yolo, conf_th, vis):
|
|||
img = cam.read()
|
||||
if img is None:
|
||||
break
|
||||
# 获取trt推理结果
|
||||
boxes, confs, clss = trt_yolo.detect(img, conf_th)
|
||||
# 绘制识别框
|
||||
img = vis.draw_bboxes(img, boxes, confs, clss)
|
||||
img = show_fps(img, fps)
|
||||
cv2.imshow(WINDOW_NAME, img)
|
||||
|
@ -93,14 +95,17 @@ def main():
|
|||
cam = Camera(args)
|
||||
if not cam.isOpened():
|
||||
raise SystemExit('ERROR: failed to open camera!')
|
||||
|
||||
# 加载COCO数据集标签
|
||||
cls_dict = get_cls_dict(args.category_num)
|
||||
# 加载框绘制器
|
||||
vis = BBoxVisualization(cls_dict)
|
||||
# 创建推理引擎
|
||||
trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)
|
||||
|
||||
# 打开窗口
|
||||
open_window(
|
||||
WINDOW_NAME, 'Camera TensorRT YOLO Demo',
|
||||
cam.img_width, cam.img_height)
|
||||
# 开始推理
|
||||
loop_and_detect(cam, trt_yolo, args.conf_thresh, vis=vis)
|
||||
|
||||
cam.release()
|
||||
|
|
|
@ -112,8 +112,11 @@ def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold,
|
|||
boxes, scores, classes (after NMS)
|
||||
"""
|
||||
# filter low-conf detections and concatenate results of all yolo layers
|
||||
# 输出应该是[3*(80 + c + w + h + x + y),(13+26+52),(13+26+52)]
|
||||
# 猜测是应该将80分类换算成了1个id号和1个置信度,这可能就是yolo更换输出的原因,确实是在插件中更换的输出类型
|
||||
detections = []
|
||||
for o in trt_outputs:
|
||||
# x, y, w, h , c , id , score
|
||||
dets = o.reshape((-1, 7))
|
||||
dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
|
||||
detections.append(dets)
|
||||
|
@ -278,24 +281,31 @@ class TrtYOLO(object):
|
|||
|
||||
def __init__(self, model, category_num=80, letter_box=False, cuda_ctx=None):
|
||||
"""Initialize TensorRT plugins, engine and context."""
|
||||
# 保存engine模型
|
||||
self.model = model
|
||||
# 保存分类数
|
||||
self.category_num = category_num
|
||||
# 统一输入大小到letterbox
|
||||
self.letter_box = letter_box
|
||||
# CUDA上下文默认只对创建它的CPU线程有效;其他线程要使用它时,需先在创建它的线程中将其pop出来,再在目标线程中push到该线程的当前上下文栈,此后的CUDA调用才会引用该上下文。
|
||||
self.cuda_ctx = cuda_ctx
|
||||
if self.cuda_ctx:
|
||||
self.cuda_ctx.push()
|
||||
|
||||
# 设置推理函数
|
||||
self.inference_fn = do_inference if trt.__version__[0] < '7' \
|
||||
else do_inference_v2
|
||||
# 创建TensorRT日志记录器,最低严重级别设为INFO:会报告INFO及更严重级别(WARNING、ERROR等)的消息,只有比INFO更低的VERBOSE级别消息被抑制。
|
||||
self.trt_logger = trt.Logger(trt.Logger.INFO)
|
||||
# 加载模型deserialize
|
||||
self.engine = self._load_engine()
|
||||
|
||||
# 从模型中获取输入大小
|
||||
self.input_shape = get_input_shape(self.engine)
|
||||
|
||||
try:
|
||||
self.context = self.engine.create_execution_context()
|
||||
# 创建一个上下文,储存中间值,因为engine包含network定义和训练参数,因此需要额外的空间。
|
||||
self.context = self.engine.create_execution_context() # create_execution_context是写在ICudaEngine.py的一个闭源方法,这个方法是创建立一个IExecutionContext类型的对象。
|
||||
self.inputs, self.outputs, self.bindings, self.stream = \
|
||||
allocate_buffers(self.engine)
|
||||
allocate_buffers(self.engine) # 为输入输出分配host和device的buffers。host指的是CPU内存,device指的是GPU显存
|
||||
except Exception as e:
|
||||
raise RuntimeError('fail to allocate CUDA resources') from e
|
||||
finally:
|
||||
|
@ -311,22 +321,26 @@ class TrtYOLO(object):
|
|||
def detect(self, img, conf_th=0.3, letter_box=None):
|
||||
"""Detect objects in the input image."""
|
||||
letter_box = self.letter_box if letter_box is None else letter_box
|
||||
# 保证输入源统一大小,符合推理模型使用
|
||||
img_resized = _preprocess_yolo(img, self.input_shape, letter_box)
|
||||
|
||||
# Set host input to the image. The do_inference() function
|
||||
# will copy the input to the GPU before executing.
|
||||
# 开辟一块内存空间,用于放入输入图像
|
||||
self.inputs[0].host = np.ascontiguousarray(img_resized)
|
||||
if self.cuda_ctx:
|
||||
self.cuda_ctx.push()
|
||||
# 开始推理
|
||||
trt_outputs = self.inference_fn(
|
||||
context=self.context,
|
||||
bindings=self.bindings,
|
||||
inputs=self.inputs,
|
||||
outputs=self.outputs,
|
||||
stream=self.stream)
|
||||
context=self.context, # 制定GPU的Context,可以理解为上下文,{}
|
||||
bindings=self.bindings, # 大概指的是内存到显存之间的绑定关系
|
||||
inputs=self.inputs, # 输入数据
|
||||
outputs=self.outputs, # 输出数据
|
||||
stream=self.stream) # cuda的操作顺序流
|
||||
if self.cuda_ctx:
|
||||
self.cuda_ctx.pop()
|
||||
|
||||
# 后处理GPU返回的输出结果
|
||||
boxes, scores, classes = _postprocess_yolo(
|
||||
trt_outputs, img.shape[1], img.shape[0], conf_th,
|
||||
nms_threshold=0.5, input_shape=self.input_shape,
|
||||
|
|
|
@ -96,19 +96,27 @@ def set_net_batch(network, batch_size):
|
|||
def build_engine(model_name, do_int8, dla_core, verbose=False):
|
||||
"""Build a TensorRT engine from ONNX using the older API."""
|
||||
cfg_file_path = model_name + '.cfg'
|
||||
# 初始化解释器
|
||||
parser = DarkNetParser()
|
||||
# 加载网络层信息
|
||||
layer_configs = parser.parse_cfg_file(cfg_file_path)
|
||||
# 获得网络输入的通道数(推测:与下方的 net_h、net_w 对应;待确认 get_c 的实现)
|
||||
net_c = get_c(layer_configs)
|
||||
# 获得输入的宽和高
|
||||
net_h, net_w = get_h_and_w(layer_configs)
|
||||
|
||||
# 读取onnx模型
|
||||
print('Loading the ONNX file...')
|
||||
onnx_data = load_onnx(model_name)
|
||||
if onnx_data is None:
|
||||
return None
|
||||
|
||||
# 创建记录对象,用于调试和报错
|
||||
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
|
||||
# 明确输入的批次数
|
||||
EXPLICIT_BATCH = [] if trt.__version__[0] < '7' else \
|
||||
[1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
|
||||
# 解释onnx文件,并生成trt网络
|
||||
with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
|
||||
if do_int8 and not builder.platform_has_fast_int8:
|
||||
raise RuntimeError('INT8 not supported on this platform')
|
||||
|
@ -117,14 +125,15 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
|
|||
for error in range(parser.num_errors):
|
||||
print(parser.get_error(error))
|
||||
return None
|
||||
# 设置网络Batch
|
||||
network = set_net_batch(network, MAX_BATCH_SIZE)
|
||||
|
||||
# 更换Yolo插件
|
||||
print('Adding yolo_layer plugins.')
|
||||
network = add_yolo_plugins(network, model_name, TRT_LOGGER)
|
||||
|
||||
# 将三个yolo输出层Concat到一起
|
||||
print('Adding a concatenated output as "detections".')
|
||||
network = add_concat(network, model_name, TRT_LOGGER)
|
||||
|
||||
# 命名输入层名称
|
||||
print('Naming the input tensort as "input".')
|
||||
network.get_input(0).name = 'input'
|
||||
|
||||
|
@ -144,6 +153,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
|
|||
engine = builder.build_cuda_engine(network)
|
||||
else: # new API: build_engine() with builder config
|
||||
builder.max_batch_size = MAX_BATCH_SIZE
|
||||
# 设置builder参数
|
||||
config = builder.create_builder_config()
|
||||
config.max_workspace_size = 1 << 30
|
||||
config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
|
||||
|
@ -167,6 +177,7 @@ def build_engine(model_name, do_int8, dla_core, verbose=False):
|
|||
config.DLA_core = dla_core
|
||||
config.set_flag(trt.BuilderFlag.STRICT_TYPES)
|
||||
print('Using DLA core %d.' % dla_core)
|
||||
# 开始编译模型文件
|
||||
engine = builder.build_engine(network, config)
|
||||
|
||||
if engine is not None:
|
||||
|
|
|
@ -80,6 +80,7 @@ def add_yolo_plugins(network, model_name, logger):
|
|||
raise TypeError('bad number of outputs: %d' % len(output_tensor_names))
|
||||
if is_pan_arch(cfg_file_path):
|
||||
yolo_whs.reverse()
|
||||
# 获取Anchor大小
|
||||
anchors = get_anchors(cfg_file_path)
|
||||
if len(anchors) != len(yolo_whs):
|
||||
raise ValueError('bad number of yolo layers: %d vs. %d' %
|
||||
|
@ -87,14 +88,16 @@ def add_yolo_plugins(network, model_name, logger):
|
|||
if network.num_outputs != len(anchors):
|
||||
raise ValueError('bad number of network outputs: %d vs. %d' %
|
||||
(network.num_outputs, len(anchors)))
|
||||
# 从cfg文件获取各yolo层的scale_x_y参数(每个值须不小于1.0,且数量须与anchors一致)
|
||||
scales = get_scales(cfg_file_path)
|
||||
if any([s < 1.0 for s in scales]):
|
||||
raise ValueError('bad scale_x_y: %s' % str(scales))
|
||||
if len(scales) != len(anchors):
|
||||
raise ValueError('bad number of scales: %d vs. %d' %
|
||||
(len(scales), len(anchors)))
|
||||
# ?? yolov4中的参数
|
||||
new_coords = get_new_coords(cfg_file_path)
|
||||
|
||||
# 获取TRT插件
|
||||
plugin_creator = get_plugin_creator('YoloLayer_TRT', logger)
|
||||
if not plugin_creator:
|
||||
raise RuntimeError('cannot get YoloLayer_TRT plugin creator')
|
||||
|
@ -115,7 +118,7 @@ def add_yolo_plugins(network, model_name, logger):
|
|||
trt.PluginField("scaleXY", np.array(scales[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
|
||||
]))
|
||||
).get_output(0)
|
||||
|
||||
# 更换trt模型中的yolo输出层
|
||||
for new_tensor in new_tensors:
|
||||
network.mark_output(new_tensor)
|
||||
for old_tensor in old_tensors:
|
||||
|
|
|
@ -191,8 +191,13 @@ class DarkNetParser(object):
|
|||
self.layer_configs = OrderedDict()
|
||||
# 支持的节点类型
|
||||
self.supported_layers = supported_layers if supported_layers else \
|
||||
['net', 'convolutional', 'maxpool', 'shortcut',
|
||||
'route', 'upsample', 'yolo']
|
||||
['net', # 超参数层,无操作
|
||||
'convolutional', # 卷积层
|
||||
'maxpool', # 池化层
|
||||
'shortcut', # 捷径层
|
||||
'route', # 路由层
|
||||
'upsample', # 上采样层
|
||||
'yolo'] # 输出层
|
||||
self.layer_counter = 0
|
||||
|
||||
# 加载网络模型文件.cfg
|
||||
|
|
Loading…
Reference in New Issue