from __future__ import division

import math
import time
import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches


def to_cpu(tensor):
    return tensor.detach().cpu()


def load_classes(path):
    """
    Loads class labels at 'path'
    """
    with open(path, "r") as fp:
        names = fp.read().split("\n")[:-1]
    return names


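# Hedged usage sketch (added for illustration): 'data/coco.names' is a
# hypothetical path to a newline-separated label file, one class per line.
def _demo_load_classes():
    classes = load_classes("data/coco.names")
    print(len(classes), classes[:3])

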
def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


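# Hedged usage sketch: weights_init_normal is written for nn.Module.apply,
# which calls it on every submodule recursively. The model below is a
# placeholder, not the YOLO network itself.
def _demo_weights_init():
    model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16))
    model.apply(weights_init_normal)

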
def rescale_boxes(boxes, current_dim, original_shape):
    """ Rescales bounding boxes to the original shape """
    orig_h, orig_w = original_shape
    # The amount of padding that was added
    pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
    pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
    # Image height and width after padding is removed
    unpad_h = current_dim - pad_y
    unpad_w = current_dim - pad_x
    # Rescale bounding boxes to dimension of original image
    boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
    boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
    return boxes


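# Hedged example: map a detection from a 416x416 letterboxed input back onto
# a 640x480 frame. The box coordinates are illustrative values in the padded
# 416-pixel space.
def _demo_rescale_boxes():
    boxes = torch.tensor([[100.0, 150.0, 200.0, 250.0]])
    print(rescale_boxes(boxes, current_dim=416, original_shape=(480, 640)))

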
def xywh2xyxy(x):
    """ Converts boxes from (center x, center y, w, h) to (x1, y1, x2, y2) """
    y = x.new(x.shape)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y


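# Hedged sanity check: a unit box centered at (0.5, 0.5) with width 0.2 and
# height 0.4 should map to corners (0.4, 0.3, 0.6, 0.7).
def _demo_xywh2xyxy():
    box = torch.tensor([[0.5, 0.5, 0.2, 0.4]])
    print(xywh2xyxy(box))  # expected: [[0.4000, 0.3000, 0.6000, 0.7000]]

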
def ap_per_class(tp, conf, pred_cls, target_cls):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:         True positives (np.ndarray).
        conf:       Objectness value from 0-1 (np.ndarray).
        pred_cls:   Predicted object classes (np.ndarray).
        target_cls: True object classes (np.ndarray).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(target_cls)

    # Create Precision-Recall curve and compute AP for each class
    ap, p, r = [], [], []
    for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
        i = pred_cls == c
        n_gt = (target_cls == c).sum()  # Number of ground truth objects
        n_p = i.sum()  # Number of predicted objects

        if n_p == 0 and n_gt == 0:
            continue
        elif n_p == 0 or n_gt == 0:
            ap.append(0)
            r.append(0)
            p.append(0)
        else:
            # Accumulate FPs and TPs
            fpc = (1 - tp[i]).cumsum()
            tpc = (tp[i]).cumsum()

            # Recall
            recall_curve = tpc / (n_gt + 1e-16)
            r.append(recall_curve[-1])

            # Precision
            precision_curve = tpc / (tpc + fpc)
            p.append(precision_curve[-1])

            # AP from recall-precision curve
            ap.append(compute_ap(recall_curve, precision_curve))

    # Compute F1 score (harmonic mean of precision and recall)
    p, r, ap = np.array(p), np.array(r), np.array(ap)
    f1 = 2 * p * r / (p + r + 1e-16)

    return p, r, ap, f1, unique_classes.astype("int32")


def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


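# Hedged toy example: two points on a PR curve. After the sentinel values and
# the precision envelope, the area works out to 0.5 * 1.0 + 0.5 * 0.5 = 0.75.
def _demo_compute_ap():
    recall = np.array([0.5, 1.0])
    precision = np.array([1.0, 0.5])
    print(compute_ap(recall, precision))  # expected: 0.75

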
def get_batch_statistics(outputs, targets, iou_threshold):
    """ Compute true positives, predicted scores and predicted labels per sample """
    batch_metrics = []
    for sample_i in range(len(outputs)):

        if outputs[sample_i] is None:
            continue

        output = outputs[sample_i]
        pred_boxes = output[:, :4]
        pred_scores = output[:, 4]
        pred_labels = output[:, -1]

        true_positives = np.zeros(pred_boxes.shape[0])

        annotations = targets[targets[:, 0] == sample_i][:, 1:]
        target_labels = annotations[:, 0] if len(annotations) else []
        if len(annotations):
            detected_boxes = []
            target_boxes = annotations[:, 1:]

            for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):

                # If all targets have been found, stop early
                if len(detected_boxes) == len(annotations):
                    break

                # Ignore if label is not one of the target labels
                if pred_label not in target_labels:
                    continue

                iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
                if iou >= iou_threshold and box_index not in detected_boxes:
                    true_positives[pred_i] = 1
                    detected_boxes += [box_index]
        batch_metrics.append([true_positives, pred_scores, pred_labels])
    return batch_metrics


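# Hedged pipeline sketch: how these statistics typically feed ap_per_class.
# 'outputs' would come from non_max_suppression and 'targets' from the data
# loader; both are assumed here, not constructed, and only a single batch is
# shown.
def _demo_evaluation_pipeline(outputs, targets):
    sample_metrics = get_batch_statistics(outputs, targets, iou_threshold=0.5)
    tp, conf, pred = [np.concatenate(x, 0) for x in zip(*sample_metrics)]
    labels = targets[:, 1].numpy()
    return ap_per_class(tp, conf, pred, labels)

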
def bbox_wh_iou(wh1, wh2):
    """ IoU between (w, h) pairs, assuming the boxes share a common center """
    wh2 = wh2.t()
    w1, h1 = wh1[0], wh1[1]
    w2, h2 = wh2[0], wh2[1]
    inter_area = torch.min(w1, w2) * torch.min(h1, h2)
    union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
    return inter_area / union_area


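# Hedged sanity check: a 2x2 anchor against a 2x4 target overlaps in a 2x2
# region, so IoU = 4 / (4 + 8 - 4) = 0.5.
def _demo_bbox_wh_iou():
    anchor = torch.tensor([2.0, 2.0])
    targets = torch.tensor([[2.0, 4.0]])
    print(bbox_wh_iou(anchor, targets))  # expected: tensor([0.5000])

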
def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two bounding boxes
    """
    if not x1y1x2y2:
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)
    # Intersection area (the +1 terms treat coordinates as inclusive pixel indices)
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
        inter_rect_y2 - inter_rect_y1 + 1, min=0
    )
    # Union area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)

    return iou


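# Hedged sanity check for bbox_iou. With the +1 inclusive-pixel convention,
# a box compared against itself returns (essentially) 1.0.
def _demo_bbox_iou():
    a = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
    print(bbox_iou(a, a))  # expected: tensor([1.0000])

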
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres' and performs
    Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """

    # From (center x, center y, width, height) to (x1, y1, x2, y2)
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])
    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        image_pred = image_pred[image_pred[:, 4] >= conf_thres]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Object confidence times class confidence
        score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
        # Sort by it
        image_pred = image_pred[(-score).argsort()]
        class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
        detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
        # Perform non-maximum suppression
        keep_boxes = []
        while detections.size(0):
            large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
            label_match = detections[0, -1] == detections[:, -1]
            # Indices of boxes with lower confidence scores, large IOUs and matching labels
            invalid = large_overlap & label_match
            weights = detections[invalid, 4:5]
            # Merge overlapping bboxes by order of confidence
            detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
            keep_boxes += [detections[0]]
            detections = detections[~invalid]
        if keep_boxes:
            output[image_i] = torch.stack(keep_boxes)

    return output


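# Hedged usage sketch: 'raw' stands in for the model's concatenated YOLO layer
# output of shape (batch, num_boxes, 5 + num_classes); the shape, random
# values, and thresholds are illustrative only.
def _demo_non_max_suppression():
    raw = torch.rand(1, 100, 85)  # 85 = 4 box coords + objectness + 80 classes
    detections = non_max_suppression(raw, conf_thres=0.5, nms_thres=0.4)
    for det in detections:
        if det is not None:
            print(det.shape)  # (n_kept, 7)

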
def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):

    ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
    FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor

    nB = pred_boxes.size(0)  # batch size
    nA = pred_boxes.size(1)  # number of anchors per grid cell
    nC = pred_cls.size(-1)   # number of classes
    nG = pred_boxes.size(2)  # grid size

    # Output tensors
    obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)     # 1 where an anchor is responsible for an object (foreground), default 0
    noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)   # 1 where an anchor contains no object (background), default 1
    class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)  # 1 where the class is predicted correctly, default 0
    iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)  # IoU between predicted and ground-truth boxes
    tx = FloatTensor(nB, nA, nG, nG).fill_(0)          # ground-truth position relative to its grid cell
    ty = FloatTensor(nB, nA, nG, nG).fill_(0)
    tw = FloatTensor(nB, nA, nG, nG).fill_(0)
    th = FloatTensor(nB, nA, nG, nG).fill_(0)
    tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)

    # Convert to position relative to box
    target_boxes = target[:, 2:6] * nG  # target xywh are normalized to 0-1; scale them to the current grid size
    gxy = target_boxes[:, :2]
    gwh = target_boxes[:, 2:]
    # Get anchors with best iou
    ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])  # IoU of every anchor shape with every ground-truth box
    best_ious, best_n = ious.max(0)  # best score, and which anchor shape is most similar to each target
    # Separate target values
    b, target_labels = target[:, :2].long().t()  # batch index of each ground-truth box and its class label
    gx, gy = gxy.t()
    gw, gh = gwh.t()
    gi, gj = gxy.long().t()  # grid cell indices (floored)
    # Set masks
    obj_mask[b, best_n, gj, gi] = 1    # cells that actually contain an object
    noobj_mask[b, best_n, gj, gi] = 0  # and the inverse

    # Set noobj mask to zero where iou exceeds ignore threshold
    for i, anchor_ious in enumerate(ious.t()):  # anchors whose IoU exceeds the threshold are treated as containing an object
        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0

    # Coordinates
    tx[b, best_n, gj, gi] = gx - gx.floor()  # offset of the ground-truth center within its grid cell
    ty[b, best_n, gj, gi] = gy - gy.floor()
    # Width and height
    tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
    th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
    # One-hot encoding of label
    tcls[b, best_n, gj, gi, target_labels] = 1  # one-hot encode the ground-truth class labels
    # Compute label correctness and iou at best anchor
    class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
    iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)  # IoU between each ground-truth box and its matched prediction

    tconf = obj_mask.float()  # ground-truth confidence, i.e. 1 wherever there is an object
    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
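

# Hedged shape sketch for build_targets: the sizes below (batch 4, 3 anchors,
# 13x13 grid, 80 classes) and anchor values are illustrative only. 'target'
# rows are (batch_idx, class, x, y, w, h) with coordinates normalized to 0-1.
def _demo_build_targets():
    pred_boxes = torch.rand(4, 3, 13, 13, 4)
    pred_cls = torch.rand(4, 3, 13, 13, 80)
    target = torch.tensor([[0.0, 1.0, 0.5, 0.5, 0.2, 0.3]])
    anchors = torch.tensor([[3.6, 2.8], [4.9, 6.2], [11.7, 10.2]])
    out = build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres=0.5)
    print([o.shape for o in out])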