commit 703829a1230f4c6ff2f1216afa2ef499b537da21 Author: 12345qiupeng Date: Fri Mar 3 11:31:50 2023 +0800 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c3a2bd7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +data/coco/ +output/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/PyTorch-YOLOv3.iml b/.idea/PyTorch-YOLOv3.iml new file mode 100644 index 0000000..a276113 --- /dev/null +++ b/.idea/PyTorch-YOLOv3.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..ac37a5a --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..8071927 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/assets/dog.png b/assets/dog.png new file mode 100644 index 0000000..8056c38 Binary files /dev/null and b/assets/dog.png differ diff --git a/assets/giraffe.png b/assets/giraffe.png new file mode 100644 index 0000000..d0787de Binary files /dev/null and b/assets/giraffe.png differ diff --git a/assets/messi.png b/assets/messi.png new file mode 100644 index 0000000..1412915 Binary files /dev/null and b/assets/messi.png differ diff --git a/assets/traffic.png b/assets/traffic.png new file mode 100644 index 0000000..128b95f Binary files /dev/null and b/assets/traffic.png differ diff --git a/config/coco.data b/config/coco.data new file mode 100644 index 0000000..18beac1 --- /dev/null +++ b/config/coco.data @@ -0,0 +1,6 @@ +classes= 80 +train=data/coco/trainvalno5k.txt +valid=data/coco/5k.txt +names=data/coco.names +backup=backup/ +eval=coco diff --git a/config/create_custom_model.sh b/config/create_custom_model.sh new file mode 100644 index 0000000..eba2ebe --- /dev/null +++ b/config/create_custom_model.sh @@ -0,0 +1,794 @@ +#!/bin/bash + +NUM_CLASSES=$1 + +echo " +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=16 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 
+size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=$NUM_CLASSES +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + 
+[convolutional] +size=1 +stride=1 +pad=1 +filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=$NUM_CLASSES +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=$NUM_CLASSES +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +" >> yolov3-custom.cfg diff --git a/config/custom.data b/config/custom.data new file mode 100644 index 0000000..846fad7 --- /dev/null +++ b/config/custom.data @@ -0,0 +1,4 @@ +classes= 1 +train=data/custom/train.txt +valid=data/custom/valid.txt +names=data/custom/classes.names diff --git a/config/yolov3-tiny.cfg b/config/yolov3-tiny.cfg new file mode 100644 index 0000000..ade4969 --- /dev/null +++ b/config/yolov3-tiny.cfg @@ -0,0 +1,206 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +# 0 +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +# 1 +[maxpool] +size=2 +stride=2 + +# 2 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# 3 +[maxpool] +size=2 +stride=2 + +# 4 +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +# 5 +[maxpool] +size=2 +stride=2 + +# 6 +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +# 7 +[maxpool] +size=2 +stride=2 + +# 8 +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +# 9 +[maxpool] +size=2 +stride=2 + +# 10 +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +# 11 +[maxpool] +size=2 +stride=1 + +# 12 +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +# 13 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +# 14 +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +# 15 +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +# 16 +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +# 17 +[route] +layers = -4 + +# 18 +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +# 19 +[upsample] +stride=2 + +# 20 +[route] +layers = -1, 8 + +# 21 +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +# 22 +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +# 23 +[yolo] +mask = 1,2,3 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/config/yolov3.cfg b/config/yolov3.cfg new file mode 100644 index 0000000..946e015 --- /dev/null +++ b/config/yolov3.cfg @@ -0,0 +1,788 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=16 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/data/coco.names b/data/coco.names new file mode 100644 index 0000000..ca76c80 --- /dev/null +++ b/data/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/data/custom/classes.names b/data/custom/classes.names new 
file mode 100644 index 0000000..08afa18 --- /dev/null +++ b/data/custom/classes.names @@ -0,0 +1 @@ +train diff --git a/data/custom/images/train.jpg b/data/custom/images/train.jpg new file mode 100644 index 0000000..d832967 Binary files /dev/null and b/data/custom/images/train.jpg differ diff --git a/data/custom/labels/train.txt b/data/custom/labels/train.txt new file mode 100644 index 0000000..3bf4be4 --- /dev/null +++ b/data/custom/labels/train.txt @@ -0,0 +1 @@ +0 0.515 0.5 0.21694873 0.18286777 diff --git a/data/custom/train.txt b/data/custom/train.txt new file mode 100644 index 0000000..7fa5443 --- /dev/null +++ b/data/custom/train.txt @@ -0,0 +1 @@ +data/custom/images/train.jpg diff --git a/data/custom/valid.txt b/data/custom/valid.txt new file mode 100644 index 0000000..7fa5443 --- /dev/null +++ b/data/custom/valid.txt @@ -0,0 +1 @@ +data/custom/images/train.jpg diff --git a/data/get_coco_dataset.sh b/data/get_coco_dataset.sh new file mode 100644 index 0000000..81b0017 --- /dev/null +++ b/data/get_coco_dataset.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh + +# Clone COCO API +git clone https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. + +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt diff --git a/data/samples/dog.jpg b/data/samples/dog.jpg new file mode 100644 index 0000000..77b0381 Binary files /dev/null and b/data/samples/dog.jpg differ diff --git a/data/samples/eagle.jpg b/data/samples/eagle.jpg new file mode 100644 index 0000000..8b75095 Binary files /dev/null and b/data/samples/eagle.jpg differ diff --git a/data/samples/field.jpg b/data/samples/field.jpg new file mode 100644 index 0000000..61d377f Binary files /dev/null and b/data/samples/field.jpg differ diff --git a/data/samples/giraffe.jpg b/data/samples/giraffe.jpg new file mode 100644 index 0000000..a93e8b8 Binary files /dev/null and b/data/samples/giraffe.jpg differ diff --git a/data/samples/herd_of_horses.jpg b/data/samples/herd_of_horses.jpg new file mode 100644 index 0000000..3a761f4 Binary files /dev/null and b/data/samples/herd_of_horses.jpg differ diff --git a/data/samples/messi.jpg b/data/samples/messi.jpg new file mode 100644 index 0000000..997451d Binary files /dev/null and b/data/samples/messi.jpg differ diff --git a/data/samples/person.jpg b/data/samples/person.jpg new file mode 100644 index 0000000..a137366 Binary files /dev/null and b/data/samples/person.jpg differ diff --git a/data/samples/room.jpg b/data/samples/room.jpg new file mode 100644 index 0000000..b42aaa7 Binary files /dev/null and b/data/samples/room.jpg differ diff --git a/data/samples/street.jpg b/data/samples/street.jpg new file mode 100644 index 0000000..832688c Binary files /dev/null and b/data/samples/street.jpg differ diff --git a/detect.py b/detect.py new file mode 100644 index 0000000..f23fbc8 --- /dev/null +++ b/detect.py @@ -0,0
+1,141 @@ +from __future__ import division + +from models import * +from utils.utils import * +from utils.datasets import * + +import os +import sys +import time +import datetime +import argparse + +from PIL import Image + +import torch +from torch.utils.data import DataLoader +from torchvision import datasets +from torch.autograd import Variable + +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from matplotlib.ticker import NullLocator + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--image_folder", type=str, default="data/samples", help="path to dataset") + parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") + parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") + parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") + parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold") + parser.add_argument("--nms_thres", type=float, default=0.4, help="iou thresshold for non-maximum suppression") + parser.add_argument("--batch_size", type=int, default=1, help="size of the batches") + parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation") + parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") + parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model") + opt = parser.parse_args() + print(opt) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + os.makedirs("output", exist_ok=True) + + # Set up model + model = Darknet(opt.model_def, img_size=opt.img_size).to(device) + + if opt.weights_path.endswith(".weights"): + # Load darknet weights + model.load_darknet_weights(opt.weights_path) + else: + # Load checkpoint weights + model.load_state_dict(torch.load(opt.weights_path)) + + model.eval() # Set in evaluation mode + + dataloader = DataLoader( + ImageFolder(opt.image_folder, img_size=opt.img_size), + batch_size=opt.batch_size, + shuffle=False, + num_workers=opt.n_cpu, + ) + + classes = load_classes(opt.class_path) # Extracts class labels from file + + Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor + + imgs = [] # Stores image paths + img_detections = [] # Stores detections for each image index + + print("\nPerforming object detection:") + prev_time = time.time() + for batch_i, (img_paths, input_imgs) in enumerate(dataloader): + # Configure input + input_imgs = Variable(input_imgs.type(Tensor)) + + # Get detections + with torch.no_grad(): + detections = model(input_imgs) + detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres) + + # Log progress + current_time = time.time() + inference_time = datetime.timedelta(seconds=current_time - prev_time) + prev_time = current_time + print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time)) + + # Save image and detections + imgs.extend(img_paths) + img_detections.extend(detections) + + # Bounding-box colors + cmap = plt.get_cmap("tab20b") + colors = [cmap(i) for i in np.linspace(0, 1, 20)] + + print("\nSaving images:") + # Iterate through images and save plot of detections + for img_i, (path, detections) in enumerate(zip(imgs, img_detections)): + + print("(%d) Image: '%s'" % (img_i, path)) + + # Create plot + img = np.array(Image.open(path)) + plt.figure() + fig, ax = 
plt.subplots(1) + ax.imshow(img) + + # Draw bounding boxes and labels of detections + if detections is not None: + # Rescale boxes to original image + detections = rescale_boxes(detections, opt.img_size, img.shape[:2]) + unique_labels = detections[:, -1].cpu().unique() + n_cls_preds = len(unique_labels) + bbox_colors = random.sample(colors, n_cls_preds) + for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections: + + print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item())) + + box_w = x2 - x1 + box_h = y2 - y1 + + color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])] + # Create a Rectangle patch + bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none") + # Add the bbox to the plot + ax.add_patch(bbox) + # Add label + plt.text( + x1, + y1, + s=classes[int(cls_pred)], + color="white", + verticalalignment="top", + bbox={"color": color, "pad": 0}, + ) + + # Save generated image with detections + plt.axis("off") + plt.gca().xaxis.set_major_locator(NullLocator()) + plt.gca().yaxis.set_major_locator(NullLocator()) + filename = path.split("/")[-1].split(".")[0] + plt.savefig(f"output/{filename}.png", bbox_inches="tight", pad_inches=0.0) + plt.close() diff --git a/logs/events.out.tfevents.1585286242.P-V-12.4672.5.v2 b/logs/events.out.tfevents.1585286242.P-V-12.4672.5.v2 new file mode 100644 index 0000000..b46ac44 Binary files /dev/null and b/logs/events.out.tfevents.1585286242.P-V-12.4672.5.v2 differ diff --git a/logs/events.out.tfevents.1585286419.P-V-12.9132.5.v2 b/logs/events.out.tfevents.1585286419.P-V-12.9132.5.v2 new file mode 100644 index 0000000..166d788 Binary files /dev/null and b/logs/events.out.tfevents.1585286419.P-V-12.9132.5.v2 differ diff --git a/logs/events.out.tfevents.1585286534.P-V-12.3528.5.v2 b/logs/events.out.tfevents.1585286534.P-V-12.3528.5.v2 new file mode 100644 index 0000000..0a68aca Binary files /dev/null and b/logs/events.out.tfevents.1585286534.P-V-12.3528.5.v2 differ diff --git a/logs/events.out.tfevents.1585286765.P-V-12.12896.5.v2 b/logs/events.out.tfevents.1585286765.P-V-12.12896.5.v2 new file mode 100644 index 0000000..012849e Binary files /dev/null and b/logs/events.out.tfevents.1585286765.P-V-12.12896.5.v2 differ diff --git a/logs/events.out.tfevents.1585287038.P-V-12.11624.5.v2 b/logs/events.out.tfevents.1585287038.P-V-12.11624.5.v2 new file mode 100644 index 0000000..c43ff03 Binary files /dev/null and b/logs/events.out.tfevents.1585287038.P-V-12.11624.5.v2 differ diff --git a/logs/events.out.tfevents.1585287196.P-V-12.12576.5.v2 b/logs/events.out.tfevents.1585287196.P-V-12.12576.5.v2 new file mode 100644 index 0000000..71ffc1c Binary files /dev/null and b/logs/events.out.tfevents.1585287196.P-V-12.12576.5.v2 differ diff --git a/logs/events.out.tfevents.1585287272.P-V-12.12628.5.v2 b/logs/events.out.tfevents.1585287272.P-V-12.12628.5.v2 new file mode 100644 index 0000000..16b59cb Binary files /dev/null and b/logs/events.out.tfevents.1585287272.P-V-12.12628.5.v2 differ diff --git a/logs/events.out.tfevents.1585287331.P-V-12.13252.5.v2 b/logs/events.out.tfevents.1585287331.P-V-12.13252.5.v2 new file mode 100644 index 0000000..f6422fd Binary files /dev/null and b/logs/events.out.tfevents.1585287331.P-V-12.13252.5.v2 differ diff --git a/logs/events.out.tfevents.1585287394.P-V-12.1776.5.v2 b/logs/events.out.tfevents.1585287394.P-V-12.1776.5.v2 new file mode 100644 index 0000000..d4efe7e Binary files /dev/null and b/logs/events.out.tfevents.1585287394.P-V-12.1776.5.v2 differ diff --git 
a/logs/events.out.tfevents.1585287408.P-V-12.11580.5.v2 b/logs/events.out.tfevents.1585287408.P-V-12.11580.5.v2 new file mode 100644 index 0000000..3e87a59 Binary files /dev/null and b/logs/events.out.tfevents.1585287408.P-V-12.11580.5.v2 differ diff --git a/logs/events.out.tfevents.1585287492.P-V-12.2360.5.v2 b/logs/events.out.tfevents.1585287492.P-V-12.2360.5.v2 new file mode 100644 index 0000000..32f5124 Binary files /dev/null and b/logs/events.out.tfevents.1585287492.P-V-12.2360.5.v2 differ diff --git a/logs/events.out.tfevents.1585287551.P-V-12.13140.5.v2 b/logs/events.out.tfevents.1585287551.P-V-12.13140.5.v2 new file mode 100644 index 0000000..4520024 Binary files /dev/null and b/logs/events.out.tfevents.1585287551.P-V-12.13140.5.v2 differ diff --git a/logs/events.out.tfevents.1585287636.P-V-12.1840.5.v2 b/logs/events.out.tfevents.1585287636.P-V-12.1840.5.v2 new file mode 100644 index 0000000..e990045 Binary files /dev/null and b/logs/events.out.tfevents.1585287636.P-V-12.1840.5.v2 differ diff --git a/logs/events.out.tfevents.1585287761.P-V-12.5664.5.v2 b/logs/events.out.tfevents.1585287761.P-V-12.5664.5.v2 new file mode 100644 index 0000000..58ecc2e Binary files /dev/null and b/logs/events.out.tfevents.1585287761.P-V-12.5664.5.v2 differ diff --git a/logs/events.out.tfevents.1585287779.P-V-12.1184.5.v2 b/logs/events.out.tfevents.1585287779.P-V-12.1184.5.v2 new file mode 100644 index 0000000..bd019ff Binary files /dev/null and b/logs/events.out.tfevents.1585287779.P-V-12.1184.5.v2 differ diff --git a/logs/events.out.tfevents.1585287939.P-V-12.7816.5.v2 b/logs/events.out.tfevents.1585287939.P-V-12.7816.5.v2 new file mode 100644 index 0000000..93d7d58 Binary files /dev/null and b/logs/events.out.tfevents.1585287939.P-V-12.7816.5.v2 differ diff --git a/logs/events.out.tfevents.1585288035.P-V-12.1012.5.v2 b/logs/events.out.tfevents.1585288035.P-V-12.1012.5.v2 new file mode 100644 index 0000000..7888197 Binary files /dev/null and b/logs/events.out.tfevents.1585288035.P-V-12.1012.5.v2 differ diff --git a/logs/events.out.tfevents.1585288063.P-V-12.12836.5.v2 b/logs/events.out.tfevents.1585288063.P-V-12.12836.5.v2 new file mode 100644 index 0000000..0d23900 Binary files /dev/null and b/logs/events.out.tfevents.1585288063.P-V-12.12836.5.v2 differ diff --git a/logs/events.out.tfevents.1585288244.P-V-12.10436.5.v2 b/logs/events.out.tfevents.1585288244.P-V-12.10436.5.v2 new file mode 100644 index 0000000..88487f2 Binary files /dev/null and b/logs/events.out.tfevents.1585288244.P-V-12.10436.5.v2 differ diff --git a/logs/events.out.tfevents.1585288562.P-V-12.8452.5.v2 b/logs/events.out.tfevents.1585288562.P-V-12.8452.5.v2 new file mode 100644 index 0000000..511ccab Binary files /dev/null and b/logs/events.out.tfevents.1585288562.P-V-12.8452.5.v2 differ diff --git a/logs/events.out.tfevents.1585288617.P-V-12.11624.5.v2 b/logs/events.out.tfevents.1585288617.P-V-12.11624.5.v2 new file mode 100644 index 0000000..fc10541 Binary files /dev/null and b/logs/events.out.tfevents.1585288617.P-V-12.11624.5.v2 differ diff --git a/logs/events.out.tfevents.1585288857.P-V-12.10380.5.v2 b/logs/events.out.tfevents.1585288857.P-V-12.10380.5.v2 new file mode 100644 index 0000000..2a1c7b1 Binary files /dev/null and b/logs/events.out.tfevents.1585288857.P-V-12.10380.5.v2 differ diff --git a/logs/events.out.tfevents.1585288957.P-V-12.12580.5.v2 b/logs/events.out.tfevents.1585288957.P-V-12.12580.5.v2 new file mode 100644 index 0000000..1f04972 Binary files /dev/null and 
b/logs/events.out.tfevents.1585288957.P-V-12.12580.5.v2 differ diff --git a/logs/events.out.tfevents.1585297145.P-V-12.2868.5.v2 b/logs/events.out.tfevents.1585297145.P-V-12.2868.5.v2 new file mode 100644 index 0000000..a43e045 Binary files /dev/null and b/logs/events.out.tfevents.1585297145.P-V-12.2868.5.v2 differ diff --git a/logs/events.out.tfevents.1585297391.P-V-12.11856.5.v2 b/logs/events.out.tfevents.1585297391.P-V-12.11856.5.v2 new file mode 100644 index 0000000..97b074c Binary files /dev/null and b/logs/events.out.tfevents.1585297391.P-V-12.11856.5.v2 differ diff --git a/logs/events.out.tfevents.1585297467.P-V-12.12800.5.v2 b/logs/events.out.tfevents.1585297467.P-V-12.12800.5.v2 new file mode 100644 index 0000000..95d429f Binary files /dev/null and b/logs/events.out.tfevents.1585297467.P-V-12.12800.5.v2 differ diff --git a/logs/events.out.tfevents.1585297601.P-V-12.14276.5.v2 b/logs/events.out.tfevents.1585297601.P-V-12.14276.5.v2 new file mode 100644 index 0000000..95d9751 Binary files /dev/null and b/logs/events.out.tfevents.1585297601.P-V-12.14276.5.v2 differ diff --git a/logs/events.out.tfevents.1585304696.P-V-12.14936.5.v2 b/logs/events.out.tfevents.1585304696.P-V-12.14936.5.v2 new file mode 100644 index 0000000..e1b6c20 Binary files /dev/null and b/logs/events.out.tfevents.1585304696.P-V-12.14936.5.v2 differ diff --git a/logs/events.out.tfevents.1585306196.P-V-12.13036.5.v2 b/logs/events.out.tfevents.1585306196.P-V-12.13036.5.v2 new file mode 100644 index 0000000..c1fa408 Binary files /dev/null and b/logs/events.out.tfevents.1585306196.P-V-12.13036.5.v2 differ diff --git a/logs/events.out.tfevents.1585306288.P-V-12.13988.5.v2 b/logs/events.out.tfevents.1585306288.P-V-12.13988.5.v2 new file mode 100644 index 0000000..e1344d2 Binary files /dev/null and b/logs/events.out.tfevents.1585306288.P-V-12.13988.5.v2 differ diff --git a/logs/events.out.tfevents.1585306362.P-V-12.15080.5.v2 b/logs/events.out.tfevents.1585306362.P-V-12.15080.5.v2 new file mode 100644 index 0000000..0a8f136 Binary files /dev/null and b/logs/events.out.tfevents.1585306362.P-V-12.15080.5.v2 differ diff --git a/logs/events.out.tfevents.1585307223.P-V-12.15892.5.v2 b/logs/events.out.tfevents.1585307223.P-V-12.15892.5.v2 new file mode 100644 index 0000000..2142487 Binary files /dev/null and b/logs/events.out.tfevents.1585307223.P-V-12.15892.5.v2 differ diff --git a/logs/events.out.tfevents.1585730649.P-V-12.13564.5.v2 b/logs/events.out.tfevents.1585730649.P-V-12.13564.5.v2 new file mode 100644 index 0000000..a59c87e Binary files /dev/null and b/logs/events.out.tfevents.1585730649.P-V-12.13564.5.v2 differ diff --git a/logs/events.out.tfevents.1585731158.P-V-12.20540.5.v2 b/logs/events.out.tfevents.1585731158.P-V-12.20540.5.v2 new file mode 100644 index 0000000..ff6cc40 Binary files /dev/null and b/logs/events.out.tfevents.1585731158.P-V-12.20540.5.v2 differ diff --git a/logs/events.out.tfevents.1585820895.P-V-12.14140.5.v2 b/logs/events.out.tfevents.1585820895.P-V-12.14140.5.v2 new file mode 100644 index 0000000..be621be Binary files /dev/null and b/logs/events.out.tfevents.1585820895.P-V-12.14140.5.v2 differ diff --git a/logs/events.out.tfevents.1585821091.P-V-12.1188.5.v2 b/logs/events.out.tfevents.1585821091.P-V-12.1188.5.v2 new file mode 100644 index 0000000..f0fe59a Binary files /dev/null and b/logs/events.out.tfevents.1585821091.P-V-12.1188.5.v2 differ diff --git a/logs/events.out.tfevents.1585821171.P-V-12.18044.5.v2 b/logs/events.out.tfevents.1585821171.P-V-12.18044.5.v2 new file mode 100644 index 
0000000..515053e Binary files /dev/null and b/logs/events.out.tfevents.1585821171.P-V-12.18044.5.v2 differ diff --git a/logs/events.out.tfevents.1585821566.P-V-12.19632.5.v2 b/logs/events.out.tfevents.1585821566.P-V-12.19632.5.v2 new file mode 100644 index 0000000..d6ec179 Binary files /dev/null and b/logs/events.out.tfevents.1585821566.P-V-12.19632.5.v2 differ diff --git a/logs/events.out.tfevents.1585821698.P-V-12.12456.5.v2 b/logs/events.out.tfevents.1585821698.P-V-12.12456.5.v2 new file mode 100644 index 0000000..8d68b21 Binary files /dev/null and b/logs/events.out.tfevents.1585821698.P-V-12.12456.5.v2 differ diff --git a/logs/events.out.tfevents.1585822213.P-V-12.2604.5.v2 b/logs/events.out.tfevents.1585822213.P-V-12.2604.5.v2 new file mode 100644 index 0000000..136021c Binary files /dev/null and b/logs/events.out.tfevents.1585822213.P-V-12.2604.5.v2 differ diff --git a/logs/events.out.tfevents.1585822453.P-V-12.11736.5.v2 b/logs/events.out.tfevents.1585822453.P-V-12.11736.5.v2 new file mode 100644 index 0000000..4b19db8 Binary files /dev/null and b/logs/events.out.tfevents.1585822453.P-V-12.11736.5.v2 differ diff --git a/logs/events.out.tfevents.1585823008.P-V-12.6436.5.v2 b/logs/events.out.tfevents.1585823008.P-V-12.6436.5.v2 new file mode 100644 index 0000000..4ed1672 Binary files /dev/null and b/logs/events.out.tfevents.1585823008.P-V-12.6436.5.v2 differ diff --git a/logs/events.out.tfevents.1585823156.P-V-12.20104.5.v2 b/logs/events.out.tfevents.1585823156.P-V-12.20104.5.v2 new file mode 100644 index 0000000..d8bee3a Binary files /dev/null and b/logs/events.out.tfevents.1585823156.P-V-12.20104.5.v2 differ diff --git a/logs/events.out.tfevents.1585823199.P-V-12.16624.5.v2 b/logs/events.out.tfevents.1585823199.P-V-12.16624.5.v2 new file mode 100644 index 0000000..5d5ab45 Binary files /dev/null and b/logs/events.out.tfevents.1585823199.P-V-12.16624.5.v2 differ diff --git a/logs/events.out.tfevents.1585823722.P-V-12.1188.5.v2 b/logs/events.out.tfevents.1585823722.P-V-12.1188.5.v2 new file mode 100644 index 0000000..fcbe32e Binary files /dev/null and b/logs/events.out.tfevents.1585823722.P-V-12.1188.5.v2 differ diff --git a/logs/events.out.tfevents.1585823992.P-V-12.11752.5.v2 b/logs/events.out.tfevents.1585823992.P-V-12.11752.5.v2 new file mode 100644 index 0000000..560ddba Binary files /dev/null and b/logs/events.out.tfevents.1585823992.P-V-12.11752.5.v2 differ diff --git a/logs/events.out.tfevents.1585825203.P-V-12.11356.5.v2 b/logs/events.out.tfevents.1585825203.P-V-12.11356.5.v2 new file mode 100644 index 0000000..c0fca69 Binary files /dev/null and b/logs/events.out.tfevents.1585825203.P-V-12.11356.5.v2 differ diff --git a/logs/events.out.tfevents.1585831722.P-V-12.7600.5.v2 b/logs/events.out.tfevents.1585831722.P-V-12.7600.5.v2 new file mode 100644 index 0000000..46f3ca2 Binary files /dev/null and b/logs/events.out.tfevents.1585831722.P-V-12.7600.5.v2 differ diff --git a/logs/events.out.tfevents.1585831750.P-V-12.15720.5.v2 b/logs/events.out.tfevents.1585831750.P-V-12.15720.5.v2 new file mode 100644 index 0000000..afa03e1 Binary files /dev/null and b/logs/events.out.tfevents.1585831750.P-V-12.15720.5.v2 differ diff --git a/logs/events.out.tfevents.1585832218.P-V-12.14532.5.v2 b/logs/events.out.tfevents.1585832218.P-V-12.14532.5.v2 new file mode 100644 index 0000000..650e58a Binary files /dev/null and b/logs/events.out.tfevents.1585832218.P-V-12.14532.5.v2 differ diff --git a/logs/events.out.tfevents.1585832274.P-V-12.11564.5.v2 
b/logs/events.out.tfevents.1585832274.P-V-12.11564.5.v2 new file mode 100644 index 0000000..4982c04 Binary files /dev/null and b/logs/events.out.tfevents.1585832274.P-V-12.11564.5.v2 differ diff --git a/logs/events.out.tfevents.1585832314.P-V-12.12160.5.v2 b/logs/events.out.tfevents.1585832314.P-V-12.12160.5.v2 new file mode 100644 index 0000000..be563a1 Binary files /dev/null and b/logs/events.out.tfevents.1585832314.P-V-12.12160.5.v2 differ diff --git a/logs/events.out.tfevents.1585832350.P-V-12.6972.5.v2 b/logs/events.out.tfevents.1585832350.P-V-12.6972.5.v2 new file mode 100644 index 0000000..7836d44 Binary files /dev/null and b/logs/events.out.tfevents.1585832350.P-V-12.6972.5.v2 differ diff --git a/logs/events.out.tfevents.1585832388.P-V-12.10572.5.v2 b/logs/events.out.tfevents.1585832388.P-V-12.10572.5.v2 new file mode 100644 index 0000000..3789935 Binary files /dev/null and b/logs/events.out.tfevents.1585832388.P-V-12.10572.5.v2 differ diff --git a/logs/events.out.tfevents.1585832416.P-V-12.20072.5.v2 b/logs/events.out.tfevents.1585832416.P-V-12.20072.5.v2 new file mode 100644 index 0000000..41bb081 Binary files /dev/null and b/logs/events.out.tfevents.1585832416.P-V-12.20072.5.v2 differ diff --git a/logs/events.out.tfevents.1585832445.P-V-12.3348.5.v2 b/logs/events.out.tfevents.1585832445.P-V-12.3348.5.v2 new file mode 100644 index 0000000..046129e Binary files /dev/null and b/logs/events.out.tfevents.1585832445.P-V-12.3348.5.v2 differ diff --git a/logs/events.out.tfevents.1585832608.P-V-12.17292.5.v2 b/logs/events.out.tfevents.1585832608.P-V-12.17292.5.v2 new file mode 100644 index 0000000..29e6def Binary files /dev/null and b/logs/events.out.tfevents.1585832608.P-V-12.17292.5.v2 differ diff --git a/logs/events.out.tfevents.1585832666.P-V-12.19260.5.v2 b/logs/events.out.tfevents.1585832666.P-V-12.19260.5.v2 new file mode 100644 index 0000000..8c301ff Binary files /dev/null and b/logs/events.out.tfevents.1585832666.P-V-12.19260.5.v2 differ diff --git a/logs/events.out.tfevents.1585832711.P-V-12.4128.5.v2 b/logs/events.out.tfevents.1585832711.P-V-12.4128.5.v2 new file mode 100644 index 0000000..fbd281c Binary files /dev/null and b/logs/events.out.tfevents.1585832711.P-V-12.4128.5.v2 differ diff --git a/logs/events.out.tfevents.1585832764.P-V-12.19252.5.v2 b/logs/events.out.tfevents.1585832764.P-V-12.19252.5.v2 new file mode 100644 index 0000000..94bc006 Binary files /dev/null and b/logs/events.out.tfevents.1585832764.P-V-12.19252.5.v2 differ diff --git a/logs/events.out.tfevents.1585832797.P-V-12.20048.5.v2 b/logs/events.out.tfevents.1585832797.P-V-12.20048.5.v2 new file mode 100644 index 0000000..b6fa912 Binary files /dev/null and b/logs/events.out.tfevents.1585832797.P-V-12.20048.5.v2 differ diff --git a/logs/events.out.tfevents.1585832872.P-V-12.19792.5.v2 b/logs/events.out.tfevents.1585832872.P-V-12.19792.5.v2 new file mode 100644 index 0000000..c740b6c Binary files /dev/null and b/logs/events.out.tfevents.1585832872.P-V-12.19792.5.v2 differ diff --git a/logs/events.out.tfevents.1585832909.P-V-12.17900.5.v2 b/logs/events.out.tfevents.1585832909.P-V-12.17900.5.v2 new file mode 100644 index 0000000..07e3f2e Binary files /dev/null and b/logs/events.out.tfevents.1585832909.P-V-12.17900.5.v2 differ diff --git a/logs/events.out.tfevents.1585832962.P-V-12.14388.5.v2 b/logs/events.out.tfevents.1585832962.P-V-12.14388.5.v2 new file mode 100644 index 0000000..62fefb6 Binary files /dev/null and b/logs/events.out.tfevents.1585832962.P-V-12.14388.5.v2 differ diff --git 
a/logs/events.out.tfevents.1585833042.P-V-12.17788.5.v2 b/logs/events.out.tfevents.1585833042.P-V-12.17788.5.v2 new file mode 100644 index 0000000..a032935 Binary files /dev/null and b/logs/events.out.tfevents.1585833042.P-V-12.17788.5.v2 differ diff --git a/logs/events.out.tfevents.1585888551.P-V-12.21420.5.v2 b/logs/events.out.tfevents.1585888551.P-V-12.21420.5.v2 new file mode 100644 index 0000000..81700fa Binary files /dev/null and b/logs/events.out.tfevents.1585888551.P-V-12.21420.5.v2 differ diff --git a/logs/events.out.tfevents.1585889167.P-V-12.10180.5.v2 b/logs/events.out.tfevents.1585889167.P-V-12.10180.5.v2 new file mode 100644 index 0000000..1fcd186 Binary files /dev/null and b/logs/events.out.tfevents.1585889167.P-V-12.10180.5.v2 differ diff --git a/logs/events.out.tfevents.1585889420.P-V-12.22636.5.v2 b/logs/events.out.tfevents.1585889420.P-V-12.22636.5.v2 new file mode 100644 index 0000000..7f949f7 Binary files /dev/null and b/logs/events.out.tfevents.1585889420.P-V-12.22636.5.v2 differ diff --git a/logs/events.out.tfevents.1585889471.P-V-12.19200.5.v2 b/logs/events.out.tfevents.1585889471.P-V-12.19200.5.v2 new file mode 100644 index 0000000..7a002cb Binary files /dev/null and b/logs/events.out.tfevents.1585889471.P-V-12.19200.5.v2 differ diff --git a/logs/events.out.tfevents.1585894384.P-V-12.21692.5.v2 b/logs/events.out.tfevents.1585894384.P-V-12.21692.5.v2 new file mode 100644 index 0000000..f39c19b Binary files /dev/null and b/logs/events.out.tfevents.1585894384.P-V-12.21692.5.v2 differ diff --git a/logs/events.out.tfevents.1585895045.P-V-12.21520.5.v2 b/logs/events.out.tfevents.1585895045.P-V-12.21520.5.v2 new file mode 100644 index 0000000..98f37ed Binary files /dev/null and b/logs/events.out.tfevents.1585895045.P-V-12.21520.5.v2 differ diff --git a/logs/events.out.tfevents.1585975838.P-V-12.24392.5.v2 b/logs/events.out.tfevents.1585975838.P-V-12.24392.5.v2 new file mode 100644 index 0000000..e235559 Binary files /dev/null and b/logs/events.out.tfevents.1585975838.P-V-12.24392.5.v2 differ diff --git a/logs/events.out.tfevents.1585978553.P-V-12.21292.5.v2 b/logs/events.out.tfevents.1585978553.P-V-12.21292.5.v2 new file mode 100644 index 0000000..7d67f40 Binary files /dev/null and b/logs/events.out.tfevents.1585978553.P-V-12.21292.5.v2 differ diff --git a/models.py b/models.py new file mode 100644 index 0000000..0f1b15e --- /dev/null +++ b/models.py @@ -0,0 +1,345 @@ +from __future__ import division + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +import numpy as np + +from utils.parse_config import * +from utils.utils import build_targets, to_cpu, non_max_suppression + +import matplotlib.pyplot as plt +import matplotlib.patches as patches + + +def create_modules(module_defs): + """ + Constructs module list of layer blocks from module configuration in module_defs + """ + hyperparams = module_defs.pop(0) + output_filters = [int(hyperparams["channels"])] + module_list = nn.ModuleList() + for module_i, module_def in enumerate(module_defs): + modules = nn.Sequential() + + if module_def["type"] == "convolutional": + bn = int(module_def["batch_normalize"]) + filters = int(module_def["filters"]) + kernel_size = int(module_def["size"]) + pad = (kernel_size - 1) // 2 + modules.add_module( + f"conv_{module_i}", + nn.Conv2d( + in_channels=output_filters[-1], + out_channels=filters, + kernel_size=kernel_size, + stride=int(module_def["stride"]), + padding=pad, + bias=not bn, + ), + ) + if bn: + 
modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)) + if module_def["activation"] == "leaky": + modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) + + elif module_def["type"] == "maxpool": + kernel_size = int(module_def["size"]) + stride = int(module_def["stride"]) + if kernel_size == 2 and stride == 1: + modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))) + maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) + modules.add_module(f"maxpool_{module_i}", maxpool) + + elif module_def["type"] == "upsample": + upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest") + modules.add_module(f"upsample_{module_i}", upsample) + + elif module_def["type"] == "route": # 输入1:26*26*256 输入2:26*26*128 输出:26*26*(256+128) + layers = [int(x) for x in module_def["layers"].split(",")] + filters = sum([output_filters[1:][i] for i in layers]) + modules.add_module(f"route_{module_i}", EmptyLayer()) + + elif module_def["type"] == "shortcut": + filters = output_filters[1:][int(module_def["from"])] + modules.add_module(f"shortcut_{module_i}", EmptyLayer()) + + elif module_def["type"] == "yolo": + anchor_idxs = [int(x) for x in module_def["mask"].split(",")] + # Extract anchors + anchors = [int(x) for x in module_def["anchors"].split(",")] + anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] + anchors = [anchors[i] for i in anchor_idxs] + num_classes = int(module_def["classes"]) + img_size = int(hyperparams["height"]) + # Define detection layer + yolo_layer = YOLOLayer(anchors, num_classes, img_size) + modules.add_module(f"yolo_{module_i}", yolo_layer) + # Register module list and number of output filters + module_list.append(modules) + output_filters.append(filters) + + return hyperparams, module_list + + +class Upsample(nn.Module): + """ nn.Upsample is deprecated """ + + def __init__(self, scale_factor, mode="nearest"): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.mode = mode + + def forward(self, x): + x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + return x + + +class EmptyLayer(nn.Module): + """Placeholder for 'route' and 'shortcut' layers""" + + def __init__(self): + super(EmptyLayer, self).__init__() + + +class YOLOLayer(nn.Module): + """Detection layer""" + + def __init__(self, anchors, num_classes, img_dim=416): + super(YOLOLayer, self).__init__() + self.anchors = anchors + self.num_anchors = len(anchors) + self.num_classes = num_classes + self.ignore_thres = 0.5 + self.mse_loss = nn.MSELoss() + self.bce_loss = nn.BCELoss() + self.obj_scale = 1 + self.noobj_scale = 100 + self.metrics = {} + self.img_dim = img_dim + self.grid_size = 0 # grid size + + def compute_grid_offsets(self, grid_size, cuda=True): + self.grid_size = grid_size + g = self.grid_size + FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor + self.stride = self.img_dim / self.grid_size + # Calculate offsets for each grid + self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor) + self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor) + self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]) + self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) + self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) + + def forward(self, x, targets=None, img_dim=None): + # Tensors 
+ def forward(self, x, targets=None, img_dim=None): + # Tensors for cuda support + print(x.shape) + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor + + self.img_dim = img_dim + num_samples = x.size(0) + grid_size = x.size(2) + + prediction = ( + x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size) + .permute(0, 1, 3, 4, 2) + .contiguous() + ) + print(prediction.shape) + # Get outputs + x = torch.sigmoid(prediction[..., 0]) # Center x + y = torch.sigmoid(prediction[..., 1]) # Center y + w = prediction[..., 2] # Width + h = prediction[..., 3] # Height + pred_conf = torch.sigmoid(prediction[..., 4]) # Conf + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. + + # If grid size does not match current we compute new offsets + if grid_size != self.grid_size: + self.compute_grid_offsets(grid_size, cuda=x.is_cuda) # turn relative cell offsets into absolute grid positions, e.g. (0.5, 0.5) becomes (11.5, 11.5) + + # Add offset and scale with anchors # actual positions on the feature map + pred_boxes = FloatTensor(prediction[..., :4].shape) + pred_boxes[..., 0] = x.data + self.grid_x + pred_boxes[..., 1] = y.data + self.grid_y + pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w + pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h + + output = torch.cat( + ( + pred_boxes.view(num_samples, -1, 4) * self.stride, # scale back to the original image size + pred_conf.view(num_samples, -1, 1), + pred_cls.view(num_samples, -1, self.num_classes), + ), + -1, + ) + + if targets is None: + return output, 0 + else: + iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( + pred_boxes=pred_boxes, + pred_cls=pred_cls, + target=targets, + anchors=self.scaled_anchors, + ignore_thres=self.ignore_thres, + ) + # iou_scores: IoU between each ground-truth box and its best-matching anchor; class_mask: where the class prediction is correct; obj_mask: 1 at the best anchor for each target box; noobj_mask: 0 wherever obj_mask is 1 or the computed IoU exceeds the ignore threshold, 1 everywhere else; tx, ty, tw, th: the xywh regression targets at this feature-map scale, i.e. the values we fit; tconf: target objectness
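Before the loss terms below, a small illustrative sketch (not part of the commit) of how obj_mask and noobj_mask gate the confidence loss, using the obj_scale=1 and noobj_scale=100 set in __init__; the tensors here are toy values invented for the example.

import torch
import torch.nn as nn

bce = nn.BCELoss()
pred_conf  = torch.tensor([0.9, 0.2, 0.1, 0.7])          # predicted objectness for four anchor cells
tconf      = torch.tensor([1.0, 0.0, 0.0, 0.0])          # target objectness
obj_mask   = torch.tensor([True, False, False, False])   # best anchor for the ground-truth box
noobj_mask = torch.tensor([False, True, True, False])    # last cell ignored: its IoU is above ignore_thres

obj_scale, noobj_scale = 1, 100
loss_conf = obj_scale * bce(pred_conf[obj_mask], tconf[obj_mask]) + noobj_scale * bce(pred_conf[noobj_mask], tconf[noobj_mask])
print(loss_conf.item())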
loss) + loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) # 只计算有目标的 + loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) + loss_w = self.mse_loss(w[obj_mask], tw[obj_mask]) + loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) + loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) + loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask]) + loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj #有物体越接近1越好 没物体的越接近0越好 + loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask]) #分类损失 + total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls #总损失 + + # Metrics + cls_acc = 100 * class_mask[obj_mask].mean() + conf_obj = pred_conf[obj_mask].mean() + conf_noobj = pred_conf[noobj_mask].mean() + conf50 = (pred_conf > 0.5).float() + iou50 = (iou_scores > 0.5).float() + iou75 = (iou_scores > 0.75).float() + detected_mask = conf50 * class_mask * tconf + precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) + recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) + recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) + + self.metrics = { + "loss": to_cpu(total_loss).item(), + "x": to_cpu(loss_x).item(), + "y": to_cpu(loss_y).item(), + "w": to_cpu(loss_w).item(), + "h": to_cpu(loss_h).item(), + "conf": to_cpu(loss_conf).item(), + "cls": to_cpu(loss_cls).item(), + "cls_acc": to_cpu(cls_acc).item(), + "recall50": to_cpu(recall50).item(), + "recall75": to_cpu(recall75).item(), + "precision": to_cpu(precision).item(), + "conf_obj": to_cpu(conf_obj).item(), + "conf_noobj": to_cpu(conf_noobj).item(), + "grid_size": grid_size, + } + + return output, total_loss + + +class Darknet(nn.Module): + """YOLOv3 object detection model""" + + def __init__(self, config_path, img_size=416): + super(Darknet, self).__init__() + self.module_defs = parse_model_config(config_path) + self.hyperparams, self.module_list = create_modules(self.module_defs) + self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")] + self.img_size = img_size + self.seen = 0 + self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32) + + def forward(self, x, targets=None): + img_dim = x.shape[2] + loss = 0 + layer_outputs, yolo_outputs = [], [] + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if module_def["type"] in ["convolutional", "upsample", "maxpool"]: + x = module(x) + elif module_def["type"] == "route": + x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1) + elif module_def["type"] == "shortcut": + layer_i = int(module_def["from"]) + x = layer_outputs[-1] + layer_outputs[layer_i] + elif module_def["type"] == "yolo": + x, layer_loss = module[0](x, targets, img_dim) + loss += layer_loss + yolo_outputs.append(x) + layer_outputs.append(x) + yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) + return yolo_outputs if targets is None else (loss, yolo_outputs) + + def load_darknet_weights(self, weights_path): + """Parses and loads the weights stored in 'weights_path'""" + + # Open the weights file + with open(weights_path, "rb") as f: + header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values + self.header_info = header # Needed to write header when saving weights + self.seen = header[3] # number of images seen during training + weights = np.fromfile(f, dtype=np.float32) # The rest are weights + + # Establish cutoff for loading backbone weights + cutoff = None + if "darknet53.conv.74" in 
weights_path: + cutoff = 75 + + ptr = 0 + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if i == cutoff: + break + if module_def["type"] == "convolutional": + conv_layer = module[0] + if module_def["batch_normalize"]: + # Load BN bias, weights, running mean and running variance + bn_layer = module[1] + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean) + bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + + def save_darknet_weights(self, path, cutoff=-1): + """ + @:param path - path of the new weights file + @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved) + """ + fp = open(path, "wb") + self.header_info[3] = self.seen + self.header_info.tofile(fp) + + # Iterate through layers + for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if module_def["type"] == "convolutional": + conv_layer = module[0] + # If batch norm, load bn first + if module_def["batch_normalize"]: + bn_layer = module[1] + bn_layer.bias.data.cpu().numpy().tofile(fp) + bn_layer.weight.data.cpu().numpy().tofile(fp) + bn_layer.running_mean.data.cpu().numpy().tofile(fp) + bn_layer.running_var.data.cpu().numpy().tofile(fp) + # Load conv bias + else: + conv_layer.bias.data.cpu().numpy().tofile(fp) + # Load conv weights + conv_layer.weight.data.cpu().numpy().tofile(fp) + + fp.close() diff --git a/test.py b/test.py new file mode 100644 index 0000000..98ee186 --- /dev/null +++ b/test.py @@ -0,0 +1,105 @@ +from __future__ import division + +from models import * +from utils.utils import * +from utils.datasets import * +from utils.parse_config import * + +import os +import sys +import time +import datetime +import argparse +import tqdm + +import torch +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision import transforms +from torch.autograd import Variable +import torch.optim as optim + + +def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size): + model.eval() + + # Get dataloader + dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False) + dataloader = torch.utils.data.DataLoader( + dataset, batch_size=batch_size, shuffle=False, num_workers=1, collate_fn=dataset.collate_fn + ) + + Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor + + labels = [] + sample_metrics = [] # List of tuples (TP, confs, pred) + for batch_i, (_, imgs, targets) in enumerate(tqdm.tqdm(dataloader, desc="Detecting objects")): + + # Extract labels + labels += targets[:, 1].tolist() + # Rescale target + targets[:, 
2:] = xywh2xyxy(targets[:, 2:]) + targets[:, 2:] *= img_size + + imgs = Variable(imgs.type(Tensor), requires_grad=False) + + with torch.no_grad(): + outputs = model(imgs) + outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres) + + sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres) + + # Concatenate sample statistics + true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*sample_metrics))] + precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, labels) + + return precision, recall, AP, f1, ap_class + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch") + parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") + parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") + parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") + parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") + parser.add_argument("--iou_thres", type=float, default=0.5, help="iou threshold required to qualify as detected") + parser.add_argument("--conf_thres", type=float, default=0.001, help="object confidence threshold") + parser.add_argument("--nms_thres", type=float, default=0.5, help="iou thresshold for non-maximum suppression") + parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation") + parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") + opt = parser.parse_args() + print(opt) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + data_config = parse_data_config(opt.data_config) + valid_path = data_config["valid"] + class_names = load_classes(data_config["names"]) + + # Initiate model + model = Darknet(opt.model_def).to(device) + if opt.weights_path.endswith(".weights"): + # Load darknet weights + model.load_darknet_weights(opt.weights_path) + else: + # Load checkpoint weights + model.load_state_dict(torch.load(opt.weights_path)) + + print("Compute mAP...") + + precision, recall, AP, f1, ap_class = evaluate( + model, + path=valid_path, + iou_thres=opt.iou_thres, + conf_thres=opt.conf_thres, + nms_thres=opt.nms_thres, + img_size=opt.img_size, + batch_size=8, + ) + + print("Average Precisions:") + for i, c in enumerate(ap_class): + print(f"+ Class '{c}' ({class_names[c]}) - AP: {AP[i]}") + + print(f"mAP: {AP.mean()}") diff --git a/train.py b/train.py new file mode 100644 index 0000000..08820a5 --- /dev/null +++ b/train.py @@ -0,0 +1,187 @@ +from __future__ import division + +from models import * +from utils.logger import * +from utils.utils import * +from utils.datasets import * +from utils.parse_config import * +from test import evaluate + +import warnings +warnings.filterwarnings("ignore") + +from terminaltables import AsciiTable + +import os +import sys +import time +import datetime +import argparse + +import torch +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision import transforms +from torch.autograd import Variable +import torch.optim as optim + +""" +--data_config config/coco.data +--pretrained_weights weights/darknet53.conv.74 +""" + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + 
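+    # Training is configured entirely through these command-line flags; an example invocation
+    # (paths assumed, mirroring the usage note in the string literal above) is:
+    #   python train.py --data_config config/coco.data --pretrained_weights weights/darknet53.conv.74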
parser.add_argument("--epochs", type=int, default=100, help="number of epochs") + parser.add_argument("--batch_size", type=int, default=4, help="size of each image batch") + parser.add_argument("--gradient_accumulations", type=int, default=2, help="number of gradient accums before step") + parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") + parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") + parser.add_argument("--pretrained_weights", type=str, help="if specified starts from checkpoint model") + parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation") + parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") + parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between saving model weights") + parser.add_argument("--evaluation_interval", type=int, default=1, help="interval evaluations on validation set") + parser.add_argument("--compute_map", default=False, help="if True computes mAP every tenth batch") + parser.add_argument("--multiscale_training", default=True, help="allow for multi-scale training") + opt = parser.parse_args() + print(opt) + + logger = Logger("logs") + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + os.makedirs("output", exist_ok=True) + os.makedirs("checkpoints", exist_ok=True) + + # Get data configuration + data_config = parse_data_config(opt.data_config) + train_path = data_config["train"] + valid_path = data_config["valid"] + class_names = load_classes(data_config["names"]) + + # Initiate model + model = Darknet(opt.model_def).to(device) + model.apply(weights_init_normal) + + # If specified we start from checkpoint + if opt.pretrained_weights: + if opt.pretrained_weights.endswith(".pth"): + model.load_state_dict(torch.load(opt.pretrained_weights)) + else: + model.load_darknet_weights(opt.pretrained_weights) + + # Get dataloader + dataset = ListDataset(train_path, augment=True, multiscale=opt.multiscale_training) + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=opt.batch_size, + shuffle=True, + num_workers=opt.n_cpu, + pin_memory=True, + collate_fn=dataset.collate_fn, + ) + + optimizer = torch.optim.Adam(model.parameters()) + + metrics = [ + "grid_size", + "loss", + "x", + "y", + "w", + "h", + "conf", + "cls", + "cls_acc", + "recall50", + "recall75", + "precision", + "conf_obj", + "conf_noobj", + ] + + for epoch in range(opt.epochs): + model.train() + start_time = time.time() + for batch_i, (_, imgs, targets) in enumerate(dataloader): + batches_done = len(dataloader) * epoch + batch_i + + imgs = Variable(imgs.to(device)) + targets = Variable(targets.to(device), requires_grad=False) + print ('imgs',imgs.shape) + print ('targets',targets.shape) + loss, outputs = model(imgs, targets) + loss.backward() + + if batches_done % opt.gradient_accumulations: + # Accumulates gradient before each step + optimizer.step() + optimizer.zero_grad() + + # ---------------- + # Log progress + # ---------------- + + log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, opt.epochs, batch_i, len(dataloader)) + + metric_table = [["Metrics", *[f"YOLO Layer {i}" for i in range(len(model.yolo_layers))]]] + + # Log metrics at each YOLO layer + for i, metric in enumerate(metrics): + formats = {m: "%.6f" for m in metrics} + formats["grid_size"] = "%2d" + formats["cls_acc"] = "%.2f%%" + row_metrics = 
[formats[metric] % yolo.metrics.get(metric, 0) for yolo in model.yolo_layers] + metric_table += [[metric, *row_metrics]] + + # Tensorboard logging + tensorboard_log = [] + for j, yolo in enumerate(model.yolo_layers): + for name, metric in yolo.metrics.items(): + if name != "grid_size": + tensorboard_log += [(f"{name}_{j+1}", metric)] + tensorboard_log += [("loss", loss.item())] + logger.list_of_scalars_summary(tensorboard_log, batches_done) + + log_str += AsciiTable(metric_table).table + log_str += f"\nTotal loss {loss.item()}" + + # Determine approximate time left for epoch + epoch_batches_left = len(dataloader) - (batch_i + 1) + time_left = datetime.timedelta(seconds=epoch_batches_left * (time.time() - start_time) / (batch_i + 1)) + log_str += f"\n---- ETA {time_left}" + + print(log_str) + + model.seen += imgs.size(0) + + if epoch % opt.evaluation_interval == 0: + print("\n---- Evaluating Model ----") + # Evaluate the model on the validation set + precision, recall, AP, f1, ap_class = evaluate( + model, + path=valid_path, + iou_thres=0.5, + conf_thres=0.5, + nms_thres=0.5, + img_size=opt.img_size, + batch_size=8, + ) + evaluation_metrics = [ + ("val_precision", precision.mean()), + ("val_recall", recall.mean()), + ("val_mAP", AP.mean()), + ("val_f1", f1.mean()), + ] + logger.list_of_scalars_summary(evaluation_metrics, epoch) + + # Print class APs and mAP + ap_table = [["Index", "Class name", "AP"]] + for i, c in enumerate(ap_class): + ap_table += [[c, class_names[c], "%.5f" % AP[i]]] + print(AsciiTable(ap_table).table) + print(f"---- mAP {AP.mean()}") + + if epoch % opt.checkpoint_interval == 0: + torch.save(model.state_dict(), f"checkpoints/yolov3_ckpt_%d.pth" % epoch) diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/augmentations.py b/utils/augmentations.py new file mode 100644 index 0000000..b1aed5d --- /dev/null +++ b/utils/augmentations.py @@ -0,0 +1,9 @@ +import torch +import torch.nn.functional as F +import numpy as np + + +def horisontal_flip(images, targets): + images = torch.flip(images, [-1]) + targets[:, 2] = 1 - targets[:, 2] + return images, targets diff --git a/utils/datasets.py b/utils/datasets.py new file mode 100644 index 0000000..efcd06a --- /dev/null +++ b/utils/datasets.py @@ -0,0 +1,154 @@ +import glob +import random +import os +import sys +import numpy as np +from PIL import Image +import torch +import torch.nn.functional as F + +from utils.augmentations import horisontal_flip +from torch.utils.data import Dataset +import torchvision.transforms as transforms + + +def pad_to_square(img, pad_value): + c, h, w = img.shape + dim_diff = np.abs(h - w) + # (upper / left) padding and (lower / right) padding + pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 + # Determine padding + pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) + # Add padding + img = F.pad(img, pad, "constant", value=pad_value) + + return img, pad + + +def resize(image, size): + image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) + return image + + +def random_resize(images, min_size=288, max_size=448): + new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0] + images = F.interpolate(images, size=new_size, mode="nearest") + return images + + +class ImageFolder(Dataset): + def __init__(self, folder_path, img_size=416): + self.files = sorted(glob.glob("%s/*.*" % folder_path)) + self.img_size = img_size + + def __getitem__(self, index): + img_path = self.files[index % len(self.files)] 
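+        # Indexing modulo len(self.files) wraps the index, so any integer maps to a valid file.
+        # Below, the image is converted to a tensor, zero-padded to a square, and resized to img_size.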
+ # Extract image as PyTorch tensor + img = transforms.ToTensor()(Image.open(img_path)) + # Pad to square resolution + img, _ = pad_to_square(img, 0) + # Resize + img = resize(img, self.img_size) + + return img_path, img + + def __len__(self): + return len(self.files) + + +class ListDataset(Dataset): + def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): + with open(list_path, "r") as file: + self.img_files = file.readlines() + + self.label_files = [ + path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") + for path in self.img_files + ] + self.img_size = img_size + self.max_objects = 100 + self.augment = augment + self.multiscale = multiscale + self.normalized_labels = normalized_labels + self.min_size = self.img_size - 3 * 32 + self.max_size = self.img_size + 3 * 32 + self.batch_count = 0 + + def __getitem__(self, index): + + # --------- + # Image + # --------- + + img_path = self.img_files[index % len(self.img_files)].rstrip() + img_path = 'E:\\eclipse-workspace\\PyTorch\\PyTorch-YOLOv3\\data\\coco' + img_path + #print (img_path) + # Extract image as PyTorch tensor + img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) + + # Handle images with less than three channels + if len(img.shape) != 3: + img = img.unsqueeze(0) + img = img.expand((3, img.shape[1:])) + + _, h, w = img.shape + h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) + # Pad to square resolution + img, pad = pad_to_square(img, 0) + _, padded_h, padded_w = img.shape + + # --------- + # Label + # --------- + + label_path = self.label_files[index % len(self.img_files)].rstrip() + label_path = 'E:\\eclipse-workspace\\PyTorch\\PyTorch-YOLOv3\\data\\coco\\labels' + label_path + #print (label_path) + + targets = None + if os.path.exists(label_path): + boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5)) + # Extract coordinates for unpadded + unscaled image + x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) + y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) + x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) + y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) + # Adjust for added padding + x1 += pad[0] + y1 += pad[2] + x2 += pad[1] + y2 += pad[3] + # Returns (x, y, w, h) + boxes[:, 1] = ((x1 + x2) / 2) / padded_w + boxes[:, 2] = ((y1 + y2) / 2) / padded_h + boxes[:, 3] *= w_factor / padded_w + boxes[:, 4] *= h_factor / padded_h + + targets = torch.zeros((len(boxes), 6)) + targets[:, 1:] = boxes + + # Apply augmentations + if self.augment: + if np.random.random() < 0.5: + img, targets = horisontal_flip(img, targets) + + return img_path, img, targets + + def collate_fn(self, batch): + paths, imgs, targets = list(zip(*batch)) + # Remove empty placeholder targets + targets = [boxes for boxes in targets if boxes is not None] + # Add sample index to targets + for i, boxes in enumerate(targets): + boxes[:, 0] = i + targets = torch.cat(targets, 0) + # Selects new image size every tenth batch + if self.multiscale and self.batch_count % 10 == 0: + self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) + # Resize images to input shape + imgs = torch.stack([resize(img, self.img_size) for img in imgs]) + self.batch_count += 1 + return paths, imgs, targets + + def __len__(self): + return len(self.img_files) diff --git a/utils/logger.py b/utils/logger.py new file mode 100644 index 0000000..1cf6994 --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,19 @@ +import tensorflow as tf + + +class Logger(object): + def __init__(self, 
log_dir): + """Create a summary writer logging to log_dir.""" + self.writer = tf.summary.create_file_writer(log_dir) + + def scalar_summary(self, tag, value, step): + with self.writer.as_default(): + tf.summary.scalar(tag, value, step=step) + self.writer.flush() + def list_of_scalars_summary(self, tag_value_pairs, step): + with self.writer.as_default(): + for tag, value in tag_value_pairs: + tf.summary.scalar(tag, value, step=step) + self.writer.flush() + # summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs]) + # self.writer.add_summary(summary, step) diff --git a/utils/parse_config.py b/utils/parse_config.py new file mode 100644 index 0000000..9dc0358 --- /dev/null +++ b/utils/parse_config.py @@ -0,0 +1,36 @@ + + +def parse_model_config(path): + """Parses the yolo-v3 layer configuration file and returns module definitions""" + file = open(path, 'r') + lines = file.read().split('\n') + lines = [x for x in lines if x and not x.startswith('#')] + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + module_defs = [] + for line in lines: + if line.startswith('['): # This marks the start of a new block + module_defs.append({}) + module_defs[-1]['type'] = line[1:-1].rstrip() + if module_defs[-1]['type'] == 'convolutional': + module_defs[-1]['batch_normalize'] = 0 + else: + key, value = line.split("=") + value = value.strip() + module_defs[-1][key.rstrip()] = value.strip() + + return module_defs + +def parse_data_config(path): + """Parses the data configuration file""" + options = dict() + options['gpus'] = '0,1,2,3' + options['num_workers'] = '10' + with open(path, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.strip() + if line == '' or line.startswith('#'): + continue + key, value = line.split('=') + options[key.strip()] = value.strip() + return options diff --git a/utils/utils.py b/utils/utils.py new file mode 100644 index 0000000..07b7f0c --- /dev/null +++ b/utils/utils.py @@ -0,0 +1,322 @@ +from __future__ import division +import math +import time +import tqdm +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.patches as patches + + +def to_cpu(tensor): + return tensor.detach().cpu() + + +def load_classes(path): + """ + Loads class labels at 'path' + """ + fp = open(path, "r") + names = fp.read().split("\n")[:-1] + return names + + +def weights_init_normal(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + torch.nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find("BatchNorm2d") != -1: + torch.nn.init.normal_(m.weight.data, 1.0, 0.02) + torch.nn.init.constant_(m.bias.data, 0.0) + + +def rescale_boxes(boxes, current_dim, original_shape): + """ Rescales bounding boxes to the original shape """ + orig_h, orig_w = original_shape + # The amount of padding that was added + pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) + pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) + # Image height and width after padding is removed + unpad_h = current_dim - pad_y + unpad_w = current_dim - pad_x + # Rescale bounding boxes to dimension of original image + boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h + boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * 
orig_h + return boxes + + +def xywh2xyxy(x): + y = x.new(x.shape) + y[..., 0] = x[..., 0] - x[..., 2] / 2 + y[..., 1] = x[..., 1] - x[..., 3] / 2 + y[..., 2] = x[..., 0] + x[..., 2] / 2 + y[..., 3] = x[..., 1] + x[..., 3] / 2 + return y + + +def ap_per_class(tp, conf, pred_cls, target_cls): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (list). + conf: Objectness value from 0-1 (list). + pred_cls: Predicted object classes (list). + target_cls: True object classes (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + ap, p, r = [], [], [] + for c in tqdm.tqdm(unique_classes, desc="Computing AP"): + i = pred_cls == c + n_gt = (target_cls == c).sum() # Number of ground truth objects + n_p = i.sum() # Number of predicted objects + + if n_p == 0 and n_gt == 0: + continue + elif n_p == 0 or n_gt == 0: + ap.append(0) + r.append(0) + p.append(0) + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum() + tpc = (tp[i]).cumsum() + + # Recall + recall_curve = tpc / (n_gt + 1e-16) + r.append(recall_curve[-1]) + + # Precision + precision_curve = tpc / (tpc + fpc) + p.append(precision_curve[-1]) + + # AP from recall-precision curve + ap.append(compute_ap(recall_curve, precision_curve)) + + # Compute F1 score (harmonic mean of precision and recall) + p, r, ap = np.array(p), np.array(r), np.array(ap) + f1 = 2 * p * r / (p + r + 1e-16) + + return p, r, ap, f1, unique_classes.astype("int32") + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Code originally from https://github.com/rbgirshick/py-faster-rcnn. + + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. 
+ """ + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([0.0], precision, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def get_batch_statistics(outputs, targets, iou_threshold): + """ Compute true positives, predicted scores and predicted labels per sample """ + batch_metrics = [] + for sample_i in range(len(outputs)): + + if outputs[sample_i] is None: + continue + + output = outputs[sample_i] + pred_boxes = output[:, :4] + pred_scores = output[:, 4] + pred_labels = output[:, -1] + + true_positives = np.zeros(pred_boxes.shape[0]) + + annotations = targets[targets[:, 0] == sample_i][:, 1:] + target_labels = annotations[:, 0] if len(annotations) else [] + if len(annotations): + detected_boxes = [] + target_boxes = annotations[:, 1:] + + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): + + # If targets are found break + if len(detected_boxes) == len(annotations): + break + + # Ignore if label is not one of the target labels + if pred_label not in target_labels: + continue + + iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) + if iou >= iou_threshold and box_index not in detected_boxes: + true_positives[pred_i] = 1 + detected_boxes += [box_index] + batch_metrics.append([true_positives, pred_scores, pred_labels]) + return batch_metrics + + +def bbox_wh_iou(wh1, wh2): + wh2 = wh2.t() + w1, h1 = wh1[0], wh1[1] + w2, h2 = wh2[0], wh2[1] + inter_area = torch.min(w1, w2) * torch.min(h1, h2) + union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area + return inter_area / union_area + + +def bbox_iou(box1, box2, x1y1x2y2=True): + """ + Returns the IoU of two bounding boxes + """ + if not x1y1x2y2: + # Transform from center and width to exact coordinates + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 + else: + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] + + # get the corrdinates of the intersection rectangle + inter_rect_x1 = torch.max(b1_x1, b2_x1) + inter_rect_y1 = torch.max(b1_y1, b2_y1) + inter_rect_x2 = torch.min(b1_x2, b2_x2) + inter_rect_y2 = torch.min(b1_y2, b2_y2) + # Intersection area + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( + inter_rect_y2 - inter_rect_y1 + 1, min=0 + ) + # Union Area + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) + + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) + + return iou + + +def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' and performs + Non-Maximum Suppression to further filter detections. 
+    Returns detections with shape:
+        (x1, y1, x2, y2, object_conf, class_score, class_pred)
+    """
+
+    # From (center x, center y, width, height) to (x1, y1, x2, y2)
+    prediction[..., :4] = xywh2xyxy(prediction[..., :4])
+    output = [None for _ in range(len(prediction))]
+    for image_i, image_pred in enumerate(prediction):
+        # Filter out confidence scores below threshold
+        image_pred = image_pred[image_pred[:, 4] >= conf_thres]
+        # If none are remaining => process next image
+        if not image_pred.size(0):
+            continue
+        # Object confidence times class confidence
+        score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
+        # Sort by it
+        image_pred = image_pred[(-score).argsort()]
+        class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
+        detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
+        # Perform non-maximum suppression
+        keep_boxes = []
+        while detections.size(0):
+            large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
+            label_match = detections[0, -1] == detections[:, -1]
+            # Indices of boxes with lower confidence scores, large IOUs and matching labels
+            invalid = large_overlap & label_match
+            weights = detections[invalid, 4:5]
+            # Merge overlapping bboxes by order of confidence
+            detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
+            keep_boxes += [detections[0]]
+            detections = detections[~invalid]
+        if keep_boxes:
+            output[image_i] = torch.stack(keep_boxes)
+
+    return output
+
+
+def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
+
+    ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
+    FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
+
+    nB = pred_boxes.size(0)  # batch size
+    nA = pred_boxes.size(1)  # number of anchors per grid cell
+    nC = pred_cls.size(-1)  # number of classes
+    nG = pred_boxes.size(2)  # grid size
+
+    # Output tensors
+    obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)  # 1 where an anchor is responsible for an object (foreground), 0 by default
+    noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)  # 1 where an anchor contains no object (background), 1 by default
+    class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)  # 1 where the class is predicted correctly, 0 by default
+    iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)  # IoU between the prediction and the ground-truth box
+    tx = FloatTensor(nB, nA, nG, nG).fill_(0)  # ground-truth offsets relative to the grid cell
+    ty = FloatTensor(nB, nA, nG, nG).fill_(0)
+    tw = FloatTensor(nB, nA, nG, nG).fill_(0)
+    th = FloatTensor(nB, nA, nG, nG).fill_(0)
+    tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)
+
+    # Convert to position relative to box
+    target_boxes = target[:, 2:6] * nG  # targets store xywh in [0, 1]; scale them to the current grid size
+    gxy = target_boxes[:, :2]
+    gwh = target_boxes[:, 2:]
+    # Get anchors with best iou
+    ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])  # IoU of every anchor shape with every ground-truth box
+    best_ious, best_n = ious.max(0)  # best score and which anchor shape fits each target best
+    # Separate target values
+    b, target_labels = target[:, :2].long().t()  # batch index and class label of each ground-truth box
+    gx, gy = gxy.t()
+    gw, gh = gwh.t()
+    gi, gj = gxy.long().t()  # grid-cell indices (floored)
+    # Set masks
+    obj_mask[b, best_n, gj, gi] = 1  # cells that actually contain an object
+    noobj_mask[b, best_n, gj, gi] = 0  # and the opposite
+
+    # Set noobj mask to zero where iou exceeds ignore threshold
+    for i, anchor_ious in enumerate(ious.t()):  # anchors whose IoU exceeds the threshold are treated as containing an object
+        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
+
+    # Coordinates
+    tx[b, best_n, gj, gi] = gx - gx.floor()  # offset of the ground-truth box within its grid cell
+    ty[b, best_n, gj, gi] = gy - gy.floor()
+    # Width and height
+    tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
+    th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
+    # One-hot encoding of label
+    tcls[b, best_n, gj, gi, target_labels] = 1  # one-hot encode the ground-truth class
+    # Compute label correctness and iou at best anchor
+    class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
+    iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)  # IoU between each matched prediction and its ground-truth box
+
+    tconf = obj_mask.float()  # target confidence of the ground-truth boxes, i.e. 1
+    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
diff --git a/weights/darknet53.conv.74 b/weights/darknet53.conv.74
new file mode 100644
index 0000000..a0680f7
Binary files /dev/null and b/weights/darknet53.conv.74 differ
diff --git a/weights/download_weights.sh b/weights/download_weights.sh
new file mode 100644
index 0000000..4bccb58
--- /dev/null
+++ b/weights/download_weights.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Download weights for vanilla YOLOv3
+wget -c https://pjreddie.com/media/files/yolov3.weights
+# Download weights for tiny YOLOv3
+wget -c https://pjreddie.com/media/files/yolov3-tiny.weights
+# Download weights for backbone network
+wget -c https://pjreddie.com/media/files/darknet53.conv.74
diff --git a/weights/yolov3-tiny.weights b/weights/yolov3-tiny.weights
new file mode 100644
index 0000000..aad7e6c
Binary files /dev/null and b/weights/yolov3-tiny.weights differ
diff --git a/weights/yolov3.weights b/weights/yolov3.weights
new file mode 100644
index 0000000..a5ed716
Binary files /dev/null and b/weights/yolov3.weights differ
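As a quick orientation, the following is a minimal inference sketch (an illustration, not code from this commit) showing how the pieces above fit together: Darknet builds the network from the cfg file, load_darknet_weights loads the pretrained weights, the image is preprocessed the same way ImageFolder in utils/datasets.py does it, and the raw predictions are filtered with non_max_suppression and mapped back to the original resolution with rescale_boxes. The file paths (config/yolov3.cfg, weights/yolov3.weights, data/coco.names, assets/dog.png) and the threshold values are assumptions chosen for illustration.

import torch
import torchvision.transforms as transforms
from PIL import Image

from models import Darknet
from utils.datasets import pad_to_square, resize
from utils.utils import load_classes, non_max_suppression, rescale_boxes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network from the cfg file and load the pretrained Darknet weights (assumed paths)
model = Darknet("config/yolov3.cfg", img_size=416).to(device)
model.load_darknet_weights("weights/yolov3.weights")
model.eval()

classes = load_classes("data/coco.names")

# Preprocess a single image exactly as ImageFolder does: tensor -> pad to square -> resize
image = Image.open("assets/dog.png").convert("RGB")
img = transforms.ToTensor()(image)
img, _ = pad_to_square(img, 0)
img = resize(img, 416).unsqueeze(0).to(device)

# A forward pass without targets returns raw detections of shape (1, num_boxes, 5 + num_classes)
with torch.no_grad():
    detections = model(img)
detections = non_max_suppression(detections, conf_thres=0.5, nms_thres=0.4)[0]

# Map the surviving boxes back to the original image resolution and print them
if detections is not None:
    detections = rescale_boxes(detections, 416, image.size[::-1])  # original_shape = (height, width)
    for det in detections:
        x1, y1, x2, y2, obj_conf, cls_conf, cls_pred = det.tolist()
        print("%s %.2f (%.0f, %.0f, %.0f, %.0f)" % (classes[int(cls_pred)], cls_conf, x1, y1, x2, y2))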