commit 703829a1230f4c6ff2f1216afa2ef499b537da21 Author: 12345qiupeng Date: Fri Mar 3 11:31:50 2023 +0800 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c3a2bd7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +data/coco/ +output/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/PyTorch-YOLOv3.iml b/.idea/PyTorch-YOLOv3.iml new file mode 100644 index 0000000..a276113 --- /dev/null +++ b/.idea/PyTorch-YOLOv3.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..ac37a5a --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..8071927 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/assets/dog.png b/assets/dog.png new file mode 100644 index 0000000..8056c38 Binary files /dev/null and b/assets/dog.png differ diff --git a/assets/giraffe.png b/assets/giraffe.png new file mode 100644 index 0000000..d0787de Binary files /dev/null and b/assets/giraffe.png differ diff --git a/assets/messi.png b/assets/messi.png new file mode 100644 index 0000000..1412915 Binary files /dev/null and b/assets/messi.png differ diff --git a/assets/traffic.png b/assets/traffic.png new file mode 100644 index 0000000..128b95f Binary files /dev/null and b/assets/traffic.png differ diff --git a/config/coco.data b/config/coco.data new file mode 100644 index 0000000..18beac1 --- /dev/null +++ b/config/coco.data @@ -0,0 +1,6 @@ +classes= 80 +train=data/coco/trainvalno5k.txt +valid=data/coco/5k.txt +names=data/coco.names +backup=backup/ +eval=coco diff --git a/config/create_custom_model.sh b/config/create_custom_model.sh new file mode 100644 index 0000000..eba2ebe --- /dev/null +++ b/config/create_custom_model.sh @@ -0,0 +1,794 @@ +#!/bin/bash + +NUM_CLASSES=$1 + +echo " +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=16 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 
+size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=$NUM_CLASSES +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + 
+[convolutional] +size=1 +stride=1 +pad=1 +filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=$NUM_CLASSES +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5)) +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=$NUM_CLASSES +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +" >> yolov3-custom.cfg diff --git a/config/custom.data b/config/custom.data new file mode 100644 index 0000000..846fad7 --- /dev/null +++ b/config/custom.data @@ -0,0 +1,4 @@ +classes= 1 +train=data/custom/train.txt +valid=data/custom/valid.txt +names=data/custom/classes.names diff --git a/config/yolov3-tiny.cfg b/config/yolov3-tiny.cfg new file mode 100644 index 0000000..ade4969 --- /dev/null +++ b/config/yolov3-tiny.cfg @@ -0,0 +1,206 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +# 0 +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +# 1 +[maxpool] +size=2 +stride=2 + +# 2 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# 3 +[maxpool] +size=2 +stride=2 + +# 4 +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +# 5 +[maxpool] +size=2 +stride=2 + +# 6 +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +# 7 +[maxpool] +size=2 +stride=2 + +# 8 +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +# 9 +[maxpool] +size=2 +stride=2 + +# 10 +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +# 11 +[maxpool] +size=2 +stride=1 + +# 12 +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +# 13 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +# 14 +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +# 15 +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +# 16 +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +# 17 +[route] +layers = -4 + +# 18 +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +# 19 +[upsample] +stride=2 + +# 20 +[route] +layers = -1, 8 + +# 21 +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +# 22 +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +# 23 +[yolo] +mask = 1,2,3 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/config/yolov3.cfg b/config/yolov3.cfg new file mode 100644 index 0000000..946e015 --- /dev/null +++ b/config/yolov3.cfg @@ -0,0 +1,788 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=16 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/data/coco.names b/data/coco.names new file mode 100644 index 0000000..ca76c80 --- /dev/null +++ b/data/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/data/custom/classes.names b/data/custom/classes.names new 
file mode 100644 index 0000000..08afa18 --- /dev/null +++ b/data/custom/classes.names @@ -0,0 +1 @@ +train diff --git a/data/custom/images/train.jpg b/data/custom/images/train.jpg new file mode 100644 index 0000000..d832967 Binary files /dev/null and b/data/custom/images/train.jpg differ diff --git a/data/custom/labels/train.txt b/data/custom/labels/train.txt new file mode 100644 index 0000000..3bf4be4 --- /dev/null +++ b/data/custom/labels/train.txt @@ -0,0 +1 @@ +0 0.515 0.5 0.21694873 0.18286777 diff --git a/data/custom/train.txt b/data/custom/train.txt new file mode 100644 index 0000000..7fa5443 --- /dev/null +++ b/data/custom/train.txt @@ -0,0 +1 @@ +data/custom/images/train.jpg diff --git a/data/custom/valid.txt b/data/custom/valid.txt new file mode 100644 index 0000000..7fa5443 --- /dev/null +++ b/data/custom/valid.txt @@ -0,0 +1 @@ +data/custom/images/train.jpg diff --git a/data/get_coco_dataset.sh b/data/get_coco_dataset.sh new file mode 100644 index 0000000..81b0017 --- /dev/null +++ b/data/get_coco_dataset.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh + +# Clone COCO API +git clone https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. + +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt diff --git a/data/samples/dog.jpg b/data/samples/dog.jpg new file mode 100644 index 0000000..77b0381 Binary files /dev/null and b/data/samples/dog.jpg differ diff --git a/data/samples/eagle.jpg b/data/samples/eagle.jpg new file mode 100644 index 0000000..8b75095 Binary files /dev/null and b/data/samples/eagle.jpg differ diff --git a/data/samples/field.jpg b/data/samples/field.jpg new file mode 100644 index 0000000..61d377f Binary files /dev/null and b/data/samples/field.jpg differ diff --git a/data/samples/giraffe.jpg b/data/samples/giraffe.jpg new file mode 100644 index 0000000..a93e8b8 Binary files /dev/null and b/data/samples/giraffe.jpg differ diff --git a/data/samples/herd_of_horses.jpg b/data/samples/herd_of_horses.jpg new file mode 100644 index 0000000..3a761f4 Binary files /dev/null and b/data/samples/herd_of_horses.jpg differ diff --git a/data/samples/messi.jpg b/data/samples/messi.jpg new file mode 100644 index 0000000..997451d Binary files /dev/null and b/data/samples/messi.jpg differ diff --git a/data/samples/person.jpg b/data/samples/person.jpg new file mode 100644 index 0000000..a137366 Binary files /dev/null and b/data/samples/person.jpg differ diff --git a/data/samples/room.jpg b/data/samples/room.jpg new file mode 100644 index 0000000..b42aaa7 Binary files /dev/null and b/data/samples/room.jpg differ diff --git a/data/samples/street.jpg b/data/samples/street.jpg new file mode 100644 index 0000000..832688c Binary files /dev/null and b/data/samples/street.jpg differ diff --git a/detect.py b/detect.py new file mode 100644 index 0000000..f23fbc8 --- /dev/null +++ b/detect.py @@ -0,0
+1,141 @@ +from __future__ import division + +from models import * +from utils.utils import * +from utils.datasets import * + +import os +import sys +import time +import datetime +import argparse + +from PIL import Image + +import torch +from torch.utils.data import DataLoader +from torchvision import datasets +from torch.autograd import Variable + +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from matplotlib.ticker import NullLocator + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--image_folder", type=str, default="data/samples", help="path to dataset") + parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") + parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") + parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") + parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold") + parser.add_argument("--nms_thres", type=float, default=0.4, help="iou thresshold for non-maximum suppression") + parser.add_argument("--batch_size", type=int, default=1, help="size of the batches") + parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation") + parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") + parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model") + opt = parser.parse_args() + print(opt) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + os.makedirs("output", exist_ok=True) + + # Set up model + model = Darknet(opt.model_def, img_size=opt.img_size).to(device) + + if opt.weights_path.endswith(".weights"): + # Load darknet weights + model.load_darknet_weights(opt.weights_path) + else: + # Load checkpoint weights + model.load_state_dict(torch.load(opt.weights_path)) + + model.eval() # Set in evaluation mode + + dataloader = DataLoader( + ImageFolder(opt.image_folder, img_size=opt.img_size), + batch_size=opt.batch_size, + shuffle=False, + num_workers=opt.n_cpu, + ) + + classes = load_classes(opt.class_path) # Extracts class labels from file + + Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor + + imgs = [] # Stores image paths + img_detections = [] # Stores detections for each image index + + print("\nPerforming object detection:") + prev_time = time.time() + for batch_i, (img_paths, input_imgs) in enumerate(dataloader): + # Configure input + input_imgs = Variable(input_imgs.type(Tensor)) + + # Get detections + with torch.no_grad(): + detections = model(input_imgs) + detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres) + + # Log progress + current_time = time.time() + inference_time = datetime.timedelta(seconds=current_time - prev_time) + prev_time = current_time + print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time)) + + # Save image and detections + imgs.extend(img_paths) + img_detections.extend(detections) + + # Bounding-box colors + cmap = plt.get_cmap("tab20b") + colors = [cmap(i) for i in np.linspace(0, 1, 20)] + + print("\nSaving images:") + # Iterate through images and save plot of detections + for img_i, (path, detections) in enumerate(zip(imgs, img_detections)): + + print("(%d) Image: '%s'" % (img_i, path)) + + # Create plot + img = np.array(Image.open(path)) + plt.figure() + fig, ax = 
plt.subplots(1) + ax.imshow(img) + + # Draw bounding boxes and labels of detections + if detections is not None: + # Rescale boxes to original image + detections = rescale_boxes(detections, opt.img_size, img.shape[:2]) + unique_labels = detections[:, -1].cpu().unique() + n_cls_preds = len(unique_labels) + bbox_colors = random.sample(colors, n_cls_preds) + for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections: + + print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item())) + + box_w = x2 - x1 + box_h = y2 - y1 + + color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])] + # Create a Rectangle patch + bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none") + # Add the bbox to the plot + ax.add_patch(bbox) + # Add label + plt.text( + x1, + y1, + s=classes[int(cls_pred)], + color="white", + verticalalignment="top", + bbox={"color": color, "pad": 0}, + ) + + # Save generated image with detections + plt.axis("off") + plt.gca().xaxis.set_major_locator(NullLocator()) + plt.gca().yaxis.set_major_locator(NullLocator()) + filename = path.split("/")[-1].split(".")[0] + plt.savefig(f"output/{filename}.png", bbox_inches="tight", pad_inches=0.0) + plt.close() diff --git a/logs/events.out.tfevents.1585286242.P-V-12.4672.5.v2 b/logs/events.out.tfevents.1585286242.P-V-12.4672.5.v2 new file mode 100644 index 0000000..b46ac44 Binary files /dev/null and b/logs/events.out.tfevents.1585286242.P-V-12.4672.5.v2 differ diff --git a/logs/events.out.tfevents.1585286419.P-V-12.9132.5.v2 b/logs/events.out.tfevents.1585286419.P-V-12.9132.5.v2 new file mode 100644 index 0000000..166d788 Binary files /dev/null and b/logs/events.out.tfevents.1585286419.P-V-12.9132.5.v2 differ diff --git a/logs/events.out.tfevents.1585286534.P-V-12.3528.5.v2 b/logs/events.out.tfevents.1585286534.P-V-12.3528.5.v2 new file mode 100644 index 0000000..0a68aca Binary files /dev/null and b/logs/events.out.tfevents.1585286534.P-V-12.3528.5.v2 differ diff --git a/logs/events.out.tfevents.1585286765.P-V-12.12896.5.v2 b/logs/events.out.tfevents.1585286765.P-V-12.12896.5.v2 new file mode 100644 index 0000000..012849e Binary files /dev/null and b/logs/events.out.tfevents.1585286765.P-V-12.12896.5.v2 differ diff --git a/logs/events.out.tfevents.1585287038.P-V-12.11624.5.v2 b/logs/events.out.tfevents.1585287038.P-V-12.11624.5.v2 new file mode 100644 index 0000000..c43ff03 Binary files /dev/null and b/logs/events.out.tfevents.1585287038.P-V-12.11624.5.v2 differ diff --git a/logs/events.out.tfevents.1585287196.P-V-12.12576.5.v2 b/logs/events.out.tfevents.1585287196.P-V-12.12576.5.v2 new file mode 100644 index 0000000..71ffc1c Binary files /dev/null and b/logs/events.out.tfevents.1585287196.P-V-12.12576.5.v2 differ diff --git a/logs/events.out.tfevents.1585287272.P-V-12.12628.5.v2 b/logs/events.out.tfevents.1585287272.P-V-12.12628.5.v2 new file mode 100644 index 0000000..16b59cb Binary files /dev/null and b/logs/events.out.tfevents.1585287272.P-V-12.12628.5.v2 differ diff --git a/logs/events.out.tfevents.1585287331.P-V-12.13252.5.v2 b/logs/events.out.tfevents.1585287331.P-V-12.13252.5.v2 new file mode 100644 index 0000000..f6422fd Binary files /dev/null and b/logs/events.out.tfevents.1585287331.P-V-12.13252.5.v2 differ diff --git a/logs/events.out.tfevents.1585287394.P-V-12.1776.5.v2 b/logs/events.out.tfevents.1585287394.P-V-12.1776.5.v2 new file mode 100644 index 0000000..d4efe7e Binary files /dev/null and b/logs/events.out.tfevents.1585287394.P-V-12.1776.5.v2 differ diff --git 
a/logs/events.out.tfevents.1585287408.P-V-12.11580.5.v2 b/logs/events.out.tfevents.1585287408.P-V-12.11580.5.v2 new file mode 100644 index 0000000..3e87a59 Binary files /dev/null and b/logs/events.out.tfevents.1585287408.P-V-12.11580.5.v2 differ diff --git a/logs/events.out.tfevents.1585287492.P-V-12.2360.5.v2 b/logs/events.out.tfevents.1585287492.P-V-12.2360.5.v2 new file mode 100644 index 0000000..32f5124 Binary files /dev/null and b/logs/events.out.tfevents.1585287492.P-V-12.2360.5.v2 differ diff --git a/logs/events.out.tfevents.1585287551.P-V-12.13140.5.v2 b/logs/events.out.tfevents.1585287551.P-V-12.13140.5.v2 new file mode 100644 index 0000000..4520024 Binary files /dev/null and b/logs/events.out.tfevents.1585287551.P-V-12.13140.5.v2 differ diff --git a/logs/events.out.tfevents.1585287636.P-V-12.1840.5.v2 b/logs/events.out.tfevents.1585287636.P-V-12.1840.5.v2 new file mode 100644 index 0000000..e990045 Binary files /dev/null and b/logs/events.out.tfevents.1585287636.P-V-12.1840.5.v2 differ diff --git a/logs/events.out.tfevents.1585287761.P-V-12.5664.5.v2 b/logs/events.out.tfevents.1585287761.P-V-12.5664.5.v2 new file mode 100644 index 0000000..58ecc2e Binary files /dev/null and b/logs/events.out.tfevents.1585287761.P-V-12.5664.5.v2 differ diff --git a/logs/events.out.tfevents.1585287779.P-V-12.1184.5.v2 b/logs/events.out.tfevents.1585287779.P-V-12.1184.5.v2 new file mode 100644 index 0000000..bd019ff Binary files /dev/null and b/logs/events.out.tfevents.1585287779.P-V-12.1184.5.v2 differ diff --git a/logs/events.out.tfevents.1585287939.P-V-12.7816.5.v2 b/logs/events.out.tfevents.1585287939.P-V-12.7816.5.v2 new file mode 100644 index 0000000..93d7d58 Binary files /dev/null and b/logs/events.out.tfevents.1585287939.P-V-12.7816.5.v2 differ diff --git a/logs/events.out.tfevents.1585288035.P-V-12.1012.5.v2 b/logs/events.out.tfevents.1585288035.P-V-12.1012.5.v2 new file mode 100644 index 0000000..7888197 Binary files /dev/null and b/logs/events.out.tfevents.1585288035.P-V-12.1012.5.v2 differ diff --git a/logs/events.out.tfevents.1585288063.P-V-12.12836.5.v2 b/logs/events.out.tfevents.1585288063.P-V-12.12836.5.v2 new file mode 100644 index 0000000..0d23900 Binary files /dev/null and b/logs/events.out.tfevents.1585288063.P-V-12.12836.5.v2 differ diff --git a/logs/events.out.tfevents.1585288244.P-V-12.10436.5.v2 b/logs/events.out.tfevents.1585288244.P-V-12.10436.5.v2 new file mode 100644 index 0000000..88487f2 Binary files /dev/null and b/logs/events.out.tfevents.1585288244.P-V-12.10436.5.v2 differ diff --git a/logs/events.out.tfevents.1585288562.P-V-12.8452.5.v2 b/logs/events.out.tfevents.1585288562.P-V-12.8452.5.v2 new file mode 100644 index 0000000..511ccab Binary files /dev/null and b/logs/events.out.tfevents.1585288562.P-V-12.8452.5.v2 differ diff --git a/logs/events.out.tfevents.1585288617.P-V-12.11624.5.v2 b/logs/events.out.tfevents.1585288617.P-V-12.11624.5.v2 new file mode 100644 index 0000000..fc10541 Binary files /dev/null and b/logs/events.out.tfevents.1585288617.P-V-12.11624.5.v2 differ diff --git a/logs/events.out.tfevents.1585288857.P-V-12.10380.5.v2 b/logs/events.out.tfevents.1585288857.P-V-12.10380.5.v2 new file mode 100644 index 0000000..2a1c7b1 Binary files /dev/null and b/logs/events.out.tfevents.1585288857.P-V-12.10380.5.v2 differ diff --git a/logs/events.out.tfevents.1585288957.P-V-12.12580.5.v2 b/logs/events.out.tfevents.1585288957.P-V-12.12580.5.v2 new file mode 100644 index 0000000..1f04972 Binary files /dev/null and 
b/logs/events.out.tfevents.1585288957.P-V-12.12580.5.v2 differ diff --git a/logs/events.out.tfevents.1585297145.P-V-12.2868.5.v2 b/logs/events.out.tfevents.1585297145.P-V-12.2868.5.v2 new file mode 100644 index 0000000..a43e045 Binary files /dev/null and b/logs/events.out.tfevents.1585297145.P-V-12.2868.5.v2 differ diff --git a/logs/events.out.tfevents.1585297391.P-V-12.11856.5.v2 b/logs/events.out.tfevents.1585297391.P-V-12.11856.5.v2 new file mode 100644 index 0000000..97b074c Binary files /dev/null and b/logs/events.out.tfevents.1585297391.P-V-12.11856.5.v2 differ diff --git a/logs/events.out.tfevents.1585297467.P-V-12.12800.5.v2 b/logs/events.out.tfevents.1585297467.P-V-12.12800.5.v2 new file mode 100644 index 0000000..95d429f Binary files /dev/null and b/logs/events.out.tfevents.1585297467.P-V-12.12800.5.v2 differ diff --git a/logs/events.out.tfevents.1585297601.P-V-12.14276.5.v2 b/logs/events.out.tfevents.1585297601.P-V-12.14276.5.v2 new file mode 100644 index 0000000..95d9751 Binary files /dev/null and b/logs/events.out.tfevents.1585297601.P-V-12.14276.5.v2 differ diff --git a/logs/events.out.tfevents.1585304696.P-V-12.14936.5.v2 b/logs/events.out.tfevents.1585304696.P-V-12.14936.5.v2 new file mode 100644 index 0000000..e1b6c20 Binary files /dev/null and b/logs/events.out.tfevents.1585304696.P-V-12.14936.5.v2 differ diff --git a/logs/events.out.tfevents.1585306196.P-V-12.13036.5.v2 b/logs/events.out.tfevents.1585306196.P-V-12.13036.5.v2 new file mode 100644 index 0000000..c1fa408 Binary files /dev/null and b/logs/events.out.tfevents.1585306196.P-V-12.13036.5.v2 differ diff --git a/logs/events.out.tfevents.1585306288.P-V-12.13988.5.v2 b/logs/events.out.tfevents.1585306288.P-V-12.13988.5.v2 new file mode 100644 index 0000000..e1344d2 Binary files /dev/null and b/logs/events.out.tfevents.1585306288.P-V-12.13988.5.v2 differ diff --git a/logs/events.out.tfevents.1585306362.P-V-12.15080.5.v2 b/logs/events.out.tfevents.1585306362.P-V-12.15080.5.v2 new file mode 100644 index 0000000..0a8f136 Binary files /dev/null and b/logs/events.out.tfevents.1585306362.P-V-12.15080.5.v2 differ diff --git a/logs/events.out.tfevents.1585307223.P-V-12.15892.5.v2 b/logs/events.out.tfevents.1585307223.P-V-12.15892.5.v2 new file mode 100644 index 0000000..2142487 Binary files /dev/null and b/logs/events.out.tfevents.1585307223.P-V-12.15892.5.v2 differ diff --git a/logs/events.out.tfevents.1585730649.P-V-12.13564.5.v2 b/logs/events.out.tfevents.1585730649.P-V-12.13564.5.v2 new file mode 100644 index 0000000..a59c87e Binary files /dev/null and b/logs/events.out.tfevents.1585730649.P-V-12.13564.5.v2 differ diff --git a/logs/events.out.tfevents.1585731158.P-V-12.20540.5.v2 b/logs/events.out.tfevents.1585731158.P-V-12.20540.5.v2 new file mode 100644 index 0000000..ff6cc40 Binary files /dev/null and b/logs/events.out.tfevents.1585731158.P-V-12.20540.5.v2 differ diff --git a/logs/events.out.tfevents.1585820895.P-V-12.14140.5.v2 b/logs/events.out.tfevents.1585820895.P-V-12.14140.5.v2 new file mode 100644 index 0000000..be621be Binary files /dev/null and b/logs/events.out.tfevents.1585820895.P-V-12.14140.5.v2 differ diff --git a/logs/events.out.tfevents.1585821091.P-V-12.1188.5.v2 b/logs/events.out.tfevents.1585821091.P-V-12.1188.5.v2 new file mode 100644 index 0000000..f0fe59a Binary files /dev/null and b/logs/events.out.tfevents.1585821091.P-V-12.1188.5.v2 differ diff --git a/logs/events.out.tfevents.1585821171.P-V-12.18044.5.v2 b/logs/events.out.tfevents.1585821171.P-V-12.18044.5.v2 new file mode 100644 index 
0000000..515053e Binary files /dev/null and b/logs/events.out.tfevents.1585821171.P-V-12.18044.5.v2 differ diff --git a/logs/events.out.tfevents.1585821566.P-V-12.19632.5.v2 b/logs/events.out.tfevents.1585821566.P-V-12.19632.5.v2 new file mode 100644 index 0000000..d6ec179 Binary files /dev/null and b/logs/events.out.tfevents.1585821566.P-V-12.19632.5.v2 differ diff --git a/logs/events.out.tfevents.1585821698.P-V-12.12456.5.v2 b/logs/events.out.tfevents.1585821698.P-V-12.12456.5.v2 new file mode 100644 index 0000000..8d68b21 Binary files /dev/null and b/logs/events.out.tfevents.1585821698.P-V-12.12456.5.v2 differ diff --git a/logs/events.out.tfevents.1585822213.P-V-12.2604.5.v2 b/logs/events.out.tfevents.1585822213.P-V-12.2604.5.v2 new file mode 100644 index 0000000..136021c Binary files /dev/null and b/logs/events.out.tfevents.1585822213.P-V-12.2604.5.v2 differ diff --git a/logs/events.out.tfevents.1585822453.P-V-12.11736.5.v2 b/logs/events.out.tfevents.1585822453.P-V-12.11736.5.v2 new file mode 100644 index 0000000..4b19db8 Binary files /dev/null and b/logs/events.out.tfevents.1585822453.P-V-12.11736.5.v2 differ diff --git a/logs/events.out.tfevents.1585823008.P-V-12.6436.5.v2 b/logs/events.out.tfevents.1585823008.P-V-12.6436.5.v2 new file mode 100644 index 0000000..4ed1672 Binary files /dev/null and b/logs/events.out.tfevents.1585823008.P-V-12.6436.5.v2 differ diff --git a/logs/events.out.tfevents.1585823156.P-V-12.20104.5.v2 b/logs/events.out.tfevents.1585823156.P-V-12.20104.5.v2 new file mode 100644 index 0000000..d8bee3a Binary files /dev/null and b/logs/events.out.tfevents.1585823156.P-V-12.20104.5.v2 differ diff --git a/logs/events.out.tfevents.1585823199.P-V-12.16624.5.v2 b/logs/events.out.tfevents.1585823199.P-V-12.16624.5.v2 new file mode 100644 index 0000000..5d5ab45 Binary files /dev/null and b/logs/events.out.tfevents.1585823199.P-V-12.16624.5.v2 differ diff --git a/logs/events.out.tfevents.1585823722.P-V-12.1188.5.v2 b/logs/events.out.tfevents.1585823722.P-V-12.1188.5.v2 new file mode 100644 index 0000000..fcbe32e Binary files /dev/null and b/logs/events.out.tfevents.1585823722.P-V-12.1188.5.v2 differ diff --git a/logs/events.out.tfevents.1585823992.P-V-12.11752.5.v2 b/logs/events.out.tfevents.1585823992.P-V-12.11752.5.v2 new file mode 100644 index 0000000..560ddba Binary files /dev/null and b/logs/events.out.tfevents.1585823992.P-V-12.11752.5.v2 differ diff --git a/logs/events.out.tfevents.1585825203.P-V-12.11356.5.v2 b/logs/events.out.tfevents.1585825203.P-V-12.11356.5.v2 new file mode 100644 index 0000000..c0fca69 Binary files /dev/null and b/logs/events.out.tfevents.1585825203.P-V-12.11356.5.v2 differ diff --git a/logs/events.out.tfevents.1585831722.P-V-12.7600.5.v2 b/logs/events.out.tfevents.1585831722.P-V-12.7600.5.v2 new file mode 100644 index 0000000..46f3ca2 Binary files /dev/null and b/logs/events.out.tfevents.1585831722.P-V-12.7600.5.v2 differ diff --git a/logs/events.out.tfevents.1585831750.P-V-12.15720.5.v2 b/logs/events.out.tfevents.1585831750.P-V-12.15720.5.v2 new file mode 100644 index 0000000..afa03e1 Binary files /dev/null and b/logs/events.out.tfevents.1585831750.P-V-12.15720.5.v2 differ diff --git a/logs/events.out.tfevents.1585832218.P-V-12.14532.5.v2 b/logs/events.out.tfevents.1585832218.P-V-12.14532.5.v2 new file mode 100644 index 0000000..650e58a Binary files /dev/null and b/logs/events.out.tfevents.1585832218.P-V-12.14532.5.v2 differ diff --git a/logs/events.out.tfevents.1585832274.P-V-12.11564.5.v2 
b/logs/events.out.tfevents.1585832274.P-V-12.11564.5.v2 new file mode 100644 index 0000000..4982c04 Binary files /dev/null and b/logs/events.out.tfevents.1585832274.P-V-12.11564.5.v2 differ diff --git a/logs/events.out.tfevents.1585832314.P-V-12.12160.5.v2 b/logs/events.out.tfevents.1585832314.P-V-12.12160.5.v2 new file mode 100644 index 0000000..be563a1 Binary files /dev/null and b/logs/events.out.tfevents.1585832314.P-V-12.12160.5.v2 differ diff --git a/logs/events.out.tfevents.1585832350.P-V-12.6972.5.v2 b/logs/events.out.tfevents.1585832350.P-V-12.6972.5.v2 new file mode 100644 index 0000000..7836d44 Binary files /dev/null and b/logs/events.out.tfevents.1585832350.P-V-12.6972.5.v2 differ diff --git a/logs/events.out.tfevents.1585832388.P-V-12.10572.5.v2 b/logs/events.out.tfevents.1585832388.P-V-12.10572.5.v2 new file mode 100644 index 0000000..3789935 Binary files /dev/null and b/logs/events.out.tfevents.1585832388.P-V-12.10572.5.v2 differ diff --git a/logs/events.out.tfevents.1585832416.P-V-12.20072.5.v2 b/logs/events.out.tfevents.1585832416.P-V-12.20072.5.v2 new file mode 100644 index 0000000..41bb081 Binary files /dev/null and b/logs/events.out.tfevents.1585832416.P-V-12.20072.5.v2 differ diff --git a/logs/events.out.tfevents.1585832445.P-V-12.3348.5.v2 b/logs/events.out.tfevents.1585832445.P-V-12.3348.5.v2 new file mode 100644 index 0000000..046129e Binary files /dev/null and b/logs/events.out.tfevents.1585832445.P-V-12.3348.5.v2 differ diff --git a/logs/events.out.tfevents.1585832608.P-V-12.17292.5.v2 b/logs/events.out.tfevents.1585832608.P-V-12.17292.5.v2 new file mode 100644 index 0000000..29e6def Binary files /dev/null and b/logs/events.out.tfevents.1585832608.P-V-12.17292.5.v2 differ diff --git a/logs/events.out.tfevents.1585832666.P-V-12.19260.5.v2 b/logs/events.out.tfevents.1585832666.P-V-12.19260.5.v2 new file mode 100644 index 0000000..8c301ff Binary files /dev/null and b/logs/events.out.tfevents.1585832666.P-V-12.19260.5.v2 differ diff --git a/logs/events.out.tfevents.1585832711.P-V-12.4128.5.v2 b/logs/events.out.tfevents.1585832711.P-V-12.4128.5.v2 new file mode 100644 index 0000000..fbd281c Binary files /dev/null and b/logs/events.out.tfevents.1585832711.P-V-12.4128.5.v2 differ diff --git a/logs/events.out.tfevents.1585832764.P-V-12.19252.5.v2 b/logs/events.out.tfevents.1585832764.P-V-12.19252.5.v2 new file mode 100644 index 0000000..94bc006 Binary files /dev/null and b/logs/events.out.tfevents.1585832764.P-V-12.19252.5.v2 differ diff --git a/logs/events.out.tfevents.1585832797.P-V-12.20048.5.v2 b/logs/events.out.tfevents.1585832797.P-V-12.20048.5.v2 new file mode 100644 index 0000000..b6fa912 Binary files /dev/null and b/logs/events.out.tfevents.1585832797.P-V-12.20048.5.v2 differ diff --git a/logs/events.out.tfevents.1585832872.P-V-12.19792.5.v2 b/logs/events.out.tfevents.1585832872.P-V-12.19792.5.v2 new file mode 100644 index 0000000..c740b6c Binary files /dev/null and b/logs/events.out.tfevents.1585832872.P-V-12.19792.5.v2 differ diff --git a/logs/events.out.tfevents.1585832909.P-V-12.17900.5.v2 b/logs/events.out.tfevents.1585832909.P-V-12.17900.5.v2 new file mode 100644 index 0000000..07e3f2e Binary files /dev/null and b/logs/events.out.tfevents.1585832909.P-V-12.17900.5.v2 differ diff --git a/logs/events.out.tfevents.1585832962.P-V-12.14388.5.v2 b/logs/events.out.tfevents.1585832962.P-V-12.14388.5.v2 new file mode 100644 index 0000000..62fefb6 Binary files /dev/null and b/logs/events.out.tfevents.1585832962.P-V-12.14388.5.v2 differ diff --git 
a/logs/events.out.tfevents.1585833042.P-V-12.17788.5.v2 b/logs/events.out.tfevents.1585833042.P-V-12.17788.5.v2 new file mode 100644 index 0000000..a032935 Binary files /dev/null and b/logs/events.out.tfevents.1585833042.P-V-12.17788.5.v2 differ diff --git a/logs/events.out.tfevents.1585888551.P-V-12.21420.5.v2 b/logs/events.out.tfevents.1585888551.P-V-12.21420.5.v2 new file mode 100644 index 0000000..81700fa Binary files /dev/null and b/logs/events.out.tfevents.1585888551.P-V-12.21420.5.v2 differ diff --git a/logs/events.out.tfevents.1585889167.P-V-12.10180.5.v2 b/logs/events.out.tfevents.1585889167.P-V-12.10180.5.v2 new file mode 100644 index 0000000..1fcd186 Binary files /dev/null and b/logs/events.out.tfevents.1585889167.P-V-12.10180.5.v2 differ diff --git a/logs/events.out.tfevents.1585889420.P-V-12.22636.5.v2 b/logs/events.out.tfevents.1585889420.P-V-12.22636.5.v2 new file mode 100644 index 0000000..7f949f7 Binary files /dev/null and b/logs/events.out.tfevents.1585889420.P-V-12.22636.5.v2 differ diff --git a/logs/events.out.tfevents.1585889471.P-V-12.19200.5.v2 b/logs/events.out.tfevents.1585889471.P-V-12.19200.5.v2 new file mode 100644 index 0000000..7a002cb Binary files /dev/null and b/logs/events.out.tfevents.1585889471.P-V-12.19200.5.v2 differ diff --git a/logs/events.out.tfevents.1585894384.P-V-12.21692.5.v2 b/logs/events.out.tfevents.1585894384.P-V-12.21692.5.v2 new file mode 100644 index 0000000..f39c19b Binary files /dev/null and b/logs/events.out.tfevents.1585894384.P-V-12.21692.5.v2 differ diff --git a/logs/events.out.tfevents.1585895045.P-V-12.21520.5.v2 b/logs/events.out.tfevents.1585895045.P-V-12.21520.5.v2 new file mode 100644 index 0000000..98f37ed Binary files /dev/null and b/logs/events.out.tfevents.1585895045.P-V-12.21520.5.v2 differ diff --git a/logs/events.out.tfevents.1585975838.P-V-12.24392.5.v2 b/logs/events.out.tfevents.1585975838.P-V-12.24392.5.v2 new file mode 100644 index 0000000..e235559 Binary files /dev/null and b/logs/events.out.tfevents.1585975838.P-V-12.24392.5.v2 differ diff --git a/logs/events.out.tfevents.1585978553.P-V-12.21292.5.v2 b/logs/events.out.tfevents.1585978553.P-V-12.21292.5.v2 new file mode 100644 index 0000000..7d67f40 Binary files /dev/null and b/logs/events.out.tfevents.1585978553.P-V-12.21292.5.v2 differ diff --git a/models.py b/models.py new file mode 100644 index 0000000..0f1b15e --- /dev/null +++ b/models.py @@ -0,0 +1,345 @@ +from __future__ import division + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +import numpy as np + +from utils.parse_config import * +from utils.utils import build_targets, to_cpu, non_max_suppression + +import matplotlib.pyplot as plt +import matplotlib.patches as patches + + +def create_modules(module_defs): + """ + Constructs module list of layer blocks from module configuration in module_defs + """ + hyperparams = module_defs.pop(0) + output_filters = [int(hyperparams["channels"])] + module_list = nn.ModuleList() + for module_i, module_def in enumerate(module_defs): + modules = nn.Sequential() + + if module_def["type"] == "convolutional": + bn = int(module_def["batch_normalize"]) + filters = int(module_def["filters"]) + kernel_size = int(module_def["size"]) + pad = (kernel_size - 1) // 2 + modules.add_module( + f"conv_{module_i}", + nn.Conv2d( + in_channels=output_filters[-1], + out_channels=filters, + kernel_size=kernel_size, + stride=int(module_def["stride"]), + padding=pad, + bias=not bn, + ), + ) + if bn: + 
modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)) + if module_def["activation"] == "leaky": + modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) + + elif module_def["type"] == "maxpool": + kernel_size = int(module_def["size"]) + stride = int(module_def["stride"]) + if kernel_size == 2 and stride == 1: + modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))) + maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) + modules.add_module(f"maxpool_{module_i}", maxpool) + + elif module_def["type"] == "upsample": + upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest") + modules.add_module(f"upsample_{module_i}", upsample) + + elif module_def["type"] == "route": # 输入1:26*26*256 输入2:26*26*128 输出:26*26*(256+128) + layers = [int(x) for x in module_def["layers"].split(",")] + filters = sum([output_filters[1:][i] for i in layers]) + modules.add_module(f"route_{module_i}", EmptyLayer()) + + elif module_def["type"] == "shortcut": + filters = output_filters[1:][int(module_def["from"])] + modules.add_module(f"shortcut_{module_i}", EmptyLayer()) + + elif module_def["type"] == "yolo": + anchor_idxs = [int(x) for x in module_def["mask"].split(",")] + # Extract anchors + anchors = [int(x) for x in module_def["anchors"].split(",")] + anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] + anchors = [anchors[i] for i in anchor_idxs] + num_classes = int(module_def["classes"]) + img_size = int(hyperparams["height"]) + # Define detection layer + yolo_layer = YOLOLayer(anchors, num_classes, img_size) + modules.add_module(f"yolo_{module_i}", yolo_layer) + # Register module list and number of output filters + module_list.append(modules) + output_filters.append(filters) + + return hyperparams, module_list + + +class Upsample(nn.Module): + """ nn.Upsample is deprecated """ + + def __init__(self, scale_factor, mode="nearest"): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.mode = mode + + def forward(self, x): + x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + return x + + +class EmptyLayer(nn.Module): + """Placeholder for 'route' and 'shortcut' layers""" + + def __init__(self): + super(EmptyLayer, self).__init__() + + +class YOLOLayer(nn.Module): + """Detection layer""" + + def __init__(self, anchors, num_classes, img_dim=416): + super(YOLOLayer, self).__init__() + self.anchors = anchors + self.num_anchors = len(anchors) + self.num_classes = num_classes + self.ignore_thres = 0.5 + self.mse_loss = nn.MSELoss() + self.bce_loss = nn.BCELoss() + self.obj_scale = 1 + self.noobj_scale = 100 + self.metrics = {} + self.img_dim = img_dim + self.grid_size = 0 # grid size + + def compute_grid_offsets(self, grid_size, cuda=True): + self.grid_size = grid_size + g = self.grid_size + FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor + self.stride = self.img_dim / self.grid_size + # Calculate offsets for each grid + self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor) + self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor) + self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]) + self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) + self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) + + def forward(self, x, targets=None, img_dim=None): + # Tensors 
+ def forward(self, x, targets=None, img_dim=None): + # Tensors for cuda support + print(x.shape) + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor + + self.img_dim = img_dim + num_samples = x.size(0) + grid_size = x.size(2) + + prediction = ( + x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size) + .permute(0, 1, 3, 4, 2) + .contiguous() + ) + print(prediction.shape) + # Get outputs + x = torch.sigmoid(prediction[..., 0]) # Center x + y = torch.sigmoid(prediction[..., 1]) # Center y + w = prediction[..., 2] # Width + h = prediction[..., 3] # Height + pred_conf = torch.sigmoid(prediction[..., 4]) # Conf + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. + + # If grid size does not match current we compute new offsets + if grid_size != self.grid_size: + self.compute_grid_offsets(grid_size, cuda=x.is_cuda) # turn relative cell offsets into absolute grid positions, e.g. (0.5, 0.5) becomes (11.5, 11.5) + + # Add offset and scale with anchors # actual positions on the feature map + pred_boxes = FloatTensor(prediction[..., :4].shape) + pred_boxes[..., 0] = x.data + self.grid_x + pred_boxes[..., 1] = y.data + self.grid_y + pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w + pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h + + output = torch.cat( + ( + pred_boxes.view(num_samples, -1, 4) * self.stride, # scale back to the original image size + pred_conf.view(num_samples, -1, 1), + pred_cls.view(num_samples, -1, self.num_classes), + ), + -1, + ) + + if targets is None: + return output, 0 + else: + iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( + pred_boxes=pred_boxes, + pred_cls=pred_cls, + target=targets, + anchors=self.scaled_anchors, + ignore_thres=self.ignore_thres, + ) + # iou_scores: IoU between each ground-truth box and its best-matching anchor; class_mask: where the class prediction is correct; obj_mask: 1 at the best anchor for each target box; noobj_mask: 0 wherever obj_mask is 1 or the computed IoU exceeds the ignore threshold, 1 everywhere else; tx, ty, tw, th: the xywh regression targets at this feature-map scale, i.e. the values we fit; tconf: target objectness
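Before the loss terms below, a small illustrative sketch (not part of the commit) of how obj_mask and noobj_mask gate the confidence loss, using the obj_scale=1 and noobj_scale=100 set in __init__; the tensors here are toy values invented for the example.

import torch
import torch.nn as nn

bce = nn.BCELoss()
pred_conf  = torch.tensor([0.9, 0.2, 0.1, 0.7])          # predicted objectness for four anchor cells
tconf      = torch.tensor([1.0, 0.0, 0.0, 0.0])          # target objectness
obj_mask   = torch.tensor([True, False, False, False])   # best anchor for the ground-truth box
noobj_mask = torch.tensor([False, True, True, False])    # last cell ignored: its IoU is above ignore_thres

obj_scale, noobj_scale = 1, 100
loss_conf = obj_scale * bce(pred_conf[obj_mask], tconf[obj_mask]) + noobj_scale * bce(pred_conf[noobj_mask], tconf[noobj_mask])
print(loss_conf.item())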
loss) + loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) # 只计算有目标的 + loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) + loss_w = self.mse_loss(w[obj_mask], tw[obj_mask]) + loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) + loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) + loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask]) + loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj #有物体越接近1越好 没物体的越接近0越好 + loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask]) #分类损失 + total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls #总损失 + + # Metrics + cls_acc = 100 * class_mask[obj_mask].mean() + conf_obj = pred_conf[obj_mask].mean() + conf_noobj = pred_conf[noobj_mask].mean() + conf50 = (pred_conf > 0.5).float() + iou50 = (iou_scores > 0.5).float() + iou75 = (iou_scores > 0.75).float() + detected_mask = conf50 * class_mask * tconf + precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) + recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) + recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) + + self.metrics = { + "loss": to_cpu(total_loss).item(), + "x": to_cpu(loss_x).item(), + "y": to_cpu(loss_y).item(), + "w": to_cpu(loss_w).item(), + "h": to_cpu(loss_h).item(), + "conf": to_cpu(loss_conf).item(), + "cls": to_cpu(loss_cls).item(), + "cls_acc": to_cpu(cls_acc).item(), + "recall50": to_cpu(recall50).item(), + "recall75": to_cpu(recall75).item(), + "precision": to_cpu(precision).item(), + "conf_obj": to_cpu(conf_obj).item(), + "conf_noobj": to_cpu(conf_noobj).item(), + "grid_size": grid_size, + } + + return output, total_loss + + +class Darknet(nn.Module): + """YOLOv3 object detection model""" + + def __init__(self, config_path, img_size=416): + super(Darknet, self).__init__() + self.module_defs = parse_model_config(config_path) + self.hyperparams, self.module_list = create_modules(self.module_defs) + self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")] + self.img_size = img_size + self.seen = 0 + self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32) + + def forward(self, x, targets=None): + img_dim = x.shape[2] + loss = 0 + layer_outputs, yolo_outputs = [], [] + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if module_def["type"] in ["convolutional", "upsample", "maxpool"]: + x = module(x) + elif module_def["type"] == "route": + x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1) + elif module_def["type"] == "shortcut": + layer_i = int(module_def["from"]) + x = layer_outputs[-1] + layer_outputs[layer_i] + elif module_def["type"] == "yolo": + x, layer_loss = module[0](x, targets, img_dim) + loss += layer_loss + yolo_outputs.append(x) + layer_outputs.append(x) + yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) + return yolo_outputs if targets is None else (loss, yolo_outputs) + + def load_darknet_weights(self, weights_path): + """Parses and loads the weights stored in 'weights_path'""" + + # Open the weights file + with open(weights_path, "rb") as f: + header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values + self.header_info = header # Needed to write header when saving weights + self.seen = header[3] # number of images seen during training + weights = np.fromfile(f, dtype=np.float32) # The rest are weights + + # Establish cutoff for loading backbone weights + cutoff = None + if "darknet53.conv.74" in 
weights_path: + cutoff = 75 + + ptr = 0 + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if i == cutoff: + break + if module_def["type"] == "convolutional": + conv_layer = module[0] + if module_def["batch_normalize"]: + # Load BN bias, weights, running mean and running variance + bn_layer = module[1] + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean) + bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + + def save_darknet_weights(self, path, cutoff=-1): + """ + @:param path - path of the new weights file + @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved) + """ + fp = open(path, "wb") + self.header_info[3] = self.seen + self.header_info.tofile(fp) + + # Iterate through layers + for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if module_def["type"] == "convolutional": + conv_layer = module[0] + # If batch norm, load bn first + if module_def["batch_normalize"]: + bn_layer = module[1] + bn_layer.bias.data.cpu().numpy().tofile(fp) + bn_layer.weight.data.cpu().numpy().tofile(fp) + bn_layer.running_mean.data.cpu().numpy().tofile(fp) + bn_layer.running_var.data.cpu().numpy().tofile(fp) + # Load conv bias + else: + conv_layer.bias.data.cpu().numpy().tofile(fp) + # Load conv weights + conv_layer.weight.data.cpu().numpy().tofile(fp) + + fp.close() diff --git a/test.py b/test.py new file mode 100644 index 0000000..98ee186 --- /dev/null +++ b/test.py @@ -0,0 +1,105 @@ +from __future__ import division + +from models import * +from utils.utils import * +from utils.datasets import * +from utils.parse_config import * + +import os +import sys +import time +import datetime +import argparse +import tqdm + +import torch +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision import transforms +from torch.autograd import Variable +import torch.optim as optim + + +def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size): + model.eval() + + # Get dataloader + dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False) + dataloader = torch.utils.data.DataLoader( + dataset, batch_size=batch_size, shuffle=False, num_workers=1, collate_fn=dataset.collate_fn + ) + + Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor + + labels = [] + sample_metrics = [] # List of tuples (TP, confs, pred) + for batch_i, (_, imgs, targets) in enumerate(tqdm.tqdm(dataloader, desc="Detecting objects")): + + # Extract labels + labels += targets[:, 1].tolist() + # Rescale target + targets[:, 
2:] = xywh2xyxy(targets[:, 2:]) + targets[:, 2:] *= img_size + + imgs = Variable(imgs.type(Tensor), requires_grad=False) + + with torch.no_grad(): + outputs = model(imgs) + outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres) + + sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres) + + # Concatenate sample statistics + true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*sample_metrics))] + precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, labels) + + return precision, recall, AP, f1, ap_class + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch") + parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") + parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") + parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") + parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") + parser.add_argument("--iou_thres", type=float, default=0.5, help="iou threshold required to qualify as detected") + parser.add_argument("--conf_thres", type=float, default=0.001, help="object confidence threshold") + parser.add_argument("--nms_thres", type=float, default=0.5, help="iou thresshold for non-maximum suppression") + parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation") + parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") + opt = parser.parse_args() + print(opt) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + data_config = parse_data_config(opt.data_config) + valid_path = data_config["valid"] + class_names = load_classes(data_config["names"]) + + # Initiate model + model = Darknet(opt.model_def).to(device) + if opt.weights_path.endswith(".weights"): + # Load darknet weights + model.load_darknet_weights(opt.weights_path) + else: + # Load checkpoint weights + model.load_state_dict(torch.load(opt.weights_path)) + + print("Compute mAP...") + + precision, recall, AP, f1, ap_class = evaluate( + model, + path=valid_path, + iou_thres=opt.iou_thres, + conf_thres=opt.conf_thres, + nms_thres=opt.nms_thres, + img_size=opt.img_size, + batch_size=8, + ) + + print("Average Precisions:") + for i, c in enumerate(ap_class): + print(f"+ Class '{c}' ({class_names[c]}) - AP: {AP[i]}") + + print(f"mAP: {AP.mean()}") diff --git a/train.py b/train.py new file mode 100644 index 0000000..08820a5 --- /dev/null +++ b/train.py @@ -0,0 +1,187 @@ +from __future__ import division + +from models import * +from utils.logger import * +from utils.utils import * +from utils.datasets import * +from utils.parse_config import * +from test import evaluate + +import warnings +warnings.filterwarnings("ignore") + +from terminaltables import AsciiTable + +import os +import sys +import time +import datetime +import argparse + +import torch +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision import transforms +from torch.autograd import Variable +import torch.optim as optim + +""" +--data_config config/coco.data +--pretrained_weights weights/darknet53.conv.74 +""" + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + 
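+    # Training is configured entirely through these command-line flags; an example invocation
+    # (paths assumed, mirroring the usage note in the string literal above) is:
+    #   python train.py --data_config config/coco.data --pretrained_weights weights/darknet53.conv.74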
parser.add_argument("--epochs", type=int, default=100, help="number of epochs") + parser.add_argument("--batch_size", type=int, default=4, help="size of each image batch") + parser.add_argument("--gradient_accumulations", type=int, default=2, help="number of gradient accums before step") + parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") + parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") + parser.add_argument("--pretrained_weights", type=str, help="if specified starts from checkpoint model") + parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation") + parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") + parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between saving model weights") + parser.add_argument("--evaluation_interval", type=int, default=1, help="interval evaluations on validation set") + parser.add_argument("--compute_map", default=False, help="if True computes mAP every tenth batch") + parser.add_argument("--multiscale_training", default=True, help="allow for multi-scale training") + opt = parser.parse_args() + print(opt) + + logger = Logger("logs") + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + os.makedirs("output", exist_ok=True) + os.makedirs("checkpoints", exist_ok=True) + + # Get data configuration + data_config = parse_data_config(opt.data_config) + train_path = data_config["train"] + valid_path = data_config["valid"] + class_names = load_classes(data_config["names"]) + + # Initiate model + model = Darknet(opt.model_def).to(device) + model.apply(weights_init_normal) + + # If specified we start from checkpoint + if opt.pretrained_weights: + if opt.pretrained_weights.endswith(".pth"): + model.load_state_dict(torch.load(opt.pretrained_weights)) + else: + model.load_darknet_weights(opt.pretrained_weights) + + # Get dataloader + dataset = ListDataset(train_path, augment=True, multiscale=opt.multiscale_training) + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=opt.batch_size, + shuffle=True, + num_workers=opt.n_cpu, + pin_memory=True, + collate_fn=dataset.collate_fn, + ) + + optimizer = torch.optim.Adam(model.parameters()) + + metrics = [ + "grid_size", + "loss", + "x", + "y", + "w", + "h", + "conf", + "cls", + "cls_acc", + "recall50", + "recall75", + "precision", + "conf_obj", + "conf_noobj", + ] + + for epoch in range(opt.epochs): + model.train() + start_time = time.time() + for batch_i, (_, imgs, targets) in enumerate(dataloader): + batches_done = len(dataloader) * epoch + batch_i + + imgs = Variable(imgs.to(device)) + targets = Variable(targets.to(device), requires_grad=False) + print ('imgs',imgs.shape) + print ('targets',targets.shape) + loss, outputs = model(imgs, targets) + loss.backward() + + if batches_done % opt.gradient_accumulations: + # Accumulates gradient before each step + optimizer.step() + optimizer.zero_grad() + + # ---------------- + # Log progress + # ---------------- + + log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, opt.epochs, batch_i, len(dataloader)) + + metric_table = [["Metrics", *[f"YOLO Layer {i}" for i in range(len(model.yolo_layers))]]] + + # Log metrics at each YOLO layer + for i, metric in enumerate(metrics): + formats = {m: "%.6f" for m in metrics} + formats["grid_size"] = "%2d" + formats["cls_acc"] = "%.2f%%" + row_metrics = 
[formats[metric] % yolo.metrics.get(metric, 0) for yolo in model.yolo_layers] + metric_table += [[metric, *row_metrics]] + + # Tensorboard logging + tensorboard_log = [] + for j, yolo in enumerate(model.yolo_layers): + for name, metric in yolo.metrics.items(): + if name != "grid_size": + tensorboard_log += [(f"{name}_{j+1}", metric)] + tensorboard_log += [("loss", loss.item())] + logger.list_of_scalars_summary(tensorboard_log, batches_done) + + log_str += AsciiTable(metric_table).table + log_str += f"\nTotal loss {loss.item()}" + + # Determine approximate time left for epoch + epoch_batches_left = len(dataloader) - (batch_i + 1) + time_left = datetime.timedelta(seconds=epoch_batches_left * (time.time() - start_time) / (batch_i + 1)) + log_str += f"\n---- ETA {time_left}" + + print(log_str) + + model.seen += imgs.size(0) + + if epoch % opt.evaluation_interval == 0: + print("\n---- Evaluating Model ----") + # Evaluate the model on the validation set + precision, recall, AP, f1, ap_class = evaluate( + model, + path=valid_path, + iou_thres=0.5, + conf_thres=0.5, + nms_thres=0.5, + img_size=opt.img_size, + batch_size=8, + ) + evaluation_metrics = [ + ("val_precision", precision.mean()), + ("val_recall", recall.mean()), + ("val_mAP", AP.mean()), + ("val_f1", f1.mean()), + ] + logger.list_of_scalars_summary(evaluation_metrics, epoch) + + # Print class APs and mAP + ap_table = [["Index", "Class name", "AP"]] + for i, c in enumerate(ap_class): + ap_table += [[c, class_names[c], "%.5f" % AP[i]]] + print(AsciiTable(ap_table).table) + print(f"---- mAP {AP.mean()}") + + if epoch % opt.checkpoint_interval == 0: + torch.save(model.state_dict(), f"checkpoints/yolov3_ckpt_%d.pth" % epoch) diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/augmentations.py b/utils/augmentations.py new file mode 100644 index 0000000..b1aed5d --- /dev/null +++ b/utils/augmentations.py @@ -0,0 +1,9 @@ +import torch +import torch.nn.functional as F +import numpy as np + + +def horisontal_flip(images, targets): + images = torch.flip(images, [-1]) + targets[:, 2] = 1 - targets[:, 2] + return images, targets diff --git a/utils/datasets.py b/utils/datasets.py new file mode 100644 index 0000000..efcd06a --- /dev/null +++ b/utils/datasets.py @@ -0,0 +1,154 @@ +import glob +import random +import os +import sys +import numpy as np +from PIL import Image +import torch +import torch.nn.functional as F + +from utils.augmentations import horisontal_flip +from torch.utils.data import Dataset +import torchvision.transforms as transforms + + +def pad_to_square(img, pad_value): + c, h, w = img.shape + dim_diff = np.abs(h - w) + # (upper / left) padding and (lower / right) padding + pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 + # Determine padding + pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) + # Add padding + img = F.pad(img, pad, "constant", value=pad_value) + + return img, pad + + +def resize(image, size): + image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) + return image + + +def random_resize(images, min_size=288, max_size=448): + new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0] + images = F.interpolate(images, size=new_size, mode="nearest") + return images + + +class ImageFolder(Dataset): + def __init__(self, folder_path, img_size=416): + self.files = sorted(glob.glob("%s/*.*" % folder_path)) + self.img_size = img_size + + def __getitem__(self, index): + img_path = self.files[index % len(self.files)] 
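+        # Indexing modulo len(self.files) wraps the index, so any integer maps to a valid file.
+        # Below, the image is converted to a tensor, zero-padded to a square, and resized to img_size.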
+ # Extract image as PyTorch tensor + img = transforms.ToTensor()(Image.open(img_path)) + # Pad to square resolution + img, _ = pad_to_square(img, 0) + # Resize + img = resize(img, self.img_size) + + return img_path, img + + def __len__(self): + return len(self.files) + + +class ListDataset(Dataset): + def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): + with open(list_path, "r") as file: + self.img_files = file.readlines() + + self.label_files = [ + path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") + for path in self.img_files + ] + self.img_size = img_size + self.max_objects = 100 + self.augment = augment + self.multiscale = multiscale + self.normalized_labels = normalized_labels + self.min_size = self.img_size - 3 * 32 + self.max_size = self.img_size + 3 * 32 + self.batch_count = 0 + + def __getitem__(self, index): + + # --------- + # Image + # --------- + + img_path = self.img_files[index % len(self.img_files)].rstrip() + img_path = 'E:\\eclipse-workspace\\PyTorch\\PyTorch-YOLOv3\\data\\coco' + img_path + #print (img_path) + # Extract image as PyTorch tensor + img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) + + # Handle images with less than three channels + if len(img.shape) != 3: + img = img.unsqueeze(0) + img = img.expand((3, img.shape[1:])) + + _, h, w = img.shape + h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) + # Pad to square resolution + img, pad = pad_to_square(img, 0) + _, padded_h, padded_w = img.shape + + # --------- + # Label + # --------- + + label_path = self.label_files[index % len(self.img_files)].rstrip() + label_path = 'E:\\eclipse-workspace\\PyTorch\\PyTorch-YOLOv3\\data\\coco\\labels' + label_path + #print (label_path) + + targets = None + if os.path.exists(label_path): + boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5)) + # Extract coordinates for unpadded + unscaled image + x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) + y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) + x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) + y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) + # Adjust for added padding + x1 += pad[0] + y1 += pad[2] + x2 += pad[1] + y2 += pad[3] + # Returns (x, y, w, h) + boxes[:, 1] = ((x1 + x2) / 2) / padded_w + boxes[:, 2] = ((y1 + y2) / 2) / padded_h + boxes[:, 3] *= w_factor / padded_w + boxes[:, 4] *= h_factor / padded_h + + targets = torch.zeros((len(boxes), 6)) + targets[:, 1:] = boxes + + # Apply augmentations + if self.augment: + if np.random.random() < 0.5: + img, targets = horisontal_flip(img, targets) + + return img_path, img, targets + + def collate_fn(self, batch): + paths, imgs, targets = list(zip(*batch)) + # Remove empty placeholder targets + targets = [boxes for boxes in targets if boxes is not None] + # Add sample index to targets + for i, boxes in enumerate(targets): + boxes[:, 0] = i + targets = torch.cat(targets, 0) + # Selects new image size every tenth batch + if self.multiscale and self.batch_count % 10 == 0: + self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) + # Resize images to input shape + imgs = torch.stack([resize(img, self.img_size) for img in imgs]) + self.batch_count += 1 + return paths, imgs, targets + + def __len__(self): + return len(self.img_files) diff --git a/utils/logger.py b/utils/logger.py new file mode 100644 index 0000000..1cf6994 --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,19 @@ +import tensorflow as tf + + +class Logger(object): + def __init__(self, 
log_dir): + """Create a summary writer logging to log_dir.""" + self.writer = tf.summary.create_file_writer(log_dir) + + def scalar_summary(self, tag, value, step): + with self.writer.as_default(): + tf.summary.scalar(tag, value, step=step) + self.writer.flush() + def list_of_scalars_summary(self, tag_value_pairs, step): + with self.writer.as_default(): + for tag, value in tag_value_pairs: + tf.summary.scalar(tag, value, step=step) + self.writer.flush() + # summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs]) + # self.writer.add_summary(summary, step) diff --git a/utils/parse_config.py b/utils/parse_config.py new file mode 100644 index 0000000..9dc0358 --- /dev/null +++ b/utils/parse_config.py @@ -0,0 +1,36 @@ + + +def parse_model_config(path): + """Parses the yolo-v3 layer configuration file and returns module definitions""" + file = open(path, 'r') + lines = file.read().split('\n') + lines = [x for x in lines if x and not x.startswith('#')] + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + module_defs = [] + for line in lines: + if line.startswith('['): # This marks the start of a new block + module_defs.append({}) + module_defs[-1]['type'] = line[1:-1].rstrip() + if module_defs[-1]['type'] == 'convolutional': + module_defs[-1]['batch_normalize'] = 0 + else: + key, value = line.split("=") + value = value.strip() + module_defs[-1][key.rstrip()] = value.strip() + + return module_defs + +def parse_data_config(path): + """Parses the data configuration file""" + options = dict() + options['gpus'] = '0,1,2,3' + options['num_workers'] = '10' + with open(path, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.strip() + if line == '' or line.startswith('#'): + continue + key, value = line.split('=') + options[key.strip()] = value.strip() + return options diff --git a/utils/utils.py b/utils/utils.py new file mode 100644 index 0000000..07b7f0c --- /dev/null +++ b/utils/utils.py @@ -0,0 +1,322 @@ +from __future__ import division +import math +import time +import tqdm +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.patches as patches + + +def to_cpu(tensor): + return tensor.detach().cpu() + + +def load_classes(path): + """ + Loads class labels at 'path' + """ + fp = open(path, "r") + names = fp.read().split("\n")[:-1] + return names + + +def weights_init_normal(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + torch.nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find("BatchNorm2d") != -1: + torch.nn.init.normal_(m.weight.data, 1.0, 0.02) + torch.nn.init.constant_(m.bias.data, 0.0) + + +def rescale_boxes(boxes, current_dim, original_shape): + """ Rescales bounding boxes to the original shape """ + orig_h, orig_w = original_shape + # The amount of padding that was added + pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) + pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) + # Image height and width after padding is removed + unpad_h = current_dim - pad_y + unpad_w = current_dim - pad_x + # Rescale bounding boxes to dimension of original image + boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h + boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * 
orig_h + return boxes + + +def xywh2xyxy(x): + y = x.new(x.shape) + y[..., 0] = x[..., 0] - x[..., 2] / 2 + y[..., 1] = x[..., 1] - x[..., 3] / 2 + y[..., 2] = x[..., 0] + x[..., 2] / 2 + y[..., 3] = x[..., 1] + x[..., 3] / 2 + return y + + +def ap_per_class(tp, conf, pred_cls, target_cls): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (list). + conf: Objectness value from 0-1 (list). + pred_cls: Predicted object classes (list). + target_cls: True object classes (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + ap, p, r = [], [], [] + for c in tqdm.tqdm(unique_classes, desc="Computing AP"): + i = pred_cls == c + n_gt = (target_cls == c).sum() # Number of ground truth objects + n_p = i.sum() # Number of predicted objects + + if n_p == 0 and n_gt == 0: + continue + elif n_p == 0 or n_gt == 0: + ap.append(0) + r.append(0) + p.append(0) + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum() + tpc = (tp[i]).cumsum() + + # Recall + recall_curve = tpc / (n_gt + 1e-16) + r.append(recall_curve[-1]) + + # Precision + precision_curve = tpc / (tpc + fpc) + p.append(precision_curve[-1]) + + # AP from recall-precision curve + ap.append(compute_ap(recall_curve, precision_curve)) + + # Compute F1 score (harmonic mean of precision and recall) + p, r, ap = np.array(p), np.array(r), np.array(ap) + f1 = 2 * p * r / (p + r + 1e-16) + + return p, r, ap, f1, unique_classes.astype("int32") + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Code originally from https://github.com/rbgirshick/py-faster-rcnn. + + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. 
+ """ + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([0.0], precision, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def get_batch_statistics(outputs, targets, iou_threshold): + """ Compute true positives, predicted scores and predicted labels per sample """ + batch_metrics = [] + for sample_i in range(len(outputs)): + + if outputs[sample_i] is None: + continue + + output = outputs[sample_i] + pred_boxes = output[:, :4] + pred_scores = output[:, 4] + pred_labels = output[:, -1] + + true_positives = np.zeros(pred_boxes.shape[0]) + + annotations = targets[targets[:, 0] == sample_i][:, 1:] + target_labels = annotations[:, 0] if len(annotations) else [] + if len(annotations): + detected_boxes = [] + target_boxes = annotations[:, 1:] + + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): + + # If targets are found break + if len(detected_boxes) == len(annotations): + break + + # Ignore if label is not one of the target labels + if pred_label not in target_labels: + continue + + iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) + if iou >= iou_threshold and box_index not in detected_boxes: + true_positives[pred_i] = 1 + detected_boxes += [box_index] + batch_metrics.append([true_positives, pred_scores, pred_labels]) + return batch_metrics + + +def bbox_wh_iou(wh1, wh2): + wh2 = wh2.t() + w1, h1 = wh1[0], wh1[1] + w2, h2 = wh2[0], wh2[1] + inter_area = torch.min(w1, w2) * torch.min(h1, h2) + union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area + return inter_area / union_area + + +def bbox_iou(box1, box2, x1y1x2y2=True): + """ + Returns the IoU of two bounding boxes + """ + if not x1y1x2y2: + # Transform from center and width to exact coordinates + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 + else: + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] + + # get the corrdinates of the intersection rectangle + inter_rect_x1 = torch.max(b1_x1, b2_x1) + inter_rect_y1 = torch.max(b1_y1, b2_y1) + inter_rect_x2 = torch.min(b1_x2, b2_x2) + inter_rect_y2 = torch.min(b1_y2, b2_y2) + # Intersection area + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( + inter_rect_y2 - inter_rect_y1 + 1, min=0 + ) + # Union Area + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) + + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) + + return iou + + +def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' and performs + Non-Maximum Suppression to further filter detections. 
+    Returns detections with shape:
+        (x1, y1, x2, y2, object_conf, class_score, class_pred)
+    """
+
+    # From (center x, center y, width, height) to (x1, y1, x2, y2)
+    prediction[..., :4] = xywh2xyxy(prediction[..., :4])
+    output = [None for _ in range(len(prediction))]
+    for image_i, image_pred in enumerate(prediction):
+        # Filter out confidence scores below threshold
+        image_pred = image_pred[image_pred[:, 4] >= conf_thres]
+        # If none are remaining => process next image
+        if not image_pred.size(0):
+            continue
+        # Object confidence times class confidence
+        score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
+        # Sort by it
+        image_pred = image_pred[(-score).argsort()]
+        class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
+        detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
+        # Perform non-maximum suppression
+        keep_boxes = []
+        while detections.size(0):
+            large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
+            label_match = detections[0, -1] == detections[:, -1]
+            # Indices of boxes with lower confidence scores, large IOUs and matching labels
+            invalid = large_overlap & label_match
+            weights = detections[invalid, 4:5]
+            # Merge overlapping bboxes by order of confidence
+            detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
+            keep_boxes += [detections[0]]
+            detections = detections[~invalid]
+        if keep_boxes:
+            output[image_i] = torch.stack(keep_boxes)
+
+    return output
+
+
+def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
+
+    ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
+    FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
+
+    nB = pred_boxes.size(0)  # batch size
+    nA = pred_boxes.size(1)  # number of anchors per grid cell
+    nC = pred_cls.size(-1)  # number of classes
+    nG = pred_boxes.size(2)  # grid size
+
+    # Output tensors
+    obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)  # 1 where an anchor is responsible for an object (foreground), 0 by default
+    noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)  # 1 where an anchor contains no object (background), 1 by default
+    class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)  # 1 where the class is predicted correctly, 0 by default
+    iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)  # IoU between the prediction and the ground-truth box
+    tx = FloatTensor(nB, nA, nG, nG).fill_(0)  # ground-truth offsets relative to the grid cell
+    ty = FloatTensor(nB, nA, nG, nG).fill_(0)
+    tw = FloatTensor(nB, nA, nG, nG).fill_(0)
+    th = FloatTensor(nB, nA, nG, nG).fill_(0)
+    tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)
+
+    # Convert to position relative to box
+    target_boxes = target[:, 2:6] * nG  # targets store xywh in [0, 1]; scale them to the current grid size
+    gxy = target_boxes[:, :2]
+    gwh = target_boxes[:, 2:]
+    # Get anchors with best iou
+    ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])  # IoU of every anchor shape with every ground-truth box
+    best_ious, best_n = ious.max(0)  # best score and which anchor shape fits each target best
+    # Separate target values
+    b, target_labels = target[:, :2].long().t()  # batch index and class label of each ground-truth box
+    gx, gy = gxy.t()
+    gw, gh = gwh.t()
+    gi, gj = gxy.long().t()  # grid-cell indices (floored)
+    # Set masks
+    obj_mask[b, best_n, gj, gi] = 1  # cells that actually contain an object
+    noobj_mask[b, best_n, gj, gi] = 0  # and the opposite
+
+    # Set noobj mask to zero where iou exceeds ignore threshold
+    for i, anchor_ious in enumerate(ious.t()):  # anchors whose IoU exceeds the threshold are treated as containing an object
+        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
+
+    # Coordinates
+    tx[b, best_n, gj, gi] = gx - gx.floor()  # offset of the ground-truth box within its grid cell
+    ty[b, best_n, gj, gi] = gy - gy.floor()
+    # Width and height
+    tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
+    th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
+    # One-hot encoding of label
+    tcls[b, best_n, gj, gi, target_labels] = 1  # one-hot encode the ground-truth class
+    # Compute label correctness and iou at best anchor
+    class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
+    iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)  # IoU between each matched prediction and its ground-truth box
+
+    tconf = obj_mask.float()  # target confidence of the ground-truth boxes, i.e. 1
+    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
diff --git a/weights/darknet53.conv.74 b/weights/darknet53.conv.74
new file mode 100644
index 0000000..a0680f7
Binary files /dev/null and b/weights/darknet53.conv.74 differ
diff --git a/weights/download_weights.sh b/weights/download_weights.sh
new file mode 100644
index 0000000..4bccb58
--- /dev/null
+++ b/weights/download_weights.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Download weights for vanilla YOLOv3
+wget -c https://pjreddie.com/media/files/yolov3.weights
+# Download weights for tiny YOLOv3
+wget -c https://pjreddie.com/media/files/yolov3-tiny.weights
+# Download weights for backbone network
+wget -c https://pjreddie.com/media/files/darknet53.conv.74
diff --git a/weights/yolov3-tiny.weights b/weights/yolov3-tiny.weights
new file mode 100644
index 0000000..aad7e6c
Binary files /dev/null and b/weights/yolov3-tiny.weights differ
diff --git a/weights/yolov3.weights b/weights/yolov3.weights
new file mode 100644
index 0000000..a5ed716
Binary files /dev/null and b/weights/yolov3.weights differ
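As a quick orientation, the following is a minimal inference sketch (an illustration, not code from this commit) showing how the pieces above fit together: Darknet builds the network from the cfg file, load_darknet_weights loads the pretrained weights, the image is preprocessed the same way ImageFolder in utils/datasets.py does it, and the raw predictions are filtered with non_max_suppression and mapped back to the original resolution with rescale_boxes. The file paths (config/yolov3.cfg, weights/yolov3.weights, data/coco.names, assets/dog.png) and the threshold values are assumptions chosen for illustration.

import torch
import torchvision.transforms as transforms
from PIL import Image

from models import Darknet
from utils.datasets import pad_to_square, resize
from utils.utils import load_classes, non_max_suppression, rescale_boxes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network from the cfg file and load the pretrained Darknet weights (assumed paths)
model = Darknet("config/yolov3.cfg", img_size=416).to(device)
model.load_darknet_weights("weights/yolov3.weights")
model.eval()

classes = load_classes("data/coco.names")

# Preprocess a single image exactly as ImageFolder does: tensor -> pad to square -> resize
image = Image.open("assets/dog.png").convert("RGB")
img = transforms.ToTensor()(image)
img, _ = pad_to_square(img, 0)
img = resize(img, 416).unsqueeze(0).to(device)

# A forward pass without targets returns raw detections of shape (1, num_boxes, 5 + num_classes)
with torch.no_grad():
    detections = model(img)
detections = non_max_suppression(detections, conf_thres=0.5, nms_thres=0.4)[0]

# Map the surviving boxes back to the original image resolution and print them
if detections is not None:
    detections = rescale_boxes(detections, 416, image.size[::-1])  # original_shape = (height, width)
    for det in detections:
        x1, y1, x2, y2, obj_conf, cls_conf, cls_pred = det.tolist()
        print("%s %.2f (%.0f, %.0f, %.0f, %.0f)" % (classes[int(cls_pred)], cls_conf, x1, y1, x2, y2))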