YOLO img and vdo detector

5 years ago · 1e025ece42
3 changed files with 299 additions and 101 deletions
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@

 # Handai aerial detector

-We need to analyze road users in mid-block crossing area between Handai monorail station and handai hospital.
+We need to analyze road users in mid-block crossing area between Handai monorail station and Handai hospital.

 ## Output we need

--- a/examples/yolo_img_obj_detector.py
+++ b/examples/yolo_img_obj_detector.py
@@ -0,0 +1,109 @@
+"""USAGE
+python examples/yolo_img_obj_detector.py \
+    -c ~/dev/obj-tracking/yolov3.cfg \
+    -w ~/dev/obj-tracking/yolov3.weights \
+    -cl ~/dev/obj-tracking/yolo/darknet/data/coco.names \
+    -i ~/dev/obj-tracking/person.jpg
+
+python examples/yolo_img_obj_detector.py \
+    -c ~/syncthing/dropbox/tracking-obj/mytrain.cfg \
+    -w ~/syncthing/dropbox/tracking-obj/mytrain_final.weights \
+    -cl ~/syncthing/dropbox/tracking-obj/mytrain.names \
+    -i /media/sipp11/500BUP/handai_photos/test/6294.jpg
+"""
+import cv2
+import argparse
+import numpy as np
+
+ap = argparse.ArgumentParser()
+ap.add_argument("-i", "--image", required=True, help="path to input image")
+ap.add_argument("-c", "--config", required=True, help="path to yolo config file")
+ap.add_argument(
+    "-w", "--weights", required=True, help="path to yolo pre-trained weights"
+)
+ap.add_argument(
+    "-cl", "--classes", required=True, help="path to text file containing class names"
+)
+args = ap.parse_args()
+
+
+def get_output_layers(net):
+    layer_names = net.getLayerNames()
+    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
+    return output_layers
+
+
+def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
+    label = str(classes[class_id])
+    color = COLORS[class_id]
+    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
+    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+
+image = cv2.imread(args.image)
+
+Width = image.shape[1]
+Height = image.shape[0]
+scale = 0.00392
+
+classes = None
+
+with open(args.classes, "r") as f:
+    classes = [line.strip() for line in f.readlines()]
+
+COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
+
+net = cv2.dnn.readNet(args.weights, args.config)
+blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
+
+net.setInput(blob)
+
+outs = net.forward(get_output_layers(net))
+
+class_ids = []
+confidences = []
+boxes = []
+conf_threshold = 0.5
+nms_threshold = 0.4
+
+
+for out in outs:
+    for detection in out:
+        scores = detection[5:]
+        class_id = np.argmax(scores)
+        confidence = scores[class_id]
+        if confidence > 0.5:
+            center_x = int(detection[0] * Width)
+            center_y = int(detection[1] * Height)
+            w = int(detection[2] * Width)
+            h = int(detection[3] * Height)
+            x = center_x - w / 2
+            y = center_y - h / 2
+            class_ids.append(class_id)
+            confidences.append(float(confidence))
+            boxes.append([x, y, w, h])
+
+
+indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
+
+for i in indices:
+    i = i[0]
+    box = boxes[i]
+    x = box[0]
+    y = box[1]
+    w = box[2]
+    h = box[3]
+    draw_prediction(
+        image,
+        class_ids[i],
+        confidences[i],
+        round(x),
+        round(y),
+        round(x + w),
+        round(y + h),
+    )
+
+cv2.imshow("object detection", image)
+cv2.waitKey()
+
+cv2.imwrite("object-detection.jpg", image)
+cv2.destroyAllWindows()
--- a/examples/yolo_obj_detector.py
+++ b/examples/yolo_obj_detector.py
@@ -1,109 +1,198 @@
-"""USAGE
-python examples/yolo_obj_detector.py \
-    -c ~/dev/obj-tracking/yolov3.cfg \
-    -w ~/dev/obj-tracking/yolov3.weights \
-    -cl ~/dev/obj-tracking/yolo/darknet/data/coco.names \
-    -i ~/dev/obj-tracking/person.jpg
-
-python examples/yolo_obj_detector.py \
-    -c ~/syncthing/dropbox/tracking-obj/mytrain.cfg \
-    -w ~/syncthing/dropbox/tracking-obj/mytrain_final.weights \
-    -cl ~/syncthing/dropbox/tracking-obj/mytrain.names \
-    -i /media/sipp11/500BUP/handai_photos/test/6294.jpg
+"""USAGE:
+
+time python examples/yolo_obj_detector.py --input ~/Desktop/5min.mp4 -o output.mp4
+time python examples/yolo_obj_detector.py --input ~/Desktop/5min.mp4 -l
+
 """
-import cv2
-import argparse
+# import the necessary packages
 import numpy as np
+import argparse
+import imutils
+import time
+import cv2
+import os

+# construct the argument parse and parse the arguments
 ap = argparse.ArgumentParser()
-ap.add_argument("-i", "--image", required=True, help="path to input image")
-ap.add_argument("-c", "--config", required=True, help="path to yolo config file")
+ap.add_argument("-i", "--input", required=True, help="path to input video")
+ap.add_argument("-o", "--output", required=False, help="path to output video")
+ap.add_argument("-l", "--live", action='store_true', help="Show live detection")
+# ap.add_argument("-y", "--yolo", required=True,
+# 	help="base path to YOLO directory")
 ap.add_argument(
-    "-w", "--weights", required=True, help="path to yolo pre-trained weights"
+    "-c",
+    "--confidence",
+    type=float,
+    default=0.5,
+    help="minimum probability to filter weak detections",
 )
 ap.add_argument(
-    "-cl", "--classes", required=True, help="path to text file containing class names"
+    "-t",
+    "--threshold",
+    type=float,
+    default=0.3,
+    help="threshold when applyong non-maxima suppression",
 )
-args = ap.parse_args()
-
-
-def get_output_layers(net):
-    layer_names = net.getLayerNames()
-    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
-    return output_layers
-
-
-def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
-    label = str(classes[class_id])
-    color = COLORS[class_id]
-    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
-    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
-
-image = cv2.imread(args.image)
-
-Width = image.shape[1]
-Height = image.shape[0]
-scale = 0.00392
-
-classes = None
-
-with open(args.classes, "r") as f:
-    classes = [line.strip() for line in f.readlines()]
-
-COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
-
-net = cv2.dnn.readNet(args.weights, args.config)
-blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
-
-net.setInput(blob)
-
-outs = net.forward(get_output_layers(net))
-
-class_ids = []
-confidences = []
-boxes = []
-conf_threshold = 0.5
-nms_threshold = 0.4
-
-
-for out in outs:
-    for detection in out:
-        scores = detection[5:]
-        class_id = np.argmax(scores)
-        confidence = scores[class_id]
-        if confidence > 0.5:
-            center_x = int(detection[0] * Width)
-            center_y = int(detection[1] * Height)
-            w = int(detection[2] * Width)
-            h = int(detection[3] * Height)
-            x = center_x - w / 2
-            y = center_y - h / 2
-            class_ids.append(class_id)
-            confidences.append(float(confidence))
-            boxes.append([x, y, w, h])
-
-
-indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
-
-for i in indices:
-    i = i[0]
-    box = boxes[i]
-    x = box[0]
-    y = box[1]
-    w = box[2]
-    h = box[3]
-    draw_prediction(
-        image,
-        class_ids[i],
-        confidences[i],
-        round(x),
-        round(y),
-        round(x + w),
-        round(y + h),
+args = vars(ap.parse_args())
+
+# load the class labels our YOLO model was trained on
+# labelsPath = os.path.sep.join([args["yolo"], "coco.names"])
+labelsPath = "/home/sipp11/syncthing/dropbox/tracking-obj/mytrain.names"
+LABELS = open(labelsPath).read().strip().split("\n")
+
+# initialize a list of colors to represent each possible class label
+np.random.seed(42)
+COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")
+
+# derive the paths to the YOLO weights and model configuration
+# weightsPath = os.path.sep.join([args["yolo"], "yolov3.weights"])
+# configPath = os.path.sep.join([args["yolo"], "yolov3.cfg"])
+
+weightsPath = "/home/sipp11/syncthing/dropbox/tracking-obj/mytrain_final.weights"
+configPath = "/home/sipp11/syncthing/dropbox/tracking-obj/mytrain.cfg"
+
+# load our YOLO object detector from the Darknet config and weights above
+# and determine only the *output* layer names that we need from YOLO
+print("[INFO] loading YOLO from disk...")
+net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
+ln = net.getLayerNames()
+ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
+
+
+# initialize the video stream, pointer to output video file, and
+# frame dimensions
+vs = cv2.VideoCapture(args["input"])
+writer = None
+(W, H) = (None, None)
+
+# try to determine the total number of frames in the video file
+try:
+    prop = (
+        cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
    )
-
-cv2.imshow("object detection", image)
-cv2.waitKey()
-
-cv2.imwrite("object-detection.jpg", image)
-cv2.destroyAllWindows()
+    total = int(vs.get(prop))
+    print("[INFO] {} total frames in video".format(total))
+
+# an error occurred while trying to determine the total
+# number of frames in the video file
+except:
+    print("[INFO] could not determine # of frames in video")
+    print("[INFO] no approx. completion time can be provided")
+    total = -1
+
+
+# loop over frames from the video file stream
+while True:
+    # read the next frame from the file
+    (grabbed, frame) = vs.read()
+
+    # if the frame was not grabbed, then we have reached the end
+    # of the stream
+    if not grabbed:
+        break
+
+    # if the frame dimensions are empty, grab them
+    if W is None or H is None:
+        (H, W) = frame.shape[:2]
+
+    # construct a blob from the input frame and then perform a forward
+    # pass of the YOLO object detector, giving us our bounding boxes
+    # and associated probabilities
+    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
+    net.setInput(blob)
+    start = time.time()
+    layerOutputs = net.forward(ln)
+    end = time.time()
+
+    # initialize our lists of detected bounding boxes, confidences,
+    # and class IDs, respectively
+    boxes = []
+    confidences = []
+    classIDs = []
+
+    # loop over each of the layer outputs
+    for output in layerOutputs:
+        # loop over each of the detections
+        for detection in output:
+            # extract the class ID and confidence (i.e., probability)
+            # of the current object detection
+            scores = detection[5:]
+            classID = np.argmax(scores)
+            confidence = scores[classID]
+
+            # filter out weak predictions by ensuring the detected
+            # probability is greater than the minimum probability
+            if confidence > args["confidence"]:
+                # scale the bounding box coordinates back relative to
+                # the size of the image, keeping in mind that YOLO
+                # actually returns the center (x, y)-coordinates of
+                # the bounding box followed by the boxes' width and
+                # height
+                box = detection[0:4] * np.array([W, H, W, H])
+                (centerX, centerY, width, height) = box.astype("int")
+
+                # use the center (x, y)-coordinates to derive the top
+                # and left corner of the bounding box
+                x = int(centerX - (width / 2))
+                y = int(centerY - (height / 2))
+
+                # update our list of bounding box coordinates,
+                # confidences, and class IDs
+                boxes.append([x, y, int(width), int(height)])
+                confidences.append(float(confidence))
+                classIDs.append(classID)
+
+        # apply non-maxima suppression to suppress weak, overlapping
+        # bounding boxes
+        idxs = cv2.dnn.NMSBoxes(
+            boxes, confidences, args["confidence"], args["threshold"]
+        )
+
+        # ensure at least one detection exists
+        if len(idxs) > 0:
+            # loop over the indexes we are keeping
+            for i in idxs.flatten():
+                # extract the bounding box coordinates
+                (x, y) = (boxes[i][0], boxes[i][1])
+                (w, h) = (boxes[i][2], boxes[i][3])
+
+                # draw a bounding box rectangle and label on the frame
+                color = [int(c) for c in COLORS[classIDs[i]]]
+                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
+                text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
+                cv2.putText(
+                    frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
+                )
+
+    if args["live"]:
+        cv2.imshow("Frame", frame)
+        key = cv2.waitKey(1) & 0xFF
+
+        # if the `q` key was pressed, break from the loop
+        if key == ord("q"):
+            break
+
+    if args["output"]:
+        # check if the video writer is None
+        if writer is None:
+            # initialize our video writer
+            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
+            writer = cv2.VideoWriter(
+                args["output"], fourcc, 30, (frame.shape[1], frame.shape[0]), True
+            )
+
+            # some information on processing single frame
+            if total > 0:
+                elap = end - start
+                print("[INFO] single frame took {:.4f} seconds".format(elap))
+                print(
+                    "[INFO] estimated total time to finish: {:.4f}".format(elap * total)
+                )
+
+        # write the output frame to disk
+        writer.write(frame)
+
+# release the file pointers
+print("[INFO] cleaning up...")
+writer.release()
+vs.release()