YOLO img and vdo detector

5 years ago · 1e025ece42
3 changed files with 299 additions and 101 deletions
--- a/README.md
+++ b/README.md
@ -1,7 +1,7 @@
 # Handai aerial detector
-We need to analyze road users in mid-block crossing area between Handai monorail station and handai hospital.
+We need to analyze road users in mid-block crossing area between Handai monorail station and Handai hospital.
 ## Output we need
--- a/examples/yolo_img_obj_detector.py
+++ b/examples/yolo_img_obj_detector.py
@ -0,0 +1,109 @@
 """USAGE
 python examples/yolo_img_obj_detector.py \
    -c ~/dev/obj-tracking/yolov3.cfg \
    -w ~/dev/obj-tracking/yolov3.weights \
    -cl ~/dev/obj-tracking/yolo/darknet/data/coco.names \
    -i ~/dev/obj-tracking/person.jpg
 python examples/yolo_img_obj_detector.py \
    -c ~/syncthing/dropbox/tracking-obj/mytrain.cfg \
    -w ~/syncthing/dropbox/tracking-obj/mytrain_final.weights \
    -cl ~/syncthing/dropbox/tracking-obj/mytrain.names \
    -i /media/sipp11/500BUP/handai_photos/test/6294.jpg
 """
 import cv2
 import argparse
 import numpy as np
 # Command-line interface: all four options are mandatory.
 ap = argparse.ArgumentParser()
 for short_flag, long_flag, description in (
    ("-i", "--image", "path to input image"),
    ("-c", "--config", "path to yolo config file"),
    ("-w", "--weights", "path to yolo pre-trained weights"),
    ("-cl", "--classes", "path to text file containing class names"),
 ):
    ap.add_argument(short_flag, long_flag, required=True, help=description)
 args = ap.parse_args()
 def get_output_layers(net):
    """Return the names of the network's unconnected (output) layers.

    Args:
        net: object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (here, the network returned by
            ``cv2.dnn.readNet``).

    Returns:
        list: one layer name per id reported by ``getUnconnectedOutLayers()``.
    """
    layer_names = net.getLayerNames()
    # The ids are 1-based. They may arrive nested as an (N, 1) array or as a
    # flat (N,) array depending on the OpenCV build; flattening first makes
    # the lookup work for both layouts instead of failing on ``i[0]``.
    layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(layer_id) - 1] for layer_id in layer_ids]
 def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one detection (box plus class label) onto ``img`` in place.

    The label text and colour are looked up in the module-level ``classes``
    and ``COLORS`` by ``class_id``. ``confidence`` is accepted but not used.
    ``(x, y)`` and ``(x_plus_w, y_plus_h)`` are the two opposite corners of
    the rectangle; the label is placed 10 px up and to the left of ``(x, y)``.
    """
    caption = str(classes[class_id])
    box_color = COLORS[class_id]
    top_left = (x, y)
    bottom_right = (x_plus_w, y_plus_h)
    cv2.rectangle(img, top_left, bottom_right, box_color, 2)
    caption_origin = (x - 10, y - 10)
    cv2.putText(
        img, caption, caption_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2
    )
 # Load the input image. cv2.imread signals failure by returning None rather
 # than raising, so check before touching .shape.
 image = cv2.imread(args.image)
 if image is None:
    raise SystemExit("could not read input image: {}".format(args.image))
 Height, Width = image.shape[:2]
 # 0.00392 is approximately 1/255; passed to blobFromImage as the scale factor.
 scale = 0.00392
 # One class name per line of the classes file.
 with open(args.classes, "r") as f:
    classes = [line.strip() for line in f]
 # One random colour per class, used by draw_prediction.
 COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
 net = cv2.dnn.readNet(args.weights, args.config)
 blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
 net.setInput(blob)
 outs = net.forward(get_output_layers(net))
 class_ids = []
 confidences = []
 boxes = []
 conf_threshold = 0.5
 nms_threshold = 0.4
 for out in outs:
    for detection in out:
        # Entries 0-3 are used as the box (centre x, centre y, width, height)
        # relative to the image size; entries from index 5 on are the
        # per-class scores.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        # Use the same threshold that is handed to NMSBoxes below instead of
        # a second hard-coded 0.5.
        if confidence > conf_threshold:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            # Convert centre-based coordinates to the top-left corner.
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])
 indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
 # The kept indices may be nested ([[i], ...]), flat ([i, ...]) or empty
 # depending on the OpenCV build; flattening handles all three.
 for i in np.asarray(indices).flatten():
    x, y, w, h = boxes[i]
    draw_prediction(
        image,
        class_ids[i],
        confidences[i],
        round(x),
        round(y),
        round(x + w),
        round(y + h),
    )
 # Show the annotated image, wait for a key press, then save it.
 cv2.imshow("object detection", image)
 cv2.waitKey()
 cv2.imwrite("object-detection.jpg", image)
 cv2.destroyAllWindows()
--- a/examples/yolo_obj_detector.py
+++ b/examples/yolo_obj_detector.py
@ -1,109 +1,198 @@
-"""USAGE
+"""USAGE:
-python examples/yolo_obj_detector.py \
+
-    -c ~/dev/obj-tracking/yolov3.cfg \
+time python examples/yolo_obj_detector.py --input ~/Desktop/5min.mp4 -o output.mp4
-    -w ~/dev/obj-tracking/yolov3.weights \
+time python examples/yolo_obj_detector.py --input ~/Desktop/5min.mp4 -l
-    -cl ~/dev/obj-tracking/yolo/darknet/data/coco.names \
+
    -i ~/dev/obj-tracking/person.jpg
 python examples/yolo_obj_detector.py \
    -c ~/syncthing/dropbox/tracking-obj/mytrain.cfg \
    -w ~/syncthing/dropbox/tracking-obj/mytrain_final.weights \
    -cl ~/syncthing/dropbox/tracking-obj/mytrain.names \
    -i /media/sipp11/500BUP/handai_photos/test/6294.jpg
 """
-import cv2
+# import the necessary packages
 import argparse
 import numpy as np
 import argparse
 import imutils
 import time
 import cv2
 import os
 # construct the argument parse and parse the arguments
 ap = argparse.ArgumentParser()
-ap.add_argument("-i", "--image", required=True, help="path to input image")
+ap.add_argument("-i", "--input", required=True, help="path to input video")
-ap.add_argument("-c", "--config", required=True, help="path to yolo config file")
+ap.add_argument("-o", "--output", required=False, help="path to output video")
 ap.add_argument("-l", "--live", action='store_true', help="Show live detection")
 # ap.add_argument("-y", "--yolo", required=True,
 # 	help="base path to YOLO directory")
 ap.add_argument(
-    "-w", "--weights", required=True, help="path to yolo pre-trained weights"
+    "-c",
    "--confidence",
    type=float,
    default=0.5,
    help="minimum probability to filter weak detections",
 )
 ap.add_argument(
-    "-cl", "--classes", required=True, help="path to text file containing class names"
+    "-t",
    "--threshold",
    type=float,
    default=0.3,
    help="threshold when applyong non-maxima suppression",
 )
-args = ap.parse_args()
+args = vars(ap.parse_args())
-
+
-
+# load the COCO class labels our YOLO model was trained on
-def get_output_layers(net):
+# labelsPath = os.path.sep.join([args["yolo"], "coco.names"])
-    layer_names = net.getLayerNames()
+labelsPath = "/home/sipp11/syncthing/dropbox/tracking-obj/mytrain.names"
-    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
+LABELS = open(labelsPath).read().strip().split("\n")
-    return output_layers
+
-
+# initialize a list of colors to represent each possible class label
-
+np.random.seed(42)
-def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
+COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")
-    label = str(classes[class_id])
+
-    color = COLORS[class_id]
+# derive the paths to the YOLO weights and model configuration
-    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
+# weightsPath = os.path.sep.join([args["yolo"], "yolov3.weights"])
-    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+# configPath = os.path.sep.join([args["yolo"], "yolov3.cfg"])
-
+
-image = cv2.imread(args.image)
+weightsPath = "/home/sipp11/syncthing/dropbox/tracking-obj/mytrain_final.weights"
-
+configPath = "/home/sipp11/syncthing/dropbox/tracking-obj/mytrain.cfg"
-Width = image.shape[1]
+
-Height = image.shape[0]
+# load our YOLO object detector trained on COCO dataset (80 classes)
-scale = 0.00392
+# and determine only the *output* layer names that we need from YOLO
-
+print("[INFO] loading YOLO from disk...")
-classes = None
+net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
-
+ln = net.getLayerNames()
-with open(args.classes, "r") as f:
+ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
-    classes = [line.strip() for line in f.readlines()]
+
-
+
-COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
+# initialize the video stream, pointer to output video file, and
-
+# frame dimensions
-net = cv2.dnn.readNet(args.weights, args.config)
+vs = cv2.VideoCapture(args["input"])
-blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
+writer = None
-
+(W, H) = (None, None)
 # try to determine the total number of frames in the video file
 try:
    # the frame-count property constant lives in a different place
    # depending on what imutils.is_cv2() reports
    prop = (
        cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
    )
    total = int(vs.get(prop))
    print("[INFO] {} total frames in video".format(total))
 # an error occurred while trying to determine the total
 # number of frames in the video file; catch Exception (not a bare
 # except) so Ctrl-C and SystemExit still propagate
 except Exception:
    print("[INFO] could not determine # of frames in video")
    print("[INFO] no approx. completion time can be provided")
    # sentinel: the time estimate later is only printed when total > 0
    total = -1
 # loop over frames from the video file stream
 while True:
    # read the next frame from the file
    (grabbed, frame) = vs.read()
    # if the frame was not grabbed, then we have reached the end
    # of the stream
    if not grabbed:
        break
    # if the frame dimensions are empty, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]
    # construct a blob from the input frame and then perform a forward
    # pass of the YOLO object detector, giving us our bounding boxes
    # and associated probabilities
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    start = time.time()
    layerOutputs = net.forward(ln)
    end = time.time()
-outs = net.forward(get_output_layers(net))
+    # initialize our lists of detected bounding boxes, confidences,
-
+    # and class IDs, respectively
 class_ids = []
 confidences = []
    boxes = []
-conf_threshold = 0.5
+    confidences = []
-nms_threshold = 0.4
+    classIDs = []
-
+
-
+    # loop over each of the layer outputs
-for out in outs:
+    for output in layerOutputs:
-    for detection in out:
+        # loop over each of the detections
        for detection in output:
            # extract the class ID and confidence (i.e., probability)
            # of the current object detection
            scores = detection[5:]
-        class_id = np.argmax(scores)
+            classID = np.argmax(scores)
-        confidence = scores[class_id]
+            confidence = scores[classID]
-        if confidence > 0.5:
+
-            center_x = int(detection[0] * Width)
+            # filter out weak predictions by ensuring the detected
-            center_y = int(detection[1] * Height)
+            # probability is greater than the minimum probability
-            w = int(detection[2] * Width)
+            if confidence > args["confidence"]:
-            h = int(detection[3] * Height)
+                # scale the bounding box coordinates back relative to
-            x = center_x - w / 2
+                # the size of the image, keeping in mind that YOLO
-            y = center_y - h / 2
+                # actually returns the center (x, y)-coordinates of
-            class_ids.append(class_id)
+                # the bounding box followed by the boxes' width and
                # height
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")
                # use the center (x, y)-coordinates to derive the top
                # and left corner of the bounding box
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                # update our list of bounding box coordinates,
                # confidences, and class IDs
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
-            boxes.append([x, y, w, h])
+                classIDs.append(classID)
-
+
-
+        # apply non-maxima suppression to suppress weak, overlapping
-indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
+        # bounding boxes
-
+        idxs = cv2.dnn.NMSBoxes(
-for i in indices:
+            boxes, confidences, args["confidence"], args["threshold"]
-    i = i[0]
+        )
-    box = boxes[i]
+
-    x = box[0]
+        # ensure at least one detection exists
-    y = box[1]
+        if len(idxs) > 0:
-    w = box[2]
+            # loop over the indexes we are keeping
-    h = box[3]
+            for i in idxs.flatten():
-    draw_prediction(
+                # extract the bounding box coordinates
-        image,
+                (x, y) = (boxes[i][0], boxes[i][1])
-        class_ids[i],
+                (w, h) = (boxes[i][2], boxes[i][3])
-        confidences[i],
+
-        round(x),
+                # draw a bounding box rectangle and label on the frame
-        round(y),
+                color = [int(c) for c in COLORS[classIDs[i]]]
-        round(x + w),
+                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
-        round(y + h),
+                text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
                cv2.putText(
                    frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
                )
    if args["live"]:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
    if args["output"]:
        # check if the video writer is None
        if writer is None:
            # initialize our video writer
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(
                args["output"], fourcc, 30, (frame.shape[1], frame.shape[0]), True
            )
            # some information on processing single frame
            if total > 0:
                elap = end - start
                print("[INFO] single frame took {:.4f} seconds".format(elap))
                print(
                    "[INFO] estimated total time to finish: {:.4f}".format(elap * total)
                )
-cv2.imshow("object detection", image)
+        # write the output frame to disk
-cv2.waitKey()
+        writer.write(frame)
-cv2.imwrite("object-detection.jpg", image)
+# release the file pointers
-cv2.destroyAllWindows()
+print("[INFO] cleaning up...")
 # writer is only created when --output is given and otherwise stays None,
 # so releasing it unconditionally would raise AttributeError
 if writer is not None:
    writer.release()
 vs.release()