sipp11
5 years ago
commit
73acfece2f
8 changed files with 740 additions and 0 deletions
@ -0,0 +1,4 @@
|
||||
{ |
||||
"python.pythonPath": "~/.virtualenvs/obj-tracking/bin/python", |
||||
"python.formatting.provider": "black" |
||||
} |
@ -0,0 +1,24 @@
|
||||
|
||||
# Handai aerial detector |
||||
|
||||
We need to analyze road users in the mid-block crossing area between Handai monorail station and Handai Hospital. |
||||
|
||||
## Output we need |
||||
|
||||
* What, when and where are road users |
||||
* focus on |
||||
* pedestrian |
||||
* with walking poles - STILL UNSURE if it's possible to detect |
||||
* handicap (with wheelchair) |
||||
* car |
||||
* bus |
||||
* cyclist |
||||
|
||||
## How? |
||||
|
||||
* I'm still debating if I should use `YOLO` or `SSD` for the run. |
||||
|
||||
## Challenge? |
||||
|
||||
The object tracking algorithms from both `opencv` and `dlib` behave pretty much the same: they track well, but they lose the target when a building or even a small pillar blocks the line of sight. Thus, we need to detect objects separately in each zone and find a way to link an "object" that enters the __blocking zone__ with the one that comes out on the other side. I guess this should let us collect all the traces we need and connect them together. Yes, when there are tons of overlapping objects, things will get much tougher, but that is an issue for another day. |
||||
|
@ -0,0 +1,128 @@
|
||||
# USAGE |
||||
# python multi_object_tracking.py --video videos/soccer_01.mp4 --tracker csrt |
||||
# python multi_object_tracking.py --video ~/Desktop/5min.mp4 --tracker csrt |
||||
|
||||
|
||||
# import the necessary packages |
||||
from imutils.video import VideoStream |
||||
import argparse |
||||
import imutils |
||||
import time |
||||
import cv2 |
||||
import dlib |
||||
|
||||
# construct the argument parser and parse the arguments |
||||
ap = argparse.ArgumentParser() |
||||
ap.add_argument("-v", "--video", type=str, |
||||
help="path to input video file") |
||||
ap.add_argument("-t", "--tracker", type=str, default="kcf", |
||||
help="OpenCV object tracker type") |
||||
args = vars(ap.parse_args()) |
||||
|
||||
# initialize a dictionary that maps strings to their corresponding |
||||
# OpenCV object tracker implementations |
||||
OPENCV_OBJECT_TRACKERS = { |
||||
"csrt": cv2.TrackerCSRT_create, |
||||
"kcf": cv2.TrackerKCF_create, |
||||
"boosting": cv2.TrackerBoosting_create, |
||||
"mil": cv2.TrackerMIL_create, |
||||
"tld": cv2.TrackerTLD_create, |
||||
"medianflow": cv2.TrackerMedianFlow_create, |
||||
"mosse": cv2.TrackerMOSSE_create |
||||
} |
||||
|
||||
# initialize OpenCV's special multi-object tracker |
||||
# trackers = cv2.MultiTracker_create() |
||||
trackers = [] |
||||
|
||||
# if a video path was not supplied, grab the reference to the web cam |
||||
if not args.get("video", False): |
||||
print("[INFO] starting video stream...") |
||||
vs = VideoStream(src=0).start() |
||||
time.sleep(1.0) |
||||
|
||||
# otherwise, grab a reference to the video file |
||||
else: |
||||
vs = cv2.VideoCapture(args["video"]) |
||||
|
||||
# loop over frames from the video stream |
||||
while True: |
||||
# grab the current frame, then handle if we are using a |
||||
# VideoStream or VideoCapture object |
||||
frame = vs.read() |
||||
frame = frame[1] if args.get("video", False) else frame |
||||
|
||||
# check to see if we have reached the end of the stream |
||||
if frame is None: |
||||
break |
||||
|
||||
# resize the frame (so we can process it faster) |
||||
# frame = imutils.resize(frame, width=600) |
||||
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) |
||||
|
||||
# grab the updated bounding box coordinates (if any) for each |
||||
# object that is being tracked |
||||
# (success, boxes) = trackers.update(frame) |
||||
# print('success', success) |
||||
# print('boxes', boxes) |
||||
|
||||
for tk in trackers: |
||||
tk.update(frame_rgb) |
||||
pos = tk.get_position() |
||||
# unpack the position object |
||||
startX = int(pos.left()) |
||||
startY = int(pos.top()) |
||||
endX = int(pos.right()) |
||||
endY = int(pos.bottom()) |
||||
|
||||
cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2) |
||||
|
||||
# loop over the bounding boxes and draw then on the frame |
||||
# for box in boxes: |
||||
# (x, y, w, h) = [int(v) for v in box] |
||||
# cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) |
||||
|
||||
# show the output frame |
||||
cv2.imshow("Frame", frame) |
||||
key = cv2.waitKey(1) & 0xFF |
||||
|
||||
# if the 's' key is selected, we are going to "select" a bounding |
||||
# box to track |
||||
if key == ord("s"): |
||||
# select the bounding box of the object we want to track (make |
||||
# sure you press ENTER or SPACE after selecting the ROI) |
||||
box = cv2.selectROI("Frame", frame, fromCenter=False, |
||||
showCrosshair=True) |
||||
print('select box: ', box) |
||||
(x,y,w,h) = box |
||||
startX = x |
||||
startY = y |
||||
endX = x + w |
||||
endY = y + h |
||||
print(startX, startY, endX, endY) |
||||
|
||||
# create a new object tracker for the bounding box and add it |
||||
# to our multi-object tracker |
||||
# tracker = OPENCV_OBJECT_TRACKERS[args["tracker"]]() |
||||
# trackers.add(tracker, frame, box) |
||||
tracker = dlib.correlation_tracker() |
||||
rect = dlib.rectangle(startX, startY, endX, endY) |
||||
print('rect', rect) |
||||
|
||||
tracker.start_track(frame_rgb, rect) |
||||
trackers.append(tracker) |
||||
|
||||
# if the `q` key was pressed, break from the loop |
||||
elif key == ord("q"): |
||||
break |
||||
|
||||
# if we are using a webcam, release the pointer |
||||
if not args.get("video", False): |
||||
vs.stop() |
||||
|
||||
# otherwise, release the file pointer |
||||
else: |
||||
vs.release() |
||||
|
||||
# close all windows |
||||
cv2.destroyAllWindows() |
@ -0,0 +1,210 @@
|
||||
# USAGE |
||||
# python multi_object_tracking_fast.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt \ |
||||
# --model mobilenet_ssd/MobileNetSSD_deploy.caffemodel --video race.mp4 |
||||
|
||||
# import the necessary packages |
||||
from imutils.video import FPS |
||||
import multiprocessing |
||||
import numpy as np |
||||
import argparse |
||||
import imutils |
||||
import dlib |
||||
import cv2 |
||||
|
||||
def start_tracker(box, label, rgb, inputQueue, outputQueue): |
||||
# construct a dlib rectangle object from the bounding box |
||||
# coordinates and then start the correlation tracker |
||||
t = dlib.correlation_tracker() |
||||
rect = dlib.rectangle(box[0], box[1], box[2], box[3]) |
||||
t.start_track(rgb, rect) |
||||
|
||||
# loop indefinitely -- this function will be called as a daemon |
||||
# process so we don't need to worry about joining it |
||||
while True: |
||||
# attempt to grab the next frame from the input queue |
||||
rgb = inputQueue.get() |
||||
|
||||
# if there was an entry in our queue, process it |
||||
if rgb is not None: |
||||
# update the tracker and grab the position of the tracked |
||||
# object |
||||
t.update(rgb) |
||||
pos = t.get_position() |
||||
|
||||
# unpack the position object |
||||
startX = int(pos.left()) |
||||
startY = int(pos.top()) |
||||
endX = int(pos.right()) |
||||
endY = int(pos.bottom()) |
||||
|
||||
# add the label + bounding box coordinates to the output |
||||
# queue |
||||
outputQueue.put((label, (startX, startY, endX, endY))) |
||||
|
||||
# construct the argument parser and parse the arguments |
||||
ap = argparse.ArgumentParser() |
||||
ap.add_argument("-p", "--prototxt", required=True, |
||||
help="path to Caffe 'deploy' prototxt file") |
||||
ap.add_argument("-m", "--model", required=True, |
||||
help="path to Caffe pre-trained model") |
||||
ap.add_argument("-v", "--video", required=True, |
||||
help="path to input video file") |
||||
ap.add_argument("-o", "--output", type=str, |
||||
help="path to optional output video file") |
||||
ap.add_argument("-c", "--confidence", type=float, default=0.2, |
||||
help="minimum probability to filter weak detections") |
||||
args = vars(ap.parse_args()) |
||||
|
||||
# initialize our list of queues -- both input queue and output queue |
||||
# for *every* object that we will be tracking |
||||
inputQueues = [] |
||||
outputQueues = [] |
||||
|
||||
# initialize the list of class labels MobileNet SSD was trained to |
||||
# detect |
||||
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", |
||||
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable", |
||||
"dog", "horse", "motorbike", "person", "pottedplant", "sheep", |
||||
"sofa", "train", "tvmonitor"] |
||||
|
||||
# load our serialized model from disk |
||||
print("[INFO] loading model...") |
||||
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"]) |
||||
|
||||
# initialize the video stream and output video writer |
||||
print("[INFO] starting video stream...") |
||||
vs = cv2.VideoCapture(args["video"]) |
||||
writer = None |
||||
|
||||
# start the frames per second throughput estimator |
||||
fps = FPS().start() |
||||
|
||||
# loop over frames from the video file stream |
||||
while True: |
||||
# grab the next frame from the video file |
||||
(grabbed, frame) = vs.read() |
||||
|
||||
# check to see if we have reached the end of the video file |
||||
if frame is None: |
||||
break |
||||
|
||||
# resize the frame for faster processing and then convert the |
||||
# frame from BGR to RGB ordering (dlib needs RGB ordering) |
||||
frame = imutils.resize(frame, width=600) |
||||
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) |
||||
|
||||
# if we are supposed to be writing a video to disk, initialize |
||||
# the writer |
||||
if args["output"] is not None and writer is None: |
||||
fourcc = cv2.VideoWriter_fourcc(*"MJPG") |
||||
writer = cv2.VideoWriter(args["output"], fourcc, 30, |
||||
(frame.shape[1], frame.shape[0]), True) |
||||
|
||||
# if our list of queues is empty then we know we have yet to |
||||
# create our first object tracker |
||||
if len(inputQueues) == 0: |
||||
# grab the frame dimensions and convert the frame to a blob |
||||
(h, w) = frame.shape[:2] |
||||
blob = cv2.dnn.blobFromImage(frame, 0.007843, (w, h), 127.5) |
||||
|
||||
# pass the blob through the network and obtain the detections |
||||
# and predictions |
||||
net.setInput(blob) |
||||
detections = net.forward() |
||||
|
||||
# loop over the detections |
||||
for i in np.arange(0, detections.shape[2]): |
||||
# extract the confidence (i.e., probability) associated |
||||
# with the prediction |
||||
confidence = detections[0, 0, i, 2] |
||||
|
||||
# filter out weak detections by requiring a minimum |
||||
# confidence |
||||
if confidence > args["confidence"]: |
||||
# extract the index of the class label from the |
||||
# detections list |
||||
idx = int(detections[0, 0, i, 1]) |
||||
label = CLASSES[idx] |
||||
|
||||
# if the class label is not a person, ignore it |
||||
if CLASSES[idx] != "person": |
||||
continue |
||||
|
||||
# compute the (x, y)-coordinates of the bounding box |
||||
# for the object |
||||
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) |
||||
(startX, startY, endX, endY) = box.astype("int") |
||||
bb = (startX, startY, endX, endY) |
||||
|
||||
# create two brand new input and output queues, |
||||
# respectively |
||||
iq = multiprocessing.Queue() |
||||
oq = multiprocessing.Queue() |
||||
inputQueues.append(iq) |
||||
outputQueues.append(oq) |
||||
|
||||
# spawn a daemon process for a new object tracker |
||||
p = multiprocessing.Process( |
||||
target=start_tracker, |
||||
args=(bb, label, rgb, iq, oq)) |
||||
p.daemon = True |
||||
p.start() |
||||
|
||||
# grab the corresponding class label for the detection |
||||
# and draw the bounding box |
||||
cv2.rectangle(frame, (startX, startY), (endX, endY), |
||||
(0, 255, 0), 2) |
||||
cv2.putText(frame, label, (startX, startY - 15), |
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2) |
||||
|
||||
# otherwise, we've already performed detection so let's track |
||||
# multiple objects |
||||
else: |
||||
# loop over each of our input ques and add the input RGB |
||||
# frame to it, enabling us to update each of the respective |
||||
# object trackers running in separate processes |
||||
for iq in inputQueues: |
||||
iq.put(rgb) |
||||
|
||||
# loop over each of the output queues |
||||
for oq in outputQueues: |
||||
# grab the updated bounding box coordinates for the |
||||
# object -- the .get method is a blocking operation so |
||||
# this will pause our execution until the respective |
||||
# process finishes the tracking update |
||||
(label, (startX, startY, endX, endY)) = oq.get() |
||||
|
||||
# draw the bounding box from the correlation object |
||||
# tracker |
||||
cv2.rectangle(frame, (startX, startY), (endX, endY), |
||||
(0, 255, 0), 2) |
||||
cv2.putText(frame, label, (startX, startY - 15), |
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2) |
||||
|
||||
# check to see if we should write the frame to disk |
||||
if writer is not None: |
||||
writer.write(frame) |
||||
|
||||
# show the output frame |
||||
cv2.imshow("Frame", frame) |
||||
key = cv2.waitKey(1) & 0xFF |
||||
|
||||
# if the `q` key was pressed, break from the loop |
||||
if key == ord("q"): |
||||
break |
||||
|
||||
# update the FPS counter |
||||
fps.update() |
||||
|
||||
# stop the timer and display FPS information |
||||
fps.stop() |
||||
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed())) |
||||
print("[INFO] approx. FPS: {:.2f}".format(fps.fps())) |
||||
|
||||
# check to see if we need to release the video writer pointer |
||||
if writer is not None: |
||||
writer.release() |
||||
|
||||
# do a bit of cleanup |
||||
cv2.destroyAllWindows() |
||||
vs.release() |
@ -0,0 +1,100 @@
|
||||
# USAGE |
||||
# python multi_object_tracking.py --video videos/soccer_01.mp4 --tracker csrt |
||||
|
||||
# import the necessary packages |
||||
from imutils.video import VideoStream |
||||
import argparse |
||||
import imutils |
||||
import time |
||||
import cv2 |
||||
|
||||
# construct the argument parser and parse the arguments |
||||
ap = argparse.ArgumentParser() |
||||
ap.add_argument("-v", "--video", type=str, |
||||
help="path to input video file") |
||||
ap.add_argument("-t", "--tracker", type=str, default="kcf", |
||||
help="OpenCV object tracker type") |
||||
args = vars(ap.parse_args()) |
||||
|
||||
# initialize a dictionary that maps strings to their corresponding |
||||
# OpenCV object tracker implementations |
||||
OPENCV_OBJECT_TRACKERS = { |
||||
"csrt": cv2.TrackerCSRT_create, |
||||
"kcf": cv2.TrackerKCF_create, |
||||
"boosting": cv2.TrackerBoosting_create, |
||||
"mil": cv2.TrackerMIL_create, |
||||
"tld": cv2.TrackerTLD_create, |
||||
"medianflow": cv2.TrackerMedianFlow_create, |
||||
"mosse": cv2.TrackerMOSSE_create |
||||
} |
||||
|
||||
# initialize OpenCV's special multi-object tracker |
||||
trackers = cv2.MultiTracker_create() |
||||
|
||||
# if a video path was not supplied, grab the reference to the web cam |
||||
if not args.get("video", False): |
||||
print("[INFO] starting video stream...") |
||||
vs = VideoStream(src=0).start() |
||||
time.sleep(1.0) |
||||
|
||||
# otherwise, grab a reference to the video file |
||||
else: |
||||
vs = cv2.VideoCapture(args["video"]) |
||||
|
||||
# loop over frames from the video stream |
||||
while True: |
||||
# grab the current frame, then handle if we are using a |
||||
# VideoStream or VideoCapture object |
||||
frame = vs.read() |
||||
frame = frame[1] if args.get("video", False) else frame |
||||
|
||||
# check to see if we have reached the end of the stream |
||||
if frame is None: |
||||
break |
||||
|
||||
# resize the frame (so we can process it faster) |
||||
# frame = imutils.resize(frame, width=600) |
||||
|
||||
# grab the updated bounding box coordinates (if any) for each |
||||
# object that is being tracked |
||||
(success, boxes) = trackers.update(frame) |
||||
print('success', success) |
||||
print('boxes', boxes) |
||||
trackers.d |
||||
|
||||
# loop over the bounding boxes and draw then on the frame |
||||
for box in boxes: |
||||
(x, y, w, h) = [int(v) for v in box] |
||||
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) |
||||
|
||||
# show the output frame |
||||
cv2.imshow("Frame", frame) |
||||
key = cv2.waitKey(1) & 0xFF |
||||
|
||||
# if the 's' key is selected, we are going to "select" a bounding |
||||
# box to track |
||||
if key == ord("s"): |
||||
# select the bounding box of the object we want to track (make |
||||
# sure you press ENTER or SPACE after selecting the ROI) |
||||
box = cv2.selectROI("Frame", frame, fromCenter=False, |
||||
showCrosshair=True) |
||||
|
||||
# create a new object tracker for the bounding box and add it |
||||
# to our multi-object tracker |
||||
tracker = OPENCV_OBJECT_TRACKERS[args["tracker"]]() |
||||
trackers.add(tracker, frame, box) |
||||
|
||||
# if the `q` key was pressed, break from the loop |
||||
elif key == ord("q"): |
||||
break |
||||
|
||||
# if we are using a webcam, release the pointer |
||||
if not args.get("video", False): |
||||
vs.stop() |
||||
|
||||
# otherwise, release the file pointer |
||||
else: |
||||
vs.release() |
||||
|
||||
# close all windows |
||||
cv2.destroyAllWindows() |
@ -0,0 +1,167 @@
|
||||
"""USAGE |
||||
python cv_detector.py \ |
||||
--prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt \ |
||||
--model mobilenet_ssd/MobileNetSSD_deploy.caffemodel --video ~/Desktop/5min.mp4 |
||||
""" |
||||
# import the necessary packages |
||||
from imutils.video import FPS |
||||
import numpy as np |
||||
import argparse |
||||
import imutils |
||||
import dlib |
||||
import cv2 |
||||
from PIL import Image |
||||
|
||||
# construct the argument parser and parse the arguments |
||||
ap = argparse.ArgumentParser() |
||||
ap.add_argument("-p", "--prototxt", required=True, |
||||
help="path to Caffe 'deploy' prototxt file") |
||||
ap.add_argument("-m", "--model", required=True, |
||||
help="path to Caffe pre-trained model") |
||||
ap.add_argument("-v", "--video", required=True, |
||||
help="path to input video file") |
||||
ap.add_argument("-o", "--output", type=str, |
||||
help="path to optional output video file") |
||||
ap.add_argument("-c", "--confidence", type=float, default=0.2, |
||||
help="minimum probability to filter weak detections") |
||||
args = vars(ap.parse_args()) |
||||
|
||||
|
||||
# initialize the list of class labels MobileNet SSD was trained to |
||||
# detect |
||||
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", |
||||
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable", |
||||
"dog", "horse", "motorbike", "person", "pottedplant", "sheep", |
||||
"sofa", "train", "tvmonitor"] |
||||
|
||||
# load our serialized model from disk |
||||
print("[INFO] loading model...") |
||||
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"]) |
||||
|
||||
|
||||
# initialize the video stream and output video writer |
||||
print("[INFO] starting video stream...") |
||||
vs = cv2.VideoCapture(args["video"]) |
||||
_fps = vs.get(cv2.CAP_PROP_FPS) |
||||
|
||||
writer = None |
||||
|
||||
|
||||
# initialize the list of object trackers and corresponding class |
||||
# labels |
||||
# trackers = [] |
||||
labels = [] |
||||
# start the frames per second throughput estimator |
||||
fps = FPS().start() |
||||
frame_count = 0 |
||||
|
||||
# loop over frames from the video file stream |
||||
while True: |
||||
# grab the next frame from the video file |
||||
(grabbed, frame) = vs.read() |
||||
frame_count += 1 |
||||
_duration = frame_count / _fps |
||||
|
||||
# check to see if we have reached the end of the video file |
||||
if frame is None: |
||||
break |
||||
|
||||
|
||||
""" |
||||
ENTRANCE_1: from hospital |
||||
""" |
||||
|
||||
# 45. 325 == 164, 509 |
||||
|
||||
|
||||
|
||||
cropped_frame = frame[325:509, 45:164] |
||||
# cropped_frame.save("test.jpg") |
||||
cv2.imwrite('test.jpg', cropped_frame) |
||||
frame = cropped_frame |
||||
|
||||
|
||||
# resize the frame for faster processing and then convert the |
||||
# frame from BGR to RGB ordering (dlib needs RGB ordering) |
||||
# frame = imutils.resize(frame, width=600) |
||||
# rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) |
||||
|
||||
# grab the frame dimensions and convert the frame to a blob |
||||
(h, w) = frame.shape[:2] |
||||
blob = cv2.dnn.blobFromImage(frame, 0.007843, (w, h), 127.5) |
||||
|
||||
# pass the blob through the network and obtain the detections |
||||
# and predictions |
||||
net.setInput(blob) |
||||
detections = net.forward() |
||||
|
||||
|
||||
# loop over the detections |
||||
for i in np.arange(0, detections.shape[2]): |
||||
# extract the confidence (i.e., probability) associated |
||||
# with the prediction |
||||
confidence = detections[0, 0, i, 2] |
||||
|
||||
|
||||
# filter out weak detections by requiring a minimum |
||||
# confidence |
||||
if confidence > args["confidence"]: |
||||
# extract the index of the class label from the |
||||
# detections list |
||||
idx = int(detections[0, 0, i, 1]) |
||||
label = CLASSES[idx] |
||||
|
||||
DROP = ["diningtable", "chair", "aeroplane"] |
||||
if label in DROP: |
||||
continue |
||||
# if the class label is not a person, ignore it |
||||
# if CLASSES[idx] != "person": |
||||
# continue |
||||
|
||||
# compute the (x, y)-coordinates of the bounding box |
||||
# for the object |
||||
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) |
||||
(startX, startY, endX, endY) = box.astype("int") |
||||
|
||||
|
||||
print(f"[{_duration:0.02f}] label: {label} (x,y) = ({startX}, {startY})") |
||||
|
||||
# construct a dlib rectangle object from the bounding |
||||
# box coordinates and start the correlation tracker |
||||
# t = dlib.correlation_tracker() |
||||
# rect = dlib.rectangle(startX, startY, endX, endY) |
||||
# t.start_track(rgb, rect) |
||||
|
||||
# update our set of trackers and corresponding class |
||||
# labels |
||||
labels.append(label) |
||||
|
||||
# grab the corresponding class label for the detection |
||||
# and draw the bounding box |
||||
cv2.rectangle(frame, (startX, startY), (endX, endY), |
||||
(0, 255, 0), 2) |
||||
cv2.putText(frame, label, (startX, startY - 15), |
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2) |
||||
|
||||
# show the output frame |
||||
cv2.imshow("Frame", frame) |
||||
key = cv2.waitKey(1) & 0xFF |
||||
|
||||
# if the `q` key was pressed, break from the loop |
||||
if key == ord("q"): |
||||
break |
||||
|
||||
# update the FPS counter |
||||
# fps.update() |
||||
|
||||
|
||||
|
||||
# stop the timer and display FPS information |
||||
fps.stop() |
||||
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed())) |
||||
print("[INFO] approx. FPS: {:.2f}".format(fps.fps())) |
||||
|
||||
|
||||
# do a bit of cleanup |
||||
cv2.destroyAllWindows() |
||||
vs.release() |
@ -0,0 +1,103 @@
|
||||
"""USAGE |
||||
python examples/yolo_obj_detector.py \ |
||||
-c ~/dev/obj-tracking/yolov3.cfg \ |
||||
-w ~/dev/obj-tracking/yolov3.weights \ |
||||
-cl ~/dev/obj-tracking/yolo/darknet/data/coco.names \ |
||||
-i ~/dev/obj-tracking/person.jpg |
||||
""" |
||||
import cv2 |
||||
import argparse |
||||
import numpy as np |
||||
|
||||
ap = argparse.ArgumentParser() |
||||
ap.add_argument("-i", "--image", required=True, help="path to input image") |
||||
ap.add_argument("-c", "--config", required=True, help="path to yolo config file") |
||||
ap.add_argument( |
||||
"-w", "--weights", required=True, help="path to yolo pre-trained weights" |
||||
) |
||||
ap.add_argument( |
||||
"-cl", "--classes", required=True, help="path to text file containing class names" |
||||
) |
||||
args = ap.parse_args() |
||||
|
||||
|
||||
def get_output_layers(net): |
||||
layer_names = net.getLayerNames() |
||||
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()] |
||||
return output_layers |
||||
|
||||
|
||||
def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h): |
||||
label = str(classes[class_id]) |
||||
color = COLORS[class_id] |
||||
cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2) |
||||
cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) |
||||
|
||||
image = cv2.imread(args.image) |
||||
|
||||
Width = image.shape[1] |
||||
Height = image.shape[0] |
||||
scale = 0.00392 |
||||
|
||||
classes = None |
||||
|
||||
with open(args.classes, "r") as f: |
||||
classes = [line.strip() for line in f.readlines()] |
||||
|
||||
COLORS = np.random.uniform(0, 255, size=(len(classes), 3)) |
||||
|
||||
net = cv2.dnn.readNet(args.weights, args.config) |
||||
blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False) |
||||
|
||||
net.setInput(blob) |
||||
|
||||
outs = net.forward(get_output_layers(net)) |
||||
|
||||
class_ids = [] |
||||
confidences = [] |
||||
boxes = [] |
||||
conf_threshold = 0.5 |
||||
nms_threshold = 0.4 |
||||
|
||||
|
||||
for out in outs: |
||||
for detection in out: |
||||
scores = detection[5:] |
||||
class_id = np.argmax(scores) |
||||
confidence = scores[class_id] |
||||
if confidence > 0.5: |
||||
center_x = int(detection[0] * Width) |
||||
center_y = int(detection[1] * Height) |
||||
w = int(detection[2] * Width) |
||||
h = int(detection[3] * Height) |
||||
x = center_x - w / 2 |
||||
y = center_y - h / 2 |
||||
class_ids.append(class_id) |
||||
confidences.append(float(confidence)) |
||||
boxes.append([x, y, w, h]) |
||||
|
||||
|
||||
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold) |
||||
|
||||
for i in indices: |
||||
i = i[0] |
||||
box = boxes[i] |
||||
x = box[0] |
||||
y = box[1] |
||||
w = box[2] |
||||
h = box[3] |
||||
draw_prediction( |
||||
image, |
||||
class_ids[i], |
||||
confidences[i], |
||||
round(x), |
||||
round(y), |
||||
round(x + w), |
||||
round(y + h), |
||||
) |
||||
|
||||
cv2.imshow("object detection", image) |
||||
cv2.waitKey() |
||||
|
||||
cv2.imwrite("object-detection.jpg", image) |
||||
cv2.destroyAllWindows() |
Loading…
Reference in new issue