sipp11
5 years ago
4 changed files with 470 additions and 0 deletions
@@ -0,0 +1,337 @@
"""USAGE:

time python src/_detector.py --input ~/Desktop/5min.mp4 -o output.mp4

time python src/_detector.py --input ~/Desktop/5min.mp4 -l

"""
# import the necessary packages
import numpy as np
import argparse
import imutils
import time
import cv2
import os
import dlib
from utils import check_if_inside_the_boxes, is_it_the_same_obj, distance


# tracking
OPENCV_OBJECT_TRACKERS = {"csrt": cv2.TrackerCSRT_create}
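# only CSRT is registered above; other OpenCV trackers could be added
# the same way, a sketch assuming an opencv-contrib build of this era
# that ships these constructors:
# OPENCV_OBJECT_TRACKERS["kcf"] = cv2.TrackerKCF_create
# OPENCV_OBJECT_TRACKERS["mil"] = cv2.TrackerMIL_create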
trackers = []
finished = []


# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", required=True, help="path to input video")
ap.add_argument("-o", "--output", required=False, help="path to output video")
ap.add_argument("-l", "--live", action="store_true", help="show live detection")
# ap.add_argument("-y", "--yolo", required=True,
#     help="base path to YOLO directory")
ap.add_argument(
    "-c",
    "--confidence",
    type=float,
    default=0.95,
    help="minimum probability to filter weak detections",
)
ap.add_argument(
    "-t",
    "--threshold",
    type=float,
    default=0.3,
    help="threshold when applying non-maxima suppression",
)
args = vars(ap.parse_args())
# load the class labels our custom-trained YOLO model knows about
# labelsPath = os.path.sep.join([args["yolo"], "coco.names"])
labelsPath = "/Users/sipp11/syncthing/dropbox/tracking-obj/mytrain.names"
LABELS = open(labelsPath).read().strip().split("\n")
# 0 person, 1 wheelchair, 2 bicycle, 3 motorbike, 4 car, 5 bus, 6 truck


# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")


# derive the paths to the YOLO weights and model configuration
weightsPath = "/Users/sipp11/syncthing/dropbox/tracking-obj/mytrain_final.weights"
configPath = "/Users/sipp11/syncthing/dropbox/tracking-obj/mytrain.cfg"


# load our YOLO object detector trained on our custom dataset
# and determine only the *output* layer names that we need from YOLO
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
ln = net.getLayerNames()
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
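# NOTE: the `i[0] - 1` indexing matches OpenCV builds of this period,
# where getUnconnectedOutLayers() returns Nx1 arrays; newer releases
# (around 4.5.4 and later) return a flat array, which would need
# `ln[i - 1]` instead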


def detect_stuffs(net, frame):
    # construct a blob from the input frame and then perform a forward
    # pass of the YOLO object detector, giving us our bounding boxes
    # and associated probabilities
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    start = time.time()
    layerOutputs = net.forward(ln)
    end = time.time()

    # initialize our lists of detected bounding boxes, confidences,
    # and class IDs, respectively
    boxes = []
    confidences = []
    classIDs = []

    # loop over each of the layer outputs
    for output in layerOutputs:
        # print(f'[{_frame_count:08d}] output -> ', len(output))
        # loop over each of the detections
        for detection in output:
            # extract the class ID and confidence (i.e., probability)
            # of the current object detection
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # filter out weak predictions by ensuring the detected
            # probability is greater than the minimum probability
            if confidence <= args["confidence"]:
                continue

            # scale the bounding box coordinates back relative to
            # the size of the image, keeping in mind that YOLO
            # actually returns the center (x, y)-coordinates of
            # the bounding box followed by the box's width and
            # height
            box = detection[0:4] * np.array([W, H, W, H])
            (centerX, centerY, width, height) = box.astype("int")

            # use the center (x, y)-coordinates to derive the top
            # and left corner of the bounding box
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))

            # update our list of bounding box coordinates,
            # confidences, and class IDs
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(confidence))
            classIDs.append(classID)

    # apply non-maxima suppression to suppress weak, overlapping
    # bounding boxes
    idxs = cv2.dnn.NMSBoxes(
        boxes, confidences, args["confidence"], args["threshold"]
    )

    # ensure `idxs` is always an array so the caller can safely call
    # .flatten() even when no detection survived NMS (the original
    # `continue` here was a bug: it is not inside a loop)
    if len(idxs) == 0:
        idxs = np.empty((0,), dtype=int)

    # NOTE: we are not going to draw anything from DETECTION,
    # only from tracking
    # loop over the indexes we are keeping
    # for i in idxs.flatten():
    #     # extract the bounding box coordinates
    #     (x, y) = (boxes[i][0], boxes[i][1])
    #     (w, h) = (boxes[i][2], boxes[i][3])

    #     # draw a bounding box rectangle and label on the frame
    #     color = [int(c) for c in COLORS[classIDs[i]]]
    #     cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
    #     text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
    #     cv2.putText(
    #         frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
    #     )
    return idxs, boxes, confidences, classIDs, start, end
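# a minimal usage sketch for detect_stuffs (assumes W and H are already
# set and `frame` is a BGR image read via cv2):
# idxs, boxes, confidences, classIDs, t0, t1 = detect_stuffs(net, frame)
# for i in idxs.flatten():
#     x, y, w, h = boxes[i]
#     print(LABELS[classIDs[i]], confidences[i], (x, y, w, h))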


# initialize the video stream, pointer to output video file, and
# frame dimensions
vs = cv2.VideoCapture(args["input"])
writer = None
(W, H) = (None, None)

# try to determine the total number of frames in the video file
try:
    prop = (
        cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
    )
    total = int(vs.get(prop))
    print("[INFO] {} total frames in video".format(total))

# an error occurred while trying to determine the total
# number of frames in the video file
except Exception:
    print("[INFO] could not determine # of frames in video")
    print("[INFO] no approx. completion time can be provided")
    total = -1

_frame_count = 0
tracker_counter = 1


# loop over frames from the video file stream
while True:
    # read the next frame from the file
    (grabbed, frame) = vs.read()

    # if the frame was not grabbed, then we have reached the end
    # of the stream
    if not grabbed:
        break

    # if the frame dimensions are empty, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    _frame_count += 1

    # dlib's trackers expect RGB frames (OpenCV delivers BGR)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # only run the (expensive) detector every 15 frames, i.e. roughly
    # once a second at 15 fps; the trackers carry objects in between
    if _frame_count % 15 == 1:
        idxs, boxes, confidences, classIDs, start, end = detect_stuffs(net, frame)
        # loop over the indexes we are keeping
        for i in idxs.flatten():
            # extract the bounding box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            _class = LABELS[classIDs[i]]
            _good = check_if_inside_the_boxes(x, y, w, h, _class)
            if not _good:
                continue

            # (1) check whether it's the same object as one in trackers
            is_same = False
            for t in trackers:
                tracker = t["tracker"]
                if _class != t["type"]:
                    continue

                pos = tracker.get_position()
                # use dedicated names here; the original code reused
                # `i` and clobbered the detection-loop index above
                px = int(pos.left())
                py = int(pos.top())
                _w = int(pos.right()) - px
                _h = int(pos.bottom()) - py
                print(f"[{t['id']}] - {t['type']}")
                is_same = is_it_the_same_obj(x, y, w, h, px, py, _w, _h, id=t["id"])
                if is_same:
                    break

            if not is_same:
                # create a new object tracker for this bounding box and
                # add it to our multi-object tracker
                # tracker = OPENCV_OBJECT_TRACKERS[args["tracker"]]()
                # trackers.add(tracker, frame, box)
                tracker = dlib.correlation_tracker()
                rect = dlib.rectangle(x, y, x + w, y + h)
                print("NEW TRACKER rect", rect)

                t = {
                    "id": tracker_counter,
                    "type": _class,
                    "tracker": tracker,
                    "direction": "",
                    "last_distance": -1,
                    "last_position": (x + w / 2, y + h / 2),
                    "still": 0,
                }
                tracker_counter += 1
                tracker.start_track(frame_rgb, rect)
                trackers.append(t)
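                # from here on the dlib correlation tracker follows the
                # object on its own: update() advances it on each new
                # frame and get_position() reports the current box (see
                # the tracker loop below)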

            print(f" i -> {i} ({x},{y}), {w},{h} ({x + w},{y + h})")

            # # draw a bounding box rectangle and label on the frame
            # color = [int(c) for c in COLORS[classIDs[i]]]
            # cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            # text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
            # cv2.putText(
            #     frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
            # )

        _what = ",".join([LABELS[c] for c in classIDs])
        print(f"[{_frame_count:08d}] :: {_what}")

    untracking = []
    for tk in trackers:
        tk["tracker"].update(frame_rgb)
        pos = tk["tracker"].get_position()

        # unpack the position object
        startX = int(pos.left())
        startY = int(pos.top())
        endX = int(pos.right())
        endY = int(pos.bottom())

        tcx, tcy = (startX + endX) / 2, (startY + endY) / 2
        # calculate how far the centroid moved since the last frame
        _x, _y = tk["last_position"]
        _d = distance(_x, _y, tcx, tcy)
        _last_distance = tk["last_distance"]
        tk["last_distance"] = _d
        tk["last_position"] = (tcx, tcy)
        STILL_DISTANCE_IN_PX = 2
        if _last_distance < STILL_DISTANCE_IN_PX and _d < STILL_DISTANCE_IN_PX:
            tk["still"] += 1
        else:
            tk["still"] = 0

        # stop tracking anything that has been still for more than 30
        # frames or has drifted to the left/right edge of the frame
        if tk["still"] > 30 or tcx < 10 or tcx > 1200:
            untracking.append(tk)

        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
        color = [int(c) for c in COLORS[0]]
        print(
            f"{tk['id']} - {tk['type']} - centroid: {tcx, tcy} - distance: "
            f"[stl:{tk['still']}] {_last_distance:.3f} -> {_d:.3f}"
        )
        cv2.putText(
            frame,
            f"{tk['id']} - {tk['type']}",
            (startX, startY - 5),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            color,
            2,
        )

    # untracking: drop finished trackers from the active list
    untracking_ids = [ut["id"] for ut in untracking]
    trackers = [tk for tk in trackers if tk["id"] not in untracking_ids]
    finished += untracking

    if args["live"]:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    if args["output"]:
        # check if the video writer is None
        if writer is None:
            # initialize our video writer
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(
                args["output"], fourcc, 30, (frame.shape[1], frame.shape[0]), True
            )
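            # NOTE: MJPG is a safe default for .avi output; since the
            # USAGE above writes to an .mp4 file, a fourcc such as
            # "mp4v" may be needed instead, depending on the
            # OpenCV/FFmpeg build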

            # some information on processing a single frame
            if total > 0:
                elap = end - start
                print("[INFO] single frame took {:.4f} seconds".format(elap))
                print(
                    "[INFO] estimated total time to finish: {:.4f}".format(elap * total)
                )

        # write the output frame to disk
        writer.write(frame)

# release the file pointers
print("[INFO] cleaning up...")
if writer:
    writer.release()
vs.release()
@@ -0,0 +1,119 @@
import math


# detecting areas
AREAS = [
    [
        ("id", 1),
        ("area", ((0, 40), (12, 129))),
        ("target", ["car", "bus", "motorbike"]),
    ],
    [("id", 2), ("area", ((85, 0), (222, 74))), ("target", ["person", "bicycle"])],
    [("id", 3), ("area", ((38, 340), (99, 482))), ("target", ["person", "wheelchair"])],
    [
        ("id", 4),
        ("area", ((106, 310), (164, 461))),
        ("target", ["person", "wheelchair"]),
    ],
    [
        ("id", 5),
        ("area", ((286, 230), (441, 346))),
        ("target", ["person", "wheelchair"]),
    ],
    [
        ("id", 6),
        ("area", ((421, 190), (555, 304))),
        ("target", ["car", "bus", "motorbike"]),
    ],
    [
        ("id", 7),
        ("area", ((555, 170), (720, 295))),
        ("target", ["person", "wheelchair", "bicycle"]),
    ],
    [
        ("id", 8),
        ("area", ((877, 224), (947, 334))),
        ("target", ["person", "wheelchair"]),
    ],
    [
        ("id", 9),
        # NOTE: the second x-coordinate (112) is smaller than the first
        # (1047), so this area can never match; it looks like a typo
        ("area", ((1047, 229), (112, 338))),
        ("target", ["person", "wheelchair"]),
    ],
    [
        ("id", 10),
        ("area", ((1158, 200), (1230, 307))),
        ("target", ["person", "wheelchair"]),
    ],
]
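# each AREAS entry is a list of (key, value) pairs that dict() turns
# into a mapping at lookup time, e.g.:
# dict(AREAS[0]) == {
#     "id": 1,
#     "area": ((0, 40), (12, 129)),
#     "target": ["car", "bus", "motorbike"],
# }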


def distance(x2, y2, x1, y1):
    return math.sqrt(math.pow(x2 - x1, 2) + math.pow(y2 - y1, 2))
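# equivalently, the stdlib offers math.hypot(x2 - x1, y2 - y1)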


def check_if_inside_the_boxes(x, y, w, h, _type):
    cx, cy = x + w / 2, y + h / 2
    # print(cx, cy)
    is_inside = False
    for _box in AREAS:
        if is_inside:
            break
        box = dict(_box)
        ((x1, y1), (x2, y2)) = box["area"]
        # print(x1, cx, x2, ' -- ', y1, cy, y2, _type, box['target'])
        if x1 < cx and cx < x2 and y1 < cy and cy < y2 and _type in box["target"]:
            # print('inside --> ', _type, cx, cy, box['id'])
            is_inside = True
        # if diff_x < box_w
        if is_inside:
            print("INSIDE!! this -> ", box)
    return is_inside


def is_it_the_same_obj(x1, y1, w1, h1, i1, j1, w2, h2, **kwargs):
    """Use the centroid locations to check whether two boxes are the
    same object and, of course, their dimensions too.
    """
    _id = kwargs.get("id", None)
    if _id:
        print(" :: check against id:", _id)
    DIMENSION_SHIFT = 0.15
    # we have to use the centroid (from experience)
    cx1, cy1, cx2, cy2 = x1 + w1 / 2, y1 + h1 / 2, i1 + w2 / 2, j1 + h2 / 2

    c_dff_x, c_dff_y = abs(cx2 - cx1), abs(cy2 - cy1)
    w_shift, h_shift = w1 * DIMENSION_SHIFT, h1 * DIMENSION_SHIFT
    print(" ::SAME:: shift")
    print(f" ---> SHIFT --> w:{w_shift}, h:{h_shift}")
    print(f" ---> centroid {c_dff_x}, {c_dff_y}")
    if c_dff_x > w_shift and c_dff_y > h_shift:
        print(" ::SAME:: shifted too much already -- NOT THE SAME")
        return False

    # if one is inside the other
    if i1 > x1 and (w1 - w2) > i1 - x1 and j1 > y1 and h1 - h2 > j1 - y1:
        # the new box is inside the existing tracker's box
        print(" ::SAME:: new one inside existing tracker")
        return True
    if x1 > i1 and (w2 - w1) > x1 - i1 and y1 > j1 and h2 - h1 > y1 - j1:
        # the existing tracker's box is inside the new box
        print(" ::SAME:: existing tracker inside new tracker")
        return True

    # if neither is inside the other, fall back to comparing "size"
    size1, size2 = w1 * h1, w2 * h2
    # if the sizes differ by more than 45%, it's not the same thing
    print(f" ---> size {size1}, {size2}, diff % : {abs(size2 - size1)/size1}")
    print(" ::SAME:: size")
    if abs(size2 - size1) / size1 > 0.45:
        print(" ::SAME:: too different in size -- NOT THE SAME")
        return False

    print(" ::SAME:: last")
    return True


if __name__ == "__main__":
    check_if_inside_the_boxes(461, 263, 24, 65, "person")
    check_if_inside_the_boxes(8, 45, 172, 193, "bus")
    check_if_inside_the_boxes(300, 300, 24, 65, "person")
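    # a quick, hypothetical sanity check for is_it_the_same_obj: two
    # boxes this close in centroid and size should be reported as the
    # same object
    # print(is_it_the_same_obj(100, 100, 50, 80, 103, 102, 52, 78))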