Basketball Court Video Tracking with OpenCV - My Learning Journey: Projects & Insights

I am working on my own basketball video analytics tool from scratch. I have a bit of experience with tracking and video, but not with basketball. The type of video I am aiming to work with is amateur, non-professional footage: not the type of video you’d see on TV, but video from a cell phone camera or a slightly better setup where the camera is on a tripod.

There are many high-level features I’d like to work toward. The main one is producing analysis from the video that enables me to evaluate a player. That means tracking the player, and in order to track the player, there are many more things to consider:

Player position relative to the ball. Do they have possession, are they on offense or defense?
Did they pass or shoot? Did they make the shot?
Did they steal the ball or block it?
Where are they on the court?
…where’s the ball?
Where’s the court?

So many things! I decided to just pick something. So in this quick experiment, I decided to just track the court as the camera moves. And to stack the cards in my favor, I allow the points to be tracked to be selected manually. My goal was to have a simple app that allows me to select four points on a video frame and then track those points throughout the video. I want to figure out how I can start building a robust court registration (i.e. camera view estimation) algorithm.

Step 1: Selecting Points

Using OpenCV’s mouse callback, I enabled interactive selection of four points on a frame. As each point is selected, it is drawn on the frame, and a crosshair follows the mouse for precision.

import cv2
import numpy as np

# Shared picker state: the latest cursor location (None until the first mouse
# event arrives) and the clicked points in the order they were chosen.
mouse_pos = None
points = []


def select_point(event, x, y, flags, param):
    """Mouse callback: remember the cursor position and record left clicks.

    Every event updates the module-level ``mouse_pos``. A left-button press
    additionally appends ``(x, y)`` to the module-level ``points`` list and
    prints the selected coordinates. ``flags`` and ``param`` are accepted to
    match the callback signature but are not used.
    """
    global mouse_pos
    cursor = (x, y)
    mouse_pos = cursor
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    points.append(cursor)
    print(f"Point selected: ({x}, {y})")

# Number of points to collect before the picker closes on its own.
REQUIRED_POINTS = 4

cv2.namedWindow("Image")
cv2.setMouseCallback("Image", select_point)

# NOTE(review): `frame` is not defined in this snippet; it must already hold
# the image to annotate (presumably the first frame read from the video).
while True:
    # Draw on a copy so the markers never end up in the source frame.
    display = frame.copy()
    for pt in points:
        cv2.circle(display, pt, 5, (0, 0, 255), -1)
    if mouse_pos is not None:
        # Crosshair under the cursor for more precise clicking.
        cv2.drawMarker(display, mouse_pos, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=15, thickness=1)
    cv2.imshow("Image", display)
    key = cv2.waitKey(1) & 0xFF
    # ESC (27) aborts early. Use `>=` rather than `==`: mouse events are
    # handled inside waitKey(), so more than one click can land between two
    # checks, and an exact-equality test would then never fire.
    if key == 27 or len(points) >= REQUIRED_POINTS:
        break
cv2.destroyAllWindows()
# Drop any surplus clicks so downstream code sees at most REQUIRED_POINTS.
del points[REQUIRED_POINTS:]
print("Selected points:", points)

import cv2
import numpy as np

# Shared picker state: the latest cursor location (None until the first mouse
# event arrives) and the clicked points in the order they were chosen.
mouse_pos = None
points = []


def select_point(event, x, y, flags, param):
    """Mouse callback: remember the cursor position and record left clicks.

    Every event updates the module-level ``mouse_pos``. A left-button press
    additionally appends ``(x, y)`` to the module-level ``points`` list and
    prints the selected coordinates. ``flags`` and ``param`` are accepted to
    match the callback signature but are not used.
    """
    global mouse_pos
    cursor = (x, y)
    mouse_pos = cursor
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    points.append(cursor)
    print(f"Point selected: ({x}, {y})")

# Number of points to collect before the picker closes on its own.
REQUIRED_POINTS = 4

cv2.namedWindow("Image")
cv2.setMouseCallback("Image", select_point)

# NOTE(review): `frame` is not defined in this snippet; it must already hold
# the image to annotate (presumably the first frame read from the video).
while True:
    # Draw on a copy so the markers never end up in the source frame.
    display = frame.copy()
    for pt in points:
        cv2.circle(display, pt, 5, (0, 0, 255), -1)
    if mouse_pos is not None:
        # Crosshair under the cursor for more precise clicking.
        cv2.drawMarker(display, mouse_pos, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=15, thickness=1)
    cv2.imshow("Image", display)
    key = cv2.waitKey(1) & 0xFF
    # ESC (27) aborts early. Use `>=` rather than `==`: mouse events are
    # handled inside waitKey(), so more than one click can land between two
    # checks, and an exact-equality test would then never fire.
    if key == 27 or len(points) >= REQUIRED_POINTS:
        break
cv2.destroyAllWindows()
# Drop any surplus clicks so downstream code sees at most REQUIRED_POINTS.
del points[REQUIRED_POINTS:]
print("Selected points:", points)

Step 2: Tracking Points with OpenCV Trackers

For robust tracking, I used OpenCV’s CSRT tracker (also tested MOSSE). Each selected point is initialized with a small ROI, and the tracker follows the region through the video.

# Side length, in pixels, of the square region tracked around each point.
roi_size = 128
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video: {video_path}")
cap.set(cv2.CAP_PROP_POS_FRAMES, SKIP_TO_FRAME)
ret, frame = cap.read()
if not ret:
    # Without a valid first frame there is nothing to initialise trackers on.
    cap.release()
    raise RuntimeError(f"Could not read frame {SKIP_TO_FRAME} from {video_path}")

frame_h, frame_w = frame.shape[:2]
# Never ask for an ROI larger than the frame itself.
w = min(roi_size, frame_w)
h = min(roi_size, frame_h)

trackers = []
USE_MOSSE = True
for (x, y) in points:
    if USE_MOSSE:
        tracker = cv2.legacy.TrackerMOSSE_create()
    else:
        tracker = cv2.legacy.TrackerCSRT_create()
    # Centre the ROI on the point, then shift it back inside the frame on
    # every side (not just top/left) so the initial box is fully in bounds.
    x0 = min(max(0, x - w // 2), frame_w - w)
    y0 = min(max(0, y - h // 2), frame_h - h)
    tracker.init(frame, (x0, y0, w, h))
    trackers.append(tracker)

# Main tracking loop: update every tracker on each new frame, draw the
# result, and show it until the video ends or ESC is pressed.
while True:
    ret, frame = cap.read()
    if not ret:
        # No more frames could be read; stop tracking.
        break

    for i, tracker in enumerate(trackers):
        success, bbox = tracker.update(frame)
        if not success:
            # Stack the "lost" notices vertically, one row per tracker.
            cv2.putText(frame, f'Lost P{i+1}', (10, 30 + 20*i), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2, cv2.LINE_AA)
            continue

        x, y, w, h = map(int, bbox)
        top_left = (x, y)
        bottom_right = (x + w, y + h)
        cx = x + w // 2
        cy = y + h // 2
        # Box outline, centre dot, and a label next to the centre.
        cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)
        cv2.circle(frame, (cx, cy), 4, (0, 0, 255), -1)
        cv2.putText(frame, f'P{i+1}', (cx+5, cy-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)

    cv2.imshow('Tracking', frame)
    pressed = cv2.waitKey(30) & 0xFF
    if pressed == 27:
        # ESC ends playback early.
        break

cap.release()
cv2.destroyAllWindows()

# Side length, in pixels, of the square region tracked around each point.
roi_size = 128
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video: {video_path}")
cap.set(cv2.CAP_PROP_POS_FRAMES, SKIP_TO_FRAME)
ret, frame = cap.read()
if not ret:
    # Without a valid first frame there is nothing to initialise trackers on.
    cap.release()
    raise RuntimeError(f"Could not read frame {SKIP_TO_FRAME} from {video_path}")

frame_h, frame_w = frame.shape[:2]
# Never ask for an ROI larger than the frame itself.
w = min(roi_size, frame_w)
h = min(roi_size, frame_h)

trackers = []
USE_MOSSE = True
for (x, y) in points:
    if USE_MOSSE:
        tracker = cv2.legacy.TrackerMOSSE_create()
    else:
        tracker = cv2.legacy.TrackerCSRT_create()
    # Centre the ROI on the point, then shift it back inside the frame on
    # every side (not just top/left) so the initial box is fully in bounds.
    x0 = min(max(0, x - w // 2), frame_w - w)
    y0 = min(max(0, y - h // 2), frame_h - h)
    tracker.init(frame, (x0, y0, w, h))
    trackers.append(tracker)

# Main tracking loop: update every tracker on each new frame, draw the
# result, and show it until the video ends or ESC is pressed.
while True:
    ret, frame = cap.read()
    if not ret:
        # No more frames could be read; stop tracking.
        break

    for i, tracker in enumerate(trackers):
        success, bbox = tracker.update(frame)
        if not success:
            # Stack the "lost" notices vertically, one row per tracker.
            cv2.putText(frame, f'Lost P{i+1}', (10, 30 + 20*i), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2, cv2.LINE_AA)
            continue

        x, y, w, h = map(int, bbox)
        top_left = (x, y)
        bottom_right = (x + w, y + h)
        cx = x + w // 2
        cy = y + h // 2
        # Box outline, centre dot, and a label next to the centre.
        cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)
        cv2.circle(frame, (cx, cy), 4, (0, 0, 255), -1)
        cv2.putText(frame, f'P{i+1}', (cx+5, cy-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)

    cv2.imshow('Tracking', frame)
    pressed = cv2.waitKey(30) & 0xFF
    if pressed == 27:
        # ESC ends playback early.
        break

cap.release()
cv2.destroyAllWindows()

As seen in the above videos, I was able to get about 15 fps with MOSSE tracking and 8 fps with CSRT. Note that MOSSE does drift more than CSRT.

Lessons Learned

CSRT is more robust than MOSSE for small, moving ROIs, but slower.
MOSSE is very fast and works well for simple, high-contrast targets.
I still think that for very small points, optical flow or keypoint matching may be better.
Always use opencv-contrib-python for tracker support in OpenCV 4.x.

It’s a start but not a solution to court tracking.

Future Work

There is plenty of work to do after this.

I need to label locations so I can create ground truth points for the video
Run each detection method and store the results
Compare each result with varying parameters; e.g., window size.
Use the 4 points as a guide. So if one of the 4 points drifts, we can lock it back in.
Since I am detecting 4 points on the 2 elbows of the free throw line, I should be able to roughly know camera pointing angle.