Crops to rowing machine screen - can be trained with optimize_crop.py and screen_classifier

This commit is contained in:
2026-03-16 13:46:02 +00:00
parent 2e386a4297
commit f0184319c6
4 changed files with 309 additions and 31 deletions

View File

@@ -18,26 +18,39 @@ import glob
import sys
def order_corners(pts):
    """Order 4 points as [top-left, top-right, bottom-right, bottom-left].

    The points are sorted by their angle around the centroid and the
    resulting ring is rotated so that it starts at the point with the
    smallest ``x + y`` (the top-left corner). In image coordinates (y grows
    downwards) increasing ``arctan2`` angle runs clockwise on screen, so the
    ring reads top-left -> top-right -> bottom-right -> bottom-left.

    Unlike picking each corner independently from the min/max of ``x + y``
    and ``y - x``, this always returns a permutation of the four inputs:
    a strongly rotated quadrilateral (e.g. a diamond) can no longer have one
    point assigned to two corners while another is dropped.

    Args:
        pts: Four (x, y) points of a convex quadrilateral, as anything
            convertible to an array with 8 values (e.g. shape ``(4, 2)`` or
            the ``(4, 1, 2)`` layout of an OpenCV contour).

    Returns:
        A new ``float32`` array of shape ``(4, 2)`` holding the ordered
        corners.

    Raises:
        ValueError: If ``pts`` does not contain exactly four 2-D points.
    """
    pts = np.asarray(pts, dtype="float32").reshape(4, 2)

    # Angular position of every point as seen from the centroid.
    centre = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])

    # Walk the quadrilateral in (on-screen) clockwise order.
    ring = pts[np.argsort(angles, kind="stable")]

    # Rotate the ring so the top-left corner (smallest x + y) comes first.
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0)
def find_screen(image):
"""
Detect the Concept 2 PM5 LCD screen region in the image.
Returns (x, y, w, h) bounding box or None if not found.
Returns (x, y, w, h, contour) or None if not found.
The contour is the best-matching contour for perspective correction.
"""
h_img, w_img = image.shape[:2]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Pre-compute edge map for internal-content scoring
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edges = cv2.Canny(blurred, 50, 150)
blurred = cv2.GaussianBlur(gray, (11, 11), 0)
edges = cv2.Canny(blurred, 80, 100)
candidates = []
# Sweep brightness thresholds — screen brightness varies by
# lighting conditions (ranges from ~100 in dim gyms to ~200+)
for thresh_val in range(120, 200, 10):
for thresh_val in range(70, 210, 10):
_, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
kern = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))
kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)
@@ -54,39 +67,65 @@ def find_screen(image):
# Size: screen is a small-to-medium portion of the photo
area_ratio = rect_area / (h_img * w_img)
if area_ratio < 0.005 or area_ratio > 0.12:
if area_ratio < 0.004480508227271387 or area_ratio > 0.13807760800032298:
continue
# Aspect ratio: LCD is roughly square (0.5 to 1.6)
# Aspect ratio: LCD is roughly square
aspect = w / h
if aspect < 0.5 or aspect > 1.6:
if aspect < 0.6831978184146027 or aspect > 1.9505294279578584:
continue
# Rectangularity
rectangularity = area / rect_area
if rectangularity < 0.4:
if rectangularity < 0.6914579162415992:
continue
# KEY: edge density — LCD with text > 0.03, plain surfaces < 0.01
# KEY: edge density — LCD with text has high edge density
roi_edges = edges[y : y + h, x : x + w]
edge_density = np.sum(roi_edges > 0) / rect_area
if edge_density < 0.03:
if edge_density < 0.012759310759672408:
continue
# Score: edge density * area * rectangularity
# This favours text-rich regions that are large and well-shaped
score = edge_density * area * rectangularity
candidates.append((score, x, y, w, h))
candidates.append((score, x, y, w, h, cnt))
if not candidates:
return None
candidates.sort(key=lambda c: c[0], reverse=True)
return candidates[0][1:]
best = candidates[0]
return best[1], best[2], best[3], best[4], best[5]
def perspective_correct(image, contour, dst_w, dst_h):
    """Warp the screen quadrilateral to a flat rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        contour: Contour outlining the screen; it is simplified to four
            corner points before warping.
        dst_w: Width of the output image in pixels.
        dst_h: Height of the output image in pixels.

    Returns:
        The warped image of size ``(dst_w, dst_h)``, with the four detected
        corners mapped onto the corners of the output.
    """
    perimeter = cv2.arcLength(contour, True)

    # Simplify the contour with progressively larger tolerances (expressed
    # as a fraction of the perimeter) and stop at the first one that
    # yields exactly four vertices.
    quad = None
    for fraction in (0.02, 0.03, 0.05, 0.08, 0.10):
        polygon = cv2.approxPolyDP(contour, fraction * perimeter, True)
        if len(polygon) == 4:
            quad = polygon.reshape(4, 2).astype(np.float32)
            break

    if quad is None:
        # No tolerance produced a quadrilateral: fall back to the corners
        # of the minimum-area bounding rectangle.
        quad = cv2.boxPoints(cv2.minAreaRect(contour)).astype(np.float32)

    # Destination corners in the same order that order_corners() produces:
    # top-left, top-right, bottom-right, bottom-left.
    right = dst_w - 1
    bottom = dst_h - 1
    target = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype="float32",
    )

    transform = cv2.getPerspectiveTransform(order_corners(quad), target)
    return cv2.warpPerspective(image, transform, (dst_w, dst_h))
def crop_screen(image_path, output_path, padding=15):
"""Load an image, find the screen, crop and save it."""
"""Load an image, find the screen, perspective-correct and save it."""
image = cv2.imread(image_path)
if image is None:
print(f" ERROR: Could not read {image_path}")
@@ -99,16 +138,12 @@ def crop_screen(image_path, output_path, padding=15):
print(f" SKIP: No screen detected in {os.path.basename(image_path)}")
return False
x, y, w, h = result
x, y, w, h, contour = result
# Add padding, clamped to image bounds
x1 = max(0, x - padding)
y1 = max(0, y - padding)
x2 = min(w_img, x + w + padding)
y2 = min(h_img, y + h + padding)
# Use perspective correction to flatten the screen
corrected = perspective_correct(image, contour, w + 2 * padding, h + 2 * padding)
cropped = image[y1:y2, x1:x2]
cv2.imwrite(output_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
cv2.imwrite(output_path, corrected, [cv2.IMWRITE_JPEG_QUALITY, 95])
print(
f" OK: {os.path.basename(image_path)} -> {os.path.basename(output_path)} ({w}x{h})"
)