Crops to rowing machine screen - can be trained with optimize_crop.py and screen_classifier

This commit is contained in:
2026-03-16 13:46:02 +00:00
parent 2e386a4297
commit f0184319c6
4 changed files with 309 additions and 31 deletions

View File

@@ -18,26 +18,39 @@ import glob
import sys import sys
def order_corners(pts):
    """Order 4 points as [top-left, top-right, bottom-right, bottom-left].

    Args:
        pts: Four (x, y) points. Anything that reshapes to (4, 2) is
            accepted: a (4, 2) array, an OpenCV-style (4, 1, 2) contour
            array, or a plain list of pairs.

    Returns:
        A (4, 2) float32 array holding the points in the order top-left,
        top-right, bottom-right, bottom-left (image coordinates, with y
        growing downwards).
    """
    # Normalise the layout so contour-shaped arrays and lists work as well
    # as the plain (4, 2) array the selection logic below expects.
    pts = np.asarray(pts).reshape(-1, 2)

    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the
    # bottom-right corner.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right corner and largest at the
    # bottom-left corner.
    d = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(d)]
    rect[3] = pts[np.argmax(d)]
    return rect
def find_screen(image): def find_screen(image):
""" """
Detect the Concept 2 PM5 LCD screen region in the image. Detect the Concept 2 PM5 LCD screen region in the image.
Returns (x, y, w, h) bounding box or None if not found. Returns (x, y, w, h, contour) or None if not found.
The contour is the best-matching contour for perspective correction.
""" """
h_img, w_img = image.shape[:2] h_img, w_img = image.shape[:2]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Pre-compute edge map for internal-content scoring # Pre-compute edge map for internal-content scoring
blurred = cv2.GaussianBlur(gray, (5, 5), 0) blurred = cv2.GaussianBlur(gray, (11, 11), 0)
edges = cv2.Canny(blurred, 50, 150) edges = cv2.Canny(blurred, 80, 100)
candidates = [] candidates = []
# Sweep brightness thresholds — screen brightness varies by # Sweep brightness thresholds — screen brightness varies by
# lighting conditions (ranges from ~100 in dim gyms to ~200+) # lighting conditions (ranges from ~100 in dim gyms to ~200+)
for thresh_val in range(120, 200, 10): for thresh_val in range(70, 210, 10):
_, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY) _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
kern = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11)) kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern) thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern) thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)
@@ -54,39 +67,65 @@ def find_screen(image):
# Size: screen is a small-to-medium portion of the photo # Size: screen is a small-to-medium portion of the photo
area_ratio = rect_area / (h_img * w_img) area_ratio = rect_area / (h_img * w_img)
if area_ratio < 0.005 or area_ratio > 0.12: if area_ratio < 0.004480508227271387 or area_ratio > 0.13807760800032298:
continue continue
# Aspect ratio: LCD is roughly square (0.5 to 1.6) # Aspect ratio: LCD is roughly square
aspect = w / h aspect = w / h
if aspect < 0.5 or aspect > 1.6: if aspect < 0.6831978184146027 or aspect > 1.9505294279578584:
continue continue
# Rectangularity # Rectangularity
rectangularity = area / rect_area rectangularity = area / rect_area
if rectangularity < 0.4: if rectangularity < 0.6914579162415992:
continue continue
# KEY: edge density — LCD with text > 0.03, plain surfaces < 0.01 # KEY: edge density — LCD with text has high edge density
roi_edges = edges[y : y + h, x : x + w] roi_edges = edges[y : y + h, x : x + w]
edge_density = np.sum(roi_edges > 0) / rect_area edge_density = np.sum(roi_edges > 0) / rect_area
if edge_density < 0.03: if edge_density < 0.012759310759672408:
continue continue
# Score: edge density * area * rectangularity # Score: edge density * area * rectangularity
# This favours text-rich regions that are large and well-shaped # This favours text-rich regions that are large and well-shaped
score = edge_density * area * rectangularity score = edge_density * area * rectangularity
candidates.append((score, x, y, w, h)) candidates.append((score, x, y, w, h, cnt))
if not candidates: if not candidates:
return None return None
candidates.sort(key=lambda c: c[0], reverse=True) candidates.sort(key=lambda c: c[0], reverse=True)
return candidates[0][1:] best = candidates[0]
return best[1], best[2], best[3], best[4], best[5]
def perspective_correct(image, contour, dst_w, dst_h):
    """Warp the screen quadrilateral onto a flat dst_w x dst_h rectangle."""
    perimeter = cv2.arcLength(contour, True)

    # Try progressively coarser polygon approximations and stop at the first
    # one that yields exactly four vertices.
    for tolerance in (0.02, 0.03, 0.05, 0.08, 0.10):
        polygon = cv2.approxPolyDP(contour, tolerance * perimeter, True)
        if len(polygon) == 4:
            corners = polygon.reshape(4, 2).astype(np.float32)
            break
    else:
        # No tolerance produced a quadrilateral: use the corners of the
        # minimum-area rectangle around the contour instead.
        corners = cv2.boxPoints(cv2.minAreaRect(contour)).astype(np.float32)

    # Destination corners, in the same order that order_corners() returns.
    target = np.array(
        [
            [0, 0],
            [dst_w - 1, 0],
            [dst_w - 1, dst_h - 1],
            [0, dst_h - 1],
        ],
        dtype="float32",
    )

    transform = cv2.getPerspectiveTransform(order_corners(corners), target)
    return cv2.warpPerspective(image, transform, (dst_w, dst_h))
def crop_screen(image_path, output_path, padding=15): def crop_screen(image_path, output_path, padding=15):
"""Load an image, find the screen, crop and save it.""" """Load an image, find the screen, perspective-correct and save it."""
image = cv2.imread(image_path) image = cv2.imread(image_path)
if image is None: if image is None:
print(f" ERROR: Could not read {image_path}") print(f" ERROR: Could not read {image_path}")
@@ -99,16 +138,12 @@ def crop_screen(image_path, output_path, padding=15):
print(f" SKIP: No screen detected in {os.path.basename(image_path)}") print(f" SKIP: No screen detected in {os.path.basename(image_path)}")
return False return False
x, y, w, h = result x, y, w, h, contour = result
# Add padding, clamped to image bounds # Use perspective correction to flatten the screen
x1 = max(0, x - padding) corrected = perspective_correct(image, contour, w + 2 * padding, h + 2 * padding)
y1 = max(0, y - padding)
x2 = min(w_img, x + w + padding)
y2 = min(h_img, y + h + padding)
cropped = image[y1:y2, x1:x2] cv2.imwrite(output_path, corrected, [cv2.IMWRITE_JPEG_QUALITY, 95])
cv2.imwrite(output_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
print( print(
f" OK: {os.path.basename(image_path)} -> {os.path.basename(output_path)} ({w}x{h})" f" OK: {os.path.basename(image_path)} -> {os.path.basename(output_path)} ({w}x{h})"
) )

202
optimize_crop.py Normal file
View File

@@ -0,0 +1,202 @@
"""
Optimize crop_to_screen.py parameters using Optuna.
Uses the CNN classifier (cnn_predict) from screen_classifier.py as the
evaluation function. For each trial, runs find_screen() with suggested
parameters on all source photos and counts how many crops are classified
as rowing displays (label=1). Optuna maximises this count.
Usage:
python optimize_crop.py [--n-trials 300] [--photos-dir photos/]
"""
import argparse
import glob
import os
import tempfile
import cv2
import numpy as np
import optuna
from screen_classifier import cnn_predict
def find_screen_parameterized(image, params):
    """
    Detect the Concept 2 PM5 LCD screen region in the image.

    Same logic as crop_to_screen.find_screen but with tunable parameters.

    Args:
        image: Image array; it is converted with cv2.COLOR_BGR2GRAY, so a
            BGR image (as returned by cv2.imread) is expected.
        params: Mapping with the keys "gaussian_kernel_size", "canny_low",
            "canny_high", "thresh_min", "thresh_max", "morph_kernel_size",
            "area_ratio_min", "area_ratio_max", "aspect_min", "aspect_max",
            "rectangularity_min" and "edge_density_min".

    Returns:
        (x, y, w, h) bounding box of the best-scoring candidate, or None if
        no contour passes every filter.
    """
    h_img, w_img = image.shape[:2]
    image_area = h_img * w_img
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Edge map used to measure how much detail each candidate contains.
    gk = params["gaussian_kernel_size"]
    blurred = cv2.GaussianBlur(gray, (gk, gk), 0)
    edges = cv2.Canny(blurred, params["canny_low"], params["canny_high"])

    # The structuring element does not depend on the threshold value, so it
    # is built once instead of on every sweep iteration.
    mk = params["morph_kernel_size"]
    kern = cv2.getStructuringElement(cv2.MORPH_RECT, (mk, mk))

    candidates = []

    # Sweep brightness thresholds in steps of 10 and collect every contour
    # that passes all of the shape and content filters.
    for thresh_val in range(params["thresh_min"], params["thresh_max"], 10):
        _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)

        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            area = cv2.contourArea(cnt)
            rect_area = w * h
            if rect_area == 0:
                continue

            # Size of the bounding box relative to the whole photo.
            area_ratio = rect_area / image_area
            if (
                area_ratio < params["area_ratio_min"]
                or area_ratio > params["area_ratio_max"]
            ):
                continue

            # Width / height of the bounding box (h != 0 because
            # rect_area != 0 was checked above).
            aspect = w / h
            if aspect < params["aspect_min"] or aspect > params["aspect_max"]:
                continue

            # Fraction of the bounding box that the contour actually fills.
            rectangularity = area / rect_area
            if rectangularity < params["rectangularity_min"]:
                continue

            # Fraction of bounding-box pixels that are Canny edge pixels.
            roi_edges = edges[y : y + h, x : x + w]
            edge_density = np.sum(roi_edges > 0) / rect_area
            if edge_density < params["edge_density_min"]:
                continue

            score = edge_density * area * rectangularity
            candidates.append((score, x, y, w, h))

    if not candidates:
        return None

    # max() returns the first of equally scored candidates, which is the same
    # element a stable descending sort would place first.
    return max(candidates, key=lambda c: c[0])[1:]
def load_images(photos_dir):
    """Load all source images once for reuse across trials.

    Files directly inside photos_dir whose extension is .jpg or .jpeg (in
    any letter case) are read with cv2.imread. The directory is listed once
    and filtered by suffix, so each file is loaded exactly once no matter
    how the platform treats letter case in file names.

    Args:
        photos_dir: Directory containing the source photos. An empty string
            means the current directory.

    Returns:
        List of (path, image) tuples sorted by path. Files that cv2.imread
        cannot decode are skipped. An unreadable or missing directory yields
        an empty list.
    """
    try:
        names = os.listdir(photos_dir or os.curdir)
    except OSError:
        # A directory that cannot be listed is treated as "no files".
        return []

    paths = sorted(
        os.path.join(photos_dir, name)
        for name in names
        # Dot-files are skipped, as a "*" glob pattern would skip them.
        if not name.startswith(".")
        and os.path.splitext(name)[1].lower() in (".jpg", ".jpeg")
    )

    images = []
    for path in paths:
        img = cv2.imread(path)
        if img is not None:
            images.append((path, img))
    return images
def make_objective(images, tmp_dir, model_path):
    """Build an Optuna objective bound to images, tmp_dir and model_path.

    The returned objective(trial) samples one parameter set, runs
    find_screen_parameterized() on every image, and returns how many of the
    resulting crops cnn_predict() labels as 1.
    """
    # (name, low, high, step) for every integer parameter, in sampling order.
    int_space = (
        ("thresh_min", 60, 160, 10),
        ("thresh_max", 160, 255, 10),
        ("morph_kernel_size", 3, 21, 2),
        ("gaussian_kernel_size", 3, 11, 2),
        ("canny_low", 20, 100, 10),
        ("canny_high", 100, 250, 10),
    )
    # (name, low, high) for every float parameter, in sampling order.
    float_space = (
        ("area_ratio_min", 0.001, 0.02),
        ("area_ratio_max", 0.05, 0.30),
        ("aspect_min", 0.3, 0.8),
        ("aspect_max", 1.2, 2.5),
        ("rectangularity_min", 0.2, 0.7),
        ("edge_density_min", 0.005, 0.06),
    )

    def objective(trial):
        params = {
            name: trial.suggest_int(name, low, high, step=step)
            for name, low, high, step in int_space
        }
        params.update(
            {
                name: trial.suggest_float(name, low, high)
                for name, low, high in float_space
            }
        )

        # An empty threshold sweep or inverted Canny limits score zero.
        if (
            params["thresh_min"] >= params["thresh_max"]
            or params["canny_low"] >= params["canny_high"]
        ):
            return 0

        pad = 15
        hits = 0
        for source_path, frame in images:
            box = find_screen_parameterized(frame, params)
            if box is None:
                continue

            left, top, width, height = box
            frame_h, frame_w = frame.shape[:2]
            # Padded crop, clamped to the image bounds.
            crop = frame[
                max(0, top - pad) : min(frame_h, top + height + pad),
                max(0, left - pad) : min(frame_w, left + width + pad),
            ]

            # cnn_predict is given a file path, so write the crop to disk.
            stem = os.path.splitext(os.path.basename(source_path))[0]
            crop_path = os.path.join(tmp_dir, f"{stem}_trial{trial.number}.jpg")
            cv2.imwrite(crop_path, crop, [cv2.IMWRITE_JPEG_QUALITY, 95])

            try:
                label, _ = cnn_predict(crop_path, model_path)
                if label == 1:
                    hits += 1
            finally:
                # Remove the crop right away to keep disk usage low.
                if os.path.exists(crop_path):
                    os.remove(crop_path)

        return hits

    return objective
def main():
    """Parse the command line, run the Optuna study and print the best result.

    Exits through argparse's error path when no image could be loaded from
    the photos directory, because every trial would then score 0.
    """
    parser = argparse.ArgumentParser(
        description="Optimize crop_to_screen.py parameters"
    )
    parser.add_argument(
        "--photos-dir", default="photos/", help="Directory of source photos"
    )
    parser.add_argument(
        "--n-trials", type=int, default=300, help="Number of Optuna trials"
    )
    parser.add_argument(
        "--model-path",
        default="screen_classifier_model.pth",
        help="Path to CNN model weights",
    )
    args = parser.parse_args()

    images = load_images(args.photos_dir)
    print(f"Loaded {len(images)} source images from {args.photos_dir}")
    if not images:
        # Without images the objective is constantly 0 and the study cannot
        # distinguish between parameter sets, so stop before running it.
        parser.error(f"no readable images found in {args.photos_dir}")

    # Crops are written to a throw-away directory for the classifier.
    with tempfile.TemporaryDirectory() as tmp_dir:
        study = optuna.create_study(direction="maximize")
        objective = make_objective(images, tmp_dir, args.model_path)
        study.optimize(objective, n_trials=args.n_trials, show_progress_bar=True)

    print(f"\n{'=' * 60}")
    print(f"Best score: {study.best_value} / {len(images)} images classified as rowing")
    print("Best parameters:")
    for k, v in sorted(study.best_params.items()):
        print(f" {k:>25s}: {v}")
    print(f"{'=' * 60}")


if __name__ == "__main__":
    main()

View File

@@ -195,7 +195,10 @@ def get_cnn_model():
def train_cnn( def train_cnn(
data_dir: str, epochs: int = 20, lr: float = 1e-3, save_path: str = "model.pth" data_dir: str,
epochs: int = 20,
lr: float = 1e-3,
save_path: str = "screen_classifier_model.pth",
): ):
""" """
Train the CNN. Expects data_dir with structure: Train the CNN. Expects data_dir with structure:
@@ -336,8 +339,9 @@ def main():
sub = parser.add_subparsers(dest="command", required=True) sub = parser.add_subparsers(dest="command", required=True)
# --- predict --- # --- predict ---
p_pred = sub.add_parser("predict", help="Classify an image") p_pred = sub.add_parser("predict", help="Classify an image or directory of images")
p_pred.add_argument("--image", required=True, help="Path to image file") p_pred.add_argument("--image", help="Path to image file")
p_pred.add_argument("--dir", help="Path to directory of images")
p_pred.add_argument( p_pred.add_argument(
"--model", "--model",
choices=["features", "cnn"], choices=["features", "cnn"],
@@ -361,13 +365,50 @@ def main():
args = parser.parse_args() args = parser.parse_args()
if args.command == "predict": if args.command == "predict":
if args.model == "features": if not args.image and not args.dir:
label, conf = feature_based_predict(args.image, verbose=args.verbose) parser.error("predict requires --image or --dir")
else: if args.image and args.dir:
label, conf = cnn_predict(args.image, args.model_path) parser.error("--image and --dir are mutually exclusive")
tag = "ROWING MACHINE" if label == 1 else "NOT ROWING MACHINE" # Build list of image paths
print(f"\n Result: {tag} (label={label}, confidence={conf:.2f})\n") IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tiff"}
if args.dir:
dir_path = Path(args.dir)
if not dir_path.is_dir():
print(f"Error: {args.dir} is not a directory")
sys.exit(1)
image_paths = sorted(
p for p in dir_path.iterdir() if p.suffix.lower() in IMAGE_EXTS
)
if not image_paths:
print(f"No images found in {args.dir}")
sys.exit(1)
else:
image_paths = [Path(args.image)]
# Classify each image
rowing_count = 0
for img_path in image_paths:
if args.model == "features":
label, conf = feature_based_predict(str(img_path), verbose=args.verbose)
else:
label, conf = cnn_predict(str(img_path), args.model_path)
tag = "ROWING MACHINE" if label == 1 else "NOT ROWING MACHINE"
if args.dir:
print(f" {img_path.name} \u2192 {tag} (confidence={conf:.2f})")
else:
print(f"\n Result: {tag} (label={label}, confidence={conf:.2f})\n")
if label == 1:
rowing_count += 1
# Summary for directory mode
if args.dir:
total = len(image_paths)
not_rowing = total - rowing_count
print(
f"\n Summary: {total} images | {rowing_count} rowing | {not_rowing} not rowing"
)
elif args.command == "train": elif args.command == "train":
train_cnn(args.data_dir, epochs=args.epochs, lr=args.lr, save_path=args.save) train_cnn(args.data_dir, epochs=args.epochs, lr=args.lr, save_path=args.save)

BIN
screen_classifier_model.pth Normal file

Binary file not shown.