Crops to rowing machine screen - can be trained with optimize_crop.py and screen_classifier
This commit is contained in:
@@ -18,26 +18,39 @@ import glob
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def order_corners(pts):
    """Order 4 points as [top-left, top-right, bottom-right, bottom-left].

    Args:
        pts: Four (x, y) points. Any array-like that reshapes to (4, 2) is
            accepted: a (4, 2) array, a nested list, or an OpenCV-style
            (4, 1, 2) contour.

    Returns:
        A (4, 2) float32 array with the corners in clockwise order starting
        at the top-left (image coordinates, y pointing down).

    Note:
        Corners are chosen from the extremes of ``x + y`` and ``y - x``.
        For a quad rotated close to 45 degrees two of those extremes can
        land on the same input point, so the result is only meaningful for
        roughly upright quadrilaterals.
    """
    # Normalise the input so lists and (4, 1, 2) contours behave like (4, 2).
    pts = np.asarray(pts, dtype="float32").reshape(-1, 2)

    ordered = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    ordered[0] = pts[np.argmin(coord_sum)]
    ordered[2] = pts[np.argmax(coord_sum)]

    # y - x is smallest at the top-right and largest at the bottom-left.
    coord_diff = pts[:, 1] - pts[:, 0]
    ordered[1] = pts[np.argmin(coord_diff)]
    ordered[3] = pts[np.argmax(coord_diff)]

    return ordered
|
||||||
|
|
||||||
|
|
||||||
def find_screen(image):
|
def find_screen(image):
|
||||||
"""
|
"""
|
||||||
Detect the Concept 2 PM5 LCD screen region in the image.
|
Detect the Concept 2 PM5 LCD screen region in the image.
|
||||||
|
|
||||||
Returns (x, y, w, h) bounding box or None if not found.
|
Returns (x, y, w, h, contour) or None if not found.
|
||||||
|
The contour is the best-matching contour for perspective correction.
|
||||||
"""
|
"""
|
||||||
h_img, w_img = image.shape[:2]
|
h_img, w_img = image.shape[:2]
|
||||||
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
# Pre-compute edge map for internal-content scoring
|
# Pre-compute edge map for internal-content scoring
|
||||||
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
|
blurred = cv2.GaussianBlur(gray, (11, 11), 0)
|
||||||
edges = cv2.Canny(blurred, 50, 150)
|
edges = cv2.Canny(blurred, 80, 100)
|
||||||
|
|
||||||
candidates = []
|
candidates = []
|
||||||
|
|
||||||
# Sweep brightness thresholds — screen brightness varies by
|
# Sweep brightness thresholds — screen brightness varies by
|
||||||
# lighting conditions (ranges from ~100 in dim gyms to ~200+)
|
# lighting conditions (ranges from ~100 in dim gyms to ~200+)
|
||||||
for thresh_val in range(120, 200, 10):
|
for thresh_val in range(70, 210, 10):
|
||||||
_, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
|
_, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
|
||||||
kern = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))
|
kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
|
||||||
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
|
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
|
||||||
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)
|
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)
|
||||||
|
|
||||||
@@ -54,39 +67,65 @@ def find_screen(image):
|
|||||||
|
|
||||||
# Size: screen is a small-to-medium portion of the photo
|
# Size: screen is a small-to-medium portion of the photo
|
||||||
area_ratio = rect_area / (h_img * w_img)
|
area_ratio = rect_area / (h_img * w_img)
|
||||||
if area_ratio < 0.005 or area_ratio > 0.12:
|
if area_ratio < 0.004480508227271387 or area_ratio > 0.13807760800032298:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Aspect ratio: LCD is roughly square (0.5 to 1.6)
|
# Aspect ratio: LCD is roughly square
|
||||||
aspect = w / h
|
aspect = w / h
|
||||||
if aspect < 0.5 or aspect > 1.6:
|
if aspect < 0.6831978184146027 or aspect > 1.9505294279578584:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Rectangularity
|
# Rectangularity
|
||||||
rectangularity = area / rect_area
|
rectangularity = area / rect_area
|
||||||
if rectangularity < 0.4:
|
if rectangularity < 0.6914579162415992:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# KEY: edge density — LCD with text > 0.03, plain surfaces < 0.01
|
# KEY: edge density — LCD with text has high edge density
|
||||||
roi_edges = edges[y : y + h, x : x + w]
|
roi_edges = edges[y : y + h, x : x + w]
|
||||||
edge_density = np.sum(roi_edges > 0) / rect_area
|
edge_density = np.sum(roi_edges > 0) / rect_area
|
||||||
if edge_density < 0.03:
|
if edge_density < 0.012759310759672408:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Score: edge density * area * rectangularity
|
# Score: edge density * area * rectangularity
|
||||||
# This favours text-rich regions that are large and well-shaped
|
# This favours text-rich regions that are large and well-shaped
|
||||||
score = edge_density * area * rectangularity
|
score = edge_density * area * rectangularity
|
||||||
candidates.append((score, x, y, w, h))
|
candidates.append((score, x, y, w, h, cnt))
|
||||||
|
|
||||||
if not candidates:
|
if not candidates:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
candidates.sort(key=lambda c: c[0], reverse=True)
|
candidates.sort(key=lambda c: c[0], reverse=True)
|
||||||
return candidates[0][1:]
|
best = candidates[0]
|
||||||
|
return best[1], best[2], best[3], best[4], best[5]
|
||||||
|
|
||||||
|
|
||||||
|
def perspective_correct(image, contour, dst_w, dst_h):
    """Warp the screen quadrilateral to a flat rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        contour: OpenCV contour outlining the screen.
        dst_w: Width of the output image in pixels.
        dst_h: Height of the output image in pixels.

    Returns:
        The warped image of size ``dst_w`` x ``dst_h``.

    Raises:
        ValueError: If ``dst_w`` or ``dst_h`` is smaller than 2, which would
            collapse the destination rectangle to a line or point.
    """
    if dst_w < 2 or dst_h < 2:
        raise ValueError(
            f"destination size must be at least 2x2, got {dst_w}x{dst_h}"
        )

    # Approximate the contour with progressively larger tolerances (coarser
    # polygons) until it simplifies to exactly four corners.
    perimeter = cv2.arcLength(contour, True)
    for eps_mult in (0.02, 0.03, 0.05, 0.08, 0.10):
        approx = cv2.approxPolyDP(contour, eps_mult * perimeter, True)
        if len(approx) == 4:
            quad = approx.reshape(4, 2).astype(np.float32)
            break
    else:
        # No tolerance gave a quadrilateral: fall back to the corners of the
        # minimum-area (possibly rotated) bounding rectangle.
        quad = cv2.boxPoints(cv2.minAreaRect(contour)).astype(np.float32)

    src = order_corners(quad)
    # Destination corners in the same TL, TR, BR, BL order as ``src``.
    dst = np.array(
        [[0, 0], [dst_w - 1, 0], [dst_w - 1, dst_h - 1], [0, dst_h - 1]],
        dtype="float32",
    )
    transform = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(image, transform, (dst_w, dst_h))
|
||||||
|
|
||||||
|
|
||||||
def crop_screen(image_path, output_path, padding=15):
|
def crop_screen(image_path, output_path, padding=15):
|
||||||
"""Load an image, find the screen, crop and save it."""
|
"""Load an image, find the screen, perspective-correct and save it."""
|
||||||
image = cv2.imread(image_path)
|
image = cv2.imread(image_path)
|
||||||
if image is None:
|
if image is None:
|
||||||
print(f" ERROR: Could not read {image_path}")
|
print(f" ERROR: Could not read {image_path}")
|
||||||
@@ -99,16 +138,12 @@ def crop_screen(image_path, output_path, padding=15):
|
|||||||
print(f" SKIP: No screen detected in {os.path.basename(image_path)}")
|
print(f" SKIP: No screen detected in {os.path.basename(image_path)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
x, y, w, h = result
|
x, y, w, h, contour = result
|
||||||
|
|
||||||
# Add padding, clamped to image bounds
|
# Use perspective correction to flatten the screen
|
||||||
x1 = max(0, x - padding)
|
corrected = perspective_correct(image, contour, w + 2 * padding, h + 2 * padding)
|
||||||
y1 = max(0, y - padding)
|
|
||||||
x2 = min(w_img, x + w + padding)
|
|
||||||
y2 = min(h_img, y + h + padding)
|
|
||||||
|
|
||||||
cropped = image[y1:y2, x1:x2]
|
cv2.imwrite(output_path, corrected, [cv2.IMWRITE_JPEG_QUALITY, 95])
|
||||||
cv2.imwrite(output_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
|
|
||||||
print(
|
print(
|
||||||
f" OK: {os.path.basename(image_path)} -> {os.path.basename(output_path)} ({w}x{h})"
|
f" OK: {os.path.basename(image_path)} -> {os.path.basename(output_path)} ({w}x{h})"
|
||||||
)
|
)
|
||||||
|
|||||||
202
optimize_crop.py
Normal file
202
optimize_crop.py
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
"""
|
||||||
|
Optimize crop_to_screen.py parameters using Optuna.
|
||||||
|
|
||||||
|
Uses the CNN classifier (cnn_predict) from screen_classifier.py as the
|
||||||
|
evaluation function. For each trial, runs find_screen() with suggested
|
||||||
|
parameters on all source photos and counts how many crops are classified
|
||||||
|
as rowing displays (label=1). Optuna maximises this count.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python optimize_crop.py [--n-trials 300] [--photos-dir photos/]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import optuna
|
||||||
|
|
||||||
|
from screen_classifier import cnn_predict
|
||||||
|
|
||||||
|
|
||||||
|
def find_screen_parameterized(image, params):
    """
    Detect the Concept 2 PM5 LCD screen region in the image.

    Same logic as crop_to_screen.find_screen but with tunable parameters.

    Args:
        image: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).
        params: Mapping with the keys
            ``gaussian_kernel_size``, ``canny_low``, ``canny_high``
            (edge map), ``thresh_min``, ``thresh_max`` (brightness sweep,
            step 10, upper bound exclusive), ``morph_kernel_size``
            (close/open kernel), ``area_ratio_min``, ``area_ratio_max``,
            ``aspect_min``, ``aspect_max``, ``rectangularity_min`` and
            ``edge_density_min`` (candidate filters).

    Returns (x, y, w, h) bounding box or None if not found.
    """
    h_img, w_img = image.shape[:2]
    image_area = h_img * w_img
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Edge map used to score how much internal detail a candidate contains.
    gk = params["gaussian_kernel_size"]
    blurred = cv2.GaussianBlur(gray, (gk, gk), 0)
    edges = cv2.Canny(blurred, params["canny_low"], params["canny_high"])

    # The morphology kernel does not depend on the threshold, so build it once.
    mk = params["morph_kernel_size"]
    kern = cv2.getStructuringElement(cv2.MORPH_RECT, (mk, mk))

    best = None  # (score, x, y, w, h) of the highest-scoring candidate so far

    for thresh_val in range(params["thresh_min"], params["thresh_max"], 10):
        _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)

        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            rect_area = w * h
            if rect_area == 0:
                continue

            # Size: bounding box as a fraction of the whole photo.
            area_ratio = rect_area / image_area
            if not params["area_ratio_min"] <= area_ratio <= params["area_ratio_max"]:
                continue

            # Aspect ratio of the bounding box.
            aspect = w / h
            if not params["aspect_min"] <= aspect <= params["aspect_max"]:
                continue

            # Rectangularity: how much of the bounding box the contour fills.
            area = cv2.contourArea(cnt)
            rectangularity = area / rect_area
            if rectangularity < params["rectangularity_min"]:
                continue

            # Edge density: fraction of edge pixels inside the bounding box.
            roi_edges = edges[y : y + h, x : x + w]
            edge_density = np.count_nonzero(roi_edges) / rect_area
            if edge_density < params["edge_density_min"]:
                continue

            # Favour detail-rich regions that are large and well-shaped.
            # Strict ">" keeps the first candidate on ties.
            score = edge_density * area * rectangularity
            if best is None or score > best[0]:
                best = (score, x, y, w, h)

    if best is None:
        return None
    return best[1:]
|
||||||
|
|
||||||
|
|
||||||
|
def load_images(photos_dir):
    """Load all source images once for reuse across trials.

    Files directly inside ``photos_dir`` whose extension is ``.jpg`` or
    ``.jpeg`` (compared case-insensitively) are read with ``cv2.imread``.
    Each file is listed once, so no image is loaded twice on platforms where
    glob patterns match case-insensitively.

    Args:
        photos_dir: Directory containing the source photos.

    Returns:
        A list of ``(path, image)`` tuples sorted by path. Files that
        ``cv2.imread`` cannot decode are skipped. A missing or empty
        directory yields an empty list.
    """
    jpeg_suffixes = {".jpg", ".jpeg"}
    paths = sorted(
        p
        for p in glob.glob(os.path.join(photos_dir, "*"))
        if os.path.splitext(p)[1].lower() in jpeg_suffixes
    )

    images = []
    for p in paths:
        img = cv2.imread(p)
        if img is not None:
            images.append((p, img))
    return images
|
||||||
|
|
||||||
|
|
||||||
|
def make_objective(images, tmp_dir, model_path):
    """Create an Optuna objective function closed over images and tmp_dir.

    Args:
        images: Sequence of ``(path, image)`` tuples as returned by
            ``load_images``.
        tmp_dir: Existing directory used for the short-lived crop files
            handed to the classifier.
        model_path: Path to the CNN weights passed to ``cnn_predict``.

    Returns:
        A callable ``objective(trial)`` that returns the number of images
        whose detected crop is classified as a rowing display (label 1).
    """
    padding = 15  # margin in pixels added around the detected bounding box

    def objective(trial):
        params = {
            "thresh_min": trial.suggest_int("thresh_min", 60, 160, step=10),
            # Upper bound is 250, not 255: with step=10 the range must be a
            # multiple of the step, otherwise Optuna warns and clamps it.
            "thresh_max": trial.suggest_int("thresh_max", 160, 250, step=10),
            "morph_kernel_size": trial.suggest_int("morph_kernel_size", 3, 21, step=2),
            "gaussian_kernel_size": trial.suggest_int(
                "gaussian_kernel_size", 3, 11, step=2
            ),
            "canny_low": trial.suggest_int("canny_low", 20, 100, step=10),
            "canny_high": trial.suggest_int("canny_high", 100, 250, step=10),
            "area_ratio_min": trial.suggest_float("area_ratio_min", 0.001, 0.02),
            "area_ratio_max": trial.suggest_float("area_ratio_max", 0.05, 0.30),
            "aspect_min": trial.suggest_float("aspect_min", 0.3, 0.8),
            "aspect_max": trial.suggest_float("aspect_max", 1.2, 2.5),
            "rectangularity_min": trial.suggest_float("rectangularity_min", 0.2, 0.7),
            "edge_density_min": trial.suggest_float("edge_density_min", 0.005, 0.06),
        }

        # The sampled ranges touch at their ends, so reject combinations
        # where the lower bound is not strictly below the upper bound.
        if params["thresh_min"] >= params["thresh_max"]:
            return 0
        if params["canny_low"] >= params["canny_high"]:
            return 0

        rowing_count = 0
        for img_path, img in images:
            result = find_screen_parameterized(img, params)
            if result is None:
                continue

            # Padded crop, clamped to the image bounds.
            x, y, w, h = result
            h_img, w_img = img.shape[:2]
            x1 = max(0, x - padding)
            y1 = max(0, y - padding)
            x2 = min(w_img, x + w + padding)
            y2 = min(h_img, y + h + padding)
            cropped = img[y1:y2, x1:x2]

            # The classifier takes a file path, so round-trip through disk.
            basename = os.path.splitext(os.path.basename(img_path))[0]
            tmp_path = os.path.join(tmp_dir, f"{basename}_trial{trial.number}.jpg")

            try:
                if not cv2.imwrite(tmp_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95]):
                    # Nothing was written; count this image as a miss rather
                    # than asking the classifier to open a missing file.
                    continue
                label, _ = cnn_predict(tmp_path, model_path)
                if label == 1:
                    rowing_count += 1
            finally:
                # Clean up immediately to save disk space.
                try:
                    os.remove(tmp_path)
                except FileNotFoundError:
                    pass

        return rowing_count

    return objective
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse CLI arguments, run the Optuna study and print the best result.

    Exits with an argparse error when no source images can be loaded or when
    ``--n-trials`` is not positive, because in both cases the study has
    nothing meaningful to report.
    """
    parser = argparse.ArgumentParser(
        description="Optimize crop_to_screen.py parameters"
    )
    parser.add_argument(
        "--photos-dir", default="photos/", help="Directory of source photos"
    )
    parser.add_argument(
        "--n-trials", type=int, default=300, help="Number of Optuna trials"
    )
    parser.add_argument(
        "--model-path",
        default="screen_classifier_model.pth",
        help="Path to CNN model weights",
    )
    args = parser.parse_args()

    if args.n_trials < 1:
        parser.error("--n-trials must be a positive integer")

    images = load_images(args.photos_dir)
    if not images:
        # Every trial would score 0; stop before running any.
        parser.error(f"no readable JPEG images found in {args.photos_dir}")
    print(f"Loaded {len(images)} source images from {args.photos_dir}")

    # Crops are written to a scratch directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        study = optuna.create_study(direction="maximize")
        objective = make_objective(images, tmp_dir, args.model_path)
        study.optimize(objective, n_trials=args.n_trials, show_progress_bar=True)

    print(f"\n{'=' * 60}")
    print(f"Best score: {study.best_value} / {len(images)} images classified as rowing")
    print("Best parameters:")
    for k, v in sorted(study.best_params.items()):
        print(f" {k:>25s}: {v}")
    print(f"{'=' * 60}")


if __name__ == "__main__":
    main()
|
||||||
@@ -195,7 +195,10 @@ def get_cnn_model():
|
|||||||
|
|
||||||
|
|
||||||
def train_cnn(
|
def train_cnn(
|
||||||
data_dir: str, epochs: int = 20, lr: float = 1e-3, save_path: str = "model.pth"
|
data_dir: str,
|
||||||
|
epochs: int = 20,
|
||||||
|
lr: float = 1e-3,
|
||||||
|
save_path: str = "screen_classifier_model.pth",
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Train the CNN. Expects data_dir with structure:
|
Train the CNN. Expects data_dir with structure:
|
||||||
@@ -336,8 +339,9 @@ def main():
|
|||||||
sub = parser.add_subparsers(dest="command", required=True)
|
sub = parser.add_subparsers(dest="command", required=True)
|
||||||
|
|
||||||
# --- predict ---
|
# --- predict ---
|
||||||
p_pred = sub.add_parser("predict", help="Classify an image")
|
p_pred = sub.add_parser("predict", help="Classify an image or directory of images")
|
||||||
p_pred.add_argument("--image", required=True, help="Path to image file")
|
p_pred.add_argument("--image", help="Path to image file")
|
||||||
|
p_pred.add_argument("--dir", help="Path to directory of images")
|
||||||
p_pred.add_argument(
|
p_pred.add_argument(
|
||||||
"--model",
|
"--model",
|
||||||
choices=["features", "cnn"],
|
choices=["features", "cnn"],
|
||||||
@@ -361,13 +365,50 @@ def main():
|
|||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.command == "predict":
|
if args.command == "predict":
|
||||||
if args.model == "features":
|
if not args.image and not args.dir:
|
||||||
label, conf = feature_based_predict(args.image, verbose=args.verbose)
|
parser.error("predict requires --image or --dir")
|
||||||
|
if args.image and args.dir:
|
||||||
|
parser.error("--image and --dir are mutually exclusive")
|
||||||
|
|
||||||
|
# Build list of image paths
|
||||||
|
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tiff"}
|
||||||
|
if args.dir:
|
||||||
|
dir_path = Path(args.dir)
|
||||||
|
if not dir_path.is_dir():
|
||||||
|
print(f"Error: {args.dir} is not a directory")
|
||||||
|
sys.exit(1)
|
||||||
|
image_paths = sorted(
|
||||||
|
p for p in dir_path.iterdir() if p.suffix.lower() in IMAGE_EXTS
|
||||||
|
)
|
||||||
|
if not image_paths:
|
||||||
|
print(f"No images found in {args.dir}")
|
||||||
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
label, conf = cnn_predict(args.image, args.model_path)
|
image_paths = [Path(args.image)]
|
||||||
|
|
||||||
|
# Classify each image
|
||||||
|
rowing_count = 0
|
||||||
|
for img_path in image_paths:
|
||||||
|
if args.model == "features":
|
||||||
|
label, conf = feature_based_predict(str(img_path), verbose=args.verbose)
|
||||||
|
else:
|
||||||
|
label, conf = cnn_predict(str(img_path), args.model_path)
|
||||||
|
|
||||||
tag = "ROWING MACHINE" if label == 1 else "NOT ROWING MACHINE"
|
tag = "ROWING MACHINE" if label == 1 else "NOT ROWING MACHINE"
|
||||||
|
if args.dir:
|
||||||
|
print(f" {img_path.name} \u2192 {tag} (confidence={conf:.2f})")
|
||||||
|
else:
|
||||||
print(f"\n Result: {tag} (label={label}, confidence={conf:.2f})\n")
|
print(f"\n Result: {tag} (label={label}, confidence={conf:.2f})\n")
|
||||||
|
if label == 1:
|
||||||
|
rowing_count += 1
|
||||||
|
|
||||||
|
# Summary for directory mode
|
||||||
|
if args.dir:
|
||||||
|
total = len(image_paths)
|
||||||
|
not_rowing = total - rowing_count
|
||||||
|
print(
|
||||||
|
f"\n Summary: {total} images | {rowing_count} rowing | {not_rowing} not rowing"
|
||||||
|
)
|
||||||
|
|
||||||
elif args.command == "train":
|
elif args.command == "train":
|
||||||
train_cnn(args.data_dir, epochs=args.epochs, lr=args.lr, save_path=args.save)
|
train_cnn(args.data_dir, epochs=args.epochs, lr=args.lr, save_path=args.save)
|
||||||
|
|||||||
BIN
screen_classifier_model.pth
Normal file
BIN
screen_classifier_model.pth
Normal file
Binary file not shown.
Reference in New Issue
Block a user