Crops to rowing machine screen - can be trained with optimize_crop.py and screen_classifier
This commit is contained in:
202
optimize_crop.py
Normal file
202
optimize_crop.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""
|
||||
Optimize crop_to_screen.py parameters using Optuna.
|
||||
|
||||
Uses the CNN classifier (cnn_predict) from screen_classifier.py as the
|
||||
evaluation function. For each trial, runs find_screen() with suggested
|
||||
parameters on all source photos and counts how many crops are classified
|
||||
as rowing displays (label=1). Optuna maximises this count.
|
||||
|
||||
Usage:
|
||||
python optimize_crop.py [--n-trials 300] [--photos-dir photos/] [--model-path screen_classifier_model.pth]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import optuna
|
||||
|
||||
from screen_classifier import cnn_predict
|
||||
|
||||
|
||||
def find_screen_parameterized(image, params):
    """
    Detect the Concept 2 PM5 LCD screen region in the image.

    Same logic as crop_to_screen.find_screen but with tunable parameters.

    The grayscale image is binarised at every threshold from
    ``thresh_min`` (inclusive) to ``thresh_max`` (exclusive) in steps of 10.
    Each external contour found at each threshold is filtered by
    bounding-box area ratio, aspect ratio, rectangularity and Canny edge
    density; the surviving contour with the highest score wins.

    Args:
        image: BGR image array (it is converted with ``cv2.COLOR_BGR2GRAY``).
        params: mapping with the keys ``gaussian_kernel_size``,
            ``canny_low``, ``canny_high``, ``thresh_min``, ``thresh_max``,
            ``morph_kernel_size``, ``area_ratio_min``, ``area_ratio_max``,
            ``aspect_min``, ``aspect_max``, ``rectangularity_min`` and
            ``edge_density_min``.

    Returns (x, y, w, h) bounding box or None if not found.
    """
    h_img, w_img = image.shape[:2]
    image_area = h_img * w_img
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # The edge map is computed once and only sampled per candidate below.
    gk = params["gaussian_kernel_size"]
    blurred = cv2.GaussianBlur(gray, (gk, gk), 0)
    edges = cv2.Canny(blurred, params["canny_low"], params["canny_high"])

    # The structuring element does not depend on the threshold, so build it
    # once instead of once per threshold iteration.
    mk = params["morph_kernel_size"]
    kern = cv2.getStructuringElement(cv2.MORPH_RECT, (mk, mk))

    best_score = None
    best_box = None

    for thresh_val in range(params["thresh_min"], params["thresh_max"], 10):
        _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
        # Close then open with the same kernel before extracting contours.
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)

        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            rect_area = w * h
            if rect_area == 0:
                continue

            # Bounding box size relative to the whole image.
            area_ratio = rect_area / image_area
            if not (
                params["area_ratio_min"] <= area_ratio <= params["area_ratio_max"]
            ):
                continue

            aspect = w / h
            if not (params["aspect_min"] <= aspect <= params["aspect_max"]):
                continue

            # How much of its bounding box the contour fills.
            area = cv2.contourArea(cnt)
            rectangularity = area / rect_area
            if rectangularity < params["rectangularity_min"]:
                continue

            # Fraction of bounding-box pixels that are Canny edge pixels.
            roi_edges = edges[y : y + h, x : x + w]
            edge_density = np.count_nonzero(roi_edges) / rect_area
            if edge_density < params["edge_density_min"]:
                continue

            score = edge_density * area * rectangularity
            # Strict '>' keeps the earliest candidate on ties, which is what
            # a stable descending sort followed by taking [0] would select.
            if best_score is None or score > best_score:
                best_score = score
                best_box = (x, y, w, h)

    return best_box
|
||||
|
||||
|
||||
def load_images(photos_dir):
    """Load all source images once for reuse across trials.

    Files in ``photos_dir`` matching ``*.JPEG``, ``*.jpeg``, ``*.jpg`` or
    ``*.JPG`` are read with ``cv2.imread``. Files for which ``cv2.imread``
    returns ``None`` are skipped.

    Args:
        photos_dir: directory containing the source photos.

    Returns:
        A list of ``(path, image)`` tuples ordered by path. The list is empty
        when nothing matches (including when the directory does not exist).
    """
    patterns = ("*.JPEG", "*.jpeg", "*.jpg", "*.JPG")

    # Where glob matching is case-insensitive, several of the patterns return
    # the very same file; a set keeps each photo from being loaded (and later
    # scored) more than once.
    unique_paths = set()
    for pattern in patterns:
        unique_paths.update(glob.glob(os.path.join(photos_dir, pattern)))

    images = []
    for path in sorted(unique_paths):
        img = cv2.imread(path)
        if img is not None:
            images.append((path, img))
    return images
|
||||
|
||||
|
||||
def make_objective(images, tmp_dir, model_path):
    """Create an Optuna objective function closed over images and tmp_dir.

    Args:
        images: iterable of ``(path, image)`` pairs, as produced by
            ``load_images``.
        tmp_dir: existing directory where each crop is written briefly so
            the classifier can read it from disk.
        model_path: passed through unchanged to ``cnn_predict``.

    Returns:
        ``objective(trial)``, which returns the number of images whose
        detected crop gets label 1 from ``cnn_predict``. It returns 0
        without evaluating any image when the sampled thresholds are not
        strictly ordered.
    """
    # Pixels of context kept around the detected bounding box.
    padding = 15

    def objective(trial):
        params = {
            "thresh_min": trial.suggest_int("thresh_min", 60, 160, step=10),
            # The upper bound is 250, not 255: stepping by 10 from 160 can
            # never reach 255, and Optuna warns on every trial and lowers
            # the bound to 250 itself when the range is not divisible by
            # the step.
            "thresh_max": trial.suggest_int("thresh_max", 160, 250, step=10),
            "morph_kernel_size": trial.suggest_int("morph_kernel_size", 3, 21, step=2),
            "gaussian_kernel_size": trial.suggest_int(
                "gaussian_kernel_size", 3, 11, step=2
            ),
            "canny_low": trial.suggest_int("canny_low", 20, 100, step=10),
            "canny_high": trial.suggest_int("canny_high", 100, 250, step=10),
            "area_ratio_min": trial.suggest_float("area_ratio_min", 0.001, 0.02),
            "area_ratio_max": trial.suggest_float("area_ratio_max", 0.05, 0.30),
            "aspect_min": trial.suggest_float("aspect_min", 0.3, 0.8),
            "aspect_max": trial.suggest_float("aspect_max", 1.2, 2.5),
            "rectangularity_min": trial.suggest_float("rectangularity_min", 0.2, 0.7),
            "edge_density_min": trial.suggest_float("edge_density_min", 0.005, 0.06),
        }

        # Ensure thresh_min < thresh_max
        if params["thresh_min"] >= params["thresh_max"]:
            return 0

        # Ensure canny_low < canny_high
        if params["canny_low"] >= params["canny_high"]:
            return 0

        rowing_count = 0
        for img_path, img in images:
            result = find_screen_parameterized(img, params)
            if result is None:
                continue

            # Grow the box by the padding, clamped to the image bounds.
            x, y, w, h = result
            h_img, w_img = img.shape[:2]
            x1 = max(0, x - padding)
            y1 = max(0, y - padding)
            x2 = min(w_img, x + w + padding)
            y2 = min(h_img, y + h + padding)
            cropped = img[y1:y2, x1:x2]

            # Save to temp file for the classifier
            basename = os.path.splitext(os.path.basename(img_path))[0]
            tmp_path = os.path.join(tmp_dir, f"{basename}_trial{trial.number}.jpg")

            try:
                # The write sits inside the try so a partially written file
                # is removed as well if writing fails.
                cv2.imwrite(tmp_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
                label, _ = cnn_predict(tmp_path, model_path)
                if label == 1:
                    rowing_count += 1
            finally:
                # Clean up immediately to save disk space
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

        return rowing_count

    return objective
|
||||
|
||||
|
||||
def main():
    """Run the Optuna study from the command line and print the best result.

    Command-line options:
        --photos-dir: directory of source photos (default ``photos/``).
        --n-trials: number of Optuna trials (default 300).
        --model-path: path to the CNN model weights
            (default ``screen_classifier_model.pth``).

    Exits through ``parser.error`` when no image could be loaded, because
    every trial would then score 0 and the study could not distinguish
    between parameter sets.
    """
    parser = argparse.ArgumentParser(
        description="Optimize crop_to_screen.py parameters"
    )
    parser.add_argument(
        "--photos-dir", default="photos/", help="Directory of source photos"
    )
    parser.add_argument(
        "--n-trials", type=int, default=300, help="Number of Optuna trials"
    )
    parser.add_argument(
        "--model-path",
        default="screen_classifier_model.pth",
        help="Path to CNN model weights",
    )
    args = parser.parse_args()

    images = load_images(args.photos_dir)
    print(f"Loaded {len(images)} source images from {args.photos_dir}")
    if not images:
        parser.error(f"no readable JPEG images found in {args.photos_dir}")

    # The temporary directory only has to live for the duration of the
    # study; crops are written there for the classifier and removed again
    # by the objective.
    with tempfile.TemporaryDirectory() as tmp_dir:
        study = optuna.create_study(direction="maximize")
        objective = make_objective(images, tmp_dir, args.model_path)
        study.optimize(objective, n_trials=args.n_trials, show_progress_bar=True)

    separator = "=" * 60
    print(f"\n{separator}")
    print(f"Best score: {study.best_value} / {len(images)} images classified as rowing")
    print("Best parameters:")
    for k, v in sorted(study.best_params.items()):
        print(f" {k:>25s}: {v}")
    print(separator)
|
||||
|
||||
|
||||
# Run the optimisation only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user