diff --git a/crop_to_screen.py b/crop_to_screen.py index e346e6b..5f8ffa3 100644 --- a/crop_to_screen.py +++ b/crop_to_screen.py @@ -18,26 +18,39 @@ import glob import sys +def order_corners(pts): + """Order 4 points as [top-left, top-right, bottom-right, bottom-left].""" + rect = np.zeros((4, 2), dtype="float32") + s = pts.sum(axis=1) + rect[0] = pts[np.argmin(s)] + rect[2] = pts[np.argmax(s)] + d = np.diff(pts, axis=1) + rect[1] = pts[np.argmin(d)] + rect[3] = pts[np.argmax(d)] + return rect + + def find_screen(image): """ Detect the Concept 2 PM5 LCD screen region in the image. - Returns (x, y, w, h) bounding box or None if not found. + Returns (x, y, w, h, contour) or None if not found. + The contour is the best-matching contour for perspective correction. """ h_img, w_img = image.shape[:2] gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # Pre-compute edge map for internal-content scoring - blurred = cv2.GaussianBlur(gray, (5, 5), 0) - edges = cv2.Canny(blurred, 50, 150) + blurred = cv2.GaussianBlur(gray, (11, 11), 0) + edges = cv2.Canny(blurred, 80, 100) candidates = [] # Sweep brightness thresholds — screen brightness varies by # lighting conditions (ranges from ~100 in dim gyms to ~200+) - for thresh_val in range(120, 200, 10): + for thresh_val in range(70, 210, 10): _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY) - kern = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11)) + kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern) thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern) @@ -54,39 +67,65 @@ def find_screen(image): # Size: screen is a small-to-medium portion of the photo area_ratio = rect_area / (h_img * w_img) - if area_ratio < 0.005 or area_ratio > 0.12: + if area_ratio < 0.004480508227271387 or area_ratio > 0.13807760800032298: continue - # Aspect ratio: LCD is roughly square (0.5 to 1.6) + # Aspect ratio: LCD is roughly square aspect = w / h - if aspect < 0.5 or aspect > 1.6: + if aspect < 0.6831978184146027 or aspect > 1.9505294279578584: continue # Rectangularity rectangularity = area / rect_area - if rectangularity < 0.4: + if rectangularity < 0.6914579162415992: continue - # KEY: edge density — LCD with text > 0.03, plain surfaces < 0.01 + # KEY: edge density — LCD with text has high edge density roi_edges = edges[y : y + h, x : x + w] edge_density = np.sum(roi_edges > 0) / rect_area - if edge_density < 0.03: + if edge_density < 0.012759310759672408: continue # Score: edge density * area * rectangularity # This favours text-rich regions that are large and well-shaped score = edge_density * area * rectangularity - candidates.append((score, x, y, w, h)) + candidates.append((score, x, y, w, h, cnt)) if not candidates: return None candidates.sort(key=lambda c: c[0], reverse=True) - return candidates[0][1:] + best = candidates[0] + return best[1], best[2], best[3], best[4], best[5] + + +def perspective_correct(image, contour, dst_w, dst_h): + """Warp the screen quadrilateral to a flat rectangle.""" + # Approximate contour to a polygon, tightening until we get 4 corners + peri = cv2.arcLength(contour, True) + for eps_mult in [0.02, 0.03, 0.05, 0.08, 0.10]: + approx = cv2.approxPolyDP(contour, eps_mult * peri, True) + if len(approx) == 4: + break + + if len(approx) != 4: + # Fall back to the minimum area rectangle corners + rect = cv2.minAreaRect(contour) + approx = cv2.boxPoints(rect).astype(np.float32) + else: + approx = approx.reshape(4, 2).astype(np.float32) + + src = order_corners(approx) + dst = np.array( + [[0, 0], [dst_w - 1, 0], [dst_w - 1, dst_h - 1], [0, dst_h - 1]], + dtype="float32", + ) + M = cv2.getPerspectiveTransform(src, dst) + return cv2.warpPerspective(image, M, (dst_w, dst_h)) def crop_screen(image_path, output_path, padding=15): - """Load an image, find the screen, crop and save it.""" + """Load an image, find the screen, perspective-correct and save it.""" image = cv2.imread(image_path) if image is None: print(f" ERROR: Could not read {image_path}") @@ -99,16 +138,12 @@ def crop_screen(image_path, output_path, padding=15): print(f" SKIP: No screen detected in {os.path.basename(image_path)}") return False - x, y, w, h = result + x, y, w, h, contour = result - # Add padding, clamped to image bounds - x1 = max(0, x - padding) - y1 = max(0, y - padding) - x2 = min(w_img, x + w + padding) - y2 = min(h_img, y + h + padding) + # Use perspective correction to flatten the screen + corrected = perspective_correct(image, contour, w + 2 * padding, h + 2 * padding) - cropped = image[y1:y2, x1:x2] - cv2.imwrite(output_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95]) + cv2.imwrite(output_path, corrected, [cv2.IMWRITE_JPEG_QUALITY, 95]) print( f" OK: {os.path.basename(image_path)} -> {os.path.basename(output_path)} ({w}x{h})" ) diff --git a/optimize_crop.py b/optimize_crop.py new file mode 100644 index 0000000..7354252 --- /dev/null +++ b/optimize_crop.py @@ -0,0 +1,202 @@ +""" +Optimize crop_to_screen.py parameters using Optuna. + +Uses the feature-based classifier from screen_classifier.py as the +evaluation function. For each trial, runs find_screen() with suggested +parameters on all source photos and counts how many crops are classified +as rowing displays (label=1). Optuna maximises this count. + +Usage: + python optimize_crop.py [--n-trials 300] [--photos-dir photos/] +""" + +import argparse +import glob +import os +import tempfile + +import cv2 +import numpy as np +import optuna + +from screen_classifier import cnn_predict + + +def find_screen_parameterized(image, params): + """ + Detect the Concept 2 PM5 LCD screen region in the image. + Same logic as crop_to_screen.find_screen but with tunable parameters. + + Returns (x, y, w, h) bounding box or None if not found. + """ + h_img, w_img = image.shape[:2] + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + gk = params["gaussian_kernel_size"] + blurred = cv2.GaussianBlur(gray, (gk, gk), 0) + edges = cv2.Canny(blurred, params["canny_low"], params["canny_high"]) + + candidates = [] + + for thresh_val in range(params["thresh_min"], params["thresh_max"], 10): + _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY) + mk = params["morph_kernel_size"] + kern = cv2.getStructuringElement(cv2.MORPH_RECT, (mk, mk)) + thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern) + thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern) + + contours, _ = cv2.findContours( + thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) + + for cnt in contours: + x, y, w, h = cv2.boundingRect(cnt) + area = cv2.contourArea(cnt) + rect_area = w * h + if rect_area == 0: + continue + + area_ratio = rect_area / (h_img * w_img) + if ( + area_ratio < params["area_ratio_min"] + or area_ratio > params["area_ratio_max"] + ): + continue + + aspect = w / h + if aspect < params["aspect_min"] or aspect > params["aspect_max"]: + continue + + rectangularity = area / rect_area + if rectangularity < params["rectangularity_min"]: + continue + + roi_edges = edges[y : y + h, x : x + w] + edge_density = np.sum(roi_edges > 0) / rect_area + if edge_density < params["edge_density_min"]: + continue + + score = edge_density * area * rectangularity + candidates.append((score, x, y, w, h)) + + if not candidates: + return None + + candidates.sort(key=lambda c: c[0], reverse=True) + return candidates[0][1:] + + +def load_images(photos_dir): + """Load all source images once for reuse across trials.""" + paths = sorted( + glob.glob(os.path.join(photos_dir, "*.JPEG")) + + glob.glob(os.path.join(photos_dir, "*.jpeg")) + + glob.glob(os.path.join(photos_dir, "*.jpg")) + + glob.glob(os.path.join(photos_dir, "*.JPG")) + ) + images = [] + for p in paths: + img = cv2.imread(p) + if img is not None: + images.append((p, img)) + return images + + +def make_objective(images, tmp_dir, model_path): + """Create an Optuna objective function closed over images and tmp_dir.""" + + def objective(trial): + params = { + "thresh_min": trial.suggest_int("thresh_min", 60, 160, step=10), + "thresh_max": trial.suggest_int("thresh_max", 160, 255, step=10), + "morph_kernel_size": trial.suggest_int("morph_kernel_size", 3, 21, step=2), + "gaussian_kernel_size": trial.suggest_int( + "gaussian_kernel_size", 3, 11, step=2 + ), + "canny_low": trial.suggest_int("canny_low", 20, 100, step=10), + "canny_high": trial.suggest_int("canny_high", 100, 250, step=10), + "area_ratio_min": trial.suggest_float("area_ratio_min", 0.001, 0.02), + "area_ratio_max": trial.suggest_float("area_ratio_max", 0.05, 0.30), + "aspect_min": trial.suggest_float("aspect_min", 0.3, 0.8), + "aspect_max": trial.suggest_float("aspect_max", 1.2, 2.5), + "rectangularity_min": trial.suggest_float("rectangularity_min", 0.2, 0.7), + "edge_density_min": trial.suggest_float("edge_density_min", 0.005, 0.06), + } + + # Ensure thresh_min < thresh_max + if params["thresh_min"] >= params["thresh_max"]: + return 0 + + # Ensure canny_low < canny_high + if params["canny_low"] >= params["canny_high"]: + return 0 + + rowing_count = 0 + for img_path, img in images: + result = find_screen_parameterized(img, params) + if result is None: + continue + + x, y, w, h = result + h_img, w_img = img.shape[:2] + padding = 15 + x1 = max(0, x - padding) + y1 = max(0, y - padding) + x2 = min(w_img, x + w + padding) + y2 = min(h_img, y + h + padding) + cropped = img[y1:y2, x1:x2] + + # Save to temp file for the classifier + basename = os.path.splitext(os.path.basename(img_path))[0] + tmp_path = os.path.join(tmp_dir, f"{basename}_trial{trial.number}.jpg") + cv2.imwrite(tmp_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95]) + + try: + label, _ = cnn_predict(tmp_path, model_path) + if label == 1: + rowing_count += 1 + finally: + # Clean up immediately to save disk space + if os.path.exists(tmp_path): + os.remove(tmp_path) + + return rowing_count + + return objective + + +def main(): + parser = argparse.ArgumentParser( + description="Optimize crop_to_screen.py parameters" + ) + parser.add_argument( + "--photos-dir", default="photos/", help="Directory of source photos" + ) + parser.add_argument( + "--n-trials", type=int, default=300, help="Number of Optuna trials" + ) + parser.add_argument( + "--model-path", + default="screen_classifier_model.pth", + help="Path to CNN model weights", + ) + args = parser.parse_args() + + images = load_images(args.photos_dir) + print(f"Loaded {len(images)} source images from {args.photos_dir}") + + with tempfile.TemporaryDirectory() as tmp_dir: + study = optuna.create_study(direction="maximize") + objective = make_objective(images, tmp_dir, args.model_path) + study.optimize(objective, n_trials=args.n_trials, show_progress_bar=True) + + print(f"\n{'=' * 60}") + print(f"Best score: {study.best_value} / {len(images)} images classified as rowing") + print(f"Best parameters:") + for k, v in sorted(study.best_params.items()): + print(f" {k:>25s}: {v}") + print(f"{'=' * 60}") + + +if __name__ == "__main__": + main() diff --git a/screen_classifier.py b/screen_classifier.py index 3ee3c6e..1b502f0 100644 --- a/screen_classifier.py +++ b/screen_classifier.py @@ -195,7 +195,10 @@ def get_cnn_model(): def train_cnn( - data_dir: str, epochs: int = 20, lr: float = 1e-3, save_path: str = "model.pth" + data_dir: str, + epochs: int = 20, + lr: float = 1e-3, + save_path: str = "screen_classifier_model.pth", ): """ Train the CNN. Expects data_dir with structure: @@ -336,8 +339,9 @@ def main(): sub = parser.add_subparsers(dest="command", required=True) # --- predict --- - p_pred = sub.add_parser("predict", help="Classify an image") - p_pred.add_argument("--image", required=True, help="Path to image file") + p_pred = sub.add_parser("predict", help="Classify an image or directory of images") + p_pred.add_argument("--image", help="Path to image file") + p_pred.add_argument("--dir", help="Path to directory of images") p_pred.add_argument( "--model", choices=["features", "cnn"], @@ -361,13 +365,50 @@ def main(): args = parser.parse_args() if args.command == "predict": - if args.model == "features": - label, conf = feature_based_predict(args.image, verbose=args.verbose) - else: - label, conf = cnn_predict(args.image, args.model_path) + if not args.image and not args.dir: + parser.error("predict requires --image or --dir") + if args.image and args.dir: + parser.error("--image and --dir are mutually exclusive") - tag = "ROWING MACHINE" if label == 1 else "NOT ROWING MACHINE" - print(f"\n Result: {tag} (label={label}, confidence={conf:.2f})\n") + # Build list of image paths + IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tiff"} + if args.dir: + dir_path = Path(args.dir) + if not dir_path.is_dir(): + print(f"Error: {args.dir} is not a directory") + sys.exit(1) + image_paths = sorted( + p for p in dir_path.iterdir() if p.suffix.lower() in IMAGE_EXTS + ) + if not image_paths: + print(f"No images found in {args.dir}") + sys.exit(1) + else: + image_paths = [Path(args.image)] + + # Classify each image + rowing_count = 0 + for img_path in image_paths: + if args.model == "features": + label, conf = feature_based_predict(str(img_path), verbose=args.verbose) + else: + label, conf = cnn_predict(str(img_path), args.model_path) + + tag = "ROWING MACHINE" if label == 1 else "NOT ROWING MACHINE" + if args.dir: + print(f" {img_path.name} \u2192 {tag} (confidence={conf:.2f})") + else: + print(f"\n Result: {tag} (label={label}, confidence={conf:.2f})\n") + if label == 1: + rowing_count += 1 + + # Summary for directory mode + if args.dir: + total = len(image_paths) + not_rowing = total - rowing_count + print( + f"\n Summary: {total} images | {rowing_count} rowing | {not_rowing} not rowing" + ) elif args.command == "train": train_cnn(args.data_dir, epochs=args.epochs, lr=args.lr, save_path=args.save) diff --git a/screen_classifier_model.pth b/screen_classifier_model.pth new file mode 100644 index 0000000..4fb6f76 Binary files /dev/null and b/screen_classifier_model.pth differ