Make new file for each step of processing

2026-03-16 11:54:37 +00:00
parent ef0abd57d4
commit 4abcb10194
7 changed files with 536 additions and 244 deletions
--- a/crop_to_screen.py
+++ b/crop_to_screen.py
@@ -0,0 +1,155 @@
+"""
+Crop Concept 2 PM5 rowing machine screens from photos using OpenCV.
+
+Detection strategy:
+  The LCD screen has HIGH internal edge density (text/numbers/lines)
+  compared to other bright regions (windows, walls, lockers).
+  We threshold at multiple brightness levels, filter by edge density,
+  aspect ratio, and size, then pick the best match.
+
+Usage:
+  python crop_to_screen.py [input_dir] [output_dir]
+"""
+
+import cv2
+import numpy as np
+import os
+import glob
+import sys
+
+
+def find_screen(image):
+    """
+    Detect the Concept 2 PM5 LCD screen region in the image.
+
+    Returns (x, y, w, h) bounding box or None if not found.
+    """
+    h_img, w_img = image.shape[:2]
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+    # Pre-compute edge map for internal-content scoring
+    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
+    edges = cv2.Canny(blurred, 50, 150)
+
+    candidates = []
+
+    # Sweep brightness thresholds — screen brightness varies by
+    # lighting conditions (ranges from ~100 in dim gyms to ~200+)
+    for thresh_val in range(120, 200, 10):
+        _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
+        kern = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))
+        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
+        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)
+
+        contours, _ = cv2.findContours(
+            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+        )
+
+        for cnt in contours:
+            x, y, w, h = cv2.boundingRect(cnt)
+            area = cv2.contourArea(cnt)
+            rect_area = w * h
+            if rect_area == 0:
+                continue
+
+            # Size: screen is a small-to-medium portion of the photo
+            area_ratio = rect_area / (h_img * w_img)
+            if area_ratio < 0.005 or area_ratio > 0.12:
+                continue
+
+            # Aspect ratio: LCD is roughly square (0.5 to 1.6)
+            aspect = w / h
+            if aspect < 0.5 or aspect > 1.6:
+                continue
+
+            # Rectangularity
+            rectangularity = area / rect_area
+            if rectangularity < 0.4:
+                continue
+
+            # KEY: edge density — LCD with text > 0.03, plain surfaces < 0.01
+            roi_edges = edges[y : y + h, x : x + w]
+            edge_density = np.sum(roi_edges > 0) / rect_area
+            if edge_density < 0.03:
+                continue
+
+            # Score: edge density * area * rectangularity
+            # This favours text-rich regions that are large and well-shaped
+            score = edge_density * area * rectangularity
+            candidates.append((score, x, y, w, h))
+
+    if not candidates:
+        return None
+
+    candidates.sort(key=lambda c: c[0], reverse=True)
+    return candidates[0][1:]
+
+
+def crop_screen(image_path, output_path, padding=15):
+    """Load an image, find the screen, crop and save it."""
+    image = cv2.imread(image_path)
+    if image is None:
+        print(f"  ERROR: Could not read {image_path}")
+        return False
+
+    h_img, w_img = image.shape[:2]
+    result = find_screen(image)
+
+    if result is None:
+        print(f"  SKIP:  No screen detected in {os.path.basename(image_path)}")
+        return False
+
+    x, y, w, h = result
+
+    # Add padding, clamped to image bounds
+    x1 = max(0, x - padding)
+    y1 = max(0, y - padding)
+    x2 = min(w_img, x + w + padding)
+    y2 = min(h_img, y + h + padding)
+
+    cropped = image[y1:y2, x1:x2]
+    cv2.imwrite(output_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
+    print(
+        f"  OK:    {os.path.basename(image_path)} -> {os.path.basename(output_path)}  ({w}x{h})"
+    )
+    return True
+
+
+def main():
+    if len(sys.argv) >= 3:
+        input_dir = sys.argv[1]
+        output_dir = sys.argv[2]
+    elif len(sys.argv) == 2:
+        input_dir = sys.argv[1]
+        output_dir = os.path.join(input_dir, "cropped")
+    else:
+        input_dir = "/mnt/user-data/uploads"
+        output_dir = "/mnt/user-data/outputs"
+
+    os.makedirs(output_dir, exist_ok=True)
+
+    images = sorted(
+        glob.glob(os.path.join(input_dir, "*.JPEG"))
+        + glob.glob(os.path.join(input_dir, "*.jpeg"))
+        + glob.glob(os.path.join(input_dir, "*.jpg"))
+        + glob.glob(os.path.join(input_dir, "*.JPG"))
+    )
+
+    if not images:
+        print(f"No images found in {input_dir}")
+        return
+
+    print(f"Found {len(images)} images in {input_dir}\n")
+
+    success = 0
+    for img_path in images:
+        name = os.path.splitext(os.path.basename(img_path))[0]
+        out_path = os.path.join(output_dir, f"{name}_screen.jpg")
+        if crop_screen(img_path, out_path):
+            success += 1
+
+    print(f"\nDone: {success}/{len(images)} screens cropped -> {output_dir}")
+
+
+if __name__ == "__main__":
+    main()
--- a/extract_data.ipynb
+++ b/extract_data.ipynb
@@ -1,76 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "13389e33",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import extract_data as ed"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "e5de5ac0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "imgs = ed.get_images(ed.PHOTOS_PATH)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "575fd8c9",
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "AttributeError",
-     "evalue": "module 'extract_data' has no attribute 'plot_image'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mAttributeError\u001b[39m                            Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[9]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m      1\u001b[39m img = ed.convert_to_opencv_image(imgs[\u001b[32m0\u001b[39m])\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43med\u001b[49m\u001b[43m.\u001b[49m\u001b[43mplot_image\u001b[49m(img)\n",
-      "\u001b[31mAttributeError\u001b[39m: module 'extract_data' has no attribute 'plot_image'"
-     ]
-    }
-   ],
-   "source": [
-    "img = ed.convert_to_opencv_image(imgs[0])\n",
-    "ed.plot_image(img)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b8b7bebc",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.13.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/extract_data.py
+++ b/extract_data.py
@@ -1,168 +0,0 @@
-import os
-from datetime import datetime
-
-import matplotlib.pyplot as plt
-
-import cv2 as cv
-import numpy as np
-import pandas as pd
-import pytesseract as tess
-from PIL import Image
-
-PHOTOS_PATH = "./photos/"
-
-# Get a list of images given a directory path
-def get_images(url: str):
-    images = []
-    for img_url in os.listdir(url):
-        try:
-            image = Image.open(os.path.join(url, img_url))
-            images.append(image)
-        except IOError:
-            print(f"Error opening image: {img_url}")
-    return images
-
-
-# Get the datetime taken from an image
-def get_datetime_taken(image: Image.Image) -> datetime | None:
-    exif = image.getexif()
-    if 306 in exif:
-        return datetime.strptime(exif[306], "%Y:%m:%d %H:%M:%S")
-    return None
-
-
-# Convert an image to OpenCV format
-def convert_to_opencv_image(img: Image.Image) -> np.ndarray:
-    return cv.cvtColor(np.array(img), cv.COLOR_RGB2BGR)
-
-
-def order_points(pts):
-    pts = pts.reshape(4, 2)
-    rect = np.zeros((4, 2), dtype="float32")
-
-    s = pts.sum(axis=1)
-    rect[0] = pts[np.argmin(s)]  # top-left
-    rect[2] = pts[np.argmax(s)]  # bottom-right
-
-    diff = np.diff(pts, axis=1)
-    rect[1] = pts[np.argmin(diff)]  # top-right
-    rect[3] = pts[np.argmax(diff)]  # bottom-left
-
-    return rect
-
-
-def is_closed_contour(cnt, eps=1.0):
-    # Check area
-    if cv.contourArea(cnt) == 0:
-        return False
-    # Check if first and last points are close
-    return cv.norm(cnt[0][0] - cnt[-1][0]) < eps
-
-
-# Optimise the image for OCR
-def process_image(img: Image.Image):
-    arr = convert_to_opencv_image(img)
-
-    # Blur the image for better edge (contour) detection
-    blur = cv.GaussianBlur(arr, (7, 7), 0)
-    edges = cv.Canny(blur, 50, 100)
-    contours, hierarchy = cv.findContours(
-        edges, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE
-    )
-
-    # Filter contours for rectangles
-    candidates = []
-    img_area = arr.shape[0] * arr.shape[1]
-
-    for cnt in contours:
-        rect = cv.minAreaRect(cnt)
-        (center, (width, height), angle) = rect
-        box = cv.boxPoints(rect)
-        box_contour = box.reshape((-1, 1, 2))
-        area = cv.contourArea(box_contour)
-        if area < 0.01 * img_area:
-            continue
-        # Check the aspect ratio is reasonable
-        aspect_ratio = width / float(height)
-
-        if 0.9 < aspect_ratio < 1.1:
-            candidates.append(box_contour)
-
-    # Most likely rectangle will be the largest one
-    if len(candidates) == 0:
-        return None
-
-    cv.drawContours(arr, contours, -1, (0, 255, 0), 3)
-    preview_image(arr)
-
-    display_contour = max(candidates, key=cv.contourArea)
-    rect = order_points(display_contour)
-    (w, h) = (400, 400)
-    dst = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype="float32")
-    mat = cv.getPerspectiveTransform(rect, dst)
-    warped = cv.warpPerspective(arr, mat, (w, h))
-    return warped
-
-
-# Get the text from an image using OCR
-def ocr_image(img: Image.Image) -> str:
-    return None
-
-
-# Process OCR text output
-def process_ocr_text(text: str) -> str:
-    return None
-
-
-# There are two gyms that I go to, one is the Peckham gym and the other is the Elephant and Castle gym.
-# You can tell which gym I went to by looking at the color of the wall.
-# If there is a green wall, its most likely the Peckham gym.
-# If there is a blue wall, its most likely the Elephant and Castle gym.
-def get_gym(image: Image.Image) -> str | None:
-    return None
-
-
-def preview_image(img: np.ndarray):
-    cv.imshow("preview", img)
-    cv.waitKey(0)
-    cv.destroyAllWindows()
-
-def plot_image(img, figsize=(6,6)):
-    fig, ax = plt.subplots(figsize=figsize)
-    ax.imshow(img)
-    ax.axis("off")
-
-    return fig
-
-
-def __main__():
-    imgs = get_images(PHOTOS_PATH)
-
-    if not imgs:
-        print("No images")
-        return None
-
-    fail = []
-    success = []
-    for img in imgs:
-        tst = process_image(img)
-        if tst is None:
-            fail.append(img.filename)
-            continue
-
-        success.append(img.filename)
-    print("success_len: ", len(success))
-    print("fail_len: ", len(fail))
-
-    print("failed:")
-    for x in fail:
-        print(x)
-
-    print("success:")
-    for x in success:
-        print(x)
-
-    print("success_len: ", len(success))
-    print("fail_len: ", len(fail))
-
-    return None
--- a/preview_screenshot_26.01.2026.png
+++ b/preview_screenshot_26.01.2026.png
--- a/preview_screenshot_26.01.2026_2.png
+++ b/preview_screenshot_26.01.2026_2.png
--- a/preview_screenshot_26.01.2026_3.png
+++ b/preview_screenshot_26.01.2026_3.png
--- a/screen_classifier.py
+++ b/screen_classifier.py
@@ -0,0 +1,381 @@
+"""
+Rowing Machine Display Classifier
+==================================
+Binary classifier: 1 = rowing machine display, 0 = not rowing machine.
+
+Two modes:
+  1. Feature-based (works immediately, no training needed)
+  2. CNN-based: a small network trained from scratch (needs training data)
+
+Usage:
+  # Predict with feature-based classifier (no training needed)
+  python screen_classifier.py predict --image path/to/image.jpg
+
+  # Organize training data, then train CNN
+  python screen_classifier.py train --data-dir data/
+  python screen_classifier.py predict --image path/to/image.jpg --model cnn
+"""
+
+import argparse
+import os
+import sys
+import json
+import numpy as np
+from pathlib import Path
+from PIL import Image, ImageStat, ImageFilter
+
+# ---------------------------------------------------------------------------
+# Feature-based classifier (works out of the box, no GPU needed)
+# ---------------------------------------------------------------------------
+
+
+def extract_features(image_path: str) -> dict:
+    """Extract hand-crafted features that distinguish rowing displays."""
+    img = Image.open(image_path).convert("RGB")
+    gray = img.convert("L")
+
+    # Resize for consistent analysis
+    gray_resized = gray.resize((256, 256))
+    img_resized = img.resize((256, 256))
+
+    pixels = np.array(gray_resized, dtype=np.float64)
+    color_pixels = np.array(img_resized, dtype=np.float64)
+
+    features = {}
+
+    # 1. Contrast: rowing displays have high contrast (dark text on light bg)
+    features["std_dev"] = float(np.std(pixels))
+
+    # 2. Bimodality: displays tend toward two clusters (text vs background)
+    hist, _ = np.histogram(pixels, bins=32, range=(0, 256))
+    hist_norm = hist / hist.sum()
+    features["entropy"] = float(
+        -np.sum(hist_norm[hist_norm > 0] * np.log2(hist_norm[hist_norm > 0]))
+    )
+
+    # 3. Edge density: text/numbers create lots of edges
+    edges = gray_resized.filter(ImageFilter.FIND_EDGES)
+    edge_pixels = np.array(edges, dtype=np.float64)
+    features["edge_density"] = float(np.mean(edge_pixels > 30))
+
+    # 4. Horizontal line features: displays have horizontal separators
+    sobel_h = gray_resized.filter(
+        ImageFilter.Kernel(
+            size=(3, 3), kernel=[-1, -2, -1, 0, 0, 0, 1, 2, 1], scale=1, offset=128
+        )
+    )
+    sobel_pixels = np.abs(np.array(sobel_h, dtype=np.float64) - 128)
+    features["h_line_strength"] = float(np.mean(sobel_pixels > 20))
+
+    # 5. Color saturation: rowing displays are typically low-saturation
+    r, g, b = color_pixels[:, :, 0], color_pixels[:, :, 1], color_pixels[:, :, 2]
+    max_c = np.maximum(np.maximum(r, g), b)
+    min_c = np.minimum(np.minimum(r, g), b)
+    saturation = np.where(max_c > 0, (max_c - min_c) / max_c, 0)
+    features["mean_saturation"] = float(np.mean(saturation))
+
+    # 6. Dark pixel ratio: displays have significant dark regions (text)
+    features["dark_pixel_ratio"] = float(np.mean(pixels < 80))
+
+    # 7. Bright pixel ratio: displays have bright background regions
+    features["bright_pixel_ratio"] = float(np.mean(pixels > 180))
+
+    # 8. Texture uniformity via local variance
+    blurred = np.array(
+        gray_resized.filter(ImageFilter.GaussianBlur(5)), dtype=np.float64
+    )
+    local_var = np.mean((pixels - blurred) ** 2)
+    features["local_variance"] = float(local_var)
+
+    return features
+
+
+def feature_based_predict(image_path: str, verbose: bool = False) -> tuple[int, float]:
+    """
+    Predict using hand-crafted features and a rule-based scorer.
+    Returns (label, confidence).
+    """
+    feats = extract_features(image_path)
+
+    if verbose:
+        print("\n  Feature values:")
+        for k, v in feats.items():
+            print(f"    {k:>20s}: {v:.4f}")
+
+    score = 0.0
+
+    # Rowing displays: high contrast
+    if feats["std_dev"] > 50:
+        score += 0.15
+    if feats["std_dev"] > 70:
+        score += 0.10
+
+    # High edge density (text/numbers)
+    if feats["edge_density"] > 0.08:
+        score += 0.15
+    if feats["edge_density"] > 0.15:
+        score += 0.10
+
+    # Horizontal lines (separators between rows of data)
+    if feats["h_line_strength"] > 0.06:
+        score += 0.10
+
+    # Low saturation (monochrome-ish displays)
+    if feats["mean_saturation"] < 0.15:
+        score += 0.10
+
+    # Bimodal histogram (text vs background)
+    if feats["entropy"] < 3.8:
+        score += 0.10
+
+    # Has both dark and bright regions
+    if feats["dark_pixel_ratio"] > 0.15 and feats["bright_pixel_ratio"] > 0.15:
+        score += 0.15
+
+    # High local variance = structured content
+    if feats["local_variance"] > 200:
+        score += 0.10
+
+    score = min(score, 1.0)
+    label = 1 if score >= 0.45 else 0
+    confidence = score if label == 1 else 1.0 - score
+
+    return label, confidence
+
+
+# ---------------------------------------------------------------------------
+# CNN-based classifier (requires training)
+# ---------------------------------------------------------------------------
+
+
+def get_cnn_model():
+    """Build a simple CNN for binary classification."""
+    try:
+        import torch
+        import torch.nn as nn
+    except ImportError:
+        print(
+            "Error: PyTorch required for CNN mode. Install with: pip install torch torchvision"
+        )
+        sys.exit(1)
+
+    class RowingCNN(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.features = nn.Sequential(
+                nn.Conv2d(3, 32, 3, padding=1),
+                nn.BatchNorm2d(32),
+                nn.ReLU(),
+                nn.MaxPool2d(2),
+                nn.Conv2d(32, 64, 3, padding=1),
+                nn.BatchNorm2d(64),
+                nn.ReLU(),
+                nn.MaxPool2d(2),
+                nn.Conv2d(64, 128, 3, padding=1),
+                nn.BatchNorm2d(128),
+                nn.ReLU(),
+                nn.MaxPool2d(2),
+                nn.Conv2d(128, 256, 3, padding=1),
+                nn.BatchNorm2d(256),
+                nn.ReLU(),
+                nn.AdaptiveAvgPool2d((4, 4)),
+            )
+            self.classifier = nn.Sequential(
+                nn.Flatten(),
+                nn.Linear(256 * 4 * 4, 128),
+                nn.ReLU(),
+                nn.Dropout(0.5),
+                nn.Linear(128, 1),
+            )
+
+        def forward(self, x):
+            return self.classifier(self.features(x))
+
+    return RowingCNN()
+
+
+def train_cnn(
+    data_dir: str, epochs: int = 20, lr: float = 1e-3, save_path: str = "model.pth"
+):
+    """
+    Train the CNN. Expects data_dir with structure:
+        data_dir/
+            train/
+                0/  (non-rowing images)
+                1/  (rowing images)
+            val/    (optional, same structure)
+    """
+    import torch
+    import torch.nn as nn
+    from torch.utils.data import DataLoader
+    from torchvision import datasets, transforms
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Training on: {device}")
+
+    transform_train = transforms.Compose(
+        [
+            transforms.Resize((224, 224)),
+            transforms.RandomHorizontalFlip(),
+            transforms.RandomRotation(10),
+            transforms.ColorJitter(brightness=0.2, contrast=0.2),
+            transforms.ToTensor(),
+            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+        ]
+    )
+    transform_val = transforms.Compose(
+        [
+            transforms.Resize((224, 224)),
+            transforms.ToTensor(),
+            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+        ]
+    )
+
+    train_dir = os.path.join(data_dir, "train")
+    train_ds = datasets.ImageFolder(train_dir, transform=transform_train)
+    train_loader = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2)
+    print(f"Training samples: {len(train_ds)}  Classes: {train_ds.classes}")
+
+    val_loader = None
+    val_dir = os.path.join(data_dir, "val")
+    if os.path.isdir(val_dir):
+        val_ds = datasets.ImageFolder(val_dir, transform=transform_val)
+        val_loader = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=2)
+        print(f"Validation samples: {len(val_ds)}")
+
+    model = get_cnn_model().to(device)
+    criterion = nn.BCEWithLogitsLoss()
+    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
+
+    best_acc = 0.0
+    for epoch in range(epochs):
+        model.train()
+        running_loss, correct, total = 0.0, 0, 0
+
+        for inputs, labels in train_loader:
+            inputs, labels = inputs.to(device), labels.float().to(device)
+            optimizer.zero_grad()
+            outputs = model(inputs).squeeze(1)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+            running_loss += loss.item() * inputs.size(0)
+            preds = (torch.sigmoid(outputs) > 0.5).long()
+            correct += (preds == labels.long()).sum().item()
+            total += labels.size(0)
+
+        scheduler.step()
+        train_acc = correct / total
+        avg_loss = running_loss / total
+        line = f"  Epoch {epoch + 1:>3d}/{epochs}  loss={avg_loss:.4f}  train_acc={train_acc:.3f}"
+
+        if val_loader:
+            model.eval()
+            val_correct, val_total = 0, 0
+            with torch.no_grad():
+                for inputs, labels in val_loader:
+                    inputs, labels = inputs.to(device), labels.float().to(device)
+                    outputs = model(inputs).squeeze(1)
+                    preds = (torch.sigmoid(outputs) > 0.5).long()
+                    val_correct += (preds == labels.long()).sum().item()
+                    val_total += labels.size(0)
+            val_acc = val_correct / val_total
+            line += f"  val_acc={val_acc:.3f}"
+            if val_acc > best_acc:
+                best_acc = val_acc
+                torch.save(model.state_dict(), save_path)
+                line += "  *saved*"
+        else:
+            torch.save(model.state_dict(), save_path)
+
+        print(line)
+
+    print(f"\nModel saved to {save_path}")
+
+
+def cnn_predict(image_path: str, model_path: str = "model.pth") -> tuple[int, float]:
+    """Predict using the trained CNN."""
+    import torch
+    from torchvision import transforms
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = get_cnn_model()
+    model.load_state_dict(torch.load(model_path, map_location=device))
+    model.to(device)
+    model.eval()
+
+    transform = transforms.Compose(
+        [
+            transforms.Resize((224, 224)),
+            transforms.ToTensor(),
+            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+        ]
+    )
+
+    img = Image.open(image_path).convert("RGB")
+    tensor = transform(img).unsqueeze(0).to(device)
+
+    with torch.no_grad():
+        output = model(tensor).squeeze()
+        prob = torch.sigmoid(output).item()
+
+    label = 1 if prob > 0.5 else 0
+    confidence = prob if label == 1 else 1.0 - prob
+    return label, confidence
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Rowing Machine Display Classifier")
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    # --- predict ---
+    p_pred = sub.add_parser("predict", help="Classify an image")
+    p_pred.add_argument("--image", required=True, help="Path to image file")
+    p_pred.add_argument(
+        "--model",
+        choices=["features", "cnn"],
+        default="features",
+        help="Which classifier to use (default: features)",
+    )
+    p_pred.add_argument("--model-path", default="model.pth", help="Path to CNN weights")
+    p_pred.add_argument("--verbose", "-v", action="store_true")
+
+    # --- train ---
+    p_train = sub.add_parser("train", help="Train the CNN classifier")
+    p_train.add_argument("--data-dir", required=True, help="Root data directory")
+    p_train.add_argument("--epochs", type=int, default=20)
+    p_train.add_argument("--lr", type=float, default=1e-3)
+    p_train.add_argument("--save", default="model.pth", help="Where to save weights")
+
+    # --- extract ---
+    p_feat = sub.add_parser("extract", help="Print extracted features for an image")
+    p_feat.add_argument("--image", required=True)
+
+    args = parser.parse_args()
+
+    if args.command == "predict":
+        if args.model == "features":
+            label, conf = feature_based_predict(args.image, verbose=args.verbose)
+        else:
+            label, conf = cnn_predict(args.image, args.model_path)
+
+        tag = "ROWING MACHINE" if label == 1 else "NOT ROWING MACHINE"
+        print(f"\n  Result: {tag}  (label={label}, confidence={conf:.2f})\n")
+
+    elif args.command == "train":
+        train_cnn(args.data_dir, epochs=args.epochs, lr=args.lr, save_path=args.save)
+
+    elif args.command == "extract":
+        feats = extract_features(args.image)
+        print(json.dumps(feats, indent=2))
+
+
+if __name__ == "__main__":
+    main()