Make new file for each step of processing

2026-03-16 11:54:37 +00:00
parent ef0abd57d4
commit 4abcb10194
7 changed files with 536 additions and 244 deletions
--- a/crop_to_screen.py
+++ b/crop_to_screen.py
@@ -0,0 +1,155 @@
 """
 Crop Concept 2 PM5 rowing machine screens from photos using OpenCV.
 Detection strategy:
  The LCD screen has HIGH internal edge density (text/numbers/lines)
  compared to other bright regions (windows, walls, lockers).
  We threshold at multiple brightness levels, filter by edge density,
  aspect ratio, and size, then pick the best match.
 Usage:
  python crop_screens.py [input_dir] [output_dir]
 """
 import cv2
 import numpy as np
 import os
 import glob
 import sys
 def find_screen(image):
    """
    Detect the Concept 2 PM5 LCD screen region in the image.
    Returns (x, y, w, h) bounding box or None if not found.
    """
    h_img, w_img = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Pre-compute edge map for internal-content scoring
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)
    candidates = []
    # Sweep brightness thresholds — screen brightness varies by
    # lighting conditions (ranges from ~100 in dim gyms to ~200+)
    for thresh_val in range(120, 200, 10):
        _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
        kern = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)
        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            area = cv2.contourArea(cnt)
            rect_area = w * h
            if rect_area == 0:
                continue
            # Size: screen is a small-to-medium portion of the photo
            area_ratio = rect_area / (h_img * w_img)
            if area_ratio < 0.005 or area_ratio > 0.12:
                continue
            # Aspect ratio: LCD is roughly square (0.5 to 1.6)
            aspect = w / h
            if aspect < 0.5 or aspect > 1.6:
                continue
            # Rectangularity
            rectangularity = area / rect_area
            if rectangularity < 0.4:
                continue
            # KEY: edge density — LCD with text > 0.03, plain surfaces < 0.01
            roi_edges = edges[y : y + h, x : x + w]
            edge_density = np.sum(roi_edges > 0) / rect_area
            if edge_density < 0.03:
                continue
            # Score: edge density * area * rectangularity
            # This favours text-rich regions that are large and well-shaped
            score = edge_density * area * rectangularity
            candidates.append((score, x, y, w, h))
    if not candidates:
        return None
    candidates.sort(key=lambda c: c[0], reverse=True)
    return candidates[0][1:]
 def crop_screen(image_path, output_path, padding=15):
    """Load an image, find the screen, crop and save it."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"  ERROR: Could not read {image_path}")
        return False
    h_img, w_img = image.shape[:2]
    result = find_screen(image)
    if result is None:
        print(f"  SKIP:  No screen detected in {os.path.basename(image_path)}")
        return False
    x, y, w, h = result
    # Add padding, clamped to image bounds
    x1 = max(0, x - padding)
    y1 = max(0, y - padding)
    x2 = min(w_img, x + w + padding)
    y2 = min(h_img, y + h + padding)
    cropped = image[y1:y2, x1:x2]
    cv2.imwrite(output_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
    print(
        f"  OK:    {os.path.basename(image_path)} -> {os.path.basename(output_path)}  ({w}x{h})"
    )
    return True
 def main():
    if len(sys.argv) >= 3:
        input_dir = sys.argv[1]
        output_dir = sys.argv[2]
    elif len(sys.argv) == 2:
        input_dir = sys.argv[1]
        output_dir = os.path.join(input_dir, "cropped")
    else:
        input_dir = "/mnt/user-data/uploads"
        output_dir = "/mnt/user-data/outputs"
    os.makedirs(output_dir, exist_ok=True)
    images = sorted(
        glob.glob(os.path.join(input_dir, "*.JPEG"))
        + glob.glob(os.path.join(input_dir, "*.jpeg"))
        + glob.glob(os.path.join(input_dir, "*.jpg"))
        + glob.glob(os.path.join(input_dir, "*.JPG"))
    )
    if not images:
        print(f"No images found in {input_dir}")
        return
    print(f"Found {len(images)} images in {input_dir}\n")
    success = 0
    for img_path in images:
        name = os.path.splitext(os.path.basename(img_path))[0]
        out_path = os.path.join(output_dir, f"{name}_screen.jpg")
        if crop_screen(img_path, out_path):
            success += 1
    print(f"\nDone: {success}/{len(images)} screens cropped -> {output_dir}")
 if __name__ == "__main__":
    main()
--- a/extract_data.ipynb
+++ b/extract_data.ipynb
@@ -1,76 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "13389e33",
   "metadata": {},
   "outputs": [],
   "source": [
    "import extract_data as ed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e5de5ac0",
   "metadata": {},
   "outputs": [],
   "source": [
    "imgs = ed.get_images(ed.PHOTOS_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "575fd8c9",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "module 'extract_data' has no attribute 'plot_image'",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mAttributeError\u001b[39m                            Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[9]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m      1\u001b[39m img = ed.convert_to_opencv_image(imgs[\u001b[32m0\u001b[39m])\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43med\u001b[49m\u001b[43m.\u001b[49m\u001b[43mplot_image\u001b[49m(img)\n",
      "\u001b[31mAttributeError\u001b[39m: module 'extract_data' has no attribute 'plot_image'"
     ]
    }
   ],
   "source": [
    "img = ed.convert_to_opencv_image(imgs[0])\n",
    "ed.plot_image(img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8b7bebc",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/extract_data.py
+++ b/extract_data.py
@@ -1,168 +0,0 @@
 import os
 from datetime import datetime
 import matplotlib.pyplot as plt
 import cv2 as cv
 import numpy as np
 import pandas as pd
 import pytesseract as tess
 from PIL import Image
 PHOTOS_PATH = "./photos/"
 # Get a list of images given a directory path
 def get_images(url: str):
    images = []
    for img_url in os.listdir(url):
        try:
            image = Image.open(os.path.join(url, img_url))
            images.append(image)
        except IOError:
            print(f"Error opening image: {img_url}")
    return images
 # Get the datetime taken from an image
 def get_datetime_taken(image: Image.Image) -> datetime | None:
    exif = image.getexif()
    if 306 in exif:
        return datetime.strptime(exif[306], "%Y:%m:%d %H:%M:%S")
    return None
 # Convert an image to OpenCV format
 def convert_to_opencv_image(img: Image.Image) -> np.ndarray:
    return cv.cvtColor(np.array(img), cv.COLOR_RGB2BGR)
 def order_points(pts):
    pts = pts.reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]  # top-left
    rect[2] = pts[np.argmax(s)]  # bottom-right
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]  # top-right
    rect[3] = pts[np.argmax(diff)]  # bottom-left
    return rect
 def is_closed_contour(cnt, eps=1.0):
    # Check area
    if cv.contourArea(cnt) == 0:
        return False
    # Check if first and last points are close
    return cv.norm(cnt[0][0] - cnt[-1][0]) < eps
 # Optimise the image for OCR
 def process_image(img: Image.Image):
    arr = convert_to_opencv_image(img)
    # Blur the image for better edge (contour) detection
    blur = cv.GaussianBlur(arr, (7, 7), 0)
    edges = cv.Canny(blur, 50, 100)
    contours, hierarchy = cv.findContours(
        edges, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE
    )
    # Filter contours for rectangles
    candidates = []
    img_area = arr.shape[0] * arr.shape[1]
    for cnt in contours:
        rect = cv.minAreaRect(cnt)
        (center, (width, height), angle) = rect
        box = cv.boxPoints(rect)
        box_contour = box.reshape((-1, 1, 2))
        area = cv.contourArea(box_contour)
        if area < 0.01 * img_area:
            continue
        # Check the aspect ratio is reasonable
        aspect_ratio = width / float(height)
        if 0.9 < aspect_ratio < 1.1:
            candidates.append(box_contour)
    # Most likely rectangle will be the largest one
    if len(candidates) == 0:
        return None
    cv.drawContours(arr, contours, -1, (0, 255, 0), 3)
    preview_image(arr)
    display_contour = max(candidates, key=cv.contourArea)
    rect = order_points(display_contour)
    (w, h) = (400, 400)
    dst = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype="float32")
    mat = cv.getPerspectiveTransform(rect, dst)
    warped = cv.warpPerspective(arr, mat, (w, h))
    return warped
 # Get the text from an image using OCR
 def ocr_image(img: Image.Image) -> str:
    return None
 # Process OCR text output
 def process_ocr_text(text: str) -> str:
    return None
 # There are two gyms that I go to, one is the Peckham gym and the other is the Elephant and Castle gym.
 # You can tell which gym I went to by looking at the color of the wall.
 # If there is a green wall, its most likely the Peckham gym.
 # If there is a blue wall, its most likely the Elephant and Castle gym.
 def get_gym(image: Image.Image) -> str | None:
    return None
 def preview_image(img: np.ndarray):
    cv.imshow("preview", img)
    cv.waitKey(0)
    cv.destroyAllWindows()
 def plot_image(img, figsize=(6,6)):
    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(img)
    ax.axis("off")
    return fig
 def __main__():
    imgs = get_images(PHOTOS_PATH)
    if not imgs:
        print("No images")
        return None
    fail = []
    success = []
    for img in imgs:
        tst = process_image(img)
        if tst is None:
            fail.append(img.filename)
            continue
        success.append(img.filename)
    print("success_len: ", len(success))
    print("fail_len: ", len(fail))
    print("failed:")
    for x in fail:
        print(x)
    print("success:")
    for x in success:
        print(x)
    print("success_len: ", len(success))
    print("fail_len: ", len(fail))
    return None
--- a/preview_screenshot_26.01.2026.png
+++ b/preview_screenshot_26.01.2026.png
--- a/preview_screenshot_26.01.2026_2.png
+++ b/preview_screenshot_26.01.2026_2.png
--- a/preview_screenshot_26.01.2026_3.png
+++ b/preview_screenshot_26.01.2026_3.png
--- a/screen_classifier.py
+++ b/screen_classifier.py
@@ -0,0 +1,381 @@
 """
 Rowing Machine Display Classifier
 ==================================
 Binary classifier: 1 = rowing machine display, 0 = not rowing machine.
 Two modes:
  1. Feature-based (works immediately, no training needed)
  2. CNN-based with transfer learning (needs training data)
 Usage:
  # Predict with feature-based classifier (no training needed)
  python classifier.py predict --image path/to/image.jpg
  # Organize training data, then train CNN
  python classifier.py train --data-dir data/
  python classifier.py predict --image path/to/image.jpg --model cnn
 """
 import argparse
 import os
 import sys
 import json
 import numpy as np
 from pathlib import Path
 from PIL import Image, ImageStat, ImageFilter
 # ---------------------------------------------------------------------------
 # Feature-based classifier (works out of the box, no GPU needed)
 # ---------------------------------------------------------------------------
 def extract_features(image_path: str) -> dict:
    """Extract hand-crafted features that distinguish rowing displays."""
    img = Image.open(image_path).convert("RGB")
    gray = img.convert("L")
    # Resize for consistent analysis
    gray_resized = gray.resize((256, 256))
    img_resized = img.resize((256, 256))
    pixels = np.array(gray_resized, dtype=np.float64)
    color_pixels = np.array(img_resized, dtype=np.float64)
    features = {}
    # 1. Contrast: rowing displays have high contrast (dark text on light bg)
    features["std_dev"] = float(np.std(pixels))
    # 2. Bimodality: displays tend toward two clusters (text vs background)
    hist, _ = np.histogram(pixels, bins=32, range=(0, 256))
    hist_norm = hist / hist.sum()
    features["entropy"] = float(
        -np.sum(hist_norm[hist_norm > 0] * np.log2(hist_norm[hist_norm > 0]))
    )
    # 3. Edge density: text/numbers create lots of edges
    edges = gray_resized.filter(ImageFilter.FIND_EDGES)
    edge_pixels = np.array(edges, dtype=np.float64)
    features["edge_density"] = float(np.mean(edge_pixels > 30))
    # 4. Horizontal line features: displays have horizontal separators
    sobel_h = gray_resized.filter(
        ImageFilter.Kernel(
            size=(3, 3), kernel=[-1, -2, -1, 0, 0, 0, 1, 2, 1], scale=1, offset=128
        )
    )
    sobel_pixels = np.abs(np.array(sobel_h, dtype=np.float64) - 128)
    features["h_line_strength"] = float(np.mean(sobel_pixels > 20))
    # 5. Color saturation: rowing displays are typically low-saturation
    r, g, b = color_pixels[:, :, 0], color_pixels[:, :, 1], color_pixels[:, :, 2]
    max_c = np.maximum(np.maximum(r, g), b)
    min_c = np.minimum(np.minimum(r, g), b)
    saturation = np.where(max_c > 0, (max_c - min_c) / max_c, 0)
    features["mean_saturation"] = float(np.mean(saturation))
    # 6. Dark pixel ratio: displays have significant dark regions (text)
    features["dark_pixel_ratio"] = float(np.mean(pixels < 80))
    # 7. Bright pixel ratio: displays have bright background regions
    features["bright_pixel_ratio"] = float(np.mean(pixels > 180))
    # 8. Texture uniformity via local variance
    blurred = np.array(
        gray_resized.filter(ImageFilter.GaussianBlur(5)), dtype=np.float64
    )
    local_var = np.mean((pixels - blurred) ** 2)
    features["local_variance"] = float(local_var)
    return features
 def feature_based_predict(image_path: str, verbose: bool = False) -> tuple[int, float]:
    """
    Predict using hand-crafted features and a rule-based scorer.
    Returns (label, confidence).
    """
    feats = extract_features(image_path)
    if verbose:
        print("\n  Feature values:")
        for k, v in feats.items():
            print(f"    {k:>20s}: {v:.4f}")
    score = 0.0
    # Rowing displays: high contrast
    if feats["std_dev"] > 50:
        score += 0.15
    if feats["std_dev"] > 70:
        score += 0.10
    # High edge density (text/numbers)
    if feats["edge_density"] > 0.08:
        score += 0.15
    if feats["edge_density"] > 0.15:
        score += 0.10
    # Horizontal lines (separators between rows of data)
    if feats["h_line_strength"] > 0.06:
        score += 0.10
    # Low saturation (monochrome-ish displays)
    if feats["mean_saturation"] < 0.15:
        score += 0.10
    # Bimodal histogram (text vs background)
    if feats["entropy"] < 3.8:
        score += 0.10
    # Has both dark and bright regions
    if feats["dark_pixel_ratio"] > 0.15 and feats["bright_pixel_ratio"] > 0.15:
        score += 0.15
    # High local variance = structured content
    if feats["local_variance"] > 200:
        score += 0.10
    score = min(score, 1.0)
    label = 1 if score >= 0.45 else 0
    confidence = score if label == 1 else 1.0 - score
    return label, confidence
 # ---------------------------------------------------------------------------
 # CNN-based classifier (requires training)
 # ---------------------------------------------------------------------------
 def get_cnn_model():
    """Build a simple CNN for binary classification."""
    try:
        import torch
        import torch.nn as nn
    except ImportError:
        print(
            "Error: PyTorch required for CNN mode. Install with: pip install torch torchvision"
        )
        sys.exit(1)
    class RowingCNN(nn.Module):
        def __init__(self):
            super().__init__()
            self.features = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.AdaptiveAvgPool2d((4, 4)),
            )
            self.classifier = nn.Sequential(
                nn.Flatten(),
                nn.Linear(256 * 4 * 4, 128),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(128, 1),
            )
        def forward(self, x):
            return self.classifier(self.features(x))
    return RowingCNN()
 def train_cnn(
    data_dir: str, epochs: int = 20, lr: float = 1e-3, save_path: str = "model.pth"
 ):
    """
    Train the CNN. Expects data_dir with structure:
        data_dir/
            train/
                0/  (non-rowing images)
                1/  (rowing images)
            val/    (optional, same structure)
    """
    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader
    from torchvision import datasets, transforms
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Training on: {device}")
    transform_train = transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    transform_val = transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    train_dir = os.path.join(data_dir, "train")
    train_ds = datasets.ImageFolder(train_dir, transform=transform_train)
    train_loader = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2)
    print(f"Training samples: {len(train_ds)}  Classes: {train_ds.classes}")
    val_loader = None
    val_dir = os.path.join(data_dir, "val")
    if os.path.isdir(val_dir):
        val_ds = datasets.ImageFolder(val_dir, transform=transform_val)
        val_loader = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=2)
        print(f"Validation samples: {len(val_ds)}")
    model = get_cnn_model().to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    best_acc = 0.0
    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.float().to(device)
            optimizer.zero_grad()
            outputs = model(inputs).squeeze(1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            preds = (torch.sigmoid(outputs) > 0.5).long()
            correct += (preds == labels.long()).sum().item()
            total += labels.size(0)
        scheduler.step()
        train_acc = correct / total
        avg_loss = running_loss / total
        line = f"  Epoch {epoch + 1:>3d}/{epochs}  loss={avg_loss:.4f}  train_acc={train_acc:.3f}"
        if val_loader:
            model.eval()
            val_correct, val_total = 0, 0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.float().to(device)
                    outputs = model(inputs).squeeze(1)
                    preds = (torch.sigmoid(outputs) > 0.5).long()
                    val_correct += (preds == labels.long()).sum().item()
                    val_total += labels.size(0)
            val_acc = val_correct / val_total
            line += f"  val_acc={val_acc:.3f}"
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model.state_dict(), save_path)
                line += "  *saved*"
        else:
            torch.save(model.state_dict(), save_path)
        print(line)
    print(f"\nModel saved to {save_path}")
 def cnn_predict(image_path: str, model_path: str = "model.pth") -> tuple[int, float]:
    """Predict using the trained CNN."""
    import torch
    from torchvision import transforms
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = get_cnn_model()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    transform = transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    img = Image.open(image_path).convert("RGB")
    tensor = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(tensor).squeeze()
        prob = torch.sigmoid(output).item()
    label = 1 if prob > 0.5 else 0
    confidence = prob if label == 1 else 1.0 - prob
    return label, confidence
 # ---------------------------------------------------------------------------
 # CLI
 # ---------------------------------------------------------------------------
 def main():
    parser = argparse.ArgumentParser(description="Rowing Machine Display Classifier")
    sub = parser.add_subparsers(dest="command", required=True)
    # --- predict ---
    p_pred = sub.add_parser("predict", help="Classify an image")
    p_pred.add_argument("--image", required=True, help="Path to image file")
    p_pred.add_argument(
        "--model",
        choices=["features", "cnn"],
        default="features",
        help="Which classifier to use (default: features)",
    )
    p_pred.add_argument("--model-path", default="model.pth", help="Path to CNN weights")
    p_pred.add_argument("--verbose", "-v", action="store_true")
    # --- train ---
    p_train = sub.add_parser("train", help="Train the CNN classifier")
    p_train.add_argument("--data-dir", required=True, help="Root data directory")
    p_train.add_argument("--epochs", type=int, default=20)
    p_train.add_argument("--lr", type=float, default=1e-3)
    p_train.add_argument("--save", default="model.pth", help="Where to save weights")
    # --- extract ---
    p_feat = sub.add_parser("extract", help="Print extracted features for an image")
    p_feat.add_argument("--image", required=True)
    args = parser.parse_args()
    if args.command == "predict":
        if args.model == "features":
            label, conf = feature_based_predict(args.image, verbose=args.verbose)
        else:
            label, conf = cnn_predict(args.image, args.model_path)
        tag = "ROWING MACHINE" if label == 1 else "NOT ROWING MACHINE"
        print(f"\n  Result: {tag}  (label={label}, confidence={conf:.2f})\n")
    elif args.command == "train":
        train_cnn(args.data_dir, epochs=args.epochs, lr=args.lr, save_path=args.save)
    elif args.command == "extract":
        feats = extract_features(args.image)
        print(json.dumps(feats, indent=2))
 if __name__ == "__main__":
    main()