Crops to rowing machine screen - can be trained with optimize_crop.py and screen_classifier
This commit is contained in:
@@ -18,26 +18,39 @@ import glob
|
||||
import sys
|
||||
|
||||
|
||||
def order_corners(pts):
    """Order 4 points as [top-left, top-right, bottom-right, bottom-left].

    The primary strategy uses coordinate sums and differences: the point
    with the smallest ``x + y`` is taken as top-left, the largest as
    bottom-right, the smallest ``y - x`` as top-right and the largest
    ``y - x`` as bottom-left.

    That heuristic can select the same input point for two corners when
    sums or differences tie (for example a diamond-shaped quadrilateral),
    which would drop one real corner.  When exactly four points were given
    and such a collision occurs, the points are instead ordered by angle
    around their centroid, starting from the point with the smallest
    ``x + y``, so every input point appears exactly once.

    Args:
        pts: Array-like of 2-D points; anything reshapeable to ``(N, 2)``
            (so both ``(4, 2)`` and ``(4, 1, 2)`` layouts work).

    Returns:
        ``float32`` array of shape ``(4, 2)`` holding the ordered corners.
    """
    pts = np.asarray(pts).reshape(-1, 2)

    sums = pts.sum(axis=1)
    diffs = np.diff(pts, axis=1).ravel()  # y - x for every point

    picks = [
        int(np.argmin(sums)),   # top-left
        int(np.argmin(diffs)),  # top-right
        int(np.argmax(sums)),   # bottom-right
        int(np.argmax(diffs)),  # bottom-left
    ]

    if len(pts) == 4 and len(set(picks)) < 4:
        # The heuristic reused a point, so fall back to an angular sweep.
        # With image coordinates (y grows downwards) increasing atan2
        # angle walks the corners in the same rotational sense as
        # top-left -> top-right -> bottom-right -> bottom-left.
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        ring = np.argsort(angles, kind="stable")
        start = int(np.argmin(sums[ring]))
        picks = [int(i) for i in np.roll(ring, -start)]

    return pts[picks].astype("float32")
|
||||
|
||||
|
||||
def find_screen(image):
|
||||
"""
|
||||
Detect the Concept 2 PM5 LCD screen region in the image.
|
||||
|
||||
Returns (x, y, w, h) bounding box or None if not found.
|
||||
Returns (x, y, w, h, contour) or None if not found.
|
||||
The contour is the best-matching contour for perspective correction.
|
||||
"""
|
||||
h_img, w_img = image.shape[:2]
|
||||
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# Pre-compute edge map for internal-content scoring
|
||||
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
|
||||
edges = cv2.Canny(blurred, 50, 150)
|
||||
blurred = cv2.GaussianBlur(gray, (11, 11), 0)
|
||||
edges = cv2.Canny(blurred, 80, 100)
|
||||
|
||||
candidates = []
|
||||
|
||||
# Sweep brightness thresholds — screen brightness varies by
|
||||
# lighting conditions (ranges from ~100 in dim gyms to ~200+)
|
||||
for thresh_val in range(120, 200, 10):
|
||||
for thresh_val in range(70, 210, 10):
|
||||
_, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
|
||||
kern = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))
|
||||
kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
|
||||
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)
|
||||
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kern)
|
||||
|
||||
@@ -54,39 +67,65 @@ def find_screen(image):
|
||||
|
||||
# Size: screen is a small-to-medium portion of the photo
|
||||
area_ratio = rect_area / (h_img * w_img)
|
||||
if area_ratio < 0.005 or area_ratio > 0.12:
|
||||
if area_ratio < 0.004480508227271387 or area_ratio > 0.13807760800032298:
|
||||
continue
|
||||
|
||||
# Aspect ratio: LCD is roughly square (0.5 to 1.6)
|
||||
# Aspect ratio: LCD is roughly square
|
||||
aspect = w / h
|
||||
if aspect < 0.5 or aspect > 1.6:
|
||||
if aspect < 0.6831978184146027 or aspect > 1.9505294279578584:
|
||||
continue
|
||||
|
||||
# Rectangularity
|
||||
rectangularity = area / rect_area
|
||||
if rectangularity < 0.4:
|
||||
if rectangularity < 0.6914579162415992:
|
||||
continue
|
||||
|
||||
# KEY: edge density — LCD with text > 0.03, plain surfaces < 0.01
|
||||
# KEY: edge density — LCD with text has high edge density
|
||||
roi_edges = edges[y : y + h, x : x + w]
|
||||
edge_density = np.sum(roi_edges > 0) / rect_area
|
||||
if edge_density < 0.03:
|
||||
if edge_density < 0.012759310759672408:
|
||||
continue
|
||||
|
||||
# Score: edge density * area * rectangularity
|
||||
# This favours text-rich regions that are large and well-shaped
|
||||
score = edge_density * area * rectangularity
|
||||
candidates.append((score, x, y, w, h))
|
||||
candidates.append((score, x, y, w, h, cnt))
|
||||
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
candidates.sort(key=lambda c: c[0], reverse=True)
|
||||
return candidates[0][1:]
|
||||
best = candidates[0]
|
||||
return best[1], best[2], best[3], best[4], best[5]
|
||||
|
||||
|
||||
def perspective_correct(image, contour, dst_w, dst_h):
    """Warp the quadrilateral described by ``contour`` onto a flat rectangle.

    The contour is simplified with ``cv2.approxPolyDP`` using progressively
    larger tolerances (2% to 10% of its perimeter) until the result has
    exactly four vertices.  If no tolerance yields four vertices, the
    corners of the contour's minimum-area rectangle are used instead.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        contour: Contour outlining the region to flatten.
        dst_w: Width of the output image in pixels.
        dst_h: Height of the output image in pixels.

    Returns:
        The warped image of size ``dst_w`` x ``dst_h``.
    """
    perimeter = cv2.arcLength(contour, True)

    # Try each tolerance in turn; keep the first 4-vertex approximation.
    quad = None
    for fraction in (0.02, 0.03, 0.05, 0.08, 0.10):
        polygon = cv2.approxPolyDP(contour, fraction * perimeter, True)
        if len(polygon) == 4:
            quad = polygon.reshape(4, 2).astype(np.float32)
            break

    if quad is None:
        # No tolerance produced a quadrilateral: use the min-area box.
        box = cv2.minAreaRect(contour)
        quad = cv2.boxPoints(box).astype(np.float32)

    source_corners = order_corners(quad)

    right = dst_w - 1
    bottom = dst_h - 1
    target_corners = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype="float32",
    )

    transform = cv2.getPerspectiveTransform(source_corners, target_corners)
    return cv2.warpPerspective(image, transform, (dst_w, dst_h))
|
||||
|
||||
|
||||
def crop_screen(image_path, output_path, padding=15):
|
||||
"""Load an image, find the screen, crop and save it."""
|
||||
"""Load an image, find the screen, perspective-correct and save it."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f" ERROR: Could not read {image_path}")
|
||||
@@ -99,16 +138,12 @@ def crop_screen(image_path, output_path, padding=15):
|
||||
print(f" SKIP: No screen detected in {os.path.basename(image_path)}")
|
||||
return False
|
||||
|
||||
x, y, w, h = result
|
||||
x, y, w, h, contour = result
|
||||
|
||||
# Add padding, clamped to image bounds
|
||||
x1 = max(0, x - padding)
|
||||
y1 = max(0, y - padding)
|
||||
x2 = min(w_img, x + w + padding)
|
||||
y2 = min(h_img, y + h + padding)
|
||||
# Use perspective correction to flatten the screen
|
||||
corrected = perspective_correct(image, contour, w + 2 * padding, h + 2 * padding)
|
||||
|
||||
cropped = image[y1:y2, x1:x2]
|
||||
cv2.imwrite(output_path, cropped, [cv2.IMWRITE_JPEG_QUALITY, 95])
|
||||
cv2.imwrite(output_path, corrected, [cv2.IMWRITE_JPEG_QUALITY, 95])
|
||||
print(
|
||||
f" OK: {os.path.basename(image_path)} -> {os.path.basename(output_path)} ({w}x{h})"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user