# Provenance: this script was recovered from the following git patch header.
#   commit 25bb27f32a364cfd598575d95773290edd7bbb2c
#   Author: Adam French
#   Date:   Mon Jan 26 19:07:38 2026 +0000
#
#       init commit
#
#   diff --git a/extract_data.py b/extract_data.py
#   new file mode 100644
#   index 0000000..e227031
#   --- /dev/null
#   +++ b/extract_data.py
#   @@ -0,0 +1,163 @@
import os
from datetime import datetime

import cv2 as cv
import numpy as np
import pandas as pd
import pytesseract as tess
from PIL import Image

PHOTOS_PATH = "./photos/"

# EXIF/TIFF tag 306 (0x0132) is "DateTime", formatted "YYYY:MM:DD HH:MM:SS".
_EXIF_DATETIME_TAG = 306
_EXIF_DATETIME_FORMAT = "%Y:%m:%d %H:%M:%S"

# A candidate box must cover at least this fraction of the whole picture.
_MIN_AREA_FRACTION = 0.01
# Accepted width/height ratio of a candidate box (i.e. roughly square).
_ASPECT_RATIO_RANGE = (0.9, 1.1)


# Get a list of images given a directory path
def get_images(url: str):
    """Open every readable image found directly inside a directory.

    Args:
        url: Path of the directory to scan (a filesystem path, despite the
            parameter name).

    Returns:
        A list of opened ``PIL.Image.Image`` objects in sorted filename
        order. Entries that cannot be opened as images are reported on
        stdout and skipped.
    """
    images = []
    # os.listdir() returns entries in arbitrary order; sort so that runs
    # (and the success/failure report built from them) are reproducible.
    for img_url in sorted(os.listdir(url)):
        try:
            images.append(Image.open(os.path.join(url, img_url)))
        except OSError:  # IOError is an alias of OSError on Python 3
            print(f"Error opening image: {img_url}")
    return images


# Get the datetime taken from an image
def get_datetime_taken(image: Image.Image) -> datetime | None:
    """Return the EXIF ``DateTime`` (tag 306) of an image.

    Args:
        image: The image whose EXIF block is read.

    Returns:
        The parsed timestamp, or ``None`` when the tag is missing or does
        not match the ``YYYY:MM:DD HH:MM:SS`` layout.
    """
    raw = image.getexif().get(_EXIF_DATETIME_TAG)
    if raw is None:
        return None
    try:
        # Some writers pad the value with NULs or spaces.
        return datetime.strptime(str(raw).strip("\x00 "), _EXIF_DATETIME_FORMAT)
    except ValueError:
        # A malformed timestamp is treated like a missing one instead of
        # aborting the whole batch.
        return None


# Convert an image to OpenCV format
def convert_to_opencv_image(img: Image.Image) -> np.ndarray:
    """Convert a Pillow image into a BGR array as used by OpenCV.

    The image is normalised to 3-channel RGB first so that greyscale,
    palette or RGBA inputs do not break the RGB->BGR conversion.
    """
    return cv.cvtColor(np.array(img.convert("RGB")), cv.COLOR_RGB2BGR)


def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The points are sorted by their angle around the centroid, which in image
    coordinates (y pointing down) yields a clockwise order, and the sequence
    is then rotated so that it starts at the corner with the smallest
    ``x + y``. Unlike a pure sum/difference heuristic, this never selects the
    same corner twice for rectangles rotated by roughly 45 degrees.

    Args:
        pts: Any array-like holding exactly four (x, y) points, e.g. an
            OpenCV contour of shape (4, 1, 2).

    Returns:
        A ``float32`` array of shape (4, 2).

    Raises:
        ValueError: If ``pts`` does not contain exactly eight values.
    """
    pts = np.asarray(pts, dtype="float64").reshape(4, 2)

    centre = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
    clockwise = pts[np.argsort(angles)]

    # Start the cycle at the top-left corner (smallest x + y).
    start = int(np.argmin(clockwise.sum(axis=1)))
    return np.roll(clockwise, -start, axis=0).astype("float32")


def is_closed_contour(cnt, eps=1.0):
    """Heuristically tell whether a contour is closed.

    Args:
        cnt: An OpenCV contour (array of shape (N, 1, 2)).
        eps: Maximum distance between the first and the last point for the
            contour to count as closed.

    Returns:
        ``False`` for empty or zero-area contours, otherwise whether the
        first and last points are closer than ``eps``.
    """
    if cnt is None or len(cnt) == 0:
        return False
    # Check area
    if cv.contourArea(cnt) == 0:
        return False
    # Check if first and last points are close
    return cv.norm(cnt[0][0] - cnt[-1][0]) < eps


# Optimise the image for OCR
def process_image(
    img: Image.Image,
    *,
    preview: bool = True,
    output_size: tuple[int, int] = (400, 400),
):
    """Locate the largest roughly-square region and warp it to a flat image.

    Args:
        img: The photo to analyse.
        preview: When true (the default), show the detected contours in a
            blocking preview window before warping. Pass ``False`` for
            unattended or headless runs.
        output_size: ``(width, height)`` in pixels of the warped result.

    Returns:
        The warped BGR image as an array of shape (height, width, 3), or
        ``None`` when no suitable region was found.
    """
    arr = convert_to_opencv_image(img)
    img_area = arr.shape[0] * arr.shape[1]
    if img_area == 0:
        return None

    # Blur the image for better edge (contour) detection
    blur = cv.GaussianBlur(arr, (7, 7), 0)
    edges = cv.Canny(blur, 50, 100)
    contours, _ = cv.findContours(edges, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    # Filter contours for rectangles
    low, high = _ASPECT_RATIO_RANGE
    candidates = []
    for cnt in contours:
        rect = cv.minAreaRect(cnt)
        (_, (width, height), _) = rect
        box_contour = cv.boxPoints(rect).reshape((-1, 1, 2))
        if cv.contourArea(box_contour) < _MIN_AREA_FRACTION * img_area:
            continue
        if height == 0:
            # Degenerate box; also protects the division below.
            continue
        # Check the aspect ratio is reasonable
        # (presumably the target display is square - confirm against photos)
        if low < width / float(height) < high:
            candidates.append(box_contour)

    if not candidates:
        return None

    if preview:
        # Draw on a copy: the original array is warped below and must not
        # carry the green debug lines into the result.
        debug = arr.copy()
        cv.drawContours(debug, contours, -1, (0, 255, 0), 3)
        preview_image(debug)

    # Most likely rectangle will be the largest one
    display_contour = max(candidates, key=cv.contourArea)
    corners = order_points(display_contour)

    w, h = output_size
    dst = np.array(
        [[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]],
        dtype="float32",
    )
    mat = cv.getPerspectiveTransform(corners, dst)
    return cv.warpPerspective(arr, mat, (w, h))


# Get the text from an image using OCR
def ocr_image(img: Image.Image) -> str | None:
    """Placeholder for the OCR step; currently always returns ``None``."""
    # TODO: not implemented yet.
    return None


# Process OCR text output
def process_ocr_text(text: str) -> str | None:
    """Placeholder for OCR post-processing; currently always returns ``None``."""
    # TODO: not implemented yet.
    return None


# There are two gyms that I go to, one is the Peckham gym and the other is the Elephant and Castle gym.
# You can tell which gym I went to by looking at the color of the wall.
# If there is a green wall, it's most likely the Peckham gym.
# If there is a blue wall, it's most likely the Elephant and Castle gym.
def get_gym(image: Image.Image) -> str | None:
    """Placeholder for gym detection; currently always returns ``None``.

    The intended heuristic is the wall colour in the photo: green suggests
    the Peckham gym, blue the Elephant and Castle gym.
    """
    # TODO: not implemented yet.
    return None


def preview_image(img: np.ndarray):
    """Show an image in a window named "preview" and wait for a key press.

    Blocks until a key is pressed. All OpenCV windows are destroyed
    afterwards, even if waiting is interrupted.
    """
    cv.imshow("preview", img)
    try:
        cv.waitKey(0)
    finally:
        cv.destroyAllWindows()


def __main__():
    """Run ``process_image`` over every photo and report successes and failures.

    Prints running counters, then the file names of failed and successful
    images, then the final counters. Always returns ``None``.
    """
    imgs = get_images(PHOTOS_PATH)

    if not imgs:
        print("No images")
        return None

    fail = []
    success = []
    for img in imgs:
        warped = process_image(img)
        if warped is None:
            fail.append(img.filename)
            continue

        success.append(img.filename)
        print("success_len: ", len(success))
        print("fail_len: ", len(fail))

    print("failed:")
    for name in fail:
        print(name)

    print("success:")
    for name in success:
        print(name)

    print("success_len: ", len(success))
    print("fail_len: ", len(fail))

    return None


# Only run the pipeline when executed as a script; importing this module
# must not open GUI windows or touch the photo directory.
if __name__ == "__main__":
    __main__()

# Remaining records of the same git patch (binary files, no code):
# diff --git a/photos/IMG_5412.JPEG b/photos/IMG_5412.JPEG
# new file mode 100644
# index 0000000..650cbef
# Binary files /dev/null and b/photos/IMG_5412.JPEG differ
# diff --git a/photos/IMG_5413.JPEG b/photos/IMG_5413.JPEG
# new file mode 100644
# index 0000000..b5b7fcb
# Binary files /dev/null and b/photos/IMG_5413.JPEG differ
# diff --git a/photos/IMG_5414.JPEG b/photos/IMG_5414.JPEG
# new file mode 100644
# index 0000000..6abff8c
# Binary files /dev/null and b/photos/IMG_5414.JPEG differ
# diff --git a/photos/IMG_5416.JPEG b/photos/IMG_5416.JPEG
# new file mode 100644
# index 0000000..550af32
# Binary files /dev/null and b/photos/IMG_5416.JPEG differ
# diff --git a/photos/IMG_5417.JPEG b/photos/IMG_5417.JPEG
# new file mode 100644
# index 0000000..aada9c3
# Binary files /dev/null and b/photos/IMG_5417.JPEG differ
# diff --git a/photos/IMG_5418.JPEG b/photos/IMG_5418.JPEG
# new file mode 100644
# index 0000000..7d89fbd
# Binary files /dev/null and b/photos/IMG_5418.JPEG differ
# diff --git a/photos/IMG_5428.JPEG b/photos/IMG_5428.JPEG
# new file mode 100644
# index 0000000..7651b6c
# Binary files /dev/null and b/photos/IMG_5428.JPEG differ
# diff --git a/photos/IMG_5431.JPEG b/photos/IMG_5431.JPEG
# new file mode 100644
# index 0000000..4fa8254
# Binary files /dev/null and b/photos/IMG_5431.JPEG differ
# diff --git
a/photos/IMG_5432.JPEG b/photos/IMG_5432.JPEG new file mode 100644 index 0000000..f676de6 Binary files /dev/null and b/photos/IMG_5432.JPEG differ diff --git a/photos/IMG_5443.JPEG b/photos/IMG_5443.JPEG new file mode 100644 index 0000000..9abe845 Binary files /dev/null and b/photos/IMG_5443.JPEG differ diff --git a/photos/IMG_5448.JPEG b/photos/IMG_5448.JPEG new file mode 100644 index 0000000..06dad8a Binary files /dev/null and b/photos/IMG_5448.JPEG differ diff --git a/photos/IMG_5449.JPEG b/photos/IMG_5449.JPEG new file mode 100644 index 0000000..1917f67 Binary files /dev/null and b/photos/IMG_5449.JPEG differ diff --git a/photos/IMG_5451.JPEG b/photos/IMG_5451.JPEG new file mode 100644 index 0000000..94682a5 Binary files /dev/null and b/photos/IMG_5451.JPEG differ diff --git a/photos/IMG_5452.JPEG b/photos/IMG_5452.JPEG new file mode 100644 index 0000000..54cc6d8 Binary files /dev/null and b/photos/IMG_5452.JPEG differ diff --git a/photos/IMG_5454.JPEG b/photos/IMG_5454.JPEG new file mode 100644 index 0000000..4eb0eb5 Binary files /dev/null and b/photos/IMG_5454.JPEG differ diff --git a/photos/IMG_5457.JPEG b/photos/IMG_5457.JPEG new file mode 100644 index 0000000..4364354 Binary files /dev/null and b/photos/IMG_5457.JPEG differ diff --git a/photos/IMG_5459.JPEG b/photos/IMG_5459.JPEG new file mode 100644 index 0000000..f8afdba Binary files /dev/null and b/photos/IMG_5459.JPEG differ diff --git a/photos/IMG_5460.JPEG b/photos/IMG_5460.JPEG new file mode 100644 index 0000000..52c8f87 Binary files /dev/null and b/photos/IMG_5460.JPEG differ diff --git a/photos/IMG_5461.JPEG b/photos/IMG_5461.JPEG new file mode 100644 index 0000000..9adc965 Binary files /dev/null and b/photos/IMG_5461.JPEG differ diff --git a/photos/IMG_5462.JPEG b/photos/IMG_5462.JPEG new file mode 100644 index 0000000..dd6df4e Binary files /dev/null and b/photos/IMG_5462.JPEG differ diff --git a/photos/IMG_5463.JPEG b/photos/IMG_5463.JPEG new file mode 100644 index 0000000..890abe0 Binary files 
/dev/null and b/photos/IMG_5463.JPEG differ diff --git a/photos/IMG_5464.JPEG b/photos/IMG_5464.JPEG new file mode 100644 index 0000000..10b9027 Binary files /dev/null and b/photos/IMG_5464.JPEG differ diff --git a/photos/IMG_5468.JPEG b/photos/IMG_5468.JPEG new file mode 100644 index 0000000..ac36ba2 Binary files /dev/null and b/photos/IMG_5468.JPEG differ diff --git a/photos/IMG_5469.JPEG b/photos/IMG_5469.JPEG new file mode 100644 index 0000000..7e9fbe5 Binary files /dev/null and b/photos/IMG_5469.JPEG differ diff --git a/photos/IMG_5471.JPEG b/photos/IMG_5471.JPEG new file mode 100644 index 0000000..0601a0e Binary files /dev/null and b/photos/IMG_5471.JPEG differ diff --git a/photos/IMG_5472.JPEG b/photos/IMG_5472.JPEG new file mode 100644 index 0000000..b600c5f Binary files /dev/null and b/photos/IMG_5472.JPEG differ diff --git a/photos/IMG_5474.JPEG b/photos/IMG_5474.JPEG new file mode 100644 index 0000000..d55cd75 Binary files /dev/null and b/photos/IMG_5474.JPEG differ diff --git a/photos/IMG_5475.JPEG b/photos/IMG_5475.JPEG new file mode 100644 index 0000000..c6fdd08 Binary files /dev/null and b/photos/IMG_5475.JPEG differ diff --git a/photos/IMG_5476.JPEG b/photos/IMG_5476.JPEG new file mode 100644 index 0000000..40c06b8 Binary files /dev/null and b/photos/IMG_5476.JPEG differ diff --git a/photos/IMG_5477.JPEG b/photos/IMG_5477.JPEG new file mode 100644 index 0000000..3bd6c7e Binary files /dev/null and b/photos/IMG_5477.JPEG differ diff --git a/photos/IMG_5479.JPEG b/photos/IMG_5479.JPEG new file mode 100644 index 0000000..3adac95 Binary files /dev/null and b/photos/IMG_5479.JPEG differ diff --git a/photos/IMG_5482.JPEG b/photos/IMG_5482.JPEG new file mode 100644 index 0000000..4cd2457 Binary files /dev/null and b/photos/IMG_5482.JPEG differ diff --git a/photos/IMG_5483.JPEG b/photos/IMG_5483.JPEG new file mode 100644 index 0000000..e8966e4 Binary files /dev/null and b/photos/IMG_5483.JPEG differ diff --git a/photos/IMG_5484.JPEG b/photos/IMG_5484.JPEG 
new file mode 100644 index 0000000..c54acf6 Binary files /dev/null and b/photos/IMG_5484.JPEG differ diff --git a/photos/IMG_5485.JPEG b/photos/IMG_5485.JPEG new file mode 100644 index 0000000..4854340 Binary files /dev/null and b/photos/IMG_5485.JPEG differ diff --git a/photos/IMG_5486.JPEG b/photos/IMG_5486.JPEG new file mode 100644 index 0000000..20f19ad Binary files /dev/null and b/photos/IMG_5486.JPEG differ diff --git a/photos/IMG_5488.JPEG b/photos/IMG_5488.JPEG new file mode 100644 index 0000000..0883093 Binary files /dev/null and b/photos/IMG_5488.JPEG differ diff --git a/photos/IMG_5489.JPEG b/photos/IMG_5489.JPEG new file mode 100644 index 0000000..59f4ba8 Binary files /dev/null and b/photos/IMG_5489.JPEG differ diff --git a/photos/IMG_5513.JPEG b/photos/IMG_5513.JPEG new file mode 100644 index 0000000..c8b3d41 Binary files /dev/null and b/photos/IMG_5513.JPEG differ diff --git a/photos/IMG_5519.JPEG b/photos/IMG_5519.JPEG new file mode 100644 index 0000000..cb788b4 Binary files /dev/null and b/photos/IMG_5519.JPEG differ diff --git a/photos/IMG_5521.JPEG b/photos/IMG_5521.JPEG new file mode 100644 index 0000000..eb22565 Binary files /dev/null and b/photos/IMG_5521.JPEG differ diff --git a/photos/IMG_5522.JPEG b/photos/IMG_5522.JPEG new file mode 100644 index 0000000..dc20c9d Binary files /dev/null and b/photos/IMG_5522.JPEG differ diff --git a/photos/IMG_5523.JPEG b/photos/IMG_5523.JPEG new file mode 100644 index 0000000..864a329 Binary files /dev/null and b/photos/IMG_5523.JPEG differ diff --git a/preview_screenshot_26.01.2026.png b/preview_screenshot_26.01.2026.png new file mode 100644 index 0000000..74a33ae Binary files /dev/null and b/preview_screenshot_26.01.2026.png differ diff --git a/preview_screenshot_26.01.2026_2.png b/preview_screenshot_26.01.2026_2.png new file mode 100644 index 0000000..22646f5 Binary files /dev/null and b/preview_screenshot_26.01.2026_2.png differ diff --git a/preview_screenshot_26.01.2026_3.png 
b/preview_screenshot_26.01.2026_3.png new file mode 100644 index 0000000..1ba0abb Binary files /dev/null and b/preview_screenshot_26.01.2026_3.png differ