|
1 | | -import tkinter as tk |
| 1 | +import os |
| 2 | +import re |
| 3 | +import zipfile |
| 4 | +import base64 |
| 5 | +import hashlib |
| 6 | +from pathlib import Path |
| 7 | +from PIL import Image |
| 8 | +from io import BytesIO |
| 9 | +import numpy as np |
2 | 10 |
|
3 | | -def main(): |
4 | | - # Create the main window |
5 | | - root = tk.Tk() |
6 | | - root.title("Dummy UI Window") |
7 | | - root.geometry("400x200") # Width x Height |
8 | 11 |
|
9 | | - # Add a label |
10 | | - label = tk.Label(root, text="Hello, this is a dummy UI window!") |
11 | | - label.pack(pady=20) |
def validate_file_paths(md_path, zip_path):
    """Check that a Markdown file and a ZIP archive form a matching pair.

    Only the names are inspected; the files are not opened and their
    existence is not checked.

    Args:
        md_path: Path (str or path-like) that must end in ``.md``.
        zip_path: Path (str or path-like) that must end in ``.zip``.

    Raises:
        ValueError: If either extension is wrong, or if the two file names
            differ once the extension is removed.
    """
    md_file = Path(md_path)
    zip_file = Path(zip_path)

    # Compare extensions case-insensitively so that e.g. "NOTES.MD" and
    # "NOTES.ZIP" are accepted as well.
    if md_file.suffix.lower() != '.md' or zip_file.suffix.lower() != '.zip':
        raise ValueError("One file must be a .md file and the other a .zip file.")

    if md_file.stem != zip_file.stem:
        raise ValueError("The files must have the same name, excluding the extension.")
19 | 21 |
|
20 | | -if __name__ == "__main__": |
21 | | - main() |
| 22 | + |
def extract_images_from_zip(zip_path):
    """Load every image stored in a ZIP archive.

    Members are selected by file extension (.png, .jpg, .jpeg, .gif, .bmp,
    compared case-insensitively). Each one is opened and converted to RGB.

    Args:
        zip_path: Path of the ZIP archive to read.

    Returns:
        dict mapping the member name inside the archive to its RGB image.
        The number of loaded images is also printed.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    loaded = {}

    with zipfile.ZipFile(zip_path, 'r') as archive:
        for member in archive.namelist():
            if not member.lower().endswith(image_suffixes):
                continue
            # The RGB conversion happens while the member handle is still open.
            with archive.open(member) as handle:
                loaded[member] = Image.open(handle).convert('RGB')

    print(f"Total images in zip : {len(loaded)}")
    return loaded
| 35 | + |
def parse_markdown(md_path):
    """Read a Markdown file and collect its embedded-image information.

    Args:
        md_path: Path of the UTF-8 encoded Markdown file.

    Returns:
        A tuple ``(content, image_refs, base64_images)`` where
        ``content`` is the full file text, ``image_refs`` is the list of
        ``imageN`` names used in ``![][imageN]`` references, and
        ``base64_images`` is a list of ``(imageN, base64_data)`` pairs taken
        from ``[imageN]: <data:image/...;base64,...>`` definitions.
        The number of references found is also printed.
    """
    text = Path(md_path).read_text(encoding='utf-8')

    reference_pattern = r'!\[\]\[(image\d+)\]'
    definition_pattern = r'\[(image\d+)\]:\s*<data:image/\w+;base64,([A-Za-z0-9+/=]+)>'

    image_refs = re.findall(reference_pattern, text)
    base64_images = re.findall(definition_pattern, text)

    print(f"Total images in markdown : {len(image_refs)}")
    return text, image_refs, base64_images
| 45 | + |
def save_base64_images(base64_images):
    """Decode embedded base64 images and index them by a hash of their pixels.

    Nothing is written to disk here; the images are only decoded in memory.

    Args:
        base64_images: Iterable of ``(ref, base64_data)`` pairs.

    Returns:
        dict mapping the MD5 hex digest of the RGB pixel bytes to
        ``(ref, image)``. Entries whose pixel data is identical share a key,
        so the later reference replaces the earlier one.
    """
    decoded = {}
    for ref, payload in base64_images:
        raw_bytes = base64.b64decode(payload)
        picture = Image.open(BytesIO(raw_bytes)).convert('RGB')
        digest = hashlib.md5(picture.tobytes()).hexdigest()
        decoded[digest] = (ref, picture)
    return decoded
| 54 | + |
def image_difference(img1, img2):
    """Return the mean squared error (MSE) between two images.

    Both images are resized to 128x128 first so that they can be compared
    element by element; the error is computed on float32 pixel values.
    """
    thumbnail_size = (128, 128)
    pixels_a, pixels_b = (
        np.asarray(picture.resize(thumbnail_size)).astype("float32")
        for picture in (img1, img2)
    )
    delta = pixels_a - pixels_b
    return np.mean(delta ** 2)
| 62 | + |
def match_images(zip_images, md_images):
    """Pair every Markdown image with one distinct image from the archive.

    Matching happens in two stages:

    1. Exact matches: a Markdown image whose pixel hash equals the hash of an
       archive image is paired with that archive image.
    2. Nearest matches: each remaining Markdown image is paired with the
       still-unused archive image that has the lowest ``image_difference``.

    Resolving all exact matches first prevents an earlier Markdown image from
    taking, as a "nearest" match, the archive image that is the exact match
    of a later one.

    Args:
        zip_images: dict mapping archive member name to image.
        md_images: dict mapping pixel-hash to ``(ref, image)``.

    Returns:
        dict mapping ``ref`` to ``(archive member name, archive image)``, in
        the iteration order of ``md_images``.

    Raises:
        ValueError: If no unused archive image is left for a Markdown image.
    """
    # Hash every archive image once instead of once per Markdown image.
    # setdefault keeps the first member when several share the same pixels.
    zip_key_by_hash = {}
    for zip_key, zip_img in zip_images.items():
        digest = hashlib.md5(zip_img.tobytes()).hexdigest()
        zip_key_by_hash.setdefault(digest, zip_key)

    # Stage 1: reserve the exact matches before any nearest-match search.
    exact = {
        md_hash: zip_key_by_hash[md_hash]
        for md_hash in md_images
        if md_hash in zip_key_by_hash
    }
    used_zip_keys = set(exact.values())

    matched = {}
    for md_hash, (ref, md_img) in md_images.items():
        best_match = exact.get(md_hash)

        if best_match is None:
            # Stage 2: closest remaining archive image by mean squared error.
            candidates = [key for key in zip_images if key not in used_zip_keys]
            if not candidates:
                raise ValueError(f"No suitable image match found for {ref} in Markdown.")
            best_match = min(
                candidates,
                key=lambda key: image_difference(md_img, zip_images[key]),
            )
            used_zip_keys.add(best_match)

        matched[ref] = (best_match, zip_images[best_match])

    return matched
| 92 | + |
def update_markdown(content, matches, output_folder):
    """Swap embedded base64 images for links to the extracted image files.

    Args:
        content: Full Markdown text.
        matches: dict mapping ``ref`` (e.g. ``image1``) to
            ``(archive member name, image)``; only the member name is used.
        output_folder: Folder, relative to the Markdown file, that holds the
            extracted images.

    Returns:
        The Markdown text with the ``[imageN]: <data:image/...;base64,...>``
        definitions removed and every ``![][imageN]`` reference rewritten to
        ``![](output_folder/file_name)``.
    """
    # Remove all base64 image definitions.
    content = re.sub(r'\n\[image\d+\]:\s*<data:image/\w+;base64,[A-Za-z0-9+/=]+>', '', content)

    for ref, (filename, _) in matches.items():
        # Only the file name is kept; any directory inside the archive is dropped.
        link = f'![]({output_folder}/{os.path.basename(filename)})'
        content = content.replace(f'![][ {ref} ]', link)
        content = content.replace(f'![][{ref}]', link)  # if brackets are tight

    return content
| 103 | + |
def save_images(output_folder, matches, md_path):
    """Write the matched images into a folder next to the Markdown file.

    Args:
        output_folder: Folder name, resolved relative to the directory that
            contains ``md_path``; created if it does not exist.
        matches: dict mapping ``ref`` to ``(archive member name, image)``.
            Each image is saved under the base name of its member name.
        md_path: Path of the Markdown file; only its parent directory is used.
    """
    base_dir = Path(md_path).parent
    os.makedirs(os.path.join(base_dir, output_folder), exist_ok=True)

    for member_name, picture in matches.values():
        destination = os.path.join(base_dir, output_folder, os.path.basename(member_name))
        picture.save(destination)
| 115 | + |
def process_markdown_and_zip(md_path: str, zip_path: str):
    """Replace the base64 images embedded in a Markdown file with image files.

    Steps: validate the two paths, load the archive images, parse the
    Markdown, match each embedded image to an archive image, save the matched
    images to ``<stem>-img`` (spaces in the stem become dashes) beside the
    Markdown file, and write the updated Markdown.

    The updated Markdown is written into the same directory as ``md_path``,
    under the original file name with spaces replaced by dashes. When the
    name contains no spaces this is the input file itself, which is then
    overwritten.

    Args:
        md_path: Path of the Markdown file.
        zip_path: Path of the ZIP archive holding the original images.
    """
    validate_file_paths(md_path, zip_path)

    source_md = Path(md_path)

    zip_images = extract_images_from_zip(zip_path)
    content, _image_refs, base64_images = parse_markdown(md_path)
    matches = match_images(zip_images, save_base64_images(base64_images))

    output_folder = source_md.stem.replace(' ', '-') + "-img"
    print(source_md)
    print(f"Saving images to: {output_folder}")
    save_images(output_folder, matches, md_path)

    updated_content = update_markdown(content, matches, output_folder)

    destination_dir = source_md.parent
    print(destination_dir)

    # Create the destination directory if it doesn't exist.
    destination_dir.mkdir(parents=True, exist_ok=True)

    new_md_path = destination_dir / source_md.name.replace(' ', '-')
    new_md_path.write_text(updated_content, encoding='utf-8')
| 143 | + |
| 144 | + |
# Example usage
# Raw strings keep the backslashes literal: "\p" and "\R" are not valid
# escape sequences in a normal string literal. The resulting values are
# unchanged.
# NOTE(review): this call runs whenever the module is executed or imported;
# consider moving it under an `if __name__ == "__main__":` guard.
md_path = r'rover-documentation\python\ROCKER SUSPENSION-DRIVE-Mechanical-Documentation.md'
zip_path = r'rover-documentation\python\ROCKER SUSPENSION-DRIVE-Mechanical-Documentation.zip'
process_markdown_and_zip(md_path, zip_path)
0 commit comments