Skip to content

Commit ab4610d

Browse files
committed
python script to auto change images
Weird issue with the Google Docs to Markdown conversion: images inside tables don't get converted.
1 parent cddbecb commit ab4610d

File tree

1 file changed

+143
-16
lines changed

1 file changed

+143
-16
lines changed

python/main.py

Lines changed: 143 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,148 @@
1-
import tkinter as tk
1+
import os
2+
import re
3+
import zipfile
4+
import base64
5+
import hashlib
6+
from pathlib import Path
7+
from PIL import Image
8+
from io import BytesIO
9+
import numpy as np
210

3-
def main():
4-
# Create the main window
5-
root = tk.Tk()
6-
root.title("Dummy UI Window")
7-
root.geometry("400x200") # Width x Height
811

9-
# Add a label
10-
label = tk.Label(root, text="Hello, this is a dummy UI window!")
11-
label.pack(pady=20)
12+
def validate_file_paths(md_path, zip_path):
    """Check that the Markdown/zip pair is well-formed before processing.

    Only the names are inspected; the files are not opened here.

    Args:
        md_path: Path of the Markdown file (str or path-like).
        zip_path: Path of the zip archive (str or path-like).

    Raises:
        ValueError: If ``md_path`` does not end in ``.md`` or ``zip_path``
            does not end in ``.zip`` (compared case-insensitively), or if
            the two file names differ once the extension is removed.
    """
    md_file = Path(md_path)
    zip_file = Path(zip_path)

    # Compare suffixes case-insensitively so "NOTES.MD" / "NOTES.ZIP" pass.
    if md_file.suffix.lower() != '.md' or zip_file.suffix.lower() != '.zip':
        raise ValueError("One file must be a .md file and the other a .zip file.")

    if md_file.stem != zip_file.stem:
        raise ValueError("The files must have the same name, excluding the extension.")
1921

20-
if __name__ == "__main__":
21-
main()
22+
23+
def extract_images_from_zip(zip_path):
    """Load every image stored in a zip archive.

    Members are selected by file extension (.png, .jpg, .jpeg, .gif, .bmp,
    case-insensitive), opened with PIL and converted to RGB.

    Args:
        zip_path: Path of the zip archive to read.

    Returns:
        dict mapping each member name, exactly as stored in the archive,
        to its RGB image.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    loaded = {}

    with zipfile.ZipFile(zip_path, 'r') as archive:
        image_members = [
            member
            for member in archive.namelist()
            if member.lower().endswith(image_suffixes)
        ]
        for member in image_members:
            with archive.open(member) as handle:
                # convert() forces the pixel data to be read while the
                # archive member is still open.
                loaded[member] = Image.open(handle).convert('RGB')

    print(f"Total images in zip : {len(loaded)}")
    return loaded
35+
36+
def parse_markdown(md_path):
    """Read a Markdown file and collect its image references and payloads.

    Args:
        md_path: Path of the UTF-8 encoded Markdown file.

    Returns:
        A 3-tuple ``(content, image_refs, base64_images)``:
        ``content`` is the full file text, ``image_refs`` lists the labels
        used in ``![][imageN]`` references, and ``base64_images`` lists
        ``(label, base64_data)`` pairs taken from
        ``[imageN]: <data:image/...;base64,...>`` definitions.
    """
    content = Path(md_path).read_text(encoding='utf-8')

    reference_pattern = re.compile(r'!\[\]\[(image\d+)\]')
    definition_pattern = re.compile(
        r'\[(image\d+)\]:\s*<data:image/\w+;base64,([A-Za-z0-9+/=]+)>'
    )

    image_refs = reference_pattern.findall(content)
    base64_images = definition_pattern.findall(content)

    print(f"Total images in markdown : {len(image_refs)}")
    return content, image_refs, base64_images
45+
46+
def save_base64_images(base64_images):
    """Decode embedded base64 images and index them by pixel hash.

    Args:
        base64_images: Iterable of ``(ref, base64_data)`` pairs.

    Returns:
        dict mapping the MD5 hex digest of each decoded RGB image's raw
        pixel bytes to ``(ref, image)``. Images with identical pixel data
        share a key, so a later entry replaces an earlier one.
    """
    decoded = {}
    for ref, payload in base64_images:
        raw_bytes = base64.b64decode(payload)
        picture = Image.open(BytesIO(raw_bytes)).convert('RGB')
        digest = hashlib.md5(picture.tobytes()).hexdigest()
        decoded[digest] = (ref, picture)
    return decoded
54+
55+
def image_difference(img1, img2):
    """Return the mean squared error (MSE) between two images.

    Both images are resized to 128x128 before comparison, so images of
    different dimensions can be compared. Lower values mean more similar.
    """
    thumbnail_size = (128, 128)
    small_a, small_b = img1.resize(thumbnail_size), img2.resize(thumbnail_size)

    # Work in float32 so the subtraction cannot wrap around.
    pixels_a = np.asarray(small_a).astype("float32")
    pixels_b = np.asarray(small_b).astype("float32")

    return np.mean(np.square(pixels_a - pixels_b))
62+
63+
def match_images(zip_images, md_images):
    """Pair each Markdown-embedded image with one image from the zip.

    For every Markdown image, the first unused zip image whose pixel hash
    is identical is chosen. If there is none, the unused zip image with the
    lowest ``image_difference`` score is chosen instead. Each zip image is
    used at most once.

    NOTE: the fallback has no similarity threshold, so any remaining zip
    image can be selected, however different it is.

    Args:
        zip_images: dict mapping zip member name to image.
        md_images: dict mapping MD5 hex digest of the image's pixel bytes
            to ``(ref, image)``.

    Returns:
        dict mapping each Markdown ``ref`` to ``(zip member name, image)``.

    Raises:
        ValueError: If no zip image could be selected for a Markdown image
            (for example, every zip image is already used).
    """
    # Hash each zip image once, instead of once per Markdown image.
    zip_hashes = {
        zip_key: hashlib.md5(zip_img.tobytes()).hexdigest()
        for zip_key, zip_img in zip_images.items()
    }

    matched = {}
    used_zip_keys = set()

    for md_hash, (ref, md_img) in md_images.items():
        candidates = [key for key in zip_images if key not in used_zip_keys]

        # An identical pixel hash is a certain match: take the first one
        # and skip the (much more expensive) pixel comparison entirely.
        best_match = next(
            (key for key in candidates if zip_hashes[key] == md_hash),
            None,
        )

        if best_match is None:
            best_score = float('inf')
            for zip_key in candidates:
                score = image_difference(md_img, zip_images[zip_key])
                # Strict '<' keeps the earliest candidate on ties.
                if score < best_score:
                    best_score = score
                    best_match = zip_key

        if best_match is None:
            raise ValueError(f"No suitable image match found for {ref} in Markdown.")

        matched[ref] = (best_match, zip_images[best_match])
        used_zip_keys.add(best_match)

    return matched
92+
93+
def update_markdown(content, matches, output_folder):
    """Rewrite Markdown so image references point at files on disk.

    All ``[imageN]: <data:image/...;base64,...>`` definition lines are
    removed, then each ``![][ref]`` (or ``![][ ref ]``) reference is
    replaced with ``![](output_folder/<file name>)``.

    Args:
        content: Markdown text to rewrite.
        matches: dict mapping ``ref`` to ``(zip member name, image)``.
        output_folder: Folder name used as the link prefix.

    Returns:
        The rewritten Markdown text.
    """
    # Strip every embedded base64 image definition.
    definition_pattern = re.compile(
        r'\n\[image\d+\]:\s*<data:image/\w+;base64,[A-Za-z0-9+/=]+>'
    )
    updated = definition_pattern.sub('', content)

    for ref, (zip_name, _image) in matches.items():
        link = f'![]({output_folder}/{os.path.basename(zip_name)})'
        # Handle the padded form first, then the tight-bracket form.
        for placeholder in (f'![][ {ref} ]', f'![][{ref}]'):
            updated = updated.replace(placeholder, link)

    return updated
103+
104+
def save_images(output_folder, matches, md_path):
    """Write every matched image into a folder beside the Markdown file.

    The folder ``<directory of md_path>/<output_folder>`` is created if
    needed. Each image is saved there under the base name of its zip
    member.

    Args:
        output_folder: Name of the image folder, relative to the Markdown
            file's directory.
        matches: dict mapping ``ref`` to ``(zip member name, image)``.
        md_path: Path of the Markdown file; only its parent directory is
            used.
    """
    target_dir = os.path.join(Path(md_path).parent, output_folder)
    os.makedirs(target_dir, exist_ok=True)

    for zip_name, image in matches.values():
        image.save(os.path.join(target_dir, os.path.basename(zip_name)))
115+
116+
def process_markdown_and_zip(md_path: str, zip_path: str):
    """Replace embedded base64 images in a Markdown file with image files.

    Steps: validate the two paths, load the zip images, parse the Markdown,
    match embedded images to zip images, save the matched images into
    ``<stem>-img`` beside the Markdown file (spaces in the stem become
    hyphens), and write the rewritten Markdown into the same directory
    under the original file name with spaces replaced by hyphens.

    When the Markdown file name contains no spaces, the output path equals
    the input path, so the input file is overwritten.

    Args:
        md_path: Path of the Markdown file.
        zip_path: Path of the zip archive with the same base name.
    """
    validate_file_paths(md_path, zip_path)

    source_md = Path(md_path)

    zip_images = extract_images_from_zip(zip_path)
    content, _image_refs, base64_images = parse_markdown(md_path)
    md_images = save_base64_images(base64_images)
    matches = match_images(zip_images, md_images)

    output_folder = source_md.stem.replace(' ', '-') + "-img"
    print(source_md)
    print(f"Saving images to: {output_folder}")
    save_images(output_folder, matches, md_path)

    updated_content = update_markdown(content, matches, output_folder)

    # The rewritten Markdown goes next to the source file, so the relative
    # "<output_folder>/<file>" links resolve.
    destination_dir = source_md.parent
    print(destination_dir)
    destination_dir.mkdir(parents=True, exist_ok=True)

    new_md_path = destination_dir / source_md.name.replace(' ', '-')
    new_md_path.write_text(updated_content, encoding='utf-8')
143+
144+
145+
# Example usage
# Raw strings keep the Windows separators literal: "\p" and "\R" are invalid
# escape sequences in an ordinary string literal.
md_path = r'rover-documentation\python\ROCKER SUSPENSION-DRIVE-Mechanical-Documentation.md'
zip_path = r'rover-documentation\python\ROCKER SUSPENSION-DRIVE-Mechanical-Documentation.zip'

if __name__ == "__main__":
    # Run the conversion only when executed as a script, not on import.
    process_markdown_and_zip(md_path, zip_path)

0 commit comments

Comments
 (0)