
Commit 20817db

add python script to sync images via SHA hash based on file paths
1 parent e16b3fc commit 20817db

File tree: 1 file changed (+230, -0 lines)


sync-image.py

Lines changed: 230 additions & 0 deletions
@@ -0,0 +1,230 @@
import os
import sys
import json
import hashlib
import requests
import argparse
from urllib.parse import urlparse
from pathlib import Path
from mimetypes import guess_extension

# --- Config ---
DATA_DIR = 'content/maintainers/'
CACHE_FILE = '.image-cache.json'
IMAGES_DIR = 'public/images/'

Path(IMAGES_DIR).mkdir(parents=True, exist_ok=True)

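# The paths above are relative, so the script assumes it is run from the
# repository root (the directory containing content/ and public/).
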
# --- CLI Argument Parser ---
parser = argparse.ArgumentParser(
    description="Sync remote images to local storage. Missing images are "
                "downloaded and the JSON is updated; existing images are "
                "checked against their cached hash."
)
parser.add_argument("json_file", nargs="?", help="Specific JSON file to process (inside content/maintainers/)")
parser.add_argument("--sync", action="store_true", help="Check and report if any image is out of sync")
parser.add_argument("--debug", action="store_true", help="Show debug logs")
parser.add_argument("--verify-cache", action="store_true", help="Verify existing image cache hashes")

args = parser.parse_args()

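# Example invocations (run from the repository root; "jane_doe.json" is a
# hypothetical file name):
#   python sync-image.py                  # process every JSON in content/maintainers/
#   python sync-image.py jane_doe.json    # process a single maintainer file
#   python sync-image.py --sync           # report out-of-sync images, write nothing
#   python sync-image.py --verify-cache   # re-download cached URLs and compare hashes
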
class Colors:
    RESET = "\033[0m"
    GREEN = "\033[92m"
    RED = "\033[91m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"

# --- Load image cache ---
if os.path.exists(CACHE_FILE):
    with open(CACHE_FILE, 'r') as f:
        cache = json.load(f)
else:
    cache = {}

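# The cache maps each remote URL to the SHA-256 hash and local filename of
# its last downloaded copy, e.g. (illustrative entry, hypothetical URL/name):
#   { "https://example.com/avatar.png":
#       { "hash": "<sha256 hex digest>", "filename": "jane_doe_photo.png" } }
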
# --- Helpers ---
def log(msg):
    if args.debug:
        print(msg)

def get_ext_from_url_or_content_type(url, headers):
    # Prefer the extension from the URL path; fall back to the Content-Type header.
    path = urlparse(url).path
    ext = os.path.splitext(path)[-1]
    if ext:
        return ext.split("?")[0]

    content_type = headers.get('Content-Type', '')
    if content_type:
        return guess_extension(content_type.split(';')[0]) or '.img'

    return '.img'

def get_hash(content):
    return hashlib.sha256(content).hexdigest()

def get_hash_from_file(filepath):
    with open(filepath, 'rb') as f:
        return hashlib.sha256(f.read()).hexdigest()

def download_image(url):
    try:
        # A timeout keeps one unresponsive server from stalling the whole sync.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.content, response.headers
    except Exception as e:
        print(f"[ERROR] Failed to download {url}: {e}")
        return None, None

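# e.g. get_ext_from_url_or_content_type("https://example.com/a.png?s=64", {})
# returns ".png"; a URL without an extension falls back to the response's
# Content-Type header (hypothetical URL).
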
def process_image(url, filename_base, force_download=False):
    if not url.strip():
        return ""

    cached = cache.get(url)

    # Probe common extensions to find an already-downloaded local copy.
    for ext_guess in [".png", ".jpg", ".jpeg", ".svg", ".webp"]:
        full_filename = f"{filename_base}{ext_guess}"
        filepath = os.path.join(IMAGES_DIR, full_filename)
        if os.path.exists(filepath):
            local_hash = get_hash_from_file(filepath)
            if cached and cached["hash"] == local_hash:
                log(f"[SKIP] Up-to-date: {full_filename}")
                return full_filename
            elif args.sync:
                print(f"[CHECK] Needs update: {url} -> /images/{full_filename}")
                return full_filename
            else:
                # Local copy is stale: fall through and re-download.
                break

    if args.sync and not force_download:
        return ""

    content, headers = download_image(url)
    if not content:
        return ""

    ext = get_ext_from_url_or_content_type(url, headers)
    full_filename = f"{filename_base}{ext}"
    filepath = os.path.join(IMAGES_DIR, full_filename)

    with open(filepath, 'wb') as f:
        f.write(content)

    hash_value = get_hash(content)
    cache[url] = {
        "hash": hash_value,
        "filename": full_filename
    }

    print(f"[UPDATE] Downloaded or replaced: {full_filename}")
    return full_filename

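# e.g. process_image("https://example.com/avatar.png", "jane_doe_photo")
# saves public/images/jane_doe_photo.png, records its hash in the cache,
# and returns "jane_doe_photo.png" (hypothetical URL and name).
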
# --- Main File Processor ---
def process_json(file):
    json_path = os.path.join(DATA_DIR, file)
    with open(json_path, 'r', encoding="utf-8") as f:
        try:
            data = json.load(f)
        except Exception as e:
            print(f"[SKIP] Invalid JSON in {file}: {e}")
            return

    base_name = os.path.splitext(file)[0]

    # Photo
    if "photo" in data:
        url = data["photo"]
        if isinstance(url, str) and url.strip():
            if url.startswith("/images/"):
                local_path = os.path.join("public", url.lstrip("/"))
                if os.path.exists(local_path):
                    for original_url, entry in cache.items():
                        if entry["filename"] == os.path.basename(local_path):
                            actual_hash = get_hash_from_file(local_path)
                            if actual_hash != entry["hash"]:
                                print(f"{Colors.YELLOW}[MISMATCH] =====>{Colors.RESET} {url} differs from cache")
                                if args.sync:
                                    print(f"[CHECK] Needs update: {original_url} -> {url}")
                                else:
                                    filename = os.path.splitext(os.path.basename(local_path))[0]
                                    result = process_image(original_url, filename, force_download=True)
                                    data["photo"] = f"/images/{result}" if result else ""
                            break
                else:
                    print(f"[WARN] Local photo missing: {url}")
                    data["photo"] = ""
            else:
                filename = f"{base_name}_photo"
                result = process_image(url, filename)
                data["photo"] = f"/images/{result}" if result else ""
        else:
            data["photo"] = ""

    # Project logos
    for i, project in enumerate(data.get("projects", [])):
        url = project.get("logo", "")
        if isinstance(url, str) and url.strip():
            if url.startswith("/images/"):
                local_path = os.path.join("public", url.lstrip("/"))
                if os.path.exists(local_path):
                    for original_url, entry in cache.items():
                        if entry["filename"] == os.path.basename(local_path):
                            actual_hash = get_hash_from_file(local_path)
                            if actual_hash != entry["hash"]:
                                print(f"{Colors.YELLOW}[MISMATCH] ========>{Colors.RESET} {url} differs from cache")
                                if args.sync:
                                    print(f"[CHECK] Needs update: {original_url} -> {url}")
                                else:
                                    project_name = project.get("name", f"project_{i}")
                                    safe_name = project_name.replace(" ", "_").lower()
                                    filename = f"{base_name}_{safe_name}"
                                    result = process_image(original_url, filename, force_download=True)
                                    project["logo"] = f"/images/{result}" if result else ""
                            break
                else:
                    print(f"[WARN] Local logo missing: {url}")
                    project["logo"] = ""
            else:
                project_name = project.get("name", f"project_{i}")
                safe_name = project_name.replace(" ", "_").lower()
                filename = f"{base_name}_{safe_name}"
                result = process_image(url, filename)
                project["logo"] = f"/images/{result}" if result else ""
        else:
            project["logo"] = ""

    if not args.sync:
        with open(json_path, 'w', encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        print(f"[DONE] Updated: {file}")

# --- Verify cache integrity if requested ---
if args.verify_cache:
    print("[INFO] Verifying image hashes from cache...")
    for url, info in cache.items():
        try:
            content, _ = download_image(url)
            if not content:
                print(f"[FAIL] Could not download: {url}")
                continue
            current_hash = get_hash(content)
            if current_hash == info["hash"]:
                print(f"[OK] Match: {url}")
            else:
                print(f"{Colors.YELLOW}[DIFF] ===========>{Colors.RESET} Hash mismatch: {url}")
        except Exception as e:
            print(f"[FAIL] {url}: {e}")
    sys.exit(0)

# --- Process Files ---
if args.json_file:
    if not args.json_file.endswith(".json"):
        print("[ERROR] Please provide a valid .json file")
    else:
        process_json(args.json_file)
else:
    for file in os.listdir(DATA_DIR):
        if file.endswith(".json"):
            process_json(file)

# --- Save cache ---
if not args.sync:
    with open(CACHE_FILE, 'w', encoding="utf-8") as f:
        json.dump(cache, f, indent=2)

print("[OK] Image sync complete.")
