67 lines
1.6 KiB
Python
Executable File
67 lines
1.6 KiB
Python
Executable File
#! /usr/bin/env python3
|
|
import json
|
|
import os
|
|
import base64
|
|
from PIL import Image
|
|
import multiprocessing as mp
|
|
from io import BytesIO
|
|
|
|
def data_url_to_bin(image_data_url):
    """Decode a base64 JPEG or PNG data URL into raw bytes.

    Args:
        image_data_url: Candidate data URL string; may be empty or ``None``.

    Returns:
        The base64-decoded payload as ``bytes`` when the value starts with
        ``data:image/jpeg;base64,`` or ``data:image/png;base64,``;
        otherwise ``None``.
    """
    if not image_data_url:
        return None
    # Headers are tried in a fixed order: JPEG first, then PNG.
    for header in ("data:image/jpeg;base64,", "data:image/png;base64,"):
        if image_data_url.startswith(header):
            return base64.b64decode(image_data_url[len(header):])
    return None
|
|
|
|
def get_files(d):
    """Lazily yield the path of every file found beneath directory ``d``.

    The tree is traversed with ``os.walk``; each yielded path is the walked
    directory joined with the file name, so it is relative or absolute
    according to how ``d`` itself was given.

    Args:
        d: Root directory to search recursively.

    Yields:
        One path string per file, in ``os.walk`` traversal order.
    """
    for dirpath, _dirnames, filenames in os.walk(d):
        yield from (os.path.join(dirpath, name) for name in filenames)
|
|
|
|
class Record:
    """A single raw sample stored as a JSON file on disk.

    ``file`` and ``basename`` are set by the constructor; ``data`` exists
    only after :meth:`load` has been called.
    """

    # Image modes Pillow's JPEG writer accepts directly. Anything else
    # (e.g. RGBA or palette PNGs) must be converted before saving as .jpg.
    _JPEG_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(self, file):
        """Remember where the sample lives without reading it yet.

        Args:
            file: Path to the sample's JSON file.
        """
        self.file = file
        # File name without its directory part; used to name the thumbnail.
        self.basename = os.path.basename(self.file)

    def load(self):
        """Parse the JSON file into ``self.data``.

        Raises:
            OSError: If the file cannot be opened or read.
            ValueError: If the content is not valid JSON (this includes
                ``json.JSONDecodeError``).
        """
        # Binary mode lets ``json.load`` detect the UTF-8/16/32 encoding
        # itself instead of depending on the platform's default text
        # encoding, and the ``with`` block closes the handle promptly.
        with open(self.file, "rb") as fh:
            self.data = json.load(fh)

    def make_thumb(self, path):
        """Write a JPEG thumbnail (at most 100x100) of the record's image.

        The image is taken from the ``"image_data_url"`` entry of
        ``self.data``. Nothing is written when that entry is not a
        supported JPEG/PNG data URL or decodes to no bytes. The output
        file is ``<path>/<basename>.jpg``.

        Args:
            path: Directory that receives the thumbnail; created if missing.

        Raises:
            AttributeError: If :meth:`load` has not been called yet.
            KeyError: If the record has no ``"image_data_url"`` entry.
        """
        encoded = data_url_to_bin(self.data["image_data_url"])
        if not encoded:
            return

        # ``exist_ok`` keeps this safe when several workers create the
        # directory at the same time.
        os.makedirs(path, exist_ok=True)

        img = Image.open(BytesIO(encoded))
        if img.mode not in self._JPEG_MODES:
            # PNG sources may carry alpha or a palette, which the JPEG
            # writer rejects; the alpha channel is dropped by conversion.
            img = img.convert("RGB")
        img.thumbnail((100, 100))
        img.save(f"{path}/{self.basename}.jpg")
|
|
|
|
def load_samples(raw_dir):
    """Yield a ``Record`` for each file found beneath ``raw_dir``.

    Records are only constructed here, not read from disk. An exception
    raised while creating or handing out a record is reported on stdout
    and the iteration moves on to the next file.

    Args:
        raw_dir: Directory searched recursively for sample files.

    Yields:
        One ``Record`` per discovered file.
    """
    paths = get_files(raw_dir)
    for path in paths:
        try:
            yield Record(path)
        except Exception as err:
            print(f"Error loading {path}: {err}")
|
|
|
|
def process_one(rec):
    """Process one record, reporting any failure before propagating it.

    Args:
        rec: Record to process; its ``file`` attribute is used in the
            error message.

    Raises:
        Exception: Whatever ``do_process_one`` raised, re-raised unchanged
            after the message has been printed.
    """
    try:
        do_process_one(rec)
    except Exception as exc:
        message = f"Error processing {rec.file}: {exc}"
        print(message)
        raise
|
|
|
|
def do_process_one(rec):
    """Load a record and write its thumbnail under ``data/thumb``.

    Args:
        rec: Object providing ``load()`` and ``make_thumb(path)``.
    """
    thumb_dir = "data/thumb"
    rec.load()
    rec.make_thumb(thumb_dir)
|
|
|
|
def main():
    """Process every sample under ``data/raw`` in a pool of worker processes.

    Prints the number of discovered samples, then maps ``process_one`` over
    them. An exception raised by any worker propagates out of ``pool.map``.
    """
    samples = list(load_samples("data/raw"))
    print(len(samples))
    if not samples:
        # Nothing to do; avoid starting worker processes for no work.
        return

    # Leave one CPU free, but never request zero workers: on a single-CPU
    # host ``cpu_count() - 1`` is 0 and ``Pool`` rejects that with ValueError.
    workers = max(1, mp.cpu_count() - 1)
    with mp.Pool(workers) as pool:
        pool.map(process_one, samples)
|
|
|
|
# Script entry point: run the pipeline only when executed directly, so that
# importing this module (as worker processes may do) has no side effects.
if __name__ == "__main__":
    main()
|