research: Add process.py
This commit is contained in:
parent
ff74600819
commit
28dac7da3f
66
research/process.py
Executable file
66
research/process.py
Executable file
@ -0,0 +1,66 @@
|
||||
#! /usr/bin/env python3
|
||||
import json
|
||||
import os
|
||||
import base64
|
||||
from PIL import Image
|
||||
import multiprocessing as mp
|
||||
from io import BytesIO
|
||||
|
||||
def data_url_to_bin(image_data_url):
    """Decode a base64 JPEG or PNG data URL into raw bytes.

    Returns the decoded payload when ``image_data_url`` starts with a
    ``data:image/jpeg;base64,`` or ``data:image/png;base64,`` header.
    Returns ``None`` for empty values and for any other prefix.
    """
    if not image_data_url:
        return None
    for header in ("data:image/jpeg;base64,", "data:image/png;base64,"):
        if image_data_url.startswith(header):
            # Everything after the header is the base64 payload.
            return base64.b64decode(image_data_url[len(header):])
    return None
|
||||
|
||||
def get_files(d):
    """Yield the path of every file found anywhere beneath directory ``d``.

    The tree is traversed with ``os.walk``; each yielded path is the
    containing directory joined with the file name. Directories themselves
    are never yielded.
    """
    for folder, _subdirs, names in os.walk(d):
        yield from (os.path.join(folder, name) for name in names)
|
||||
|
||||
class Record:
    """A single raw sample stored as a JSON file on disk.

    Construction is cheap: only the path and its base name are recorded.
    The JSON payload is read on demand by :meth:`load`, which sets
    ``self.data``.
    """

    def __init__(self, file):
        """Remember the sample's path; the file is not opened here."""
        self.file = file
        self.basename = os.path.basename(self.file)

    def load(self):
        """Parse the JSON file at ``self.file`` into ``self.data``.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if the content is not valid JSON (this includes
                ``json.JSONDecodeError`` and ``UnicodeDecodeError``).
        """
        # Binary mode lets the json module detect UTF-8/16/32 itself instead
        # of depending on the locale's default encoding. The context manager
        # closes the handle right away, which matters when many records are
        # loaded in one worker process.
        with open(self.file, "rb") as fh:
            self.data = json.load(fh)

    def make_thumb(self, path):
        """Write a JPEG thumbnail (at most 100x100) of the sample's image.

        The image is read from ``self.data["image_data_url"]``, so
        :meth:`load` must have been called first. Nothing is written when
        that value is not a JPEG/PNG base64 data URL. The thumbnail is saved
        as ``<basename>.jpg`` inside ``path``, which is created if missing.

        Raises:
            KeyError: if the record has no ``image_data_url`` key.
        """
        raw = data_url_to_bin(self.data["image_data_url"])
        if not raw:
            return

        # exist_ok makes this safe when several worker processes race to
        # create the same output directory.
        os.makedirs(path, exist_ok=True)

        with Image.open(BytesIO(raw)) as src:
            # The JPEG writer rejects modes such as RGBA, LA and P, which are
            # common for PNG input. Convert those to RGB; leave modes that
            # already save correctly untouched.
            if src.mode in ("1", "L", "RGB", "CMYK"):
                thumb = src
            else:
                thumb = src.convert("RGB")
            thumb.thumbnail((100, 100))
            thumb.save(os.path.join(path, f"{self.basename}.jpg"))
|
||||
|
||||
def load_samples(raw_dir):
    """Yield a ``Record`` for each file found under ``raw_dir``.

    Records are only constructed here, not loaded. If building or handing
    out a record raises, the error is printed and that file is skipped.
    """
    for path in get_files(raw_dir):
        try:
            yield Record(path)
        except Exception as err:
            print(f"Error loading {path}: {err}")
|
||||
|
||||
def process_one(rec):
    """Process one record, reporting which file failed before re-raising.

    This is the function mapped over the pool in ``main``. The message is
    printed so the failing file is identified, and the original exception
    is then propagated unchanged to the caller.
    """
    try:
        do_process_one(rec)
    except Exception as err:
        print(f"Error processing {rec.file}: {err}")
        raise
|
||||
|
||||
def do_process_one(rec):
    """Load ``rec`` from disk and write its thumbnail under ``data/thumb``."""
    thumb_dir = "data/thumb"
    rec.load()
    rec.make_thumb(thumb_dir)
|
||||
|
||||
def main():
    """Process every sample under ``data/raw`` in parallel.

    Prints the number of samples found, then maps ``process_one`` over them
    in a process pool. An exception raised by any worker propagates out of
    ``pool.map``.
    """
    samples = list(load_samples("data/raw"))
    print(len(samples))

    # Leave one core free for the rest of the system, but never request zero
    # workers: on a single-core machine cpu_count() - 1 is 0 and Pool(0)
    # raises ValueError.
    workers = max(1, mp.cpu_count() - 1)
    with mp.Pool(workers) as pool:
        pool.map(process_one, samples)
|
||||
|
||||
# Run the pipeline only when executed as a script. The guard also keeps
# multiprocessing workers that re-import this module from re-running main().
if __name__ == "__main__":
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user