themblem/emblem5/ai/browser.py
2025-12-26 13:55:52 +00:00

121 lines
3.4 KiB
Python

#!/usr/bin/env python3
import os
import sys
import argparse
import streamlit as st
import json
import re
from PIL import Image
def get_scan_ids(data_dir):
    """Return the numeric scan IDs found under ``<data_dir>/scans``.

    Every sub-directory of ``<data_dir>/scans`` whose name parses as an
    integer counts as a scan; other entries (plain files, non-numeric
    directory names) are ignored.

    Args:
        data_dir: Root data directory that contains the ``scans`` folder.

    Returns:
        A list of scan IDs as ints, sorted from highest to lowest. The list
        is empty when ``scans`` is missing or is not a directory.
    """
    scans_dir = os.path.join(data_dir, 'scans')
    try:
        # Try the listing directly instead of checking existence first, so a
        # missing path and a path that is a regular file are both handled
        # without a check-then-use race.
        with os.scandir(scans_dir) as entries:
            dir_names = [entry.name for entry in entries if entry.is_dir()]
    except (FileNotFoundError, NotADirectoryError):
        return []

    scan_ids = []
    for name in dir_names:
        try:
            scan_ids.append(int(name))
        except ValueError:
            # Not a numeric directory name, so not a scan.
            continue
    return sorted(scan_ids, reverse=True)
def parse_filter(filter_str):
    """Parse a filter like '357193-358023 or 358024-358808' into ranges.

    The string is split on the word ``or`` (case-insensitive, surrounded by
    whitespace). Each piece must be, in its entirety, either ``start-end``
    or a single integer; pieces that are anything else are skipped.

    Args:
        filter_str: Filter text entered by the user; may be empty or None.

    Returns:
        A list of inclusive ``(low, high)`` tuples in input order. A single
        number ``n`` becomes ``(n, n)``. Bounds given in reverse order
        (``200-100``) are normalised to ``(100, 200)``.
    """
    if not filter_str or not filter_str.strip():
        return []

    ranges = []
    # Split by 'or' (case insensitive)
    for part in re.split(r'\s+or\s+', filter_str.strip(), flags=re.IGNORECASE):
        part = part.strip()
        if not part:
            continue

        # fullmatch, not match: the whole piece has to be a range, so input
        # with trailing garbage such as '10-20abc' or '1-2-3' is rejected
        # rather than silently read as '10-20' / '1-2'.
        match = re.fullmatch(r'(\d+)\s*-\s*(\d+)', part)
        if match:
            first = int(match.group(1))
            second = int(match.group(2))
            # Order the bounds so a reversed range still selects scans.
            ranges.append((min(first, second), max(first, second)))
            continue

        # Otherwise try to parse the piece as a single number.
        try:
            num = int(part)
        except ValueError:
            continue
        ranges.append((num, num))
    return ranges
def filter_scans(scan_ids, ranges):
    """Keep only the scan IDs that fall inside at least one range.

    Args:
        scan_ids: Scan IDs to filter.
        ranges: Inclusive ``(start, end)`` pairs.

    Returns:
        ``scan_ids`` itself, untouched, when ``ranges`` is empty. Otherwise a
        new list of the matching IDs, de-duplicated and sorted from highest
        to lowest.
    """
    if not ranges:
        return scan_ids

    matched = set()
    for low, high in ranges:
        # Both bounds are inclusive.
        matched.update(sid for sid in scan_ids if low <= sid <= high)

    result = list(matched)
    result.sort(reverse=True)
    return result
def main():
    """Render the scan browser page.

    Reads ``--data-dir`` from the command line (default ``~/emblem``), lists
    the scans found there, and shows either the scans selected by the filter
    text box or, when the filter is empty, the first 500 entries of the
    descending-sorted ID list.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--data-dir', type=str, default=os.path.expanduser('~/emblem'), help='Data directory')
    # parse_known_args tolerates arguments this script does not define.
    args, _ = cli.parse_known_args()
    data_dir = args.data_dir

    st.title('Browser')

    scan_ids = get_scan_ids(data_dir)
    total_scans = len(scan_ids)
    if not total_scans:
        st.write('No scans found')
        return
    st.write(f'Total scans: {total_scans}')

    # Smart filter input
    filter_str = st.text_input('Filter (e.g., "357193-358023 or 358024-358808")', value='')

    if not filter_str:
        # Default: show last 500 scans (the list is sorted newest-first, so
        # a plain slice is enough even when fewer than 500 exist).
        to_show = scan_ids[:500]
        st.write(f'Showing last {len(to_show)} scans (default)')
    else:
        ranges = parse_filter(filter_str)
        if not ranges:
            st.warning('No valid ranges found in filter')
            return
        to_show = filter_scans(scan_ids, ranges)
        st.write(f'Filter matched: {len(to_show)} scans')

    for sid in to_show:
        show_scan(str(sid), data_dir)
def show_scan(scan_id, data_dir):
    """Render one scan: its labels, the image path and a resized preview.

    Nothing is rendered when the scan's ``metadata.json`` or ``sbs.jpg`` is
    missing.

    Args:
        scan_id: Scan directory name (a string) under ``<data_dir>/scans``.
        data_dir: Root data directory that contains the ``scans`` folder.
    """
    scan_dir = os.path.join(data_dir, 'scans', scan_id)

    mdfile = os.path.join(scan_dir, 'metadata.json')
    if not os.path.exists(mdfile):
        return
    # Close the metadata file deterministically instead of leaving the handle
    # to the garbage collector, and read it as UTF-8 rather than the
    # platform's locale encoding.
    with open(mdfile, encoding='utf-8') as md_fp:
        md = json.load(md_fp)

    sbs_path = os.path.join(scan_dir, 'sbs.jpg')
    if not os.path.exists(sbs_path):
        return

    # The context manager releases the image's file handle once the preview
    # has been produced; resize() returns a separate image object.
    with Image.open(sbs_path) as sbs:
        st.write(f'{scan_id}: {md.get("labels", "N/A")}')
        st.write(f'SBS: {sbs_path}')
        st.image(sbs.resize((512, 256)))
    st.divider()
# Build the page only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()