Move all CI jobs from i7 to ailab runner
This commit is contained in:
parent
a91cf30e05
commit
9207a005bd
@ -1,8 +1,8 @@
|
|||||||
stages:
|
stages:
|
||||||
- ailab
|
|
||||||
- test-and-build
|
- test-and-build
|
||||||
- build-docker
|
- build-docker
|
||||||
- deploy
|
- deploy
|
||||||
|
- ailab
|
||||||
|
|
||||||
variables:
|
variables:
|
||||||
GIT_DEPTH: 1
|
GIT_DEPTH: 1
|
||||||
@ -13,7 +13,7 @@ test:
|
|||||||
except:
|
except:
|
||||||
- main
|
- main
|
||||||
tags:
|
tags:
|
||||||
- i7
|
- ailab
|
||||||
script:
|
script:
|
||||||
- make opencv -j$(nproc --ignore=2)
|
- make opencv -j$(nproc --ignore=2)
|
||||||
- make test
|
- make test
|
||||||
@ -23,7 +23,7 @@ build-web:
|
|||||||
except:
|
except:
|
||||||
- main
|
- main
|
||||||
tags:
|
tags:
|
||||||
- i7
|
- ailab
|
||||||
before_script:
|
before_script:
|
||||||
- (cd web; npm install)
|
- (cd web; npm install)
|
||||||
script:
|
script:
|
||||||
@ -37,7 +37,7 @@ build-docker:
|
|||||||
except:
|
except:
|
||||||
- main
|
- main
|
||||||
tags:
|
tags:
|
||||||
- i7
|
- ailab
|
||||||
script:
|
script:
|
||||||
- make docker-build
|
- make docker-build
|
||||||
- make docker-push
|
- make docker-push
|
||||||
@ -62,7 +62,7 @@ dev-smoke:
|
|||||||
stage: test-and-build
|
stage: test-and-build
|
||||||
when: manual
|
when: manual
|
||||||
tags:
|
tags:
|
||||||
- i7
|
- ailab
|
||||||
allow_failure: true
|
allow_failure: true
|
||||||
script:
|
script:
|
||||||
- ./scripts/emcli --env dev activate 0074253255108
|
- ./scripts/emcli --env dev activate 0074253255108
|
||||||
@ -72,7 +72,7 @@ dev-smoke:
|
|||||||
deploy-prod:
|
deploy-prod:
|
||||||
stage: deploy
|
stage: deploy
|
||||||
tags:
|
tags:
|
||||||
- i7
|
- ailab
|
||||||
only:
|
only:
|
||||||
- main
|
- main
|
||||||
script:
|
script:
|
||||||
|
|||||||
10
Dockerfile
10
Dockerfile
@ -1,10 +1,11 @@
|
|||||||
FROM ubuntu:24.04
|
FROM registry.gitlab.com/euphon/themblem:ubuntu-24.04
|
||||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
curl \
|
curl \
|
||||||
nginx \
|
nginx \
|
||||||
python3 \
|
python3 \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
python3-dev \
|
python3-dev \
|
||||||
|
python3-venv \
|
||||||
build-essential \
|
build-essential \
|
||||||
libpq-dev \
|
libpq-dev \
|
||||||
libjpeg-dev \
|
libjpeg-dev \
|
||||||
@ -15,11 +16,12 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
|
|||||||
zip \
|
zip \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
ADD requirements.txt requirements.txt
|
ADD requirements.txt requirements.txt
|
||||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
RUN python3 -m venv /venv && \
|
||||||
|
/venv/bin/pip install uv --index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
|
||||||
rm -f /usr/lib/python*/EXTERNALLY-MANAGED && \
|
rm -f /usr/lib/python*/EXTERNALLY-MANAGED && \
|
||||||
/root/.local/bin/uv pip install --system --no-cache -r requirements.txt
|
/venv/bin/uv pip install --system --no-cache -r requirements.txt --default-index https://pypi.tuna.tsinghua.edu.cn/simple
|
||||||
ADD scripts/download_models.py /tmp/download_models.py
|
ADD scripts/download_models.py /tmp/download_models.py
|
||||||
RUN python3 /tmp/download_models.py && rm /tmp/download_models.py
|
RUN HF_ENDPOINT=https://hf-mirror.com python3 /tmp/download_models.py && rm /tmp/download_models.py
|
||||||
ADD api /emblem/api
|
ADD api /emblem/api
|
||||||
ADD web /emblem/web
|
ADD web /emblem/web
|
||||||
RUN cd /emblem/api && ./manage.py collectstatic --noinput
|
RUN cd /emblem/api && ./manage.py collectstatic --noinput
|
||||||
|
|||||||
4
Makefile
4
Makefile
@ -1,6 +1,6 @@
|
|||||||
.PHONY: FORCE emblemscanner-release fetch fetch-quick sbs-quick train-quick
|
.PHONY: FORCE emblemscanner-release fetch fetch-quick sbs-quick train-quick
|
||||||
|
|
||||||
DATA_DIR ?= /data/emblem
|
DATA_DIR ?= $(HOME)/emblem
|
||||||
|
|
||||||
IMAGE_TAG := $(shell git rev-parse --short HEAD)
|
IMAGE_TAG := $(shell git rev-parse --short HEAD)
|
||||||
IMAGE_REPO := registry.gitlab.com/euphon/themblem
|
IMAGE_REPO := registry.gitlab.com/euphon/themblem
|
||||||
@ -164,4 +164,4 @@ build/emblemscanner-$(RELEASE_VERSION).zip: FORCE
|
|||||||
exit 1; \
|
exit 1; \
|
||||||
fi
|
fi
|
||||||
cd scanner; zip -r ../build/emblemscanner-$(RELEASE_VERSION).zip pages/emblemscanner
|
cd scanner; zip -r ../build/emblemscanner-$(RELEASE_VERSION).zip pages/emblemscanner
|
||||||
cd scanner/pages/emblemscanner && zip -g ../../../build/emblemscanner-$(RELEASE_VERSION).zip README.md
|
cd scanner/pages/emblemscanner && zip -g ../../../build/emblemscanner-$(RELEASE_VERSION).zip README.md
|
||||||
|
|||||||
@ -92,13 +92,17 @@ def make_side_by_side_img_with_margins(frame_img, std_img):
|
|||||||
if std_corners is None or frame_corners is None:
|
if std_corners is None or frame_corners is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
edge_length = min(std_img.width, std_img.height)
|
# Use a reasonable size - use the larger dimension or at least 512
|
||||||
|
edge_length = max(std_img.width, std_img.height, frame_img.width, frame_img.height, 512)
|
||||||
margin_ratio = find_min_margin_ratio(std_img, std_corners)
|
margin_ratio = find_min_margin_ratio(std_img, std_corners)
|
||||||
std_warped = warp_with_margin_ratio(std_img, edge_length, std_corners, margin_ratio)
|
std_warped = warp_with_margin_ratio(std_img, edge_length, std_corners, margin_ratio)
|
||||||
frame_warped = warp_with_margin_ratio(frame_img, edge_length, frame_corners, margin_ratio)
|
frame_warped = warp_with_margin_ratio(frame_img, edge_length, frame_corners, margin_ratio)
|
||||||
ret = Image.new('RGB', (edge_length, int(edge_length * margin_ratio)))
|
|
||||||
ret.paste(std_warped, (0, 0))
|
# Create horizontal layout: frame on left, std on right
|
||||||
ret.paste(frame_warped, (0, int(edge_length * margin_ratio)))
|
# Each warped image is edge_length x edge_length
|
||||||
|
ret = Image.new('RGB', (edge_length * 2, edge_length))
|
||||||
|
ret.paste(frame_warped, (0, 0)) # frame on left
|
||||||
|
ret.paste(std_warped, (edge_length, 0)) # std on right
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|||||||
@ -22,7 +22,6 @@ def process_scan(scan_dir):
|
|||||||
sbs_file = os.path.join(scan_dir, 'sbs.jpg')
|
sbs_file = os.path.join(scan_dir, 'sbs.jpg')
|
||||||
frame_qr_file = os.path.join(scan_dir, 'frame-qr.jpg')
|
frame_qr_file = os.path.join(scan_dir, 'frame-qr.jpg')
|
||||||
std_qr_file = os.path.join(scan_dir, 'std-qr.jpg')
|
std_qr_file = os.path.join(scan_dir, 'std-qr.jpg')
|
||||||
sbs_no_margin_file = os.path.join(scan_dir, 'sbs-nomargin.jpg')
|
|
||||||
try:
|
try:
|
||||||
if not os.path.exists(sbs_file):
|
if not os.path.exists(sbs_file):
|
||||||
frame_img = Image.open(frame_file)
|
frame_img = Image.open(frame_file)
|
||||||
@ -32,19 +31,6 @@ def process_scan(scan_dir):
|
|||||||
sbs_img.save(sbs_file)
|
sbs_img.save(sbs_file)
|
||||||
else:
|
else:
|
||||||
return "make_side_by_side_img_with_margins failed"
|
return "make_side_by_side_img_with_margins failed"
|
||||||
if not os.path.exists(sbs_no_margin_file):
|
|
||||||
frame_img = Image.open(frame_file)
|
|
||||||
std_img = Image.open(std_file)
|
|
||||||
if not os.path.exists(frame_qr_file) or not os.path.exists(std_qr_file):
|
|
||||||
frame_qrcode, frame_qr_img = extract_qr(frame_img)
|
|
||||||
std_qrcode, std_qr_img = extract_qr(std_img)
|
|
||||||
frame_qr_img.save(frame_qr_file)
|
|
||||||
std_qr_img.save(std_qr_file)
|
|
||||||
else:
|
|
||||||
frame_qr_img = Image.open(frame_qr_file)
|
|
||||||
std_qr_img = Image.open(std_qr_file)
|
|
||||||
sbs_no_margin_img = make_side_by_side_img(frame_qr_img, std_qr_img)
|
|
||||||
sbs_no_margin_img.save(sbs_no_margin_file)
|
|
||||||
return "ok"
|
return "ok"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"error: {e}"
|
return f"error: {e}"
|
||||||
|
|||||||
@ -43,17 +43,18 @@ from functools import partial
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from common import *
|
from common import *
|
||||||
|
|
||||||
def process_scan_grid(scan_item, hue_jitter=0.1):
|
def process_scan_grid(scan_item, data_dir='data', hue_jitter=0.1):
|
||||||
"""Process a single scan to create grid files and metadata"""
|
"""Process a single scan to create grid files and metadata
|
||||||
|
Returns: (sample_metadata, reason) where reason is None on success, or a string describing the failure"""
|
||||||
scan_id, metadata = scan_item
|
scan_id, metadata = scan_item
|
||||||
sample_metadata = []
|
sample_metadata = []
|
||||||
|
|
||||||
sbs_path = os.path.join('data/scans', scan_id, 'sbs.jpg')
|
sbs_path = os.path.join(data_dir, 'scans', scan_id, 'sbs.jpg')
|
||||||
if not os.path.exists(sbs_path):
|
if not os.path.exists(sbs_path):
|
||||||
return sample_metadata
|
return sample_metadata, 'sbs.jpg missing'
|
||||||
|
|
||||||
# Create grid directory if it doesn't exist
|
# Create grid directory if it doesn't exist
|
||||||
grid_dir = os.path.join('data/scans', scan_id, 'grids')
|
grid_dir = os.path.join(data_dir, 'scans', scan_id, 'grids')
|
||||||
os.makedirs(grid_dir, exist_ok=True)
|
os.makedirs(grid_dir, exist_ok=True)
|
||||||
|
|
||||||
# Check if all grid files already exist
|
# Check if all grid files already exist
|
||||||
@ -82,62 +83,76 @@ def process_scan_grid(scan_item, hue_jitter=0.1):
|
|||||||
'grid_j': j,
|
'grid_j': j,
|
||||||
'label': label
|
'label': label
|
||||||
})
|
})
|
||||||
return sample_metadata
|
return sample_metadata, None
|
||||||
|
|
||||||
# Load the side-by-side image
|
# Load the side-by-side image
|
||||||
sbs_img = Image.open(sbs_path).convert('RGB')
|
try:
|
||||||
|
sbs_img = Image.open(sbs_path).convert('RGB')
|
||||||
|
except Exception as e:
|
||||||
|
return sample_metadata, f'sbs.jpg unreadable: {str(e)}'
|
||||||
width, height = sbs_img.size
|
width, height = sbs_img.size
|
||||||
|
|
||||||
|
# Check if image is too small
|
||||||
|
if width < 6 or height < 3:
|
||||||
|
return sample_metadata, f'sbs.jpg too small: {width}x{height}'
|
||||||
|
|
||||||
# Calculate crop dimensions
|
# Calculate crop dimensions
|
||||||
crop_width = width // 6 # width/2 / 3
|
crop_width = width // 6 # width/2 / 3
|
||||||
crop_height = height // 3
|
crop_height = height // 3
|
||||||
|
|
||||||
# Generate all 3x3 grid combinations
|
if crop_width <= 0 or crop_height <= 0:
|
||||||
for i in range(3):
|
return sample_metadata, f'invalid crop dimensions: {crop_width}x{crop_height}'
|
||||||
for j in range(3):
|
|
||||||
# Calculate crop positions directly from original image
|
|
||||||
left_x = i * crop_width
|
|
||||||
right_x = (i + 3) * crop_width # Skip middle section
|
|
||||||
y = j * crop_height
|
|
||||||
|
|
||||||
# Crop directly from original image
|
|
||||||
left_crop = sbs_img.crop((left_x, y, left_x + crop_width, y + crop_height))
|
|
||||||
right_crop = sbs_img.crop((right_x, y, right_x + crop_width, y + crop_height))
|
|
||||||
|
|
||||||
# Apply color jitter only to left crop
|
|
||||||
color_jitter = transforms.ColorJitter(
|
|
||||||
brightness=0.2,
|
|
||||||
contrast=0.2,
|
|
||||||
saturation=0.2,
|
|
||||||
hue=hue_jitter
|
|
||||||
)
|
|
||||||
left_crop = color_jitter(left_crop)
|
|
||||||
|
|
||||||
# Concatenate left and right crops horizontally
|
|
||||||
grid_img = Image.new('RGB', (crop_width * 2, crop_height))
|
|
||||||
grid_img.paste(left_crop, (0, 0))
|
|
||||||
grid_img.paste(right_crop, (crop_width, 0))
|
|
||||||
|
|
||||||
# Save grid image
|
|
||||||
grid_filename = f'grid-{i}-{j}.jpg'
|
|
||||||
grid_path = os.path.join(grid_dir, grid_filename)
|
|
||||||
grid_img.save(grid_path, 'JPEG', quality=95)
|
|
||||||
|
|
||||||
# Store metadata
|
|
||||||
label = 1 if 'pos' in metadata['labels'] else 0
|
|
||||||
sample_metadata.append({
|
|
||||||
'scan_id': scan_id,
|
|
||||||
'grid_path': grid_path,
|
|
||||||
'grid_i': i,
|
|
||||||
'grid_j': j,
|
|
||||||
'label': label
|
|
||||||
})
|
|
||||||
|
|
||||||
return sample_metadata
|
# Generate all 3x3 grid combinations
|
||||||
|
try:
|
||||||
|
for i in range(3):
|
||||||
|
for j in range(3):
|
||||||
|
# Calculate crop positions directly from original image
|
||||||
|
left_x = i * crop_width
|
||||||
|
right_x = (i + 3) * crop_width # Skip middle section
|
||||||
|
y = j * crop_height
|
||||||
|
|
||||||
|
# Crop directly from original image
|
||||||
|
left_crop = sbs_img.crop((left_x, y, left_x + crop_width, y + crop_height))
|
||||||
|
right_crop = sbs_img.crop((right_x, y, right_x + crop_width, y + crop_height))
|
||||||
|
|
||||||
|
# Apply color jitter only to left crop
|
||||||
|
color_jitter = transforms.ColorJitter(
|
||||||
|
brightness=0.2,
|
||||||
|
contrast=0.2,
|
||||||
|
saturation=0.2,
|
||||||
|
hue=hue_jitter
|
||||||
|
)
|
||||||
|
left_crop = color_jitter(left_crop)
|
||||||
|
|
||||||
|
# Concatenate left and right crops horizontally
|
||||||
|
grid_img = Image.new('RGB', (crop_width * 2, crop_height))
|
||||||
|
grid_img.paste(left_crop, (0, 0))
|
||||||
|
grid_img.paste(right_crop, (crop_width, 0))
|
||||||
|
|
||||||
|
# Save grid image
|
||||||
|
grid_filename = f'grid-{i}-{j}.jpg'
|
||||||
|
grid_path = os.path.join(grid_dir, grid_filename)
|
||||||
|
grid_img.save(grid_path, 'JPEG', quality=95)
|
||||||
|
|
||||||
|
# Store metadata
|
||||||
|
label = 1 if 'pos' in metadata['labels'] else 0
|
||||||
|
sample_metadata.append({
|
||||||
|
'scan_id': scan_id,
|
||||||
|
'grid_path': grid_path,
|
||||||
|
'grid_i': i,
|
||||||
|
'grid_j': j,
|
||||||
|
'label': label
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
return sample_metadata, f'error creating grids: {str(e)}'
|
||||||
|
|
||||||
|
return sample_metadata, None
|
||||||
|
|
||||||
class GridDataset(Dataset):
|
class GridDataset(Dataset):
|
||||||
def __init__(self, scan_data, transform=None, num_workers=None, hue_jitter=0.1):
|
def __init__(self, scan_data, data_dir='data', transform=None, num_workers=None, hue_jitter=0.1):
|
||||||
self.scan_data = scan_data
|
self.scan_data = scan_data
|
||||||
|
self.data_dir = data_dir
|
||||||
self.transform = transform
|
self.transform = transform
|
||||||
self.sample_metadata = []
|
self.sample_metadata = []
|
||||||
self.hue_jitter = hue_jitter
|
self.hue_jitter = hue_jitter
|
||||||
@ -150,18 +165,30 @@ class GridDataset(Dataset):
|
|||||||
# Use multiprocessing to create grid files
|
# Use multiprocessing to create grid files
|
||||||
with mp.Pool(processes=num_workers) as pool:
|
with mp.Pool(processes=num_workers) as pool:
|
||||||
# Process all scans in parallel with hue_jitter parameter
|
# Process all scans in parallel with hue_jitter parameter
|
||||||
process_func = partial(process_scan_grid, hue_jitter=self.hue_jitter)
|
process_func = partial(process_scan_grid, data_dir=self.data_dir, hue_jitter=self.hue_jitter)
|
||||||
results = list(tqdm(
|
results = list(tqdm(
|
||||||
pool.imap(process_func, scan_data.items()),
|
pool.imap(process_func, scan_data.items()),
|
||||||
total=len(scan_data),
|
total=len(scan_data),
|
||||||
desc="Creating grid files"
|
desc="Creating grid files"
|
||||||
))
|
))
|
||||||
|
|
||||||
# Collect all sample metadata
|
# Collect all sample metadata and statistics
|
||||||
|
stats = defaultdict(int)
|
||||||
for result in results:
|
for result in results:
|
||||||
self.sample_metadata.extend(result)
|
if isinstance(result, tuple) and len(result) == 2:
|
||||||
|
metadata_list, reason = result
|
||||||
|
self.sample_metadata.extend(metadata_list)
|
||||||
|
if reason is not None:
|
||||||
|
stats[reason] += 1
|
||||||
|
else:
|
||||||
|
# Backward compatibility - old format without reason
|
||||||
|
self.sample_metadata.extend(result)
|
||||||
|
|
||||||
print(f"Created {len(self.sample_metadata)} grid files")
|
print(f"Created {len(self.sample_metadata)} grid files")
|
||||||
|
if stats:
|
||||||
|
print("\nStatistics on why grid files were not created:")
|
||||||
|
for reason, count in sorted(stats.items(), key=lambda x: -x[1]):
|
||||||
|
print(f" {reason}: {count} scans")
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self.sample_metadata)
|
return len(self.sample_metadata)
|
||||||
@ -451,11 +478,11 @@ def main():
|
|||||||
|
|
||||||
# Create datasets
|
# Create datasets
|
||||||
print("Creating training dataset...")
|
print("Creating training dataset...")
|
||||||
train_dataset = GridDataset(train_data, transform=transform, num_workers=args.num_workers, hue_jitter=args.hue_jitter)
|
train_dataset = GridDataset(train_data, data_dir=args.data_dir, transform=transform, num_workers=args.num_workers, hue_jitter=args.hue_jitter)
|
||||||
print(f"Training samples: {len(train_dataset)}")
|
print(f"Training samples: {len(train_dataset)}")
|
||||||
|
|
||||||
print("Creating validation dataset...")
|
print("Creating validation dataset...")
|
||||||
val_dataset = GridDataset(val_data, transform=transform, num_workers=args.num_workers, hue_jitter=args.hue_jitter)
|
val_dataset = GridDataset(val_data, data_dir=args.data_dir, transform=transform, num_workers=args.num_workers, hue_jitter=args.hue_jitter)
|
||||||
print(f"Validation samples: {len(val_dataset)}")
|
print(f"Validation samples: {len(val_dataset)}")
|
||||||
|
|
||||||
# Create data loaders
|
# Create data loaders
|
||||||
|
|||||||
@ -2,9 +2,14 @@
|
|||||||
"""
|
"""
|
||||||
Download HuggingFace models for offline use
|
Download HuggingFace models for offline use
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Use HF mirror if HF_ENDPOINT is not already set
|
||||||
|
if 'HF_ENDPOINT' not in os.environ:
|
||||||
|
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
||||||
|
|
||||||
# Create models directory relative to this script: scripts/ -> ../models/
|
# Create models directory relative to this script: scripts/ -> ../models/
|
||||||
script_dir = Path(__file__).parent
|
script_dir = Path(__file__).parent
|
||||||
models_dir = script_dir.parent / "models"
|
models_dir = script_dir.parent / "models"
|
||||||
@ -16,6 +21,7 @@ model_path = models_dir / "text2vec-base-chinese"
|
|||||||
|
|
||||||
print(f"Downloading model: {model_name}")
|
print(f"Downloading model: {model_name}")
|
||||||
print(f"Saving to: {model_path}")
|
print(f"Saving to: {model_path}")
|
||||||
|
print(f"Using HF endpoint: {os.environ.get('HF_ENDPOINT', 'default')}")
|
||||||
|
|
||||||
model = SentenceTransformer(model_name)
|
model = SentenceTransformer(model_name)
|
||||||
model.save(str(model_path))
|
model.save(str(model_path))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user