diff --git a/.gitea/workflows/dummy.yml b/.gitea/workflows/dummy.yml
deleted file mode 100644
index bc0e71f..0000000
--- a/.gitea/workflows/dummy.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-name: dummy
-on:
-  push:
-  workflow_dispatch:
-
-jobs:
-  hello:
-    runs-on: ailab
-    steps:
-      - name: Greet
-        run: |
-          echo "hello from $(hostname) as $(whoami)"
-          uname -a
-          nvidia-smi --query-gpu=name,memory.free --format=csv,noheader || true
diff --git a/.gitea/workflows/smoke.yml b/.gitea/workflows/smoke.yml
new file mode 100644
index 0000000..e7c5dbc
--- /dev/null
+++ b/.gitea/workflows/smoke.yml
@@ -0,0 +1,17 @@
+# Runs scripts/smoke.sh on a `gpu` runner for every push and on manual dispatch.
+name: smoke
+on:
+  push:
+  workflow_dispatch:
+
+jobs:
+  nanochat-smoke:
+    runs-on: gpu
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+      # Log GPU name and memory; a non-zero exit from nvidia-smi fails the job.
+      - name: nvidia-smi
+        run: nvidia-smi --query-gpu=name,memory.free,memory.used --format=csv
+      - name: smoke
+        run: bash scripts/smoke.sh
diff --git a/scripts/smoke.sh b/scripts/smoke.sh
new file mode 100755
index 0000000..36b429b
--- /dev/null
+++ b/scripts/smoke.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# End-to-end smoke for upstream nanochat: dataset → tokenizer → tiny base_train.
+# Idempotent: caches venv, uv-cache, and dataset shards under /data/nanochat-smoke/.
+# Targets ailab (CN, RTX 5090). Uses CN mirrors for PyPI / pytorch wheels / HuggingFace.
+#
+# Overridable via environment: CACHE_ROOT, UV_DEFAULT_INDEX, HF_ENDPOINT.
+set -euo pipefail
+export PATH="$HOME/.local/bin:$PATH"
+
+CACHE_ROOT=${CACHE_ROOT:-/data/nanochat-smoke}
+NANOCHAT_DIR=$CACHE_ROOT/nanochat
+export NANOCHAT_BASE_DIR=$CACHE_ROOT/cache
+export UV_CACHE_DIR=$CACHE_ROOT/uv-cache
+export UV_DEFAULT_INDEX=${UV_DEFAULT_INDEX:-https://mirrors.aliyun.com/pypi/simple/}
+export UV_INDEX_STRATEGY=unsafe-best-match
+export HF_ENDPOINT=${HF_ENDPOINT:-https://hf-mirror.com}
+export OMP_NUM_THREADS=1
+
+mkdir -p "$CACHE_ROOT" "$NANOCHAT_BASE_DIR"
+
+# Clone once; an existing checkout is reused as-is (it is never pulled).
+# Falls back to the ghfast.top-prefixed URL if the direct GitHub clone fails.
+if [ ! -d "$NANOCHAT_DIR" ]; then
+    echo "Cloning nanochat into $NANOCHAT_DIR"
+    git clone https://github.com/karpathy/nanochat.git "$NANOCHAT_DIR" \
+        || git clone https://ghfast.top/https://github.com/karpathy/nanochat.git "$NANOCHAT_DIR"
+fi
+
+cd "$NANOCHAT_DIR"
+
+# CN mirror patches — idempotent: sed only matches the upstream URLs.
+sed -i \
+    -e "s|https://download.pytorch.org/whl/cu128|https://mirror.sjtu.edu.cn/pytorch-wheels/cu128|g" \
+    -e "s|https://download.pytorch.org/whl/cpu|https://mirror.sjtu.edu.cn/pytorch-wheels/cpu|g" \
+    pyproject.toml uv.lock
+# Point the dataset URL at $HF_ENDPOINT so an override applies here as well
+# (a trailing slash on the endpoint is dropped to avoid "//" in the URL).
+sed -i \
+    -e "s|https://huggingface.co/datasets/karpathy/climbmix-400b-shuffle|${HF_ENDPOINT%/}/datasets/karpathy/climbmix-400b-shuffle|" \
+    nanochat/dataset.py
+
+[ -d .venv ] || uv venv
+uv sync --extra gpu --index-strategy unsafe-best-match
+
+source .venv/bin/activate
+
+echo "=== [1/4] download 1 climbmix shard ==="
+time python -m nanochat.dataset -n 1
+
+echo "=== [2/4] train tokenizer (50M chars) ==="
+time python -m scripts.tok_train --max-chars=50000000
+
+echo "=== [3/4] tok_eval ==="
+time python -m scripts.tok_eval
+
+echo "=== [4/4] base_train d=6 50 iters ==="
+time python -m scripts.base_train \
+    --depth=6 \
+    --head-dim=64 \
+    --window-pattern=L \
+    --max-seq-len=512 \
+    --device-batch-size=8 \
+    --total-batch-size=4096 \
+    --eval-every=25 \
+    --eval-tokens=131072 \
+    --core-metric-every=-1 \
+    --sample-every=25 \
+    --num-iterations=50 \
+    --run=dummy
+
+echo "=== smoke done ==="