Make miniseries more generic, easier to run, and less hard-coded

This commit is contained in:
Andrej Karpathy
2026-01-12 02:54:35 +00:00
parent b33e394528
commit aa95fb2e03
+23 -13
View File
@@ -1,29 +1,39 @@
#!/bin/bash #!/bin/bash
# See speedrun.sh for more comments # See speedrun.sh for more comments
# Usage: ./miniseries.sh [series_name]
# Example: ./miniseries.sh jan11
# Default series name is today's date (e.g., jan11)
export OMP_NUM_THREADS=1 export OMP_NUM_THREADS=1
export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat" export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
mkdir -p $NANOCHAT_BASE_DIR mkdir -p $NANOCHAT_BASE_DIR
# uv # Setup (skip with SKIP_SETUP=1)
command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh if [ -z "$SKIP_SETUP" ]; then
[ -d ".venv" ] || uv venv # uv
uv sync --extra gpu command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
source .venv/bin/activate [ -d ".venv" ] || uv venv
uv sync --extra gpu
source .venv/bin/activate
# Tokenizer # Tokenizer
python -m nanochat.dataset -n 240 python -m nanochat.dataset -n 240
python -m scripts.tok_train --max_chars=2000000000 --vocab_size=32768 python -m scripts.tok_train --max_chars=2000000000 --vocab_size=32768
else
source .venv/bin/activate
fi
# Series name: from arg, env var, or default to today's date (e.g., jan11)
SERIES_NAME="${1:-${SERIES_NAME:-$(date +%b%d | tr '[:upper:]' '[:lower:]')}}"
# Depths to train (the "miniseries") # Depths to train (the "miniseries")
DEPTHS=(10 11 12 13 14 15 16 17 18 19 20) DEPTHS=(10 11 12 13 14 15 16 17 18 19 20)
# Hardware # Hardware
NPROC_PER_NODE="${NPROC_PER_NODE:-8}" NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
# Logging # Logging
WANDB_RUN="${WANDB_RUN:-jan7_miniseries}" WANDB_RUN="${WANDB_RUN:-${SERIES_NAME}_miniseries}"
RESULTS_DIR="$NANOCHAT_BASE_DIR/jan7_miniseries_results" RESULTS_DIR="$NANOCHAT_BASE_DIR/${SERIES_NAME}_miniseries_results"
mkdir -p "$RESULTS_DIR" mkdir -p "$RESULTS_DIR"
RESULTS_FILE="$RESULTS_DIR/results.csv" RESULTS_FILE="$RESULTS_DIR/results.csv"
@@ -37,13 +47,13 @@ log() {
} }
log "==============================================" log "=============================================="
log "Jan 7 Miniseries Training" log "${SERIES_NAME} Miniseries Training"
log "==============================================" log "=============================================="
for d in "${DEPTHS[@]}"; do for d in "${DEPTHS[@]}"; do
log "Training d=$d..." log "Training d=$d..."
TAG="jan7_miniseries_d${d}" TAG="${SERIES_NAME}_miniseries_d${d}"
START_TIME=$(date +%s) START_TIME=$(date +%s)
# Train the model with natural horizon (target_param_data_ratio default) # Train the model with natural horizon (target_param_data_ratio default)
@@ -84,7 +94,7 @@ for d in "${DEPTHS[@]}"; do
done done
log "==============================================" log "=============================================="
log "Jan 7 Miniseries Complete!" log "${SERIES_NAME} Miniseries Complete!"
log "==============================================" log "=============================================="
log "Results saved to: $RESULTS_FILE" log "Results saved to: $RESULTS_FILE"
echo "" echo ""