Make miniseries more generic, easier to run, and less hard-coded
This commit is contained in:
+23
-13
@@ -1,29 +1,39 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# See speedrun.sh for more comments
|
# See speedrun.sh for more comments
|
||||||
|
# Usage: ./miniseries.sh [series_name]
|
||||||
|
# Example: ./miniseries.sh jan11
|
||||||
|
# Default series name is today's date (e.g., jan11)
|
||||||
|
|
||||||
export OMP_NUM_THREADS=1
|
export OMP_NUM_THREADS=1
|
||||||
export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
|
export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
|
||||||
mkdir -p $NANOCHAT_BASE_DIR
|
mkdir -p $NANOCHAT_BASE_DIR
|
||||||
|
|
||||||
# uv
|
# Setup (skip with SKIP_SETUP=1)
|
||||||
command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
|
if [ -z "$SKIP_SETUP" ]; then
|
||||||
[ -d ".venv" ] || uv venv
|
# uv
|
||||||
uv sync --extra gpu
|
command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
source .venv/bin/activate
|
[ -d ".venv" ] || uv venv
|
||||||
|
uv sync --extra gpu
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
# Tokenizer
|
# Tokenizer
|
||||||
python -m nanochat.dataset -n 240
|
python -m nanochat.dataset -n 240
|
||||||
python -m scripts.tok_train --max_chars=2000000000 --vocab_size=32768
|
python -m scripts.tok_train --max_chars=2000000000 --vocab_size=32768
|
||||||
|
else
|
||||||
|
source .venv/bin/activate
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Series name: from arg, env var, or default to today's date (e.g., jan11)
|
||||||
|
SERIES_NAME="${1:-${SERIES_NAME:-$(date +%b%d | tr '[:upper:]' '[:lower:]')}}"
|
||||||
# Depths to train (the "miniseries")
|
# Depths to train (the "miniseries")
|
||||||
DEPTHS=(10 11 12 13 14 15 16 17 18 19 20)
|
DEPTHS=(10 11 12 13 14 15 16 17 18 19 20)
|
||||||
# Hardware
|
# Hardware
|
||||||
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
|
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
|
||||||
# Logging
|
# Logging
|
||||||
WANDB_RUN="${WANDB_RUN:-jan7_miniseries}"
|
WANDB_RUN="${WANDB_RUN:-${SERIES_NAME}_miniseries}"
|
||||||
|
|
||||||
RESULTS_DIR="$NANOCHAT_BASE_DIR/jan7_miniseries_results"
|
RESULTS_DIR="$NANOCHAT_BASE_DIR/${SERIES_NAME}_miniseries_results"
|
||||||
mkdir -p "$RESULTS_DIR"
|
mkdir -p "$RESULTS_DIR"
|
||||||
RESULTS_FILE="$RESULTS_DIR/results.csv"
|
RESULTS_FILE="$RESULTS_DIR/results.csv"
|
||||||
|
|
||||||
@@ -37,13 +47,13 @@ log() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
log "=============================================="
|
log "=============================================="
|
||||||
log "Jan 7 Miniseries Training"
|
log "${SERIES_NAME} Miniseries Training"
|
||||||
log "=============================================="
|
log "=============================================="
|
||||||
|
|
||||||
for d in "${DEPTHS[@]}"; do
|
for d in "${DEPTHS[@]}"; do
|
||||||
log "Training d=$d..."
|
log "Training d=$d..."
|
||||||
|
|
||||||
TAG="jan7_miniseries_d${d}"
|
TAG="${SERIES_NAME}_miniseries_d${d}"
|
||||||
START_TIME=$(date +%s)
|
START_TIME=$(date +%s)
|
||||||
|
|
||||||
# Train the model with natural horizon (target_param_data_ratio default)
|
# Train the model with natural horizon (target_param_data_ratio default)
|
||||||
@@ -84,7 +94,7 @@ for d in "${DEPTHS[@]}"; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
log "=============================================="
|
log "=============================================="
|
||||||
log "Jan 7 Miniseries Complete!"
|
log "${SERIES_NAME} Miniseries Complete!"
|
||||||
log "=============================================="
|
log "=============================================="
|
||||||
log "Results saved to: $RESULTS_FILE"
|
log "Results saved to: $RESULTS_FILE"
|
||||||
echo ""
|
echo ""
|
||||||
|
|||||||
Reference in New Issue
Block a user