quick fix to not OOM main speedrun script

This commit is contained in:
Andrej Karpathy
2026-01-26 22:31:42 +00:00
parent 85b3e95e09
commit 8630d32be4
2 changed files with 3 additions and 3 deletions
+2 -2
View File
@@ -58,8 +58,8 @@ python -m nanochat.dataset -n 8
# Start nanochat.dataset with -n 370 in the background (trailing `&`), so the
# commands that follow run while it is still in progress.
# See comment below for why 370 is the right number here
python -m nanochat.dataset -n 370 &
# `$!` is the PID of the most recently started background job; keep it so the
# job can be referenced later (presumably via `wait` further down -- not visible here).
DATASET_DOWNLOAD_PID=$!
# NOTE(review): this view lists both sides of the change without +/- markers.
# The invocation with explicit flags is presumably the pre-change line and the
# flag-less invocation after it the post-change line -- confirm against the script.
# train the tokenizer with vocab size 2**16 = 65536 on ~2B characters of data
python -m scripts.tok_train --max-chars=2000000000 --vocab-size=65536
# train the tokenizer with vocab size 2**15 = 32768 on ~2B characters of data
# NOTE(review): no flags are passed here, so the 32768 vocab size and ~2B
# character budget presumably come from scripts.tok_train's defaults -- confirm.
python -m scripts.tok_train
# evaluate the tokenizer (report compression ratio etc.)
python -m scripts.tok_eval