Fix buggy midtrain and update all kwargs to be idiomatic: argparse flags use dashes, while variables use underscores. The underscores in the flags were just a remnant of the previous Configurator object. This is the right way.
This commit is contained in:
+8
-8
@@ -20,7 +20,7 @@ if [ -z "$SKIP_SETUP" ]; then
|
||||
# Tokenizer, download 1000 shards for pretraining
|
||||
# (probably this can be reduced but it's tricky to determine the exact right number, TODO).
|
||||
python -m nanochat.dataset -n 1000
|
||||
python -m scripts.tok_train --max_chars=2000000000 --vocab_size=32768
|
||||
python -m scripts.tok_train --max-chars=2000000000 --vocab-size=32768
|
||||
else
|
||||
source .venv/bin/activate
|
||||
fi
|
||||
@@ -58,16 +58,16 @@ for d in "${DEPTHS[@]}"; do
|
||||
START_TIME=$(date +%s)
|
||||
|
||||
# Train the model with natural horizon (target_param_data_ratio default)
|
||||
# No --target_flops, let it use the default ratio from base_train
|
||||
# No --target-flops, let it use the default ratio from base_train
|
||||
torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- \
|
||||
--depth=$d \
|
||||
--target_param_data_ratio=8 \
|
||||
--target-param-data-ratio=8 \
|
||||
--run="${WANDB_RUN}_d${d}" \
|
||||
--model_tag="${TAG}" \
|
||||
--core_metric_every=999999 \
|
||||
--core_metric_max_per_task=-1 \
|
||||
--sample_every=-1 \
|
||||
--save_every=-1 \
|
||||
--model-tag="${TAG}" \
|
||||
--core-metric-every=999999 \
|
||||
--core-metric-max-per-task=-1 \
|
||||
--sample-every=-1 \
|
||||
--save-every=-1 \
|
||||
2>&1 | tee "$RESULTS_DIR/${TAG}_train.log"
|
||||
|
||||
END_TIME=$(date +%s)
|
||||
|
||||
Reference in New Issue
Block a user