Fix buggy midtrain and update all kwargs to be idiomatic: argparse flags use dashes, while variables use underscores. The underscores in the flags were just a remnant of the previous Configurator object. This is the right way.

This commit is contained in:
Andrej Karpathy
2026-01-13 22:45:27 +00:00
parent 3b50b77ed3
commit 7312ec9898
11 changed files with 144 additions and 139 deletions
+8 -8
View File
@@ -64,15 +64,15 @@ for flops in "${FLOPS_BUDGETS[@]}"; do
 # CORE eval happens once at the end (999999 ensures only final step)
 torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- \
 --depth=$d \
---target_flops=$flops \
---target_param_data_ratio=-1 \
+--target-flops=$flops \
+--target-param-data-ratio=-1 \
 --run="${WANDB_RUN}_${TAG}" \
---model_tag="${TAG}" \
---eval_tokens=$EVAL_TOKENS \
---core_metric_every=999999 \
---core_metric_max_per_task=-1 \
---sample_every=-1 \
---save_every=-1 \
+--model-tag="${TAG}" \
+--eval-tokens=$EVAL_TOKENS \
+--core-metric-every=999999 \
+--core-metric-max-per-task=-1 \
+--sample-every=-1 \
+--save-every=-1 \
2>&1 | tee "$RESULTS_DIR/${TAG}_train.log"
END_TIME=$(date +%s)