fix comment
This commit is contained in:
+1
-1
@@ -69,7 +69,7 @@ python -m scripts.tok_eval
|
|||||||
echo "Waiting for dataset download to complete..."
|
echo "Waiting for dataset download to complete..."
|
||||||
wait $DATASET_DOWNLOAD_PID
|
wait $DATASET_DOWNLOAD_PID
|
||||||
|
|
||||||
# d24 model (slightly overtrained is enough to beat GPT-2 => increase data:params ratio from compute optimal 10.5 (default) to 12)
|
# d26 model (slightly undertrained is enough to beat GPT-2 => decrease data:params ratio from compute optimal 10.5 (default) to 8.25)
|
||||||
torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=26 --target-param-data-ratio=8.25 --device-batch-size=16 --fp8 --run=$WANDB_RUN
|
torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=26 --target-param-data-ratio=8.25 --device-batch-size=16 --fp8 --run=$WANDB_RUN
|
||||||
# evaluate the model: CORE metric, BPB on train/val, and draw samples
|
# evaluate the model: CORE metric, BPB on train/val, and draw samples
|
||||||
torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-size=16
|
torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-size=16
|
||||||
|
|||||||
Reference in New Issue
Block a user