A number of upgrades to the SFT script to bring it up to date w.r.t. pretraining, and to tune some of its kwargs based on sweeps

This commit is contained in:
Andrej Karpathy
2026-02-16 14:41:53 +00:00
parent 2f09686724
commit 788dadeb88
3 changed files with 159 additions and 45 deletions
+1
View File
@@ -468,6 +468,7 @@ while True:
"user_config": user_config, # inputs to the training script
"device_batch_size": args.device_batch_size,
"max_seq_len": args.max_seq_len,
"total_batch_size": total_batch_size,
"dataloader_state_dict": dataloader_state_dict,
"loop_state": { # all loop state (other than step) so that we can resume training
"min_val_bpb": min_val_bpb,