A number of upgrades to the SFT script: bring it up to date with respect to pretraining, and tune some of its kwargs based on sweeps.
This commit is contained in:
@@ -468,6 +468,7 @@ while True:
|
||||
"user_config": user_config, # inputs to the training script
|
||||
"device_batch_size": args.device_batch_size,
|
||||
"max_seq_len": args.max_seq_len,
|
||||
"total_batch_size": total_batch_size,
|
||||
"dataloader_state_dict": dataloader_state_dict,
|
||||
"loop_state": { # all loop state (other than step) so that we can resume training
|
||||
"min_val_bpb": min_val_bpb,
|
||||
|
||||
Reference in New Issue
Block a user