Bugfix: save and load checkpoints from the model_tag directory

This commit is contained in:
duwenjie
2025-12-21 15:07:04 +08:00
parent d5759400f9
commit 92c6654b95
3 changed files with 14 additions and 13 deletions
+2 -2
View File
@@ -250,8 +250,8 @@ for step in range(num_iterations):
if master_process:
base_dir = get_base_dir()
depth = model.config.n_layer
model_tag = f"d{depth}" # base the model tag on the depth of the base model
checkpoint_dir = os.path.join(base_dir, "chatsft_checkpoints", model_tag)
output_dirname = model_tag if model_tag else f"d{depth}" # e.g. d12
checkpoint_dir = os.path.join(base_dir, "chatsft_checkpoints", output_dirname)
model_config_kwargs = model.config.__dict__ # slightly naughty, abusing the simplicity of GPTConfig, TODO nicer
save_checkpoint(
checkpoint_dir,