fix base_loss to report correct loss by switching the dataloader to the new default

This commit is contained in:
Andrej Karpathy
2026-01-13 22:09:36 +00:00
parent f92efce169
commit 3b50b77ed3
2 changed files with 5 additions and 2 deletions
+3
View File
@@ -25,6 +25,7 @@ def _patch_missing_config_keys(model_config_kwargs):
# Old models were trained with full context (no sliding window)
if "window_pattern" not in model_config_kwargs:
model_config_kwargs["window_pattern"] = "L"
log0(f"Patching missing window_pattern in model config to 'L'")
def _patch_missing_keys(model_data, model_config):
"""Add default values for new parameters that may be missing in old checkpoints."""
@@ -32,9 +33,11 @@ def _patch_missing_keys(model_data, model_config):
# resid_lambdas defaults to 1.0 (identity scaling)
if "resid_lambdas" not in model_data:
model_data["resid_lambdas"] = torch.ones(n_layer)
log0(f"Patching missing resid_lambdas in model data to 1.0")
# x0_lambdas defaults to 0.0 (disabled)
if "x0_lambdas" not in model_data:
model_data["x0_lambdas"] = torch.zeros(n_layer)
log0(f"Patching missing x0_lambdas in model data to 0.0")
def save_checkpoint(checkpoint_dir, step, model_data, optimizer_data, meta_data, rank=0):
if rank == 0: