tune logit softcap?

This commit is contained in:
Andrej Karpathy
2026-03-02 18:19:37 +00:00
parent 83dccc20ae
commit aba30cb037
2 changed files with 5 additions and 1 deletions
+1 -1
View File
@@ -407,7 +407,7 @@ class GPT(nn.Module):
x = norm(x)
# Forward the lm_head (compute logits)
- softcap = 15 # smoothly cap the logits to the range [-softcap, softcap]
+ softcap = 20 # smoothly cap the logits to the range [-softcap, softcap]
logits = self.lm_head(x) # (B, T, padded_vocab_size) <- very big tensor, large amount of memory
logits = logits[..., :self.config.vocab_size] # slice to remove padding
logits = logits.float() # switch to fp32 for logit softcap and loss computation