fix scaling laws scripts after the bigram embeddings were removed
This commit is contained in:
@@ -76,7 +76,6 @@
|
||||
"\n",
|
||||
"Our CSV now has granular counts:\n",
|
||||
"- `params_wte` - token embedding (lookup table)\n",
|
||||
"- `params_bigram_embed` - bigram hash embeddings (lookup table)\n",
|
||||
"- `params_value_embeds` - value embeddings (lookup table)\n",
|
||||
"- `params_lm_head` - unembedding projection (matmul)\n",
|
||||
"- `params_transformer` - attention + MLP matrices (matmuls)\n",
|
||||
@@ -116,12 +115,13 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"# Compute derived columns\n",
|
||||
"df = df.copy() # avoid SettingWithCopyWarning from earlier filter\n",
|
||||
"df['effective_params'] = df.apply(compute_effective_params, axis=1)\n",
|
||||
"df['param_data_ratio'] = df['tokens_trained'] / df['effective_params']\n",
|
||||
"\n",
|
||||
"# Show parameter breakdown for first few rows\n",
|
||||
"print(\"Parameter breakdown (first row per flops budget):\")\n",
|
||||
"param_cols = ['depth', 'params_wte', 'params_bigram_embed', 'params_value_embeds',\n",
|
||||
"param_cols = ['depth', 'params_wte', 'params_value_embeds',\n",
|
||||
" 'params_lm_head', 'params_transformer', 'params_scalars', 'params_total', 'effective_params']\n",
|
||||
"df.groupby('flops_budget').first()[param_cols]"
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user