fix scaling laws scripts after the bigram embeddings were removed

2026-03-17 16:55:56 +00:00
parent 1b1cc3c599
commit 5019accc5b
2 changed files with 12 additions and 11 deletions
@@ -76,7 +76,6 @@
    "\n",
    "Our CSV now has granular counts:\n",
    "- `params_wte` - token embedding (lookup table)\n",
-    "- `params_bigram_embed` - bigram hash embeddings (lookup table)\n",
    "- `params_value_embeds` - value embeddings (lookup table)\n",
    "- `params_lm_head` - unembedding projection (matmul)\n",
    "- `params_transformer` - attention + MLP matrices (matmuls)\n",
@@ -116,12 +115,13 @@
    "\n",
    "\n",
    "# Compute derived columns\n",
+    "df = df.copy()  # avoid SettingWithCopyWarning from earlier filter\n",
    "df['effective_params'] = df.apply(compute_effective_params, axis=1)\n",
    "df['param_data_ratio'] = df['tokens_trained'] / df['effective_params']\n",
    "\n",
    "# Show parameter breakdown for first few rows\n",
    "print(\"Parameter breakdown (first row per flops budget):\")\n",
-    "param_cols = ['depth', 'params_wte', 'params_bigram_embed', 'params_value_embeds',\n",
+    "param_cols = ['depth', 'params_wte', 'params_value_embeds',\n",
    "              'params_lm_head', 'params_transformer', 'params_scalars', 'params_total', 'effective_params']\n",
    "df.groupby('flops_budget').first()[param_cols]"
   ]