Hopfield + Hebbian hybrid memory system for LLMs. Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025). Architecture: - Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle) - Multi-hop: Hebbian W matrix with WTA pattern separation - 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency - 4ms latency @ 20K memories, ~1GB VRAM Key findings: - Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian) - WTA pattern separation enables 20K+ capacity - Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this - MiniLM-L6 is optimal (discrimination gap > absolute similarity) - Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark - SNN encoder viable (CosSim 0.99) but not needed for current architecture
238 lines
5.0 KiB
JSON
[
  {
    "dim": 768,
    "neurons": 2048,
    "steps": 64,
    "final_mse": 0.00011098239338025451,
    "final_cos": 0.9898157119750977,
    "milestones": [
      { "epoch": 20, "mse": 0.005041939315075675, "cos": -0.0007408469663156817 },
      { "epoch": 40, "mse": 0.0029456913859272995, "cos": -0.0003333062321568529 },
      { "epoch": 60, "mse": 0.0029715588005880516, "cos": 0.0005352402261147896 },
      { "epoch": 80, "mse": 0.04361877404153347, "cos": 0.4805794248978297 },
      { "epoch": 100, "mse": 0.005344521099080642, "cos": 0.7873762448628744 },
      { "epoch": 120, "mse": 0.001494182685079674, "cos": 0.9197443743546804 },
      { "epoch": 140, "mse": 0.0003552741633029655, "cos": 0.9758868634700775 },
      { "epoch": 160, "mse": 0.00016522348839013526, "cos": 0.9866191744804382 },
      { "epoch": 180, "mse": 0.00011800844416332741, "cos": 0.9894002715746562 },
      { "epoch": 200, "mse": 0.00011065248036175035, "cos": 0.9898596425851186 }
    ]
  },
  {
    "dim": 768,
    "neurons": 4096,
    "steps": 64,
    "final_mse": 5.6636981753399596e-05,
    "final_cos": 0.9872701168060303,
    "milestones": [
      { "epoch": 20, "mse": 0.004513699406137069, "cos": 7.949230493977665e-05 },
      { "epoch": 40, "mse": 0.0028209949222703775, "cos": 0.0006807217665482313 },
      { "epoch": 60, "mse": 0.002746186009608209, "cos": -0.0012927929715563853 },
      { "epoch": 80, "mse": 0.048195418591300644, "cos": 0.49279734392960867 },
      { "epoch": 100, "mse": 0.011376503172020118, "cos": 0.7687788685162862 },
      { "epoch": 120, "mse": 0.0018575659099345405, "cos": 0.9089678009351094 },
      { "epoch": 140, "mse": 0.00029495314811356366, "cos": 0.9680179615815481 },
      { "epoch": 160, "mse": 0.00010300778691695693, "cos": 0.9824542800585429 },
      { "epoch": 180, "mse": 6.22785273056555e-05, "cos": 0.986561139424642 },
      { "epoch": 200, "mse": 5.633314976876136e-05, "cos": 0.9872957944869996 }
    ]
  },
  {
    "dim": 768,
    "neurons": 4096,
    "steps": 128,
    "final_mse": 0.0007109043071977794,
    "final_cos": 0.9640029072761536,
    "milestones": [
      { "epoch": 20, "mse": 0.004640598734840751, "cos": 0.0001389272161759436 },
      { "epoch": 40, "mse": 0.0028830923062438765, "cos": -0.0005388486936377982 },
      { "epoch": 60, "mse": 0.0026579547052582105, "cos": -0.0008515000498543183 },
      { "epoch": 80, "mse": 0.005524608632549643, "cos": 0.3971738278865814 },
      { "epoch": 100, "mse": 0.44284523477156956, "cos": 0.14999981944759685 },
      { "epoch": 120, "mse": 0.009387427164862553, "cos": 0.8101295113563538 },
      { "epoch": 140, "mse": 0.0032115802091235916, "cos": 0.9130531450112661 },
      { "epoch": 160, "mse": 0.001285675020578007, "cos": 0.9493551254272461 },
      { "epoch": 180, "mse": 0.0007889122760389, "cos": 0.9620140413443248 },
      { "epoch": 200, "mse": 0.0007097914950766911, "cos": 0.9642268856366475 }
    ]
  },
  {
    "dim": 768,
    "neurons": 8192,
    "steps": 64,
    "final_mse": 9.41839098231867e-05,
    "final_cos": 0.977264404296875,
    "milestones": [
      { "epoch": 20, "mse": 0.0042252690686533844, "cos": 0.0009540434480489542 },
      { "epoch": 40, "mse": 0.0026403106516227127, "cos": -0.00011461178073659539 },
      { "epoch": 60, "mse": 0.002510098453300695, "cos": 3.730244352482259e-05 },
      { "epoch": 80, "mse": 0.07319205676515897, "cos": 0.5515906274318695 },
      { "epoch": 100, "mse": 0.02154427437732617, "cos": 0.6362018167972565 },
      { "epoch": 120, "mse": 0.005301868465418617, "cos": 0.8255152304967245 },
      { "epoch": 140, "mse": 0.0007266401468465725, "cos": 0.9310513158639272 },
      { "epoch": 160, "mse": 0.00019424428513351206, "cos": 0.9668811519940694 },
      { "epoch": 180, "mse": 0.00010609042850167801, "cos": 0.9758348226547241 },
      { "epoch": 200, "mse": 9.468134303460828e-05, "cos": 0.9772639731566112 }
    ]
  }
]