"""Experiment 7b: Deep dive into Hopfield memory. Hopfield crushed it at 1000 bg (100% para recall). Now stress test: 1. Scale to 5K, 10K, 20K memories — does softmax attention hold up? 2. Multi-hop: can we chain through Hopfield? (A→B→C) 3. Latency: O(N) attention — how slow at 20K? 4. β optimization: find sweet spot 5. Memory: storing all patterns explicitly — how much VRAM? 6. Mixed difficulty: semantically similar distractors (not just random bg) """ import sys import time from pathlib import Path import torch import torch.nn as nn import numpy as np DEVICE = "cuda" def cosine(a, b): if a.norm() == 0 or b.norm() == 0: return 0.0 return nn.functional.cosine_similarity(a.unsqueeze(0), b.unsqueeze(0)).item() def winner_take_all(x, k): _, idx = x.topk(k, dim=-1) out = torch.zeros_like(x) out.scatter_(-1, idx, 1.0) return out class HopfieldMemory: def __init__(self, input_dim, code_dim=16384, k=50, beta=16.0): self.k = k self.code_dim = code_dim self.beta = beta self.proj = (torch.randn(input_dim, code_dim, device=DEVICE) * (1.0 / input_dim**0.5)) self.cue_codes = [] self.target_codes = [] self.cue_embs = [] self.target_embs = [] def sep(self, x): return winner_take_all(x @ self.proj, self.k) def learn(self, cue_emb, target_emb): self.cue_codes.append(self.sep(cue_emb)) self.target_codes.append(self.sep(target_emb)) self.cue_embs.append(cue_emb.detach()) self.target_embs.append(target_emb.detach()) def _get_matrices(self): return torch.stack(self.cue_codes), torch.stack(self.target_codes) def recall(self, query_emb, steps=3): cue_mat, target_mat = self._get_matrices() xi = self.sep(query_emb) for _ in range(steps): scores = self.beta * (xi @ cue_mat.T) attn = torch.softmax(scores, dim=0) xi = attn @ cue_mat xi = winner_take_all(xi, self.k) # Final association scores = self.beta * (xi @ cue_mat.T) attn = torch.softmax(scores, dim=0) recalled = attn @ target_mat return winner_take_all(recalled, self.k) def recall_multihop(self, query_emb, hops=2, steps_per_hop=3): """Multi-hop: settle to cue → get target → use target as next cue.""" cue_mat, target_mat = self._get_matrices() xi = self.sep(query_emb) results = [] for hop in range(hops): # Settle to nearest cue attractor for _ in range(steps_per_hop): scores = self.beta * (xi @ cue_mat.T) attn = torch.softmax(scores, dim=0) xi = attn @ cue_mat xi = winner_take_all(xi, self.k) # Associate: cue → target scores = self.beta * (xi @ cue_mat.T) attn = torch.softmax(scores, dim=0) target = attn @ target_mat target = winner_take_all(target, self.k) results.append(target) # Next hop: use target as new query xi = target return results def recall_embedding_space(self, query_emb, steps=3): """Hopfield attention in raw embedding space (no WTA codes). Might be better for noise tolerance since embeddings are continuous. """ if not self.cue_embs: return None cue_mat = torch.stack(self.cue_embs) target_mat = torch.stack(self.target_embs) xi = query_emb for _ in range(steps): scores = self.beta * (xi @ cue_mat.T) attn = torch.softmax(scores, dim=0) xi = attn @ cue_mat # Final: get target scores = self.beta * (xi @ cue_mat.T) attn = torch.softmax(scores, dim=0) return attn @ target_mat def load_model(): from sentence_transformers import SentenceTransformer return SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE) def test_scale(model, n_background_list, beta=16.0): """Test Hopfield at different scales.""" print(f"\n=== Scale Test (β={beta}) ===") pairs = [ ("What's the weather like today?", "User checks weather every morning"), ("Let's deploy the new version", "Deployment uses GitHub Actions with k3s"), ("The database is slow again", "Missing index on users table"), ("I need to fix the auth bug", "JWT tokens with 24h expiry in Redis"), ("The API returns 500 errors", "OOM in the Python worker"), ] paraphrases = [ "How's the weather outside?", "We should push the new release", "DB performance is terrible", "There's a login bug to fix", "Getting internal server errors", ] embed_dim = model.get_sentence_embedding_dimension() cue_embs = model.encode([p[0] for p in pairs], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) target_embs = model.encode([p[1] for p in pairs], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) para_embs = model.encode(paraphrases, convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) for n_bg in n_background_list: mem = HopfieldMemory(embed_dim, code_dim=8192, k=50, beta=beta) # Store test pairs for i in range(len(pairs)): mem.learn(cue_embs[i], target_embs[i]) # Store background if n_bg > 0: # More diverse background sentences bg_cues = [] bg_targets = [] topics = ["server", "database", "API", "frontend", "backend", "cache", "queue", "network", "storage", "auth"] for i in range(n_bg): t = topics[i % len(topics)] bg_cues.append(f"The {t} system has issue number {i}") bg_targets.append(f"Issue {i} for {t} requires attention from team {i%5}") for start in range(0, n_bg, 256): end = min(start + 256, n_bg) bc = model.encode(bg_cues[start:end], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) bt = model.encode(bg_targets[start:end], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) for j in range(bc.shape[0]): mem.learn(bc[j], bt[j]) # Test target_codes = torch.stack([mem.sep(t) for t in target_embs]) # Paraphrase recall t0 = time.time() para_correct = 0 for i in range(len(paraphrases)): recalled = mem.recall(para_embs[i]) sims = nn.functional.cosine_similarity(recalled.unsqueeze(0), target_codes, dim=-1) if sims.argmax().item() == i: para_correct += 1 recall_time = (time.time() - t0) / len(paraphrases) * 1000 # Also test in embedding space para_correct_emb = 0 for i in range(len(paraphrases)): recalled_emb = mem.recall_embedding_space(para_embs[i]) sims = nn.functional.cosine_similarity(recalled_emb.unsqueeze(0), target_embs, dim=-1) if sims.argmax().item() == i: para_correct_emb += 1 n = len(paraphrases) total_mem = len(mem.cue_codes) vram = total_mem * 8192 * 4 * 2 / 1024**2 # codes + embs approx print(f" N={total_mem:>6}: Code={para_correct}/{n} ({para_correct/n:.0%}), " f"Emb={para_correct_emb}/{n} ({para_correct_emb/n:.0%}), " f"time={recall_time:.1f}ms, ~VRAM={vram:.0f}MB") del mem torch.cuda.empty_cache() def test_multihop(model): """Multi-hop through Hopfield memory.""" print("\n=== Multi-hop Test ===") chains = [ ["What's the weather?", "I check weather before going out", "My coffee shop is around the corner", "They have great latte art"], ["Let's review the code", "Code review found a memory leak", "Memory leaks cause OOM kills", "Need memory limits in k8s"], ["Deploy to production", "Production uses blue-green deploy", "Blue environment is active", "Switch DNS to green when ready"], ] embed_dim = model.get_sentence_embedding_dimension() for chain in chains: mem = HopfieldMemory(embed_dim, code_dim=8192, k=50, beta=16.0) chain_embs = [model.encode([t], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE)[0] for t in chain] # Learn consecutive pairs for i in range(len(chain) - 1): mem.learn(chain_embs[i], chain_embs[i+1]) # Multi-hop recall target_codes = [mem.sep(e) for e in chain_embs] results = mem.recall_multihop(chain_embs[0], hops=len(chain)-1) print(f"\n Chain: {' → '.join([c[:20]+'...' for c in chain])}") for hop_idx, recalled in enumerate(results): target = target_codes[hop_idx + 1] sim = cosine(recalled, target) status = "✓" if sim > 0.5 else "✗" print(f" {status} hop {hop_idx+1}: → '{chain[hop_idx+1][:30]}...' sim={sim:.3f}") # Multi-hop with background noise print("\n --- Multi-hop with 200 background memories ---") mem = HopfieldMemory(embed_dim, code_dim=8192, k=50, beta=16.0) # Store all chains all_chain_embs = [] for chain in chains: embs = [model.encode([t], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE)[0] for t in chain] all_chain_embs.append(embs) for i in range(len(chain) - 1): mem.learn(embs[i], embs[i+1]) # Add background bg = [f"Background sentence number {i}" for i in range(200)] bg_embs = model.encode(bg, convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) for i in range(199): mem.learn(bg_embs[i], bg_embs[i+1]) for ci, chain in enumerate(chains): target_codes = [mem.sep(e) for e in all_chain_embs[ci]] results = mem.recall_multihop(all_chain_embs[ci][0], hops=len(chain)-1) for hop_idx, recalled in enumerate(results): target = target_codes[hop_idx + 1] sim = cosine(recalled, target) status = "✓" if sim > 0.5 else "✗" print(f" {status} Chain{ci+1} hop{hop_idx+1}: sim={sim:.3f}") def test_hard_distractors(model): """Test with semantically similar distractors (harder than random bg).""" print("\n=== Hard Distractors (semantically similar) ===") # Target pair pairs = [ ("The database is slow", "Missing index on users table"), ] # Distractors: similar to cue but different meaning distractors_cue = [ "The database is fast", "The database crashed", "The database needs backup", "The datastore is slow", "The DB latency is high", "Database performance degraded", "SQL queries are slow", "The cache is slow", "The search index is slow", "MongoDB is slow", ] distractors_target = [ f"Distractor target {i}" for i in range(len(distractors_cue)) ] query = "DB performance is terrible" embed_dim = model.get_sentence_embedding_dimension() for beta in [8.0, 16.0, 32.0, 64.0]: mem = HopfieldMemory(embed_dim, code_dim=8192, k=50, beta=beta) # Store target cue_emb = model.encode([pairs[0][0]], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE)[0] target_emb = model.encode([pairs[0][1]], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE)[0] mem.learn(cue_emb, target_emb) # Store distractors dist_cue_embs = model.encode(distractors_cue, convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) dist_target_embs = model.encode(distractors_target, convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) for i in range(len(distractors_cue)): mem.learn(dist_cue_embs[i], dist_target_embs[i]) # Query q_emb = model.encode([query], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE)[0] recalled = mem.recall(q_emb) target_code = mem.sep(target_emb) sim = cosine(recalled, target_code) # Also check which cue got highest attention cue_mat = torch.stack(mem.cue_codes) q_code = mem.sep(q_emb) scores = beta * (q_code @ cue_mat.T) attn = torch.softmax(scores, dim=0) top_idx = attn.argmax().item() top_attn = attn[top_idx].item() all_cues = [pairs[0][0]] + distractors_cue print(f" β={beta:>4}: sim_to_target={sim:.3f}, " f"top_attn={top_attn:.3f} → '{all_cues[top_idx][:30]}...'") def main(): print("=" * 60) print("Experiment 7b: Hopfield Deep Dive") print("=" * 60) model = load_model() # Scale test test_scale(model, [0, 100, 500, 1000, 2000, 5000, 10000], beta=16.0) # β sweep at large scale print("\n=== β Sweep at N=5000 ===") for beta in [4, 8, 16, 32, 64]: test_scale(model, [5000], beta=beta) # Multi-hop test_multihop(model) # Hard distractors test_hard_distractors(model) if __name__ == "__main__": main()