"""Experiment 4c: Find optimal config for real-world use. From exp04b: k=50 gives 95% paraphrase recall (best). Need to verify capacity is still sufficient at k=50. Also: test with more realistic memory counts (100-1000). """ import sys import time import json from pathlib import Path import torch import torch.nn as nn import numpy as np DEVICE = "cuda" RESULTS_DIR = Path(__file__).parent.parent / "doc" def cosine(a, b): if a.norm() == 0 or b.norm() == 0: return 0.0 return nn.functional.cosine_similarity(a.unsqueeze(0), b.unsqueeze(0)).item() def winner_take_all(x, k): _, idx = x.topk(k, dim=-1) out = torch.zeros_like(x) out.scatter_(-1, idx, 1.0) return out class UnifiedHebbianMemory: def __init__(self, input_dim, code_dim, k): self.k = k self.code_dim = code_dim self.proj = (torch.randn(input_dim, code_dim, device=DEVICE) * (1.0 / input_dim**0.5)) self.W = torch.zeros(code_dim, code_dim, device=DEVICE) def sep(self, x): return winner_take_all(x @ self.proj, self.k) def learn(self, cue_emb, target_emb): self.W += torch.outer(self.sep(target_emb), self.sep(cue_emb)) def recall(self, query_emb): code = self.sep(query_emb) raw = self.W @ code return winner_take_all(raw, self.k) def test_capacity_with_real_embeddings(model, code_dim, k, max_memories=2000): """Generate lots of diverse sentence pairs and test recall.""" from sentence_transformers import SentenceTransformer # Generate diverse sentences programmatically topics = [ "deploy", "database", "API", "testing", "monitoring", "security", "frontend", "backend", "caching", "logging", "backup", "server", "CI/CD", "Docker", "Kubernetes", "microservice", "authentication", "performance", "debugging", "refactoring" ] actions = [ "is broken", "needs updating", "has a bug", "was configured wrong", "needs optimization", "requires migration", "should be refactored", "has a memory leak", "is timing out", "needs documentation" ] facts = [ "was fixed last week by adding an index", "uses the new v3 API endpoint", "is scheduled for maintenance on Friday", "requires admin access to modify", "has a known issue with large payloads", "was migrated from AWS to GCP", "needs Python 3.12 or higher", "uses Redis for session storage", "has rate limiting at 1000 req/min", "is monitored by PagerDuty" ] cue_sentences = [] target_sentences = [] for i in range(max_memories): topic = topics[i % len(topics)] action = actions[i % len(actions)] fact = facts[i % len(facts)] idx = i // (len(topics) * len(actions)) cue_sentences.append(f"The {topic} system {action} (issue #{i})") target_sentences.append(f"{topic} {fact}, ticket #{i}, priority {idx}") embed_dim = model.get_sentence_embedding_dimension() mem = UnifiedHebbianMemory(embed_dim, code_dim, k) # Encode in batches batch_size = 256 checkpoints = [50, 100, 200, 500, 1000, 2000] all_cue_embs = [] all_target_embs = [] print(f" Config: code_dim={code_dim}, k={k}") for start in range(0, max_memories, batch_size): end = min(start + batch_size, max_memories) cue_embs = model.encode(cue_sentences[start:end], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) target_embs = model.encode(target_sentences[start:end], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) for i in range(cue_embs.shape[0]): mem.learn(cue_embs[i], target_embs[i]) all_cue_embs.append(cue_embs[i]) all_target_embs.append(target_embs[i]) total = len(all_cue_embs) if total in checkpoints: # Test on random sample sample_n = min(100, total) indices = torch.randperm(total)[:sample_n].tolist() correct = 0 for idx in indices: recalled = mem.recall(all_cue_embs[idx]) target_code = mem.sep(all_target_embs[idx]) if cosine(recalled, target_code) > 0.5: correct += 1 w_norm = mem.W.norm().item() print(f" N={total:>5}: Recall={correct}/{sample_n} " f"({correct/sample_n:.0%}), W_norm={w_norm:.0f}") def test_paraphrase_at_scale(model, code_dim, k, n_memories): """Add many memories, then test paraphrase recall on a subset.""" embed_dim = model.get_sentence_embedding_dimension() mem = UnifiedHebbianMemory(embed_dim, code_dim, k) # Add background memories (noise) bg_cues = [f"Background task number {i} about topic {i%20}" for i in range(n_memories)] bg_targets = [f"Background fact {i} with detail {i%10}" for i in range(n_memories)] bg_cue_embs = model.encode(bg_cues, convert_to_tensor=True, normalize_embeddings=True, device=DEVICE, batch_size=256) bg_target_embs = model.encode(bg_targets, convert_to_tensor=True, normalize_embeddings=True, device=DEVICE, batch_size=256) for i in range(n_memories): mem.learn(bg_cue_embs[i], bg_target_embs[i]) # Now add our specific test memories test_pairs = [ ("What's the weather like today?", "User prefers to check weather every morning"), ("Let's deploy the new version", "The deployment pipeline uses GitHub Actions with k3s"), ("The database is slow again", "Missing index on users table caused slowdown last time"), ("I need to fix the auth bug", "Auth service uses JWT tokens with 24h expiry in Redis"), ("The API returns 500 errors", "Last 500 was caused by OOM in the Python worker"), ] paraphrases = [ "How's the weather outside?", "We should push the new release", "DB performance is terrible", "There's a login bug to fix", "Getting internal server errors", ] test_cue_embs = model.encode([p[0] for p in test_pairs], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) test_target_embs = model.encode([p[1] for p in test_pairs], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) para_embs = model.encode(paraphrases, convert_to_tensor=True, normalize_embeddings=True, device=DEVICE) for i in range(len(test_pairs)): mem.learn(test_cue_embs[i], test_target_embs[i]) # Test exact recall exact_correct = 0 for i in range(len(test_pairs)): recalled = mem.recall(test_cue_embs[i]) tc = mem.sep(test_target_embs[i]) if cosine(recalled, tc) > 0.5: exact_correct += 1 # Test paraphrase recall para_correct = 0 for i in range(len(paraphrases)): recalled = mem.recall(para_embs[i]) tc = mem.sep(test_target_embs[i]) if cosine(recalled, tc) > 0.5: para_correct += 1 n = len(test_pairs) print(f" bg={n_memories}, code={code_dim}, k={k}: " f"Exact={exact_correct}/{n}, Para={para_correct}/{n}") return exact_correct / n, para_correct / n def main(): print("=" * 60) print("Experiment 4c: Optimal Config + Scale Testing") print("=" * 60) from sentence_transformers import SentenceTransformer model = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE) # Test 1: Capacity with real embeddings print("\n=== Capacity Test ===") for code_dim, k in [(8192, 50), (16384, 50), (16384, 20), (32768, 50)]: test_capacity_with_real_embeddings(model, code_dim, k, max_memories=2000) print() # Test 2: Paraphrase at scale print("\n=== Paraphrase Recall at Scale ===") for n_bg in [0, 100, 500, 1000]: for code_dim, k in [(8192, 50), (16384, 50)]: test_paraphrase_at_scale(model, code_dim, k, n_bg) if __name__ == "__main__": main()