nuonuo/experiments/exp08_llm_integration.py
Fam Zheng d923aa1e31 NuoNuo: Hippocampal memory module prototype
Hopfield + Hebbian hybrid memory system for LLMs.
Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025).

Architecture:
- Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle)
- Multi-hop: Hebbian W matrix with WTA pattern separation
- 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency
- 4ms latency @ 20K memories, ~1GB VRAM

Key findings:
- Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian)
- WTA pattern separation enables 20K+ capacity
- Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this
- MiniLM-L6 is optimal (discrimination gap > absolute similarity)
- Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark
- SNN encoder viable (CosSim 0.99) but not needed for current architecture
2026-04-07 10:37:24 +01:00

214 lines
7.5 KiB
Python

"""Experiment P0: LLM Integration — end-to-end memory-augmented conversation.
Tests:
1. Memory extraction (heuristic fallback since LLM gateway is down)
2. Paraphrase generation (heuristic fallback)
3. End-to-end: conversation → extract → store → recall → inject
4. Multi-turn conversation simulation
"""
import sys
import time
from pathlib import Path
import torch
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
sys.path.insert(0, str(Path(__file__).parent.parent))
from nuonuo.hippocampus import HippocampalMemory
from llm import (LLMClient, extract_memories_heuristic, extract_memories_llm,
generate_paraphrases_heuristic, generate_paraphrases_llm,
format_recalled_memories)
# Prefer the GPU but fall back to CPU so the experiment still runs on
# machines without CUDA (the original hard-coded "cuda", which raises at
# encode time on CPU-only hosts).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
def load_model():
    """Load the MiniLM-L6 sentence encoder onto DEVICE.

    The import is kept local so the heavy dependency is only pulled in
    when the model is actually needed.
    """
    from sentence_transformers import SentenceTransformer
    encoder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)
    return encoder
def emb(model, text):
    """Encode a single string and return its normalized embedding tensor."""
    batch = model.encode(
        [text],
        convert_to_tensor=True,
        normalize_embeddings=True,
        device=DEVICE,
    )
    return batch[0]
def test_heuristic_extraction():
    """Exercise the LLM-free memory extractor on canned conversation turns."""
    print("=== Test 1: Heuristic Memory Extraction ===\n")
    sample_turns = [
        ("How do I deploy to production?",
         "Use the blue-green deployment pipeline via GitHub Actions. The config is in .github/workflows/deploy.yml"),
        ("The database is really slow today",
         "Check for missing indexes on the users table. Last time this happened it was the created_at column."),
        ("Hi, how are you?",
         "I'm doing well, thanks!"),
        ("What port does Redis run on?",
         "Redis is on port 6379 at redis.internal"),
        ("Fix the auth bug please",
         "The auth service uses JWT tokens with 24h expiry stored in Redis. The bug was in token refresh logic."),
    ]
    for question, answer in sample_turns:
        extracted = extract_memories_heuristic(question, answer)
        print(f" User: {question[:50]}...")
        # Small-talk turns ("Hi, how are you?") should yield nothing.
        if not extracted:
            print(f" → (nothing extracted)")
        else:
            for mem in extracted:
                print(f" → CUE: {mem.cue[:40]}... | TARGET: {mem.target[:50]}... | IMP: {mem.importance}")
        print()
def test_heuristic_paraphrases():
    """Exercise the LLM-free paraphrase generator on sample queries."""
    print("=== Test 2: Heuristic Paraphrase Generation ===\n")
    samples = (
        "How do I deploy to production?",
        "The database is slow",
        "Can you fix the authentication bug?",
        "I need to configure nginx",
        "Let's set up monitoring for the server",
    )
    for sentence in samples:
        variants = generate_paraphrases_heuristic(sentence, n=3)
        print(f" Original: {sentence}")
        for variant in variants:
            print(variant)
        print()
def test_end_to_end(model):
    """Full pipeline: conversation → extract → store → recall → inject.

    Phase 1 walks scripted conversation turns, extracts cue/target memory
    pairs (via the LLM gateway when reachable, heuristics otherwise),
    augments each cue with paraphrases, and stores the embeddings in
    HippocampalMemory.  Phase 2 issues novel paraphrased queries and prints
    single-hop recall, multi-hop chain recall, and the formatted
    context-injection string.

    Args:
        model: sentence encoder compatible with ``emb`` (MiniLM-L6,
            384-dim output — matches ``embed_dim=384`` below).
    """
    print("=== Test 3: End-to-End Pipeline ===\n")
    memory = HippocampalMemory(embed_dim=384)
    llm = LLMClient()  # Will fail gracefully if gateway down
    # Simulate a few conversation turns
    turns = [
        ("How do I deploy to production?",
         "Use blue-green deployment via GitHub Actions. Config in .github/workflows/deploy.yml"),
        ("The database is really slow",
         "Check for missing indexes on users table, especially created_at column"),
        ("What port does Redis run on?",
         "Redis is on port 6379 at redis.internal"),
        ("Fix the auth bug",
         "Auth uses JWT tokens with 24h expiry in Redis. Bug was in token refresh."),
        ("How do I backup the database?",
         "Backups run daily at 3am UTC via cron job to S3. Config in /etc/cron.d/db-backup"),
    ]
    # Phase 1: Learn from conversations
    print("--- Phase 1: Learning from conversations ---")
    for user_msg, assistant_msg in turns:
        # Extract memories (LLM if the gateway is up, heuristic fallback otherwise)
        if llm.available:
            memories = extract_memories_llm(llm, user_msg, assistant_msg)
        else:
            memories = extract_memories_heuristic(user_msg, assistant_msg)
        for mem_item in memories:
            # Generate paraphrases of the cue to widen recall coverage
            if llm.available:
                paras = generate_paraphrases_llm(llm, mem_item.cue, n=3)
            else:
                paras = generate_paraphrases_heuristic(mem_item.cue, n=3)
            # Embed and store
            cue_emb = emb(model, mem_item.cue)
            target_emb = emb(model, mem_item.target)
            para_embs = [emb(model, p) for p in paras] if paras else None
            mid = memory.store(
                cue_emb, target_emb,
                cue_variants=para_embs,
                metadata={"cue": mem_item.cue, "target": mem_item.target,
                          "importance": mem_item.importance},
            )
            print(f" Stored [{mid}]: {mem_item.cue[:40]}... → {mem_item.target[:40]}...")
            if paras:
                print(f" + {len(paras)} paraphrases: {[p[:30] for p in paras]}")
    print(f"\n Total: {memory.stats()}")
    # Phase 2: Recall — queries are deliberate paraphrases of the stored cues
    print("\n--- Phase 2: Recall from new queries ---")
    queries = [
        "DB performance is terrible",
        "How to push a new release?",
        "What's the Redis connection info?",
        "The login system has a problem",
        "Need to create a database backup",
        "Where's the deployment config?",
    ]
    for query in queries:
        query_emb = emb(model, query)
        # Single-hop recall
        results = memory.recall(query_emb, top_k=2)
        # Multi-hop
        chain = memory.recall_chain(query_emb, hops=2)
        # Append chain hits that single-hop missed.  Hoist the id set out of
        # the comprehension: the original rebuilt it once per chain element.
        seen_ids = {r.memory_id for r in results}
        all_results = results + [r for r in chain if r.memory_id not in seen_ids]
        context = format_recalled_memories(all_results)
        print(f"\n Query: \"{query}\"")
        if results:
            print(f" Top result: {results[0].metadata.get('target', '?')[:60]}...")
            print(f" Similarity: {results[0].similarity:.3f}")
        if chain and len(chain) > 1:
            print(f" Chain hop 2: {chain[1].metadata.get('target', '?')[:60]}...")
        if context:
            print(f" Context injection:\n {context.replace(chr(10), chr(10) + ' ')}")
def test_llm_live(model):
    """Round-trip extraction and paraphrasing through the live LLM gateway.

    Skips cleanly when the gateway is unreachable.

    NOTE(review): ``model`` is accepted for signature parity with the other
    test functions but is not used in this body.
    """
    print("\n=== Test 4: Live LLM Integration ===\n")
    client = LLMClient()
    if not client.available:
        print(" LLM Gateway not available. Skipping live test.")
        print(" To test: ensure https://ste-jarvis.tiktok-row.net/llm/v1 is reachable")
        return
    # Test extraction
    user_msg = "The payment webhook keeps failing with a 502 error"
    assistant_msg = "The webhook endpoint at /api/payments/webhook is behind nginx. Check if the upstream timeout is too short — payment processing can take up to 30 seconds."
    extracted = extract_memories_llm(client, user_msg, assistant_msg)
    print(f" Extracted {len(extracted)} memories from live LLM:")
    for item in extracted:
        print(f" CUE: {item.cue} | TARGET: {item.target[:60]}... | IMP: {item.importance}")
    # Test paraphrase
    if not extracted:
        return
    variants = generate_paraphrases_llm(client, extracted[0].cue, n=3)
    print(f"\n Paraphrases for '{extracted[0].cue}':")
    for variant in variants:
        print(variant)
def main():
    """Run all four integration tests in order (heuristic first, live last)."""
    banner = "=" * 60
    print(banner)
    print("Experiment P0: LLM Integration")
    print(banner)
    encoder = load_model()
    test_heuristic_extraction()
    test_heuristic_paraphrases()
    test_end_to_end(encoder)
    test_llm_live(encoder)


if __name__ == "__main__":
    main()