"""LLM integration for hippocampal memory.

Functions:
1. extract_memories: Extract (cue, target) pairs from conversation turns
2. generate_paraphrases: Generate cue variants for augmentation
3. recall_and_inject: Recall memories and format for context injection
4. format_recalled_memories: Format RecallResults into prompt text

Supports any OpenAI-compatible API. Falls back to simple heuristics when
LLM unavailable.
"""

import logging
import re
from dataclasses import dataclass
from typing import Optional

try:
    from openai import OpenAI
except ImportError:
    # Without the SDK the module still imports; LLMClient reports itself as
    # unavailable and every *_llm function takes its heuristic fallback.
    OpenAI = None

logger = logging.getLogger(__name__)

# One extracted fact per line: "CUE: ... | TARGET: ... | IMPORTANCE: 0.7".
_MEMORY_LINE_RE = re.compile(
    r"CUE:\s*(.+?)\s*\|\s*TARGET:\s*(.+?)\s*\|\s*IMPORTANCE:\s*([\d.]+)"
)

# Compared against lowercased text, so every entry must itself be lowercase.
_TECH_KEYWORDS = (
    "deploy", "config", "bug", "fix", "error", "database", "server", "api",
    "port", "token", "password", "version", "install", "upgrade", "migrate",
    "backup",
)

# Leading phrases that can be dropped from a cue without changing its topic.
_CUE_PREFIXES = (
    "can you ", "please ", "i need to ", "let's ", "we should ",
    "how do i ", "how to ", "i want to ", "help me ",
)

# Substring -> replacement pairs used to build synonym variants.
_SYNONYM_SWAPS = {
    "slow": "performance issues",
    "fast": "quick",
    "fix": "resolve",
    "deploy": "release",
    "error": "issue",
    "bug": "problem",
    "database": "DB",
    "server": "machine",
    "configure": "set up",
}


@dataclass
class ExtractedMemory:
    """A (cue, target) pair extracted from a conversation turn."""

    cue: str
    target: str
    importance: float = 0.5  # 0-1, higher = more worth storing


class LLMClient:
    """Wrapper around OpenAI-compatible API with fallback.

    After construction, ``available`` tells whether the endpoint answered a
    model-list probe. When it is False, ``client`` is None and ``chat``
    always returns None so callers can switch to heuristics.
    """

    def __init__(self, base_url: str = "https://ste-jarvis.tiktok-row.net/llm/v1",
                 api_key: str = "unused",
                 model: str = "gemma4:12b",
                 timeout: float = 5.0):
        """Create the client and probe the endpoint once.

        Args:
            base_url: Base URL of the OpenAI-compatible API.
            api_key: API key passed to the SDK.
            model: Model name used for chat completions.
            timeout: Request timeout passed to the SDK.
        """
        self.model = model
        self.client = None
        self.available = False

        if OpenAI is None:
            logger.debug("openai package not installed; LLM client disabled")
            return

        try:
            client = OpenAI(base_url=base_url, api_key=api_key, timeout=timeout)
            # Quick check that the endpoint is reachable.
            client.models.list()
        except Exception as exc:
            # Deliberately broad: any failure here means "no LLM", never a crash.
            logger.debug("LLM endpoint unavailable: %s", exc)
            return

        self.client = client
        self.available = True

    def chat(self, messages: list[dict], temperature: float = 0.7,
             max_tokens: int = 512) -> Optional[str]:
        """Send a chat completion request.

        Args:
            messages: Chat messages in OpenAI format.
            temperature: Sampling temperature.
            max_tokens: Completion token limit.

        Returns:
            The content of the first choice, or None when the client is
            unavailable or the request fails.
        """
        if not self.available:
            return None
        try:
            resp = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            return resp.choices[0].message.content
        except Exception as exc:
            # Best-effort by design: callers treat None as "use the fallback".
            logger.debug("LLM chat request failed: %s", exc)
            return None


def extract_memories_llm(client: LLMClient, user_msg: str,
                         assistant_msg: str) -> list[ExtractedMemory]:
    """Use LLM to extract memorable facts from a conversation turn.

    Args:
        client: Client whose ``chat`` returns the model reply or None.
        user_msg: The user's message for this turn.
        assistant_msg: The assistant's reply for this turn.

    Returns:
        Memories parsed from the reply, with importance clamped to [0, 1].
        When the LLM gives no reply, the result of
        ``extract_memories_heuristic`` is returned instead.
    """
    prompt = (
        "From this conversation turn, extract key facts worth remembering "
        "for future conversations.\n"
        'For each fact, provide a "cue" (what would trigger recalling this) '
        'and a "target" (the fact itself).\n'
        "Rate importance 0-1 (1 = critical fact, 0 = trivial).\n"
        "\n"
        f"User: {user_msg}\n"
        f"Assistant: {assistant_msg}\n"
        "\n"
        "Output format (one per line):\n"
        "CUE: <trigger phrase> | TARGET: <fact> | IMPORTANCE: <0-1>\n"
        "\n"
        "Only extract genuinely useful facts. "
        "If nothing worth remembering, output NONE."
    )

    result = client.chat([{"role": "user", "content": prompt}], temperature=0.3)
    if not result:
        return extract_memories_heuristic(user_msg, assistant_msg)

    memories = []
    for raw_line in result.strip().split("\n"):
        line = raw_line.strip()
        if line == "NONE":
            break
        match = _MEMORY_LINE_RE.match(line)
        if not match:
            continue
        try:
            importance = float(match.group(3))
        except ValueError:
            # The pattern also admits "..." or "1.2.3"; skip that line only.
            continue
        memories.append(ExtractedMemory(
            cue=match.group(1).strip(),
            target=match.group(2).strip(),
            importance=min(1.0, max(0.0, importance)),
        ))
    return memories


def extract_memories_heuristic(user_msg: str,
                               assistant_msg: str) -> list[ExtractedMemory]:
    """Fallback: simple heuristic extraction when LLM unavailable.

    Rules:
    - User message contains "?" and the answer has more than 5 words
      -> store the answer under the question (importance 0.6)
    - A technical keyword appears in either message and the user message
      has at least 5 words -> store the turn (importance 0.5)

    Both rules can fire for the same turn, yielding two memories.
    """
    memories = []
    answer = assistant_msg[:200]

    # User asked a question, assistant answered.
    if "?" in user_msg and len(assistant_msg.split()) > 5:
        # Strip whitespace first so a trailing space does not shield the "?".
        cue = user_msg.strip().rstrip("?").strip()
        if cue:
            memories.append(ExtractedMemory(cue=cue, target=answer, importance=0.6))

    # Technical keywords suggest something worth remembering.
    combined = f"{user_msg} {assistant_msg}".lower()
    mentions_tech = any(kw in combined for kw in _TECH_KEYWORDS)
    if mentions_tech and len(user_msg.split()) >= 5:
        memories.append(ExtractedMemory(
            cue=user_msg[:100],
            target=answer,
            importance=0.5,
        ))

    return memories


def generate_paraphrases_llm(client: LLMClient, text: str, n: int = 3) -> list[str]:
    """Use LLM to generate paraphrases of a cue text.

    Args:
        client: Client whose ``chat`` returns the model reply or None.
        text: The cue to paraphrase.
        n: Maximum number of paraphrases to return.

    Returns:
        Up to ``n`` non-empty reply lines longer than 3 characters. When the
        LLM gives no reply, the result of ``generate_paraphrases_heuristic``.
    """
    prompt = (
        f"Generate {n} different paraphrases of this text. "
        "Each should convey the same meaning but use different words/phrasing.\n"
        "One per line, no numbering.\n"
        "\n"
        f"Text: {text}"
    )

    result = client.chat([{"role": "user", "content": prompt}],
                         temperature=0.8, max_tokens=256)
    if not result:
        return generate_paraphrases_heuristic(text, n)

    candidates = (line.strip() for line in result.strip().split("\n"))
    paraphrases = [line for line in candidates if len(line) > 3]
    return paraphrases[:n]


def generate_paraphrases_heuristic(text: str, n: int = 3) -> list[str]:
    """Fallback: simple text augmentation when LLM unavailable.

    Strategies:
    - Remove common leading phrases ("please ", "how do i ", ...)
    - Swap known synonyms
    - For short texts (8 words or fewer), add an "issue with ..." variant

    Returns:
        Up to ``n`` variants; empty for blank input or non-positive ``n``.
    """
    # Work on the stripped text so the prefix test and the slice agree.
    text = text.strip()
    if not text:
        return []

    variants = []
    text_lower = text.lower()

    # Remove common prefixes.
    for pfx in _CUE_PREFIXES:
        if text_lower.startswith(pfx):
            stripped = text[len(pfx):].strip()
            if stripped and stripped not in variants:
                variants.append(stripped)

    # Simple synonym swaps, covering lowercase and Capitalized spellings.
    for old, new in _SYNONYM_SWAPS.items():
        if old not in text_lower:
            continue
        # Upper-case only the first letter so acronyms such as "DB" survive.
        new_capitalized = new[:1].upper() + new[1:]
        variant = text.replace(old, new).replace(old.capitalize(), new_capitalized)
        if variant != text and variant not in variants:
            variants.append(variant)

    # Short cues also get a generic "issue with ..." form.
    if len(text.split()) <= 8:
        variants.append(f"issue with {text_lower}")

    return variants[:max(n, 0)]


def format_recalled_memories(results: list, max_memories: int = 5) -> str:
    """Format RecallResults into a prompt-ready string.

    Args:
        results: Objects exposing ``metadata`` (a mapping that may hold
            "target" or "text") and ``hop_distance``.
        max_memories: Only the first ``max_memories`` results are considered.

    Returns:
        A "Recalled from memory:" header followed by one bullet per usable
        result, or "" when nothing can be shown.
    """
    if not results:
        return ""

    lines = []
    for r in results[:max(max_memories, 0)]:
        meta = r.metadata or {}
        if "target" in meta:
            text = meta["target"]
        elif "text" in meta:
            text = meta["text"]
        else:
            continue

        hop_info = f" (via {r.hop_distance}-hop association)" if r.hop_distance > 1 else ""
        lines.append(f"- {text}{hop_info}")

    if not lines:
        return ""
    return "Recalled from memory:\n" + "\n".join(lines)