tori/src/state.rs
Fam Zheng 63bbbae17c feat: show plan diff in execution log when revise_plan is called
- apply_plan_diff now returns a YAML unified diff string
- Pure Rust LCS diff implementation (no external dependency)
- revise_plan logs the diff to execution log with ```diff fencing
- Frontend renders diff with green/red syntax highlighting
2026-03-10 19:03:47 +00:00

631 lines
21 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use serde::{Deserialize, Serialize};
use crate::llm::ChatMessage;
// --- Step result (returned by run_step_loop) ---
/// Result of running one plan step: its status, a summary string and the
/// artifacts attached to the result.
#[derive(Debug, Clone)]
pub struct StepResult {
/// How the step ended (see [`StepResultStatus`]).
pub status: StepResultStatus,
/// Summary text for the step.
pub summary: String,
/// Artifacts attached to this result.
pub artifacts: Vec<Artifact>,
}
/// Status carried by a [`StepResult`].
#[derive(Debug, Clone)]
pub enum StepResultStatus {
/// The step finished.
Done,
/// The step failed; `error` carries the failure text.
Failed { error: String },
/// The step needs approval; `message` carries the accompanying text.
NeedsApproval { message: String },
}
/// Maximum scratchpad size in bytes (roughly 8K tokens ≈ 24K bytes).
const SCRATCHPAD_MAX_BYTES: usize = 24_000;

/// Verifies that `content` does not exceed [`SCRATCHPAD_MAX_BYTES`].
///
/// The size is measured with `str::len`, i.e. in UTF-8 bytes rather than
/// characters. Content of exactly the limit is accepted.
///
/// # Errors
///
/// Returns a user-facing message containing the current size and the limit
/// when the content is larger than the limit.
pub fn check_scratchpad_size(content: &str) -> Result<(), String> {
    let size = content.len();
    if size <= SCRATCHPAD_MAX_BYTES {
        return Ok(());
    }
    Err(format!(
        "Scratchpad 超出容量限制(当前 {} 字节,上限 {} 字节)。请精简内容后重试。",
        size, SCRATCHPAD_MAX_BYTES,
    ))
}
// --- Agent phase state machine ---
/// Phase the agent is currently in.
///
/// Serialized with serde's internally tagged representation: the variant name
/// is stored under a `"type"` key (see the `#[serde(tag = "type")]` attribute).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AgentPhase {
/// A plan is being produced; no step is executing.
Planning,
/// The step with number `step` is being executed.
Executing { step: i32 },
/// The agent has finished.
Completed,
}
// --- Step ---
/// Status of a single plan step.
///
/// Variant names are serialized in `snake_case` (e.g. `waiting_approval`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StepStatus {
/// Not started yet (also the status assigned to invalidated steps).
Pending,
/// Currently being executed.
Running,
/// Waiting for approval.
WaitingApproval,
/// Finished.
Done,
/// Ended in failure.
Failed,
}
/// An output produced by a step.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Artifact {
/// Artifact name (shown in the step context together with `artifact_type`).
pub name: String,
/// Path of the artifact.
pub path: String,
/// Free-form type label of the artifact.
pub artifact_type: String,
/// Optional description; defaults to an empty string when absent on deserialization.
#[serde(default)]
pub description: String,
}
/// One step of the execution plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Step {
/// Step number; used to look up the current step and shown in the plan overview.
pub order: i32,
/// Short title of the step.
pub title: String,
/// Detailed description of the step.
pub description: String,
/// Current status of the step.
pub status: StepStatus,
/// One-sentence summary filled in by the LLM after the step completes.
/// Omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub summary: Option<String>,
/// User feedback addressed to this step.
/// Omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub user_feedbacks: Vec<String>,
/// Identifier string; defaults to empty when absent on deserialization.
/// NOTE(review): presumably the database row id of the step — confirm against callers.
#[serde(default)]
pub db_id: String,
/// Artifacts produced by the step.
/// Omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub artifacts: Vec<Artifact>,
}
// --- Core state ---
/// Complete runtime state of the agent. The whole struct can be serialized to
/// JSON and stored in the DB as-is.
///
/// It is also the data source for building the messages of an LLM API call:
///
/// Planning phase:
/// [ system(planning_prompt), user(requirement), ...current_step_chat_history ]
///
/// Executing phase:
/// [ system(execution_prompt), user(step_context), ...current_step_chat_history ]
///
/// step_context = requirement + plan overview + current step details
/// + summaries of completed steps + scratchpad
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentState {
/// Current phase.
pub phase: AgentPhase,
/// Execution plan generated by the LLM.
pub steps: Vec<Step>,
/// Multi-turn chat history within the current step (assistant + tool result).
/// Appended directly to the end of the messages. Cleared when the step changes.
pub current_step_chat_history: Vec<ChatMessage>,
/// Cross-step workspace of the LLM, read and written by the agent itself;
/// kept when the step changes.
pub scratchpad: String,
}
impl AgentState {
    /// Creates an empty state in the [`AgentPhase::Planning`] phase: no steps,
    /// no chat history and an empty scratchpad.
    pub fn new() -> Self {
        Self {
            phase: AgentPhase::Planning,
            steps: Vec::new(),
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        }
    }

    /// Returns the number of the step currently being executed, or `0` while
    /// the agent is in the `Planning` or `Completed` phase.
    pub fn current_step(&self) -> i32 {
        // Exhaustive on purpose: adding a phase forces a decision here.
        match self.phase {
            AgentPhase::Executing { step } => step,
            AgentPhase::Planning | AgentPhase::Completed => 0,
        }
    }

    /// Replaces the plan with `new_steps`, reusing progress the way a Docker
    /// build cache does.
    ///
    /// Steps are compared position by position on `(title, description)` only;
    /// `user_feedbacks` never takes part in the comparison. While the steps
    /// match, the old step is kept (status, summary, artifacts, ...) and only
    /// its `user_feedbacks` are taken from the new step. From the first
    /// mismatch (or the first step that has no old counterpart) onwards, every
    /// new step is stored with status `Pending` and no summary.
    ///
    /// Returns a line diff (`+` / `-` / space prefixes) between YAML renderings
    /// of the old and the new plan, restricted to step number, title and
    /// description, or `(no changes)` when both renderings are equal. The
    /// string is intended for display in the execution log.
    pub fn apply_plan_diff(&mut self, new_steps: Vec<Step>) -> String {
        // Renders only the fields relevant for the diff; a serialization
        // failure degrades to an empty string instead of aborting the update.
        fn plan_yaml(plan: &[Step]) -> String {
            let entries: Vec<serde_json::Value> = plan
                .iter()
                .map(|step| {
                    serde_json::json!({
                        "step": step.order,
                        "title": step.title,
                        "description": step.description,
                    })
                })
                .collect();
            serde_yaml::to_string(&entries).unwrap_or_default()
        }

        let before = plan_yaml(&self.steps);
        let after = plan_yaml(&new_steps);

        let mut merged: Vec<Step> = Vec::with_capacity(new_steps.len());
        let mut cache_valid = true;
        for (idx, incoming) in new_steps.into_iter().enumerate() {
            // A cache hit requires that no earlier step missed and that the old
            // step at the same position has the same title and description.
            let cached = if cache_valid {
                self.steps.get(idx).filter(|prev| {
                    prev.title == incoming.title && prev.description == incoming.description
                })
            } else {
                None
            };

            match cached {
                Some(prev) => merged.push(Step {
                    user_feedbacks: incoming.user_feedbacks,
                    ..prev.clone()
                }),
                None => {
                    // Everything from here on is invalidated.
                    cache_valid = false;
                    merged.push(Step {
                        status: StepStatus::Pending,
                        summary: None,
                        ..incoming
                    });
                }
            }
        }
        self.steps = merged;

        diff_strings(&before, &after)
    }

    /// Returns the `order` of the first step that still needs attention, i.e.
    /// the first step whose status is `Pending`, `Running` or
    /// `WaitingApproval`.
    ///
    /// `Done` and `Failed` steps are skipped; `None` is returned when no step
    /// is left in one of the three actionable statuses.
    pub fn first_actionable_step(&self) -> Option<i32> {
        self.steps.iter().find_map(|step| match step.status {
            StepStatus::Pending | StepStatus::Running | StepStatus::WaitingApproval => {
                Some(step.order)
            }
            StepStatus::Done | StepStatus::Failed => None,
        })
    }

    /// Builds the user message for the `Executing` phase.
    ///
    /// The text consists of these sections, in order: the requirement, a plan
    /// overview (one line per step with a status marker), the details of the
    /// current step including user feedback (only if a step with the current
    /// number exists), summaries and artifacts of all `Done` steps (only if
    /// there are any) and the scratchpad (only if it is non-empty).
    pub fn build_step_context(&self, requirement: &str) -> String {
        let mut ctx = String::new();

        // Requirement.
        ctx.push_str("## 需求\n");
        ctx.push_str(requirement);
        ctx.push_str("\n\n");

        // Plan overview.
        ctx.push_str("## 计划概览\n");
        for s in &self.steps {
            let marker = match s.status {
                StepStatus::Done => " done",
                StepStatus::Running => " >> current",
                StepStatus::WaitingApproval => " ⏳ waiting",
                StepStatus::Failed => " FAILED",
                StepStatus::Pending => "",
            };
            ctx.push_str(&format!("{}. {}{}\n", s.order, s.title, marker));
        }
        ctx.push('\n');

        // Details of the current step.
        let cur = self.current_step();
        if let Some(s) = self.steps.iter().find(|s| s.order == cur) {
            ctx.push_str(&format!("## 当前步骤(步骤 {})\n", cur));
            ctx.push_str(&format!("标题:{}\n", s.title));
            ctx.push_str(&format!("描述:{}\n", s.description));
            if !s.user_feedbacks.is_empty() {
                ctx.push_str("\n用户反馈:\n");
                for fb in &s.user_feedbacks {
                    ctx.push_str(&format!("- {}\n", fb));
                }
            }
            ctx.push('\n');
        }

        // Summaries of completed steps; the section is omitted when empty.
        let mut done = self
            .steps
            .iter()
            .filter(|s| matches!(s.status, StepStatus::Done))
            .peekable();
        if done.peek().is_some() {
            ctx.push_str("## 已完成步骤摘要\n");
            for s in done {
                let summary = s.summary.as_deref().unwrap_or("(no summary)");
                ctx.push_str(&format!("- 步骤 {}: {}\n", s.order, summary));
                if !s.artifacts.is_empty() {
                    let arts: Vec<String> = s
                        .artifacts
                        .iter()
                        .map(|a| format!("{} ({})", a.name, a.artifact_type))
                        .collect();
                    ctx.push_str(&format!(" 产物: {}\n", arts.join(", ")));
                }
            }
            ctx.push('\n');
        }

        // Scratchpad.
        if !self.scratchpad.is_empty() {
            ctx.push_str("## 备忘录\n");
            ctx.push_str(&self.scratchpad);
            ctx.push('\n');
        }

        ctx
    }

    /// Builds the complete messages array passed to the LLM.
    ///
    /// The result always starts with the system prompt. In the `Planning`
    /// phase it is followed by the raw requirement as a user message, in the
    /// `Executing` phase by the step context from
    /// [`build_step_context`](Self::build_step_context); in the `Completed`
    /// phase no user message is added. The chat history of the current step is
    /// appended last.
    pub fn build_messages(&self, system_prompt: &str, requirement: &str) -> Vec<ChatMessage> {
        let mut msgs = vec![ChatMessage::system(system_prompt)];

        let user_msg = match &self.phase {
            AgentPhase::Planning => Some(ChatMessage::user(requirement)),
            AgentPhase::Executing { .. } => {
                Some(ChatMessage::user(&self.build_step_context(requirement)))
            }
            AgentPhase::Completed => None,
        };
        msgs.extend(user_msg);

        // Clone element by element instead of cloning the whole Vec first.
        msgs.extend(self.current_step_chat_history.iter().cloned());
        msgs
    }
}
/// Line-by-line diff of two texts, implemented without external crates.
///
/// Both inputs are split with `str::lines`. If the resulting line lists are
/// equal, the literal string `(no changes)` is returned. Otherwise every line
/// of the longest common subsequence is emitted with a leading space, removed
/// lines with a leading `-` and added lines with a leading `+`, joined by
/// `\n`. All lines are emitted (no hunk headers, no context trimming).
fn diff_strings(old: &str, new: &str) -> String {
    let before: Vec<&str> = old.lines().collect();
    let after: Vec<&str> = new.lines().collect();
    if before == after {
        return "(no changes)".to_string();
    }

    let rows = before.len();
    let cols = after.len();

    // lcs[r][c] = length of the longest common subsequence of
    // before[..r] and after[..c]; row 0 and column 0 stay zero.
    let mut lcs = vec![vec![0u32; cols + 1]; rows + 1];
    for (r, left) in before.iter().enumerate() {
        for (c, right) in after.iter().enumerate() {
            lcs[r + 1][c + 1] = if left == right {
                lcs[r][c] + 1
            } else {
                lcs[r][c + 1].max(lcs[r + 1][c])
            };
        }
    }

    // Walk back from the bottom-right corner. Lines are collected in reverse
    // order; on ties an insertion is taken first, so that after reversing a
    // removal precedes the matching addition.
    let mut reversed: Vec<String> = Vec::with_capacity(rows + cols);
    let mut r = rows;
    let mut c = cols;
    loop {
        match (r, c) {
            (0, 0) => break,
            _ if r > 0 && c > 0 && before[r - 1] == after[c - 1] => {
                reversed.push(format!(" {}", before[r - 1]));
                r -= 1;
                c -= 1;
            }
            _ if c > 0 && (r == 0 || lcs[r][c - 1] >= lcs[r - 1][c]) => {
                reversed.push(format!("+{}", after[c - 1]));
                c -= 1;
            }
            _ => {
                reversed.push(format!("-{}", before[r - 1]));
                r -= 1;
            }
        }
    }

    reversed.into_iter().rev().collect::<Vec<_>>().join("\n")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a step with the given number, title, description and status;
    /// all other fields are empty.
    fn make_step(order: i32, title: &str, desc: &str, status: StepStatus) -> Step {
        Step {
            order,
            title: title.into(),
            description: desc.into(),
            status,
            summary: None,
            user_feedbacks: Vec::new(),
            db_id: String::new(),
            artifacts: Vec::new(),
        }
    }

    /// Builds a `Done` step that already carries `summary`.
    fn done_step(order: i32, title: &str, desc: &str, summary: &str) -> Step {
        Step {
            summary: Some(summary.into()),
            ..make_step(order, title, desc, StepStatus::Done)
        }
    }

    /// Builds a state that is executing `step`, with empty chat history and
    /// an empty scratchpad.
    fn executing(step: i32, steps: Vec<Step>) -> AgentState {
        AgentState {
            phase: AgentPhase::Executing { step },
            steps,
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        }
    }

    // --- check_scratchpad_size ---

    #[test]
    fn scratchpad_empty_ok() {
        assert!(check_scratchpad_size("").is_ok());
    }

    #[test]
    fn scratchpad_under_limit_ok() {
        let content = "a".repeat(24_000);
        assert!(check_scratchpad_size(&content).is_ok());
    }

    #[test]
    fn scratchpad_over_limit_err() {
        let content = "a".repeat(24_001);
        let err = check_scratchpad_size(&content).unwrap_err();
        assert!(err.contains("24001"));
        assert!(err.contains("24000"));
    }

    #[test]
    fn scratchpad_exactly_at_limit() {
        let content = "a".repeat(SCRATCHPAD_MAX_BYTES);
        assert!(check_scratchpad_size(&content).is_ok());
    }

    #[test]
    fn scratchpad_multibyte_counts_bytes_not_chars() {
        // 8000 three-byte CJK characters = 24000 bytes in UTF-8, exactly at the limit.
        let content = "中".repeat(8000);
        assert_eq!(content.len(), 24000);
        assert!(check_scratchpad_size(&content).is_ok());

        // One more character pushes the content over the limit.
        let content_over = format!("{}中", content);
        assert!(check_scratchpad_size(&content_over).is_err());
    }

    // --- first_actionable_step ---

    #[test]
    fn first_actionable_all_done() {
        let state = executing(
            1,
            vec![
                make_step(1, "A", "a", StepStatus::Done),
                make_step(2, "B", "b", StepStatus::Done),
            ],
        );
        assert_eq!(state.first_actionable_step(), None);
    }

    #[test]
    fn first_actionable_skips_done() {
        let state = executing(
            2,
            vec![
                make_step(1, "A", "a", StepStatus::Done),
                make_step(2, "B", "b", StepStatus::Pending),
                make_step(3, "C", "c", StepStatus::Pending),
            ],
        );
        assert_eq!(state.first_actionable_step(), Some(2));
    }

    #[test]
    fn first_actionable_finds_running() {
        let state = executing(
            2,
            vec![
                make_step(1, "A", "a", StepStatus::Done),
                make_step(2, "B", "b", StepStatus::Running),
            ],
        );
        assert_eq!(state.first_actionable_step(), Some(2));
    }

    #[test]
    fn first_actionable_finds_waiting_approval() {
        let state = executing(
            1,
            vec![
                make_step(1, "A", "a", StepStatus::WaitingApproval),
                make_step(2, "B", "b", StepStatus::Pending),
            ],
        );
        assert_eq!(state.first_actionable_step(), Some(1));
    }

    #[test]
    fn first_actionable_skips_failed() {
        let state = executing(
            2,
            vec![
                make_step(1, "A", "a", StepStatus::Failed),
                make_step(2, "B", "b", StepStatus::Pending),
            ],
        );
        assert_eq!(state.first_actionable_step(), Some(2));
    }

    // --- apply_plan_diff ---

    #[test]
    fn plan_diff_identical_keeps_done() {
        let mut state = AgentState::new();
        state.steps = vec![
            done_step(1, "A", "desc A", "did A"),
            make_step(2, "B", "desc B", StepStatus::Pending),
        ];

        let new_steps = vec![
            make_step(1, "A", "desc A", StepStatus::Pending),
            make_step(2, "B", "desc B", StepStatus::Pending),
        ];
        let diff = state.apply_plan_diff(new_steps);

        // Titles and descriptions are unchanged, so the rendered plans are equal.
        assert_eq!(diff, "(no changes)");
        assert!(matches!(state.steps[0].status, StepStatus::Done));
        assert_eq!(state.steps[0].summary.as_deref(), Some("did A"));
        assert!(matches!(state.steps[1].status, StepStatus::Pending));
    }

    #[test]
    fn plan_diff_change_invalidates_from_mismatch() {
        let mut state = AgentState::new();
        state.steps = vec![
            done_step(1, "A", "desc A", "did A"),
            done_step(2, "B", "desc B", "did B"),
            make_step(3, "C", "desc C", StepStatus::Pending),
        ];

        // Changing the description of step 2 invalidates steps 2 and 3.
        let new_steps = vec![
            make_step(1, "A", "desc A", StepStatus::Pending),
            make_step(2, "B", "desc B CHANGED", StepStatus::Pending),
            make_step(3, "C", "desc C", StepStatus::Pending),
        ];
        state.apply_plan_diff(new_steps);

        assert!(matches!(state.steps[0].status, StepStatus::Done)); // kept
        assert!(matches!(state.steps[1].status, StepStatus::Pending)); // invalidated
        assert!(state.steps[1].summary.is_none()); // summary cleared
        assert!(matches!(state.steps[2].status, StepStatus::Pending)); // invalidated
    }

    #[test]
    fn plan_diff_add_new_steps() {
        let mut state = AgentState::new();
        state.steps = vec![done_step(1, "A", "desc A", "did A")];

        let new_steps = vec![
            make_step(1, "A", "desc A", StepStatus::Pending),
            make_step(2, "New", "new step", StepStatus::Pending),
        ];
        state.apply_plan_diff(new_steps);

        assert_eq!(state.steps.len(), 2);
        assert!(matches!(state.steps[0].status, StepStatus::Done));
        assert!(matches!(state.steps[1].status, StepStatus::Pending));
        assert_eq!(state.steps[1].title, "New");
    }

    #[test]
    fn plan_diff_remove_steps() {
        let mut state = AgentState::new();
        state.steps = vec![
            done_step(1, "A", "desc A", "did A"),
            make_step(2, "B", "desc B", StepStatus::Pending),
            make_step(3, "C", "desc C", StepStatus::Pending),
        ];

        // The new plan only has one step, identical to the old step 1.
        let new_steps = vec![make_step(1, "A", "desc A", StepStatus::Pending)];
        state.apply_plan_diff(new_steps);

        assert_eq!(state.steps.len(), 1);
        assert!(matches!(state.steps[0].status, StepStatus::Done));
    }

    // --- build_step_context ---

    #[test]
    fn step_context_includes_all_sections() {
        let state = AgentState {
            scratchpad: "key=value".into(),
            ..executing(
                2,
                vec![
                    done_step(1, "Setup", "install deps", "installed deps"),
                    make_step(2, "Build", "compile code", StepStatus::Running),
                    make_step(3, "Test", "run tests", StepStatus::Pending),
                ],
            )
        };

        let ctx = state.build_step_context("Build a web app");

        assert!(ctx.contains("## 需求\nBuild a web app"));
        assert!(ctx.contains("## 计划概览"));
        assert!(ctx.contains("1. Setup done"));
        assert!(ctx.contains("2. Build >> current"));
        assert!(ctx.contains("3. Test"));
        assert!(ctx.contains("## 当前步骤(步骤 2"));
        // The label is followed by a full-width colon in the generated text.
        assert!(ctx.contains("标题:Build"));
        assert!(ctx.contains("描述:compile code"));
        assert!(ctx.contains("## 已完成步骤摘要"));
        assert!(ctx.contains("installed deps"));
        assert!(ctx.contains("## 备忘录\nkey=value"));
    }

    #[test]
    fn step_context_user_feedback() {
        let state = executing(
            1,
            vec![Step {
                user_feedbacks: vec!["please use React".into()],
                ..make_step(1, "Setup", "setup project", StepStatus::Running)
            }],
        );

        let ctx = state.build_step_context("Build app");

        assert!(ctx.contains("用户反馈"));
        assert!(ctx.contains("please use React"));
    }

    // --- build_messages ---

    #[test]
    fn build_messages_planning() {
        let state = AgentState::new();
        let msgs = state.build_messages("system prompt", "requirement text");

        assert_eq!(msgs.len(), 2);
        assert_eq!(msgs[0].role, "system");
        assert_eq!(msgs[0].content.as_deref(), Some("system prompt"));
        assert_eq!(msgs[1].role, "user");
        assert_eq!(msgs[1].content.as_deref(), Some("requirement text"));
    }

    #[test]
    fn build_messages_executing_includes_history() {
        let state = AgentState {
            current_step_chat_history: vec![ChatMessage {
                role: "assistant".into(),
                content: Some("let me help".into()),
                tool_calls: None,
                tool_call_id: None,
            }],
            ..executing(
                1,
                vec![make_step(1, "Do thing", "details", StepStatus::Running)],
            )
        };

        let msgs = state.build_messages("sys", "req");

        assert_eq!(msgs.len(), 3); // system + user context + 1 history entry
        assert_eq!(msgs[2].role, "assistant");
    }

    #[test]
    fn build_messages_completed_minimal() {
        let state = AgentState {
            phase: AgentPhase::Completed,
            ..AgentState::new()
        };

        let msgs = state.build_messages("sys", "req");

        assert_eq!(msgs.len(), 1); // only the system message
    }
}