tori/src/state.rs
Fam Zheng fa800b1601 feat: step artifacts framework
- Add Artifact type to Step (name, path, artifact_type, description)
- step_done tool accepts optional artifacts parameter
- Save artifacts to step_artifacts DB table
- Display artifacts in frontend PlanSection (tag style)
- Show artifacts in step context for sub-agents and coordinator
- Add LLM client retry with exponential backoff
2026-03-09 12:01:29 +00:00

567 lines
19 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use serde::{Deserialize, Serialize};
use crate::llm::ChatMessage;
// --- Step result (returned by run_step_loop) ---
/// Outcome of running a single step.
///
/// Bundles the final status, a textual summary, and the artifacts the step
/// reported.
#[derive(Debug, Clone)]
pub struct StepResult {
    /// How the step ended (done, failed, or waiting for approval).
    pub status: StepResultStatus,
    /// Summary text describing what the step did.
    pub summary: String,
    /// Artifacts produced by the step; may be empty.
    pub artifacts: Vec<Artifact>,
}
/// Terminal status of a step run.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare statuses with
/// `==` / `assert_eq!` instead of pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StepResultStatus {
    /// The step finished successfully.
    Done,
    /// The step failed; `error` carries the failure description.
    Failed { error: String },
    /// The step is paused until approved; `message` carries the request text.
    NeedsApproval { message: String },
}
/// Scratchpad capacity in bytes (~8K tokens ≈ 24K bytes).
const SCRATCHPAD_MAX_BYTES: usize = 24_000;

/// Checks that `content` fits within the scratchpad capacity.
///
/// The size is measured in UTF-8 bytes (`str::len`), not in characters.
///
/// # Errors
///
/// Returns a message containing the current size and the limit when
/// `content` is longer than `SCRATCHPAD_MAX_BYTES` bytes.
pub fn check_scratchpad_size(content: &str) -> Result<(), String> {
    let size = content.len();
    if size <= SCRATCHPAD_MAX_BYTES {
        return Ok(());
    }
    Err(format!(
        "Scratchpad 超出容量限制(当前 {} 字节,上限 {} 字节)。请精简内容后重试。",
        size, SCRATCHPAD_MAX_BYTES,
    ))
}
// --- Agent phase state machine ---
/// Phase of the agent state machine.
///
/// Serialized by serde as an internally tagged enum: the variant name is
/// stored under the `"type"` key next to the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AgentPhase {
    /// The plan is still being produced.
    Planning,
    /// A plan step is being executed; `step` is that step's order number.
    Executing { step: i32 },
    /// The run has finished.
    Completed,
}
// --- Step ---
/// Lifecycle status of a plan step.
///
/// Serialized in `snake_case` (for example `WaitingApproval` becomes
/// `"waiting_approval"`). Derives `PartialEq`/`Eq` so statuses can be
/// compared with `==` rather than only through `matches!`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StepStatus {
    /// Not started yet (also the state a step is reset to when invalidated).
    Pending,
    /// Currently being executed.
    Running,
    /// Paused until approval is given.
    WaitingApproval,
    /// Finished successfully.
    Done,
    /// Finished with an error.
    Failed,
}
/// An output item reported by a step.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Artifact {
    /// Display name of the artifact.
    pub name: String,
    /// Location of the artifact.
    pub path: String,
    /// Free-form type label; shown next to the name in the step context.
    pub artifact_type: String,
    /// Optional description; deserializes to an empty string when absent.
    #[serde(default)]
    pub description: String,
}
/// One step of the execution plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Step {
    /// Order number of the step; used to look the step up and shown in the plan overview.
    pub order: i32,
    /// Short title of the step.
    pub title: String,
    /// Detailed description of what the step should do.
    pub description: String,
    /// Current lifecycle status.
    pub status: StepStatus,
    /// One-sentence summary filled in by the LLM once the step is complete.
    /// Omitted from the serialized form while it is `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary: Option<String>,
    /// User feedback given for this step.
    /// Omitted from the serialized form while empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub user_feedbacks: Vec<String>,
    /// Defaults to an empty string when absent.
    // NOTE(review): presumably the database row id of this step — confirm against the DB layer.
    #[serde(default)]
    pub db_id: String,
    /// Artifacts produced by this step.
    /// Omitted from the serialized form while empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub artifacts: Vec<Artifact>,
}
// --- Core state ---
/// Complete runtime state of the agent. The whole struct can be serialized to
/// JSON and stored directly in the DB.
///
/// It is also the data source for building the messages of an LLM API call:
///
/// Planning phase:
///   [ system(planning_prompt), user(requirement), ...current_step_chat_history ]
///
/// Executing phase:
///   [ system(execution_prompt), user(step_context), ...current_step_chat_history ]
///
/// step_context = requirement + plan overview + current step details
///                + summaries of completed steps + scratchpad
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentState {
    /// Current phase.
    pub phase: AgentPhase,
    /// Execution plan generated by the LLM.
    pub steps: Vec<Step>,
    /// Multi-turn chat history within the current step (assistant + tool result).
    /// Appended directly to the tail of the messages. Intended to be cleared
    /// when switching steps (the clearing itself happens outside this struct).
    pub current_step_chat_history: Vec<ChatMessage>,
    /// Cross-step workspace of the LLM, read and written by the agent itself;
    /// intended to be kept when switching steps.
    pub scratchpad: String,
}
impl AgentState {
    /// Creates an empty state in the `Planning` phase: no steps, no chat
    /// history, and an empty scratchpad.
    pub fn new() -> Self {
        Self {
            phase: AgentPhase::Planning,
            steps: Vec::new(),
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        }
    }

    /// Returns the order number of the step currently being executed, or `0`
    /// while the phase is `Planning` or `Completed`.
    pub fn current_step(&self) -> i32 {
        // Variants are listed explicitly so that adding a phase forces a
        // decision here instead of silently mapping to 0.
        match &self.phase {
            AgentPhase::Executing { step } => *step,
            AgentPhase::Planning | AgentPhase::Completed => 0,
        }
    }

    /// Applies a new plan on top of the existing one, Docker-build-cache style.
    ///
    /// Steps are compared position by position on `(title, description)`;
    /// `user_feedbacks` does not take part in the comparison.
    ///
    /// * While the steps match, the old step is kept as-is (status, summary,
    ///   and every other field) and only its `user_feedbacks` are replaced by
    ///   the new step's.
    /// * From the first mismatch (or the first position that has no old
    ///   step) onward, every new step is taken with its status reset to
    ///   `Pending` and its summary cleared; the remaining fields come from
    ///   the new step.
    ///
    /// Old steps beyond the length of `new_steps` are dropped.
    pub fn apply_plan_diff(&mut self, new_steps: Vec<Step>) {
        // Take ownership of the old plan so cached steps are moved, not cloned.
        let mut previous = std::mem::take(&mut self.steps).into_iter();
        let mut merged = Vec::with_capacity(new_steps.len());
        let mut invalidated = false;

        for new in new_steps {
            if !invalidated {
                match previous.next() {
                    // Cache hit: keep the old step, take the new user_feedbacks.
                    Some(old) if old.title == new.title && old.description == new.description => {
                        merged.push(Step {
                            user_feedbacks: new.user_feedbacks,
                            ..old
                        });
                        continue;
                    }
                    // Cache miss or brand-new step: invalidate from here on.
                    _ => invalidated = true,
                }
            }
            merged.push(Step {
                status: StepStatus::Pending,
                summary: None,
                ..new
            });
        }

        self.steps = merged;
    }

    /// Returns the order number of the first step that still needs work,
    /// i.e. the first step whose status is `Pending`, `Running`, or
    /// `WaitingApproval`.
    ///
    /// `Done` and `Failed` steps are skipped; returns `None` when no step
    /// qualifies.
    pub fn first_actionable_step(&self) -> Option<i32> {
        self.steps
            .iter()
            .find(|s| {
                matches!(
                    s.status,
                    StepStatus::Pending | StepStatus::Running | StepStatus::WaitingApproval
                )
            })
            .map(|s| s.order)
    }

    /// Builds the user message for the Executing phase.
    ///
    /// Sections, in order: requirement, plan overview, current step details
    /// (only when a step with order `current_step()` exists), summaries of
    /// completed steps (only when at least one step is `Done`), and the
    /// scratchpad (only when non-empty).
    ///
    /// Note that the overview marker on each line reflects the step's
    /// `status`, not whether it equals `current_step()`.
    pub fn build_step_context(&self, requirement: &str) -> String {
        let mut ctx = String::new();

        // Requirement
        ctx.push_str("## 需求\n");
        ctx.push_str(requirement);
        ctx.push_str("\n\n");

        // Plan overview
        ctx.push_str("## 计划概览\n");
        let cur = self.current_step();
        for s in &self.steps {
            let marker = match s.status {
                StepStatus::Done => " done",
                StepStatus::Running => " >> current",
                StepStatus::WaitingApproval => " ⏳ waiting",
                StepStatus::Failed => " FAILED",
                StepStatus::Pending => "",
            };
            ctx.push_str(&format!("{}. {}{}\n", s.order, s.title, marker));
        }
        ctx.push('\n');

        // Current step details
        if let Some(s) = self.steps.iter().find(|s| s.order == cur) {
            // The heading previously lacked the closing parenthesis.
            ctx.push_str(&format!("## 当前步骤(步骤 {})\n", cur));
            ctx.push_str(&format!("标题:{}\n", s.title));
            ctx.push_str(&format!("描述:{}\n", s.description));
            if !s.user_feedbacks.is_empty() {
                ctx.push_str("\n用户反馈:\n");
                for fb in &s.user_feedbacks {
                    ctx.push_str(&format!("- {}\n", fb));
                }
            }
            ctx.push('\n');
        }

        // Summaries of completed steps
        let done: Vec<_> = self
            .steps
            .iter()
            .filter(|s| matches!(s.status, StepStatus::Done))
            .collect();
        if !done.is_empty() {
            ctx.push_str("## 已完成步骤摘要\n");
            for s in done {
                let summary = s.summary.as_deref().unwrap_or("(no summary)");
                ctx.push_str(&format!("- 步骤 {}: {}\n", s.order, summary));
                if !s.artifacts.is_empty() {
                    // Each artifact is rendered as "name (type)".
                    let arts: Vec<String> = s
                        .artifacts
                        .iter()
                        .map(|a| format!("{} ({})", a.name, a.artifact_type))
                        .collect();
                    ctx.push_str(&format!(" 产物: {}\n", arts.join(", ")));
                }
            }
            ctx.push('\n');
        }

        // Scratchpad
        if !self.scratchpad.is_empty() {
            ctx.push_str("## 备忘录\n");
            ctx.push_str(&self.scratchpad);
            ctx.push('\n');
        }

        ctx
    }

    /// Builds the complete messages array passed to the LLM.
    ///
    /// The system prompt always comes first. It is followed by the raw
    /// requirement (Planning), the step context (Executing), or nothing
    /// (Completed), and finally by the current step's chat history.
    pub fn build_messages(&self, system_prompt: &str, requirement: &str) -> Vec<ChatMessage> {
        // At most: system + one user message + the whole history.
        let mut msgs = Vec::with_capacity(2 + self.current_step_chat_history.len());
        msgs.push(ChatMessage::system(system_prompt));

        match &self.phase {
            AgentPhase::Planning => {
                msgs.push(ChatMessage::user(requirement));
            }
            AgentPhase::Executing { .. } => {
                msgs.push(ChatMessage::user(&self.build_step_context(requirement)));
            }
            AgentPhase::Completed => {}
        }

        // Clone message by message instead of cloning the whole Vec first.
        msgs.extend(self.current_step_chat_history.iter().cloned());
        msgs
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a step with the given identity and status; every optional field is empty.
    fn make_step(order: i32, title: &str, desc: &str, status: StepStatus) -> Step {
        Step {
            order,
            title: title.into(),
            description: desc.into(),
            status,
            summary: None,
            user_feedbacks: Vec::new(),
            db_id: String::new(),
            artifacts: Vec::new(),
        }
    }

    // --- check_scratchpad_size ---

    #[test]
    fn scratchpad_empty_ok() {
        assert!(check_scratchpad_size("").is_ok());
    }

    #[test]
    fn scratchpad_under_limit_ok() {
        let content = "a".repeat(24_000);
        assert!(check_scratchpad_size(&content).is_ok());
    }

    #[test]
    fn scratchpad_over_limit_err() {
        let content = "a".repeat(24_001);
        let err = check_scratchpad_size(&content).unwrap_err();
        assert!(err.contains("24001"));
        assert!(err.contains("24000"));
    }

    #[test]
    fn scratchpad_exactly_at_limit() {
        let content = "a".repeat(SCRATCHPAD_MAX_BYTES);
        assert!(check_scratchpad_size(&content).is_ok());
    }

    #[test]
    fn scratchpad_multibyte_counts_bytes_not_chars() {
        // 8000 CJK characters = 24000 bytes in UTF-8, exactly at the limit.
        let content = "中".repeat(8000);
        assert_eq!(content.len(), 24000);
        assert!(check_scratchpad_size(&content).is_ok());
        // One more char pushes over
        let content_over = format!("{}中", content);
        assert!(check_scratchpad_size(&content_over).is_err());
    }

    // --- first_actionable_step ---

    #[test]
    fn first_actionable_all_done() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 1 },
            steps: vec![
                make_step(1, "A", "a", StepStatus::Done),
                make_step(2, "B", "b", StepStatus::Done),
            ],
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        };
        assert_eq!(state.first_actionable_step(), None);
    }

    #[test]
    fn first_actionable_skips_done() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 2 },
            steps: vec![
                make_step(1, "A", "a", StepStatus::Done),
                make_step(2, "B", "b", StepStatus::Pending),
                make_step(3, "C", "c", StepStatus::Pending),
            ],
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        };
        assert_eq!(state.first_actionable_step(), Some(2));
    }

    #[test]
    fn first_actionable_finds_running() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 2 },
            steps: vec![
                make_step(1, "A", "a", StepStatus::Done),
                make_step(2, "B", "b", StepStatus::Running),
            ],
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        };
        assert_eq!(state.first_actionable_step(), Some(2));
    }

    #[test]
    fn first_actionable_finds_waiting_approval() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 1 },
            steps: vec![
                make_step(1, "A", "a", StepStatus::WaitingApproval),
                make_step(2, "B", "b", StepStatus::Pending),
            ],
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        };
        assert_eq!(state.first_actionable_step(), Some(1));
    }

    #[test]
    fn first_actionable_skips_failed() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 2 },
            steps: vec![
                make_step(1, "A", "a", StepStatus::Failed),
                make_step(2, "B", "b", StepStatus::Pending),
            ],
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        };
        assert_eq!(state.first_actionable_step(), Some(2));
    }

    // --- apply_plan_diff ---

    #[test]
    fn plan_diff_identical_keeps_done() {
        let mut state = AgentState::new();
        state.steps = vec![
            Step {
                status: StepStatus::Done,
                summary: Some("did A".into()),
                ..make_step(1, "A", "desc A", StepStatus::Done)
            },
            make_step(2, "B", "desc B", StepStatus::Pending),
        ];
        let new_steps = vec![
            make_step(1, "A", "desc A", StepStatus::Pending),
            make_step(2, "B", "desc B", StepStatus::Pending),
        ];
        state.apply_plan_diff(new_steps);
        assert!(matches!(state.steps[0].status, StepStatus::Done));
        assert_eq!(state.steps[0].summary.as_deref(), Some("did A"));
        assert!(matches!(state.steps[1].status, StepStatus::Pending));
    }

    #[test]
    fn plan_diff_change_invalidates_from_mismatch() {
        let mut state = AgentState::new();
        state.steps = vec![
            Step {
                status: StepStatus::Done,
                summary: Some("did A".into()),
                ..make_step(1, "A", "desc A", StepStatus::Done)
            },
            Step {
                status: StepStatus::Done,
                summary: Some("did B".into()),
                ..make_step(2, "B", "desc B", StepStatus::Done)
            },
            make_step(3, "C", "desc C", StepStatus::Pending),
        ];
        // Change step 2's description → invalidates 2 and 3
        let new_steps = vec![
            make_step(1, "A", "desc A", StepStatus::Pending),
            make_step(2, "B", "desc B CHANGED", StepStatus::Pending),
            make_step(3, "C", "desc C", StepStatus::Pending),
        ];
        state.apply_plan_diff(new_steps);
        assert!(matches!(state.steps[0].status, StepStatus::Done)); // kept
        assert!(matches!(state.steps[1].status, StepStatus::Pending)); // invalidated
        assert!(state.steps[1].summary.is_none()); // summary cleared
        assert!(matches!(state.steps[2].status, StepStatus::Pending)); // invalidated
    }

    #[test]
    fn plan_diff_add_new_steps() {
        let mut state = AgentState::new();
        state.steps = vec![Step {
            status: StepStatus::Done,
            summary: Some("did A".into()),
            ..make_step(1, "A", "desc A", StepStatus::Done)
        }];
        let new_steps = vec![
            make_step(1, "A", "desc A", StepStatus::Pending),
            make_step(2, "New", "new step", StepStatus::Pending),
        ];
        state.apply_plan_diff(new_steps);
        assert_eq!(state.steps.len(), 2);
        assert!(matches!(state.steps[0].status, StepStatus::Done));
        assert!(matches!(state.steps[1].status, StepStatus::Pending));
        assert_eq!(state.steps[1].title, "New");
    }

    #[test]
    fn plan_diff_remove_steps() {
        let mut state = AgentState::new();
        state.steps = vec![
            Step {
                status: StepStatus::Done,
                summary: Some("did A".into()),
                ..make_step(1, "A", "desc A", StepStatus::Done)
            },
            make_step(2, "B", "desc B", StepStatus::Pending),
            make_step(3, "C", "desc C", StepStatus::Pending),
        ];
        // New plan only has 1 step (same as step 1)
        let new_steps = vec![make_step(1, "A", "desc A", StepStatus::Pending)];
        state.apply_plan_diff(new_steps);
        assert_eq!(state.steps.len(), 1);
        assert!(matches!(state.steps[0].status, StepStatus::Done));
    }

    // --- build_step_context ---

    #[test]
    fn step_context_includes_all_sections() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 2 },
            steps: vec![
                Step {
                    status: StepStatus::Done,
                    summary: Some("installed deps".into()),
                    ..make_step(1, "Setup", "install deps", StepStatus::Done)
                },
                make_step(2, "Build", "compile code", StepStatus::Running),
                make_step(3, "Test", "run tests", StepStatus::Pending),
            ],
            current_step_chat_history: Vec::new(),
            scratchpad: "key=value".into(),
        };
        let ctx = state.build_step_context("Build a web app");
        assert!(ctx.contains("## 需求\nBuild a web app"));
        assert!(ctx.contains("## 计划概览"));
        assert!(ctx.contains("1. Setup done"));
        assert!(ctx.contains("2. Build >> current"));
        assert!(ctx.contains("3. Test"));
        assert!(ctx.contains("## 当前步骤(步骤 2"));
        // These literals must match the "标题:{}" / "描述:{}" lines emitted by
        // build_step_context, separator included.
        assert!(ctx.contains("标题:Build"));
        assert!(ctx.contains("描述:compile code"));
        assert!(ctx.contains("## 已完成步骤摘要"));
        assert!(ctx.contains("installed deps"));
        assert!(ctx.contains("## 备忘录\nkey=value"));
    }

    #[test]
    fn step_context_user_feedback() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 1 },
            steps: vec![Step {
                user_feedbacks: vec!["please use React".into()],
                ..make_step(1, "Setup", "setup project", StepStatus::Running)
            }],
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        };
        let ctx = state.build_step_context("Build app");
        assert!(ctx.contains("用户反馈"));
        assert!(ctx.contains("please use React"));
    }

    #[test]
    fn step_context_lists_artifacts_of_done_steps() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 2 },
            steps: vec![
                Step {
                    summary: Some("wrote report".into()),
                    artifacts: vec![Artifact {
                        name: "report".into(),
                        path: "out/report.md".into(),
                        artifact_type: "file".into(),
                        description: String::new(),
                    }],
                    ..make_step(1, "Write", "write report", StepStatus::Done)
                },
                make_step(2, "Review", "review report", StepStatus::Running),
            ],
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        };
        let ctx = state.build_step_context("Produce a report");
        assert!(ctx.contains("wrote report"));
        assert!(ctx.contains("report (file)"));
    }

    // --- build_messages ---

    #[test]
    fn build_messages_planning() {
        let state = AgentState::new();
        let msgs = state.build_messages("system prompt", "requirement text");
        assert_eq!(msgs.len(), 2);
        assert_eq!(msgs[0].role, "system");
        assert_eq!(msgs[0].content.as_deref(), Some("system prompt"));
        assert_eq!(msgs[1].role, "user");
        assert_eq!(msgs[1].content.as_deref(), Some("requirement text"));
    }

    #[test]
    fn build_messages_executing_includes_history() {
        let state = AgentState {
            phase: AgentPhase::Executing { step: 1 },
            steps: vec![make_step(1, "Do thing", "details", StepStatus::Running)],
            current_step_chat_history: vec![ChatMessage {
                role: "assistant".into(),
                content: Some("let me help".into()),
                tool_calls: None,
                tool_call_id: None,
            }],
            scratchpad: String::new(),
        };
        let msgs = state.build_messages("sys", "req");
        assert_eq!(msgs.len(), 3); // system + user context + 1 history
        assert_eq!(msgs[2].role, "assistant");
    }

    #[test]
    fn build_messages_completed_minimal() {
        let state = AgentState {
            phase: AgentPhase::Completed,
            steps: Vec::new(),
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        };
        let msgs = state.build_messages("sys", "req");
        assert_eq!(msgs.len(), 1); // only system
    }
}