notes: 加「清理润色」block + 转写原文默认折叠
deploy notes / build-and-deploy (push) Successful in 1m47s

- backend: schema 加 cleaned 列;process_recording 流程
  ASR → cleaning (LLM 分段+去口语+润色+**加粗高亮**) → summarizing → done
- cleanup LLM 失败不阻塞,继续 summary
- 前端三 block 顺序:纪要 → 清理润色 → 原文(details 折叠)
- 新 status 'cleaning' 也加进 statusLabel / progressText
This commit is contained in:
Fam Zheng
2026-05-18 01:22:33 +01:00
parent ca11a9bda7
commit e072109e91
2 changed files with 124 additions and 13 deletions
+83 -8
View File
@@ -81,6 +81,7 @@ async fn main() -> std::io::Result<()> {
// 兼容旧 db 增量加列;已存在忽略错误
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_doc_id TEXT", []);
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_url TEXT", []);
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN cleaned TEXT", []);
tracing::info!(%db_path, blobs = %blobs_dir.display(), "notes ready");
let http = reqwest::Client::builder()
@@ -247,6 +248,7 @@ struct RecordingDetail {
size_bytes: i64,
status: String,
transcript: Option<String>,
cleaned: Option<String>,
summary: Option<String>,
error: Option<String>,
created_at: String,
@@ -293,30 +295,30 @@ async fn get_recording(
let conn = s.db.lock().unwrap();
type Row = (
String, String, String, i64, String,
Option<String>, Option<String>, Option<String>, String,
Option<String>, Option<String>, Option<String>, Option<String>, String,
Option<String>, Option<String>,
);
let row: Option<Row> = conn
.query_row(
"SELECT title, filename, mime, size_bytes, status,
transcript, summary, error, created_at,
transcript, cleaned, summary, error, created_at,
feishu_doc_id, feishu_url
FROM recordings WHERE id = ?1",
params![id],
|r| {
Ok((
r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?,
r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?,
r.get(9)?, r.get(10)?,
r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?, r.get(9)?,
r.get(10)?, r.get(11)?,
))
},
)
.optional()?;
let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at,
let (title, filename, mime, size_bytes, status, transcript, cleaned, summary, error, created_at,
feishu_doc_id, feishu_url) = row.ok_or(AppError::NotFound)?;
Ok(JsonResp(RecordingDetail {
id, title, filename, mime, size_bytes, status,
transcript, summary, error, created_at,
transcript, cleaned, summary, error, created_at,
feishu_doc_id, feishu_url,
}))
}
@@ -455,15 +457,34 @@ async fn process_recording(s: AppState, id: i64) {
return;
}
};
// 写 transcript 但还没 summary
// 写 transcript,进入 cleaning
{
let conn = s.db.lock().unwrap();
let _ = conn.execute(
"UPDATE recordings SET transcript = ?1, status = 'summarizing' WHERE id = ?2",
"UPDATE recordings SET transcript = ?1, status = 'cleaning' WHERE id = ?2",
params![&transcript, id],
);
}
// LLM cleanup:分段 + 去口语 + 润色 + 高亮(失败也继续 summary,不阻塞)
match call_llm_cleanup(&s, &transcript).await {
Ok(c) => {
let conn = s.db.lock().unwrap();
let _ = conn.execute(
"UPDATE recordings SET cleaned = ?1, status = 'summarizing' WHERE id = ?2",
params![&c, id],
);
}
Err(e) => {
tracing::warn!(%id, error = %e, "cleanup failed, skip and continue to summary");
let conn = s.db.lock().unwrap();
let _ = conn.execute(
"UPDATE recordings SET status = 'summarizing' WHERE id = ?1",
params![id],
);
}
}
// LLM:生成会议纪要 + 标题
let raw = match call_llm_summary(&s, &transcript).await {
Ok(t) => t,
@@ -559,6 +580,60 @@ async fn call_asr(
Ok(text)
}
async fn call_llm_cleanup(s: &AppState, transcript: &str) -> Result<String, String> {
let trimmed = if transcript.chars().count() > 12000 {
let mut out = String::new();
for (i, c) in transcript.chars().enumerate() {
if i >= 12000 { break; }
out.push(c);
}
out + "\n\n[... 后文截断]"
} else {
transcript.to_string()
};
let payload = json!({
"model": s.llm_model,
"messages": [
{ "role": "system", "content":
"你是 ASR 转写后处理助手。把下面这段连续无标点的转写整理成可读版本:\n\
\n\
1. **自动分段**:按话题/语义换段,每段 2-5 句\n\
2. **加标点**:句号、问号、感叹号、逗号、引号\n\
3. **去口语噪音**:删掉「嗯/啊/那个/就是/对/然后...」等填充词,但保留实际含义的连接词\n\
4. **轻度润色**:通顺、语法、错别字(结合上下文修 ASR 错字),但**不要总结、不要改变原意、不要添加内容**\n\
5. **重点高亮**:把关键判断、结论、决定、数字、名词用 markdown `**...**` 加粗\n\
\n\
输出纯 markdown 段落,不要标题、不要列表、不要解释。" },
{ "role": "user", "content": trimmed },
],
"temperature": 0.3,
});
let url = format!("{}/chat/completions", s.llm_gateway.trim_end_matches('/'));
let resp = s
.http
.post(&url)
.bearer_auth(&s.llm_token)
.json(&payload)
.timeout(std::time::Duration::from_secs(600))
.send()
.await
.map_err(|e| format!("connect: {e}"))?;
if !resp.status().is_success() {
let st = resp.status();
let body = resp.text().await.unwrap_or_default();
return Err(format!("LLM {st}: {body}"));
}
let v: Value = resp.json().await.map_err(|e| format!("decode: {e}"))?;
let text = v
.get("choices").and_then(|c| c.get(0))
.and_then(|c| c.get("message"))
.and_then(|m| m.get("content"))
.and_then(|c| c.as_str())
.map(|s| s.to_string())
.ok_or_else(|| format!("LLM no content: {v}"))?;
Ok(text)
}
async fn call_llm_summary(s: &AppState, transcript: &str) -> Result<String, String> {
let trimmed = if transcript.chars().count() > 12000 {
let mut out = String::new();