- backend: schema 加 cleaned 列;process_recording 流程 ASR → cleaning (LLM 分段+去口语+润色+**加粗高亮**) → summarizing → done
- cleanup LLM 失败不阻塞,继续 summary
- 前端三 block 顺序:纪要 → 清理润色 → 原文(details 折叠)
- 新 status 'cleaning' 也加进 statusLabel / progressText
This commit is contained in:
+83
-8
@@ -81,6 +81,7 @@ async fn main() -> std::io::Result<()> {
|
||||
// 兼容旧 db 增量加列;已存在忽略错误
|
||||
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_doc_id TEXT", []);
|
||||
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_url TEXT", []);
|
||||
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN cleaned TEXT", []);
|
||||
tracing::info!(%db_path, blobs = %blobs_dir.display(), "notes ready");
|
||||
|
||||
let http = reqwest::Client::builder()
|
||||
@@ -247,6 +248,7 @@ struct RecordingDetail {
|
||||
size_bytes: i64,
|
||||
status: String,
|
||||
transcript: Option<String>,
|
||||
cleaned: Option<String>,
|
||||
summary: Option<String>,
|
||||
error: Option<String>,
|
||||
created_at: String,
|
||||
@@ -293,30 +295,30 @@ async fn get_recording(
|
||||
let conn = s.db.lock().unwrap();
|
||||
type Row = (
|
||||
String, String, String, i64, String,
|
||||
Option<String>, Option<String>, Option<String>, String,
|
||||
Option<String>, Option<String>, Option<String>, Option<String>, String,
|
||||
Option<String>, Option<String>,
|
||||
);
|
||||
let row: Option<Row> = conn
|
||||
.query_row(
|
||||
"SELECT title, filename, mime, size_bytes, status,
|
||||
transcript, summary, error, created_at,
|
||||
transcript, cleaned, summary, error, created_at,
|
||||
feishu_doc_id, feishu_url
|
||||
FROM recordings WHERE id = ?1",
|
||||
params![id],
|
||||
|r| {
|
||||
Ok((
|
||||
r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?,
|
||||
r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?,
|
||||
r.get(9)?, r.get(10)?,
|
||||
r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?, r.get(9)?,
|
||||
r.get(10)?, r.get(11)?,
|
||||
))
|
||||
},
|
||||
)
|
||||
.optional()?;
|
||||
let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at,
|
||||
let (title, filename, mime, size_bytes, status, transcript, cleaned, summary, error, created_at,
|
||||
feishu_doc_id, feishu_url) = row.ok_or(AppError::NotFound)?;
|
||||
Ok(JsonResp(RecordingDetail {
|
||||
id, title, filename, mime, size_bytes, status,
|
||||
transcript, summary, error, created_at,
|
||||
transcript, cleaned, summary, error, created_at,
|
||||
feishu_doc_id, feishu_url,
|
||||
}))
|
||||
}
|
||||
@@ -455,15 +457,34 @@ async fn process_recording(s: AppState, id: i64) {
|
||||
return;
|
||||
}
|
||||
};
|
||||
// 写 transcript 但还没 summary
|
||||
// 写 transcript,进入 cleaning
|
||||
{
|
||||
let conn = s.db.lock().unwrap();
|
||||
let _ = conn.execute(
|
||||
"UPDATE recordings SET transcript = ?1, status = 'summarizing' WHERE id = ?2",
|
||||
"UPDATE recordings SET transcript = ?1, status = 'cleaning' WHERE id = ?2",
|
||||
params![&transcript, id],
|
||||
);
|
||||
}
|
||||
|
||||
// LLM cleanup:分段 + 去口语 + 润色 + 高亮(失败也继续 summary,不阻塞)
|
||||
match call_llm_cleanup(&s, &transcript).await {
|
||||
Ok(c) => {
|
||||
let conn = s.db.lock().unwrap();
|
||||
let _ = conn.execute(
|
||||
"UPDATE recordings SET cleaned = ?1, status = 'summarizing' WHERE id = ?2",
|
||||
params![&c, id],
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(%id, error = %e, "cleanup failed, skip and continue to summary");
|
||||
let conn = s.db.lock().unwrap();
|
||||
let _ = conn.execute(
|
||||
"UPDATE recordings SET status = 'summarizing' WHERE id = ?1",
|
||||
params![id],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// LLM:生成会议纪要 + 标题
|
||||
let raw = match call_llm_summary(&s, &transcript).await {
|
||||
Ok(t) => t,
|
||||
@@ -559,6 +580,60 @@ async fn call_asr(
|
||||
Ok(text)
|
||||
}
|
||||
|
||||
async fn call_llm_cleanup(s: &AppState, transcript: &str) -> Result<String, String> {
|
||||
let trimmed = if transcript.chars().count() > 12000 {
|
||||
let mut out = String::new();
|
||||
for (i, c) in transcript.chars().enumerate() {
|
||||
if i >= 12000 { break; }
|
||||
out.push(c);
|
||||
}
|
||||
out + "\n\n[... 后文截断]"
|
||||
} else {
|
||||
transcript.to_string()
|
||||
};
|
||||
let payload = json!({
|
||||
"model": s.llm_model,
|
||||
"messages": [
|
||||
{ "role": "system", "content":
|
||||
"你是 ASR 转写后处理助手。把下面这段连续无标点的转写整理成可读版本:\n\
|
||||
\n\
|
||||
1. **自动分段**:按话题/语义换段,每段 2-5 句\n\
|
||||
2. **加标点**:句号、问号、感叹号、逗号、引号\n\
|
||||
3. **去口语噪音**:删掉「嗯/啊/那个/就是/对/然后...」等填充词,但保留实际含义的连接词\n\
|
||||
4. **轻度润色**:通顺、语法、错别字(结合上下文修 ASR 错字),但**不要总结、不要改变原意、不要添加内容**\n\
|
||||
5. **重点高亮**:把关键判断、结论、决定、数字、名词用 markdown `**...**` 加粗\n\
|
||||
\n\
|
||||
输出纯 markdown 段落,不要标题、不要列表、不要解释。" },
|
||||
{ "role": "user", "content": trimmed },
|
||||
],
|
||||
"temperature": 0.3,
|
||||
});
|
||||
let url = format!("{}/chat/completions", s.llm_gateway.trim_end_matches('/'));
|
||||
let resp = s
|
||||
.http
|
||||
.post(&url)
|
||||
.bearer_auth(&s.llm_token)
|
||||
.json(&payload)
|
||||
.timeout(std::time::Duration::from_secs(600))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("connect: {e}"))?;
|
||||
if !resp.status().is_success() {
|
||||
let st = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
return Err(format!("LLM {st}: {body}"));
|
||||
}
|
||||
let v: Value = resp.json().await.map_err(|e| format!("decode: {e}"))?;
|
||||
let text = v
|
||||
.get("choices").and_then(|c| c.get(0))
|
||||
.and_then(|c| c.get("message"))
|
||||
.and_then(|m| m.get("content"))
|
||||
.and_then(|c| c.as_str())
|
||||
.map(|s| s.to_string())
|
||||
.ok_or_else(|| format!("LLM no content: {v}"))?;
|
||||
Ok(text)
|
||||
}
|
||||
|
||||
async fn call_llm_summary(s: &AppState, transcript: &str) -> Result<String, String> {
|
||||
let trimmed = if transcript.chars().count() > 12000 {
|
||||
let mut out = String::new();
|
||||
|
||||
Reference in New Issue
Block a user