diff --git a/apps/notes/frontend/src/App.vue b/apps/notes/frontend/src/App.vue index 3522e33..e263243 100644 --- a/apps/notes/frontend/src/App.vue +++ b/apps/notes/frontend/src/App.vue @@ -118,9 +118,20 @@
-

🎙️ 转写原文

-

{{ selected.status === 'failed' ? '转写失败' : '尚未生成' }}

-
{{ selected.transcript }}
+

✨ 清理润色

+

空(cleanup step 失败,看下方原文)

+

+ {{ progressText(selected.status) }}… +

+
+
+ +
+
+

🎙️ 转写原文(默认折叠)

+

{{ selected.status === 'failed' ? '转写失败' : '尚未生成' }}

+
{{ selected.transcript }}
+
@@ -392,7 +403,8 @@ function statusLabel(s) { return ({ pending: '⏳ 排队', transcribing: '🎙️ 转写中', - summarizing: '✏️ 总结中', + cleaning: '✨ 清理润色中', + summarizing: '📋 总结中', done: '✓ 完成', failed: '✗ 失败', })[s] || s @@ -401,7 +413,8 @@ function progressText(s) { return ({ pending: '等候处理', transcribing: '语音转写中(视音频长度可能要几分钟)', - summarizing: 'LLM 生成纪要中', + cleaning: 'LLM 分段 + 去口语 + 润色 + 高亮', + summarizing: 'LLM 生成会议纪要', })[s] || s } function fmtSize(b) { @@ -740,6 +753,29 @@ input, textarea { font-family: inherit; background: transparent; border: none; c } .block.err pre { white-space: pre-wrap; color: var(--accent-red); font-size: 12px; } +.block details > summary { + cursor: pointer; + list-style: none; + user-select: none; + margin-bottom: 4px; +} +.block details > summary::-webkit-details-marker { display: none; } +.block details > summary::before { + content: '▶'; + display: inline-block; + margin-right: 6px; + font-size: 11px; + color: var(--text-mute); + transition: transform 0.15s; +} +.block details[open] > summary::before { transform: rotate(90deg); } +.block details > summary h3 { + margin: 0 !important; + text-transform: none; + letter-spacing: normal; + font-size: 13px; +} + @media (max-width: 768px) { .root { flex-direction: column; } .sidebar { width: 100%; height: 45vh; border-right: none; border-bottom: 1px solid var(--border-soft); } diff --git a/apps/notes/src/main.rs b/apps/notes/src/main.rs index 3eabfba..f2d27e3 100644 --- a/apps/notes/src/main.rs +++ b/apps/notes/src/main.rs @@ -81,6 +81,7 @@ async fn main() -> std::io::Result<()> { // 兼容旧 db 增量加列;已存在忽略错误 let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_doc_id TEXT", []); let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_url TEXT", []); + let _ = conn.execute("ALTER TABLE recordings ADD COLUMN cleaned TEXT", []); tracing::info!(%db_path, blobs = %blobs_dir.display(), "notes ready"); let http = reqwest::Client::builder() @@ -247,6 +248,7 @@ struct RecordingDetail { size_bytes: i64, status: String, transcript: Option, + cleaned: Option, summary: Option, error: Option, created_at: String, @@ -293,30 +295,30 @@ async fn get_recording( let conn = s.db.lock().unwrap(); type Row = ( String, String, String, i64, String, - Option, Option, Option, String, + Option, Option, Option, Option, String, Option, Option, ); let row: Option = conn .query_row( "SELECT title, filename, mime, size_bytes, status, - transcript, summary, error, created_at, + transcript, cleaned, summary, error, created_at, feishu_doc_id, feishu_url FROM recordings WHERE id = ?1", params![id], |r| { Ok(( r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?, - r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?, - r.get(9)?, r.get(10)?, + r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?, r.get(9)?, + r.get(10)?, r.get(11)?, )) }, ) .optional()?; - let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at, + let (title, filename, mime, size_bytes, status, transcript, cleaned, summary, error, created_at, feishu_doc_id, feishu_url) = row.ok_or(AppError::NotFound)?; Ok(JsonResp(RecordingDetail { id, title, filename, mime, size_bytes, status, - transcript, summary, error, created_at, + transcript, cleaned, summary, error, created_at, feishu_doc_id, feishu_url, })) } @@ -455,15 +457,34 @@ async fn process_recording(s: AppState, id: i64) { return; } }; - // 写 transcript 但还没 summary + // 写 transcript,进入 cleaning { let conn = s.db.lock().unwrap(); let _ = conn.execute( - "UPDATE recordings SET transcript = ?1, status = 'summarizing' WHERE id = ?2", + "UPDATE recordings SET transcript = ?1, status = 'cleaning' WHERE id = ?2", params![&transcript, id], ); } + // LLM cleanup:分段 + 去口语 + 润色 + 高亮(失败也继续 summary,不阻塞) + match call_llm_cleanup(&s, &transcript).await { + Ok(c) => { + let conn = s.db.lock().unwrap(); + let _ = conn.execute( + "UPDATE recordings SET cleaned = ?1, status = 'summarizing' WHERE id = ?2", + params![&c, id], + ); + } + Err(e) => { + tracing::warn!(%id, error = %e, "cleanup failed, skip and continue to summary"); + let conn = s.db.lock().unwrap(); + let _ = conn.execute( + "UPDATE recordings SET status = 'summarizing' WHERE id = ?1", + params![id], + ); + } + } + // LLM:生成会议纪要 + 标题 let raw = match call_llm_summary(&s, &transcript).await { Ok(t) => t, @@ -559,6 +580,60 @@ async fn call_asr( Ok(text) } +async fn call_llm_cleanup(s: &AppState, transcript: &str) -> Result { + let trimmed = if transcript.chars().count() > 12000 { + let mut out = String::new(); + for (i, c) in transcript.chars().enumerate() { + if i >= 12000 { break; } + out.push(c); + } + out + "\n\n[... 后文截断]" + } else { + transcript.to_string() + }; + let payload = json!({ + "model": s.llm_model, + "messages": [ + { "role": "system", "content": + "你是 ASR 转写后处理助手。把下面这段连续无标点的转写整理成可读版本:\n\ + \n\ + 1. **自动分段**:按话题/语义换段,每段 2-5 句\n\ + 2. **加标点**:句号、问号、感叹号、逗号、引号\n\ + 3. **去口语噪音**:删掉「嗯/啊/那个/就是/对/然后...」等填充词,但保留实际含义的连接词\n\ + 4. **轻度润色**:通顺、语法、错别字(结合上下文修 ASR 错字),但**不要总结、不要改变原意、不要添加内容**\n\ + 5. **重点高亮**:把关键判断、结论、决定、数字、名词用 markdown `**...**` 加粗\n\ + \n\ + 输出纯 markdown 段落,不要标题、不要列表、不要解释。" }, + { "role": "user", "content": trimmed }, + ], + "temperature": 0.3, + }); + let url = format!("{}/chat/completions", s.llm_gateway.trim_end_matches('/')); + let resp = s + .http + .post(&url) + .bearer_auth(&s.llm_token) + .json(&payload) + .timeout(std::time::Duration::from_secs(600)) + .send() + .await + .map_err(|e| format!("connect: {e}"))?; + if !resp.status().is_success() { + let st = resp.status(); + let body = resp.text().await.unwrap_or_default(); + return Err(format!("LLM {st}: {body}")); + } + let v: Value = resp.json().await.map_err(|e| format!("decode: {e}"))?; + let text = v + .get("choices").and_then(|c| c.get(0)) + .and_then(|c| c.get("message")) + .and_then(|m| m.get("content")) + .and_then(|c| c.as_str()) + .map(|s| s.to_string()) + .ok_or_else(|| format!("LLM no content: {v}"))?; + Ok(text) +} + async fn call_llm_summary(s: &AppState, transcript: &str) -> Result { let trimmed = if transcript.chars().count() > 12000 { let mut out = String::new();