notes(asr): 切片串行 ASR 绕单文件大小限制
deploy notes / build-and-deploy (push) Successful in 3m40s

ASR server 直接 500 拒绝大文件 (15MB / ~15min 4.7s 即返回 500),不是
处理超时。改成:sidecar 装 ffmpeg → /transcribe endpoint 把音频切 60s
段 → 串行调外部 ASR → 拼接 transcript。notes 主容器 call_asr 改成 POST
到 sidecar /transcribe(timeout 1h 给长录音留余地)。

- feishu sidecar Dockerfile + ffmpeg + requests
- server.py 加 TranscribeReq;fallback -c copy 失败时 re-encode AAC
- main.rs 删除 asr_url/asr_token 字段(now sidecar concern)
- k8s manifest: ASR_URL/ASR_TOKEN 从主容器移到 feishu sidecar env
This commit is contained in:
Fam Zheng
2026-05-17 22:38:05 +01:00
parent e5a87cc65f
commit 688ccdc76f
4 changed files with 104 additions and 39 deletions
+3 -2
View File
@@ -4,7 +4,7 @@
FROM node:20-slim
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip python3-markdown ca-certificates curl \
python3 python3-pip python3-markdown ca-certificates curl ffmpeg \
&& rm -rf /var/lib/apt/lists/*
# lark-cli postinstall 调 curl 下二进制,没 curl 会报 spawnSync ENOENT
@@ -12,7 +12,8 @@ RUN npm install -g @larksuite/cli@1.0.29
RUN pip install --no-cache-dir --break-system-packages \
fastapi==0.115.6 \
uvicorn==0.34.0
uvicorn==0.34.0 \
requests==2.32.3
COPY markdown-to-feishu /usr/local/bin/markdown-to-feishu
RUN chmod +x /usr/local/bin/markdown-to-feishu
+82 -5
View File
@@ -1,17 +1,19 @@
"""notes feishu sidecarHTTP 包一层 markdown-to-feishu。
POST /convert {md_path, title?, existing_doc_id?}
→ 跑 markdown-to-feishu,parse 最后那段 JSON,返回 {doc_id, url}
"""notes 多用途 sidecar
POST /transcribe — 用 ffmpeg 切片 + 串行调外部 ASR,绕过单请求大小限制
POST /convert — markdown-to-feishu,把会议纪要 push 飞书 docx
"""
import json
import logging
import os
import re
import shutil
import subprocess
import tempfile
import uuid
from pathlib import Path
from typing import Optional
import requests
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
@@ -27,6 +29,81 @@ def healthz():
return {'ok': True}
class TranscribeReq(BaseModel):
    # Request body for POST /transcribe.

    # Path of the audio file to transcribe.
    audio_path: str

    # Segment length in seconds; 60 s is roughly 1-1.5 MB of m4a, far below
    # the ASR size limit.
    chunk_seconds: int = 60
def _chunk_index(path):
    """Return the numeric segment index encoded in a ``chunk_<n>.<ext>`` file name."""
    return int(path.stem.rsplit('_', 1)[-1])


def _ffmpeg_segment(src, out_pattern, chunk_seconds, codec_args, timeout):
    """Run ffmpeg's segment muxer on ``src``, writing pieces to ``out_pattern``.

    ``codec_args`` selects stream copy or re-encode. Raises
    ``subprocess.CalledProcessError`` on a non-zero exit and
    ``subprocess.TimeoutExpired`` when ``timeout`` seconds elapse.
    """
    subprocess.run(
        ['ffmpeg', '-y', '-i', str(src),
         '-f', 'segment', '-segment_time', str(chunk_seconds),
         *codec_args, '-reset_timestamps', '1',
         str(out_pattern)],
        check=True, capture_output=True, timeout=timeout,
    )


@app.post('/transcribe')
def transcribe(req: TranscribeReq):
    """Split the audio with ffmpeg, send each chunk to the external ASR in order, join the text.

    Returns ``{'text': <joined transcript>, 'chunks': <number of chunks>}``.

    Raises HTTPException:
      400 - audio file missing, or ``chunk_seconds`` is not positive
      500 - ASR_URL/ASR_TOKEN unset, ffmpeg failed, or ffmpeg produced no chunks
      502 - the ASR request failed, returned a non-2xx status, or returned bad JSON
      504 - ffmpeg exceeded its time limit
    """
    src = Path(req.audio_path)
    # is_file() rather than exists(): a directory can never be transcribed.
    if not src.is_file():
        raise HTTPException(400, f'audio not found: {src}')
    # A zero or negative segment length would only surface as an ffmpeg failure.
    if req.chunk_seconds <= 0:
        raise HTTPException(400, f'chunk_seconds must be positive, got {req.chunk_seconds}')

    asr_url = os.environ.get('ASR_URL', '')
    asr_token = os.environ.get('ASR_TOKEN', '')
    if not asr_url or not asr_token:
        raise HTTPException(500, 'ASR_URL/ASR_TOKEN not configured in sidecar')

    tmp = Path(tempfile.gettempdir()) / f'transcribe-{uuid.uuid4().hex}'
    tmp.mkdir(parents=True)
    try:
        # First try a stream copy (fast, lossless). Some container formats
        # cannot be segmented that way, so fall back to re-encoding as AAC.
        ext = src.suffix.lstrip('.') or 'm4a'
        try:
            try:
                _ffmpeg_segment(src, tmp / f'chunk_%03d.{ext}', req.chunk_seconds,
                                ['-c', 'copy'], timeout=180)
            except subprocess.CalledProcessError:
                # Fallback: re-encode to AAC - slower but reliable.
                log.warning("ffmpeg -c copy 失败,回退 re-encode")
                # Drop partial output of the failed pass so it is not picked up below.
                for stale in tmp.glob(f'chunk_*.{ext}'):
                    stale.unlink(missing_ok=True)
                ext = 'm4a'
                _ffmpeg_segment(src, tmp / 'chunk_%03d.m4a', req.chunk_seconds,
                                ['-c:a', 'aac', '-b:a', '64k', '-ac', '1', '-ar', '16000'],
                                timeout=600)
        except subprocess.CalledProcessError as e:
            # Both passes failed: report the tail of ffmpeg's stderr instead of
            # letting an unhandled exception become an opaque 500.
            detail = (e.stderr or b'').decode('utf-8', 'replace')[-300:]
            raise HTTPException(500, f'ffmpeg failed: {detail}') from e
        except subprocess.TimeoutExpired as e:
            raise HTTPException(504, f'ffmpeg timed out after {e.timeout}s') from e

        # Sort by numeric index: once %03d overflows, 'chunk_1000' would sort
        # before 'chunk_999' lexicographically and scramble the transcript.
        chunks = sorted(tmp.glob(f'chunk_*.{ext}'), key=_chunk_index)
        if not chunks:
            raise HTTPException(500, 'ffmpeg produced 0 chunks')
        log.info("split %s into %d chunks", src.name, len(chunks))

        all_text = []
        for i, c in enumerate(chunks, 1):
            log.info("ASR chunk %d/%d (%s, %d KB)", i, len(chunks), c.name, c.stat().st_size // 1024)
            try:
                with open(c, 'rb') as f:
                    # NOTE(review): the MIME type is always 'audio/mp4', even when the
                    # copy path kept a different source extension - confirm the ASR
                    # server ignores it.
                    r = requests.post(
                        asr_url,
                        headers={'Authorization': f'Bearer {asr_token}'},
                        files={'file': (c.name, f, 'audio/mp4')},
                        data={'model': 'qwen3-asr', 'response_format': 'json'},
                        timeout=300,
                    )
            except requests.RequestException as e:
                # Connection errors and timeouts are upstream failures, not sidecar bugs.
                raise HTTPException(502, f'ASR chunk {i} request failed: {e}') from e
            if not r.ok:
                raise HTTPException(502, f'ASR chunk {i} {r.status_code}: {r.text[:300]}')
            try:
                text = r.json().get('text', '').strip()
            except Exception as e:
                raise HTTPException(502, f'ASR chunk {i} bad json: {r.text[:200]}') from e
            all_text.append(text)

        # Chunks with empty text are left out of the joined transcript.
        full = '\n'.join(t for t in all_text if t)
        return {'text': full, 'chunks': len(chunks)}
    finally:
        # Always remove the scratch directory, on success and on failure.
        shutil.rmtree(tmp, ignore_errors=True)
class ConvertReq(BaseModel):
md_path: str
title: Optional[str] = None
+8 -7
View File
@@ -72,8 +72,6 @@ spec:
value: /data/app.db
- name: BLOBS_DIR
value: /data/blobs
- name: ASR_URL
value: http://18.159.112.195:8848/v1/audio/transcriptions
- name: LLM_GATEWAY
value: http://3.135.65.204:8848/v1
- name: LLM_MODEL
@@ -83,11 +81,6 @@ spec:
secretKeyRef:
name: notes-creds
key: passphrase
- name: ASR_TOKEN
valueFrom:
secretKeyRef:
name: notes-creds
key: asr_token
- name: LLM_TOKEN
valueFrom:
secretKeyRef:
@@ -115,6 +108,14 @@ spec:
ports:
- containerPort: 8002
name: feishu
env:
- name: ASR_URL
value: http://18.159.112.195:8848/v1/audio/transcriptions
- name: ASR_TOKEN
valueFrom:
secretKeyRef:
name: notes-creds
key: asr_token
readinessProbe:
httpGet: { path: /healthz, port: feishu }
initialDelaySeconds: 3
+11 -25
View File
@@ -31,8 +31,6 @@ struct AppState {
db: Arc<Mutex<Connection>>,
blobs_dir: PathBuf,
passphrase: String,
asr_url: String,
asr_token: String,
llm_gateway: String,
llm_token: String,
llm_model: String,
@@ -53,9 +51,7 @@ async fn main() -> std::io::Result<()> {
if passphrase.is_empty() {
tracing::warn!("PASSPHRASE not set — all /api/* will return 401");
}
let asr_url = std::env::var("ASR_URL")
.unwrap_or_else(|_| "http://18.159.112.195:8848/v1/audio/transcriptions".into());
let asr_token = std::env::var("ASR_TOKEN").unwrap_or_default();
// ASR 现在由 sidecar 调(切片串行),主容器不再直接调外部 ASR
let llm_gateway =
std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into());
let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default();
@@ -95,8 +91,6 @@ async fn main() -> std::io::Result<()> {
db: Arc::new(Mutex::new(conn)),
blobs_dir,
passphrase,
asr_url,
asr_token,
llm_gateway,
llm_token,
llm_model,
@@ -465,38 +459,30 @@ fn set_status(s: &AppState, id: i64, status: &str, transcript: Option<&str>, err
async fn call_asr(
s: &AppState,
path: &std::path::Path,
filename: &str,
_filename: &str,
) -> Result<String, String> {
let bytes = tokio::fs::read(path).await.map_err(|e| e.to_string())?;
let part = reqwest::multipart::Part::bytes(bytes)
.file_name(filename.to_string())
.mime_str("audio/mpeg")
.map_err(|e| e.to_string())?;
let form = reqwest::multipart::Form::new()
.text("model", "qwen3-asr")
.text("response_format", "json")
.part("file", part);
// 走 sidecar /transcribesidecar 用 ffmpeg 切片 + 串行调外部 ASR,绕过 ASR server 单文件大小限制
let url = format!("{}/transcribe", s.feishu_url.trim_end_matches('/'));
let payload = json!({ "audio_path": path.to_string_lossy() });
let resp = s
.http
.post(&s.asr_url)
.bearer_auth(&s.asr_token)
.multipart(form)
.timeout(std::time::Duration::from_secs(600))
.post(&url)
.json(&payload)
.timeout(std::time::Duration::from_secs(3600))
.send()
.await
.map_err(|e| format!("connect: {e}"))?;
.map_err(|e| format!("connect sidecar: {e}"))?;
if !resp.status().is_success() {
let st = resp.status();
let body = resp.text().await.unwrap_or_default();
return Err(format!("ASR {st}: {body}"));
return Err(format!("sidecar /transcribe {st}: {body}"));
}
let v: Value = resp.json().await.map_err(|e| format!("decode: {e}"))?;
let text = v
.get("text")
.and_then(|x| x.as_str())
.map(|s| s.to_string())
.ok_or_else(|| format!("ASR response no 'text': {v}"))?;
.ok_or_else(|| format!("no 'text' in response: {v}"))?;
Ok(text)
}