notes(asr): 切片串行 ASR 绕单文件大小限制
deploy notes / build-and-deploy (push) Successful in 3m40s

ASR server 直接 500 拒绝大文件 (15MB / ~15min 的录音,4.7s 内即返回 500),不是
处理超时。改成:sidecar 装 ffmpeg → /transcribe endpoint 把音频切 60s
段 → 串行调外部 ASR → 拼接 transcript。notes 主容器 call_asr 改成 POST
到 sidecar /transcribe(timeout 1h 给长录音留余地)。

- feishu sidecar Dockerfile + ffmpeg + requests
- server.py 加 TranscribeReq;fallback -c copy 失败时 re-encode AAC
- main.rs 删除 asr_url/asr_token 字段(now sidecar concern)
- k8s manifest: ASR_URL/ASR_TOKEN 从主容器移到 feishu sidecar env
This commit is contained in:
Fam Zheng
2026-05-17 22:38:05 +01:00
parent e5a87cc65f
commit 688ccdc76f
4 changed files with 104 additions and 39 deletions
+3 -2
View File
@@ -4,7 +4,7 @@
FROM node:20-slim FROM node:20-slim
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip python3-markdown ca-certificates curl \ python3 python3-pip python3-markdown ca-certificates curl ffmpeg \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# lark-cli postinstall 调 curl 下二进制,没 curl 会报 spawnSync ENOENT # lark-cli postinstall 调 curl 下二进制,没 curl 会报 spawnSync ENOENT
@@ -12,7 +12,8 @@ RUN npm install -g @larksuite/cli@1.0.29
RUN pip install --no-cache-dir --break-system-packages \ RUN pip install --no-cache-dir --break-system-packages \
fastapi==0.115.6 \ fastapi==0.115.6 \
uvicorn==0.34.0 uvicorn==0.34.0 \
requests==2.32.3
COPY markdown-to-feishu /usr/local/bin/markdown-to-feishu COPY markdown-to-feishu /usr/local/bin/markdown-to-feishu
RUN chmod +x /usr/local/bin/markdown-to-feishu RUN chmod +x /usr/local/bin/markdown-to-feishu
+82 -5
View File
@@ -1,17 +1,19 @@
"""notes feishu sidecarHTTP 包一层 markdown-to-feishu。 """notes 多用途 sidecar
POST /transcribe — 用 ffmpeg 切片 + 串行调外部 ASR,绕过单请求大小限制
POST /convert {md_path, title?, existing_doc_id?} POST /convert — markdown-to-feishu,把会议纪要 push 飞书 docx
→ 跑 markdown-to-feishu,parse 最后那段 JSON,返回 {doc_id, url}
""" """
import json import json
import logging import logging
import os import os
import re import shutil
import subprocess import subprocess
import tempfile
import uuid
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
import requests
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
@@ -27,6 +29,81 @@ def healthz():
return {'ok': True} return {'ok': True}
class TranscribeReq(BaseModel):
    # Request body for POST /transcribe.
    # audio_path: path of the audio file to transcribe, as seen from inside
    # this sidecar container.
    audio_path: str
    # chunk_seconds: length of each ffmpeg segment in seconds.
    chunk_seconds: int = 60  # 60s ≈ 1-1.5 MB of m4a, far below the ASR size limit
def _ffmpeg_segment(src, out_pattern, chunk_seconds, codec_args, timeout):
    """Run one ffmpeg segment-muxer pass that splits *src* into fixed-length pieces.

    Raises subprocess.CalledProcessError / subprocess.TimeoutExpired on failure.
    """
    subprocess.run(
        ['ffmpeg', '-y', '-i', str(src),
         '-f', 'segment', '-segment_time', str(chunk_seconds),
         *codec_args, '-reset_timestamps', '1',
         str(out_pattern)],
        check=True, capture_output=True, timeout=timeout,
    )


def _stderr_tail(exc, limit=300):
    """Return the last *limit* characters of a subprocess error's stderr."""
    raw = getattr(exc, 'stderr', None) or b''
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8', errors='replace')
    return raw[-limit:]


def _split_audio(src, tmp, chunk_seconds):
    """Split *src* into chunks inside *tmp* and return them in playback order.

    First tries a stream copy (fast, lossless). If ffmpeg rejects that, falls
    back to re-encoding to mono 16 kHz AAC, which is slower but more robust.
    Raises HTTPException(500) when ffmpeg fails, times out, or yields nothing.
    """
    ext = src.suffix.lstrip('.') or 'm4a'
    try:
        try:
            _ffmpeg_segment(src, tmp / f'chunk_%03d.{ext}', chunk_seconds,
                            ['-c', 'copy'], timeout=180)
        except subprocess.CalledProcessError:
            log.warning("ffmpeg -c copy 失败,回退 re-encode")
            # Drop any partial output of the failed attempt so it cannot be
            # mistaken for valid chunks.
            for stale in tmp.glob(f'chunk_*.{ext}'):
                stale.unlink(missing_ok=True)
            _ffmpeg_segment(src, tmp / 'chunk_%03d.m4a', chunk_seconds,
                            ['-c:a', 'aac', '-b:a', '64k', '-ac', '1', '-ar', '16000'],
                            timeout=600)
            ext = 'm4a'
    except subprocess.CalledProcessError as e:
        raise HTTPException(500, f'ffmpeg failed: {_stderr_tail(e)}') from e
    except subprocess.TimeoutExpired as e:
        raise HTTPException(500, f'ffmpeg timed out after {e.timeout}s') from e

    # Order by (name length, name): %03d grows to four digits past chunk 999,
    # and a plain lexicographic sort would put chunk_1000 before chunk_101.
    chunks = sorted(tmp.glob(f'chunk_*.{ext}'), key=lambda p: (len(p.name), p.name))
    if not chunks:
        raise HTTPException(500, 'ffmpeg produced 0 chunks')
    return chunks


def _asr_chunk(chunk, index, asr_url, asr_token):
    """Send one chunk to the external ASR endpoint and return its stripped text.

    Raises HTTPException(502) on transport errors, non-2xx responses, or a
    response body that is not a JSON object with a string 'text'.
    """
    try:
        with open(chunk, 'rb') as f:
            r = requests.post(
                asr_url,
                headers={'Authorization': f'Bearer {asr_token}'},
                files={'file': (chunk.name, f, 'audio/mp4')},
                data={'model': 'qwen3-asr', 'response_format': 'json'},
                timeout=300,
            )
    except requests.RequestException as e:
        # Connection failures and timeouts are upstream problems, not ours.
        raise HTTPException(502, f'ASR chunk {index} request failed: {e}') from e
    if not r.ok:
        raise HTTPException(502, f'ASR chunk {index} {r.status_code}: {r.text[:300]}')
    try:
        payload = r.json()
    except ValueError as e:
        raise HTTPException(502, f'ASR chunk {index} bad json: {r.text[:200]}') from e
    text = payload.get('text', '') if isinstance(payload, dict) else None
    if not isinstance(text, str):
        raise HTTPException(502, f'ASR chunk {index} bad json: {r.text[:200]}')
    return text.strip()


@app.post('/transcribe')
def transcribe(req: TranscribeReq):
    """Split the audio with ffmpeg, feed chunks to the external ASR serially, join the text.

    Returns {'text': <transcript, one line per non-empty chunk>, 'chunks': <count>}.

    Raises:
        HTTPException 400: audio_path is not a regular file, or chunk_seconds <= 0.
        HTTPException 500: ASR_URL/ASR_TOKEN missing, or ffmpeg failed.
        HTTPException 502: the ASR endpoint failed or returned an unusable body.
    """
    src = Path(req.audio_path)
    if not src.is_file():
        raise HTTPException(400, f'audio not found: {src}')
    if req.chunk_seconds <= 0:
        raise HTTPException(400, f'chunk_seconds must be positive: {req.chunk_seconds}')
    asr_url = os.environ.get('ASR_URL', '')
    asr_token = os.environ.get('ASR_TOKEN', '')
    if not asr_url or not asr_token:
        raise HTTPException(500, 'ASR_URL/ASR_TOKEN not configured in sidecar')

    # Per-request scratch directory; always removed in the finally below.
    tmp = Path(tempfile.gettempdir()) / f'transcribe-{uuid.uuid4().hex}'
    tmp.mkdir(parents=True)
    try:
        chunks = _split_audio(src, tmp, req.chunk_seconds)
        total = len(chunks)
        log.info("split %s -> %d chunks", src.name, total)

        all_text = []
        for i, c in enumerate(chunks, 1):
            log.info("ASR chunk %d/%d (%s, %d KB)", i, total, c.name, c.stat().st_size // 1024)
            all_text.append(_asr_chunk(c, i, asr_url, asr_token))

        full = '\n'.join(t for t in all_text if t)
        return {'text': full, 'chunks': total}
    finally:
        shutil.rmtree(tmp, ignore_errors=True)
class ConvertReq(BaseModel): class ConvertReq(BaseModel):
md_path: str md_path: str
title: Optional[str] = None title: Optional[str] = None
+8 -7
View File
@@ -72,8 +72,6 @@ spec:
value: /data/app.db value: /data/app.db
- name: BLOBS_DIR - name: BLOBS_DIR
value: /data/blobs value: /data/blobs
- name: ASR_URL
value: http://18.159.112.195:8848/v1/audio/transcriptions
- name: LLM_GATEWAY - name: LLM_GATEWAY
value: http://3.135.65.204:8848/v1 value: http://3.135.65.204:8848/v1
- name: LLM_MODEL - name: LLM_MODEL
@@ -83,11 +81,6 @@ spec:
secretKeyRef: secretKeyRef:
name: notes-creds name: notes-creds
key: passphrase key: passphrase
- name: ASR_TOKEN
valueFrom:
secretKeyRef:
name: notes-creds
key: asr_token
- name: LLM_TOKEN - name: LLM_TOKEN
valueFrom: valueFrom:
secretKeyRef: secretKeyRef:
@@ -115,6 +108,14 @@ spec:
ports: ports:
- containerPort: 8002 - containerPort: 8002
name: feishu name: feishu
env:
- name: ASR_URL
value: http://18.159.112.195:8848/v1/audio/transcriptions
- name: ASR_TOKEN
valueFrom:
secretKeyRef:
name: notes-creds
key: asr_token
readinessProbe: readinessProbe:
httpGet: { path: /healthz, port: feishu } httpGet: { path: /healthz, port: feishu }
initialDelaySeconds: 3 initialDelaySeconds: 3
+11 -25
View File
@@ -31,8 +31,6 @@ struct AppState {
db: Arc<Mutex<Connection>>, db: Arc<Mutex<Connection>>,
blobs_dir: PathBuf, blobs_dir: PathBuf,
passphrase: String, passphrase: String,
asr_url: String,
asr_token: String,
llm_gateway: String, llm_gateway: String,
llm_token: String, llm_token: String,
llm_model: String, llm_model: String,
@@ -53,9 +51,7 @@ async fn main() -> std::io::Result<()> {
if passphrase.is_empty() { if passphrase.is_empty() {
tracing::warn!("PASSPHRASE not set — all /api/* will return 401"); tracing::warn!("PASSPHRASE not set — all /api/* will return 401");
} }
let asr_url = std::env::var("ASR_URL") // ASR 现在由 sidecar 调(切片串行),主容器不再直接调外部 ASR
.unwrap_or_else(|_| "http://18.159.112.195:8848/v1/audio/transcriptions".into());
let asr_token = std::env::var("ASR_TOKEN").unwrap_or_default();
let llm_gateway = let llm_gateway =
std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into()); std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into());
let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default(); let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default();
@@ -95,8 +91,6 @@ async fn main() -> std::io::Result<()> {
db: Arc::new(Mutex::new(conn)), db: Arc::new(Mutex::new(conn)),
blobs_dir, blobs_dir,
passphrase, passphrase,
asr_url,
asr_token,
llm_gateway, llm_gateway,
llm_token, llm_token,
llm_model, llm_model,
@@ -465,38 +459,30 @@ fn set_status(s: &AppState, id: i64, status: &str, transcript: Option<&str>, err
async fn call_asr( async fn call_asr(
s: &AppState, s: &AppState,
path: &std::path::Path, path: &std::path::Path,
filename: &str, _filename: &str,
) -> Result<String, String> { ) -> Result<String, String> {
let bytes = tokio::fs::read(path).await.map_err(|e| e.to_string())?; // 走 sidecar /transcribe:sidecar 用 ffmpeg 切片 + 串行调外部 ASR,绕过 ASR server 单文件大小限制
let part = reqwest::multipart::Part::bytes(bytes) let url = format!("{}/transcribe", s.feishu_url.trim_end_matches('/'));
.file_name(filename.to_string()) let payload = json!({ "audio_path": path.to_string_lossy() });
.mime_str("audio/mpeg")
.map_err(|e| e.to_string())?;
let form = reqwest::multipart::Form::new()
.text("model", "qwen3-asr")
.text("response_format", "json")
.part("file", part);
let resp = s let resp = s
.http .http
.post(&s.asr_url) .post(&url)
.bearer_auth(&s.asr_token) .json(&payload)
.multipart(form) .timeout(std::time::Duration::from_secs(3600))
.timeout(std::time::Duration::from_secs(600))
.send() .send()
.await .await
.map_err(|e| format!("connect: {e}"))?; .map_err(|e| format!("connect sidecar: {e}"))?;
if !resp.status().is_success() { if !resp.status().is_success() {
let st = resp.status(); let st = resp.status();
let body = resp.text().await.unwrap_or_default(); let body = resp.text().await.unwrap_or_default();
return Err(format!("ASR {st}: {body}")); return Err(format!("sidecar /transcribe {st}: {body}"));
} }
let v: Value = resp.json().await.map_err(|e| format!("decode: {e}"))?; let v: Value = resp.json().await.map_err(|e| format!("decode: {e}"))?;
let text = v let text = v
.get("text") .get("text")
.and_then(|x| x.as_str()) .and_then(|x| x.as_str())
.map(|s| s.to_string()) .map(|s| s.to_string())
.ok_or_else(|| format!("ASR response no 'text': {v}"))?; .ok_or_else(|| format!("no 'text' in response: {v}"))?;
Ok(text) Ok(text)
} }