notes(asr): 切片串行 ASR 绕单文件大小限制
deploy notes / build-and-deploy (push) Successful in 3m40s

ASR server 直接 500 拒绝大文件 (15MB / ~15min 4.7s 即返回 500),不是
处理超时。改成:sidecar 装 ffmpeg → /transcribe endpoint 把音频切 60s
段 → 串行调外部 ASR → 拼接 transcript。notes 主容器 call_asr 改成 POST
到 sidecar /transcribe(timeout 1h 给长录音留余地)。

- feishu sidecar Dockerfile + ffmpeg + requests
- server.py 加 TranscribeReq;fallback -c copy 失败时 re-encode AAC
- main.rs 删除 asr_url/asr_token 字段(now sidecar concern)
- k8s manifest: ASR_URL/ASR_TOKEN 从主容器移到 feishu sidecar env
This commit is contained in:
Fam Zheng
2026-05-17 22:38:05 +01:00
parent e5a87cc65f
commit 688ccdc76f
4 changed files with 104 additions and 39 deletions
+82 -5
View File
@@ -1,17 +1,19 @@
"""notes feishu sidecarHTTP 包一层 markdown-to-feishu。
POST /convert {md_path, title?, existing_doc_id?}
→ 跑 markdown-to-feishu,parse 最后那段 JSON,返回 {doc_id, url}
"""notes 多用途 sidecar
POST /transcribe — 用 ffmpeg 切片 + 串行调外部 ASR,绕过单请求大小限制
POST /convert — markdown-to-feishu,把会议纪要 push 飞书 docx
"""
import json
import logging
import os
import re
import shutil
import subprocess
import tempfile
import uuid
from pathlib import Path
from typing import Optional
import requests
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
@@ -27,6 +29,81 @@ def healthz():
return {'ok': True}
class TranscribeReq(BaseModel):
    """Request body for ``POST /transcribe``."""

    # Path of the audio file to transcribe, as seen from inside the sidecar.
    audio_path: str

    # Segment length in seconds handed to ffmpeg. 60 s is roughly 1-1.5 MB of
    # m4a, far below the ASR size limit.
    chunk_seconds: int = 60
def _chunk_index(path):
    """Return the numeric segment index from a ``chunk_<n>.<ext>`` file name."""
    return int(path.stem.rsplit('_', 1)[-1])


def _ffmpeg_segment(src, out_pattern, chunk_seconds, codec_args, timeout):
    """Run one ffmpeg segment pass over *src*, writing files to *out_pattern*.

    Raises whatever ``subprocess.run(check=True, timeout=...)`` raises.
    """
    subprocess.run(
        ['ffmpeg', '-y', '-i', str(src),
         '-f', 'segment', '-segment_time', str(chunk_seconds),
         *codec_args, '-reset_timestamps', '1',
         str(out_pattern)],
        check=True, capture_output=True, timeout=timeout,
    )


def _split_audio(src, out_dir, chunk_seconds):
    """Split *src* into ``chunk_NNN`` files inside *out_dir*, in playback order.

    First tries a stream copy (fast, lossless). If ffmpeg exits non-zero, the
    partial output is discarded and the audio is re-encoded to mono 16 kHz AAC
    at 64 kbit/s, which is slower but more reliable.

    Raises:
        HTTPException(500): ffmpeg is missing, timed out, or the re-encode
            fallback failed as well.
    """
    ext = src.suffix.lstrip('.') or 'm4a'
    try:
        try:
            _ffmpeg_segment(src, out_dir / f'chunk_%03d.{ext}', chunk_seconds,
                            ['-c', 'copy'], timeout=180)
        except subprocess.CalledProcessError as copy_err:
            log.warning("ffmpeg -c copy 失败,回退 re-encode")
            log.debug("ffmpeg -c copy stderr: %s",
                      (copy_err.stderr or b'').decode('utf-8', 'replace')[-300:])
            # Drop whatever the failed pass left behind so stale segments are
            # never mixed with the re-encoded ones.
            for stale in out_dir.glob('chunk_*'):
                stale.unlink(missing_ok=True)
            _ffmpeg_segment(src, out_dir / 'chunk_%03d.m4a', chunk_seconds,
                            ['-c:a', 'aac', '-b:a', '64k', '-ac', '1', '-ar', '16000'],
                            timeout=600)
    except subprocess.CalledProcessError as err:
        tail = (err.stderr or b'').decode('utf-8', 'replace')[-300:]
        raise HTTPException(500, f'ffmpeg failed: {tail}') from err
    except subprocess.TimeoutExpired as err:
        raise HTTPException(500, f'ffmpeg timed out after {err.timeout}s') from err
    except OSError as err:
        # e.g. the ffmpeg binary is not installed in the container.
        raise HTTPException(500, f'cannot run ffmpeg: {err}') from err
    # Sort numerically: ``%03d`` grows to four digits past 999 segments, and a
    # plain lexicographic sort would then put chunk_1000 before chunk_101.
    return sorted(out_dir.glob('chunk_*'), key=_chunk_index)


def _transcribe_chunk(chunk, index, asr_url, asr_token):
    """Upload one audio chunk to the external ASR and return its stripped text.

    Raises:
        HTTPException(502): the request failed, the ASR answered with a
            non-2xx status, or the body was not the expected JSON object.
    """
    try:
        with open(chunk, 'rb') as f:
            r = requests.post(
                asr_url,
                headers={'Authorization': f'Bearer {asr_token}'},
                files={'file': (chunk.name, f, 'audio/mp4')},
                data={'model': 'qwen3-asr', 'response_format': 'json'},
                timeout=300,
            )
    except requests.RequestException as err:
        raise HTTPException(502, f'ASR chunk {index} request failed: {err}') from err
    if not r.ok:
        raise HTTPException(502, f'ASR chunk {index} {r.status_code}: {r.text[:300]}')
    try:
        payload = r.json()
    except ValueError as err:
        raise HTTPException(502, f'ASR chunk {index} bad json: {r.text[:200]}') from err
    if not isinstance(payload, dict):
        raise HTTPException(502, f'ASR chunk {index} bad json: {r.text[:200]}')
    text = payload.get('text')
    if text is None:
        # A missing or null transcript is treated as an empty chunk.
        return ''
    if not isinstance(text, str):
        raise HTTPException(502, f'ASR chunk {index} bad json: {r.text[:200]}')
    return text.strip()


@app.post('/transcribe')
def transcribe(req: TranscribeReq):
    """Split the audio with ffmpeg, send chunks to the ASR one by one, join the text.

    Args:
        req: ``audio_path`` of the file to transcribe and the segment length
            ``chunk_seconds``.

    Returns:
        ``{'text': <non-empty chunk transcripts joined by newlines>,
        'chunks': <number of segments>}``.

    Raises:
        HTTPException(400): the audio file does not exist or ``chunk_seconds``
            is not positive.
        HTTPException(500): ``ASR_URL``/``ASR_TOKEN`` are unset, ffmpeg failed,
            or ffmpeg produced no segments.
        HTTPException(502): the external ASR failed for some chunk.
    """
    src = Path(req.audio_path)
    if not src.is_file():
        raise HTTPException(400, f'audio not found: {src}')
    if req.chunk_seconds <= 0:
        raise HTTPException(400, f'chunk_seconds must be positive: {req.chunk_seconds}')

    asr_url = os.environ.get('ASR_URL', '')
    asr_token = os.environ.get('ASR_TOKEN', '')
    if not asr_url or not asr_token:
        raise HTTPException(500, 'ASR_URL/ASR_TOKEN not configured in sidecar')

    # Private scratch directory; removed in ``finally`` whatever happens.
    tmp = Path(tempfile.mkdtemp(prefix='transcribe-'))
    try:
        chunks = _split_audio(src, tmp, req.chunk_seconds)
        if not chunks:
            raise HTTPException(500, 'ffmpeg produced 0 chunks')
        log.info("split %s into %d chunks", src.name, len(chunks))

        all_text = []
        for i, c in enumerate(chunks, 1):
            log.info("ASR chunk %d/%d (%s, %d KB)",
                     i, len(chunks), c.name, c.stat().st_size // 1024)
            all_text.append(_transcribe_chunk(c, i, asr_url, asr_token))

        full = '\n'.join(t for t in all_text if t)
        return {'text': full, 'chunks': len(chunks)}
    finally:
        shutil.rmtree(tmp, ignore_errors=True)
class ConvertReq(BaseModel):
md_path: str
title: Optional[str] = None