diff --git a/apps/notes/feishu/Dockerfile b/apps/notes/feishu/Dockerfile index 87c7ae6..20f90fe 100644 --- a/apps/notes/feishu/Dockerfile +++ b/apps/notes/feishu/Dockerfile @@ -4,7 +4,7 @@ FROM node:20-slim RUN apt-get update && apt-get install -y --no-install-recommends \ - python3 python3-pip python3-markdown ca-certificates curl \ + python3 python3-pip python3-markdown ca-certificates curl ffmpeg \ && rm -rf /var/lib/apt/lists/* # lark-cli postinstall 调 curl 下二进制,没 curl 会报 spawnSync ENOENT @@ -12,7 +12,8 @@ RUN npm install -g @larksuite/cli@1.0.29 RUN pip install --no-cache-dir --break-system-packages \ fastapi==0.115.6 \ - uvicorn==0.34.0 + uvicorn==0.34.0 \ + requests==2.32.3 COPY markdown-to-feishu /usr/local/bin/markdown-to-feishu RUN chmod +x /usr/local/bin/markdown-to-feishu diff --git a/apps/notes/feishu/server.py b/apps/notes/feishu/server.py index f56e0f4..ed43aac 100644 --- a/apps/notes/feishu/server.py +++ b/apps/notes/feishu/server.py @@ -1,17 +1,19 @@ -"""notes feishu sidecar:HTTP 包一层 markdown-to-feishu。 - -POST /convert {md_path, title?, existing_doc_id?} - → 跑 markdown-to-feishu,parse 最后那段 JSON,返回 {doc_id, url} +"""notes 多用途 sidecar: + POST /transcribe — 用 ffmpeg 切片 + 串行调外部 ASR,绕过单请求大小限制 + POST /convert — markdown-to-feishu,把会议纪要 push 飞书 docx """ import json import logging import os -import re +import shutil import subprocess +import tempfile +import uuid from pathlib import Path from typing import Optional +import requests from fastapi import FastAPI, HTTPException from pydantic import BaseModel @@ -27,6 +29,81 @@ def healthz(): return {'ok': True} +class TranscribeReq(BaseModel): + audio_path: str + chunk_seconds: int = 60 # 60s ≈ 1-1.5 MB m4a,远低于 ASR 限制 + + +@app.post('/transcribe') +def transcribe(req: TranscribeReq): + """ffmpeg 切片 → 串行喂外部 ASR → 拼接 transcript。""" + src = Path(req.audio_path) + if not src.exists(): + raise HTTPException(400, f'audio not found: {src}') + asr_url = os.environ.get('ASR_URL', '') + asr_token = os.environ.get('ASR_TOKEN', '') + if not asr_url or not asr_token: + raise HTTPException(500, 'ASR_URL/ASR_TOKEN not configured in sidecar') + + tmp = Path(tempfile.gettempdir()) / f'transcribe-{uuid.uuid4().hex}' + tmp.mkdir(parents=True) + try: + # 用 ffmpeg segment:直接 copy stream(fast & 不损失质量) + # 个别情况下 -c copy 在某些容器格式下切不精准,回退 re-encode 到 aac + ext = src.suffix.lstrip('.') or 'm4a' + chunk_pattern = f'chunk_%03d.{ext}' + try: + subprocess.run( + ['ffmpeg', '-y', '-i', str(src), + '-f', 'segment', '-segment_time', str(req.chunk_seconds), + '-c', 'copy', '-reset_timestamps', '1', + str(tmp / chunk_pattern)], + check=True, capture_output=True, timeout=180, + ) + except subprocess.CalledProcessError: + # fallback: re-encode AAC,慢但稳 + log.warning("ffmpeg -c copy 失败,回退 re-encode") + for p in tmp.glob(f'chunk_*.{ext}'): + p.unlink(missing_ok=True) + subprocess.run( + ['ffmpeg', '-y', '-i', str(src), + '-f', 'segment', '-segment_time', str(req.chunk_seconds), + '-c:a', 'aac', '-b:a', '64k', '-ac', '1', '-ar', '16000', + '-reset_timestamps', '1', + str(tmp / 'chunk_%03d.m4a')], + check=True, capture_output=True, timeout=600, + ) + ext = 'm4a' + + chunks = sorted(tmp.glob(f'chunk_*.{ext}')) + if not chunks: + raise HTTPException(500, 'ffmpeg produced 0 chunks') + log.info("split %s → %d chunks", src.name, len(chunks)) + + all_text = [] + for i, c in enumerate(chunks, 1): + log.info("ASR chunk %d/%d (%s, %d KB)", i, len(chunks), c.name, c.stat().st_size // 1024) + with open(c, 'rb') as f: + r = requests.post( + asr_url, + headers={'Authorization': f'Bearer {asr_token}'}, + files={'file': (c.name, f, 'audio/mp4')}, + data={'model': 'qwen3-asr', 'response_format': 'json'}, + timeout=300, + ) + if not r.ok: + raise HTTPException(502, f'ASR chunk {i} {r.status_code}: {r.text[:300]}') + try: + text = r.json().get('text', '').strip() + except Exception: + raise HTTPException(502, f'ASR chunk {i} bad json: {r.text[:200]}') + all_text.append(text) + full = '\n'.join(t for t in all_text if t) + return {'text': full, 'chunks': len(chunks)} + finally: + shutil.rmtree(tmp, ignore_errors=True) + + class ConvertReq(BaseModel): md_path: str title: Optional[str] = None diff --git a/apps/notes/k8s/all.yaml b/apps/notes/k8s/all.yaml index be75619..27ace1c 100644 --- a/apps/notes/k8s/all.yaml +++ b/apps/notes/k8s/all.yaml @@ -72,8 +72,6 @@ spec: value: /data/app.db - name: BLOBS_DIR value: /data/blobs - - name: ASR_URL - value: http://18.159.112.195:8848/v1/audio/transcriptions - name: LLM_GATEWAY value: http://3.135.65.204:8848/v1 - name: LLM_MODEL @@ -83,11 +81,6 @@ spec: secretKeyRef: name: notes-creds key: passphrase - - name: ASR_TOKEN - valueFrom: - secretKeyRef: - name: notes-creds - key: asr_token - name: LLM_TOKEN valueFrom: secretKeyRef: @@ -115,6 +108,14 @@ spec: ports: - containerPort: 8002 name: feishu + env: + - name: ASR_URL + value: http://18.159.112.195:8848/v1/audio/transcriptions + - name: ASR_TOKEN + valueFrom: + secretKeyRef: + name: notes-creds + key: asr_token readinessProbe: httpGet: { path: /healthz, port: feishu } initialDelaySeconds: 3 diff --git a/apps/notes/src/main.rs b/apps/notes/src/main.rs index 29a0453..e95220c 100644 --- a/apps/notes/src/main.rs +++ b/apps/notes/src/main.rs @@ -31,8 +31,6 @@ struct AppState { db: Arc>, blobs_dir: PathBuf, passphrase: String, - asr_url: String, - asr_token: String, llm_gateway: String, llm_token: String, llm_model: String, @@ -53,9 +51,7 @@ async fn main() -> std::io::Result<()> { if passphrase.is_empty() { tracing::warn!("PASSPHRASE not set — all /api/* will return 401"); } - let asr_url = std::env::var("ASR_URL") - .unwrap_or_else(|_| "http://18.159.112.195:8848/v1/audio/transcriptions".into()); - let asr_token = std::env::var("ASR_TOKEN").unwrap_or_default(); + // ASR 现在由 sidecar 调(切片串行),主容器不再直接调外部 ASR let llm_gateway = std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into()); let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default(); @@ -95,8 +91,6 @@ async fn main() -> std::io::Result<()> { db: Arc::new(Mutex::new(conn)), blobs_dir, passphrase, - asr_url, - asr_token, llm_gateway, llm_token, llm_model, @@ -465,38 +459,30 @@ fn set_status(s: &AppState, id: i64, status: &str, transcript: Option<&str>, err async fn call_asr( s: &AppState, path: &std::path::Path, - filename: &str, + _filename: &str, ) -> Result { - let bytes = tokio::fs::read(path).await.map_err(|e| e.to_string())?; - let part = reqwest::multipart::Part::bytes(bytes) - .file_name(filename.to_string()) - .mime_str("audio/mpeg") - .map_err(|e| e.to_string())?; - let form = reqwest::multipart::Form::new() - .text("model", "qwen3-asr") - .text("response_format", "json") - .part("file", part); - + // 走 sidecar /transcribe:sidecar 用 ffmpeg 切片 + 串行调外部 ASR,绕过 ASR server 单文件大小限制 + let url = format!("{}/transcribe", s.feishu_url.trim_end_matches('/')); + let payload = json!({ "audio_path": path.to_string_lossy() }); let resp = s .http - .post(&s.asr_url) - .bearer_auth(&s.asr_token) - .multipart(form) - .timeout(std::time::Duration::from_secs(600)) + .post(&url) + .json(&payload) + .timeout(std::time::Duration::from_secs(3600)) .send() .await - .map_err(|e| format!("connect: {e}"))?; + .map_err(|e| format!("connect sidecar: {e}"))?; if !resp.status().is_success() { let st = resp.status(); let body = resp.text().await.unwrap_or_default(); - return Err(format!("ASR {st}: {body}")); + return Err(format!("sidecar /transcribe {st}: {body}")); } let v: Value = resp.json().await.map_err(|e| format!("decode: {e}"))?; let text = v .get("text") .and_then(|x| x.as_str()) .map(|s| s.to_string()) - .ok_or_else(|| format!("ASR response no 'text': {v}"))?; + .ok_or_else(|| format!("no 'text' in response: {v}"))?; Ok(text) }