diff --git a/apps/notes/feishu/server.py b/apps/notes/feishu/server.py index 5aaef8d..d35f112 100644 --- a/apps/notes/feishu/server.py +++ b/apps/notes/feishu/server.py @@ -6,6 +6,7 @@ import json import logging import os +import re import shutil import subprocess import tempfile @@ -17,6 +18,33 @@ import requests from fastapi import FastAPI, HTTPException from pydantic import BaseModel + +def probe_duration(src: Path) -> float: + """browser-recorded webm/m4a 经常没在 metadata 里写 duration(录到一半结束没法 finalize)。 + 先 try ffprobe format.duration,N/A 时 fallback 让 ffmpeg null-muxer 解码一遍统计。 + """ + try: + out = subprocess.check_output( + ['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration', + '-of', 'csv=p=0', str(src)], + timeout=60, + ).decode().strip() + if out and out != 'N/A': + return float(out) + except (subprocess.CalledProcessError, ValueError, subprocess.TimeoutExpired): + pass + log.info("ffprobe format.duration=N/A, decoding to count time") + proc = subprocess.run( + ['ffmpeg', '-i', str(src), '-f', 'null', '-'], + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + timeout=900, + ) + matches = re.findall(rb'time=(\d+):(\d+):(\d+(?:\.\d+)?)', proc.stderr) + if not matches: + raise HTTPException(500, f'cannot determine duration; ffmpeg stderr tail: {proc.stderr[-300:].decode("utf-8","replace")}') + h, m, s = matches[-1] + return int(h) * 3600 + int(m) * 60 + float(s) + logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(name)s: %(message)s') log = logging.getLogger('feishu') @@ -49,16 +77,8 @@ def transcribe(req: TranscribeReq): tmp = Path(tempfile.gettempdir()) / f'transcribe-{uuid.uuid4().hex}' tmp.mkdir(parents=True) try: - # 1) 用 ffprobe 拿总时长 - out = subprocess.check_output( - ['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration', - '-of', 'csv=p=0', str(src)], - timeout=60, - ) - try: - duration = float(out.decode().strip()) - except ValueError: - raise HTTPException(500, f'ffprobe duration parse: {out!r}') + # 1) 拿总时长(ffprobe N/A 时回退 null-muxer 解码) + duration = probe_duration(src) log.info("duration=%.1fs", duration) # 2) 切 chunk_seconds 段,stride = chunk_seconds - overlap_seconds