From ca11a9bda75e2dba47d85d6a0fc167bfa7b419d1 Mon Sep 17 00:00:00 2001
From: Fam Zheng
Date: Mon, 18 May 2026 00:40:23 +0100
Subject: [PATCH] =?UTF-8?q?notes(asr):=20ffprobe=20duration=3DN/A=20?=
 =?UTF-8?q?=E6=97=B6=E5=9B=9E=E9=80=80=E7=94=A8=20ffmpeg=20null-muxer=20?=
 =?UTF-8?q?=E8=A7=A3=E7=A0=81=E7=BB=9F=E8=AE=A1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

浏览器内 MediaRecorder 录的 webm/m4a 经常 metadata 没写 duration
(录到一半浏览器关掉 tab 没正常 finalize 文件)。ffprobe format.duration 返回 N/A。
回退跑 `ffmpeg -i input -f null -`,从 stderr 最后一行 "time=HH:MM:SS.MS"
parse 出实际秒数。慢一点但永远能拿到。

The fallback ffmpeg runs with -nostdin and stdin=DEVNULL so it never reads
the server's stdin.
---
 apps/notes/feishu/server.py | 55 ++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/apps/notes/feishu/server.py b/apps/notes/feishu/server.py
index 5aaef8d..d35f112 100644
--- a/apps/notes/feishu/server.py
+++ b/apps/notes/feishu/server.py
@@ -6,6 +6,7 @@
 import json
 import logging
 import os
+import re
 import shutil
 import subprocess
 import tempfile
@@ -17,6 +18,48 @@ import requests
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 
+
+def probe_duration(src: Path) -> float:
+    """Return the duration of *src* in seconds.
+
+    Browser-recorded webm/m4a files often carry no duration in their metadata
+    (the recording ended before the file could be finalized), so ffprobe's
+    format.duration can come back as N/A. In that case fall back to decoding
+    the whole file through ffmpeg's null muxer and read the last "time="
+    progress stamp from its stderr.
+
+    Raises:
+        HTTPException: status 500 when ffmpeg's stderr has no "time=" stamp.
+        subprocess.TimeoutExpired: when the ffmpeg decode exceeds 900 s.
+    """
+    try:
+        out = subprocess.check_output(
+            ['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
+             '-of', 'csv=p=0', str(src)],
+            timeout=60,
+        ).decode().strip()
+        if out and out != 'N/A':
+            return float(out)
+    except (subprocess.CalledProcessError, ValueError, subprocess.TimeoutExpired):
+        # ffprobe failed or printed something unparsable: use the fallback.
+        pass
+    log.info("ffprobe format.duration=N/A, decoding to count time")
+    proc = subprocess.run(
+        # -nostdin plus stdin=DEVNULL: ffmpeg reads interactive commands from
+        # stdin by default, which must not be the server's own stdin.
+        ['ffmpeg', '-nostdin', '-i', str(src), '-f', 'null', '-'],
+        stdin=subprocess.DEVNULL,
+        stderr=subprocess.PIPE, stdout=subprocess.DEVNULL,
+        timeout=900,
+    )
+    matches = re.findall(rb'time=(\d+):(\d+):(\d+(?:\.\d+)?)', proc.stderr)
+    if not matches:
+        raise HTTPException(500, f'cannot determine duration; \nffmpeg stderr tail: {proc.stderr[-300:].decode("utf-8","replace")}')
+    # The last progress stamp ffmpeg printed is the total decoded time.
+    h, m, s = matches[-1]
+    return int(h) * 3600 + int(m) * 60 + float(s)
+
+
 logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(name)s: %(message)s')
 log = logging.getLogger('feishu')
 
@@ -49,16 +92,8 @@ def transcribe(req: TranscribeReq):
     tmp = Path(tempfile.gettempdir()) / f'transcribe-{uuid.uuid4().hex}'
     tmp.mkdir(parents=True)
     try:
-        # 1) 用 ffprobe 拿总时长
-        out = subprocess.check_output(
-            ['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
-             '-of', 'csv=p=0', str(src)],
-            timeout=60,
-        )
-        try:
-            duration = float(out.decode().strip())
-        except ValueError:
-            raise HTTPException(500, f'ffprobe duration parse: {out!r}')
+        # 1) 拿总时长(ffprobe N/A 时回退 null-muxer 解码)
+        duration = probe_duration(src)
         log.info("duration=%.1fs", duration)
 
         # 2) 切 chunk_seconds 段,stride = chunk_seconds - overlap_seconds