notes: 加一键转飞书文档 (sidecar markdown-to-feishu)

- backend: POST /api/recordings/:id/feishu → 拼 markdown (总结在最上 + 附件链接到转录/录音 + 转写全文) → 写 /data/feishu-tmp/<id>/ → HTTP POST 到 feishu sidecar
- 复用：已有 feishu_doc_id 时 --update 同一个 doc，前端按钮文案变「↻ 重新生成」
- schema 加 feishu_doc_id + feishu_url 两列（ALTER TABLE 兼容旧 db）
- LLM prompt 改：行动项用 markdown checkbox `- [ ] 谁·做什么·何时`
- sidecar apps/notes/feishu: node:20 + python3 + python3-markdown + @larksuite/cli + COPY 自己的 markdown-to-feishu script + FastAPI /convert
- k8s: deployment 加 feishu container 共享 PVC；lark-cli-creds Secret 挂 /root/.lark-cli/config.json
- CI: 主 image --no-cache（cube 规矩），sidecar 保留 layer cache（chromium-free，但 apt/npm 也大）
- 前端: content 头部加「📤 一键转飞书文档」按钮；已转过显示飞书链接 + 按钮变重生成
2026-05-17 22:16:13 +01:00
parent 3a34fbdfd8
commit 68671784f6
8 changed files with 1327 additions and 11 deletions
@@ -0,0 +1,23 @@
+# notes feishu sidecar: runs markdown-to-feishu to push meeting notes into a Feishu docx.
+# Lives in the same pod as the notes main container and shares its PVC, so it can see the
+# markdown + attachments the main container writes under /data/feishu-tmp/<id>/.
+
+# Node base image because the lark CLI below is installed through npm.
+FROM node:20-slim
+
+# Python runtime for the converter script and the HTTP server; python3-markdown
+# provides the `markdown` module the converter imports.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 python3-pip python3-markdown ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# lark-cli is the executable the converter shells out to; version pinned.
+RUN npm install -g @larksuite/cli@1.0.29
+
+# Web stack for server.py, installed into the system interpreter
+# (hence --break-system-packages); versions pinned.
+RUN pip install --no-cache-dir --break-system-packages \
+    fastapi==0.115.6 \
+    uvicorn==0.34.0
+
+# Install the converter on PATH and make it executable.
+COPY markdown-to-feishu /usr/local/bin/markdown-to-feishu
+RUN chmod +x /usr/local/bin/markdown-to-feishu
+COPY server.py /app/server.py
+
+# Unbuffered stdout/stderr so log lines show up immediately.
+ENV PYTHONUNBUFFERED=1
+WORKDIR /app
+EXPOSE 8002
+# Serve the `app` object from /app/server.py on all interfaces, port 8002.
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8002"]
@@ -0,0 +1,970 @@
+#!/usr/bin/env python3
+"""markdown-to-feishu — convert a Markdown file (with rich embeds) into a Feishu
+docx, using the lark-cli wrapper. Tables, images (URL + local), Mermaid /
+PlantUML diagrams, and arbitrary attachments (PDF / CSV / log / anything) all
+get planted as real DocxXML blocks. Re-runs against the same .md by default
+update the previously-created doc instead of spawning a new one.
+"""
+
+from __future__ import annotations
+
+import argparse
+import html as html_lib
+import json
+import os
+import re
+import subprocess
+import sys
+import textwrap
+import time
+import uuid
+from html.parser import HTMLParser
+from pathlib import Path
+from urllib.parse import urlparse
+
+import markdown
+
+
+# Directory holding the persisted state; $MD2FEISHU_STATE_DIR overrides the
+# default of ~/.local/share/markdown-to-feishu.
+STATE_DIR = Path(os.environ.get("MD2FEISHU_STATE_DIR", str(Path.home() / ".local/share/markdown-to-feishu")))
+# JSON file read by load_state() and written by save_state().
+STATE_FILE = STATE_DIR / "state.json"
+
+# Common prefix of the placeholder tokens planted in the generated XML; each
+# token marks where a local image / attachment must be inserted afterwards.
+SENTINEL_PREFIX = "MD2FEISHU_SENTINEL"
+
+VERSION = "0.1.0"
+
+
+# ---------------------------------------------------------------------------
+# State (markdown abs path -> doc id) so re-runs update in place
+# ---------------------------------------------------------------------------
+
+def load_state() -> dict:
+    if not STATE_FILE.exists():
+        return {}
+    try:
+        return json.loads(STATE_FILE.read_text(encoding="utf-8"))
+    except (json.JSONDecodeError, OSError):
+        return {}
+
+
+def save_state(state: dict) -> None:
+    STATE_DIR.mkdir(parents=True, exist_ok=True)
+    STATE_FILE.write_text(json.dumps(state, indent=2, ensure_ascii=False), encoding="utf-8")
+
+
+# ---------------------------------------------------------------------------
+# lark-cli runner
+# ---------------------------------------------------------------------------
+
+class LarkError(RuntimeError):
+    pass
+
+
+def run_lark(args: list[str], *, stdin: str | None = None, identity: str = "user", verbose: bool = False, cwd: str | None = None) -> dict:
+    cmd = ["lark-cli", "--as", identity] + args
+    if verbose:
+        cwd_note = f" (cwd={cwd})" if cwd else ""
+        sys.stderr.write(f"[lark] {' '.join(cmd)}{cwd_note}\n")
+    proc = subprocess.run(
+        cmd,
+        input=stdin,
+        capture_output=True,
+        text=True,
+        cwd=cwd,
+    )
+    if proc.returncode != 0:
+        raise LarkError(
+            f"lark-cli failed (exit {proc.returncode}): {' '.join(cmd)}\n"
+            f"stderr: {proc.stderr.strip()}\n"
+            f"stdout: {proc.stdout.strip()}"
+        )
+    if not proc.stdout.strip():
+        return {}
+    try:
+        return json.loads(proc.stdout)
+    except json.JSONDecodeError:
+        return {"_raw": proc.stdout}
+
+
+# ---------------------------------------------------------------------------
+# Markdown helpers
+# ---------------------------------------------------------------------------
+
+def is_http_url(s: str) -> bool:
+    p = urlparse(s)
+    return p.scheme in ("http", "https")
+
+
+def is_anchor(s: str) -> bool:
+    return s.startswith("#")
+
+
+def preprocess_markdown(text: str) -> str:
+    """Handle GFM extras python-markdown core misses."""
+    # Strip BOM
+    if text.startswith(""):
+        text = text[1:]
+    out_lines: list[str] = []
+    in_fence = False
+    fence_re = re.compile(r"^\s*```")
+    strike_re = re.compile(r"~~(\S(?:.*?\S)?)~~")
+    # GFM task-list items at top level: "- [x] text" / "* [ ] text" / "1. [x] text"
+    # Convert to a stand-alone HTML <checkbox> block so python-markdown passes
+    # it through. Leading whitespace becomes a marker (so nested checkboxes
+    # don't get hoisted to top level).
+    task_re = re.compile(r"^(\s*)(?:[-*+]|\d+\.)\s+\[([ xX])\]\s+(.*)$")
+    for line in text.split("\n"):
+        if fence_re.match(line):
+            in_fence = not in_fence
+            out_lines.append(line)
+            continue
+        if in_fence:
+            out_lines.append(line)
+            continue
+        m = task_re.match(line)
+        if m and not m.group(1):  # top-level only; nested stays a list item
+            done = "true" if m.group(2).lower() == "x" else "false"
+            body = m.group(3).strip()
+            # Surround with blank lines so it parses as raw HTML block
+            out_lines.append("")
+            out_lines.append(f'<checkbox done="{done}">{html_lib.escape(body)}</checkbox>')
+            out_lines.append("")
+            continue
+        out_lines.append(strike_re.sub(r"<del>\1</del>", line))
+    return "\n".join(out_lines)
+
+
+# ---------------------------------------------------------------------------
+# HTML -> DocxXML converter
+# ---------------------------------------------------------------------------
+
+# Names of inline-level tags. NOTE(review): not referenced in the part of the
+# file visible here — confirm it is still used.
+INLINE_TAGS = {"a", "b", "strong", "em", "i", "u", "del", "s", "strike", "code", "span", "br", "img", "cite", "latex"}
+# Block tags DocxXMLBuilder re-emits under the same name (attributes dropped)
+# when no more specific branch has handled them.
+BLOCK_PASSTHROUGH = {"p", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "hr", "br"}
+
+
+def xml_escape_text(s: str) -> str:
+    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
+
+def xml_escape_attr(s: str) -> str:
+    return xml_escape_text(s).replace('"', "&quot;")
+
+
+class DocxXMLBuilder(HTMLParser):
+    """Walks python-markdown HTML and emits DocxXML.
+
+    Local images / attachments / non-inline-able media become placeholder
+    <p>SENTINEL</p> paragraphs; each one is recorded in ``self.embeds`` so the
+    caller can media-insert the real file in the correct position afterwards.
+    """
+
+    def __init__(self, md_dir: Path, session_tag: str):
+        """Set up an empty builder.
+
+        Args:
+            md_dir: directory that relative image / link paths are resolved against.
+            session_tag: string embedded in every sentinel generated by this builder.
+        """
+        super().__init__(convert_charrefs=True)
+        self.md_dir = md_dir
+        self.session_tag = session_tag
+        # Output fragments; the caller joins them into the final XML string.
+        self.out: list[str] = []
+        self.embeds: list[dict] = []  # {sentinel, file, type, caption}
+        # Text collected inside <pre><code>; None while not inside a code element.
+        self._code_buf: list[str] | None = None
+        self._code_lang: str | None = None
+        self._table_buf: list[str] | None = None  # we buffer the entire table so colspan/rowspan etc. just round-trip
+        # Nesting level of <table> tags seen while buffering.
+        self._table_depth = 0
+        self._in_pre = False
+        # Currently open inline elements, including transparent / unknown markers.
+        self._inline_stack: list[str] = []
+        self._li_stack: list[str] = []  # track ul/ol type for current li
+        self._blockquote_depth = 0
+        self._p_depth = 0  # how many <p> are currently open in our output stream
+
+    # ---- sentinel handling ----
+    def _next_sentinel(self) -> str:
+        n = len(self.embeds)
+        # All caps + underscores so it never collides with normal markdown prose
+        return f"{SENTINEL_PREFIX}_{self.session_tag}_{n:04d}"
+
+    def _resolve_local(self, src: str) -> Path | None:
+        # Strip query/fragment for sanity
+        clean = src.split("#", 1)[0].split("?", 1)[0]
+        if not clean or is_http_url(clean) or is_anchor(clean):
+            return None
+        p = Path(clean)
+        if not p.is_absolute():
+            p = (self.md_dir / p).resolve()
+        return p if p.exists() and p.is_file() else None
+
+    # ---- emit helpers ----
+    def _emit(self, s: str) -> None:
+        # If we're buffering a table, append there instead
+        if self._table_buf is not None:
+            self._table_buf.append(s)
+        else:
+            self.out.append(s)
+
+    def _emit_placeholder(self, file: Path, kind: str, caption: str | None = None) -> None:
+        sentinel = self._next_sentinel()
+        self.embeds.append({
+            "sentinel": sentinel,
+            "file": str(file),
+            "type": kind,
+            "caption": caption,
+        })
+        # The placeholder must end up as its own top-level <p> so media-insert
+        # can anchor on it cleanly and the cleanup pass can block_delete it.
+        # If we're currently inside a <p>, split: close, emit standalone, reopen.
+        if self._table_buf is not None:
+            # Inside a table cell — best we can do is emit the sentinel as
+            # inline text and rely on str_replace cleanup. Media still lands at
+            # top level (per --selection-with-ellipsis semantics).
+            self._emit(sentinel)
+            return
+        if self._p_depth > 0:
+            self.out.append("</p>")
+            self.out.append(f"<p>{sentinel}</p>")
+            self.out.append("<p>")
+            return
+        self._emit(f"<p>{sentinel}</p>")
+
+    # ---- HTMLParser hooks ----
+    def handle_starttag(self, tag, attrs):
+        """HTMLParser hook: translate one opening tag into DocxXML output.
+
+        Branches are tried in order: <pre> capture mode, table buffering,
+        table start, <pre>, <img>, <a>, inline formatting tags, <br>, lists,
+        <blockquote>, <p>, <checkbox>, pass-through block tags, <span>. Any
+        other tag is dropped and only its text content survives.
+        """
+        attrd = dict(attrs)
+
+        # Inside <pre><code>: capture verbatim
+        if self._in_pre:
+            # Don't recurse, but still record raw markup if any nested tags appear
+            if tag == "code":
+                self._code_lang = self._extract_lang(attrd.get("class", ""))
+                self._code_buf = []
+            return
+
+        # Table buffer mode: just copy markup through, no transformations needed
+        if self._table_buf is not None:
+            self._table_buf.append(self._raw_tag(tag, attrd))
+            if tag == "table":
+                self._table_depth += 1
+            return
+
+        if tag == "table":
+            # Start buffering; the matching end tag flushes the whole table.
+            self._table_buf = []
+            self._table_depth = 1
+            self._table_buf.append(self._raw_tag(tag, attrd))
+            return
+
+        if tag == "pre":
+            self._in_pre = True
+            return
+
+        if tag == "img":
+            self._emit_img(attrd)
+            return
+
+        if tag == "a":
+            href = attrd.get("href", "")
+            local = self._resolve_local(href) if href else None
+            if local is not None:
+                # Inline attachment: keep the link text in the prose so the
+                # paragraph still reads naturally, and queue a placeholder so
+                # the attachment block appears right after this paragraph.
+                caption = attrd.get("title") or None
+                self._emit_placeholder(local, "file", caption)
+                # Drop the <a> tags (keep their text children) by pushing
+                # a "transparent" marker on the inline stack.
+                self._inline_stack.append("__TRANSPARENT_A__")
+                return
+            # Regular link
+            self._inline_stack.append("a")
+            attrs_s = self._attrs_string({"href": href})
+            self._emit(f"<a{attrs_s}>")
+            return
+
+        # Inline formatting: HTML synonyms are folded onto one output tag each.
+        if tag in {"b", "strong"}:
+            self._inline_stack.append("b")
+            self._emit("<b>")
+            return
+        if tag in {"em", "i"}:
+            self._inline_stack.append("em")
+            self._emit("<em>")
+            return
+        if tag in {"u"}:
+            self._inline_stack.append("u")
+            self._emit("<u>")
+            return
+        if tag in {"del", "s", "strike"}:
+            self._inline_stack.append("del")
+            self._emit("<del>")
+            return
+        if tag == "code":
+            self._inline_stack.append("code")
+            self._emit("<code>")
+            return
+        if tag == "br":
+            self._emit("<br/>")
+            return
+
+        if tag == "ul":
+            self._li_stack.append("ul")
+            self._emit("<ul>")
+            return
+        if tag == "ol":
+            self._li_stack.append("ol")
+            self._emit("<ol>")
+            return
+        if tag == "li":
+            # Items of an ordered list carry seq="auto"; bullet items carry nothing.
+            if self._li_stack and self._li_stack[-1] == "ol":
+                self._emit('<li seq="auto">')
+            else:
+                self._emit("<li>")
+            return
+
+        if tag == "blockquote":
+            self._blockquote_depth += 1
+            self._emit("<blockquote>")
+            return
+
+        if tag == "p":
+            self._p_depth += 1
+            self._emit("<p>")
+            return
+
+        if tag == "checkbox":
+            # Emitted by our preprocessor for GFM task list items.
+            done = attrd.get("done", "false")
+            self._emit(f'<checkbox done="{xml_escape_attr(done)}">')
+            self._inline_stack.append("checkbox")
+            return
+
+        if tag in BLOCK_PASSTHROUGH:
+            # Re-emitted under the same name; attributes are not carried over.
+            self._emit(f"<{tag}>")
+            return
+
+        # span etc.
+        if tag == "span":
+            self._inline_stack.append("span")
+            self._emit("<span>")
+            return
+
+        # Anything else we don't recognise — drop the tag, keep its text
+        self._inline_stack.append("__UNKNOWN__")
+
+    def handle_endtag(self, tag):
+        """HTMLParser hook: translate one closing tag into DocxXML output.
+
+        Mirrors handle_starttag: leaves <pre> capture mode (flushing the code
+        block on </code>), finishes table buffering on the outermost
+        </table>, and otherwise emits the closing tag that matches what the
+        start hook emitted, popping the inline / list stacks when the top
+        entry matches.
+        """
+        if self._in_pre:
+            if tag == "code":
+                self._flush_code()
+            elif tag == "pre":
+                self._in_pre = False
+            return
+
+        if self._table_buf is not None:
+            self._table_buf.append(f"</{tag}>")
+            if tag == "table":
+                self._table_depth -= 1
+                if self._table_depth == 0:
+                    # Outermost table closed: emit the whole buffer at once.
+                    table_xml = "".join(self._table_buf)
+                    self._table_buf = None
+                    # Clean the buffered HTML so it's valid DocxXML
+                    self.out.append(self._sanitise_table(table_xml))
+            return
+
+        if tag == "pre":
+            self._in_pre = False
+            return
+
+        if tag == "img":
+            return
+
+        if tag == "a":
+            # Whatever is on top of the stack is popped here; a transparent
+            # marker means the opening <a> was never emitted, so neither is
+            # the closing tag.
+            top = self._inline_stack.pop() if self._inline_stack else None
+            if top == "__TRANSPARENT_A__":
+                return
+            self._emit("</a>")
+            return
+
+        if tag in {"b", "strong"}:
+            if self._inline_stack and self._inline_stack[-1] == "b":
+                self._inline_stack.pop()
+            self._emit("</b>")
+            return
+        if tag in {"em", "i"}:
+            if self._inline_stack and self._inline_stack[-1] == "em":
+                self._inline_stack.pop()
+            self._emit("</em>")
+            return
+        if tag in {"u"}:
+            if self._inline_stack and self._inline_stack[-1] == "u":
+                self._inline_stack.pop()
+            self._emit("</u>")
+            return
+        if tag in {"del", "s", "strike"}:
+            if self._inline_stack and self._inline_stack[-1] == "del":
+                self._inline_stack.pop()
+            self._emit("</del>")
+            return
+        if tag == "code":
+            if self._inline_stack and self._inline_stack[-1] == "code":
+                self._inline_stack.pop()
+            self._emit("</code>")
+            return
+        if tag == "span":
+            if self._inline_stack and self._inline_stack[-1] == "span":
+                self._inline_stack.pop()
+            self._emit("</span>")
+            return
+
+        if tag == "ul":
+            if self._li_stack and self._li_stack[-1] == "ul":
+                self._li_stack.pop()
+            self._emit("</ul>")
+            return
+        if tag == "ol":
+            if self._li_stack and self._li_stack[-1] == "ol":
+                self._li_stack.pop()
+            self._emit("</ol>")
+            return
+        if tag == "li":
+            self._emit("</li>")
+            return
+
+        if tag == "blockquote":
+            # Clamped at zero so a stray closing tag cannot drive the counter negative.
+            self._blockquote_depth = max(0, self._blockquote_depth - 1)
+            self._emit("</blockquote>")
+            return
+
+        if tag == "p":
+            self._p_depth = max(0, self._p_depth - 1)
+            self._emit("</p>")
+            return
+
+        if tag == "checkbox":
+            if self._inline_stack and self._inline_stack[-1] == "checkbox":
+                self._inline_stack.pop()
+            self._emit("</checkbox>")
+            return
+
+        if tag in BLOCK_PASSTHROUGH:
+            self._emit(f"</{tag}>")
+            return
+
+        # Unrecognised tag: nothing was emitted for it, just drop its marker.
+        if self._inline_stack and self._inline_stack[-1] == "__UNKNOWN__":
+            self._inline_stack.pop()
+
+    def handle_startendtag(self, tag, attrs):
+        attrd = dict(attrs)
+        if tag == "img":
+            self._emit_img(attrd)
+            return
+        if tag == "br":
+            self._emit("<br/>")
+            return
+        if tag == "hr":
+            self._emit("<hr/>")
+            return
+        # Treat as start+end
+        self.handle_starttag(tag, attrs)
+        self.handle_endtag(tag)
+
+    def handle_data(self, data):
+        if not data:
+            return
+        if self._in_pre and self._code_buf is not None:
+            self._code_buf.append(data)
+            return
+        if self._table_buf is not None:
+            self._table_buf.append(xml_escape_text(data))
+            return
+        # Preserve user text but escape XML specials
+        # In <pre> outside <code> we also escape (shouldn't normally happen)
+        self._emit(xml_escape_text(data))
+
+    # ---- code / language extraction ----
+    @staticmethod
+    def _extract_lang(class_attr: str) -> str:
+        # python-markdown fenced_code emits e.g. class="language-mermaid"
+        for tok in class_attr.split():
+            if tok.startswith("language-"):
+                return tok[len("language-"):]
+            if tok.startswith("lang-"):
+                return tok[len("lang-"):]
+        return ""
+
+    def _flush_code(self) -> None:
+        body = "".join(self._code_buf or [])
+        lang = (self._code_lang or "").strip().lower()
+        self._code_buf = None
+        self._code_lang = None
+        # Mermaid / PlantUML get rendered as whiteboards
+        if lang in {"mermaid"}:
+            self._emit(f'<whiteboard type="mermaid">{xml_escape_text(body.rstrip())}</whiteboard>')
+            return
+        if lang in {"plantuml", "puml"}:
+            self._emit(f'<whiteboard type="plantuml">{xml_escape_text(body.rstrip())}</whiteboard>')
+            return
+        # Strip trailing newline that python-markdown adds inside <code>
+        body = body.rstrip("\n")
+        lang_attr = f' lang="{xml_escape_attr(lang)}"' if lang else ""
+        self._emit(f"<pre{lang_attr}><code>{xml_escape_text(body)}</code></pre>")
+
+    # ---- image emit ----
+    def _emit_img(self, attrd: dict) -> None:
+        src = attrd.get("src", "").strip()
+        alt = attrd.get("alt", "").strip()
+        title = attrd.get("title", "").strip()
+        caption = title or alt or None
+        if not src:
+            return
+        if is_http_url(src):
+            attrs_s = self._attrs_string({"href": src, "caption": caption, "name": alt or None})
+            self._emit(f"<img{attrs_s}/>")
+            return
+        local = self._resolve_local(src)
+        if local is None:
+            sys.stderr.write(f"[warn] image not found, dropping: {src}\n")
+            return
+        self._emit_placeholder(local, "image", caption)
+
+    # ---- attrs helpers ----
+    @staticmethod
+    def _attrs_string(d: dict) -> str:
+        parts = []
+        for k, v in d.items():
+            if v is None or v == "":
+                continue
+            parts.append(f' {k}="{xml_escape_attr(str(v))}"')
+        return "".join(parts)
+
+    @staticmethod
+    def _raw_tag(tag: str, attrd: dict) -> str:
+        return f"<{tag}{DocxXMLBuilder._attrs_string(attrd)}>"
+
+    @staticmethod
+    def _sanitise_table(html: str) -> str:
+        """Coerce python-markdown's HTML table into DocxXML-legal markup:
+          - <strong>/<em>/<i> become <b>/<em>
+          - Drop style="..." attributes (DocxXML uses background-color /
+            vertical-align, not CSS)
+          - Drop unknown attributes on cells
+        """
+        # tag rename
+        html = re.sub(r"<(/?)strong\b", r"<\1b", html)
+        html = re.sub(r"<(/?)i\b", r"<\1em", html)
+        # drop style="..." on th/td/tr/table
+        html = re.sub(r'\s+style="[^"]*"', "", html)
+        # drop align="..." on th/td (we don't try to map to DocxXML alignment)
+        html = re.sub(r'\s+align="[^"]*"', "", html)
+        return html
+
+
+# ---------------------------------------------------------------------------
+# Driver
+# ---------------------------------------------------------------------------
+
+def derive_title(md_text: str, md_path: Path) -> str:
+    for line in md_text.splitlines():
+        line = line.strip()
+        if line.startswith("# "):
+            return line[2:].strip()
+    # fallback: filename without extension
+    return md_path.stem
+
+
+def strip_first_h1(md_text: str) -> str:
+    """Drop the first H1 line if present — we'll convey it via <title> instead."""
+    out_lines: list[str] = []
+    dropped = False
+    for line in md_text.splitlines():
+        if not dropped and line.strip().startswith("# "):
+            dropped = True
+            continue
+        out_lines.append(line)
+    return "\n".join(out_lines)
+
+
+def build_xml(md_path: Path, *, title: str, session_tag: str) -> tuple[str, list[dict]]:
+    raw = md_path.read_text(encoding="utf-8")
+    raw = preprocess_markdown(raw)
+    body_md = strip_first_h1(raw)
+    html = markdown.markdown(
+        body_md,
+        extensions=["fenced_code", "tables", "sane_lists"],
+        output_format="xhtml",
+    )
+    builder = DocxXMLBuilder(md_dir=md_path.parent, session_tag=session_tag)
+    builder.feed(html)
+    builder.close()
+    body_xml = "".join(builder.out)
+    # Unwrap stray <p>...</p> around block-level <checkbox> (python-markdown
+    # wraps unknown HTML tags in <p>); then collapse empty <p></p> left over
+    # from the placeholder split.
+    body_xml = re.sub(
+        r"<p>\s*(<checkbox\s+done=\"(?:true|false)\">[^<]*</checkbox>)\s*</p>",
+        r"\1",
+        body_xml,
+    )
+    body_xml = re.sub(r"<p>\s*</p>", "", body_xml)
+    title_xml = f"<title>{xml_escape_text(title)}</title>"
+    return title_xml + body_xml, builder.embeds
+
+
+def create_or_overwrite_doc(*, doc_id: str | None, content: str, identity: str, parent_token: str | None, parent_position: str | None, verbose: bool) -> dict:
+    if doc_id:
+        if verbose:
+            sys.stderr.write(f"[md2feishu] overwriting existing doc {doc_id}\n")
+        # Use stdin for content to avoid argv length / shell escaping pitfalls
+        args = [
+            "docs", "+update",
+            "--api-version", "v2",
+            "--doc", doc_id,
+            "--command", "overwrite",
+            "--doc-format", "xml",
+            "--content", "-",
+        ]
+        res = run_lark(args, stdin=content, identity=identity, verbose=verbose)
+        return {"doc_id": doc_id, "result": res}
+    if verbose:
+        sys.stderr.write("[md2feishu] creating new doc\n")
+    args = [
+        "docs", "+create",
+        "--api-version", "v2",
+        "--doc-format", "xml",
+        "--content", "-",
+    ]
+    if parent_token:
+        args += ["--parent-token", parent_token]
+    if parent_position:
+        args += ["--parent-position", parent_position]
+    res = run_lark(args, stdin=content, identity=identity, verbose=verbose)
+    document = (res.get("data") or {}).get("document") or {}
+    new_id = document.get("document_id")
+    if not new_id:
+        raise LarkError(f"docs +create did not return a document_id: {json.dumps(res, ensure_ascii=False)}")
+    return {"doc_id": new_id, "url": document.get("url"), "result": res}
+
+
+def insert_embed(doc_id: str, embed: dict, *, identity: str, verbose: bool) -> None:
+    # lark-cli refuses absolute paths for --file. cd into the file's parent
+    # and pass just the basename.
+    file_path = Path(embed["file"]).resolve()
+    args = [
+        "docs", "+media-insert",
+        "--doc", doc_id,
+        "--file", file_path.name,
+        "--type", embed["type"],
+        "--selection-with-ellipsis", embed["sentinel"],
+        "--before",
+    ]
+    if embed.get("caption") and embed["type"] == "image":
+        args += ["--caption", embed["caption"]]
+    run_lark(args, identity=identity, verbose=verbose, cwd=str(file_path.parent))
+
+
+def cleanup_sentinels(doc_id: str, session_tag: str, embeds: list[dict], *, identity: str, verbose: bool) -> None:
+    """Two-pass cleanup:
+      1. block_delete any paragraph whose entire text is a sentinel
+      2. str_replace any remaining sentinel occurrences (handles sentinels
+         that ended up inline inside table cells or mixed prose)
+
+    Both passes work from a single fetch of the document taken up front.
+    Failures of the cleanup commands are reported on stderr as warnings and
+    do not abort; a failure of the initial fetch propagates as LarkError.
+    """
+    res = run_lark([
+        "docs", "+fetch",
+        "--api-version", "v2",
+        "--doc", doc_id,
+        "--detail", "with-ids",
+        "--doc-format", "xml",
+    ], identity=identity, verbose=verbose)
+    xml_payload = ((res.get("data") or {}).get("document") or {}).get("content") or ""
+    if not xml_payload:
+        # No content field in the response: search the serialised response instead.
+        xml_payload = json.dumps(res, ensure_ascii=False)
+    # Captures the id attribute of each <p> whose whole text is one of this
+    # session's sentinels.
+    sentinel_re = re.compile(
+        rf'<p[^>]*\bid="([^"]+)"[^>]*>\s*{SENTINEL_PREFIX}_{session_tag}_\d+\s*</p>'
+    )
+    ids = sentinel_re.findall(xml_payload)
+    if ids:
+        if verbose:
+            sys.stderr.write(f"[md2feishu] deleting {len(ids)} sentinel paragraph(s)\n")
+        try:
+            run_lark([
+                "docs", "+update",
+                "--api-version", "v2",
+                "--doc", doc_id,
+                "--command", "block_delete",
+                "--block-id", ",".join(ids),
+            ], identity=identity, verbose=verbose)
+        except LarkError as e:
+            sys.stderr.write(f"[warn] block_delete cleanup failed: {e}\n")
+    # Fallback: scrub any inline sentinel text still in the doc
+    for embed in embeds:
+        sentinel = embed["sentinel"]
+        # Decided against the snapshot fetched above (not re-fetched after the
+        # delete): skip sentinels that looked like a stand-alone paragraph
+        # when a block_delete was attempted.
+        # NOTE(review): if block_delete failed, those paragraphs are not
+        # retried here — confirm that is intended.
+        if sentinel in xml_payload and (not ids or f">{sentinel}<" not in xml_payload):
+            try:
+                run_lark([
+                    "docs", "+update",
+                    "--api-version", "v2",
+                    "--doc", doc_id,
+                    "--command", "str_replace",
+                    "--pattern", sentinel,
+                    "--content", "",
+                ], identity=identity, verbose=verbose)
+            except LarkError as e:
+                sys.stderr.write(f"[warn] str_replace cleanup for {sentinel} failed: {e}\n")
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+HELP_EPILOG = textwrap.dedent("""
+EXAMPLES
+    # First run — creates a new Feishu doc, remembers the mapping
+    markdown-to-feishu ./report.md
+
+    # Re-run on the same file — updates the same doc in place (no new doc spawned)
+    markdown-to-feishu ./report.md
+
+    # Force a brand-new doc even when state already has a mapping
+    markdown-to-feishu --new ./report.md
+
+    # Update a specific doc explicitly, ignoring state file
+    markdown-to-feishu --update doxcnAbc123 ./report.md
+
+    # Drop into a particular folder when creating
+    markdown-to-feishu --parent-token fldcnXXXX ./report.md
+
+    # Put it under your personal knowledge library
+    markdown-to-feishu --parent-position my_library ./report.md
+
+    # Override the document title (default = first H1 or filename stem)
+    markdown-to-feishu --title "2026 Q2 OKR" ./okr.md
+
+    # Inspect the generated XML and embed plan, without touching Feishu
+    markdown-to-feishu --dry-run ./report.md
+
+    # Forget the mapping for a file (does NOT delete the Feishu doc)
+    markdown-to-feishu --forget ./report.md
+
+    # Show the recorded mapping for this file
+    markdown-to-feishu --show ./report.md
+
+SUPPORTED MARKDOWN -> FEISHU BLOCK MAPPING
+    # / ## / ... / ######        ->  <h1> ... <h9>      (the first H1 becomes the
+                                                          document <title>)
+    paragraphs                    ->  <p>
+    **bold** / __bold__           ->  <b>
+    *italic* / _italic_           ->  <em>
+    ~~strike~~ (GFM)              ->  <del>
+    `inline code`                 ->  <code>
+    [text](https://...)           ->  <a href="...">text</a>
+    [text](./local.pdf)           ->  attachment block (file uploaded via
+                                       docs +media-insert --type file)
+    ![alt](https://...)           ->  <img href="https://..."/> (URL is fetched
+                                       server-side by Feishu)
+    ![alt](./local.png)           ->  inline image block (file uploaded via
+                                       docs +media-insert --type image; alt /
+                                       title becomes caption)
+    > blockquote                  ->  <blockquote>
+    --- / ***                     ->  <hr/>
+    - item / * item / 1. item     ->  <ul> / <ol> with seq="auto"
+    nested lists (4-space indent) ->  nested <ul> / <ol>
+    | a | b |  GFM tables         ->  <table><thead><tr><th>...
+    ```lang ... ```               ->  <pre lang="lang"><code>...</code></pre>
+    ```mermaid ... ```            ->  <whiteboard type="mermaid">...</whiteboard>
+    ```plantuml ... ```           ->  <whiteboard type="plantuml">...</whiteboard>
+
+ATTACHMENT DETECTION
+    Any [text](path) link whose href is NOT an http(s) URL and NOT an in-doc
+    anchor (#foo), and which resolves to an existing local file (relative to
+    the markdown file's directory), is uploaded as a Feishu file block. The
+    visible link text is dropped — the attachment block carries the filename
+    itself. This is what makes pasting PDFs / CSVs / logs / arbitrary binaries
+    feel "native".
+
+    Caveat: if a link resolves to a missing local file, it falls through to a
+    regular <a> link (the path will appear as-is). Run with --verbose to see
+    each resolution decision.
+
+IDENTITY
+    Defaults to --as user so the created doc is owned by YOUR Feishu account,
+    not the bot. This means you can manage / move / delete it directly from
+    Feishu without any ownership transfer dance. Use --as bot only if you
+    explicitly want bot-owned documents.
+
+UPDATE-BY-DEFAULT BEHAVIOUR
+    State lives at ~/.local/share/markdown-to-feishu/state.json (override with
+    $MD2FEISHU_STATE_DIR or --state-file). Keyed by the markdown file's
+    absolute path. When state has a doc_id for the given path:
+
+        - default              -> overwrite that doc in place
+        - --new                -> ignore state, create a fresh doc, replace
+                                  the mapping with the new id
+        - --update <id>        -> overwrite the given id and update state
+
+    overwrite replays the full XML and re-uploads every local media file from
+    source, so the doc always matches the markdown 1:1. Comments on the doc
+    survive overwrite; manual edits inside the doc do NOT (markdown is the
+    source of truth).
+
+EXIT CODES
+    0  success
+    1  generic error (bad args, file not found, lark-cli failure)
+    2  partial success — doc created/updated but at least one embed failed
+
+ENVIRONMENT
+    MD2FEISHU_STATE_DIR    override the directory holding state.json
+    LARK_CLI_PROFILE       passed through; honoured by lark-cli itself
+
+DEPENDENCIES
+    python3, python3-markdown, lark-cli (must be authenticated as user via
+    `lark-cli auth login`)
+""")
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    p = argparse.ArgumentParser(
+        prog="markdown-to-feishu",
+        description="Convert a Markdown file (with rich embeds: tables, images, mermaid, attachments) into a Feishu docx. Re-runs update the previously-created doc by default.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=HELP_EPILOG,
+    )
+    p.add_argument("markdown", nargs="?", help="path to the .md file")
+    p.add_argument("--new", action="store_true", help="force-create a new doc even if state already has a mapping for this file")
+    p.add_argument("--update", metavar="DOC_ID", help="overwrite the given doc id (URL also accepted); ignores and then updates state")
+    p.add_argument("--title", help="override document title (default: first H1, else filename stem)")
+    p.add_argument("--parent-token", help="parent folder or wiki node token (only used when creating)")
+    p.add_argument("--parent-position", help="parent position keyword, e.g. my_library (only used when creating)")
+    p.add_argument("--as", dest="identity", choices=["user", "bot"], default="user", help="identity for lark-cli (default: user, so you own the doc)")
+    p.add_argument("--dry-run", action="store_true", help="print generated XML + embed plan without calling lark-cli")
+    p.add_argument("--state-file", help="override path to state.json (default: ~/.local/share/markdown-to-feishu/state.json)")
+    p.add_argument("--forget", action="store_true", help="remove the state mapping for this file (does not delete the Feishu doc) and exit")
+    p.add_argument("--show", action="store_true", help="print the recorded mapping for this file (if any) and exit")
+    p.add_argument("-v", "--verbose", action="store_true", help="verbose logging (every lark-cli invocation)")
+    p.add_argument("--version", action="version", version=f"markdown-to-feishu {VERSION}")
+    return p.parse_args(argv)
+
+
+def main(argv: list[str]) -> int:
+    args = parse_args(argv)
+    global STATE_FILE, STATE_DIR
+    if args.state_file:
+        STATE_FILE = Path(args.state_file).expanduser().resolve()
+        STATE_DIR = STATE_FILE.parent
+
+    if not args.markdown:
+        sys.stderr.write("error: missing markdown file (use --help)\n")
+        return 1
+
+    md_path = Path(args.markdown).expanduser().resolve()
+    if not md_path.exists() or not md_path.is_file():
+        sys.stderr.write(f"error: {md_path} is not a file\n")
+        return 1
+    key = str(md_path)
+
+    state = load_state()
+
+    if args.show:
+        entry = state.get(key)
+        if entry is None:
+            print(f"no mapping recorded for {md_path}")
+        else:
+            print(json.dumps(entry, indent=2, ensure_ascii=False))
+        return 0
+
+    if args.forget:
+        if key in state:
+            state.pop(key)
+            save_state(state)
+            print(f"forgot mapping for {md_path}")
+        else:
+            print(f"no mapping recorded for {md_path}")
+        return 0
+
+    md_text = md_path.read_text(encoding="utf-8")
+    title = args.title or derive_title(md_text, md_path)
+    session_tag = uuid.uuid4().hex[:8].upper()
+
+    try:
+        content, embeds = build_xml(md_path, title=title, session_tag=session_tag)
+    except Exception as e:
+        sys.stderr.write(f"error: failed to build XML: {e}\n")
+        return 1
+
+    if args.dry_run:
+        print("=== GENERATED DOCXXML ===")
+        print(content)
+        print()
+        print("=== EMBED PLAN ===")
+        if not embeds:
+            print("(no out-of-band embeds)")
+        else:
+            for e in embeds:
+                print(json.dumps(e, ensure_ascii=False))
+        target = "new doc"
+        if args.update:
+            target = f"update doc {args.update}"
+        elif not args.new and key in state:
+            target = f"update existing doc {state[key].get('doc_id')}"
+        print()
+        print(f"=== TARGET ===\n{target}")
+        return 0
+
+    # Decide create-vs-update
+    explicit_doc = args.update
+    if explicit_doc and explicit_doc.startswith("http"):
+        # extract /docx/<id>
+        m = re.search(r"/docx/([A-Za-z0-9]+)", explicit_doc)
+        if m:
+            explicit_doc = m.group(1)
+    target_doc_id = None
+    if explicit_doc:
+        target_doc_id = explicit_doc
+    elif not args.new and key in state:
+        target_doc_id = state[key].get("doc_id")
+
+    try:
+        outcome = create_or_overwrite_doc(
+            doc_id=target_doc_id,
+            content=content,
+            identity=args.identity,
+            parent_token=args.parent_token,
+            parent_position=args.parent_position,
+            verbose=args.verbose,
+        )
+    except LarkError as e:
+        sys.stderr.write(f"error: {e}\n")
+        return 1
+
+    doc_id = outcome["doc_id"]
+    failed_embeds: list[dict] = []
+    for embed in embeds:
+        try:
+            insert_embed(doc_id, embed, identity=args.identity, verbose=args.verbose)
+        except LarkError as e:
+            sys.stderr.write(f"[warn] failed to insert {embed['file']}: {e}\n")
+            failed_embeds.append(embed)
+
+    # Always try to clean up sentinels we managed to anchor
+    if embeds:
+        try:
+            cleanup_sentinels(doc_id, session_tag, embeds, identity=args.identity, verbose=args.verbose)
+        except LarkError as e:
+            sys.stderr.write(f"[warn] cleanup failed: {e}\n")
+
+    # Save state
+    entry = state.get(key, {})
+    entry.update({
+        "doc_id": doc_id,
+        "url": outcome.get("url") or entry.get("url"),
+        "updated_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"),
+        "title": title,
+    })
+    if entry.get("url") is None and not target_doc_id:
+        # Fetch URL via a separate call if it wasn't returned (shouldn't happen on create)
+        pass
+    state[key] = entry
+    save_state(state)
+
+    print(json.dumps({
+        "doc_id": doc_id,
+        "url": entry.get("url"),
+        "title": title,
+        "embeds_inserted": len(embeds) - len(failed_embeds),
+        "embeds_failed": len(failed_embeds),
+    }, indent=2, ensure_ascii=False))
+
+    return 2 if failed_embeds else 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
@@ -0,0 +1,90 @@
+"""notes feishu sidecar：HTTP 包一层 markdown-to-feishu。
+
+POST /convert  {md_path, title?, existing_doc_id?}
+  → 跑 markdown-to-feishu，parse 最后那段 JSON，返回 {doc_id, url}
+"""
+
+import json
+import logging
+import os
+import re
+import subprocess
+from pathlib import Path
+from typing import Optional
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(levelname)s %(name)s: %(message)s')
+log = logging.getLogger('feishu')
+
+app = FastAPI()
+
+
+@app.get('/healthz')
+def healthz():
+    return {'ok': True}
+
+
+# Request body accepted by POST /convert.
+class ConvertReq(BaseModel):
+    # Path of the markdown file to convert; must exist inside this container.
+    md_path: str
+    # Optional document title, forwarded to the script as --title.
+    title: Optional[str] = None
+    # Optional id of an already-created doc, forwarded as --update.
+    existing_doc_id: Optional[str] = None
+
+
+@app.post('/convert')
+def convert(req: ConvertReq):
+    md = Path(req.md_path)
+    if not md.exists():
+        raise HTTPException(400, f'md not found: {md}')
+
+    cmd = ['/usr/local/bin/markdown-to-feishu', str(md), '--as', 'user']
+    if req.existing_doc_id:
+        cmd += ['--update', req.existing_doc_id]
+    if req.title:
+        cmd += ['--title', req.title]
+    log.info("run: %s", ' '.join(cmd))
+
+    env = os.environ.copy()
+    # markdown-to-feishu state file 放 PVC，重启不丢
+    env['MD2FEISHU_STATE_DIR'] = '/data/feishu-state'
+    Path('/data/feishu-state').mkdir(parents=True, exist_ok=True)
+
+    try:
+        proc = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=600, env=env,
+            cwd=str(md.parent),
+        )
+    except subprocess.TimeoutExpired:
+        raise HTTPException(504, 'markdown-to-feishu timeout (>10min)')
+
+    # exit code 2 = embeds 有失败，但 doc 创建成功，仍 parse stdout
+    if proc.returncode not in (0, 2):
+        log.warning("md2feishu exit=%d stderr=%s", proc.returncode, proc.stderr[-500:])
+        raise HTTPException(502, f'md2feishu exit {proc.returncode}: '
+                                 f'{proc.stderr.strip()[-400:]}')
+
+    # 取 stdout 里最后一段 JSON 对象（script 的 final print）
+    out = proc.stdout.strip()
+    # 从后往前找第一个 '{'，取到末尾
+    last_open = out.rfind('{')
+    if last_open < 0:
+        raise HTTPException(502, f'md2feishu no json output. stdout tail: {out[-400:]}')
+    try:
+        data = json.loads(out[last_open:])
+    except json.JSONDecodeError as e:
+        raise HTTPException(502, f'md2feishu json parse: {e}; tail: {out[-400:]}')
+
+    doc_id = data.get('doc_id')
+    url = data.get('url')
+    if not doc_id or not url:
+        raise HTTPException(502, f'md2feishu missing doc_id/url: {data}')
+    log.info("ok: doc_id=%s url=%s embeds=%s",
+             doc_id, url, data.get('embeds_inserted'))
+    return {
+        'doc_id': doc_id,
+        'url': url,
+        'embeds_inserted': data.get('embeds_inserted', 0),
+        'embeds_failed': data.get('embeds_failed', 0),
+    }
@@ -79,6 +79,24 @@
            <button v-if="selected.status === 'failed'" class="retry-btn" @click="retry">↻ 重试</button>
            <button class="danger-btn" @click="remove">删除</button>
          </div>
+          <div v-if="selected.status === 'done'" class="feishu-row">
+            <a
+              v-if="selected.feishu_url"
+              :href="selected.feishu_url"
+              target="_blank"
+              rel="noopener"
+              class="feishu-link"
+            >📄 飞书文档 · {{ selected.feishu_url.replace(/^https?:\/\//, '').slice(0, 40) }}…</a>
+            <button
+              class="feishu-btn"
+              :disabled="feishuPushing"
+              @click="pushFeishu"
+            >
+              {{ feishuPushing ? '⏳ 推送中…'
+                : selected.feishu_url ? '↻ 重新生成' : '📤 一键转飞书文档' }}
+            </button>
+            <p v-if="feishuErr" class="feishu-err">{{ feishuErr }}</p>
+          </div>
        </header>
        <audio :src="audioUrl(selected.id)" controls class="audio" />

@@ -114,6 +132,7 @@ import {
  uploadRecording,
  deleteRecording,
  retryRecording,
+  convertFeishu,
  audioUrl as audioUrlFn,
  getPass,
  setPass,
@@ -130,6 +149,8 @@ const selected = ref(null)
 const selectedId = ref(null)
 const uploading = ref(false)
 const uploadErr = ref('')
+const feishuPushing = ref(false)
+const feishuErr = ref('')
 let pollTimer = null

 // 浏览器内录音（iOS 没法选录音机 App 文件，直接 web record 更顺）
@@ -318,6 +339,23 @@ async function retry() {
  } catch (e) { alert(e.message) }
 }

+async function pushFeishu() {
+  if (feishuPushing.value) return
+  feishuPushing.value = true
+  feishuErr.value = ''
+  try {
+    const r = await convertFeishu(selectedId.value)
+    if (selected.value) {
+      selected.value.feishu_doc_id = r.doc_id
+      selected.value.feishu_url = r.url
+    }
+  } catch (e) {
+    feishuErr.value = e.message || String(e)
+  } finally {
+    feishuPushing.value = false
+  }
+}
+
 function audioUrl(id) { return audioUrlFn(id) }

 function statusLabel(s) {
@@ -580,6 +618,41 @@ input, textarea { font-family: inherit; background: transparent; border: none; c
  padding: 3px 10px;
  border-radius: 4px;
 }
+
+.feishu-row {
+  margin-top: 12px;
+  display: flex;
+  gap: 10px;
+  align-items: center;
+  flex-wrap: wrap;
+}
+.feishu-link {
+  color: var(--accent-cyan);
+  background: rgba(6, 182, 212, 0.1);
+  padding: 6px 12px;
+  border-radius: 6px;
+  font-size: 12px;
+  text-decoration: none;
+}
+.feishu-link:hover { background: rgba(6, 182, 212, 0.2); }
+.feishu-btn {
+  background: var(--accent-strong);
+  color: #fff;
+  padding: 6px 14px;
+  border-radius: 6px;
+  font-size: 12px;
+  font-weight: 600;
+}
+.feishu-btn:hover:not(:disabled) { background: var(--accent); }
+.feishu-err {
+  width: 100%;
+  margin: 0;
+  color: var(--accent-red);
+  background: rgba(239,68,68,0.08);
+  padding: 6px 10px;
+  border-radius: 4px;
+  font-size: 12px;
+}
 .retry-btn { background: rgba(124, 92, 191, 0.15); color: var(--accent); }
 .retry-btn:hover { background: rgba(124, 92, 191, 0.3); }
 .danger-btn { background: rgba(239, 68, 68, 0.1); color: var(--accent-red); }
@@ -35,6 +35,9 @@ export function listRecordings() { return jreq('/api/recordings') }
 export function getRecording(id) { return jreq('/api/recordings/' + id) }
 export function deleteRecording(id) { return jreq('/api/recordings/' + id, { method: 'DELETE' }) }
 export function retryRecording(id) { return jreq('/api/recordings/' + id + '/retry', { method: 'POST' }) }
+export function convertFeishu(id) {
+  return jreq('/api/recordings/' + id + '/feishu', { method: 'POST' })
+}

 export function uploadRecording(title, file) {
  const fd = new FormData()
@@ -69,6 +69,8 @@ spec:
                secretKeyRef:
                  name: notes-creds
                  key: llm_token
+            - name: FEISHU_URL
+              value: http://localhost:8002
          readinessProbe:
            httpGet: { path: /healthz, port: http }
            initialDelaySeconds: 1
@@ -83,10 +85,39 @@ spec:
          volumeMounts:
            - name: data
              mountPath: /data
+        - name: feishu
+          image: registry.famzheng.me/mochi/notes-feishu:latest
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 8002
+              name: feishu
+          readinessProbe:
+            httpGet: { path: /healthz, port: feishu }
+            initialDelaySeconds: 3
+            periodSeconds: 10
+          livenessProbe:
+            httpGet: { path: /healthz, port: feishu }
+            initialDelaySeconds: 30
+            periodSeconds: 30
+          resources:
+            requests: { cpu: 20m, memory: 64Mi }
+            limits:   { cpu: 500m, memory: 384Mi }
+          volumeMounts:
+            - name: data
+              mountPath: /data
+            - name: lark-cli-config
+              mountPath: /root/.lark-cli
+              readOnly: false
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: notes-data
+        - name: lark-cli-config
+          secret:
+            secretName: lark-cli-creds
+            items:
+              - key: config.json
+                path: config.json
 ---
 apiVersion: v1
 kind: Service
@@ -36,6 +36,7 @@ struct AppState {
    llm_gateway: String,
    llm_token: String,
    llm_model: String,
+    feishu_url: String,
    http: reqwest::Client,
 }

@@ -59,6 +60,8 @@ async fn main() -> std::io::Result<()> {
        std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into());
    let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default();
    let llm_model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gemma-4-31b-it".into());
+    let feishu_url =
+        std::env::var("FEISHU_URL").unwrap_or_else(|_| "http://localhost:8002".into());

    std::fs::create_dir_all(&blobs_dir).expect("mkdir blobs_dir");

@@ -79,6 +82,9 @@ async fn main() -> std::io::Result<()> {
         );",
    )
    .expect("init schema");
+    // 兼容旧 db 增量加列；已存在忽略错误
+    let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_doc_id TEXT", []);
+    let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_url TEXT", []);
    tracing::info!(%db_path, blobs = %blobs_dir.display(), "notes ready");

    let http = reqwest::Client::builder()
@@ -94,6 +100,7 @@ async fn main() -> std::io::Result<()> {
        llm_gateway,
        llm_token,
        llm_model,
+        feishu_url,
        http,
    };

@@ -105,6 +112,7 @@ async fn main() -> std::io::Result<()> {
        .route("/recordings/:id", get(get_recording).delete(delete_recording))
        .route("/recordings/:id/audio", get(stream_audio))
        .route("/recordings/:id/retry", post(retry_recording))
+        .route("/recordings/:id/feishu", post(convert_feishu))
        .with_state(state.clone())
        .layer(from_fn_with_state(state.clone(), auth_middleware));

@@ -211,6 +219,8 @@ struct RecordingDetail {
    summary: Option<String>,
    error: Option<String>,
    created_at: String,
+    feishu_doc_id: Option<String>,
+    feishu_url: Option<String>,
 }

 // ---------- handlers ----------
@@ -253,26 +263,30 @@ async fn get_recording(
    type Row = (
        String, String, String, i64, String,
        Option<String>, Option<String>, Option<String>, String,
+        Option<String>, Option<String>,
    );
    let row: Option<Row> = conn
        .query_row(
            "SELECT title, filename, mime, size_bytes, status,
-                    transcript, summary, error, created_at
+                    transcript, summary, error, created_at,
+                    feishu_doc_id, feishu_url
             FROM recordings WHERE id = ?1",
            params![id],
            |r| {
                Ok((
                    r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?,
                    r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?,
+                    r.get(9)?, r.get(10)?,
                ))
            },
        )
        .optional()?;
-    let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at) =
-        row.ok_or(AppError::NotFound)?;
+    let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at,
+         feishu_doc_id, feishu_url) = row.ok_or(AppError::NotFound)?;
    Ok(JsonResp(RecordingDetail {
        id, title, filename, mime, size_bytes, status,
        transcript, summary, error, created_at,
+        feishu_doc_id, feishu_url,
    }))
 }

@@ -501,13 +515,16 @@ async fn call_llm_summary(s: &AppState, transcript: &str) -> Result<String, Stri
        "model": s.llm_model,
        "messages": [
            { "role": "system", "content":
-                "你是一个会议纪要助手。根据语音转写整理一份结构化纪要（markdown）：\n\
+                "你是一个会议纪要助手。根据语音转写整理一份结构化纪要（markdown 格式）：\n\
+                 \n\
                 1. **概要**：1-2 句话总结\n\
                 2. **关键讨论点**：bullet 列出\n\
                 3. **决定 / 结论**\n\
-                 4. **行动项 (action items)**：谁、做什么、何时\n\
-                 5. **待跟进 / 未决问题**\n\
-                 转写可能有 ASR 错字，结合上下文合理修正；遇到模糊处标 [？]。" },
+                 4. **行动项 (action items)**：每条用 markdown checkbox 格式 `- [ ] 谁 · 做什么 · 何时`\n\
+                 5. **待跟进 / 未决问题**：bullet 列出\n\
+                 \n\
+                 转写可能有 ASR 错字，结合上下文合理修正；遇到模糊处标 [？]。\n\
+                 不要编造没说过的内容。" },
            { "role": "user", "content": trimmed },
        ],
        "temperature": 0.3,
@@ -574,6 +591,107 @@ async fn retry_recording(
    Ok(JsonResp(json!({ "ok": true, "status": "pending" })))
 }

+/// `POST /api/recordings/:id/feishu` — push the transcript + summary to a Feishu docx.
+///
+/// Builds a markdown note (summary first, then links to the transcript and
+/// audio attachments, then the full transcript), writes it together with the
+/// attachments to `/data/feishu-tmp/<id>/`, and asks the feishu sidecar's
+/// `/convert` endpoint to convert it. When the recording already has a
+/// `feishu_doc_id`, it is passed along so the same doc is updated instead of
+/// a new one being created. On success the returned doc id / url are stored
+/// on the recording and echoed back as `{ doc_id, url }`.
+///
+/// # Errors
+///
+/// Returns `AppError::NotFound` for an unknown id, a bad-request error when
+/// the recording is not in status `done` or the sidecar call fails / returns
+/// an unusable body, and `AppError::Io` when writing the work files fails.
+async fn convert_feishu(
+    State(s): State<AppState>,
+    Path(id): Path<i64>,
+) -> Result<JsonResp<Value>, AppError> {
+    let row: (String, String, Option<String>, Option<String>, String, Option<String>) = {
+        let conn = s.db.lock().unwrap();
+        conn.query_row(
+            "SELECT title, filename, transcript, summary, status, feishu_doc_id
+             FROM recordings WHERE id = ?1",
+            params![id],
+            |r| {
+                Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?, r.get(5)?))
+            },
+        )
+        .optional()?
+        .ok_or(AppError::NotFound)?
+    };
+    let (title, filename, transcript, summary, status, existing_doc) = row;
+    if status != "done" {
+        return Err(AppError::bad_request(format!(
+            "recording not ready (status={status})"
+        )));
+    }
+    let summary = summary.unwrap_or_default();
+    let transcript = transcript.unwrap_or_default();
+
+    // Assemble the markdown. The extension comes from the stored filename
+    // and ends up both in a file name and inside a markdown link target, so
+    // only plain ASCII alphanumerics are accepted; anything else (e.g. `)`
+    // or a space, which would break the link) falls back to "m4a".
+    let ext = std::path::Path::new(&filename)
+        .extension()
+        .and_then(|x| x.to_str())
+        .filter(|x| !x.is_empty() && x.chars().all(|c| c.is_ascii_alphanumeric()))
+        .unwrap_or("m4a")
+        .to_string();
+    let audio_name = format!("audio.{ext}");
+    let md = format!(
+        "# {title}\n\n\
+         ## 📋 会议纪要\n\n\
+         {summary}\n\n\
+         ---\n\n\
+         ## 📎 原始材料\n\n\
+         - [📄 转录原文](./transcript.txt)\n\
+         - [🎙️ 原始录音](./{audio_name})\n\n\
+         ---\n\n\
+         ## 🎙️ 转录全文\n\n\
+         {transcript}\n",
+    );
+
+    // Write into the shared PVC directory; the sidecar mounts the same volume.
+    let work_dir = std::path::PathBuf::from(format!("/data/feishu-tmp/{id}"));
+    tokio::fs::create_dir_all(&work_dir).await.map_err(AppError::Io)?;
+    let md_path = work_dir.join("note.md");
+    tokio::fs::write(&md_path, md).await.map_err(AppError::Io)?;
+    tokio::fs::write(work_dir.join("transcript.txt"), &transcript)
+        .await
+        .map_err(AppError::Io)?;
+    // Copy the audio so the sidecar works on a private copy of the blob.
+    let audio_src = s.blobs_dir.join(id.to_string());
+    let audio_dst = work_dir.join(&audio_name);
+    tokio::fs::copy(&audio_src, &audio_dst).await.map_err(AppError::Io)?;
+
+    // Call the sidecar.
+    let url = format!("{}/convert", s.feishu_url.trim_end_matches('/'));
+    let mut payload = json!({
+        "md_path": md_path.to_string_lossy(),
+        "title": title,
+    });
+    if let Some(d) = existing_doc.as_deref().filter(|x| !x.is_empty()) {
+        payload["existing_doc_id"] = json!(d);
+    }
+    let resp = s
+        .http
+        .post(&url)
+        .json(&payload)
+        // The sidecar lets markdown-to-feishu run for up to 600s; wait a bit
+        // longer than that so we never give up on a conversion that is
+        // still going to succeed (and whose doc id we would then not record).
+        .timeout(std::time::Duration::from_secs(630))
+        .send()
+        .await
+        .map_err(|e| AppError::bad_request(format!("feishu sidecar: {e}")))?;
+    if !resp.status().is_success() {
+        let st = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        return Err(AppError::bad_request(format!("feishu {st}: {body}")));
+    }
+    let body: Value = resp.json().await.map_err(|e| AppError::bad_request(format!("decode: {e}")))?;
+    let doc_id = body.get("doc_id").and_then(|v| v.as_str()).unwrap_or("").to_string();
+    let doc_url = body.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string();
+    if doc_id.is_empty() || doc_url.is_empty() {
+        return Err(AppError::bad_request(format!("feishu bad response: {body}")));
+    }
+
+    // Remember the doc so the next push updates it in place.
+    {
+        let conn = s.db.lock().unwrap();
+        conn.execute(
+            "UPDATE recordings SET feishu_doc_id = ?1, feishu_url = ?2 WHERE id = ?3",
+            params![&doc_id, &doc_url, id],
+        )?;
+    }
+
+    Ok(JsonResp(json!({ "doc_id": doc_id, "url": doc_url })))
+}
+
 async fn stream_audio(
    State(s): State<AppState>,
    Path(id): Path<i64>,