From 68671784f659fb664c790de9be176b16c79ae6a4 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Sun, 17 May 2026 22:16:13 +0100 Subject: [PATCH] =?UTF-8?q?notes:=20=E5=8A=A0=E4=B8=80=E9=94=AE=E8=BD=AC?= =?UTF-8?q?=E9=A3=9E=E4=B9=A6=E6=96=87=E6=A1=A3=20(sidecar=20markdown-to-f?= =?UTF-8?q?eishu)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - backend: POST /api/recordings/:id/feishu → 拼 markdown (总结在最上 + 附件链接到转录/录音 + 转写全文) → 写 /data/feishu-tmp// → HTTP POST 到 feishu sidecar - 复用:已有 feishu_doc_id 时 --update 同一个 doc,前端按钮文案变「↻ 重新生成」 - schema 加 feishu_doc_id + feishu_url 两列(ALTER TABLE 兼容旧 db) - LLM prompt 改:行动项用 markdown checkbox `- [ ] 谁·做什么·何时` - sidecar apps/notes/feishu: node:20 + python3 + python3-markdown + @larksuite/cli + COPY 自己的 markdown-to-feishu script + FastAPI /convert - k8s: deployment 加 feishu container 共享 PVC;lark-cli-creds Secret 挂 /root/.lark-cli/config.json - CI: 主 image --no-cache(cube 规矩),sidecar 保留 layer cache(chromium-free,但 apt/npm 也大) - 前端: content 头部加「📤 一键转飞书文档」按钮;已转过显示飞书链接 + 按钮变重生成 --- .gitea/workflows/deploy-notes.yml | 16 +- apps/notes/feishu/Dockerfile | 23 + apps/notes/feishu/markdown-to-feishu | 970 +++++++++++++++++++++++++++ apps/notes/feishu/server.py | 90 +++ apps/notes/frontend/src/App.vue | 73 ++ apps/notes/frontend/src/lib/api.js | 3 + apps/notes/k8s/all.yaml | 31 + apps/notes/src/main.rs | 132 +++- 8 files changed, 1327 insertions(+), 11 deletions(-) create mode 100644 apps/notes/feishu/Dockerfile create mode 100755 apps/notes/feishu/markdown-to-feishu create mode 100644 apps/notes/feishu/server.py diff --git a/.gitea/workflows/deploy-notes.yml b/.gitea/workflows/deploy-notes.yml index a1a8cfe..a4e15cf 100644 --- a/.gitea/workflows/deploy-notes.yml +++ b/.gitea/workflows/deploy-notes.yml @@ -19,6 +19,7 @@ jobs: APP: notes NS: cube-notes IMAGE: registry.famzheng.me/mochi/notes + FEISHU_IMAGE: registry.famzheng.me/mochi/notes-feishu steps: - uses: actions/checkout@v4 @@ -37,19 +38,26 @@ 
jobs: npm ci npm run build - - name: Build & push image + - name: Build & push images env: REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} run: | echo "$REGISTRY_TOKEN" | docker login registry.famzheng.me -u mochi --password-stdin - # --no-cache 必须 —— 见 memory/feedback_cube_docker_cache.md + # main app —— FROM scratch + COPY musl binary,必须 --no-cache(cube docker cache 坑) docker build --no-cache -f "apps/$APP/Dockerfile" -t "$IMAGE:${{ steps.tag.outputs.sha }}" . docker push "$IMAGE:${{ steps.tag.outputs.sha }}" + # feishu sidecar —— node+python+chromium-free,layer cache 帮助大不用 --no-cache + docker build -f "apps/$APP/feishu/Dockerfile" \ + -t "$FEISHU_IMAGE:${{ steps.tag.outputs.sha }}" \ + "apps/$APP/feishu" + docker push "$FEISHU_IMAGE:${{ steps.tag.outputs.sha }}" - name: Initialize K8s resources run: kubectl apply -f apps/notes/k8s/all.yaml - name: Roll out to k3s run: | - kubectl -n "$NS" set image "deploy/$APP" "$APP=$IMAGE:${{ steps.tag.outputs.sha }}" - kubectl -n "$NS" rollout status "deploy/$APP" --timeout=120s + kubectl -n "$NS" set image "deploy/$APP" \ + "$APP=$IMAGE:${{ steps.tag.outputs.sha }}" \ + "feishu=$FEISHU_IMAGE:${{ steps.tag.outputs.sha }}" + kubectl -n "$NS" rollout status "deploy/$APP" --timeout=300s diff --git a/apps/notes/feishu/Dockerfile b/apps/notes/feishu/Dockerfile new file mode 100644 index 0000000..1a5d890 --- /dev/null +++ b/apps/notes/feishu/Dockerfile @@ -0,0 +1,23 @@ +# notes feishu sidecar:跑 markdown-to-feishu 把会议纪要 push 飞书 docx。 +# 跟 notes 主容器同 pod、共享 PVC(看到主容器在 /data/feishu-tmp// 写好的 md + 附件)。 + +FROM node:20-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 python3-pip python3-markdown ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +RUN npm install -g @larksuite/cli@1.0.29 + +RUN pip install --no-cache-dir --break-system-packages \ + fastapi==0.115.6 \ + uvicorn==0.34.0 + +COPY markdown-to-feishu /usr/local/bin/markdown-to-feishu +RUN chmod +x /usr/local/bin/markdown-to-feishu +COPY 
server.py /app/server.py + +ENV PYTHONUNBUFFERED=1 +WORKDIR /app +EXPOSE 8002 +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8002"] diff --git a/apps/notes/feishu/markdown-to-feishu b/apps/notes/feishu/markdown-to-feishu new file mode 100755 index 0000000..ae93cc3 --- /dev/null +++ b/apps/notes/feishu/markdown-to-feishu @@ -0,0 +1,970 @@ +#!/usr/bin/env python3 +"""markdown-to-feishu — convert a Markdown file (with rich embeds) into a Feishu +docx, using the lark-cli wrapper. Tables, images (URL + local), Mermaid / +PlantUML diagrams, and arbitrary attachments (PDF / CSV / log / anything) all +get planted as real DocxXML blocks. Re-runs against the same .md by default +update the previously-created doc instead of spawning a new one. +""" + +from __future__ import annotations + +import argparse +import html as html_lib +import json +import os +import re +import subprocess +import sys +import textwrap +import time +import uuid +from html.parser import HTMLParser +from pathlib import Path +from urllib.parse import urlparse + +import markdown + + +STATE_DIR = Path(os.environ.get("MD2FEISHU_STATE_DIR", str(Path.home() / ".local/share/markdown-to-feishu"))) +STATE_FILE = STATE_DIR / "state.json" + +SENTINEL_PREFIX = "MD2FEISHU_SENTINEL" + +VERSION = "0.1.0" + + +# --------------------------------------------------------------------------- +# State (markdown abs path -> doc id) so re-runs update in place +# --------------------------------------------------------------------------- + +def load_state() -> dict: + if not STATE_FILE.exists(): + return {} + try: + return json.loads(STATE_FILE.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return {} + + +def save_state(state: dict) -> None: + STATE_DIR.mkdir(parents=True, exist_ok=True) + STATE_FILE.write_text(json.dumps(state, indent=2, ensure_ascii=False), encoding="utf-8") + + +# --------------------------------------------------------------------------- +# lark-cli runner +# 
--------------------------------------------------------------------------- + +class LarkError(RuntimeError): + pass + + +def run_lark(args: list[str], *, stdin: str | None = None, identity: str = "user", verbose: bool = False, cwd: str | None = None) -> dict: + cmd = ["lark-cli", "--as", identity] + args + if verbose: + cwd_note = f" (cwd={cwd})" if cwd else "" + sys.stderr.write(f"[lark] {' '.join(cmd)}{cwd_note}\n") + proc = subprocess.run( + cmd, + input=stdin, + capture_output=True, + text=True, + cwd=cwd, + ) + if proc.returncode != 0: + raise LarkError( + f"lark-cli failed (exit {proc.returncode}): {' '.join(cmd)}\n" + f"stderr: {proc.stderr.strip()}\n" + f"stdout: {proc.stdout.strip()}" + ) + if not proc.stdout.strip(): + return {} + try: + return json.loads(proc.stdout) + except json.JSONDecodeError: + return {"_raw": proc.stdout} + + +# --------------------------------------------------------------------------- +# Markdown helpers +# --------------------------------------------------------------------------- + +def is_http_url(s: str) -> bool: + p = urlparse(s) + return p.scheme in ("http", "https") + + +def is_anchor(s: str) -> bool: + return s.startswith("#") + + +def preprocess_markdown(text: str) -> str: + """Handle GFM extras python-markdown core misses.""" + # Strip BOM + if text.startswith(""): + text = text[1:] + out_lines: list[str] = [] + in_fence = False + fence_re = re.compile(r"^\s*```") + strike_re = re.compile(r"~~(\S(?:.*?\S)?)~~") + # GFM task-list items at top level: "- [x] text" / "* [ ] text" / "1. [x] text" + # Convert to a stand-alone HTML block so python-markdown passes + # it through. Leading whitespace becomes a marker (so nested checkboxes + # don't get hoisted to top level). 
+ task_re = re.compile(r"^(\s*)(?:[-*+]|\d+\.)\s+\[([ xX])\]\s+(.*)$") + for line in text.split("\n"): + if fence_re.match(line): + in_fence = not in_fence + out_lines.append(line) + continue + if in_fence: + out_lines.append(line) + continue + m = task_re.match(line) + if m and not m.group(1): # top-level only; nested stays a list item + done = "true" if m.group(2).lower() == "x" else "false" + body = m.group(3).strip() + # Surround with blank lines so it parses as raw HTML block + out_lines.append("") + out_lines.append(f'{html_lib.escape(body)}') + out_lines.append("") + continue + out_lines.append(strike_re.sub(r"\1", line)) + return "\n".join(out_lines) + + +# --------------------------------------------------------------------------- +# HTML -> DocxXML converter +# --------------------------------------------------------------------------- + +INLINE_TAGS = {"a", "b", "strong", "em", "i", "u", "del", "s", "strike", "code", "span", "br", "img", "cite", "latex"} +BLOCK_PASSTHROUGH = {"p", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "hr", "br"} + + +def xml_escape_text(s: str) -> str: + return s.replace("&", "&").replace("<", "<").replace(">", ">") + + +def xml_escape_attr(s: str) -> str: + return xml_escape_text(s).replace('"', """) + + +class DocxXMLBuilder(HTMLParser): + """Walks python-markdown HTML and emits DocxXML. + + Local images / attachments / non-inline-able media become placeholder +

SENTINEL

paragraphs; each one is recorded in ``self.embeds`` so the + caller can media-insert the real file in the correct position afterwards. + """ + + def __init__(self, md_dir: Path, session_tag: str): + super().__init__(convert_charrefs=True) + self.md_dir = md_dir + self.session_tag = session_tag + self.out: list[str] = [] + self.embeds: list[dict] = [] # {sentinel, file, type, caption} + self._code_buf: list[str] | None = None + self._code_lang: str | None = None + self._table_buf: list[str] | None = None # we buffer the entire table so colspan/rowspan etc. just round-trip + self._table_depth = 0 + self._in_pre = False + self._inline_stack: list[str] = [] + self._li_stack: list[str] = [] # track ul/ol type for current li + self._blockquote_depth = 0 + self._p_depth = 0 # how many

are currently open in our output stream + + # ---- sentinel handling ---- + def _next_sentinel(self) -> str: + n = len(self.embeds) + # All caps + underscores so it never collides with normal markdown prose + return f"{SENTINEL_PREFIX}_{self.session_tag}_{n:04d}" + + def _resolve_local(self, src: str) -> Path | None: + # Strip query/fragment for sanity + clean = src.split("#", 1)[0].split("?", 1)[0] + if not clean or is_http_url(clean) or is_anchor(clean): + return None + p = Path(clean) + if not p.is_absolute(): + p = (self.md_dir / p).resolve() + return p if p.exists() and p.is_file() else None + + # ---- emit helpers ---- + def _emit(self, s: str) -> None: + # If we're buffering a table, append there instead + if self._table_buf is not None: + self._table_buf.append(s) + else: + self.out.append(s) + + def _emit_placeholder(self, file: Path, kind: str, caption: str | None = None) -> None: + sentinel = self._next_sentinel() + self.embeds.append({ + "sentinel": sentinel, + "file": str(file), + "type": kind, + "caption": caption, + }) + # The placeholder must end up as its own top-level

so media-insert + # can anchor on it cleanly and the cleanup pass can block_delete it. + # If we're currently inside a

, split: close, emit standalone, reopen. + if self._table_buf is not None: + # Inside a table cell — best we can do is emit the sentinel as + # inline text and rely on str_replace cleanup. Media still lands at + # top level (per --selection-with-ellipsis semantics). + self._emit(sentinel) + return + if self._p_depth > 0: + self.out.append("

") + self.out.append(f"

{sentinel}

") + self.out.append("

") + return + self._emit(f"

{sentinel}

") + + # ---- HTMLParser hooks ---- + def handle_starttag(self, tag, attrs): + attrd = dict(attrs) + + # Inside
: capture verbatim
+        if self._in_pre:
+            # Don't recurse, but still record raw markup if any nested tags appear
+            if tag == "code":
+                self._code_lang = self._extract_lang(attrd.get("class", ""))
+                self._code_buf = []
+            return
+
+        # Table buffer mode: just copy markup through, no transformations needed
+        if self._table_buf is not None:
+            self._table_buf.append(self._raw_tag(tag, attrd))
+            if tag == "table":
+                self._table_depth += 1
+            return
+
+        if tag == "table":
+            self._table_buf = []
+            self._table_depth = 1
+            self._table_buf.append(self._raw_tag(tag, attrd))
+            return
+
+        if tag == "pre":
+            self._in_pre = True
+            return
+
+        if tag == "img":
+            self._emit_img(attrd)
+            return
+
+        if tag == "a":
+            href = attrd.get("href", "")
+            local = self._resolve_local(href) if href else None
+            if local is not None:
+                # Inline attachment: keep the link text in the prose so the
+                # paragraph still reads naturally, and queue a placeholder so
+                # the attachment block appears right after this paragraph.
+                caption = attrd.get("title") or None
+                self._emit_placeholder(local, "file", caption)
+                # Drop the  tags (keep their text children) by pushing
+                # a "transparent" marker on the inline stack.
+                self._inline_stack.append("__TRANSPARENT_A__")
+                return
+            # Regular link
+            self._inline_stack.append("a")
+            attrs_s = self._attrs_string({"href": href})
+            self._emit(f"")
+            return
+
+        if tag in {"b", "strong"}:
+            self._inline_stack.append("b")
+            self._emit("")
+            return
+        if tag in {"em", "i"}:
+            self._inline_stack.append("em")
+            self._emit("")
+            return
+        if tag in {"u"}:
+            self._inline_stack.append("u")
+            self._emit("")
+            return
+        if tag in {"del", "s", "strike"}:
+            self._inline_stack.append("del")
+            self._emit("")
+            return
+        if tag == "code":
+            self._inline_stack.append("code")
+            self._emit("")
+            return
+        if tag == "br":
+            self._emit("
") + return + + if tag == "ul": + self._li_stack.append("ul") + self._emit("
") + return + if tag == "ol": + if self._li_stack and self._li_stack[-1] == "ol": + self._li_stack.pop() + self._emit("") + return + if tag == "li": + self._emit("") + return + + if tag == "blockquote": + self._blockquote_depth = max(0, self._blockquote_depth - 1) + self._emit("") + return + + if tag == "p": + self._p_depth = max(0, self._p_depth - 1) + self._emit("

") + return + + if tag == "checkbox": + if self._inline_stack and self._inline_stack[-1] == "checkbox": + self._inline_stack.pop() + self._emit("") + return + + if tag in BLOCK_PASSTHROUGH: + self._emit(f"") + return + + if self._inline_stack and self._inline_stack[-1] == "__UNKNOWN__": + self._inline_stack.pop() + + def handle_startendtag(self, tag, attrs): + attrd = dict(attrs) + if tag == "img": + self._emit_img(attrd) + return + if tag == "br": + self._emit("
") + return + if tag == "hr": + self._emit("
") + return + # Treat as start+end + self.handle_starttag(tag, attrs) + self.handle_endtag(tag) + + def handle_data(self, data): + if not data: + return + if self._in_pre and self._code_buf is not None: + self._code_buf.append(data) + return + if self._table_buf is not None: + self._table_buf.append(xml_escape_text(data)) + return + # Preserve user text but escape XML specials + # In
 outside  we also escape (shouldn't normally happen)
+        self._emit(xml_escape_text(data))
+
+    # ---- code / language extraction ----
+    @staticmethod
+    def _extract_lang(class_attr: str) -> str:
+        # Return the suffix of the first whitespace-separated class token that starts with "language-" or "lang-", or "" when no token matches (python-markdown fenced_code emits e.g. class="language-mermaid")
+        for tok in class_attr.split():
+            if tok.startswith("language-"):
+                return tok[len("language-"):]
+            if tok.startswith("lang-"):
+                return tok[len("lang-"):]
+        return ""
+
+    def _flush_code(self) -> None:
+        body = "".join(self._code_buf or [])
+        lang = (self._code_lang or "").strip().lower()
+        self._code_buf = None
+        self._code_lang = None
+        # Mermaid / PlantUML get rendered as whiteboards
+        if lang in {"mermaid"}:
+            self._emit(f'{xml_escape_text(body.rstrip())}')
+            return
+        if lang in {"plantuml", "puml"}:
+            self._emit(f'{xml_escape_text(body.rstrip())}')
+            return
+        # Strip trailing newline that python-markdown adds inside 
+        body = body.rstrip("\n")
+        lang_attr = f' lang="{xml_escape_attr(lang)}"' if lang else ""
+        self._emit(f"{xml_escape_text(body)}
") + + # ---- image emit ---- + def _emit_img(self, attrd: dict) -> None: + src = attrd.get("src", "").strip() + alt = attrd.get("alt", "").strip() + title = attrd.get("title", "").strip() + caption = title or alt or None + if not src: + return + if is_http_url(src): + attrs_s = self._attrs_string({"href": src, "caption": caption, "name": alt or None}) + self._emit(f"") + return + local = self._resolve_local(src) + if local is None: + sys.stderr.write(f"[warn] image not found, dropping: {src}\n") + return + self._emit_placeholder(local, "image", caption) + + # ---- attrs helpers ---- + @staticmethod + def _attrs_string(d: dict) -> str: + parts = [] + for k, v in d.items(): + if v is None or v == "": + continue + parts.append(f' {k}="{xml_escape_attr(str(v))}"') + return "".join(parts) + + @staticmethod + def _raw_tag(tag: str, attrd: dict) -> str: + return f"<{tag}{DocxXMLBuilder._attrs_string(attrd)}>" + + @staticmethod + def _sanitise_table(html: str) -> str: + """Coerce python-markdown's HTML table into DocxXML-legal markup: + - // become / + - Drop style="..." attributes (DocxXML uses background-color / + vertical-align, not CSS) + - Drop unknown attributes on cells + """ + # tag rename + html = re.sub(r"<(/?)strong\b", r"<\1b", html) + html = re.sub(r"<(/?)i\b", r"<\1em", html) + # drop style="..." on th/td/tr/table + html = re.sub(r'\s+style="[^"]*"', "", html) + # drop align="..." 
on th/td (we don't try to map to DocxXML alignment) + html = re.sub(r'\s+align="[^"]*"', "", html) + return html + + +# --------------------------------------------------------------------------- +# Driver +# --------------------------------------------------------------------------- + +def derive_title(md_text: str, md_path: Path) -> str: + for line in md_text.splitlines(): + line = line.strip() + if line.startswith("# "): + return line[2:].strip() + # fallback: filename without extension + return md_path.stem + + +def strip_first_h1(md_text: str) -> str: + """Drop the first H1 line if present — we'll convey it via instead.""" + out_lines: list[str] = [] + dropped = False + for line in md_text.splitlines(): + if not dropped and line.strip().startswith("# "): + dropped = True + continue + out_lines.append(line) + return "\n".join(out_lines) + + +def build_xml(md_path: Path, *, title: str, session_tag: str) -> tuple[str, list[dict]]: + raw = md_path.read_text(encoding="utf-8") + raw = preprocess_markdown(raw) + body_md = strip_first_h1(raw) + html = markdown.markdown( + body_md, + extensions=["fenced_code", "tables", "sane_lists"], + output_format="xhtml", + ) + builder = DocxXMLBuilder(md_dir=md_path.parent, session_tag=session_tag) + builder.feed(html) + builder.close() + body_xml = "".join(builder.out) + # Unwrap stray <p>...</p> around block-level <checkbox> (python-markdown + # wraps unknown HTML tags in <p>); then collapse empty <p></p> left over + # from the placeholder split. 
+ body_xml = re.sub( + r"<p>\s*(<checkbox\s+done=\"(?:true|false)\">[^<]*</checkbox>)\s*</p>", + r"\1", + body_xml, + ) + body_xml = re.sub(r"<p>\s*</p>", "", body_xml) + title_xml = f"<title>{xml_escape_text(title)}" + return title_xml + body_xml, builder.embeds + + +def create_or_overwrite_doc(*, doc_id: str | None, content: str, identity: str, parent_token: str | None, parent_position: str | None, verbose: bool) -> dict: + if doc_id: + if verbose: + sys.stderr.write(f"[md2feishu] overwriting existing doc {doc_id}\n") + # Use stdin for content to avoid argv length / shell escaping pitfalls + args = [ + "docs", "+update", + "--api-version", "v2", + "--doc", doc_id, + "--command", "overwrite", + "--doc-format", "xml", + "--content", "-", + ] + res = run_lark(args, stdin=content, identity=identity, verbose=verbose) + return {"doc_id": doc_id, "result": res} + if verbose: + sys.stderr.write("[md2feishu] creating new doc\n") + args = [ + "docs", "+create", + "--api-version", "v2", + "--doc-format", "xml", + "--content", "-", + ] + if parent_token: + args += ["--parent-token", parent_token] + if parent_position: + args += ["--parent-position", parent_position] + res = run_lark(args, stdin=content, identity=identity, verbose=verbose) + document = (res.get("data") or {}).get("document") or {} + new_id = document.get("document_id") + if not new_id: + raise LarkError(f"docs +create did not return a document_id: {json.dumps(res, ensure_ascii=False)}") + return {"doc_id": new_id, "url": document.get("url"), "result": res} + + +def insert_embed(doc_id: str, embed: dict, *, identity: str, verbose: bool) -> None: + # lark-cli refuses absolute paths for --file. cd into the file's parent + # and pass just the basename. 
+ file_path = Path(embed["file"]).resolve() + args = [ + "docs", "+media-insert", + "--doc", doc_id, + "--file", file_path.name, + "--type", embed["type"], + "--selection-with-ellipsis", embed["sentinel"], + "--before", + ] + if embed.get("caption") and embed["type"] == "image": + args += ["--caption", embed["caption"]] + run_lark(args, identity=identity, verbose=verbose, cwd=str(file_path.parent)) + + +def cleanup_sentinels(doc_id: str, session_tag: str, embeds: list[dict], *, identity: str, verbose: bool) -> None: + """Two-pass cleanup: + 1. block_delete any paragraph whose entire text is a sentinel + 2. str_replace any remaining sentinel occurrences (handles sentinels + that ended up inline inside table cells or mixed prose) + """ + res = run_lark([ + "docs", "+fetch", + "--api-version", "v2", + "--doc", doc_id, + "--detail", "with-ids", + "--doc-format", "xml", + ], identity=identity, verbose=verbose) + xml_payload = ((res.get("data") or {}).get("document") or {}).get("content") or "" + if not xml_payload: + xml_payload = json.dumps(res, ensure_ascii=False) + sentinel_re = re.compile( + rf']*\bid="([^"]+)"[^>]*>\s*{SENTINEL_PREFIX}_{session_tag}_\d+\s*

' + ) + ids = sentinel_re.findall(xml_payload) + if ids: + if verbose: + sys.stderr.write(f"[md2feishu] deleting {len(ids)} sentinel paragraph(s)\n") + try: + run_lark([ + "docs", "+update", + "--api-version", "v2", + "--doc", doc_id, + "--command", "block_delete", + "--block-id", ",".join(ids), + ], identity=identity, verbose=verbose) + except LarkError as e: + sys.stderr.write(f"[warn] block_delete cleanup failed: {e}\n") + # Fallback: scrub any inline sentinel text still in the doc + for embed in embeds: + sentinel = embed["sentinel"] + if sentinel in xml_payload and (not ids or f">{sentinel}<" not in xml_payload): + try: + run_lark([ + "docs", "+update", + "--api-version", "v2", + "--doc", doc_id, + "--command", "str_replace", + "--pattern", sentinel, + "--content", "", + ], identity=identity, verbose=verbose) + except LarkError as e: + sys.stderr.write(f"[warn] str_replace cleanup for {sentinel} failed: {e}\n") + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +HELP_EPILOG = textwrap.dedent(""" +EXAMPLES + # First run — creates a new Feishu doc, remembers the mapping + markdown-to-feishu ./report.md + + # Re-run on the same file — updates the same doc in place (no new doc spawned) + markdown-to-feishu ./report.md + + # Force a brand-new doc even when state already has a mapping + markdown-to-feishu --new ./report.md + + # Update a specific doc explicitly, ignoring state file + markdown-to-feishu --update doxcnAbc123 ./report.md + + # Drop into a particular folder when creating + markdown-to-feishu --parent-token fldcnXXXX ./report.md + + # Put it under your personal knowledge library + markdown-to-feishu --parent-position my_library ./report.md + + # Override the document title (default = first H1 or filename stem) + markdown-to-feishu --title "2026 Q2 OKR" ./okr.md + + # Inspect the generated XML and embed plan, without touching Feishu + 
markdown-to-feishu --dry-run ./report.md + + # Forget the mapping for a file (does NOT delete the Feishu doc) + markdown-to-feishu --forget ./report.md + + # Show the recorded mapping for this file + markdown-to-feishu --show ./report.md + +SUPPORTED MARKDOWN -> FEISHU BLOCK MAPPING + # / ## / ... / ###### ->

... (the first H1 becomes the + document ) + paragraphs -> <p> + **bold** / __bold__ -> <b> + *italic* / _italic_ -> <em> + ~~strike~~ (GFM) -> <del> + `inline code` -> <code> + [text](https://...) -> <a href="...">text</a> + [text](./local.pdf) -> attachment block (file uploaded via + docs +media-insert --type file) + ![alt](https://...) -> <img href="https://..."/> (URL is fetched + server-side by Feishu) + ![alt](./local.png) -> inline image block (file uploaded via + docs +media-insert --type image; alt / + title becomes caption) + > blockquote -> <blockquote> + --- / *** -> <hr/> + - item / * item / 1. item -> <ul> / <ol> with seq="auto" + nested lists (4-space indent) -> nested <ul> / <ol> + | a | b | GFM tables -> <table><thead><tr><th>... + ```lang ... ``` -> <pre lang="lang"><code>...</code></pre> + ```mermaid ... ``` -> <whiteboard type="mermaid">...</whiteboard> + ```plantuml ... ``` -> <whiteboard type="plantuml">...</whiteboard> + +ATTACHMENT DETECTION + Any [text](path) link whose href is NOT an http(s) URL and NOT an in-doc + anchor (#foo), and which resolves to an existing local file (relative to + the markdown file's directory), is uploaded as a Feishu file block. The + visible link text is dropped — the attachment block carries the filename + itself. This is what makes pasting PDFs / CSVs / logs / arbitrary binaries + feel "native". + + Caveat: if a link resolves to a missing local file, it falls through to a + regular <a> link (the path will appear as-is). Run with --verbose to see + each resolution decision. + +IDENTITY + Defaults to --as user so the created doc is owned by YOUR Feishu account, + not the bot. This means you can manage / move / delete it directly from + Feishu without any ownership transfer dance. Use --as bot only if you + explicitly want bot-owned documents. + +UPDATE-BY-DEFAULT BEHAVIOUR + State lives at ~/.local/share/markdown-to-feishu/state.json (override with + $MD2FEISHU_STATE_DIR or --state-file). 
Keyed by the markdown file's + absolute path. When state has a doc_id for the given path: + + - default -> overwrite that doc in place + - --new -> ignore state, create a fresh doc, replace + the mapping with the new id + - --update <id> -> overwrite the given id and update state + + overwrite replays the full XML and re-uploads every local media file from + source, so the doc always matches the markdown 1:1. Comments on the doc + survive overwrite; manual edits inside the doc do NOT (markdown is the + source of truth). + +EXIT CODES + 0 success + 1 generic error (bad args, file not found, lark-cli failure) + 2 partial success — doc created/updated but at least one embed failed + +ENVIRONMENT + MD2FEISHU_STATE_DIR override the directory holding state.json + LARK_CLI_PROFILE passed through; honoured by lark-cli itself + +DEPENDENCIES + python3, python3-markdown, lark-cli (must be authenticated as user via + `lark-cli auth login`) +""") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + p = argparse.ArgumentParser( + prog="markdown-to-feishu", + description="Convert a Markdown file (with rich embeds: tables, images, mermaid, attachments) into a Feishu docx. Re-runs update the previously-created doc by default.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=HELP_EPILOG, + ) + p.add_argument("markdown", nargs="?", help="path to the .md file") + p.add_argument("--new", action="store_true", help="force-create a new doc even if state already has a mapping for this file") + p.add_argument("--update", metavar="DOC_ID", help="overwrite the given doc id (URL also accepted); ignores and then updates state") + p.add_argument("--title", help="override document title (default: first H1, else filename stem)") + p.add_argument("--parent-token", help="parent folder or wiki node token (only used when creating)") + p.add_argument("--parent-position", help="parent position keyword, e.g. 
my_library (only used when creating)") + p.add_argument("--as", dest="identity", choices=["user", "bot"], default="user", help="identity for lark-cli (default: user, so you own the doc)") + p.add_argument("--dry-run", action="store_true", help="print generated XML + embed plan without calling lark-cli") + p.add_argument("--state-file", help="override path to state.json (default: ~/.local/share/markdown-to-feishu/state.json)") + p.add_argument("--forget", action="store_true", help="remove the state mapping for this file (does not delete the Feishu doc) and exit") + p.add_argument("--show", action="store_true", help="print the recorded mapping for this file (if any) and exit") + p.add_argument("-v", "--verbose", action="store_true", help="verbose logging (every lark-cli invocation)") + p.add_argument("--version", action="version", version=f"markdown-to-feishu {VERSION}") + return p.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + global STATE_FILE, STATE_DIR + if args.state_file: + STATE_FILE = Path(args.state_file).expanduser().resolve() + STATE_DIR = STATE_FILE.parent + + if not args.markdown: + sys.stderr.write("error: missing markdown file (use --help)\n") + return 1 + + md_path = Path(args.markdown).expanduser().resolve() + if not md_path.exists() or not md_path.is_file(): + sys.stderr.write(f"error: {md_path} is not a file\n") + return 1 + key = str(md_path) + + state = load_state() + + if args.show: + entry = state.get(key) + if entry is None: + print(f"no mapping recorded for {md_path}") + else: + print(json.dumps(entry, indent=2, ensure_ascii=False)) + return 0 + + if args.forget: + if key in state: + state.pop(key) + save_state(state) + print(f"forgot mapping for {md_path}") + else: + print(f"no mapping recorded for {md_path}") + return 0 + + md_text = md_path.read_text(encoding="utf-8") + title = args.title or derive_title(md_text, md_path) + session_tag = uuid.uuid4().hex[:8].upper() + + try: + content, embeds = 
build_xml(md_path, title=title, session_tag=session_tag) + except Exception as e: + sys.stderr.write(f"error: failed to build XML: {e}\n") + return 1 + + if args.dry_run: + print("=== GENERATED DOCXXML ===") + print(content) + print() + print("=== EMBED PLAN ===") + if not embeds: + print("(no out-of-band embeds)") + else: + for e in embeds: + print(json.dumps(e, ensure_ascii=False)) + target = "new doc" + if args.update: + target = f"update doc {args.update}" + elif not args.new and key in state: + target = f"update existing doc {state[key].get('doc_id')}" + print() + print(f"=== TARGET ===\n{target}") + return 0 + + # Decide create-vs-update + explicit_doc = args.update + if explicit_doc and explicit_doc.startswith("http"): + # extract /docx/<id> + m = re.search(r"/docx/([A-Za-z0-9]+)", explicit_doc) + if m: + explicit_doc = m.group(1) + target_doc_id = None + if explicit_doc: + target_doc_id = explicit_doc + elif not args.new and key in state: + target_doc_id = state[key].get("doc_id") + + try: + outcome = create_or_overwrite_doc( + doc_id=target_doc_id, + content=content, + identity=args.identity, + parent_token=args.parent_token, + parent_position=args.parent_position, + verbose=args.verbose, + ) + except LarkError as e: + sys.stderr.write(f"error: {e}\n") + return 1 + + doc_id = outcome["doc_id"] + failed_embeds: list[dict] = [] + for embed in embeds: + try: + insert_embed(doc_id, embed, identity=args.identity, verbose=args.verbose) + except LarkError as e: + sys.stderr.write(f"[warn] failed to insert {embed['file']}: {e}\n") + failed_embeds.append(embed) + + # Always try to clean up sentinels we managed to anchor + if embeds: + try: + cleanup_sentinels(doc_id, session_tag, embeds, identity=args.identity, verbose=args.verbose) + except LarkError as e: + sys.stderr.write(f"[warn] cleanup failed: {e}\n") + + # Save state + entry = state.get(key, {}) + entry.update({ + "doc_id": doc_id, + "url": outcome.get("url") or entry.get("url"), + "updated_at": 
def _parse_final_json(stdout: str) -> Optional[dict]:
    """Return the last JSON object printed on ``stdout``, or ``None``.

    markdown-to-feishu ends with a pretty-printed JSON object, possibly
    preceded by other output.  The object echoes the document title verbatim,
    so string values may themselves contain ``{``.  Every ``{`` is therefore
    tried as a start offset, scanning from the end backwards, until the
    remainder of the text parses as a JSON object.
    """
    text = stdout.strip()
    start = text.rfind('{')
    while start >= 0:
        try:
            candidate = json.loads(text[start:])
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        # Not the real opening brace (e.g. a '{' inside the title); step back.
        start = text.rfind('{', 0, start)
    return None


@app.post('/convert')
def convert(req: ConvertReq):
    """Run markdown-to-feishu on ``req.md_path`` and return the resulting doc.

    Request fields:
        md_path: markdown file to publish; the script runs with the file's
            directory as its working directory.
        title: optional document title, forwarded as ``--title``.
        existing_doc_id: optional doc to overwrite, forwarded as ``--update``.

    Returns:
        ``{'doc_id', 'url', 'embeds_inserted', 'embeds_failed'}``.

    Raises:
        HTTPException 400: ``md_path`` is not an existing regular file.
        HTTPException 504: the script did not finish within 600 seconds.
        HTTPException 502: the script exited with a code other than 0/2, or
            its output carried no JSON object with both ``doc_id`` and ``url``.
    """
    md = Path(req.md_path)
    # The script itself rejects anything that is not a regular file; checking
    # here turns that caller mistake into a 400 instead of a 502.
    if not md.is_file():
        raise HTTPException(400, f'md not found: {md}')

    cmd = ['/usr/local/bin/markdown-to-feishu', str(md), '--as', 'user']
    # Use the --opt=value form: argparse would otherwise treat a value that
    # starts with '-' (e.g. a title like "--draft") as another option.
    if req.existing_doc_id:
        cmd.append(f'--update={req.existing_doc_id}')
    if req.title:
        cmd.append(f'--title={req.title}')
    log.info("run: %s", ' '.join(cmd))

    env = os.environ.copy()
    # Keep the markdown-to-feishu state on the persistent volume so it
    # survives restarts.
    # NOTE(review): the part of the script visible in this change only honours
    # --state-file; confirm it actually reads MD2FEISHU_STATE_DIR.
    env['MD2FEISHU_STATE_DIR'] = '/data/feishu-state'
    Path('/data/feishu-state').mkdir(parents=True, exist_ok=True)

    try:
        proc = subprocess.run(
            cmd, capture_output=True, text=True, timeout=600, env=env,
            cwd=str(md.parent),
        )
    except subprocess.TimeoutExpired:
        raise HTTPException(504, 'markdown-to-feishu timeout (>10min)')

    # Exit code 2 means some embeds failed but the doc itself was written, so
    # stdout is still parsed in that case.
    if proc.returncode not in (0, 2):
        log.warning("md2feishu exit=%d stderr=%s", proc.returncode, proc.stderr[-500:])
        raise HTTPException(502, f'md2feishu exit {proc.returncode}: '
                                 f'{proc.stderr.strip()[-400:]}')

    out = proc.stdout.strip()
    data = _parse_final_json(out)
    if data is None:
        raise HTTPException(502, f'md2feishu no json output. stdout tail: {out[-400:]}')

    doc_id = data.get('doc_id')
    url = data.get('url')
    if not doc_id or not url:
        raise HTTPException(502, f'md2feishu missing doc_id/url: {data}')
    log.info("ok: doc_id=%s url=%s embeds=%s",
             doc_id, url, data.get('embeds_inserted'))
    return {
        'doc_id': doc_id,
        'url': url,
        'embeds_inserted': data.get('embeds_inserted', 0),
        'embeds_failed': data.get('embeds_failed', 0),
    }
// Publish the currently selected recording to Feishu (or regenerate its doc).
// While a push is running further clicks are ignored; a failure is surfaced
// through `feishuErr` instead of being thrown.
async function pushFeishu() {
  if (feishuPushing.value) return
  // Remember which recording this request is for: the user may select a
  // different one while the (potentially slow) conversion is in flight.
  const id = selectedId.value
  if (id == null) return
  feishuPushing.value = true
  feishuErr.value = ''
  try {
    const r = await convertFeishu(id)
    // Only patch the detail view if it still belongs to the recording that
    // was converted; otherwise another recording would show this doc's link.
    if (selected.value && selectedId.value === id) {
      selected.value.feishu_doc_id = r.doc_id
      selected.value.feishu_url = r.url
    }
  } catch (e) {
    feishuErr.value = e.message || String(e)
  } finally {
    feishuPushing.value = false
  }
}
// POST /api/recordings/:id/feishu — ask the backend to create (or refresh)
// the Feishu document for recording `id`. Returns whatever `jreq` returns
// for that request.
export function convertFeishu(id) {
  const endpoint = `/api/recordings/${id}/feishu`
  return jreq(endpoint, { method: 'POST' })
}
mountPath: /data + - name: lark-cli-config + mountPath: /root/.lark-cli + readOnly: false volumes: - name: data persistentVolumeClaim: claimName: notes-data + - name: lark-cli-config + secret: + secretName: lark-cli-creds + items: + - key: config.json + path: config.json --- apiVersion: v1 kind: Service diff --git a/apps/notes/src/main.rs b/apps/notes/src/main.rs index bc8e0a1..29a0453 100644 --- a/apps/notes/src/main.rs +++ b/apps/notes/src/main.rs @@ -36,6 +36,7 @@ struct AppState { llm_gateway: String, llm_token: String, llm_model: String, + feishu_url: String, http: reqwest::Client, } @@ -59,6 +60,8 @@ async fn main() -> std::io::Result<()> { std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into()); let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default(); let llm_model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gemma-4-31b-it".into()); + let feishu_url = + std::env::var("FEISHU_URL").unwrap_or_else(|_| "http://localhost:8002".into()); std::fs::create_dir_all(&blobs_dir).expect("mkdir blobs_dir"); @@ -79,6 +82,9 @@ async fn main() -> std::io::Result<()> { );", ) .expect("init schema"); + // 兼容旧 db 增量加列;已存在忽略错误 + let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_doc_id TEXT", []); + let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_url TEXT", []); tracing::info!(%db_path, blobs = %blobs_dir.display(), "notes ready"); let http = reqwest::Client::builder() @@ -94,6 +100,7 @@ async fn main() -> std::io::Result<()> { llm_gateway, llm_token, llm_model, + feishu_url, http, }; @@ -105,6 +112,7 @@ async fn main() -> std::io::Result<()> { .route("/recordings/:id", get(get_recording).delete(delete_recording)) .route("/recordings/:id/audio", get(stream_audio)) .route("/recordings/:id/retry", post(retry_recording)) + .route("/recordings/:id/feishu", post(convert_feishu)) .with_state(state.clone()) .layer(from_fn_with_state(state.clone(), auth_middleware)); @@ -211,6 +219,8 @@ struct RecordingDetail { 
summary: Option<String>, error: Option<String>, created_at: String, + feishu_doc_id: Option<String>, + feishu_url: Option<String>, } // ---------- handlers ---------- @@ -253,26 +263,30 @@ async fn get_recording( type Row = ( String, String, String, i64, String, Option<String>, Option<String>, Option<String>, String, + Option<String>, Option<String>, ); let row: Option<Row> = conn .query_row( "SELECT title, filename, mime, size_bytes, status, - transcript, summary, error, created_at + transcript, summary, error, created_at, + feishu_doc_id, feishu_url FROM recordings WHERE id = ?1", params![id], |r| { Ok(( r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?, r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?, + r.get(9)?, r.get(10)?, )) }, ) .optional()?; - let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at) = - row.ok_or(AppError::NotFound)?; + let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at, + feishu_doc_id, feishu_url) = row.ok_or(AppError::NotFound)?; Ok(JsonResp(RecordingDetail { id, title, filename, mime, size_bytes, status, transcript, summary, error, created_at, + feishu_doc_id, feishu_url, })) } @@ -501,13 +515,16 @@ async fn call_llm_summary(s: &AppState, transcript: &str) -> Result<String, Stri "model": s.llm_model, "messages": [ { "role": "system", "content": - "你是一个会议纪要助手。根据语音转写整理一份结构化纪要(markdown):\n\ + "你是一个会议纪要助手。根据语音转写整理一份结构化纪要(markdown 格式):\n\ + \n\ 1. **概要**:1-2 句话总结\n\ 2. **关键讨论点**:bullet 列出\n\ 3. **决定 / 结论**\n\ - 4. **行动项 (action items)**:谁、做什么、何时\n\ - 5. **待跟进 / 未决问题**\n\ - 转写可能有 ASR 错字,结合上下文合理修正;遇到模糊处标 [?]。" }, + 4. **行动项 (action items)**:每条用 markdown checkbox 格式 `- [ ] 谁 · 做什么 · 何时`\n\ + 5. 
**待跟进 / 未决问题**:bullet 列出\n\ + \n\ + 转写可能有 ASR 错字,结合上下文合理修正;遇到模糊处标 [?]。\n\ + 不要编造没说过的内容。" }, { "role": "user", "content": trimmed }, ], "temperature": 0.3, @@ -574,6 +591,107 @@ async fn retry_recording( Ok(JsonResp(json!({ "ok": true, "status": "pending" }))) } +/// `POST /api/recordings/:id/feishu` — 把转写 + 纪要 push 成飞书 docx。 +/// 已经转过的 piece 仍 update 同一个 doc(markdown-to-feishu 自带 --update)。 +async fn convert_feishu( + State(s): State<AppState>, + Path(id): Path<i64>, +) -> Result<JsonResp<Value>, AppError> { + let row: (String, String, Option<String>, Option<String>, String, Option<String>) = { + let conn = s.db.lock().unwrap(); + conn.query_row( + "SELECT title, filename, transcript, summary, status, feishu_doc_id + FROM recordings WHERE id = ?1", + params![id], + |r| { + Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?, r.get(5)?)) + }, + ) + .optional()? + .ok_or(AppError::NotFound)? + }; + let (title, filename, transcript, summary, status, existing_doc) = row; + if status != "done" { + return Err(AppError::bad_request(format!( + "recording not ready (status={status})" + ))); + } + let summary = summary.unwrap_or_default(); + let transcript = transcript.unwrap_or_default(); + + // 拼 markdown + let ext = std::path::Path::new(&filename) + .extension() + .and_then(|x| x.to_str()) + .unwrap_or("m4a") + .to_string(); + let audio_name = format!("audio.{ext}"); + let md = format!( + "# {title}\n\n\ + ## 📋 会议纪要\n\n\ + {summary}\n\n\ + ---\n\n\ + ## 📎 原始材料\n\n\ + - [📄 转录原文](./transcript.txt)\n\ + - [🎙️ 原始录音](./{audio_name})\n\n\ + ---\n\n\ + ## 🎙️ 转录全文\n\n\ + {transcript}\n", + ); + + // 落到 PVC 共享目录,sidecar 同样挂这个卷 + let work_dir = std::path::PathBuf::from(format!("/data/feishu-tmp/{id}")); + tokio::fs::create_dir_all(&work_dir).await.map_err(AppError::Io)?; + let md_path = work_dir.join("note.md"); + tokio::fs::write(&md_path, md).await.map_err(AppError::Io)?; + tokio::fs::write(work_dir.join("transcript.txt"), &transcript) + .await + .map_err(AppError::Io)?; + 
// 拷 audio(用 copy,sidecar 跑期间不会被改) + let audio_src = s.blobs_dir.join(id.to_string()); + let audio_dst = work_dir.join(&audio_name); + tokio::fs::copy(&audio_src, &audio_dst).await.map_err(AppError::Io)?; + + // 调 sidecar + let url = format!("{}/convert", s.feishu_url.trim_end_matches('/')); + let mut payload = json!({ + "md_path": md_path.to_string_lossy(), + "title": title, + }); + if let Some(d) = existing_doc.as_deref().filter(|x| !x.is_empty()) { + payload["existing_doc_id"] = json!(d); + } + let resp = s + .http + .post(&url) + .json(&payload) + .timeout(std::time::Duration::from_secs(300)) + .send() + .await + .map_err(|e| AppError::bad_request(format!("feishu sidecar: {e}")))?; + if !resp.status().is_success() { + let st = resp.status(); + let body = resp.text().await.unwrap_or_default(); + return Err(AppError::bad_request(format!("feishu {st}: {body}"))); + } + let body: Value = resp.json().await.map_err(|e| AppError::bad_request(format!("decode: {e}")))?; + let doc_id = body.get("doc_id").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let doc_url = body.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string(); + if doc_id.is_empty() || doc_url.is_empty() { + return Err(AppError::bad_request(format!("feishu bad response: {body}"))); + } + + { + let conn = s.db.lock().unwrap(); + conn.execute( + "UPDATE recordings SET feishu_doc_id = ?1, feishu_url = ?2 WHERE id = ?3", + params![&doc_id, &doc_url, id], + )?; + } + + Ok(JsonResp(json!({ "doc_id": doc_id, "url": doc_url }))) +} + async fn stream_audio( State(s): State<AppState>, Path(id): Path<i64>,