notes: 加一键转飞书文档 (sidecar markdown-to-feishu)
deploy notes / build-and-deploy (push) Failing after 2m2s
deploy notes / build-and-deploy (push) Failing after 2m2s
- backend: POST /api/recordings/:id/feishu → 拼 markdown (总结在最上 + 附件链接到转录/录音 + 转写全文) → 写 /data/feishu-tmp/<id>/ → HTTP POST 到 feishu sidecar
- 复用:已有 feishu_doc_id 时 --update 同一个 doc,前端按钮文案变「↻ 重新生成」
- schema 加 feishu_doc_id + feishu_url 两列(ALTER TABLE 兼容旧 db)
- LLM prompt 改:行动项用 markdown checkbox `- [ ] 谁·做什么·何时`
- sidecar apps/notes/feishu: node:20 + python3 + python3-markdown + @larksuite/cli + COPY 自己的 markdown-to-feishu script + FastAPI /convert
- k8s: deployment 加 feishu container 共享 PVC;lark-cli-creds Secret 挂 /root/.lark-cli/config.json
- CI: 主 image --no-cache(cube 规矩),sidecar 保留 layer cache(chromium-free,但 apt/npm 也大)
- 前端: content 头部加「📤 一键转飞书文档」按钮;已转过显示飞书链接 + 按钮变重生成
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
# notes feishu sidecar: runs markdown-to-feishu to push meeting notes into a Feishu docx.
# Runs in the same pod as the main notes container and shares its PVC, so it can see
# the md + attachments the main container wrote under /data/feishu-tmp/<id>/.

# Node base image because lark-cli is installed from npm below.
FROM node:20-slim

# Python runtime for the converter script and the HTTP server; the apt lists are
# removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 python3-pip python3-markdown ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Pinned lark-cli version.
RUN npm install -g @larksuite/cli@1.0.29

# NOTE(review): --break-system-packages is presumably needed because the base
# image marks its system Python as externally managed — confirm.
RUN pip install --no-cache-dir --break-system-packages \
        fastapi==0.115.6 \
        uvicorn==0.34.0

# The converter script is copied into the image and made executable.
COPY markdown-to-feishu /usr/local/bin/markdown-to-feishu
RUN chmod +x /usr/local/bin/markdown-to-feishu
COPY server.py /app/server.py

# Unbuffered stdout/stderr so log lines show up immediately.
ENV PYTHONUNBUFFERED=1
WORKDIR /app
EXPOSE 8002
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8002"]
|
||||
Executable
+970
@@ -0,0 +1,970 @@
|
||||
#!/usr/bin/env python3
|
||||
"""markdown-to-feishu — convert a Markdown file (with rich embeds) into a Feishu
|
||||
docx, using the lark-cli wrapper. Tables, images (URL + local), Mermaid /
|
||||
PlantUML diagrams, and arbitrary attachments (PDF / CSV / log / anything) all
|
||||
get planted as real DocxXML blocks. Re-runs against the same .md by default
|
||||
update the previously-created doc instead of spawning a new one.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import html as html_lib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
import time
|
||||
import uuid
|
||||
from html.parser import HTMLParser
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import markdown
|
||||
|
||||
|
||||
# Directory holding the persistent state file. The MD2FEISHU_STATE_DIR
# environment variable overrides the default location under the home directory.
STATE_DIR = Path(os.environ.get("MD2FEISHU_STATE_DIR", str(Path.home() / ".local/share/markdown-to-feishu")))
# JSON file read by load_state() and written by save_state().
STATE_FILE = STATE_DIR / "state.json"

# Common prefix of the placeholder tokens generated for local embeds
# (see DocxXMLBuilder._next_sentinel).
SENTINEL_PREFIX = "MD2FEISHU_SENTINEL"

# Version string reported by the --version flag.
VERSION = "0.1.0"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State (markdown abs path -> doc id) so re-runs update in place
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_state() -> dict:
    """Load the persisted state mapping from ``STATE_FILE``.

    Returns:
        The decoded JSON object. An empty dict is returned when the file is
        missing or unreadable, is not valid UTF-8 / JSON, or does not contain
        a JSON object at the top level.
    """
    try:
        state = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # OSError covers a missing or unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError, so a corrupted state
        # file degrades to "no state" instead of crashing the run.
        return {}
    # Callers treat the state as a mapping; reject lists / scalars.
    return state if isinstance(state, dict) else {}
|
||||
|
||||
|
||||
def save_state(state: dict) -> None:
    """Persist ``state`` as pretty-printed UTF-8 JSON in ``STATE_FILE``.

    The payload is written to a temporary sibling file and then moved over the
    real file with ``os.replace``, so an interrupted write cannot leave a
    truncated state file behind (which ``load_state`` would read as empty).

    Args:
        state: JSON-serialisable mapping to store.

    Raises:
        OSError: if the directory or file cannot be written.
        TypeError: if ``state`` is not JSON-serialisable.
    """
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state, indent=2, ensure_ascii=False)
    # The pid in the name keeps concurrent writers from sharing a temp file.
    tmp_file = STATE_FILE.with_name(f"{STATE_FILE.name}.{os.getpid()}.tmp")
    try:
        tmp_file.write_text(payload, encoding="utf-8")
        os.replace(tmp_file, STATE_FILE)
    finally:
        # After a successful replace the temp file is already gone; this only
        # cleans up when the write or the replace failed.
        tmp_file.unlink(missing_ok=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# lark-cli runner
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class LarkError(RuntimeError):
    """Raised when a lark-cli invocation fails or returns an unusable result."""
|
||||
|
||||
|
||||
def run_lark(args: list[str], *, stdin: str | None = None, identity: str = "user", verbose: bool = False, cwd: str | None = None) -> dict:
    """Run ``lark-cli --as <identity> <args...>`` and return its JSON output.

    Args:
        args: arguments appended after ``lark-cli --as <identity>``.
        stdin: optional text piped to the process' standard input.
        identity: value passed to ``--as``.
        verbose: when true, log the command line (and cwd) to stderr.
        cwd: optional working directory for the subprocess.

    Returns:
        The parsed JSON object printed on stdout; ``{}`` when stdout is empty;
        ``{"_raw": <stdout>}`` when stdout is not a JSON object.

    Raises:
        LarkError: if the process cannot be started or exits non-zero.
    """
    cmd = ["lark-cli", "--as", identity] + args
    if verbose:
        cwd_note = f" (cwd={cwd})" if cwd else ""
        sys.stderr.write(f"[lark] {' '.join(cmd)}{cwd_note}\n")
    try:
        proc = subprocess.run(
            cmd,
            input=stdin,
            capture_output=True,
            text=True,
            # Explicit UTF-8 so non-ASCII document content does not depend on
            # the locale of the environment the tool runs in.
            encoding="utf-8",
            cwd=cwd,
        )
    except OSError as exc:
        # Missing binary, missing cwd, permission problems: surface them as the
        # error type callers already handle.
        raise LarkError(f"lark-cli could not be started: {' '.join(cmd)}\n{exc}") from exc
    if proc.returncode != 0:
        raise LarkError(
            f"lark-cli failed (exit {proc.returncode}): {' '.join(cmd)}\n"
            f"stderr: {proc.stderr.strip()}\n"
            f"stdout: {proc.stdout.strip()}"
        )
    if not proc.stdout.strip():
        return {}
    try:
        parsed = json.loads(proc.stdout)
    except json.JSONDecodeError:
        return {"_raw": proc.stdout}
    # Callers use ``.get`` on the result, so keep the dict contract even when
    # the CLI prints a JSON list or scalar.
    return parsed if isinstance(parsed, dict) else {"_raw": proc.stdout}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Markdown helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def is_http_url(s: str) -> bool:
    """Return True when ``s`` parses as a URL with an http or https scheme.

    Strings that ``urlparse`` rejects outright (for example an unbalanced
    ``[`` in the host part) are reported as "not an http URL" instead of
    propagating ``ValueError`` to the caller.
    """
    try:
        scheme = urlparse(s).scheme
    except ValueError:
        return False
    return scheme in ("http", "https")
|
||||
|
||||
|
||||
def is_anchor(s: str) -> bool:
    """Return True when ``s`` begins with ``#`` (an in-document anchor)."""
    return s[:1] == "#"
|
||||
|
||||
|
||||
def preprocess_markdown(text: str) -> str:
    """Handle GFM extras python-markdown core misses.

    Applied line by line, outside of ``` fenced code blocks:

    * a leading byte-order mark is removed;
    * top-level task-list items (``- [ ] text`` / ``* [x] text`` /
      ``1. [x] text``) become stand-alone ``<checkbox done="...">`` lines
      surrounded by blank lines, with the item text HTML-escaped;
    * ``~~strike~~`` becomes ``<del>strike</del>``.

    Args:
        text: raw markdown source.

    Returns:
        The rewritten markdown, lines joined with ``\\n``.
    """
    # Strip BOM. The character is spelled as an escape so it is visible in the
    # source; an empty-string prefix test would match every input and drop the
    # first real character.
    if text.startswith("\ufeff"):
        text = text[1:]
    out_lines: list[str] = []
    in_fence = False
    fence_re = re.compile(r"^\s*```")
    strike_re = re.compile(r"~~(\S(?:.*?\S)?)~~")
    # GFM task-list items at top level: "- [x] text" / "* [ ] text" / "1. [x] text"
    # Convert to a stand-alone HTML <checkbox> block so python-markdown passes
    # it through. Leading whitespace becomes a marker (so nested checkboxes
    # don't get hoisted to top level).
    task_re = re.compile(r"^(\s*)(?:[-*+]|\d+\.)\s+\[([ xX])\]\s+(.*)$")
    for line in text.split("\n"):
        if fence_re.match(line):
            # Fence delimiter: toggle the state and keep the line untouched.
            in_fence = not in_fence
            out_lines.append(line)
            continue
        if in_fence:
            # Code is copied verbatim; no task-list or strike rewriting.
            out_lines.append(line)
            continue
        m = task_re.match(line)
        if m and not m.group(1):  # top-level only; nested stays a list item
            done = "true" if m.group(2).lower() == "x" else "false"
            body = m.group(3).strip()
            # Surround with blank lines so it parses as raw HTML block
            out_lines.append("")
            out_lines.append(f'<checkbox done="{done}">{html_lib.escape(body)}</checkbox>')
            out_lines.append("")
            continue
        out_lines.append(strike_re.sub(r"<del>\1</del>", line))
    return "\n".join(out_lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTML -> DocxXML converter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Names of inline-level tags.
# NOTE(review): not referenced anywhere in the visible part of this file —
# confirm whether it is still needed.
INLINE_TAGS = {"a", "b", "strong", "em", "i", "u", "del", "s", "strike", "code", "span", "br", "img", "cite", "latex"}
# Tags DocxXMLBuilder copies to the output as bare <tag> / </tag> when no more
# specific handler in handle_starttag / handle_endtag has claimed them.
BLOCK_PASSTHROUGH = {"p", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "hr", "br"}
|
||||
|
||||
|
||||
def xml_escape_text(s: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so ``s`` is safe as XML text content.

    ``&`` is replaced first so the entities produced for ``<`` and ``>`` are
    not escaped a second time.
    """
    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def xml_escape_attr(s: str) -> str:
    """Escape ``s`` for use inside a double-quoted XML attribute value.

    Applies the text escaping and additionally replaces ``"`` with ``&quot;``.
    """
    return xml_escape_text(s).replace('"', "&quot;")
|
||||
|
||||
|
||||
class DocxXMLBuilder(HTMLParser):
    """Walks python-markdown HTML and emits DocxXML.

    Local images / attachments / non-inline-able media become placeholder
    <p>SENTINEL</p> paragraphs; each one is recorded in ``self.embeds`` so the
    caller can media-insert the real file in the correct position afterwards.

    After ``feed()`` / ``close()`` the generated fragments are in ``self.out``
    (join them with ``""``) and the pending embeds are in ``self.embeds``.
    """

    def __init__(self, md_dir: Path, session_tag: str):
        """Create a builder.

        Args:
            md_dir: directory that relative ``src`` / ``href`` paths are
                resolved against.
            session_tag: token embedded in every sentinel generated by this
                instance.
        """
        super().__init__(convert_charrefs=True)
        self.md_dir = md_dir
        self.session_tag = session_tag
        # Output fragments, in document order.
        self.out: list[str] = []
        self.embeds: list[dict] = []  # {sentinel, file, type, caption}
        # Text collected between <code> and </code> inside a <pre>; None when
        # no code block is being captured.
        self._code_buf: list[str] | None = None
        # Language taken from the <code class="..."> attribute of that block.
        self._code_lang: str | None = None
        self._table_buf: list[str] | None = None  # we buffer the entire table so colspan/rowspan etc. just round-trip
        # Nesting depth of <table> tags while buffering.
        self._table_depth = 0
        # True between <pre> and </pre>.
        self._in_pre = False
        # Markers for the currently open inline tags (plus the
        # "__TRANSPARENT_A__" / "__UNKNOWN__" pseudo entries).
        self._inline_stack: list[str] = []
        self._li_stack: list[str] = []  # track ul/ol type for current li
        self._blockquote_depth = 0
        self._p_depth = 0  # how many <p> are currently open in our output stream

    # ---- sentinel handling ----
    def _next_sentinel(self) -> str:
        """Return the sentinel for the next embed, numbered by the current embed count."""
        n = len(self.embeds)
        # All caps + underscores so it never collides with normal markdown prose
        return f"{SENTINEL_PREFIX}_{self.session_tag}_{n:04d}"

    def _resolve_local(self, src: str) -> Path | None:
        """Map a link / image target to an existing local file.

        Returns None for empty targets, http(s) URLs, ``#`` anchors and paths
        that do not point at an existing regular file. Relative paths are
        resolved against ``self.md_dir``; absolute paths are used as given.
        """
        # Strip query/fragment for sanity
        clean = src.split("#", 1)[0].split("?", 1)[0]
        if not clean or is_http_url(clean) or is_anchor(clean):
            return None
        p = Path(clean)
        if not p.is_absolute():
            p = (self.md_dir / p).resolve()
        return p if p.exists() and p.is_file() else None

    # ---- emit helpers ----
    def _emit(self, s: str) -> None:
        """Append ``s`` to the table buffer when one is open, else to the output."""
        # If we're buffering a table, append there instead
        if self._table_buf is not None:
            self._table_buf.append(s)
        else:
            self.out.append(s)

    def _emit_placeholder(self, file: Path, kind: str, caption: str | None = None) -> None:
        """Record an embed for ``file`` and write its sentinel into the output.

        Args:
            file: local file to upload later.
            kind: embed type stored under ``"type"`` (callers in this class
                pass ``"file"`` or ``"image"``).
            caption: optional caption stored with the embed.
        """
        sentinel = self._next_sentinel()
        self.embeds.append({
            "sentinel": sentinel,
            "file": str(file),
            "type": kind,
            "caption": caption,
        })
        # The placeholder must end up as its own top-level <p> so media-insert
        # can anchor on it cleanly and the cleanup pass can block_delete it.
        # If we're currently inside a <p>, split: close, emit standalone, reopen.
        if self._table_buf is not None:
            # Inside a table cell — best we can do is emit the sentinel as
            # inline text and rely on str_replace cleanup. Media still lands at
            # top level (per --selection-with-ellipsis semantics).
            self._emit(sentinel)
            return
        if self._p_depth > 0:
            self.out.append("</p>")
            self.out.append(f"<p>{sentinel}</p>")
            self.out.append("<p>")
            return
        self._emit(f"<p>{sentinel}</p>")

    # ---- HTMLParser hooks ----
    def handle_starttag(self, tag, attrs):
        """Translate an opening tag.

        The checks below are order-dependent: <pre> capture comes first, then
        table buffering, then the per-tag handlers. Tags matched by none of
        them are dropped and only tracked as "__UNKNOWN__" on the inline stack.
        """
        attrd = dict(attrs)

        # Inside <pre><code>: capture verbatim
        if self._in_pre:
            # Don't recurse, but still record raw markup if any nested tags appear
            if tag == "code":
                self._code_lang = self._extract_lang(attrd.get("class", ""))
                self._code_buf = []
            return

        # Table buffer mode: just copy markup through, no transformations needed
        if self._table_buf is not None:
            self._table_buf.append(self._raw_tag(tag, attrd))
            if tag == "table":
                self._table_depth += 1
            return

        if tag == "table":
            # Outermost table: start buffering.
            self._table_buf = []
            self._table_depth = 1
            self._table_buf.append(self._raw_tag(tag, attrd))
            return

        if tag == "pre":
            self._in_pre = True
            return

        if tag == "img":
            self._emit_img(attrd)
            return

        if tag == "a":
            href = attrd.get("href", "")
            local = self._resolve_local(href) if href else None
            if local is not None:
                # Inline attachment: keep the link text in the prose so the
                # paragraph still reads naturally, and queue a placeholder so
                # the attachment block appears right after this paragraph.
                caption = attrd.get("title") or None
                self._emit_placeholder(local, "file", caption)
                # Drop the <a> tags (keep their text children) by pushing
                # a "transparent" marker on the inline stack.
                self._inline_stack.append("__TRANSPARENT_A__")
                return
            # Regular link
            self._inline_stack.append("a")
            attrs_s = self._attrs_string({"href": href})
            self._emit(f"<a{attrs_s}>")
            return

        # Inline formatting: synonyms collapse onto one output tag each.
        if tag in {"b", "strong"}:
            self._inline_stack.append("b")
            self._emit("<b>")
            return
        if tag in {"em", "i"}:
            self._inline_stack.append("em")
            self._emit("<em>")
            return
        if tag in {"u"}:
            self._inline_stack.append("u")
            self._emit("<u>")
            return
        if tag in {"del", "s", "strike"}:
            self._inline_stack.append("del")
            self._emit("<del>")
            return
        if tag == "code":
            self._inline_stack.append("code")
            self._emit("<code>")
            return
        if tag == "br":
            self._emit("<br/>")
            return

        if tag == "ul":
            self._li_stack.append("ul")
            self._emit("<ul>")
            return
        if tag == "ol":
            self._li_stack.append("ol")
            self._emit("<ol>")
            return
        if tag == "li":
            # Items of an ordered list carry seq="auto".
            if self._li_stack and self._li_stack[-1] == "ol":
                self._emit('<li seq="auto">')
            else:
                self._emit("<li>")
            return

        if tag == "blockquote":
            self._blockquote_depth += 1
            self._emit("<blockquote>")
            return

        if tag == "p":
            self._p_depth += 1
            self._emit("<p>")
            return

        if tag == "checkbox":
            # Emitted by our preprocessor for GFM task list items.
            done = attrd.get("done", "false")
            self._emit(f'<checkbox done="{xml_escape_attr(done)}">')
            self._inline_stack.append("checkbox")
            return

        if tag in BLOCK_PASSTHROUGH:
            self._emit(f"<{tag}>")
            return

        # span etc.
        if tag == "span":
            self._inline_stack.append("span")
            self._emit("<span>")
            return

        # Anything else we don't recognise — drop the tag, keep its text
        self._inline_stack.append("__UNKNOWN__")

    def handle_endtag(self, tag):
        """Translate a closing tag; mirrors the branch order of handle_starttag."""
        if self._in_pre:
            # </code> flushes the captured block; </pre> leaves capture mode.
            # Every other closing tag inside <pre> is ignored.
            if tag == "code":
                self._flush_code()
            elif tag == "pre":
                self._in_pre = False
            return

        if self._table_buf is not None:
            self._table_buf.append(f"</{tag}>")
            if tag == "table":
                self._table_depth -= 1
                if self._table_depth == 0:
                    # Outermost table closed: sanitise and emit it in one piece.
                    table_xml = "".join(self._table_buf)
                    self._table_buf = None
                    # Clean the buffered HTML so it's valid DocxXML
                    self.out.append(self._sanitise_table(table_xml))
            return

        if tag == "pre":
            self._in_pre = False
            return

        if tag == "img":
            return

        if tag == "a":
            # A local-file link was opened "transparently": nothing to close.
            top = self._inline_stack.pop() if self._inline_stack else None
            if top == "__TRANSPARENT_A__":
                return
            self._emit("</a>")
            return

        # For the inline tags below the stack entry is popped only when it
        # matches; the closing tag is written either way.
        if tag in {"b", "strong"}:
            if self._inline_stack and self._inline_stack[-1] == "b":
                self._inline_stack.pop()
            self._emit("</b>")
            return
        if tag in {"em", "i"}:
            if self._inline_stack and self._inline_stack[-1] == "em":
                self._inline_stack.pop()
            self._emit("</em>")
            return
        if tag in {"u"}:
            if self._inline_stack and self._inline_stack[-1] == "u":
                self._inline_stack.pop()
            self._emit("</u>")
            return
        if tag in {"del", "s", "strike"}:
            if self._inline_stack and self._inline_stack[-1] == "del":
                self._inline_stack.pop()
            self._emit("</del>")
            return
        if tag == "code":
            if self._inline_stack and self._inline_stack[-1] == "code":
                self._inline_stack.pop()
            self._emit("</code>")
            return
        if tag == "span":
            if self._inline_stack and self._inline_stack[-1] == "span":
                self._inline_stack.pop()
            self._emit("</span>")
            return

        if tag == "ul":
            if self._li_stack and self._li_stack[-1] == "ul":
                self._li_stack.pop()
            self._emit("</ul>")
            return
        if tag == "ol":
            if self._li_stack and self._li_stack[-1] == "ol":
                self._li_stack.pop()
            self._emit("</ol>")
            return
        if tag == "li":
            self._emit("</li>")
            return

        if tag == "blockquote":
            self._blockquote_depth = max(0, self._blockquote_depth - 1)
            self._emit("</blockquote>")
            return

        if tag == "p":
            # Clamped at zero so a stray </p> cannot make the depth negative.
            self._p_depth = max(0, self._p_depth - 1)
            self._emit("</p>")
            return

        if tag == "checkbox":
            if self._inline_stack and self._inline_stack[-1] == "checkbox":
                self._inline_stack.pop()
            self._emit("</checkbox>")
            return

        if tag in BLOCK_PASSTHROUGH:
            self._emit(f"</{tag}>")
            return

        # Closing tag of an element that was dropped on open.
        if self._inline_stack and self._inline_stack[-1] == "__UNKNOWN__":
            self._inline_stack.pop()

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing tags (``<img/>``, ``<br/>``, ``<hr/>``, ...)."""
        attrd = dict(attrs)
        if tag == "img":
            self._emit_img(attrd)
            return
        if tag == "br":
            self._emit("<br/>")
            return
        if tag == "hr":
            self._emit("<hr/>")
            return
        # Treat as start+end
        self.handle_starttag(tag, attrs)
        self.handle_endtag(tag)

    def handle_data(self, data):
        """Route text to the code buffer, the table buffer or the output."""
        if not data:
            return
        if self._in_pre and self._code_buf is not None:
            # Kept raw here; escaping happens when the block is flushed.
            self._code_buf.append(data)
            return
        if self._table_buf is not None:
            self._table_buf.append(xml_escape_text(data))
            return
        # Preserve user text but escape XML specials
        # In <pre> outside <code> we also escape (shouldn't normally happen)
        self._emit(xml_escape_text(data))

    # ---- code / language extraction ----
    @staticmethod
    def _extract_lang(class_attr: str) -> str:
        """Return the language from a ``language-xxx`` / ``lang-xxx`` class token, or ""."""
        # python-markdown fenced_code emits e.g. class="language-mermaid"
        for tok in class_attr.split():
            if tok.startswith("language-"):
                return tok[len("language-"):]
            if tok.startswith("lang-"):
                return tok[len("lang-"):]
        return ""

    def _flush_code(self) -> None:
        """Emit the captured code block and reset the capture state.

        ``mermaid`` and ``plantuml`` / ``puml`` blocks become ``<whiteboard>``
        elements; everything else becomes ``<pre><code>``, with a ``lang``
        attribute when a language is known.
        """
        body = "".join(self._code_buf or [])
        lang = (self._code_lang or "").strip().lower()
        self._code_buf = None
        self._code_lang = None
        # Mermaid / PlantUML get rendered as whiteboards
        if lang in {"mermaid"}:
            self._emit(f'<whiteboard type="mermaid">{xml_escape_text(body.rstrip())}</whiteboard>')
            return
        if lang in {"plantuml", "puml"}:
            self._emit(f'<whiteboard type="plantuml">{xml_escape_text(body.rstrip())}</whiteboard>')
            return
        # Strip trailing newline that python-markdown adds inside <code>
        body = body.rstrip("\n")
        lang_attr = f' lang="{xml_escape_attr(lang)}"' if lang else ""
        self._emit(f"<pre{lang_attr}><code>{xml_escape_text(body)}</code></pre>")

    # ---- image emit ----
    def _emit_img(self, attrd: dict) -> None:
        """Emit an image: ``<img href=.../>`` for http(s) sources, a placeholder for local files.

        The caption is the ``title`` attribute, falling back to ``alt``. Images
        without ``src`` are skipped silently; local images that cannot be
        resolved are dropped with a warning on stderr.
        """
        src = attrd.get("src", "").strip()
        alt = attrd.get("alt", "").strip()
        title = attrd.get("title", "").strip()
        caption = title or alt or None
        if not src:
            return
        if is_http_url(src):
            attrs_s = self._attrs_string({"href": src, "caption": caption, "name": alt or None})
            self._emit(f"<img{attrs_s}/>")
            return
        local = self._resolve_local(src)
        if local is None:
            sys.stderr.write(f"[warn] image not found, dropping: {src}\n")
            return
        self._emit_placeholder(local, "image", caption)

    # ---- attrs helpers ----
    @staticmethod
    def _attrs_string(d: dict) -> str:
        """Render ``d`` as `` key="value"`` pairs, skipping None and empty values."""
        parts = []
        for k, v in d.items():
            if v is None or v == "":
                continue
            parts.append(f' {k}="{xml_escape_attr(str(v))}"')
        return "".join(parts)

    @staticmethod
    def _raw_tag(tag: str, attrd: dict) -> str:
        """Rebuild an opening tag from its name and attributes."""
        return f"<{tag}{DocxXMLBuilder._attrs_string(attrd)}>"

    @staticmethod
    def _sanitise_table(html: str) -> str:
        """Coerce python-markdown's HTML table into DocxXML-legal markup:
        - <strong>/<em>/<i> become <b>/<em>
        - Drop style="..." attributes (DocxXML uses background-color /
          vertical-align, not CSS)
        - Drop unknown attributes on cells
        """
        # tag rename
        html = re.sub(r"<(/?)strong\b", r"<\1b", html)
        html = re.sub(r"<(/?)i\b", r"<\1em", html)
        # drop style="..." on th/td/tr/table
        html = re.sub(r'\s+style="[^"]*"', "", html)
        # drop align="..." on th/td (we don't try to map to DocxXML alignment)
        html = re.sub(r'\s+align="[^"]*"', "", html)
        return html
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Driver
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _first_h1_index(lines):
    """Return the index of the first ``# `` heading line, or None.

    Lines inside ``` fenced code blocks are skipped, so a shell or Python
    comment in a code sample is not mistaken for the document title. The
    fence rule mirrors the one in ``preprocess_markdown``: any line whose
    first non-blank characters are three backticks toggles the fence state.
    """
    in_fence = False
    for idx, line in enumerate(lines):
        stripped = line.strip()
        if stripped.startswith("```"):
            in_fence = not in_fence
            continue
        if not in_fence and stripped.startswith("# "):
            return idx
    return None


def derive_title(md_text: str, md_path: Path) -> str:
    """Pick the document title for a markdown source.

    Args:
        md_text: markdown text to scan.
        md_path: path of the markdown file; its stem is the fallback title.

    Returns:
        The text of the first H1 outside fenced code, or ``md_path.stem``
        when there is none.
    """
    lines = md_text.splitlines()
    idx = _first_h1_index(lines)
    if idx is not None:
        # Drop the leading "# " and surrounding whitespace.
        return lines[idx].strip()[2:].strip()
    # fallback: filename without extension
    return md_path.stem


def strip_first_h1(md_text: str) -> str:
    """Drop the first H1 line if present — we'll convey it via <title> instead.

    Selects the same line as ``derive_title`` (first ``# `` heading outside
    fenced code). The remaining lines are re-joined with ``\\n``.
    """
    lines = md_text.splitlines()
    idx = _first_h1_index(lines)
    if idx is not None:
        del lines[idx]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def build_xml(md_path: Path, *, title: str, session_tag: str) -> tuple[str, list[dict]]:
    """Convert the markdown file at ``md_path`` into a DocxXML string.

    Pipeline: read the file as UTF-8, run ``preprocess_markdown``, drop the
    first H1, render to XHTML with python-markdown, then walk that HTML with
    ``DocxXMLBuilder``.

    Args:
        md_path: markdown file; its parent directory is used to resolve
            relative image / attachment paths.
        title: text placed in the leading ``<title>`` element.
        session_tag: tag handed to the builder for its sentinels.

    Returns:
        ``(xml, embeds)`` — the full XML (title followed by body) and the
        builder's list of pending embeds.
    """
    source_md = preprocess_markdown(md_path.read_text(encoding="utf-8"))
    rendered = markdown.markdown(
        strip_first_h1(source_md),
        extensions=["fenced_code", "tables", "sane_lists"],
        output_format="xhtml",
    )

    converter = DocxXMLBuilder(md_dir=md_path.parent, session_tag=session_tag)
    converter.feed(rendered)
    converter.close()

    # python-markdown wraps the unknown <checkbox> tag in a <p>; unwrap it so
    # the checkbox stands on its own.
    wrapped_checkbox = re.compile(r"<p>\s*(<checkbox\s+done=\"(?:true|false)\">[^<]*</checkbox>)\s*</p>")
    # Empty paragraphs are left behind when a placeholder splits a <p>.
    hollow_paragraph = re.compile(r"<p>\s*</p>")

    body_xml = wrapped_checkbox.sub(r"\1", "".join(converter.out))
    body_xml = hollow_paragraph.sub("", body_xml)
    return f"<title>{xml_escape_text(title)}</title>" + body_xml, converter.embeds
|
||||
|
||||
|
||||
def create_or_overwrite_doc(*, doc_id: str | None, content: str, identity: str, parent_token: str | None, parent_position: str | None, verbose: bool) -> dict:
    """Overwrite an existing doc with ``content``, or create a new one.

    The XML content is always sent over stdin (``--content -``) to avoid argv
    length and shell escaping pitfalls.

    Args:
        doc_id: doc to overwrite; a falsy value means "create a new doc".
        content: DocxXML payload.
        identity: lark-cli identity passed through to ``run_lark``.
        parent_token: optional ``--parent-token`` value (create only).
        parent_position: optional ``--parent-position`` value (create only).
        verbose: log progress to stderr and make ``run_lark`` verbose.

    Returns:
        ``{"doc_id", "result"}`` after an overwrite;
        ``{"doc_id", "url", "result"}`` after a create.

    Raises:
        LarkError: if lark-cli fails or the create response has no document_id.
    """
    if doc_id:
        if verbose:
            sys.stderr.write(f"[md2feishu] overwriting existing doc {doc_id}\n")
        overwrite_args = [
            "docs", "+update",
            "--api-version", "v2",
            "--doc", doc_id,
            "--command", "overwrite",
            "--doc-format", "xml",
            "--content", "-",
        ]
        outcome = run_lark(overwrite_args, stdin=content, identity=identity, verbose=verbose)
        return {"doc_id": doc_id, "result": outcome}

    if verbose:
        sys.stderr.write("[md2feishu] creating new doc\n")
    create_args = [
        "docs", "+create",
        "--api-version", "v2",
        "--doc-format", "xml",
        "--content", "-",
    ]
    # Placement flags are only added when a value was supplied.
    for flag, value in (("--parent-token", parent_token), ("--parent-position", parent_position)):
        if value:
            create_args.extend([flag, value])

    outcome = run_lark(create_args, stdin=content, identity=identity, verbose=verbose)
    document = (outcome.get("data") or {}).get("document") or {}
    created_id = document.get("document_id")
    if created_id:
        return {"doc_id": created_id, "url": document.get("url"), "result": outcome}
    raise LarkError(f"docs +create did not return a document_id: {json.dumps(outcome, ensure_ascii=False)}")
|
||||
|
||||
|
||||
def insert_embed(doc_id: str, embed: dict, *, identity: str, verbose: bool) -> None:
    """Upload one local file into ``doc_id`` in front of its sentinel.

    Args:
        doc_id: target document.
        embed: record with ``file``, ``type`` and ``sentinel`` keys and an
            optional ``caption``.
        identity: lark-cli identity passed through to ``run_lark``.
        verbose: make ``run_lark`` log the invocation.

    Raises:
        LarkError: if the lark-cli call fails.
    """
    source = Path(embed["file"]).resolve()
    kind = embed["type"]
    # lark-cli refuses absolute paths for --file, so only the basename is
    # passed and the command runs with the file's directory as cwd.
    cli_args = [
        "docs", "+media-insert",
        "--doc", doc_id,
        "--file", source.name,
        "--type", kind,
        "--selection-with-ellipsis", embed["sentinel"],
        "--before",
    ]
    caption = embed.get("caption")
    # Captions are only forwarded for image embeds.
    if caption and kind == "image":
        cli_args.extend(["--caption", caption])
    run_lark(cli_args, identity=identity, verbose=verbose, cwd=str(source.parent))
|
||||
|
||||
|
||||
def cleanup_sentinels(doc_id: str, session_tag: str, embeds: list[dict], *, identity: str, verbose: bool) -> None:
    """Two-pass cleanup:
      1. block_delete any paragraph whose entire text is a sentinel
      2. str_replace any remaining sentinel occurrences (handles sentinels
         that ended up inline inside table cells or mixed prose)

    Pass 2 also covers stand-alone sentinel paragraphs when the block_delete
    call of pass 1 failed, so the placeholder text does not stay visible.

    Args:
        doc_id: document to clean.
        session_tag: tag of the sentinels generated for this run.
        embeds: embed records; only their ``sentinel`` values are used.
        identity: lark-cli identity passed through to ``run_lark``.
        verbose: log progress to stderr and make ``run_lark`` verbose.

    Raises:
        LarkError: if the initial fetch fails. Failures of the individual
            cleanup commands are only reported as warnings on stderr.
    """
    res = run_lark([
        "docs", "+fetch",
        "--api-version", "v2",
        "--doc", doc_id,
        "--detail", "with-ids",
        "--doc-format", "xml",
    ], identity=identity, verbose=verbose)
    xml_payload = ((res.get("data") or {}).get("document") or {}).get("content") or ""
    if not xml_payload:
        # Unexpected response shape: fall back to searching the raw JSON.
        xml_payload = json.dumps(res, ensure_ascii=False)
    # Group 1 = block id, group 2 = the sentinel held by that paragraph.
    # session_tag is escaped so it is always matched literally.
    sentinel_re = re.compile(
        rf'<p[^>]*\bid="([^"]+)"[^>]*>\s*({SENTINEL_PREFIX}_{re.escape(session_tag)}_\d+)\s*</p>'
    )
    matches = sentinel_re.findall(xml_payload)
    ids = [block_id for block_id, _ in matches]
    # Sentinels whose paragraph was actually deleted in pass 1.
    removed: set[str] = set()
    if ids:
        if verbose:
            sys.stderr.write(f"[md2feishu] deleting {len(ids)} sentinel paragraph(s)\n")
        try:
            run_lark([
                "docs", "+update",
                "--api-version", "v2",
                "--doc", doc_id,
                "--command", "block_delete",
                "--block-id", ",".join(ids),
            ], identity=identity, verbose=verbose)
            removed = {sentinel for _, sentinel in matches}
        except LarkError as e:
            sys.stderr.write(f"[warn] block_delete cleanup failed: {e}\n")
    # Fallback: scrub any inline sentinel text still in the doc
    for embed in embeds:
        sentinel = embed["sentinel"]
        if sentinel in xml_payload and sentinel not in removed:
            try:
                run_lark([
                    "docs", "+update",
                    "--api-version", "v2",
                    "--doc", doc_id,
                    "--command", "str_replace",
                    "--pattern", sentinel,
                    "--content", "",
                ], identity=identity, verbose=verbose)
            except LarkError as e:
                sys.stderr.write(f"[warn] str_replace cleanup for {sentinel} failed: {e}\n")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
HELP_EPILOG = textwrap.dedent("""
|
||||
EXAMPLES
|
||||
# First run — creates a new Feishu doc, remembers the mapping
|
||||
markdown-to-feishu ./report.md
|
||||
|
||||
# Re-run on the same file — updates the same doc in place (no new doc spawned)
|
||||
markdown-to-feishu ./report.md
|
||||
|
||||
# Force a brand-new doc even when state already has a mapping
|
||||
markdown-to-feishu --new ./report.md
|
||||
|
||||
# Update a specific doc explicitly, ignoring state file
|
||||
markdown-to-feishu --update doxcnAbc123 ./report.md
|
||||
|
||||
# Drop into a particular folder when creating
|
||||
markdown-to-feishu --parent-token fldcnXXXX ./report.md
|
||||
|
||||
# Put it under your personal knowledge library
|
||||
markdown-to-feishu --parent-position my_library ./report.md
|
||||
|
||||
# Override the document title (default = first H1 or filename stem)
|
||||
markdown-to-feishu --title "2026 Q2 OKR" ./okr.md
|
||||
|
||||
# Inspect the generated XML and embed plan, without touching Feishu
|
||||
markdown-to-feishu --dry-run ./report.md
|
||||
|
||||
# Forget the mapping for a file (does NOT delete the Feishu doc)
|
||||
markdown-to-feishu --forget ./report.md
|
||||
|
||||
# Show the recorded mapping for this file
|
||||
markdown-to-feishu --show ./report.md
|
||||
|
||||
SUPPORTED MARKDOWN -> FEISHU BLOCK MAPPING
|
||||
# / ## / ... / ###### -> <h1> ... <h9> (the first H1 becomes the
|
||||
document <title>)
|
||||
paragraphs -> <p>
|
||||
**bold** / __bold__ -> <b>
|
||||
*italic* / _italic_ -> <em>
|
||||
~~strike~~ (GFM) -> <del>
|
||||
`inline code` -> <code>
|
||||
[text](https://...) -> <a href="...">text</a>
|
||||
[text](./local.pdf) -> attachment block (file uploaded via
|
||||
docs +media-insert --type file)
|
||||
![alt](https://...) -> <img href="https://..."/> (URL is fetched
|
||||
server-side by Feishu)
|
||||
![alt](./local.png) -> inline image block (file uploaded via
|
||||
docs +media-insert --type image; alt /
|
||||
title becomes caption)
|
||||
> blockquote -> <blockquote>
|
||||
--- / *** -> <hr/>
|
||||
- item / * item / 1. item -> <ul> / <ol> with seq="auto"
|
||||
nested lists (4-space indent) -> nested <ul> / <ol>
|
||||
| a | b | GFM tables -> <table><thead><tr><th>...
|
||||
```lang ... ``` -> <pre lang="lang"><code>...</code></pre>
|
||||
```mermaid ... ``` -> <whiteboard type="mermaid">...</whiteboard>
|
||||
```plantuml ... ``` -> <whiteboard type="plantuml">...</whiteboard>
|
||||
|
||||
ATTACHMENT DETECTION
|
||||
Any [text](path) link whose href is NOT an http(s) URL and NOT an in-doc
|
||||
anchor (#foo), and which resolves to an existing local file (relative to
|
||||
the markdown file's directory), is uploaded as a Feishu file block. The
|
||||
visible link text stays in the prose and the attachment block is inserted
alongside it. This is what makes pasting PDFs / CSVs / logs / arbitrary binaries
|
||||
feel "native".
|
||||
|
||||
Caveat: if a link resolves to a missing local file, it falls through to a
|
||||
regular <a> link (the path will appear as-is). Run with --verbose to see
|
||||
each resolution decision.
|
||||
|
||||
IDENTITY
|
||||
Defaults to --as user so the created doc is owned by YOUR Feishu account,
|
||||
not the bot. This means you can manage / move / delete it directly from
|
||||
Feishu without any ownership transfer dance. Use --as bot only if you
|
||||
explicitly want bot-owned documents.
|
||||
|
||||
UPDATE-BY-DEFAULT BEHAVIOUR
|
||||
State lives at ~/.local/share/markdown-to-feishu/state.json (override with
|
||||
$MD2FEISHU_STATE_DIR or --state-file). Keyed by the markdown file's
|
||||
absolute path. When state has a doc_id for the given path:
|
||||
|
||||
- default -> overwrite that doc in place
|
||||
- --new -> ignore state, create a fresh doc, replace
|
||||
the mapping with the new id
|
||||
- --update <id> -> overwrite the given id and update state
|
||||
|
||||
overwrite replays the full XML and re-uploads every local media file from
|
||||
source, so the doc always matches the markdown 1:1. Comments on the doc
|
||||
survive overwrite; manual edits inside the doc do NOT (markdown is the
|
||||
source of truth).
|
||||
|
||||
EXIT CODES
|
||||
0 success
|
||||
1 generic error (bad args, file not found, lark-cli failure)
|
||||
2 partial success — doc created/updated but at least one embed failed
|
||||
|
||||
ENVIRONMENT
|
||||
MD2FEISHU_STATE_DIR override the directory holding state.json
|
||||
LARK_CLI_PROFILE passed through; honoured by lark-cli itself
|
||||
|
||||
DEPENDENCIES
|
||||
python3, python3-markdown, lark-cli (must be authenticated as user via
|
||||
`lark-cli auth login`)
|
||||
""")
|
||||
|
||||
|
||||
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments of the markdown-to-feishu CLI.

    Args:
        argv: Argument vector without the program name.

    Returns:
        The populated namespace. ``--as`` is stored under ``identity``;
        the positional ``markdown`` is optional at the parser level.
    """
    parser = argparse.ArgumentParser(
        prog="markdown-to-feishu",
        description="Convert a Markdown file (with rich embeds: tables, images, mermaid, attachments) into a Feishu docx. Re-runs update the previously-created doc by default.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=HELP_EPILOG,
    )

    # (flags, keyword arguments) in registration order; the order is what
    # --help prints, so keep it stable.
    option_table = [
        (("markdown",), dict(nargs="?", help="path to the .md file")),
        (("--new",), dict(action="store_true", help="force-create a new doc even if state already has a mapping for this file")),
        (("--update",), dict(metavar="DOC_ID", help="overwrite the given doc id (URL also accepted); ignores and then updates state")),
        (("--title",), dict(help="override document title (default: first H1, else filename stem)")),
        (("--parent-token",), dict(help="parent folder or wiki node token (only used when creating)")),
        (("--parent-position",), dict(help="parent position keyword, e.g. my_library (only used when creating)")),
        (("--as",), dict(dest="identity", choices=["user", "bot"], default="user", help="identity for lark-cli (default: user, so you own the doc)")),
        (("--dry-run",), dict(action="store_true", help="print generated XML + embed plan without calling lark-cli")),
        (("--state-file",), dict(help="override path to state.json (default: ~/.local/share/markdown-to-feishu/state.json)")),
        (("--forget",), dict(action="store_true", help="remove the state mapping for this file (does not delete the Feishu doc) and exit")),
        (("--show",), dict(action="store_true", help="print the recorded mapping for this file (if any) and exit")),
        (("-v", "--verbose"), dict(action="store_true", help="verbose logging (every lark-cli invocation)")),
        (("--version",), dict(action="version", version=f"markdown-to-feishu {VERSION}")),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Run the markdown-to-feishu CLI.

    Resolves the markdown path, handles the state-only sub-commands
    (``--show`` / ``--forget``), builds the document XML, creates or
    overwrites the Feishu doc, inserts the out-of-band embeds and finally
    records the mapping in the state file.

    Args:
        argv: Argument vector without the program name.

    Returns:
        Process exit code: 0 on success (also for ``--show``, ``--forget``
        and ``--dry-run``), 1 on a fatal error, 2 when the doc was written
        but at least one embed could not be inserted.
    """
    args = parse_args(argv)
    global STATE_FILE, STATE_DIR
    if args.state_file:
        STATE_FILE = Path(args.state_file).expanduser().resolve()
        STATE_DIR = STATE_FILE.parent

    if not args.markdown:
        sys.stderr.write("error: missing markdown file (use --help)\n")
        return 1

    md_path = Path(args.markdown).expanduser().resolve()
    if not md_path.exists() or not md_path.is_file():
        sys.stderr.write(f"error: {md_path} is not a file\n")
        return 1
    # State is keyed by the resolved absolute path of the markdown file.
    key = str(md_path)

    state = load_state()

    if args.show:
        entry = state.get(key)
        if entry is None:
            print(f"no mapping recorded for {md_path}")
        else:
            print(json.dumps(entry, indent=2, ensure_ascii=False))
        return 0

    if args.forget:
        if key in state:
            state.pop(key)
            save_state(state)
            print(f"forgot mapping for {md_path}")
        else:
            print(f"no mapping recorded for {md_path}")
        return 0

    # Report an unreadable / non-UTF-8 file like every other fatal error
    # instead of letting the exception escape as a traceback.
    try:
        md_text = md_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        sys.stderr.write(f"error: cannot read {md_path}: {e}\n")
        return 1
    title = args.title or derive_title(md_text, md_path)
    # Short random tag passed to build_xml and later to cleanup_sentinels.
    session_tag = uuid.uuid4().hex[:8].upper()

    try:
        content, embeds = build_xml(md_path, title=title, session_tag=session_tag)
    except Exception as e:
        sys.stderr.write(f"error: failed to build XML: {e}\n")
        return 1

    if args.dry_run:
        print("=== GENERATED DOCXXML ===")
        print(content)
        print()
        print("=== EMBED PLAN ===")
        if not embeds:
            print("(no out-of-band embeds)")
        else:
            for e in embeds:
                print(json.dumps(e, ensure_ascii=False))
        target = "new doc"
        if args.update:
            target = f"update doc {args.update}"
        elif not args.new and key in state:
            target = f"update existing doc {state[key].get('doc_id')}"
        print()
        print(f"=== TARGET ===\n{target}")
        return 0

    # Decide create-vs-update
    explicit_doc = args.update
    if explicit_doc and explicit_doc.startswith("http"):
        # extract /docx/<id>; a URL without that segment is passed through unchanged
        m = re.search(r"/docx/([A-Za-z0-9]+)", explicit_doc)
        if m:
            explicit_doc = m.group(1)
    target_doc_id = None
    if explicit_doc:
        target_doc_id = explicit_doc
    elif not args.new and key in state:
        target_doc_id = state[key].get("doc_id")

    try:
        outcome = create_or_overwrite_doc(
            doc_id=target_doc_id,
            content=content,
            identity=args.identity,
            parent_token=args.parent_token,
            parent_position=args.parent_position,
            verbose=args.verbose,
        )
    except LarkError as e:
        sys.stderr.write(f"error: {e}\n")
        return 1

    doc_id = outcome["doc_id"]
    failed_embeds: list[dict] = []
    for embed in embeds:
        # A failed embed is reported and counted, but does not abort the run.
        try:
            insert_embed(doc_id, embed, identity=args.identity, verbose=args.verbose)
        except LarkError as e:
            sys.stderr.write(f"[warn] failed to insert {embed['file']}: {e}\n")
            failed_embeds.append(embed)

    # Always try to clean up sentinels we managed to anchor
    if embeds:
        try:
            cleanup_sentinels(doc_id, session_tag, embeds, identity=args.identity, verbose=args.verbose)
        except LarkError as e:
            sys.stderr.write(f"[warn] cleanup failed: {e}\n")

    # Save state. The URL falls back to the previously recorded one; if
    # neither the outcome nor the old entry has a URL it stays None.
    entry = state.get(key, {})
    entry.update({
        "doc_id": doc_id,
        "url": outcome.get("url") or entry.get("url"),
        "updated_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"),
        "title": title,
    })
    state[key] = entry
    save_state(state)

    # Final machine-readable summary on stdout.
    print(json.dumps({
        "doc_id": doc_id,
        "url": entry.get("url"),
        "title": title,
        "embeds_inserted": len(embeds) - len(failed_embeds),
        "embeds_failed": len(failed_embeds),
    }, indent=2, ensure_ascii=False))

    return 2 if failed_embeds else 0
|
||||
|
||||
|
||||
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    raise SystemExit(exit_status)
|
||||
@@ -0,0 +1,90 @@
|
||||
"""notes feishu sidecar:HTTP 包一层 markdown-to-feishu。
|
||||
|
||||
POST /convert {md_path, title?, existing_doc_id?}
|
||||
→ 跑 markdown-to-feishu,parse 最后那段 JSON,返回 {doc_id, url}
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Emit timestamped records at INFO level and above.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(name)s: %(message)s')
# Logger used by the request handlers in this module.
log = logging.getLogger('feishu')

# FastAPI application object that the route decorators below attach to.
app = FastAPI()
|
||||
|
||||
|
||||
@app.get('/healthz')
def healthz():
    """Health endpoint: always answers with a constant ``{'ok': True}`` body."""
    payload = {'ok': True}
    return payload
|
||||
|
||||
|
||||
# Request body of POST /convert.
class ConvertReq(BaseModel):
    # Path of the markdown file to convert; handed to markdown-to-feishu as-is.
    md_path: str
    # Optional document title, forwarded as --title when set.
    title: Optional[str] = None
    # Optional id of a doc to overwrite, forwarded as --update when set.
    existing_doc_id: Optional[str] = None
|
||||
|
||||
|
||||
def _last_json_object(out: str) -> dict:
    """Return the JSON object that ends ``out``.

    Candidate start positions are tried from the last ``{`` backwards, so
    a brace that merely appears inside a string value (for example a
    title containing ``{``) is skipped instead of being mistaken for the
    start of the object.

    Raises:
        json.JSONDecodeError: no suffix starting at a ``{`` parses; the
            error raised is the one from the last ``{`` in ``out``.
    """
    first_error = None
    pos = out.rfind('{')
    while pos >= 0:
        try:
            return json.loads(out[pos:])
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
        pos = out.rfind('{', 0, pos)
    raise first_error


@app.post('/convert')
def convert(req: ConvertReq):
    """Run markdown-to-feishu on ``req.md_path`` and report the resulting doc.

    Returns a dict with ``doc_id``, ``url``, ``embeds_inserted`` and
    ``embeds_failed``.

    Raises:
        HTTPException 400: ``md_path`` is not an existing file.
        HTTPException 504: the script ran longer than 10 minutes.
        HTTPException 502: the script exited with a code other than 0/2,
            or its stdout did not end in a JSON object carrying both
            ``doc_id`` and ``url``.
    """
    md = Path(req.md_path)
    # is_file() also rejects directories, which the script would refuse anyway.
    if not md.is_file():
        raise HTTPException(400, f'md not found: {md}')

    cmd = ['/usr/local/bin/markdown-to-feishu', str(md), '--as', 'user']
    if req.existing_doc_id:
        cmd += ['--update', req.existing_doc_id]
    if req.title:
        cmd += ['--title', req.title]
    log.info("run: %s", ' '.join(cmd))

    env = os.environ.copy()
    # Keep the markdown-to-feishu state file under /data/feishu-state
    # instead of the script's default location.
    env['MD2FEISHU_STATE_DIR'] = '/data/feishu-state'
    Path('/data/feishu-state').mkdir(parents=True, exist_ok=True)

    try:
        proc = subprocess.run(
            cmd, capture_output=True, text=True, timeout=600, env=env,
            cwd=str(md.parent),
        )
    except subprocess.TimeoutExpired:
        raise HTTPException(504, 'markdown-to-feishu timeout (>10min)')

    # Exit code 2 means some embeds failed but the doc itself was written,
    # so stdout is still parsed in that case.
    if proc.returncode not in (0, 2):
        log.warning("md2feishu exit=%d stderr=%s", proc.returncode, proc.stderr[-500:])
        raise HTTPException(502, f'md2feishu exit {proc.returncode}: '
                                 f'{proc.stderr.strip()[-400:]}')

    # The script's final print is a JSON object at the end of stdout.
    out = proc.stdout.strip()
    if '{' not in out:
        raise HTTPException(502, f'md2feishu no json output. stdout tail: {out[-400:]}')
    try:
        data = _last_json_object(out)
    except json.JSONDecodeError as e:
        raise HTTPException(502, f'md2feishu json parse: {e}; tail: {out[-400:]}')

    doc_id = data.get('doc_id')
    url = data.get('url')
    if not doc_id or not url:
        raise HTTPException(502, f'md2feishu missing doc_id/url: {data}')
    log.info("ok: doc_id=%s url=%s embeds=%s",
             doc_id, url, data.get('embeds_inserted'))
    return {
        'doc_id': doc_id,
        'url': url,
        'embeds_inserted': data.get('embeds_inserted', 0),
        'embeds_failed': data.get('embeds_failed', 0),
    }
|
||||
@@ -79,6 +79,24 @@
|
||||
<button v-if="selected.status === 'failed'" class="retry-btn" @click="retry">↻ 重试</button>
|
||||
<button class="danger-btn" @click="remove">删除</button>
|
||||
</div>
|
||||
<div v-if="selected.status === 'done'" class="feishu-row">
|
||||
<a
|
||||
v-if="selected.feishu_url"
|
||||
:href="selected.feishu_url"
|
||||
target="_blank"
|
||||
rel="noopener"
|
||||
class="feishu-link"
|
||||
>📄 飞书文档 · {{ selected.feishu_url.replace(/^https?:\/\//, '').slice(0, 40) }}…</a>
|
||||
<button
|
||||
class="feishu-btn"
|
||||
:disabled="feishuPushing"
|
||||
@click="pushFeishu"
|
||||
>
|
||||
{{ feishuPushing ? '⏳ 推送中…'
|
||||
: selected.feishu_url ? '↻ 重新生成' : '📤 一键转飞书文档' }}
|
||||
</button>
|
||||
<p v-if="feishuErr" class="feishu-err">{{ feishuErr }}</p>
|
||||
</div>
|
||||
</header>
|
||||
<audio :src="audioUrl(selected.id)" controls class="audio" />
|
||||
|
||||
@@ -114,6 +132,7 @@ import {
|
||||
uploadRecording,
|
||||
deleteRecording,
|
||||
retryRecording,
|
||||
convertFeishu,
|
||||
audioUrl as audioUrlFn,
|
||||
getPass,
|
||||
setPass,
|
||||
@@ -130,6 +149,8 @@ const selected = ref(null)
|
||||
const selectedId = ref(null)
|
||||
const uploading = ref(false)
|
||||
const uploadErr = ref('')
|
||||
const feishuPushing = ref(false)
|
||||
const feishuErr = ref('')
|
||||
let pollTimer = null
|
||||
|
||||
// 浏览器内录音(iOS 没法选录音机 App 文件,直接 web record 更顺)
|
||||
@@ -318,6 +339,23 @@ async function retry() {
|
||||
} catch (e) { alert(e.message) }
|
||||
}
|
||||
|
||||
/**
 * Convert the selected recording into a Feishu doc and show the result.
 *
 * Re-entrancy is blocked while a request is in flight. Failures are shown
 * through `feishuErr` rather than thrown.
 */
async function pushFeishu() {
  if (feishuPushing.value) return
  // Pin the id up front: the user may select another recording while the
  // request is still running.
  const id = selectedId.value
  if (id == null) return
  feishuPushing.value = true
  feishuErr.value = ''
  try {
    const r = await convertFeishu(id)
    // Only patch the detail object if it is still the recording we converted;
    // otherwise the link would be attached to the wrong recording.
    const current = selected.value
    if (current && String(current.id) === String(id)) {
      current.feishu_doc_id = r.doc_id
      current.feishu_url = r.url
    }
  } catch (e) {
    feishuErr.value = e.message || String(e)
  } finally {
    feishuPushing.value = false
  }
}
|
||||
|
||||
// Template-facing alias: forwards to the imported audioUrlFn helper.
function audioUrl(id) { return audioUrlFn(id) }
|
||||
|
||||
function statusLabel(s) {
|
||||
@@ -580,6 +618,41 @@ input, textarea { font-family: inherit; background: transparent; border: none; c
|
||||
padding: 3px 10px;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
/* Row holding the Feishu link, the convert button and the error line;
   wraps when the items do not fit on one line. */
.feishu-row {
  margin-top: 12px;
  display: flex;
  gap: 10px;
  align-items: center;
  flex-wrap: wrap;
}
/* Link to the generated Feishu doc, rendered as a cyan pill. */
.feishu-link {
  color: var(--accent-cyan);
  background: rgba(6, 182, 212, 0.1);
  padding: 6px 12px;
  border-radius: 6px;
  font-size: 12px;
  text-decoration: none;
}
.feishu-link:hover { background: rgba(6, 182, 212, 0.2); }
/* Convert / regenerate button. */
.feishu-btn {
  background: var(--accent-strong);
  color: #fff;
  padding: 6px 14px;
  border-radius: 6px;
  font-size: 12px;
  font-weight: 600;
}
/* No hover highlight while the button is disabled. */
.feishu-btn:hover:not(:disabled) { background: var(--accent); }
/* Error message; full width so it takes its own line inside the flex row. */
.feishu-err {
  width: 100%;
  margin: 0;
  color: var(--accent-red);
  background: rgba(239,68,68,0.08);
  padding: 6px 10px;
  border-radius: 4px;
  font-size: 12px;
}
|
||||
.retry-btn { background: rgba(124, 92, 191, 0.15); color: var(--accent); }
|
||||
.retry-btn:hover { background: rgba(124, 92, 191, 0.3); }
|
||||
.danger-btn { background: rgba(239, 68, 68, 0.1); color: var(--accent-red); }
|
||||
|
||||
@@ -35,6 +35,9 @@ export function listRecordings() { return jreq('/api/recordings') }
|
||||
// GET /api/recordings/:id
export function getRecording(id) { return jreq('/api/recordings/' + id) }
// DELETE /api/recordings/:id
export function deleteRecording(id) { return jreq('/api/recordings/' + id, { method: 'DELETE' }) }
// POST /api/recordings/:id/retry
export function retryRecording(id) { return jreq('/api/recordings/' + id + '/retry', { method: 'POST' }) }
|
||||
// POST /api/recordings/:id/feishu — returns whatever jreq returns for that call.
export function convertFeishu(id) {
  const endpoint = '/api/recordings/' + id + '/feishu'
  const options = { method: 'POST' }
  return jreq(endpoint, options)
}
|
||||
|
||||
export function uploadRecording(title, file) {
|
||||
const fd = new FormData()
|
||||
|
||||
@@ -69,6 +69,8 @@ spec:
|
||||
secretKeyRef:
|
||||
name: notes-creds
|
||||
key: llm_token
|
||||
- name: FEISHU_URL
|
||||
value: http://localhost:8002
|
||||
readinessProbe:
|
||||
httpGet: { path: /healthz, port: http }
|
||||
initialDelaySeconds: 1
|
||||
@@ -83,10 +85,39 @@ spec:
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
# Feishu sidecar container: serves HTTP on 8002 and mounts the same `data`
# volume at /data as the main container.
- name: feishu
  # NOTE(review): a `:latest` tag combined with `IfNotPresent` means a node
  # that already cached an older image will not pull a newer push — confirm
  # the deploy pipeline pins a tag/digest or otherwise forces a fresh pull.
  image: registry.famzheng.me/mochi/notes-feishu:latest
  imagePullPolicy: IfNotPresent
  ports:
    - containerPort: 8002
      name: feishu
  # Both probes hit the /healthz route on the named `feishu` port.
  readinessProbe:
    httpGet: { path: /healthz, port: feishu }
    initialDelaySeconds: 3
    periodSeconds: 10
  livenessProbe:
    httpGet: { path: /healthz, port: feishu }
    initialDelaySeconds: 30
    periodSeconds: 30
  resources:
    requests: { cpu: 20m, memory: 64Mi }
    limits: { cpu: 500m, memory: 384Mi }
  volumeMounts:
    - name: data
      mountPath: /data
    - name: lark-cli-config
      mountPath: /root/.lark-cli
      # NOTE(review): Secret-backed volumes are mounted read-only by
      # Kubernetes, so `readOnly: false` presumably has no effect here —
      # verify whether lark-cli needs to write under /root/.lark-cli.
      readOnly: false
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: notes-data
|
||||
- name: lark-cli-config
|
||||
secret:
|
||||
secretName: lark-cli-creds
|
||||
items:
|
||||
- key: config.json
|
||||
path: config.json
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
|
||||
+125
-7
@@ -36,6 +36,7 @@ struct AppState {
|
||||
llm_gateway: String,
|
||||
llm_token: String,
|
||||
llm_model: String,
|
||||
feishu_url: String,
|
||||
http: reqwest::Client,
|
||||
}
|
||||
|
||||
@@ -59,6 +60,8 @@ async fn main() -> std::io::Result<()> {
|
||||
std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into());
|
||||
let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default();
|
||||
let llm_model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gemma-4-31b-it".into());
|
||||
let feishu_url =
|
||||
std::env::var("FEISHU_URL").unwrap_or_else(|_| "http://localhost:8002".into());
|
||||
|
||||
std::fs::create_dir_all(&blobs_dir).expect("mkdir blobs_dir");
|
||||
|
||||
@@ -79,6 +82,9 @@ async fn main() -> std::io::Result<()> {
|
||||
);",
|
||||
)
|
||||
.expect("init schema");
|
||||
// 兼容旧 db 增量加列;已存在忽略错误
|
||||
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_doc_id TEXT", []);
|
||||
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_url TEXT", []);
|
||||
tracing::info!(%db_path, blobs = %blobs_dir.display(), "notes ready");
|
||||
|
||||
let http = reqwest::Client::builder()
|
||||
@@ -94,6 +100,7 @@ async fn main() -> std::io::Result<()> {
|
||||
llm_gateway,
|
||||
llm_token,
|
||||
llm_model,
|
||||
feishu_url,
|
||||
http,
|
||||
};
|
||||
|
||||
@@ -105,6 +112,7 @@ async fn main() -> std::io::Result<()> {
|
||||
.route("/recordings/:id", get(get_recording).delete(delete_recording))
|
||||
.route("/recordings/:id/audio", get(stream_audio))
|
||||
.route("/recordings/:id/retry", post(retry_recording))
|
||||
.route("/recordings/:id/feishu", post(convert_feishu))
|
||||
.with_state(state.clone())
|
||||
.layer(from_fn_with_state(state.clone(), auth_middleware));
|
||||
|
||||
@@ -211,6 +219,8 @@ struct RecordingDetail {
|
||||
summary: Option<String>,
|
||||
error: Option<String>,
|
||||
created_at: String,
|
||||
feishu_doc_id: Option<String>,
|
||||
feishu_url: Option<String>,
|
||||
}
|
||||
|
||||
// ---------- handlers ----------
|
||||
@@ -253,26 +263,30 @@ async fn get_recording(
|
||||
type Row = (
|
||||
String, String, String, i64, String,
|
||||
Option<String>, Option<String>, Option<String>, String,
|
||||
Option<String>, Option<String>,
|
||||
);
|
||||
let row: Option<Row> = conn
|
||||
.query_row(
|
||||
"SELECT title, filename, mime, size_bytes, status,
|
||||
transcript, summary, error, created_at
|
||||
transcript, summary, error, created_at,
|
||||
feishu_doc_id, feishu_url
|
||||
FROM recordings WHERE id = ?1",
|
||||
params![id],
|
||||
|r| {
|
||||
Ok((
|
||||
r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?,
|
||||
r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?,
|
||||
r.get(9)?, r.get(10)?,
|
||||
))
|
||||
},
|
||||
)
|
||||
.optional()?;
|
||||
let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at) =
|
||||
row.ok_or(AppError::NotFound)?;
|
||||
let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at,
|
||||
feishu_doc_id, feishu_url) = row.ok_or(AppError::NotFound)?;
|
||||
Ok(JsonResp(RecordingDetail {
|
||||
id, title, filename, mime, size_bytes, status,
|
||||
transcript, summary, error, created_at,
|
||||
feishu_doc_id, feishu_url,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -501,13 +515,16 @@ async fn call_llm_summary(s: &AppState, transcript: &str) -> Result<String, Stri
|
||||
"model": s.llm_model,
|
||||
"messages": [
|
||||
{ "role": "system", "content":
|
||||
"你是一个会议纪要助手。根据语音转写整理一份结构化纪要(markdown):\n\
|
||||
"你是一个会议纪要助手。根据语音转写整理一份结构化纪要(markdown 格式):\n\
|
||||
\n\
|
||||
1. **概要**:1-2 句话总结\n\
|
||||
2. **关键讨论点**:bullet 列出\n\
|
||||
3. **决定 / 结论**\n\
|
||||
4. **行动项 (action items)**:谁、做什么、何时\n\
|
||||
5. **待跟进 / 未决问题**\n\
|
||||
转写可能有 ASR 错字,结合上下文合理修正;遇到模糊处标 [?]。" },
|
||||
4. **行动项 (action items)**:每条用 markdown checkbox 格式 `- [ ] 谁 · 做什么 · 何时`\n\
|
||||
5. **待跟进 / 未决问题**:bullet 列出\n\
|
||||
\n\
|
||||
转写可能有 ASR 错字,结合上下文合理修正;遇到模糊处标 [?]。\n\
|
||||
不要编造没说过的内容。" },
|
||||
{ "role": "user", "content": trimmed },
|
||||
],
|
||||
"temperature": 0.3,
|
||||
@@ -574,6 +591,107 @@ async fn retry_recording(
|
||||
Ok(JsonResp(json!({ "ok": true, "status": "pending" })))
|
||||
}
|
||||
|
||||
/// `POST /api/recordings/:id/feishu` — 把转写 + 纪要 push 成飞书 docx。
|
||||
/// 已经转过的 piece 仍 update 同一个 doc(markdown-to-feishu 自带 --update)。
|
||||
async fn convert_feishu(
|
||||
State(s): State<AppState>,
|
||||
Path(id): Path<i64>,
|
||||
) -> Result<JsonResp<Value>, AppError> {
|
||||
let row: (String, String, Option<String>, Option<String>, String, Option<String>) = {
|
||||
let conn = s.db.lock().unwrap();
|
||||
conn.query_row(
|
||||
"SELECT title, filename, transcript, summary, status, feishu_doc_id
|
||||
FROM recordings WHERE id = ?1",
|
||||
params![id],
|
||||
|r| {
|
||||
Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?, r.get(5)?))
|
||||
},
|
||||
)
|
||||
.optional()?
|
||||
.ok_or(AppError::NotFound)?
|
||||
};
|
||||
let (title, filename, transcript, summary, status, existing_doc) = row;
|
||||
if status != "done" {
|
||||
return Err(AppError::bad_request(format!(
|
||||
"recording not ready (status={status})"
|
||||
)));
|
||||
}
|
||||
let summary = summary.unwrap_or_default();
|
||||
let transcript = transcript.unwrap_or_default();
|
||||
|
||||
// 拼 markdown
|
||||
let ext = std::path::Path::new(&filename)
|
||||
.extension()
|
||||
.and_then(|x| x.to_str())
|
||||
.unwrap_or("m4a")
|
||||
.to_string();
|
||||
let audio_name = format!("audio.{ext}");
|
||||
let md = format!(
|
||||
"# {title}\n\n\
|
||||
## 📋 会议纪要\n\n\
|
||||
{summary}\n\n\
|
||||
---\n\n\
|
||||
## 📎 原始材料\n\n\
|
||||
- [📄 转录原文](./transcript.txt)\n\
|
||||
- [🎙️ 原始录音](./{audio_name})\n\n\
|
||||
---\n\n\
|
||||
## 🎙️ 转录全文\n\n\
|
||||
{transcript}\n",
|
||||
);
|
||||
|
||||
// 落到 PVC 共享目录,sidecar 同样挂这个卷
|
||||
let work_dir = std::path::PathBuf::from(format!("/data/feishu-tmp/{id}"));
|
||||
tokio::fs::create_dir_all(&work_dir).await.map_err(AppError::Io)?;
|
||||
let md_path = work_dir.join("note.md");
|
||||
tokio::fs::write(&md_path, md).await.map_err(AppError::Io)?;
|
||||
tokio::fs::write(work_dir.join("transcript.txt"), &transcript)
|
||||
.await
|
||||
.map_err(AppError::Io)?;
|
||||
// 拷 audio(用 copy,sidecar 跑期间不会被改)
|
||||
let audio_src = s.blobs_dir.join(id.to_string());
|
||||
let audio_dst = work_dir.join(&audio_name);
|
||||
tokio::fs::copy(&audio_src, &audio_dst).await.map_err(AppError::Io)?;
|
||||
|
||||
// 调 sidecar
|
||||
let url = format!("{}/convert", s.feishu_url.trim_end_matches('/'));
|
||||
let mut payload = json!({
|
||||
"md_path": md_path.to_string_lossy(),
|
||||
"title": title,
|
||||
});
|
||||
if let Some(d) = existing_doc.as_deref().filter(|x| !x.is_empty()) {
|
||||
payload["existing_doc_id"] = json!(d);
|
||||
}
|
||||
let resp = s
|
||||
.http
|
||||
.post(&url)
|
||||
.json(&payload)
|
||||
.timeout(std::time::Duration::from_secs(300))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| AppError::bad_request(format!("feishu sidecar: {e}")))?;
|
||||
if !resp.status().is_success() {
|
||||
let st = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
return Err(AppError::bad_request(format!("feishu {st}: {body}")));
|
||||
}
|
||||
let body: Value = resp.json().await.map_err(|e| AppError::bad_request(format!("decode: {e}")))?;
|
||||
let doc_id = body.get("doc_id").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
let doc_url = body.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
if doc_id.is_empty() || doc_url.is_empty() {
|
||||
return Err(AppError::bad_request(format!("feishu bad response: {body}")));
|
||||
}
|
||||
|
||||
{
|
||||
let conn = s.db.lock().unwrap();
|
||||
conn.execute(
|
||||
"UPDATE recordings SET feishu_doc_id = ?1, feishu_url = ?2 WHERE id = ?3",
|
||||
params![&doc_id, &doc_url, id],
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(JsonResp(json!({ "doc_id": doc_id, "url": doc_url })))
|
||||
}
|
||||
|
||||
async fn stream_audio(
|
||||
State(s): State<AppState>,
|
||||
Path(id): Path<i64>,
|
||||
|
||||
Reference in New Issue
Block a user