notes: 加一键转飞书文档 (sidecar markdown-to-feishu)
deploy notes / build-and-deploy (push) Failing after 2m2s
deploy notes / build-and-deploy (push) Failing after 2m2s
- backend: POST /api/recordings/:id/feishu → 拼 markdown (总结在最上 + 附件链接到转录/录音 + 转写全文) → 写 /data/feishu-tmp/<id>/ → HTTP POST 到 feishu sidecar
- 复用:已有 feishu_doc_id 时 --update 同一个 doc,前端按钮文案变「↻ 重新生成」
- schema 加 feishu_doc_id + feishu_url 两列(ALTER TABLE 兼容旧 db)
- LLM prompt 改:行动项用 markdown checkbox `- [ ] 谁·做什么·何时`
- sidecar apps/notes/feishu: node:20 + python3 + python3-markdown + @larksuite/cli + COPY 自己的 markdown-to-feishu script + FastAPI /convert
- k8s: deployment 加 feishu container 共享 PVC;lark-cli-creds Secret 挂 /root/.lark-cli/config.json
- CI: 主 image --no-cache(cube 规矩),sidecar 保留 layer cache(chromium-free,但 apt/npm 也大)
- 前端: content 头部加「📤 一键转飞书文档」按钮;已转过显示飞书链接 + 按钮变重生成
This commit is contained in:
@@ -19,6 +19,7 @@ jobs:
|
|||||||
APP: notes
|
APP: notes
|
||||||
NS: cube-notes
|
NS: cube-notes
|
||||||
IMAGE: registry.famzheng.me/mochi/notes
|
IMAGE: registry.famzheng.me/mochi/notes
|
||||||
|
FEISHU_IMAGE: registry.famzheng.me/mochi/notes-feishu
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
@@ -37,19 +38,26 @@ jobs:
|
|||||||
npm ci
|
npm ci
|
||||||
npm run build
|
npm run build
|
||||||
|
|
||||||
- name: Build & push image
|
- name: Build & push images
|
||||||
env:
|
env:
|
||||||
REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
|
REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
echo "$REGISTRY_TOKEN" | docker login registry.famzheng.me -u mochi --password-stdin
|
echo "$REGISTRY_TOKEN" | docker login registry.famzheng.me -u mochi --password-stdin
|
||||||
# --no-cache 必须 —— 见 memory/feedback_cube_docker_cache.md
|
# main app —— FROM scratch + COPY musl binary,必须 --no-cache(cube docker cache 坑)
|
||||||
docker build --no-cache -f "apps/$APP/Dockerfile" -t "$IMAGE:${{ steps.tag.outputs.sha }}" .
|
docker build --no-cache -f "apps/$APP/Dockerfile" -t "$IMAGE:${{ steps.tag.outputs.sha }}" .
|
||||||
docker push "$IMAGE:${{ steps.tag.outputs.sha }}"
|
docker push "$IMAGE:${{ steps.tag.outputs.sha }}"
|
||||||
|
# feishu sidecar —— node+python+chromium-free,layer cache 帮助大不用 --no-cache
|
||||||
|
docker build -f "apps/$APP/feishu/Dockerfile" \
|
||||||
|
-t "$FEISHU_IMAGE:${{ steps.tag.outputs.sha }}" \
|
||||||
|
"apps/$APP/feishu"
|
||||||
|
docker push "$FEISHU_IMAGE:${{ steps.tag.outputs.sha }}"
|
||||||
|
|
||||||
- name: Initialize K8s resources
|
- name: Initialize K8s resources
|
||||||
run: kubectl apply -f apps/notes/k8s/all.yaml
|
run: kubectl apply -f apps/notes/k8s/all.yaml
|
||||||
|
|
||||||
- name: Roll out to k3s
|
- name: Roll out to k3s
|
||||||
run: |
|
run: |
|
||||||
kubectl -n "$NS" set image "deploy/$APP" "$APP=$IMAGE:${{ steps.tag.outputs.sha }}"
|
kubectl -n "$NS" set image "deploy/$APP" \
|
||||||
kubectl -n "$NS" rollout status "deploy/$APP" --timeout=120s
|
"$APP=$IMAGE:${{ steps.tag.outputs.sha }}" \
|
||||||
|
"feishu=$FEISHU_IMAGE:${{ steps.tag.outputs.sha }}"
|
||||||
|
kubectl -n "$NS" rollout status "deploy/$APP" --timeout=300s
|
||||||
|
|||||||
@@ -0,0 +1,23 @@
|
|||||||
|
# notes feishu sidecar: runs markdown-to-feishu to push meeting minutes into a Feishu docx.
# Runs in the same pod as the notes main container and shares its PVC, so it can see the
# markdown + attachments the main container has written under /data/feishu-tmp/<id>/.

FROM node:20-slim

# Python runtime plus the python-markdown library imported by the markdown-to-feishu script;
# ca-certificates for outbound HTTPS. The apt lists are removed to keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 python3-pip python3-markdown ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Pinned Lark CLI; presumably provides the `lark-cli` binary the script shells out to
# (TODO confirm the binary name shipped by this package version).
RUN npm install -g @larksuite/cli@1.0.29

# Pinned HTTP server stack for server.py. --break-system-packages is needed because pip
# installs into the distro-managed Python here.
RUN pip install --no-cache-dir --break-system-packages \
    fastapi==0.115.6 \
    uvicorn==0.34.0

# The converter script is installed on PATH and made executable.
COPY markdown-to-feishu /usr/local/bin/markdown-to-feishu
RUN chmod +x /usr/local/bin/markdown-to-feishu
COPY server.py /app/server.py

# Unbuffered stdout/stderr so log lines appear immediately in container logs.
ENV PYTHONUNBUFFERED=1
WORKDIR /app
EXPOSE 8002
# Serve the `app` object from server.py on all interfaces, port 8002.
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8002"]
|
||||||
Executable
+970
@@ -0,0 +1,970 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""markdown-to-feishu — convert a Markdown file (with rich embeds) into a Feishu
|
||||||
|
docx, using the lark-cli wrapper. Tables, images (URL + local), Mermaid /
|
||||||
|
PlantUML diagrams, and arbitrary attachments (PDF / CSV / log / anything) all
|
||||||
|
get planted as real DocxXML blocks. Re-runs against the same .md by default
|
||||||
|
update the previously-created doc instead of spawning a new one.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import html as html_lib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import textwrap
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
from pathlib import Path
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import markdown
|
||||||
|
|
||||||
|
|
||||||
|
STATE_DIR = Path(os.environ.get("MD2FEISHU_STATE_DIR", str(Path.home() / ".local/share/markdown-to-feishu")))
|
||||||
|
STATE_FILE = STATE_DIR / "state.json"
|
||||||
|
|
||||||
|
SENTINEL_PREFIX = "MD2FEISHU_SENTINEL"
|
||||||
|
|
||||||
|
VERSION = "0.1.0"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# State (markdown abs path -> doc id) so re-runs update in place
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def load_state() -> dict:
    """Read the persisted state (markdown abs path -> doc id) from STATE_FILE.

    Returns:
        The decoded JSON content, or an empty dict when the state file is
        absent, cannot be read, or does not contain valid JSON.
    """
    if STATE_FILE.exists():
        try:
            raw = STATE_FILE.read_text(encoding="utf-8")
            return json.loads(raw)
        except (json.JSONDecodeError, OSError):
            # Best effort: a corrupt or unreadable state file counts as "no state".
            pass
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def save_state(state: dict) -> None:
    """Persist ``state`` to STATE_FILE as indented UTF-8 JSON.

    STATE_DIR (and any missing parents) is created first. Non-ASCII
    characters are written as-is rather than as ``\\u`` escapes.
    """
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state, indent=2, ensure_ascii=False)
    STATE_FILE.write_text(payload, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# lark-cli runner
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class LarkError(RuntimeError):
    """Raised when a lark-cli invocation fails or returns an unusable response."""
|
||||||
|
|
||||||
|
|
||||||
|
def run_lark(args: list[str], *, stdin: str | None = None, identity: str = "user", verbose: bool = False, cwd: str | None = None) -> dict:
    """Invoke ``lark-cli --as <identity> <args...>`` and decode its output.

    Args:
        args: Arguments appended after ``lark-cli --as <identity>``.
        stdin: Optional text fed to the process on standard input.
        identity: Value passed to the ``--as`` flag.
        verbose: When true, the command line (and cwd, if set) is echoed to stderr.
        cwd: Optional working directory for the child process.

    Returns:
        ``{}`` when stdout is blank, the parsed JSON when stdout is valid
        JSON, otherwise ``{"_raw": <stdout>}``.

    Raises:
        LarkError: The process exited with a non-zero status; the message
            carries the command line, stderr and stdout.
    """
    cmd = ["lark-cli", "--as", identity] + args

    def _rendered() -> str:
        # Built lazily so the join only happens when a message is actually needed.
        return " ".join(cmd)

    if verbose:
        cwd_note = f" (cwd={cwd})" if cwd else ""
        sys.stderr.write(f"[lark] {_rendered()}{cwd_note}\n")

    proc = subprocess.run(cmd, input=stdin, capture_output=True, text=True, cwd=cwd)

    if proc.returncode != 0:
        message = (
            f"lark-cli failed (exit {proc.returncode}): {_rendered()}\n"
            f"stderr: {proc.stderr.strip()}\n"
            f"stdout: {proc.stdout.strip()}"
        )
        raise LarkError(message)

    output = proc.stdout
    if not output.strip():
        return {}
    try:
        return json.loads(output)
    except json.JSONDecodeError:
        # Non-JSON output is handed back untouched under a sentinel key.
        return {"_raw": output}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Markdown helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def is_http_url(s: str) -> bool:
    """Return True when ``s`` parses as a URL whose scheme is http or https."""
    return urlparse(s).scheme in ("http", "https")
|
||||||
|
|
||||||
|
|
||||||
|
def is_anchor(s: str) -> bool:
    """Return True when ``s`` is an in-page fragment reference (begins with ``#``)."""
    return s[:1] == "#"
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_markdown(text: str) -> str:
    """Handle GFM extras python-markdown core misses.

    Transformations, applied line by line outside fenced code blocks:

    * a leading byte-order mark is removed;
    * top-level task-list items (``- [x] text`` / ``* [ ] text`` /
      ``1. [x] text``) become a stand-alone ``<checkbox done="...">`` HTML
      block surrounded by blank lines, with the item text HTML-escaped;
    * ``~~text~~`` becomes ``<del>text</del>``.

    Indented (nested) task items are left as ordinary list items so they are
    not hoisted to top level. Lines inside ``` fences are passed through
    unchanged.

    Args:
        text: Raw Markdown source.

    Returns:
        The preprocessed Markdown, lines joined with ``\\n``.
    """
    # Strip a leading BOM. Written as an escape sequence on purpose: a literal
    # (invisible) BOM character is easily lost by editors or tooling, which
    # would turn this into startswith("") -- always true -- and silently drop
    # the first character of every document.
    if text.startswith("\ufeff"):
        text = text[1:]
    out_lines: list[str] = []
    in_fence = False
    fence_re = re.compile(r"^\s*```")
    strike_re = re.compile(r"~~(\S(?:.*?\S)?)~~")
    # GFM task-list items at top level: "- [x] text" / "* [ ] text" / "1. [x] text"
    # Convert to a stand-alone HTML <checkbox> block so python-markdown passes
    # it through. Leading whitespace becomes a marker (so nested checkboxes
    # don't get hoisted to top level).
    task_re = re.compile(r"^(\s*)(?:[-*+]|\d+\.)\s+\[([ xX])\]\s+(.*)$")
    for line in text.split("\n"):
        if fence_re.match(line):
            # Opening or closing fence: toggle and keep the fence line itself.
            in_fence = not in_fence
            out_lines.append(line)
            continue
        if in_fence:
            out_lines.append(line)
            continue
        m = task_re.match(line)
        if m and not m.group(1):  # top-level only; nested stays a list item
            done = "true" if m.group(2).lower() == "x" else "false"
            body = m.group(3).strip()
            # Surround with blank lines so it parses as raw HTML block
            out_lines.append("")
            out_lines.append(f'<checkbox done="{done}">{html_lib.escape(body)}</checkbox>')
            out_lines.append("")
            continue
        out_lines.append(strike_re.sub(r"<del>\1</del>", line))
    return "\n".join(out_lines)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# HTML -> DocxXML converter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
INLINE_TAGS = {"a", "b", "strong", "em", "i", "u", "del", "s", "strike", "code", "span", "br", "img", "cite", "latex"}
|
||||||
|
BLOCK_PASSTHROUGH = {"p", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "hr", "br"}
|
||||||
|
|
||||||
|
|
||||||
|
def xml_escape_text(s: str) -> str:
    """Escape ``s`` for use as XML character data.

    ``&`` is replaced first so the entities produced for ``<`` and ``>`` are
    not escaped a second time.
    """
    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def xml_escape_attr(s: str) -> str:
    """Escape ``s`` for use inside a double-quoted XML attribute value.

    Applies the same escaping as :func:`xml_escape_text` and additionally
    replaces ``"`` with its entity.
    """
    return xml_escape_text(s).replace('"', "&quot;")
|
||||||
|
|
||||||
|
|
||||||
|
class DocxXMLBuilder(HTMLParser):
    """Walks python-markdown HTML and emits DocxXML.

    Local images / attachments / non-inline-able media become placeholder
    <p>SENTINEL</p> paragraphs; each one is recorded in ``self.embeds`` so the
    caller can media-insert the real file in the correct position afterwards.

    Output fragments accumulate in ``self.out``; join them to obtain the
    document body.
    """

    def __init__(self, md_dir: Path, session_tag: str):
        """Set up an empty builder.

        Args:
            md_dir: Directory against which relative image/link paths are resolved.
            session_tag: Tag embedded in every sentinel generated by this builder.
        """
        super().__init__(convert_charrefs=True)
        self.md_dir = md_dir
        self.session_tag = session_tag
        # Emitted DocxXML fragments, in document order.
        self.out: list[str] = []
        self.embeds: list[dict] = []  # {sentinel, file, type, caption}
        # Text collected inside <pre><code>; None when not inside a code element.
        self._code_buf: list[str] | None = None
        self._code_lang: str | None = None
        self._table_buf: list[str] | None = None  # we buffer the entire table so colspan/rowspan etc. just round-trip
        # Nesting level of <table> while buffering; the buffer is flushed when it returns to 0.
        self._table_depth = 0
        self._in_pre = False
        # Open inline elements (plus the "__TRANSPARENT_A__" / "__UNKNOWN__" markers).
        self._inline_stack: list[str] = []
        self._li_stack: list[str] = []  # track ul/ol type for current li
        self._blockquote_depth = 0
        self._p_depth = 0  # how many <p> are currently open in our output stream

    # ---- sentinel handling ----
    def _next_sentinel(self) -> str:
        """Return the sentinel for the next embed, numbered by the current embed count."""
        n = len(self.embeds)
        # All caps + underscores so it never collides with normal markdown prose
        return f"{SENTINEL_PREFIX}_{self.session_tag}_{n:04d}"

    def _resolve_local(self, src: str) -> Path | None:
        """Map a link/image target to an existing local file.

        Returns None for empty targets, http(s) URLs, ``#`` anchors, and paths
        that do not point at an existing regular file. Relative paths are
        resolved against ``self.md_dir``.
        """
        # Strip query/fragment for sanity
        clean = src.split("#", 1)[0].split("?", 1)[0]
        if not clean or is_http_url(clean) or is_anchor(clean):
            return None
        p = Path(clean)
        if not p.is_absolute():
            p = (self.md_dir / p).resolve()
        return p if p.exists() and p.is_file() else None

    # ---- emit helpers ----
    def _emit(self, s: str) -> None:
        """Append ``s`` to the table buffer when one is active, otherwise to ``self.out``."""
        # If we're buffering a table, append there instead
        if self._table_buf is not None:
            self._table_buf.append(s)
        else:
            self.out.append(s)

    def _emit_placeholder(self, file: Path, kind: str, caption: str | None = None) -> None:
        """Record an embed for ``file`` and emit its sentinel into the output.

        Args:
            file: Local file to be inserted later.
            kind: Stored as the embed's ``type`` (callers in this class pass
                ``"image"`` or ``"file"``).
            caption: Optional caption stored alongside the embed.
        """
        sentinel = self._next_sentinel()
        self.embeds.append({
            "sentinel": sentinel,
            "file": str(file),
            "type": kind,
            "caption": caption,
        })
        # The placeholder must end up as its own top-level <p> so media-insert
        # can anchor on it cleanly and the cleanup pass can block_delete it.
        # If we're currently inside a <p>, split: close, emit standalone, reopen.
        if self._table_buf is not None:
            # Inside a table cell — best we can do is emit the sentinel as
            # inline text and rely on str_replace cleanup. Media still lands at
            # top level (per --selection-with-ellipsis semantics).
            self._emit(sentinel)
            return
        if self._p_depth > 0:
            # _p_depth is deliberately left unchanged: one <p> is closed and one reopened.
            self.out.append("</p>")
            self.out.append(f"<p>{sentinel}</p>")
            self.out.append("<p>")
            return
        self._emit(f"<p>{sentinel}</p>")

    # ---- HTMLParser hooks ----
    def handle_starttag(self, tag, attrs):
        """Translate an opening HTML tag into DocxXML.

        Checks run in order: inside ``<pre>`` only ``<code>`` is acted on;
        inside a buffered table the tag is copied through; otherwise the tag
        is mapped individually. Unrecognised tags are dropped (their text
        children are still emitted by ``handle_data``).
        """
        attrd = dict(attrs)

        # Inside <pre><code>: capture verbatim
        if self._in_pre:
            # Nested tags are not processed here; only <code> is acted on, to
            # start the text buffer and remember the language.
            if tag == "code":
                self._code_lang = self._extract_lang(attrd.get("class", ""))
                self._code_buf = []
            return

        # Table buffer mode: just copy markup through, no transformations needed
        if self._table_buf is not None:
            self._table_buf.append(self._raw_tag(tag, attrd))
            if tag == "table":
                self._table_depth += 1
            return

        if tag == "table":
            self._table_buf = []
            self._table_depth = 1
            self._table_buf.append(self._raw_tag(tag, attrd))
            return

        if tag == "pre":
            self._in_pre = True
            return

        if tag == "img":
            self._emit_img(attrd)
            return

        if tag == "a":
            href = attrd.get("href", "")
            local = self._resolve_local(href) if href else None
            if local is not None:
                # Inline attachment: keep the link text in the prose so the
                # paragraph still reads naturally, and emit a placeholder at
                # the link position (splitting the open paragraph, if any) so
                # the attachment block can be inserted there.
                caption = attrd.get("title") or None
                self._emit_placeholder(local, "file", caption)
                # Drop the <a> tags (keep their text children) by pushing
                # a "transparent" marker on the inline stack.
                self._inline_stack.append("__TRANSPARENT_A__")
                return
            # Regular link
            self._inline_stack.append("a")
            attrs_s = self._attrs_string({"href": href})
            self._emit(f"<a{attrs_s}>")
            return

        if tag in {"b", "strong"}:
            self._inline_stack.append("b")
            self._emit("<b>")
            return
        if tag in {"em", "i"}:
            self._inline_stack.append("em")
            self._emit("<em>")
            return
        if tag in {"u"}:
            self._inline_stack.append("u")
            self._emit("<u>")
            return
        if tag in {"del", "s", "strike"}:
            self._inline_stack.append("del")
            self._emit("<del>")
            return
        if tag == "code":
            self._inline_stack.append("code")
            self._emit("<code>")
            return
        if tag == "br":
            self._emit("<br/>")
            return

        if tag == "ul":
            self._li_stack.append("ul")
            self._emit("<ul>")
            return
        if tag == "ol":
            self._li_stack.append("ol")
            self._emit("<ol>")
            return
        if tag == "li":
            # Items of an ordered list carry seq="auto"; others are emitted bare.
            if self._li_stack and self._li_stack[-1] == "ol":
                self._emit('<li seq="auto">')
            else:
                self._emit("<li>")
            return

        if tag == "blockquote":
            self._blockquote_depth += 1
            self._emit("<blockquote>")
            return

        if tag == "p":
            self._p_depth += 1
            self._emit("<p>")
            return

        if tag == "checkbox":
            # Emitted by our preprocessor for GFM task list items.
            done = attrd.get("done", "false")
            self._emit(f'<checkbox done="{xml_escape_attr(done)}">')
            self._inline_stack.append("checkbox")
            return

        if tag in BLOCK_PASSTHROUGH:
            # Passed through by name only; attributes are not copied.
            self._emit(f"<{tag}>")
            return

        # span etc.
        if tag == "span":
            self._inline_stack.append("span")
            self._emit("<span>")
            return

        # Anything else we don't recognise — drop the tag, keep its text
        self._inline_stack.append("__UNKNOWN__")

    def handle_endtag(self, tag):
        """Translate a closing HTML tag, mirroring ``handle_starttag``.

        Inline closers are only emitted when the matching entry is on top of
        ``self._inline_stack``.
        """
        if self._in_pre:
            if tag == "code":
                self._flush_code()
            elif tag == "pre":
                self._in_pre = False
            return

        if self._table_buf is not None:
            self._table_buf.append(f"</{tag}>")
            if tag == "table":
                self._table_depth -= 1
                if self._table_depth == 0:
                    # Outermost table closed: flush the buffered markup as one fragment.
                    table_xml = "".join(self._table_buf)
                    self._table_buf = None
                    # Clean the buffered HTML so it's valid DocxXML
                    self.out.append(self._sanitise_table(table_xml))
            return

        if tag == "pre":
            self._in_pre = False
            return

        if tag == "img":
            return

        if tag == "a":
            # NOTE(review): pops whatever is on top of the stack without checking
            # that it is an "a" / "__TRANSPARENT_A__" entry — confirm this is intended.
            top = self._inline_stack.pop() if self._inline_stack else None
            if top == "__TRANSPARENT_A__":
                return
            self._emit("</a>")
            return

        if tag in {"b", "strong"}:
            if self._inline_stack and self._inline_stack[-1] == "b":
                self._inline_stack.pop()
                self._emit("</b>")
            return
        if tag in {"em", "i"}:
            if self._inline_stack and self._inline_stack[-1] == "em":
                self._inline_stack.pop()
                self._emit("</em>")
            return
        if tag in {"u"}:
            if self._inline_stack and self._inline_stack[-1] == "u":
                self._inline_stack.pop()
                self._emit("</u>")
            return
        if tag in {"del", "s", "strike"}:
            if self._inline_stack and self._inline_stack[-1] == "del":
                self._inline_stack.pop()
                self._emit("</del>")
            return
        if tag == "code":
            if self._inline_stack and self._inline_stack[-1] == "code":
                self._inline_stack.pop()
                self._emit("</code>")
            return
        if tag == "span":
            if self._inline_stack and self._inline_stack[-1] == "span":
                self._inline_stack.pop()
                self._emit("</span>")
            return

        if tag == "ul":
            if self._li_stack and self._li_stack[-1] == "ul":
                self._li_stack.pop()
            self._emit("</ul>")
            return
        if tag == "ol":
            if self._li_stack and self._li_stack[-1] == "ol":
                self._li_stack.pop()
            self._emit("</ol>")
            return
        if tag == "li":
            self._emit("</li>")
            return

        if tag == "blockquote":
            self._blockquote_depth = max(0, self._blockquote_depth - 1)
            self._emit("</blockquote>")
            return

        if tag == "p":
            self._p_depth = max(0, self._p_depth - 1)
            self._emit("</p>")
            return

        if tag == "checkbox":
            if self._inline_stack and self._inline_stack[-1] == "checkbox":
                self._inline_stack.pop()
            self._emit("</checkbox>")
            return

        if tag in BLOCK_PASSTHROUGH:
            self._emit(f"</{tag}>")
            return

        # Closing an unrecognised tag: discard the marker pushed by handle_starttag.
        if self._inline_stack and self._inline_stack[-1] == "__UNKNOWN__":
            self._inline_stack.pop()

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing tags (``<img/>``, ``<br/>``, ``<hr/>``); others are treated as start + end."""
        attrd = dict(attrs)
        if tag == "img":
            self._emit_img(attrd)
            return
        if tag == "br":
            self._emit("<br/>")
            return
        if tag == "hr":
            self._emit("<hr/>")
            return
        # Treat as start+end
        self.handle_starttag(tag, attrs)
        self.handle_endtag(tag)

    def handle_data(self, data):
        """Route text: into the code buffer, the table buffer (escaped), or the output (escaped)."""
        if not data:
            return
        if self._in_pre and self._code_buf is not None:
            # Stored unescaped; escaping happens once in _flush_code.
            self._code_buf.append(data)
            return
        if self._table_buf is not None:
            self._table_buf.append(xml_escape_text(data))
            return
        # Preserve user text but escape XML specials
        # In <pre> outside <code> we also escape (shouldn't normally happen)
        self._emit(xml_escape_text(data))

    # ---- code / language extraction ----
    @staticmethod
    def _extract_lang(class_attr: str) -> str:
        """Return the language named by the first ``language-*`` / ``lang-*`` class token, or ``""``."""
        # python-markdown fenced_code emits e.g. class="language-mermaid"
        for tok in class_attr.split():
            if tok.startswith("language-"):
                return tok[len("language-"):]
            if tok.startswith("lang-"):
                return tok[len("lang-"):]
        return ""

    def _flush_code(self) -> None:
        """Emit the buffered code block and reset the code buffer/language.

        ``mermaid`` and ``plantuml``/``puml`` blocks are emitted as
        ``<whiteboard>`` elements; everything else as ``<pre><code>`` with an
        optional ``lang`` attribute.
        """
        body = "".join(self._code_buf or [])
        lang = (self._code_lang or "").strip().lower()
        self._code_buf = None
        self._code_lang = None
        # Mermaid / PlantUML get rendered as whiteboards
        if lang in {"mermaid"}:
            self._emit(f'<whiteboard type="mermaid">{xml_escape_text(body.rstrip())}</whiteboard>')
            return
        if lang in {"plantuml", "puml"}:
            self._emit(f'<whiteboard type="plantuml">{xml_escape_text(body.rstrip())}</whiteboard>')
            return
        # Strip trailing newline that python-markdown adds inside <code>
        body = body.rstrip("\n")
        lang_attr = f' lang="{xml_escape_attr(lang)}"' if lang else ""
        self._emit(f"<pre{lang_attr}><code>{xml_escape_text(body)}</code></pre>")

    # ---- image emit ----
    def _emit_img(self, attrd: dict) -> None:
        """Emit an image: http(s) sources inline as ``<img>``, local files as a placeholder embed.

        The caption is the ``title`` attribute, falling back to ``alt``.
        Images with no ``src`` are ignored; local paths that cannot be
        resolved are dropped with a warning on stderr.
        """
        src = attrd.get("src", "").strip()
        alt = attrd.get("alt", "").strip()
        title = attrd.get("title", "").strip()
        caption = title or alt or None
        if not src:
            return
        if is_http_url(src):
            attrs_s = self._attrs_string({"href": src, "caption": caption, "name": alt or None})
            self._emit(f"<img{attrs_s}/>")
            return
        local = self._resolve_local(src)
        if local is None:
            sys.stderr.write(f"[warn] image not found, dropping: {src}\n")
            return
        self._emit_placeholder(local, "image", caption)

    # ---- attrs helpers ----
    @staticmethod
    def _attrs_string(d: dict) -> str:
        """Render ``d`` as `` key="value"`` pairs (each with a leading space), skipping None/empty values."""
        parts = []
        for k, v in d.items():
            if v is None or v == "":
                continue
            parts.append(f' {k}="{xml_escape_attr(str(v))}"')
        return "".join(parts)

    @staticmethod
    def _raw_tag(tag: str, attrd: dict) -> str:
        """Render an opening tag with its attributes, as copied into the table buffer."""
        return f"<{tag}{DocxXMLBuilder._attrs_string(attrd)}>"

    @staticmethod
    def _sanitise_table(html: str) -> str:
        """Coerce python-markdown's HTML table into DocxXML-legal markup:
        - <strong> becomes <b>, <i> becomes <em>
        - Drop style="..." attributes (DocxXML uses background-color /
          vertical-align, not CSS)
        - Drop align="..." attributes

        Other attributes are left in place.
        """
        # tag rename
        html = re.sub(r"<(/?)strong\b", r"<\1b", html)
        html = re.sub(r"<(/?)i\b", r"<\1em", html)
        # drop style="..." on th/td/tr/table
        html = re.sub(r'\s+style="[^"]*"', "", html)
        # drop align="..." on th/td (we don't try to map to DocxXML alignment)
        html = re.sub(r'\s+align="[^"]*"', "", html)
        return html
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Driver
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def derive_title(md_text: str, md_path: Path) -> str:
    """Pick a document title.

    The text of the first line that (after trimming) starts with ``"# "`` is
    used; when there is no such line the file name without its extension is
    returned instead.
    """
    trimmed = (raw_line.strip() for raw_line in md_text.splitlines())
    heading = next((ln for ln in trimmed if ln.startswith("# ")), None)
    if heading is None:
        # fallback: filename without extension
        return md_path.stem
    return heading[2:].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def strip_first_h1(md_text: str) -> str:
    """Remove the first H1 line, if any; the title is conveyed via <title> instead.

    Lines are split with ``splitlines()`` and re-joined with ``\\n``, so a
    trailing newline in the input is not preserved.
    """
    kept = md_text.splitlines()
    for position, candidate in enumerate(kept):
        if candidate.strip().startswith("# "):
            # Only the first match is removed; later H1 lines stay.
            del kept[position]
            break
    return "\n".join(kept)
|
||||||
|
|
||||||
|
|
||||||
|
def build_xml(md_path: Path, *, title: str, session_tag: str) -> tuple[str, list[dict]]:
    """Convert the Markdown file at ``md_path`` into a DocxXML string.

    Args:
        md_path: Markdown file to read (UTF-8); its parent directory is used
            to resolve relative embeds.
        title: Text placed in the leading ``<title>`` element.
        session_tag: Tag passed to the builder for sentinel generation.

    Returns:
        ``(xml, embeds)``: the ``<title>`` element followed by the body XML,
        and the list of embeds recorded by the builder.
    """
    source = preprocess_markdown(md_path.read_text(encoding="utf-8"))
    rendered = markdown.markdown(
        strip_first_h1(source),
        extensions=["fenced_code", "tables", "sane_lists"],
        output_format="xhtml",
    )

    converter = DocxXMLBuilder(md_dir=md_path.parent, session_tag=session_tag)
    converter.feed(rendered)
    converter.close()
    body = "".join(converter.out)

    # python-markdown wraps unknown HTML tags in <p>; unwrap a paragraph that
    # holds nothing but a block-level <checkbox>.
    wrapped_checkbox = r"<p>\s*(<checkbox\s+done=\"(?:true|false)\">[^<]*</checkbox>)\s*</p>"
    body = re.sub(wrapped_checkbox, r"\1", body)
    # Drop the empty <p></p> pairs left behind by the placeholder split.
    body = re.sub(r"<p>\s*</p>", "", body)

    return f"<title>{xml_escape_text(title)}</title>" + body, converter.embeds
|
||||||
|
|
||||||
|
|
||||||
|
def create_or_overwrite_doc(*, doc_id: str | None, content: str, identity: str, parent_token: str | None, parent_position: str | None, verbose: bool) -> dict:
    """Overwrite an existing doc, or create a new one, with ``content``.

    The content is always passed on stdin (``--content -``) to avoid argv
    length / shell escaping pitfalls.

    Args:
        doc_id: Existing document to overwrite; falsy means create a new one.
        content: DocxXML payload.
        identity: Identity forwarded to ``run_lark``.
        parent_token: Optional ``--parent-token`` value (create only).
        parent_position: Optional ``--parent-position`` value (create only).
        verbose: Log progress to stderr and forward verbosity to ``run_lark``.

    Returns:
        Overwrite: ``{"doc_id", "result"}``.
        Create: ``{"doc_id", "url", "result"}``.

    Raises:
        LarkError: The create response carried no ``document_id`` (failures
            raised by ``run_lark`` also propagate).
    """
    log = sys.stderr.write if verbose else (lambda _msg: None)

    if doc_id:
        log(f"[md2feishu] overwriting existing doc {doc_id}\n")
        overwrite_args = [
            "docs", "+update",
            "--api-version", "v2",
            "--doc", doc_id,
            "--command", "overwrite",
            "--doc-format", "xml",
            "--content", "-",
        ]
        outcome = run_lark(overwrite_args, stdin=content, identity=identity, verbose=verbose)
        return {"doc_id": doc_id, "result": outcome}

    log("[md2feishu] creating new doc\n")
    create_args = [
        "docs", "+create",
        "--api-version", "v2",
        "--doc-format", "xml",
        "--content", "-",
    ]
    optional_flags = (
        ("--parent-token", parent_token),
        ("--parent-position", parent_position),
    )
    for flag, value in optional_flags:
        if value:
            create_args += [flag, value]

    outcome = run_lark(create_args, stdin=content, identity=identity, verbose=verbose)
    document = (outcome.get("data") or {}).get("document") or {}
    created_id = document.get("document_id")
    if not created_id:
        raise LarkError(f"docs +create did not return a document_id: {json.dumps(outcome, ensure_ascii=False)}")
    return {"doc_id": created_id, "url": document.get("url"), "result": outcome}
|
||||||
|
|
||||||
|
|
||||||
|
def insert_embed(doc_id: str, embed: dict, *, identity: str, verbose: bool) -> None:
    """Insert one recorded embed into ``doc_id`` before its sentinel.

    Args:
        doc_id: Target document.
        embed: Record with ``file``, ``type`` and ``sentinel`` keys and an
            optional ``caption``.
        identity: Identity forwarded to ``run_lark``.
        verbose: Verbosity forwarded to ``run_lark``.
    """
    # lark-cli refuses absolute paths for --file, so run it from the file's
    # parent directory and pass only the basename.
    target = Path(embed["file"]).resolve()
    kind = embed["type"]
    cli_args = [
        "docs", "+media-insert",
        "--doc", doc_id,
        "--file", target.name,
        "--type", kind,
        "--selection-with-ellipsis", embed["sentinel"],
        "--before",
    ]
    caption = embed.get("caption")
    # A caption is only forwarded for image embeds.
    if caption and kind == "image":
        cli_args.extend(["--caption", caption])
    run_lark(cli_args, identity=identity, verbose=verbose, cwd=str(target.parent))
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_sentinels(doc_id: str, session_tag: str, embeds: list[dict], *, identity: str, verbose: bool) -> None:
    """Remove the sentinel markers left in the doc after media insertion.

    Two passes:
      1. ``block_delete`` every paragraph whose entire text is a sentinel of
         this session.
      2. ``str_replace`` every sentinel that is still expected to be present
         afterwards: sentinels embedded inline (table cells, mixed prose) and,
         when pass 1 failed, the sentinel paragraphs themselves.

    Failures of the individual delete/replace calls are reported on stderr as
    warnings and do not abort the cleanup. A failure of the initial fetch
    propagates to the caller.
    """
    res = run_lark([
        "docs", "+fetch",
        "--api-version", "v2",
        "--doc", doc_id,
        "--detail", "with-ids",
        "--doc-format", "xml",
    ], identity=identity, verbose=verbose)
    xml_payload = ((res.get("data") or {}).get("document") or {}).get("content") or ""
    if not xml_payload:
        # No content field in the response: fall back to searching the raw
        # JSON dump so inline sentinels can still be detected.
        xml_payload = json.dumps(res, ensure_ascii=False)

    sentinel_re = re.compile(
        rf'<p[^>]*\bid="([^"]+)"[^>]*>\s*{SENTINEL_PREFIX}_{session_tag}_\d+\s*</p>'
    )
    ids = sentinel_re.findall(xml_payload)

    # ``remaining`` models what the document still contains after pass 1.
    # It only drops the sentinel paragraphs when block_delete succeeded, so a
    # failed delete leaves them eligible for the str_replace fallback.
    remaining = xml_payload
    if ids:
        if verbose:
            sys.stderr.write(f"[md2feishu] deleting {len(ids)} sentinel paragraph(s)\n")
        try:
            run_lark([
                "docs", "+update",
                "--api-version", "v2",
                "--doc", doc_id,
                "--command", "block_delete",
                "--block-id", ",".join(ids),
            ], identity=identity, verbose=verbose)
        except LarkError as e:
            sys.stderr.write(f"[warn] block_delete cleanup failed: {e}\n")
        else:
            remaining = sentinel_re.sub("", xml_payload)

    # Fallback: scrub any sentinel text still in the doc
    for embed in embeds:
        sentinel = embed["sentinel"]
        if sentinel not in remaining:
            continue
        try:
            run_lark([
                "docs", "+update",
                "--api-version", "v2",
                "--doc", doc_id,
                "--command", "str_replace",
                "--pattern", sentinel,
                "--content", "",
            ], identity=identity, verbose=verbose)
        except LarkError as e:
            sys.stderr.write(f"[warn] str_replace cleanup for {sentinel} failed: {e}\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
HELP_EPILOG = textwrap.dedent("""
|
||||||
|
EXAMPLES
|
||||||
|
# First run — creates a new Feishu doc, remembers the mapping
|
||||||
|
markdown-to-feishu ./report.md
|
||||||
|
|
||||||
|
# Re-run on the same file — updates the same doc in place (no new doc spawned)
|
||||||
|
markdown-to-feishu ./report.md
|
||||||
|
|
||||||
|
# Force a brand-new doc even when state already has a mapping
|
||||||
|
markdown-to-feishu --new ./report.md
|
||||||
|
|
||||||
|
# Update a specific doc explicitly, ignoring state file
|
||||||
|
markdown-to-feishu --update doxcnAbc123 ./report.md
|
||||||
|
|
||||||
|
# Drop into a particular folder when creating
|
||||||
|
markdown-to-feishu --parent-token fldcnXXXX ./report.md
|
||||||
|
|
||||||
|
# Put it under your personal knowledge library
|
||||||
|
markdown-to-feishu --parent-position my_library ./report.md
|
||||||
|
|
||||||
|
# Override the document title (default = first H1 or filename stem)
|
||||||
|
markdown-to-feishu --title "2026 Q2 OKR" ./okr.md
|
||||||
|
|
||||||
|
# Inspect the generated XML and embed plan, without touching Feishu
|
||||||
|
markdown-to-feishu --dry-run ./report.md
|
||||||
|
|
||||||
|
# Forget the mapping for a file (does NOT delete the Feishu doc)
|
||||||
|
markdown-to-feishu --forget ./report.md
|
||||||
|
|
||||||
|
# Show the recorded mapping for this file
|
||||||
|
markdown-to-feishu --show ./report.md
|
||||||
|
|
||||||
|
SUPPORTED MARKDOWN -> FEISHU BLOCK MAPPING
|
||||||
|
# / ## / ... / ###### -> <h1> ... <h9> (the first H1 becomes the
|
||||||
|
document <title>)
|
||||||
|
paragraphs -> <p>
|
||||||
|
**bold** / __bold__ -> <b>
|
||||||
|
*italic* / _italic_ -> <em>
|
||||||
|
~~strike~~ (GFM) -> <del>
|
||||||
|
`inline code` -> <code>
|
||||||
|
[text](https://...) -> <a href="...">text</a>
|
||||||
|
[text](./local.pdf) -> attachment block (file uploaded via
|
||||||
|
docs +media-insert --type file)
|
||||||
|
 -> <img href="https://..."/> (URL is fetched
|
||||||
|
server-side by Feishu)
|
||||||
|
 -> inline image block (file uploaded via
|
||||||
|
docs +media-insert --type image; alt /
|
||||||
|
title becomes caption)
|
||||||
|
> blockquote -> <blockquote>
|
||||||
|
--- / *** -> <hr/>
|
||||||
|
- item / * item / 1. item -> <ul> / <ol> with seq="auto"
|
||||||
|
nested lists (4-space indent) -> nested <ul> / <ol>
|
||||||
|
| a | b | GFM tables -> <table><thead><tr><th>...
|
||||||
|
```lang ... ``` -> <pre lang="lang"><code>...</code></pre>
|
||||||
|
```mermaid ... ``` -> <whiteboard type="mermaid">...</whiteboard>
|
||||||
|
```plantuml ... ``` -> <whiteboard type="plantuml">...</whiteboard>
|
||||||
|
|
||||||
|
ATTACHMENT DETECTION
|
||||||
|
Any [text](path) link whose href is NOT an http(s) URL and NOT an in-doc
|
||||||
|
anchor (#foo), and which resolves to an existing local file (relative to
|
||||||
|
the markdown file's directory), is uploaded as a Feishu file block. The
|
||||||
|
visible link text is dropped — the attachment block carries the filename
|
||||||
|
itself. This is what makes pasting PDFs / CSVs / logs / arbitrary binaries
|
||||||
|
feel "native".
|
||||||
|
|
||||||
|
Caveat: if a link resolves to a missing local file, it falls through to a
|
||||||
|
regular <a> link (the path will appear as-is). Run with --verbose to see
|
||||||
|
each resolution decision.
|
||||||
|
|
||||||
|
IDENTITY
|
||||||
|
Defaults to --as user so the created doc is owned by YOUR Feishu account,
|
||||||
|
not the bot. This means you can manage / move / delete it directly from
|
||||||
|
Feishu without any ownership transfer dance. Use --as bot only if you
|
||||||
|
explicitly want bot-owned documents.
|
||||||
|
|
||||||
|
UPDATE-BY-DEFAULT BEHAVIOUR
|
||||||
|
State lives at ~/.local/share/markdown-to-feishu/state.json (override with
|
||||||
|
$MD2FEISHU_STATE_DIR or --state-file). Keyed by the markdown file's
|
||||||
|
absolute path. When state has a doc_id for the given path:
|
||||||
|
|
||||||
|
- default -> overwrite that doc in place
|
||||||
|
- --new -> ignore state, create a fresh doc, replace
|
||||||
|
the mapping with the new id
|
||||||
|
- --update <id> -> overwrite the given id and update state
|
||||||
|
|
||||||
|
overwrite replays the full XML and re-uploads every local media file from
|
||||||
|
source, so the doc always matches the markdown 1:1. Comments on the doc
|
||||||
|
survive overwrite; manual edits inside the doc do NOT (markdown is the
|
||||||
|
source of truth).
|
||||||
|
|
||||||
|
EXIT CODES
|
||||||
|
0 success
|
||||||
|
1 generic error (bad args, file not found, lark-cli failure)
|
||||||
|
2 partial success — doc created/updated but at least one embed failed
|
||||||
|
|
||||||
|
ENVIRONMENT
|
||||||
|
MD2FEISHU_STATE_DIR override the directory holding state.json
|
||||||
|
LARK_CLI_PROFILE passed through; honoured by lark-cli itself
|
||||||
|
|
||||||
|
DEPENDENCIES
|
||||||
|
python3, python3-markdown, lark-cli (must be authenticated as user via
|
||||||
|
`lark-cli auth login`)
|
||||||
|
""")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the markdown-to-feishu command line.

    ``argv`` is the argument list without the program name. The positional
    markdown path is optional at the parser level (``nargs="?"``), so a
    missing path is not rejected here. ``--as`` is stored under ``identity``.
    """
    parser = argparse.ArgumentParser(
        prog="markdown-to-feishu",
        description="Convert a Markdown file (with rich embeds: tables, images, mermaid, attachments) into a Feishu docx. Re-runs update the previously-created doc by default.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=HELP_EPILOG,
    )
    add = parser.add_argument

    # Input file.
    add("markdown", nargs="?", help="path to the .md file")

    # Target selection: create vs. update.
    add(
        "--new",
        action="store_true",
        help="force-create a new doc even if state already has a mapping for this file",
    )
    add(
        "--update",
        metavar="DOC_ID",
        help="overwrite the given doc id (URL also accepted); ignores and then updates state",
    )

    # Document properties and placement.
    add("--title", help="override document title (default: first H1, else filename stem)")
    add("--parent-token", help="parent folder or wiki node token (only used when creating)")
    add(
        "--parent-position",
        help="parent position keyword, e.g. my_library (only used when creating)",
    )
    add(
        "--as",
        dest="identity",
        choices=["user", "bot"],
        default="user",
        help="identity for lark-cli (default: user, so you own the doc)",
    )

    # Inspection and state management.
    add(
        "--dry-run",
        action="store_true",
        help="print generated XML + embed plan without calling lark-cli",
    )
    add(
        "--state-file",
        help="override path to state.json (default: ~/.local/share/markdown-to-feishu/state.json)",
    )
    add(
        "--forget",
        action="store_true",
        help="remove the state mapping for this file (does not delete the Feishu doc) and exit",
    )
    add(
        "--show",
        action="store_true",
        help="print the recorded mapping for this file (if any) and exit",
    )

    # Diagnostics.
    add(
        "-v",
        "--verbose",
        action="store_true",
        help="verbose logging (every lark-cli invocation)",
    )
    add("--version", action="version", version=f"markdown-to-feishu {VERSION}")

    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str]) -> int:
    """Run the markdown-to-feishu CLI and return the process exit code.

    Flow: parse arguments, resolve the markdown path, handle the state-only
    commands (``--show`` / ``--forget``), build the XML and embed plan, stop
    there for ``--dry-run``, otherwise create or overwrite the Feishu doc,
    insert the embeds, clean up sentinels, persist the path -> doc mapping and
    print a JSON summary on stdout.

    Returns:
        0 on success, 1 on a fatal error (missing file, XML build failure,
        lark-cli failure on create/overwrite), 2 when the doc was written but
        at least one embed could not be inserted.
    """
    args = parse_args(argv)
    global STATE_FILE, STATE_DIR
    if args.state_file:
        STATE_FILE = Path(args.state_file).expanduser().resolve()
        STATE_DIR = STATE_FILE.parent

    if not args.markdown:
        sys.stderr.write("error: missing markdown file (use --help)\n")
        return 1

    md_path = Path(args.markdown).expanduser().resolve()
    if not md_path.exists() or not md_path.is_file():
        sys.stderr.write(f"error: {md_path} is not a file\n")
        return 1
    # State is keyed by the resolved absolute path of the markdown file.
    key = str(md_path)

    state = load_state()

    if args.show:
        entry = state.get(key)
        if entry is None:
            print(f"no mapping recorded for {md_path}")
        else:
            print(json.dumps(entry, indent=2, ensure_ascii=False))
        return 0

    if args.forget:
        if key in state:
            state.pop(key)
            save_state(state)
            print(f"forgot mapping for {md_path}")
        else:
            print(f"no mapping recorded for {md_path}")
        return 0

    md_text = md_path.read_text(encoding="utf-8")
    title = args.title or derive_title(md_text, md_path)
    # Per-run tag embedded in sentinel markers so cleanup only touches this run's.
    session_tag = uuid.uuid4().hex[:8].upper()

    try:
        content, embeds = build_xml(md_path, title=title, session_tag=session_tag)
    except Exception as e:
        sys.stderr.write(f"error: failed to build XML: {e}\n")
        return 1

    if args.dry_run:
        print("=== GENERATED DOCXXML ===")
        print(content)
        print()
        print("=== EMBED PLAN ===")
        if not embeds:
            print("(no out-of-band embeds)")
        else:
            for e in embeds:
                print(json.dumps(e, ensure_ascii=False))
        target = "new doc"
        if args.update:
            target = f"update doc {args.update}"
        elif not args.new and key in state:
            target = f"update existing doc {state[key].get('doc_id')}"
        print()
        print(f"=== TARGET ===\n{target}")
        return 0

    # Decide create-vs-update
    explicit_doc = args.update
    if explicit_doc and explicit_doc.startswith("http"):
        # extract /docx/<id>; a URL without that segment is passed through as-is
        m = re.search(r"/docx/([A-Za-z0-9]+)", explicit_doc)
        if m:
            explicit_doc = m.group(1)
    target_doc_id = None
    if explicit_doc:
        target_doc_id = explicit_doc
    elif not args.new and key in state:
        target_doc_id = state[key].get("doc_id")

    try:
        outcome = create_or_overwrite_doc(
            doc_id=target_doc_id,
            content=content,
            identity=args.identity,
            parent_token=args.parent_token,
            parent_position=args.parent_position,
            verbose=args.verbose,
        )
    except LarkError as e:
        sys.stderr.write(f"error: {e}\n")
        return 1

    doc_id = outcome["doc_id"]
    failed_embeds: list[dict] = []
    for embed in embeds:
        try:
            insert_embed(doc_id, embed, identity=args.identity, verbose=args.verbose)
        except LarkError as e:
            # One failed embed must not abort the rest; it downgrades the exit code.
            sys.stderr.write(f"[warn] failed to insert {embed['file']}: {e}\n")
            failed_embeds.append(embed)

    # Always try to clean up sentinels we managed to anchor
    if embeds:
        try:
            cleanup_sentinels(doc_id, session_tag, embeds, identity=args.identity, verbose=args.verbose)
        except LarkError as e:
            sys.stderr.write(f"[warn] cleanup failed: {e}\n")

    # Save state
    entry = state.get(key, {})
    # The remembered URL may only be reused when it belongs to the doc just
    # written. With --update <other id> (or after --new) the stored URL points
    # at a different document and must not be paired with the new doc_id.
    remembered_url = entry.get("url") if entry.get("doc_id") == doc_id else None
    entry.update({
        "doc_id": doc_id,
        "url": outcome.get("url") or remembered_url,
        "updated_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"),
        "title": title,
    })
    state[key] = entry
    save_state(state)

    print(json.dumps({
        "doc_id": doc_id,
        "url": entry.get("url"),
        "title": title,
        "embeds_inserted": len(embeds) - len(failed_embeds),
        "embeds_failed": len(failed_embeds),
    }, indent=2, ensure_ascii=False))

    return 2 if failed_embeds else 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the CLI with the arguments after the program name
# and use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
"""notes feishu sidecar:HTTP 包一层 markdown-to-feishu。
|
||||||
|
|
||||||
|
POST /convert {md_path, title?, existing_doc_id?}
|
||||||
|
→ 跑 markdown-to-feishu,parse 最后那段 JSON,返回 {doc_id, url}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
# Process-wide logging: INFO and above, timestamped, with level and logger name.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(name)s: %(message)s')
# Logger used by the request handlers in this module.
log = logging.getLogger('feishu')

# The FastAPI application that the route decorators below register on.
app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
|
@app.get('/healthz')
def healthz():
    """Health-check endpoint: unconditionally reports ``{"ok": true}``."""
    status = {'ok': True}
    return status
|
||||||
|
|
||||||
|
|
||||||
|
class ConvertReq(BaseModel):
    """Request body of ``POST /convert``."""

    # Path of the markdown file to convert, as seen from inside this container.
    md_path: str
    # Optional document title; forwarded to markdown-to-feishu as ``--title``.
    title: Optional[str] = None
    # Id of a previously created doc; when set it is forwarded as ``--update``
    # so the same doc is overwritten.
    existing_doc_id: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_trailing_json(out: str) -> dict:
    """Return the JSON object that forms the tail of ``out``.

    Candidate start positions are every ``{`` in ``out``, tried from the last
    one backwards; the first suffix that parses as a JSON object wins. Trying
    only the last ``{`` is not enough: a brace inside a string value (for
    example a title such as ``"plan {v2}"``) or a nested object would be picked
    instead of the real start of the object.

    Raises:
        json.JSONDecodeError: no suffix starting at a ``{`` is valid JSON. The
            error raised is the one from the last ``{`` in the text.
    """
    first_err = None
    pos = out.rfind('{')
    while pos >= 0:
        try:
            obj = json.loads(out[pos:])
        except json.JSONDecodeError as e:
            if first_err is None:
                first_err = e
        else:
            if isinstance(obj, dict):
                return obj
        pos = out.rfind('{', 0, pos)
    if first_err is None:
        first_err = json.JSONDecodeError('no JSON object found', out, 0)
    raise first_err


@app.post('/convert')
def convert(req: ConvertReq):
    """Convert a markdown file into a Feishu doc by running markdown-to-feishu.

    The script is executed with the markdown file's directory as working
    directory and with its state directory pointed at ``/data/feishu-state``.
    The JSON summary it prints last on stdout is parsed for the result.

    Returns:
        ``{doc_id, url, embeds_inserted, embeds_failed}``.

    Raises:
        HTTPException 400: ``md_path`` is not an existing file.
        HTTPException 504: the script ran longer than 10 minutes.
        HTTPException 502: the script failed (exit code other than 0/2), or its
            output has no parseable JSON summary, or the summary lacks
            ``doc_id`` / ``url``.
    """
    md = Path(req.md_path)
    # is_file() rather than exists(): a directory can never be converted.
    if not md.is_file():
        raise HTTPException(400, f'md not found: {md}')

    cmd = ['/usr/local/bin/markdown-to-feishu', str(md), '--as', 'user']
    # Option values are attached with "=" so a value that starts with "-"
    # (for example a title like "-draft") is not mistaken for another option.
    if req.existing_doc_id:
        cmd.append(f'--update={req.existing_doc_id}')
    if req.title:
        cmd.append(f'--title={req.title}')
    log.info("run: %s", ' '.join(cmd))

    env = os.environ.copy()
    # Keep the markdown-to-feishu state file under /data so it is not lost on
    # restart.
    env['MD2FEISHU_STATE_DIR'] = '/data/feishu-state'
    Path('/data/feishu-state').mkdir(parents=True, exist_ok=True)

    try:
        proc = subprocess.run(
            cmd, capture_output=True, text=True, timeout=600, env=env,
            cwd=str(md.parent),
        )
    except subprocess.TimeoutExpired:
        raise HTTPException(504, 'markdown-to-feishu timeout (>10min)')

    # Exit code 2 means some embeds failed but the doc itself was written,
    # so stdout is still parsed in that case.
    if proc.returncode not in (0, 2):
        log.warning("md2feishu exit=%d stderr=%s", proc.returncode, proc.stderr[-500:])
        raise HTTPException(502, f'md2feishu exit {proc.returncode}: '
                                 f'{proc.stderr.strip()[-400:]}')

    # The script's final print is a JSON object; take the one ending stdout.
    out = proc.stdout.strip()
    if '{' not in out:
        raise HTTPException(502, f'md2feishu no json output. stdout tail: {out[-400:]}')
    try:
        data = _parse_trailing_json(out)
    except json.JSONDecodeError as e:
        raise HTTPException(502, f'md2feishu json parse: {e}; tail: {out[-400:]}')

    doc_id = data.get('doc_id')
    url = data.get('url')
    if not doc_id or not url:
        raise HTTPException(502, f'md2feishu missing doc_id/url: {data}')
    log.info("ok: doc_id=%s url=%s embeds=%s",
             doc_id, url, data.get('embeds_inserted'))
    return {
        'doc_id': doc_id,
        'url': url,
        'embeds_inserted': data.get('embeds_inserted', 0),
        'embeds_failed': data.get('embeds_failed', 0),
    }
|
||||||
@@ -79,6 +79,24 @@
|
|||||||
<button v-if="selected.status === 'failed'" class="retry-btn" @click="retry">↻ 重试</button>
|
<button v-if="selected.status === 'failed'" class="retry-btn" @click="retry">↻ 重试</button>
|
||||||
<button class="danger-btn" @click="remove">删除</button>
|
<button class="danger-btn" @click="remove">删除</button>
|
||||||
</div>
|
</div>
|
||||||
|
<div v-if="selected.status === 'done'" class="feishu-row">
|
||||||
|
<a
|
||||||
|
v-if="selected.feishu_url"
|
||||||
|
:href="selected.feishu_url"
|
||||||
|
target="_blank"
|
||||||
|
rel="noopener"
|
||||||
|
class="feishu-link"
|
||||||
|
>📄 飞书文档 · {{ selected.feishu_url.replace(/^https?:\/\//, '').slice(0, 40) }}…</a>
|
||||||
|
<button
|
||||||
|
class="feishu-btn"
|
||||||
|
:disabled="feishuPushing"
|
||||||
|
@click="pushFeishu"
|
||||||
|
>
|
||||||
|
{{ feishuPushing ? '⏳ 推送中…'
|
||||||
|
: selected.feishu_url ? '↻ 重新生成' : '📤 一键转飞书文档' }}
|
||||||
|
</button>
|
||||||
|
<p v-if="feishuErr" class="feishu-err">{{ feishuErr }}</p>
|
||||||
|
</div>
|
||||||
</header>
|
</header>
|
||||||
<audio :src="audioUrl(selected.id)" controls class="audio" />
|
<audio :src="audioUrl(selected.id)" controls class="audio" />
|
||||||
|
|
||||||
@@ -114,6 +132,7 @@ import {
|
|||||||
uploadRecording,
|
uploadRecording,
|
||||||
deleteRecording,
|
deleteRecording,
|
||||||
retryRecording,
|
retryRecording,
|
||||||
|
convertFeishu,
|
||||||
audioUrl as audioUrlFn,
|
audioUrl as audioUrlFn,
|
||||||
getPass,
|
getPass,
|
||||||
setPass,
|
setPass,
|
||||||
@@ -130,6 +149,8 @@ const selected = ref(null)
|
|||||||
const selectedId = ref(null)
|
const selectedId = ref(null)
|
||||||
const uploading = ref(false)
|
const uploading = ref(false)
|
||||||
const uploadErr = ref('')
|
const uploadErr = ref('')
|
||||||
|
const feishuPushing = ref(false)
|
||||||
|
const feishuErr = ref('')
|
||||||
let pollTimer = null
|
let pollTimer = null
|
||||||
|
|
||||||
// 浏览器内录音(iOS 没法选录音机 App 文件,直接 web record 更顺)
|
// 浏览器内录音(iOS 没法选录音机 App 文件,直接 web record 更顺)
|
||||||
@@ -318,6 +339,23 @@ async function retry() {
|
|||||||
} catch (e) { alert(e.message) }
|
} catch (e) { alert(e.message) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Push the currently selected recording to Feishu and store the returned
 * doc id / URL on it.
 *
 * Re-entrancy is blocked while a push is in flight. Any failure is surfaced
 * through `feishuErr` instead of being thrown.
 */
async function pushFeishu() {
  if (feishuPushing.value) return
  // Remember which recording this request is for: the selection can change
  // while the request is in flight.
  const targetId = selectedId.value
  feishuPushing.value = true
  feishuErr.value = ''
  try {
    const r = await convertFeishu(targetId)
    const current = selected.value
    // Only write the result back if the same recording is still selected;
    // otherwise another recording would receive this doc id and URL.
    if (current && String(current.id) === String(targetId)) {
      current.feishu_doc_id = r.doc_id
      current.feishu_url = r.url
    }
  } catch (e) {
    feishuErr.value = e.message || String(e)
  } finally {
    feishuPushing.value = false
  }
}
|
||||||
|
|
||||||
function audioUrl(id) { return audioUrlFn(id) }
|
function audioUrl(id) { return audioUrlFn(id) }
|
||||||
|
|
||||||
function statusLabel(s) {
|
function statusLabel(s) {
|
||||||
@@ -580,6 +618,41 @@ input, textarea { font-family: inherit; background: transparent; border: none; c
|
|||||||
padding: 3px 10px;
|
padding: 3px 10px;
|
||||||
border-radius: 4px;
|
border-radius: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.feishu-row {
|
||||||
|
margin-top: 12px;
|
||||||
|
display: flex;
|
||||||
|
gap: 10px;
|
||||||
|
align-items: center;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
.feishu-link {
|
||||||
|
color: var(--accent-cyan);
|
||||||
|
background: rgba(6, 182, 212, 0.1);
|
||||||
|
padding: 6px 12px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 12px;
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
.feishu-link:hover { background: rgba(6, 182, 212, 0.2); }
|
||||||
|
.feishu-btn {
|
||||||
|
background: var(--accent-strong);
|
||||||
|
color: #fff;
|
||||||
|
padding: 6px 14px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 12px;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
.feishu-btn:hover:not(:disabled) { background: var(--accent); }
|
||||||
|
.feishu-err {
|
||||||
|
width: 100%;
|
||||||
|
margin: 0;
|
||||||
|
color: var(--accent-red);
|
||||||
|
background: rgba(239,68,68,0.08);
|
||||||
|
padding: 6px 10px;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
.retry-btn { background: rgba(124, 92, 191, 0.15); color: var(--accent); }
|
.retry-btn { background: rgba(124, 92, 191, 0.15); color: var(--accent); }
|
||||||
.retry-btn:hover { background: rgba(124, 92, 191, 0.3); }
|
.retry-btn:hover { background: rgba(124, 92, 191, 0.3); }
|
||||||
.danger-btn { background: rgba(239, 68, 68, 0.1); color: var(--accent-red); }
|
.danger-btn { background: rgba(239, 68, 68, 0.1); color: var(--accent-red); }
|
||||||
|
|||||||
@@ -35,6 +35,9 @@ export function listRecordings() { return jreq('/api/recordings') }
|
|||||||
export function getRecording(id) { return jreq('/api/recordings/' + id) }
|
export function getRecording(id) { return jreq('/api/recordings/' + id) }
|
||||||
export function deleteRecording(id) { return jreq('/api/recordings/' + id, { method: 'DELETE' }) }
|
export function deleteRecording(id) { return jreq('/api/recordings/' + id, { method: 'DELETE' }) }
|
||||||
export function retryRecording(id) { return jreq('/api/recordings/' + id + '/retry', { method: 'POST' }) }
|
export function retryRecording(id) { return jreq('/api/recordings/' + id + '/retry', { method: 'POST' }) }
|
||||||
|
/** POST /api/recordings/:id/feishu — request conversion of a recording into a Feishu doc. */
export function convertFeishu(id) {
  const endpoint = '/api/recordings/' + id + '/feishu'
  const options = { method: 'POST' }
  return jreq(endpoint, options)
}
|
||||||
|
|
||||||
export function uploadRecording(title, file) {
|
export function uploadRecording(title, file) {
|
||||||
const fd = new FormData()
|
const fd = new FormData()
|
||||||
|
|||||||
@@ -69,6 +69,8 @@ spec:
|
|||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: notes-creds
|
name: notes-creds
|
||||||
key: llm_token
|
key: llm_token
|
||||||
|
- name: FEISHU_URL
|
||||||
|
value: http://localhost:8002
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet: { path: /healthz, port: http }
|
httpGet: { path: /healthz, port: http }
|
||||||
initialDelaySeconds: 1
|
initialDelaySeconds: 1
|
||||||
@@ -83,10 +85,39 @@ spec:
|
|||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: data
|
- name: data
|
||||||
mountPath: /data
|
mountPath: /data
|
||||||
|
- name: feishu
|
||||||
|
image: registry.famzheng.me/mochi/notes-feishu:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 8002
|
||||||
|
name: feishu
|
||||||
|
readinessProbe:
|
||||||
|
httpGet: { path: /healthz, port: feishu }
|
||||||
|
initialDelaySeconds: 3
|
||||||
|
periodSeconds: 10
|
||||||
|
livenessProbe:
|
||||||
|
httpGet: { path: /healthz, port: feishu }
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
resources:
|
||||||
|
requests: { cpu: 20m, memory: 64Mi }
|
||||||
|
limits: { cpu: 500m, memory: 384Mi }
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
- name: lark-cli-config
|
||||||
|
mountPath: /root/.lark-cli
|
||||||
|
readOnly: false
|
||||||
volumes:
|
volumes:
|
||||||
- name: data
|
- name: data
|
||||||
persistentVolumeClaim:
|
persistentVolumeClaim:
|
||||||
claimName: notes-data
|
claimName: notes-data
|
||||||
|
- name: lark-cli-config
|
||||||
|
secret:
|
||||||
|
secretName: lark-cli-creds
|
||||||
|
items:
|
||||||
|
- key: config.json
|
||||||
|
path: config.json
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
|
|||||||
+125
-7
@@ -36,6 +36,7 @@ struct AppState {
|
|||||||
llm_gateway: String,
|
llm_gateway: String,
|
||||||
llm_token: String,
|
llm_token: String,
|
||||||
llm_model: String,
|
llm_model: String,
|
||||||
|
feishu_url: String,
|
||||||
http: reqwest::Client,
|
http: reqwest::Client,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,6 +60,8 @@ async fn main() -> std::io::Result<()> {
|
|||||||
std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into());
|
std::env::var("LLM_GATEWAY").unwrap_or_else(|_| "http://3.135.65.204:8848/v1".into());
|
||||||
let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default();
|
let llm_token = std::env::var("LLM_TOKEN").unwrap_or_default();
|
||||||
let llm_model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gemma-4-31b-it".into());
|
let llm_model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gemma-4-31b-it".into());
|
||||||
|
let feishu_url =
|
||||||
|
std::env::var("FEISHU_URL").unwrap_or_else(|_| "http://localhost:8002".into());
|
||||||
|
|
||||||
std::fs::create_dir_all(&blobs_dir).expect("mkdir blobs_dir");
|
std::fs::create_dir_all(&blobs_dir).expect("mkdir blobs_dir");
|
||||||
|
|
||||||
@@ -79,6 +82,9 @@ async fn main() -> std::io::Result<()> {
|
|||||||
);",
|
);",
|
||||||
)
|
)
|
||||||
.expect("init schema");
|
.expect("init schema");
|
||||||
|
// 兼容旧 db 增量加列;已存在忽略错误
|
||||||
|
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_doc_id TEXT", []);
|
||||||
|
let _ = conn.execute("ALTER TABLE recordings ADD COLUMN feishu_url TEXT", []);
|
||||||
tracing::info!(%db_path, blobs = %blobs_dir.display(), "notes ready");
|
tracing::info!(%db_path, blobs = %blobs_dir.display(), "notes ready");
|
||||||
|
|
||||||
let http = reqwest::Client::builder()
|
let http = reqwest::Client::builder()
|
||||||
@@ -94,6 +100,7 @@ async fn main() -> std::io::Result<()> {
|
|||||||
llm_gateway,
|
llm_gateway,
|
||||||
llm_token,
|
llm_token,
|
||||||
llm_model,
|
llm_model,
|
||||||
|
feishu_url,
|
||||||
http,
|
http,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -105,6 +112,7 @@ async fn main() -> std::io::Result<()> {
|
|||||||
.route("/recordings/:id", get(get_recording).delete(delete_recording))
|
.route("/recordings/:id", get(get_recording).delete(delete_recording))
|
||||||
.route("/recordings/:id/audio", get(stream_audio))
|
.route("/recordings/:id/audio", get(stream_audio))
|
||||||
.route("/recordings/:id/retry", post(retry_recording))
|
.route("/recordings/:id/retry", post(retry_recording))
|
||||||
|
.route("/recordings/:id/feishu", post(convert_feishu))
|
||||||
.with_state(state.clone())
|
.with_state(state.clone())
|
||||||
.layer(from_fn_with_state(state.clone(), auth_middleware));
|
.layer(from_fn_with_state(state.clone(), auth_middleware));
|
||||||
|
|
||||||
@@ -211,6 +219,8 @@ struct RecordingDetail {
|
|||||||
summary: Option<String>,
|
summary: Option<String>,
|
||||||
error: Option<String>,
|
error: Option<String>,
|
||||||
created_at: String,
|
created_at: String,
|
||||||
|
feishu_doc_id: Option<String>,
|
||||||
|
feishu_url: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------- handlers ----------
|
// ---------- handlers ----------
|
||||||
@@ -253,26 +263,30 @@ async fn get_recording(
|
|||||||
type Row = (
|
type Row = (
|
||||||
String, String, String, i64, String,
|
String, String, String, i64, String,
|
||||||
Option<String>, Option<String>, Option<String>, String,
|
Option<String>, Option<String>, Option<String>, String,
|
||||||
|
Option<String>, Option<String>,
|
||||||
);
|
);
|
||||||
let row: Option<Row> = conn
|
let row: Option<Row> = conn
|
||||||
.query_row(
|
.query_row(
|
||||||
"SELECT title, filename, mime, size_bytes, status,
|
"SELECT title, filename, mime, size_bytes, status,
|
||||||
transcript, summary, error, created_at
|
transcript, summary, error, created_at,
|
||||||
|
feishu_doc_id, feishu_url
|
||||||
FROM recordings WHERE id = ?1",
|
FROM recordings WHERE id = ?1",
|
||||||
params![id],
|
params![id],
|
||||||
|r| {
|
|r| {
|
||||||
Ok((
|
Ok((
|
||||||
r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?,
|
r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?,
|
||||||
r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?,
|
r.get(5)?, r.get(6)?, r.get(7)?, r.get(8)?,
|
||||||
|
r.get(9)?, r.get(10)?,
|
||||||
))
|
))
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.optional()?;
|
.optional()?;
|
||||||
let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at) =
|
let (title, filename, mime, size_bytes, status, transcript, summary, error, created_at,
|
||||||
row.ok_or(AppError::NotFound)?;
|
feishu_doc_id, feishu_url) = row.ok_or(AppError::NotFound)?;
|
||||||
Ok(JsonResp(RecordingDetail {
|
Ok(JsonResp(RecordingDetail {
|
||||||
id, title, filename, mime, size_bytes, status,
|
id, title, filename, mime, size_bytes, status,
|
||||||
transcript, summary, error, created_at,
|
transcript, summary, error, created_at,
|
||||||
|
feishu_doc_id, feishu_url,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -501,13 +515,16 @@ async fn call_llm_summary(s: &AppState, transcript: &str) -> Result<String, Stri
|
|||||||
"model": s.llm_model,
|
"model": s.llm_model,
|
||||||
"messages": [
|
"messages": [
|
||||||
{ "role": "system", "content":
|
{ "role": "system", "content":
|
||||||
"你是一个会议纪要助手。根据语音转写整理一份结构化纪要(markdown):\n\
|
"你是一个会议纪要助手。根据语音转写整理一份结构化纪要(markdown 格式):\n\
|
||||||
|
\n\
|
||||||
1. **概要**:1-2 句话总结\n\
|
1. **概要**:1-2 句话总结\n\
|
||||||
2. **关键讨论点**:bullet 列出\n\
|
2. **关键讨论点**:bullet 列出\n\
|
||||||
3. **决定 / 结论**\n\
|
3. **决定 / 结论**\n\
|
||||||
4. **行动项 (action items)**:谁、做什么、何时\n\
|
4. **行动项 (action items)**:每条用 markdown checkbox 格式 `- [ ] 谁 · 做什么 · 何时`\n\
|
||||||
5. **待跟进 / 未决问题**\n\
|
5. **待跟进 / 未决问题**:bullet 列出\n\
|
||||||
转写可能有 ASR 错字,结合上下文合理修正;遇到模糊处标 [?]。" },
|
\n\
|
||||||
|
转写可能有 ASR 错字,结合上下文合理修正;遇到模糊处标 [?]。\n\
|
||||||
|
不要编造没说过的内容。" },
|
||||||
{ "role": "user", "content": trimmed },
|
{ "role": "user", "content": trimmed },
|
||||||
],
|
],
|
||||||
"temperature": 0.3,
|
"temperature": 0.3,
|
||||||
@@ -574,6 +591,107 @@ async fn retry_recording(
|
|||||||
Ok(JsonResp(json!({ "ok": true, "status": "pending" })))
|
Ok(JsonResp(json!({ "ok": true, "status": "pending" })))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `POST /api/recordings/:id/feishu` — 把转写 + 纪要 push 成飞书 docx。
|
||||||
|
/// 已经转过的 piece 仍 update 同一个 doc(markdown-to-feishu 自带 --update)。
|
||||||
|
async fn convert_feishu(
|
||||||
|
State(s): State<AppState>,
|
||||||
|
Path(id): Path<i64>,
|
||||||
|
) -> Result<JsonResp<Value>, AppError> {
|
||||||
|
let row: (String, String, Option<String>, Option<String>, String, Option<String>) = {
|
||||||
|
let conn = s.db.lock().unwrap();
|
||||||
|
conn.query_row(
|
||||||
|
"SELECT title, filename, transcript, summary, status, feishu_doc_id
|
||||||
|
FROM recordings WHERE id = ?1",
|
||||||
|
params![id],
|
||||||
|
|r| {
|
||||||
|
Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?, r.get(5)?))
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.optional()?
|
||||||
|
.ok_or(AppError::NotFound)?
|
||||||
|
};
|
||||||
|
let (title, filename, transcript, summary, status, existing_doc) = row;
|
||||||
|
if status != "done" {
|
||||||
|
return Err(AppError::bad_request(format!(
|
||||||
|
"recording not ready (status={status})"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
let summary = summary.unwrap_or_default();
|
||||||
|
let transcript = transcript.unwrap_or_default();
|
||||||
|
|
||||||
|
// 拼 markdown
|
||||||
|
let ext = std::path::Path::new(&filename)
|
||||||
|
.extension()
|
||||||
|
.and_then(|x| x.to_str())
|
||||||
|
.unwrap_or("m4a")
|
||||||
|
.to_string();
|
||||||
|
let audio_name = format!("audio.{ext}");
|
||||||
|
let md = format!(
|
||||||
|
"# {title}\n\n\
|
||||||
|
## 📋 会议纪要\n\n\
|
||||||
|
{summary}\n\n\
|
||||||
|
---\n\n\
|
||||||
|
## 📎 原始材料\n\n\
|
||||||
|
- [📄 转录原文](./transcript.txt)\n\
|
||||||
|
- [🎙️ 原始录音](./{audio_name})\n\n\
|
||||||
|
---\n\n\
|
||||||
|
## 🎙️ 转录全文\n\n\
|
||||||
|
{transcript}\n",
|
||||||
|
);
|
||||||
|
|
||||||
|
// 落到 PVC 共享目录,sidecar 同样挂这个卷
|
||||||
|
let work_dir = std::path::PathBuf::from(format!("/data/feishu-tmp/{id}"));
|
||||||
|
tokio::fs::create_dir_all(&work_dir).await.map_err(AppError::Io)?;
|
||||||
|
let md_path = work_dir.join("note.md");
|
||||||
|
tokio::fs::write(&md_path, md).await.map_err(AppError::Io)?;
|
||||||
|
tokio::fs::write(work_dir.join("transcript.txt"), &transcript)
|
||||||
|
.await
|
||||||
|
.map_err(AppError::Io)?;
|
||||||
|
// 拷 audio(用 copy,sidecar 跑期间不会被改)
|
||||||
|
let audio_src = s.blobs_dir.join(id.to_string());
|
||||||
|
let audio_dst = work_dir.join(&audio_name);
|
||||||
|
tokio::fs::copy(&audio_src, &audio_dst).await.map_err(AppError::Io)?;
|
||||||
|
|
||||||
|
// 调 sidecar
|
||||||
|
let url = format!("{}/convert", s.feishu_url.trim_end_matches('/'));
|
||||||
|
let mut payload = json!({
|
||||||
|
"md_path": md_path.to_string_lossy(),
|
||||||
|
"title": title,
|
||||||
|
});
|
||||||
|
if let Some(d) = existing_doc.as_deref().filter(|x| !x.is_empty()) {
|
||||||
|
payload["existing_doc_id"] = json!(d);
|
||||||
|
}
|
||||||
|
let resp = s
|
||||||
|
.http
|
||||||
|
.post(&url)
|
||||||
|
.json(&payload)
|
||||||
|
.timeout(std::time::Duration::from_secs(300))
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| AppError::bad_request(format!("feishu sidecar: {e}")))?;
|
||||||
|
if !resp.status().is_success() {
|
||||||
|
let st = resp.status();
|
||||||
|
let body = resp.text().await.unwrap_or_default();
|
||||||
|
return Err(AppError::bad_request(format!("feishu {st}: {body}")));
|
||||||
|
}
|
||||||
|
let body: Value = resp.json().await.map_err(|e| AppError::bad_request(format!("decode: {e}")))?;
|
||||||
|
let doc_id = body.get("doc_id").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||||
|
let doc_url = body.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||||
|
if doc_id.is_empty() || doc_url.is_empty() {
|
||||||
|
return Err(AppError::bad_request(format!("feishu bad response: {body}")));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
let conn = s.db.lock().unwrap();
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE recordings SET feishu_doc_id = ?1, feishu_url = ?2 WHERE id = ?3",
|
||||||
|
params![&doc_id, &doc_url, id],
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(JsonResp(json!({ "doc_id": doc_id, "url": doc_url })))
|
||||||
|
}
|
||||||
|
|
||||||
async fn stream_audio(
|
async fn stream_audio(
|
||||||
State(s): State<AppState>,
|
State(s): State<AppState>,
|
||||||
Path(id): Path<i64>,
|
Path(id): Path<i64>,
|
||||||
|
|||||||
Reference in New Issue
Block a user