llm-proxy(app): gemma 反向代理 + token 鉴权 + /chat web UI
deploy articulate / build-and-deploy (push) Successful in 1m29s
deploy cube / build-and-deploy (push) Successful in 1m49s
deploy karaoke / build-and-deploy (push) Successful in 1m18s
deploy llm-proxy / build-and-deploy (push) Successful in 2m41s
deploy music / build-and-deploy (push) Successful in 3m6s
deploy notes / build-and-deploy (push) Successful in 2m40s
deploy simpleasm / build-and-deploy (push) Successful in 2m5s
deploy werewolf / build-and-deploy (push) Successful in 1m41s

新 service,ns `llm-proxy`,域 `llm.famzheng.me`。
- POST /v1/chat/completions — OpenAI 兼容透传到 mochi 同款 backend
  gateway (gemma-4-31b-it);一期强制 stream=false,SSE 留二期
- 鉴权: `Authorization: token <PROXY_AUTH_TOKEN>` 或同款 Bearer;
  常数时间比较防 timing 攻击;expected 为空时一律拒绝
- GET /chat — 自带极简 HTML chat UI(token 走 localStorage,
  附 curl example details);/ 跳转到 /chat
- Secrets `llm-proxy/proxy-credentials` 已 kubectl 手工创建:
  BACKEND_TOKEN (上游) + PROXY_AUTH_TOKEN (对外)
- 13 个 cargo test 覆盖 auth 多个 scheme / 边界 + body
  改写 (stream=false 强制注入)
This commit is contained in:
Fam Zheng
2026-05-18 00:21:47 +01:00
parent 34fa47f95f
commit 857c0d5481
9 changed files with 673 additions and 0 deletions
+174
View File
@@ -0,0 +1,174 @@
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#0f1419" />
<title>llm.famzheng.me</title>
<style>
:root {
color-scheme: dark;
--bg: #0f1419; --soft: rgba(255,255,255,.06); --border: rgba(255,255,255,.15);
--fg: rgba(255,255,255,.92); --dim: rgba(255,255,255,.55);
--accent: #7c3aed; --accent2: #06b6d4;
}
* { box-sizing: border-box; }
html, body { margin: 0; padding: 0; min-height: 100vh; background: var(--bg); color: var(--fg);
font-family: -apple-system, BlinkMacSystemFont, 'PingFang SC', system-ui, sans-serif; }
main { max-width: 760px; margin: 0 auto; padding: 16px; display: flex; flex-direction: column; min-height: 100vh; }
header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 12px; }
h1 { font-size: 1.25rem; margin: 0; background: linear-gradient(135deg, #fff, var(--accent2));
-webkit-background-clip: text; background-clip: text; color: transparent; }
header small { color: var(--dim); font-size: 0.8rem; }
.config { display: flex; gap: 8px; margin-bottom: 12px; }
.config input, .config select {
flex: 1; padding: 8px 10px; background: var(--soft); border: 1px solid var(--border);
border-radius: 6px; color: var(--fg); font: inherit;
}
.thread { flex: 1; overflow-y: auto; padding: 8px 0; display: flex; flex-direction: column; gap: 10px;
border-top: 1px solid var(--border); border-bottom: 1px solid var(--border); margin-bottom: 10px; }
.bubble { max-width: 85%; padding: 10px 13px; border-radius: 12px; white-space: pre-wrap;
word-wrap: break-word; line-height: 1.4; font-size: 0.92rem; }
.bubble.user { align-self: flex-end; background: linear-gradient(135deg, var(--accent), #4f46e5); color: white; }
.bubble.assistant { align-self: flex-start; background: var(--soft); border: 1px solid var(--border); }
.bubble.err { align-self: stretch; background: rgba(239,68,68,.15); border: 1px solid rgba(239,68,68,.4); color: #ff8080; }
.typing { display: inline-flex; gap: 4px; padding: 12px; }
.typing span { width: 6px; height: 6px; border-radius: 50%; background: var(--dim); animation: b 1.2s infinite; }
.typing span:nth-child(2) { animation-delay: 0.15s; }
.typing span:nth-child(3) { animation-delay: 0.3s; }
@keyframes b { 0%,60%,100% { transform: translateY(0); opacity: 0.45; } 30% { transform: translateY(-4px); opacity: 1; } }
footer { display: flex; gap: 8px; align-items: flex-end; }
textarea { flex: 1; resize: none; padding: 8px 10px; background: var(--soft); border: 1px solid var(--border);
border-radius: 8px; color: var(--fg); font: inherit; line-height: 1.4; }
textarea:focus { outline: 2px solid var(--accent); outline-offset: -1px; }
button.send { background: linear-gradient(135deg, var(--accent), var(--accent2));
color: white; border: none; padding: 10px 16px; border-radius: 8px; font-weight: 600; }
button.send:disabled { background: var(--soft); color: var(--dim); cursor: not-allowed; }
button.ghost { background: transparent; border: 1px solid var(--border); color: var(--fg);
padding: 6px 10px; border-radius: 6px; font-size: 0.85rem; }
details { margin-top: 12px; color: var(--dim); font-size: 0.85rem; }
details code { background: var(--soft); padding: 1px 5px; border-radius: 4px; font-size: 0.9em; color: var(--fg); }
details pre { background: rgba(0,0,0,.4); padding: 10px; border-radius: 8px; overflow-x: auto;
border: 1px solid var(--border); color: var(--fg); }
</style>
</head>
<body>
<main>
<header>
<h1>llm.famzheng.me</h1>
<small id="meta">gemma-4-31b-it · 反向代理</small>
</header>
<div class="config">
<input id="token" type="password" placeholder="Authorization token (e.g. famzheng-llm-2026)" />
<button class="ghost" id="reset">清空对话</button>
</div>
<div class="thread" id="thread"></div>
<footer>
<textarea id="input" rows="2" placeholder="说点什么...(Enter 发送,Shift+Enter 换行)"></textarea>
<button class="send" id="send">发送</button>
</footer>
<details>
<summary>curl example</summary>
<pre>curl -X POST https://llm.famzheng.me/v1/chat/completions \
-H 'Authorization: token &lt;your-token&gt;' \
-H 'Content-Type: application/json' \
-d '{
"model": "gemma-4-31b-it",
"messages": [{"role":"user","content":"hello"}]
}'</pre>
</details>
</main>
<script>
// localStorage key under which the proxy auth token is kept between visits.
const TOKEN_KEY = 'llm-proxy-token'

// Handles for the controls declared in the page markup.
const tokenInput = document.getElementById('token')
const sendBtn = document.getElementById('send')
const resetBtn = document.getElementById('reset')
const input = document.getElementById('input')
const thread = document.getElementById('thread')

/**
 * Read the previously saved token.
 *
 * Accessing localStorage can throw (for example when the browser blocks site
 * storage). An uncaught throw here would abort the whole script and leave the
 * page without any event handlers, so storage is treated as optional.
 *
 * @returns {string} The stored token, or '' when none is stored or storage is
 *   unavailable.
 */
function loadStoredToken() {
  try {
    return localStorage.getItem(TOKEN_KEY) || ''
  } catch (e) {
    console.warn('token storage unavailable:', e)
    return ''
  }
}

/**
 * Persist the token, best effort. A storage failure is logged and otherwise
 * ignored so the chat keeps working with the value typed into the field.
 *
 * @param {string} value Token text to store.
 */
function saveStoredToken(value) {
  try {
    localStorage.setItem(TOKEN_KEY, value)
  } catch (e) {
    console.warn('could not persist token:', e)
  }
}

tokenInput.value = loadStoredToken()
tokenInput.addEventListener('change', () => {
  saveStoredToken(tokenInput.value.trim())
})

// Messages exchanged so far as { role, content } objects; the whole array is
// sent as `messages` with every request.
const history = []
/**
 * Append a message bubble to the thread and scroll it into view.
 *
 * The text is assigned through `textContent`, so it is never parsed as HTML.
 *
 * @param {string} role Used as the CSS modifier class when `cls` is falsy.
 * @param {string} text Text shown inside the bubble.
 * @param {string} [cls] CSS modifier class that takes precedence over `role`.
 * @returns {HTMLDivElement} The element that was appended.
 */
function bubble(role, text, cls) {
  const variant = cls || role
  const node = Object.assign(document.createElement('div'), {
    className: `bubble ${variant}`,
    textContent: text,
  })
  thread.appendChild(node)
  // Pin the view to the newest message.
  thread.scrollTop = thread.scrollHeight
  return node
}
/**
 * Append the three-dot "typing" placeholder bubble to the thread and scroll
 * it into view.
 *
 * @returns {HTMLDivElement} The placeholder, so the caller can remove it.
 */
function typing() {
  const placeholder = document.createElement('div')
  placeholder.className = ['bubble', 'assistant', 'typing'].join(' ')
  // Static markup only: one empty <span> per animated dot.
  placeholder.innerHTML = '<span></span>'.repeat(3)
  thread.appendChild(placeholder)
  thread.scrollTop = thread.scrollHeight
  return placeholder
}
/**
 * Send the textarea content as a new user turn and render the reply.
 *
 * Reads the message from `input` and the token from `tokenInput`, appends the
 * user turn to `history`, POSTs the whole history to /v1/chat/completions and
 * appends the assistant reply. On any failure the user turn is rolled back
 * out of `history` and an error bubble is shown instead.
 *
 * Only one request may be in flight: a call made while `sendBtn` is disabled
 * returns immediately without touching the textarea. If the conversation was
 * cleared while the request was pending, the late result is discarded.
 *
 * @returns {Promise<void>} Settles when the turn has been fully handled;
 *   request errors are rendered, never rethrown.
 */
async function send() {
  // The Enter shortcut calls send() directly, so the disabled button alone
  // does not stop a second request from starting.
  if (sendBtn.disabled) return
  const text = input.value.trim()
  const token = tokenInput.value.trim()
  if (!text) return
  if (!token) { bubble('err', '先在上方填 token。', 'err'); return }
  input.value = ''
  const userMsg = { role: 'user', content: text }
  history.push(userMsg)
  bubble('user', text)
  sendBtn.disabled = true
  const dot = typing()
  // True while this turn is still the tail of the conversation, i.e. history
  // was not cleared (or otherwise changed) while the request was pending.
  const isCurrent = () => history[history.length - 1] === userMsg
  // Roll the user turn back and show the error, unless the turn is stale.
  const fail = (message) => {
    if (!isCurrent()) return
    history.pop()
    bubble('err', message, 'err')
  }
  try {
    const res = await fetch('/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': 'token ' + token,
      },
      body: JSON.stringify({
        model: 'gemma-4-31b-it',
        messages: history,
      }),
    })
    // Read as text first so non-JSON error pages can still be displayed.
    const body = await res.text()
    if (!res.ok) {
      fail(`${res.status}: ${body}`)
      return
    }
    let data
    try {
      data = JSON.parse(body)
    } catch (e) {
      fail('上游返回非 JSON: ' + body.slice(0, 300))
      return
    }
    // Only string content is rendered; a missing or non-string value falls
    // back to the placeholder instead of throwing on .trim().
    const content = data?.choices?.[0]?.message?.content
    const reply = (typeof content === 'string' ? content.trim() : '') || '(空回复)'
    if (!isCurrent()) return
    history.push({ role: 'assistant', content: reply })
    bubble('assistant', reply)
  } catch (e) {
    fail('网络错误: ' + e.message)
  } finally {
    dot.remove()
    sendBtn.disabled = false
  }
}
// Wire up the controls: the send button and plain Enter both submit,
// Shift+Enter keeps the default behavior and inserts a newline.
sendBtn.addEventListener('click', send)
input.addEventListener('keydown', (e) => {
  // While an IME composition is active, Enter confirms the candidate text and
  // must not submit the message. keyCode 229 covers browsers that report the
  // confirming keydown without setting isComposing.
  if (e.isComposing || e.keyCode === 229) return
  if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send() }
})
// Reset: forget the conversation and clear the rendered bubbles.
resetBtn.addEventListener('click', () => {
  history.length = 0
  thread.innerHTML = ''
})
</script>
</body>
</html>