llm-proxy(app): gemma 反向代理 + token 鉴权 + /chat web UI
deploy articulate / build-and-deploy (push) Successful in 1m29s
deploy cube / build-and-deploy (push) Successful in 1m49s
deploy karaoke / build-and-deploy (push) Successful in 1m18s
deploy llm-proxy / build-and-deploy (push) Successful in 2m41s
deploy music / build-and-deploy (push) Successful in 3m6s
deploy notes / build-and-deploy (push) Successful in 2m40s
deploy simpleasm / build-and-deploy (push) Successful in 2m5s
deploy werewolf / build-and-deploy (push) Successful in 1m41s
deploy articulate / build-and-deploy (push) Successful in 1m29s
deploy cube / build-and-deploy (push) Successful in 1m49s
deploy karaoke / build-and-deploy (push) Successful in 1m18s
deploy llm-proxy / build-and-deploy (push) Successful in 2m41s
deploy music / build-and-deploy (push) Successful in 3m6s
deploy notes / build-and-deploy (push) Successful in 2m40s
deploy simpleasm / build-and-deploy (push) Successful in 2m5s
deploy werewolf / build-and-deploy (push) Successful in 1m41s
新 service,ns `llm-proxy`,域 `llm.famzheng.me`。 - POST /v1/chat/completions — OpenAI 兼容透传到 mochi 同款 backend gateway (gemma-4-31b-it);一期强制 stream=false,SSE 留二期 - 鉴权: `Authorization: token <PROXY_AUTH_TOKEN>` 或同款 Bearer; 常时间比较防 timing;空 expected 一律拒 - GET /chat — 自带极简 HTML chat UI(token 走 localStorage, 附 curl example details);/ 跳转到 /chat - Secrets `llm-proxy/proxy-credentials` 已 kubectl 手工创建: BACKEND_TOKEN (上游) + PROXY_AUTH_TOKEN (对外) - 13 个 cargo test 覆盖 auth 多个 scheme / 边界 + body 改写 (stream=false 强制注入)
This commit is contained in:
@@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "llm-proxy"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
authors.workspace = true
|
||||
description = "llm.famzheng.me — gemma-4-31b-it 反向代理 + token 鉴权 + /chat web UI"
|
||||
|
||||
[dependencies]
|
||||
cube-core = { path = "../../crates/cube-core" }
|
||||
axum = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tower-http = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
@@ -0,0 +1,5 @@
|
||||
# llm-proxy — llm.famzheng.me
|
||||
FROM scratch
|
||||
COPY target/x86_64-unknown-linux-musl/release/llm-proxy /llm-proxy
|
||||
EXPOSE 8080
|
||||
ENTRYPOINT ["/llm-proxy"]
|
||||
@@ -0,0 +1,90 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: llm-proxy
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-proxy
|
||||
namespace: llm-proxy
|
||||
labels:
|
||||
app: llm-proxy
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-proxy
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: llm-proxy
|
||||
spec:
|
||||
imagePullSecrets:
|
||||
- name: registry-creds
|
||||
containers:
|
||||
- name: llm-proxy
|
||||
image: registry.famzheng.me/mochi/llm-proxy:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
envFrom:
|
||||
# secret `proxy-credentials` 由 kubectl 手工创建(BACKEND_TOKEN +
|
||||
# PROXY_AUTH_TOKEN),不在 git manifest 里。
|
||||
- secretRef:
|
||||
name: proxy-credentials
|
||||
env:
|
||||
- name: LLM_GATEWAY
|
||||
value: "http://3.135.65.204:8848/v1"
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: http
|
||||
initialDelaySeconds: 1
|
||||
periodSeconds: 5
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 15
|
||||
resources:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 16Mi
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 128Mi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-proxy
|
||||
namespace: llm-proxy
|
||||
spec:
|
||||
selector:
|
||||
app: llm-proxy
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 8080
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: llm-proxy
|
||||
namespace: llm-proxy
|
||||
spec:
|
||||
ingressClassName: traefik
|
||||
rules:
|
||||
- host: llm.famzheng.me
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: llm-proxy
|
||||
port:
|
||||
number: 80
|
||||
@@ -0,0 +1,147 @@
|
||||
//! llm.famzheng.me — gemma-4-31b-it 反向代理 + 简单 token 鉴权。
|
||||
//!
|
||||
//! - `GET /` → `/chat` 跳转
|
||||
//! - `GET /chat` → 静态 web UI
|
||||
//! - `POST /v1/chat/completions` → OpenAI 兼容透传 (要 Authorization: token <PROXY_AUTH_TOKEN>)
|
||||
//! - `GET /healthz` → 不带 auth, 给 k8s probe
|
||||
|
||||
mod proxy;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{
|
||||
extract::State,
|
||||
http::{header, StatusCode},
|
||||
middleware::{self, Next},
|
||||
response::{Html, IntoResponse, Redirect, Response},
|
||||
routing::{get, post},
|
||||
Router,
|
||||
};
|
||||
use tower_http::trace::TraceLayer;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> std::io::Result<()> {
|
||||
cube_core::init_tracing();
|
||||
let cfg = Arc::new(proxy::Config::from_env());
|
||||
let port: u16 = std::env::var("PORT")
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(8080);
|
||||
|
||||
let chat_api = Router::new()
|
||||
.route("/v1/chat/completions", post(proxy::handle))
|
||||
.route_layer(middleware::from_fn_with_state(cfg.clone(), require_token))
|
||||
.with_state(cfg);
|
||||
|
||||
let app = Router::new()
|
||||
.route("/healthz", get(|| async { "ok" }))
|
||||
.route("/", get(|| async { Redirect::permanent("/chat") }))
|
||||
.route("/chat", get(chat_ui))
|
||||
.merge(chat_api)
|
||||
.layer(TraceLayer::new_for_http());
|
||||
|
||||
let addr = format!("0.0.0.0:{port}");
|
||||
let listener = tokio::net::TcpListener::bind(&addr).await?;
|
||||
tracing::info!(%addr, "llm-proxy listening");
|
||||
axum::serve(listener, app).await
|
||||
}
|
||||
|
||||
const CHAT_HTML: &str = include_str!("../web/chat.html");
|
||||
|
||||
async fn chat_ui() -> Html<&'static str> {
|
||||
Html(CHAT_HTML)
|
||||
}
|
||||
|
||||
/// 验 `Authorization: token <PROXY_AUTH_TOKEN>`,错的直接 401。
|
||||
async fn require_token(
|
||||
State(cfg): State<Arc<proxy::Config>>,
|
||||
req: axum::extract::Request,
|
||||
next: Next,
|
||||
) -> Response {
|
||||
let header_val = req
|
||||
.headers()
|
||||
.get(header::AUTHORIZATION)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(str::trim);
|
||||
|
||||
match header_val {
|
||||
Some(v) if check_token(v, &cfg.proxy_auth_token) => next.run(req).await,
|
||||
_ => (
|
||||
StatusCode::UNAUTHORIZED,
|
||||
"缺少或不匹配 `Authorization: token <your-token>`",
|
||||
)
|
||||
.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
/// 接受 `token <T>` 或 `Bearer <T>`(OpenAI client 习惯发 Bearer,宽容点)。
|
||||
pub fn check_token(header_value: &str, expected: &str) -> bool {
|
||||
if expected.is_empty() {
|
||||
return false;
|
||||
}
|
||||
let trimmed = header_value.trim();
|
||||
if let Some(rest) = trimmed.strip_prefix("token ") {
|
||||
return constant_time_eq(rest.trim().as_bytes(), expected.as_bytes());
|
||||
}
|
||||
if let Some(rest) = trimmed.strip_prefix("Bearer ") {
|
||||
return constant_time_eq(rest.trim().as_bytes(), expected.as_bytes());
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Compares two byte slices without stopping at the first differing byte, as a
/// guard against timing attacks (low impact here, but cheap to do).
///
/// Slices of different length are rejected immediately, so only the contents,
/// not the length, are compared in data-independent fashion.
fn constant_time_eq(a: &[u8], b: &[u8]) -> bool {
    if a.len() != b.len() {
        return false;
    }
    // OR together the XOR of every byte pair; the result is zero only when
    // every pair is equal.
    let accumulated = a.iter().zip(b).fold(0u8, |acc, (lhs, rhs)| acc | (lhs ^ rhs));
    accumulated == 0
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Value every test treats as the configured proxy token.
    const SECRET: &str = "famzheng-llm-2026";

    #[test]
    fn check_token_accepts_token_scheme() {
        assert!(check_token("token famzheng-llm-2026", SECRET));
    }

    #[test]
    fn check_token_accepts_bearer_scheme() {
        assert!(check_token("Bearer famzheng-llm-2026", SECRET));
    }

    #[test]
    fn check_token_rejects_wrong_value() {
        assert!(!check_token("token wrong", SECRET));
    }

    #[test]
    fn check_token_rejects_unknown_scheme() {
        for header in ["Basic famzheng-llm-2026", "famzheng-llm-2026"].iter() {
            assert!(!check_token(header, SECRET));
        }
    }

    #[test]
    fn check_token_rejects_empty_expected() {
        // Guards against misconfiguration: an empty expected token must not
        // let anyone through.
        for header in ["token any", "Bearer "].iter() {
            assert!(!check_token(header, ""));
        }
    }

    #[test]
    fn check_token_strips_extra_whitespace() {
        assert!(check_token(" token famzheng-llm-2026 ", SECRET));
    }

    #[test]
    fn check_token_rejects_prefix_match() {
        // Neither a longer credential nor a truncated one may pass.
        for header in ["token famzheng-llm-2026-extra", "token famzheng-llm"].iter() {
            assert!(!check_token(header, SECRET));
        }
    }
}
|
||||
@@ -0,0 +1,173 @@
|
||||
//! `/v1/chat/completions` 透传 — 替换 Authorization 头,把请求 body 原样 forward 到
|
||||
//! 上游 LLM gateway,把响应 body 原样回吐给客户端。
|
||||
//!
|
||||
//! 一期只支持非 streaming(force `stream: false` 进 body),SSE 透传留给二期。
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{
|
||||
body::Bytes,
|
||||
extract::State,
|
||||
http::{HeaderMap, HeaderValue, StatusCode},
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
use serde_json::Value;
|
||||
|
||||
/// Runtime settings of the proxy, read once at startup.
#[derive(Clone, Debug)]
pub struct Config {
    /// Full URL of the upstream chat-completions endpoint,
    /// e.g. `http://3.135.65.204:8848/v1/chat/completions`.
    pub upstream_url: String,
    /// Token sent to the upstream gateway (`BACKEND_TOKEN`).
    pub upstream_token: String,
    /// Token that clients of this proxy must present (`PROXY_AUTH_TOKEN`).
    pub proxy_auth_token: String,
}

impl Config {
    /// Builds the configuration from environment variables.
    ///
    /// - `LLM_GATEWAY`: base URL of the gateway; defaults to
    ///   `http://3.135.65.204:8848/v1`. Trailing slashes are removed before
    ///   `/chat/completions` is appended.
    /// - `BACKEND_TOKEN`, `PROXY_AUTH_TOKEN`: default to the empty string when
    ///   unset.
    pub fn from_env() -> Self {
        // Unset (or non-UTF-8) variables are read as an empty string.
        let env_or_empty = |key: &str| std::env::var(key).unwrap_or_default();

        let base = match std::env::var("LLM_GATEWAY") {
            Ok(value) => value,
            Err(_) => "http://3.135.65.204:8848/v1".to_string(),
        };
        let base = base.trim_end_matches('/');

        Self {
            upstream_url: format!("{}/chat/completions", base),
            upstream_token: env_or_empty("BACKEND_TOKEN"),
            proxy_auth_token: env_or_empty("PROXY_AUTH_TOKEN"),
        }
    }
}
|
||||
|
||||
pub async fn handle(State(cfg): State<Arc<Config>>, body: Bytes) -> Response {
|
||||
// 1. parse body → 强制 stream=false(一期不支持流式)
|
||||
let body_bytes = match force_non_stream(&body) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
return (StatusCode::BAD_REQUEST, format!("bad JSON body: {e}")).into_response();
|
||||
}
|
||||
};
|
||||
|
||||
// 2. forward
|
||||
let client = reqwest::Client::new();
|
||||
let res = client
|
||||
.post(&cfg.upstream_url)
|
||||
.header("Authorization", format!("Bearer {}", cfg.upstream_token))
|
||||
.header("Content-Type", "application/json")
|
||||
.body(body_bytes)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(r) => relay_response(r).await,
|
||||
Err(e) => {
|
||||
tracing::error!(error=%e, "upstream call failed");
|
||||
(StatusCode::BAD_GATEWAY, format!("upstream error: {e}")).into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// parse JSON、塞入 `stream: false`、重新 serialize。
|
||||
/// 如果不是 JSON object 就保持原样(让上游自己报错)。
|
||||
fn force_non_stream(body: &Bytes) -> Result<Vec<u8>, String> {
|
||||
if body.is_empty() {
|
||||
return Err("empty body".into());
|
||||
}
|
||||
let mut v: Value = serde_json::from_slice(body).map_err(|e| e.to_string())?;
|
||||
if let Some(obj) = v.as_object_mut() {
|
||||
obj.insert("stream".to_string(), Value::Bool(false));
|
||||
}
|
||||
serde_json::to_vec(&v).map_err(|e| e.to_string())
|
||||
}
|
||||
|
||||
async fn relay_response(upstream: reqwest::Response) -> Response {
|
||||
let status = upstream.status();
|
||||
let ct = upstream
|
||||
.headers()
|
||||
.get(reqwest::header::CONTENT_TYPE)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| HeaderValue::from_static("application/json"));
|
||||
let bytes = match upstream.bytes().await {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::error!(error=%e, "read upstream body");
|
||||
return (StatusCode::BAD_GATEWAY, "read upstream body failed").into_response();
|
||||
}
|
||||
};
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(axum::http::header::CONTENT_TYPE, ct);
|
||||
(
|
||||
StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::BAD_GATEWAY),
|
||||
headers,
|
||||
bytes,
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `force_non_stream` on `raw` and parses the rewritten body back
    /// into JSON; panics if either step fails.
    fn rewrite(raw: &'static str) -> Value {
        let out = force_non_stream(&Bytes::from(raw)).unwrap();
        serde_json::from_slice(&out).unwrap()
    }

    #[test]
    fn force_non_stream_overrides_stream_true() {
        let v = rewrite(r#"{"model":"gemma","messages":[],"stream":true}"#);
        assert_eq!(v["stream"], Value::Bool(false));
    }

    #[test]
    fn force_non_stream_injects_when_absent() {
        let v = rewrite(r#"{"model":"gemma","messages":[]}"#);
        assert_eq!(v["stream"], Value::Bool(false));
    }

    #[test]
    fn force_non_stream_preserves_other_fields() {
        let v = rewrite(
            r#"{"model":"gemma-4-31b-it","temperature":0.7,"messages":[{"role":"user","content":"hi"}]}"#,
        );
        assert_eq!(v["model"], "gemma-4-31b-it");
        assert_eq!(v["temperature"], 0.7);
        assert_eq!(v["messages"][0]["role"], "user");
    }

    #[test]
    fn force_non_stream_rejects_empty() {
        assert!(force_non_stream(&Bytes::new()).is_err());
    }

    #[test]
    fn force_non_stream_rejects_invalid_json() {
        assert!(force_non_stream(&Bytes::from(r#"not json"#)).is_err());
    }

    #[test]
    fn config_from_env_builds_completions_url() {
        const VARS: [(&str, &str); 3] = [
            ("LLM_GATEWAY", "http://1.2.3.4:8848/v1/"),
            ("BACKEND_TOKEN", "backend-xxx"),
            ("PROXY_AUTH_TOKEN", "client-yyy"),
        ];

        // Snapshot the current values so they can be put back afterwards and
        // do not leak into other tests. Note the restore below is skipped if
        // an assertion panics first.
        let saved: Vec<Option<String>> =
            VARS.iter().map(|(key, _)| std::env::var(key).ok()).collect();
        for (key, value) in VARS.iter() {
            std::env::set_var(key, value);
        }

        let cfg = Config::from_env();
        assert_eq!(cfg.upstream_url, "http://1.2.3.4:8848/v1/chat/completions");
        assert_eq!(cfg.upstream_token, "backend-xxx");
        assert_eq!(cfg.proxy_auth_token, "client-yyy");

        // Restore the previous environment.
        for ((key, _), previous) in VARS.iter().zip(saved) {
            match previous {
                Some(value) => std::env::set_var(key, value),
                None => std::env::remove_var(key),
            }
        }
    }
}
|
||||
@@ -0,0 +1,174 @@
|
||||
<!doctype html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<meta name="theme-color" content="#0f1419" />
|
||||
<title>llm.famzheng.me</title>
|
||||
<style>
|
||||
:root {
|
||||
color-scheme: dark;
|
||||
--bg: #0f1419; --soft: rgba(255,255,255,.06); --border: rgba(255,255,255,.15);
|
||||
--fg: rgba(255,255,255,.92); --dim: rgba(255,255,255,.55);
|
||||
--accent: #7c3aed; --accent2: #06b6d4;
|
||||
}
|
||||
* { box-sizing: border-box; }
|
||||
html, body { margin: 0; padding: 0; min-height: 100vh; background: var(--bg); color: var(--fg);
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'PingFang SC', system-ui, sans-serif; }
|
||||
main { max-width: 760px; margin: 0 auto; padding: 16px; display: flex; flex-direction: column; min-height: 100vh; }
|
||||
header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 12px; }
|
||||
h1 { font-size: 1.25rem; margin: 0; background: linear-gradient(135deg, #fff, var(--accent2));
|
||||
-webkit-background-clip: text; background-clip: text; color: transparent; }
|
||||
header small { color: var(--dim); font-size: 0.8rem; }
|
||||
.config { display: flex; gap: 8px; margin-bottom: 12px; }
|
||||
.config input, .config select {
|
||||
flex: 1; padding: 8px 10px; background: var(--soft); border: 1px solid var(--border);
|
||||
border-radius: 6px; color: var(--fg); font: inherit;
|
||||
}
|
||||
.thread { flex: 1; overflow-y: auto; padding: 8px 0; display: flex; flex-direction: column; gap: 10px;
|
||||
border-top: 1px solid var(--border); border-bottom: 1px solid var(--border); margin-bottom: 10px; }
|
||||
.bubble { max-width: 85%; padding: 10px 13px; border-radius: 12px; white-space: pre-wrap;
|
||||
word-wrap: break-word; line-height: 1.4; font-size: 0.92rem; }
|
||||
.bubble.user { align-self: flex-end; background: linear-gradient(135deg, var(--accent), #4f46e5); color: white; }
|
||||
.bubble.assistant { align-self: flex-start; background: var(--soft); border: 1px solid var(--border); }
|
||||
.bubble.err { align-self: stretch; background: rgba(239,68,68,.15); border: 1px solid rgba(239,68,68,.4); color: #ff8080; }
|
||||
.typing { display: inline-flex; gap: 4px; padding: 12px; }
|
||||
.typing span { width: 6px; height: 6px; border-radius: 50%; background: var(--dim); animation: b 1.2s infinite; }
|
||||
.typing span:nth-child(2) { animation-delay: 0.15s; }
|
||||
.typing span:nth-child(3) { animation-delay: 0.3s; }
|
||||
@keyframes b { 0%,60%,100% { transform: translateY(0); opacity: 0.45; } 30% { transform: translateY(-4px); opacity: 1; } }
|
||||
footer { display: flex; gap: 8px; align-items: flex-end; }
|
||||
textarea { flex: 1; resize: none; padding: 8px 10px; background: var(--soft); border: 1px solid var(--border);
|
||||
border-radius: 8px; color: var(--fg); font: inherit; line-height: 1.4; }
|
||||
textarea:focus { outline: 2px solid var(--accent); outline-offset: -1px; }
|
||||
button.send { background: linear-gradient(135deg, var(--accent), var(--accent2));
|
||||
color: white; border: none; padding: 10px 16px; border-radius: 8px; font-weight: 600; }
|
||||
button.send:disabled { background: var(--soft); color: var(--dim); cursor: not-allowed; }
|
||||
button.ghost { background: transparent; border: 1px solid var(--border); color: var(--fg);
|
||||
padding: 6px 10px; border-radius: 6px; font-size: 0.85rem; }
|
||||
details { margin-top: 12px; color: var(--dim); font-size: 0.85rem; }
|
||||
details code { background: var(--soft); padding: 1px 5px; border-radius: 4px; font-size: 0.9em; color: var(--fg); }
|
||||
details pre { background: rgba(0,0,0,.4); padding: 10px; border-radius: 8px; overflow-x: auto;
|
||||
border: 1px solid var(--border); color: var(--fg); }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<main>
|
||||
<header>
|
||||
<h1>llm.famzheng.me</h1>
|
||||
<small id="meta">gemma-4-31b-it · 反向代理</small>
|
||||
</header>
|
||||
|
||||
<div class="config">
|
||||
<input id="token" type="password" placeholder="Authorization token (e.g. famzheng-llm-2026)" />
|
||||
<button class="ghost" id="reset">清空对话</button>
|
||||
</div>
|
||||
|
||||
<div class="thread" id="thread"></div>
|
||||
|
||||
<footer>
|
||||
<textarea id="input" rows="2" placeholder="说点什么...(Enter 发送,Shift+Enter 换行)"></textarea>
|
||||
<button class="send" id="send">发送</button>
|
||||
</footer>
|
||||
|
||||
<details>
|
||||
<summary>curl example</summary>
|
||||
<pre>curl -X POST https://llm.famzheng.me/v1/chat/completions \
|
||||
-H 'Authorization: token <your-token>' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"model": "gemma-4-31b-it",
|
||||
"messages": [{"role":"user","content":"hello"}]
|
||||
}'</pre>
|
||||
</details>
|
||||
</main>
|
||||
|
||||
<script>
|
||||
const TOKEN_KEY = 'llm-proxy-token'
|
||||
const tokenInput = document.getElementById('token')
|
||||
const sendBtn = document.getElementById('send')
|
||||
const resetBtn = document.getElementById('reset')
|
||||
const input = document.getElementById('input')
|
||||
const thread = document.getElementById('thread')
|
||||
|
||||
tokenInput.value = localStorage.getItem(TOKEN_KEY) || ''
|
||||
tokenInput.addEventListener('change', () => {
|
||||
localStorage.setItem(TOKEN_KEY, tokenInput.value.trim())
|
||||
})
|
||||
|
||||
const history = []
|
||||
|
||||
// Append a chat bubble containing `text` to the thread and scroll it into view.
// The CSS modifier class is `cls` when given, otherwise `role`.
// Returns the created element.
function bubble(role, text, cls) {
  const modifier = cls || role
  const el = Object.assign(document.createElement('div'), {
    className: `bubble ${modifier}`,
    textContent: text,
  })
  thread.appendChild(el)
  thread.scrollTop = thread.scrollHeight
  return el
}
|
||||
|
||||
// Append the three-dot "typing" placeholder to the thread and scroll to it.
// Returns the element so the caller can remove it once the reply arrives.
function typing() {
  const el = document.createElement('div')
  el.className = 'bubble assistant typing'
  for (let i = 0; i < 3; i += 1) {
    el.appendChild(document.createElement('span'))
  }
  thread.appendChild(el)
  thread.scrollTop = thread.scrollHeight
  return el
}
|
||||
|
||||
// Send the textarea content as the next user turn and render the reply.
//
// The whole conversation in `history` is posted to /v1/chat/completions with
// the token from the input field. On any failure an error bubble is shown and
// the user turn is removed from `history` again, so a retry does not send it
// twice.
async function send() {
  // The Enter key handler calls send() directly and bypasses the disabled
  // button. Without this guard a second request could start while one is in
  // flight, and the error path's history.pop() could drop the wrong turn.
  if (sendBtn.disabled) return

  const text = input.value.trim()
  const token = tokenInput.value.trim()
  if (!text) return
  if (!token) { bubble('err', '先在上方填 token。', 'err'); return }

  input.value = ''
  history.push({ role: 'user', content: text })
  bubble('user', text)
  sendBtn.disabled = true
  const dot = typing()

  // Show an error bubble and undo the user turn pushed above.
  const fail = (message) => {
    bubble('err', message, 'err')
    history.pop()
  }

  try {
    const res = await fetch('/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': 'token ' + token,
      },
      body: JSON.stringify({
        model: 'gemma-4-31b-it',
        messages: history,
      }),
    })
    const body = await res.text()
    dot.remove()
    if (!res.ok) {
      fail(`${res.status}: ${body}`)
      return
    }
    let data
    try { data = JSON.parse(body) } catch (e) {
      fail('上游返回非 JSON: ' + body.slice(0, 300))
      return
    }
    // Only call trim() on a string: a non-string content would throw and be
    // misreported below as a network error.
    const content = data?.choices?.[0]?.message?.content
    const reply = (typeof content === 'string' ? content.trim() : '') || '(空回复)'
    history.push({ role: 'assistant', content: reply })
    bubble('assistant', reply)
  } catch (e) {
    dot.remove()
    fail('网络错误: ' + e.message)
  } finally {
    sendBtn.disabled = false
  }
}
|
||||
|
||||
sendBtn.addEventListener('click', send)
|
||||
input.addEventListener('keydown', (e) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send() }
|
||||
})
|
||||
resetBtn.addEventListener('click', () => {
|
||||
history.length = 0
|
||||
thread.innerHTML = ''
|
||||
})
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user