From 857c0d548186609353bee2904d79e11626a792ff Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 18 May 2026 00:21:47 +0100 Subject: [PATCH] =?UTF-8?q?llm-proxy(app):=20gemma=20=E5=8F=8D=E5=90=91?= =?UTF-8?q?=E4=BB=A3=E7=90=86=20+=20token=20=E9=89=B4=E6=9D=83=20+=20/chat?= =?UTF-8?q?=20web=20UI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新 service,ns `llm-proxy`,域 `llm.famzheng.me`。 - POST /v1/chat/completions — OpenAI 兼容透传到 mochi 同款 backend gateway (gemma-4-31b-it);一期强制 stream=false,SSE 留二期 - 鉴权: `Authorization: token ` 或同款 Bearer; 常时间比较防 timing;空 expected 一律拒 - GET /chat — 自带极简 HTML chat UI(token 走 localStorage, 附 curl example details);/ 跳转到 /chat - Secrets `llm-proxy/proxy-credentials` 已 kubectl 手工创建: BACKEND_TOKEN (上游) + PROXY_AUTH_TOKEN (对外) - 13 个 cargo test 覆盖 auth 多个 scheme / 边界 + body 改写 (stream=false 强制注入) --- .gitea/workflows/deploy-llm-proxy.yml | 52 ++++++++ Cargo.lock | 14 +++ Cargo.toml | 1 + apps/llm-proxy/Cargo.toml | 17 +++ apps/llm-proxy/Dockerfile | 5 + apps/llm-proxy/k8s/all.yaml | 90 +++++++++++++ apps/llm-proxy/src/main.rs | 147 ++++++++++++++++++++++ apps/llm-proxy/src/proxy.rs | 173 +++++++++++++++++++++++++ apps/llm-proxy/web/chat.html | 174 ++++++++++++++++++++++++++ 9 files changed, 673 insertions(+) create mode 100644 .gitea/workflows/deploy-llm-proxy.yml create mode 100644 apps/llm-proxy/Cargo.toml create mode 100644 apps/llm-proxy/Dockerfile create mode 100644 apps/llm-proxy/k8s/all.yaml create mode 100644 apps/llm-proxy/src/main.rs create mode 100644 apps/llm-proxy/src/proxy.rs create mode 100644 apps/llm-proxy/web/chat.html diff --git a/.gitea/workflows/deploy-llm-proxy.yml b/.gitea/workflows/deploy-llm-proxy.yml new file mode 100644 index 0000000..91581d2 --- /dev/null +++ b/.gitea/workflows/deploy-llm-proxy.yml @@ -0,0 +1,52 @@ +name: deploy llm-proxy +# llm.famzheng.me — gemma 反向代理。host shell runner(fam 用户)。 + +on: + push: + branches: [master] + paths: + - 'apps/llm-proxy/**' + - 
'crates/cube-core/**' + - 'Cargo.toml' + - 'Cargo.lock' + - '.gitea/workflows/deploy-llm-proxy.yml' + workflow_dispatch: + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + env: + APP: llm-proxy + IMAGE: registry.famzheng.me/mochi/llm-proxy + steps: + - uses: actions/checkout@v4 + + - name: Resolve image tag + id: tag + run: echo "sha=$(git rev-parse --short=12 HEAD)" >> "$GITHUB_OUTPUT" + + - name: Build rust (musl static) + run: | + export PATH="$HOME/.cargo/bin:$PATH" + cargo build --release --target x86_64-unknown-linux-musl -p "$APP" + + - name: Run tests + run: | + export PATH="$HOME/.cargo/bin:$PATH" + cargo test --release -p "$APP" + + - name: Build & push image + env: + REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} + run: | + echo "$REGISTRY_TOKEN" | docker login registry.famzheng.me -u mochi --password-stdin + docker build --no-cache -f "apps/$APP/Dockerfile" -t "$IMAGE:${{ steps.tag.outputs.sha }}" . + docker push "$IMAGE:${{ steps.tag.outputs.sha }}" + + - name: Initialize K8s resources + run: kubectl apply -f "apps/$APP/k8s/all.yaml" + + - name: Roll out to k3s + run: | + kubectl -n llm-proxy set image "deploy/$APP" "$APP=$IMAGE:${{ steps.tag.outputs.sha }}" + kubectl -n llm-proxy rollout status "deploy/$APP" --timeout=120s diff --git a/Cargo.lock b/Cargo.lock index bfd34fa..98dd232 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -649,6 +649,20 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" +[[package]] +name = "llm-proxy" +version = "0.1.0" +dependencies = [ + "axum", + "cube-core", + "reqwest", + "serde", + "serde_json", + "tokio", + "tower-http", + "tracing", +] + [[package]] name = "lock_api" version = "0.4.14" diff --git a/Cargo.toml b/Cargo.toml index 4c8f4b7..06ff09d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "apps/articulate", "apps/karaoke", "apps/notes", + "apps/llm-proxy", ] [workspace.package] diff 
--git a/apps/llm-proxy/Cargo.toml b/apps/llm-proxy/Cargo.toml new file mode 100644 index 0000000..4c70100 --- /dev/null +++ b/apps/llm-proxy/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "llm-proxy" +version = "0.1.0" +edition.workspace = true +license.workspace = true +authors.workspace = true +description = "llm.famzheng.me — gemma-4-31b-it 反向代理 + token 鉴权 + /chat web UI" + +[dependencies] +cube-core = { path = "../../crates/cube-core" } +axum = { workspace = true } +tokio = { workspace = true } +tower-http = { workspace = true } +tracing = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +reqwest = { workspace = true } diff --git a/apps/llm-proxy/Dockerfile b/apps/llm-proxy/Dockerfile new file mode 100644 index 0000000..24a236a --- /dev/null +++ b/apps/llm-proxy/Dockerfile @@ -0,0 +1,5 @@ +# llm-proxy — llm.famzheng.me +FROM scratch +COPY target/x86_64-unknown-linux-musl/release/llm-proxy /llm-proxy +EXPOSE 8080 +ENTRYPOINT ["/llm-proxy"] diff --git a/apps/llm-proxy/k8s/all.yaml b/apps/llm-proxy/k8s/all.yaml new file mode 100644 index 0000000..cc656e2 --- /dev/null +++ b/apps/llm-proxy/k8s/all.yaml @@ -0,0 +1,90 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: llm-proxy +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-proxy + namespace: llm-proxy + labels: + app: llm-proxy +spec: + replicas: 1 + selector: + matchLabels: + app: llm-proxy + template: + metadata: + labels: + app: llm-proxy + spec: + imagePullSecrets: + - name: registry-creds + containers: + - name: llm-proxy + image: registry.famzheng.me/mochi/llm-proxy:latest + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8080 + name: http + envFrom: + # secret `proxy-credentials` 由 kubectl 手工创建(BACKEND_TOKEN + + # PROXY_AUTH_TOKEN),不在 git manifest 里。 + - secretRef: + name: proxy-credentials + env: + - name: LLM_GATEWAY + value: "http://3.135.65.204:8848/v1" + readinessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 1 
+ periodSeconds: 5 + livenessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 5 + periodSeconds: 15 + resources: + requests: + cpu: 10m + memory: 16Mi + limits: + cpu: 200m + memory: 128Mi +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-proxy + namespace: llm-proxy +spec: + selector: + app: llm-proxy + ports: + - name: http + port: 80 + targetPort: 8080 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: llm-proxy + namespace: llm-proxy +spec: + ingressClassName: traefik + rules: + - host: llm.famzheng.me + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: llm-proxy + port: + number: 80 diff --git a/apps/llm-proxy/src/main.rs b/apps/llm-proxy/src/main.rs new file mode 100644 index 0000000..02f35fa --- /dev/null +++ b/apps/llm-proxy/src/main.rs @@ -0,0 +1,147 @@ +//! llm.famzheng.me — gemma-4-31b-it 反向代理 + 简单 token 鉴权。 +//! +//! - `GET /` → `/chat` 跳转 +//! - `GET /chat` → 静态 web UI +//! - `POST /v1/chat/completions` → OpenAI 兼容透传 (要 Authorization: token ) +//! 
- `GET /healthz` → 不带 auth, 给 k8s probe + +mod proxy; + +use std::sync::Arc; + +use axum::{ + extract::State, + http::{header, StatusCode}, + middleware::{self, Next}, + response::{Html, IntoResponse, Redirect, Response}, + routing::{get, post}, + Router, +}; +use tower_http::trace::TraceLayer; + +#[tokio::main] +async fn main() -> std::io::Result<()> { + cube_core::init_tracing(); + let cfg = Arc::new(proxy::Config::from_env()); + let port: u16 = std::env::var("PORT") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(8080); + + let chat_api = Router::new() + .route("/v1/chat/completions", post(proxy::handle)) + .route_layer(middleware::from_fn_with_state(cfg.clone(), require_token)) + .with_state(cfg); + + let app = Router::new() + .route("/healthz", get(|| async { "ok" })) + .route("/", get(|| async { Redirect::permanent("/chat") })) + .route("/chat", get(chat_ui)) + .merge(chat_api) + .layer(TraceLayer::new_for_http()); + + let addr = format!("0.0.0.0:{port}"); + let listener = tokio::net::TcpListener::bind(&addr).await?; + tracing::info!(%addr, "llm-proxy listening"); + axum::serve(listener, app).await +} + +const CHAT_HTML: &str = include_str!("../web/chat.html"); + +async fn chat_ui() -> Html<&'static str> { + Html(CHAT_HTML) +} + +/// Middleware: reads the `Authorization` header, trims it and validates it with `check_token` against `cfg.proxy_auth_token`; a header that is missing, cannot be converted with `to_str`, or does not match is answered with 401 instead of reaching the handler. +async fn require_token( + State(cfg): State>, + req: axum::extract::Request, + next: Next, +) -> Response { + let header_val = req + .headers() + .get(header::AUTHORIZATION) + .and_then(|v| v.to_str().ok()) + .map(str::trim); + + match header_val { + Some(v) if check_token(v, &cfg.proxy_auth_token) => next.run(req).await, + _ => ( + StatusCode::UNAUTHORIZED, + "缺少或不匹配 `Authorization: token `", + ) + .into_response(), + } +} + +/// Returns true when `header_value` is `token <value>` or `Bearer <value>` and `<value>` equals `expected` (Bearer is tolerated because OpenAI clients usually send it). Whitespace around the header value and around the token part is trimmed, the scheme prefix is matched case-sensitively, and an empty `expected` always yields false. +pub fn check_token(header_value: &str, expected: &str) -> bool { + if expected.is_empty() { + return false; + } + let trimmed = header_value.trim(); + if let Some(rest) = trimmed.strip_prefix("token ") { + 
return constant_time_eq(rest.trim().as_bytes(), expected.as_bytes()); + } + if let Some(rest) = trimmed.strip_prefix("Bearer ") { + return constant_time_eq(rest.trim().as_bytes(), expected.as_bytes()); + } + false +} + +/// Byte-wise comparison that ORs together the XOR of every byte pair instead of stopping at the first difference, to blunt timing attacks (low impact in this scenario, but cheap to do). Slices of different length return false immediately, so only the content comparison is independent of where the bytes differ. +fn constant_time_eq(a: &[u8], b: &[u8]) -> bool { + if a.len() != b.len() { + return false; + } + let mut diff: u8 = 0; + for (x, y) in a.iter().zip(b.iter()) { + diff |= x ^ y; + } + diff == 0 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn check_token_accepts_token_scheme() { + assert!(check_token("token famzheng-llm-2026", "famzheng-llm-2026")); + } + + #[test] + fn check_token_accepts_bearer_scheme() { + assert!(check_token("Bearer famzheng-llm-2026", "famzheng-llm-2026")); + } + + #[test] + fn check_token_rejects_wrong_value() { + assert!(!check_token("token wrong", "famzheng-llm-2026")); + } + + #[test] + fn check_token_rejects_unknown_scheme() { + assert!(!check_token("Basic famzheng-llm-2026", "famzheng-llm-2026")); + assert!(!check_token("famzheng-llm-2026", "famzheng-llm-2026")); + } + + #[test] + fn check_token_rejects_empty_expected() { + // Guards against misconfiguration: an empty expected token must not let anyone through + assert!(!check_token("token any", "")); + assert!(!check_token("Bearer ", "")); + } + + #[test] + fn check_token_strips_extra_whitespace() { + assert!(check_token(" token famzheng-llm-2026 ", "famzheng-llm-2026")); + } + + #[test] + fn check_token_rejects_prefix_match() { + // A value that merely extends or truncates the expected token must not pass + assert!(!check_token("token famzheng-llm-2026-extra", "famzheng-llm-2026")); + assert!(!check_token("token famzheng-llm", "famzheng-llm-2026")); + } +} diff --git a/apps/llm-proxy/src/proxy.rs b/apps/llm-proxy/src/proxy.rs new file mode 100644 index 0000000..3e413b6 --- /dev/null +++ b/apps/llm-proxy/src/proxy.rs @@ -0,0 +1,173 @@ +//! `/v1/chat/completions` passthrough — sets the upstream Authorization header, forwards the +//! request body to the upstream LLM gateway and returns the upstream response body to the client unchanged. +//! +//! 
一期只支持非 streaming(force `stream: false` 进 body),SSE 透传留给二期。 + +use std::sync::Arc; + +use axum::{ + body::Bytes, + extract::State, + http::{HeaderMap, HeaderValue, StatusCode}, + response::{IntoResponse, Response}, +}; +use serde_json::Value; + +#[derive(Clone, Debug)] +pub struct Config { + pub upstream_url: String, // full chat-completions endpoint, e.g. http://3.135.65.204:8848/v1/chat/completions + pub upstream_token: String, + pub proxy_auth_token: String, +} + +impl Config { + pub fn from_env() -> Self { + let gateway = std::env::var("LLM_GATEWAY") + .unwrap_or_else(|_| "http://3.135.65.204:8848/v1".to_string()); + let upstream_url = format!("{}/chat/completions", gateway.trim_end_matches('/')); + Self { + upstream_url, + upstream_token: std::env::var("BACKEND_TOKEN").unwrap_or_default(), + proxy_auth_token: std::env::var("PROXY_AUTH_TOKEN").unwrap_or_default(), + } + } +} + +/// Handler for the chat-completions route: rewrites the body with `force_non_stream` (400 `bad JSON body: ...` on failure), POSTs it to `cfg.upstream_url` with `Authorization: Bearer <upstream_token>` and relays the result via `relay_response`; a failed upstream call is logged and answered with 502. +/// NOTE(review): a new `reqwest::Client` is built on every call, and the 502 body echoes the reqwest error text, which may include the upstream address; confirm both are intended. +pub async fn handle(State(cfg): State>, body: Bytes) -> Response { + // 1. parse the body and force stream=false (phase one does not support streaming) + let body_bytes = match force_non_stream(&body) { + Ok(b) => b, + Err(e) => { + return (StatusCode::BAD_REQUEST, format!("bad JSON body: {e}")).into_response(); + } + }; + + // 2. 
forward + let client = reqwest::Client::new(); + let res = client + .post(&cfg.upstream_url) + .header("Authorization", format!("Bearer {}", cfg.upstream_token)) + .header("Content-Type", "application/json") + .body(body_bytes) + .send() + .await; + + match res { + Ok(r) => relay_response(r).await, + Err(e) => { + tracing::error!(error=%e, "upstream call failed"); + (StatusCode::BAD_GATEWAY, format!("upstream error: {e}")).into_response() + } + } +} + +/// Parses the body as JSON, sets `stream: false` on it and serializes it again; an empty or unparseable body is returned as an error string. +/// A JSON value that is not an object is re-serialized without that field (the upstream is left to reject it). +fn force_non_stream(body: &Bytes) -> Result, String> { + if body.is_empty() { + return Err("empty body".into()); + } + let mut v: Value = serde_json::from_slice(body).map_err(|e| e.to_string())?; + if let Some(obj) = v.as_object_mut() { + obj.insert("stream".to_string(), Value::Bool(false)); + } + serde_json::to_vec(&v).map_err(|e| e.to_string()) +} + +/// Converts the upstream reply into an axum response: same status code (502 if it cannot be converted), the upstream `Content-Type` (or `application/json` when absent) and the body bytes; no other upstream header is copied. If reading the body fails, the error is logged and a 502 is returned. +async fn relay_response(upstream: reqwest::Response) -> Response { + let status = upstream.status(); + let ct = upstream + .headers() + .get(reqwest::header::CONTENT_TYPE) + .cloned() + .unwrap_or_else(|| HeaderValue::from_static("application/json")); + let bytes = match upstream.bytes().await { + Ok(b) => b, + Err(e) => { + tracing::error!(error=%e, "read upstream body"); + return (StatusCode::BAD_GATEWAY, "read upstream body failed").into_response(); + } + }; + let mut headers = HeaderMap::new(); + headers.insert(axum::http::header::CONTENT_TYPE, ct); + ( + StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::BAD_GATEWAY), + headers, + bytes, + ) + .into_response() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn force_non_stream_overrides_stream_true() { + let input = Bytes::from(r#"{"model":"gemma","messages":[],"stream":true}"#); + let out = force_non_stream(&input).unwrap(); + let v: Value = serde_json::from_slice(&out).unwrap(); + assert_eq!(v["stream"], Value::Bool(false)); + } + + #[test] + fn force_non_stream_injects_when_absent() { + let input = 
Bytes::from(r#"{"model":"gemma","messages":[]}"#); + let out = force_non_stream(&input).unwrap(); + let v: Value = serde_json::from_slice(&out).unwrap(); + assert_eq!(v["stream"], Value::Bool(false)); + } + + #[test] + fn force_non_stream_preserves_other_fields() { + let input = Bytes::from( + r#"{"model":"gemma-4-31b-it","temperature":0.7,"messages":[{"role":"user","content":"hi"}]}"#, + ); + let out = force_non_stream(&input).unwrap(); + let v: Value = serde_json::from_slice(&out).unwrap(); + assert_eq!(v["model"], "gemma-4-31b-it"); + assert_eq!(v["temperature"], 0.7); + assert_eq!(v["messages"][0]["role"], "user"); + } + + #[test] + fn force_non_stream_rejects_empty() { + assert!(force_non_stream(&Bytes::new()).is_err()); + } + + #[test] + fn force_non_stream_rejects_invalid_json() { + let input = Bytes::from(r#"not json"#); + assert!(force_non_stream(&input).is_err()); + } + + #[test] + fn config_from_env_builds_completions_url() { + // Remember the current values so they can be put back after the assertions. NOTE(review): the process environment is shared by all test threads, so this alone does not protect against another test touching these variables concurrently; confirm no other test does. + let prev_gateway = std::env::var("LLM_GATEWAY").ok(); + let prev_token = std::env::var("BACKEND_TOKEN").ok(); + let prev_proxy = std::env::var("PROXY_AUTH_TOKEN").ok(); + std::env::set_var("LLM_GATEWAY", "http://1.2.3.4:8848/v1/"); + std::env::set_var("BACKEND_TOKEN", "backend-xxx"); + std::env::set_var("PROXY_AUTH_TOKEN", "client-yyy"); + + let cfg = Config::from_env(); + assert_eq!(cfg.upstream_url, "http://1.2.3.4:8848/v1/chat/completions"); + assert_eq!(cfg.upstream_token, "backend-xxx"); + assert_eq!(cfg.proxy_auth_token, "client-yyy"); + + // restore the previous values (or unset the variable if it was not set before) + match prev_gateway { + Some(v) => std::env::set_var("LLM_GATEWAY", v), + None => std::env::remove_var("LLM_GATEWAY"), + } + match prev_token { + Some(v) => std::env::set_var("BACKEND_TOKEN", v), + None => std::env::remove_var("BACKEND_TOKEN"), + } + match prev_proxy { + Some(v) => std::env::set_var("PROXY_AUTH_TOKEN", v), + None => std::env::remove_var("PROXY_AUTH_TOKEN"), + } + } +} diff --git 
a/apps/llm-proxy/web/chat.html b/apps/llm-proxy/web/chat.html new file mode 100644 index 0000000..f8c57d9 --- /dev/null +++ b/apps/llm-proxy/web/chat.html @@ -0,0 +1,174 @@ + + + + + + +llm.famzheng.me + + + +
+
+

llm.famzheng.me

+ gemma-4-31b-it · 反向代理 +
+ +
+ + +
+ +
+ +
+ + +
+ +
+ curl example +
curl -X POST https://llm.famzheng.me/v1/chat/completions \
+  -H 'Authorization: token <your-token>' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "gemma-4-31b-it",
+    "messages": [{"role":"user","content":"hello"}]
+  }'
+
+
+ + + +