add hf models in docker

2025-11-04 23:06:33 +00:00 · 2025-11-04 23:06:33 +00:00 · bd3faf8133
commit bd3faf8133
parent de6d175f7e
4 changed files with 42 additions and 2 deletions
--- a/2
+++ b/2
@ -18,6 +18,8 @@ ADD requirements.txt requirements.txt
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
    rm -f /usr/lib/python*/EXTERNALLY-MANAGED && \
    /root/.local/bin/uv pip install --system --no-cache -r requirements.txt
+# Keep the script under /emblem/scripts: it saves to "<script dir>/../models",
+# which then resolves to /emblem/models -- the directory that
+# /emblem/api/products/rag_service.py checks before falling back to a download.
+ADD scripts/download_models.py /emblem/scripts/download_models.py
+RUN python3 /emblem/scripts/download_models.py && rm /emblem/scripts/download_models.py
 ADD api /emblem/api
 ADD web /emblem/web
 RUN cd /emblem/api && ./manage.py collectstatic --noinput
--- a/api/products/rag_service.py
+++ b/api/products/rag_service.py
@ -5,7 +5,7 @@ RAG Service Module for Themblem
 """

 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_core.documents import Document
 from .models import Article
@ -20,8 +20,21 @@ class CachedLangChainRAG:
    """基于缓存的实时LangChain RAG服务"""
    
    def __init__(self):
+        # Use pre-downloaded model from container (relative to this file)
+        import os
+        from pathlib import Path
+        
+        # Model path relative to this file: api/products/rag_service.py -> ../../models/
+        model_path = Path(__file__).parent.parent.parent / "models" / "text2vec-base-chinese"
+        
+        # Fall back to downloading if local model doesn't exist (for development)
+        if not model_path.exists():
+            model_path = "shibing624/text2vec-base-chinese"
+        else:
+            model_path = str(model_path)
+        
        self.embeddings = HuggingFaceEmbeddings(
-            model_name="shibing624/text2vec-base-chinese",
+            model_name=model_path,
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True}
        )
--- a/requirements.txt
+++ b/requirements.txt
@ -14,6 +14,7 @@ langchain>=0.1.0
 langchain-community>=0.1.0
 langchain-core>=0.1.0
 langchain-text-splitters>=0.1.0
+langchain-huggingface>=0.1.0
 sentence-transformers>=2.2.0
 faiss-cpu>=1.7.4
 beautifulsoup4>=4.12.0
--- a/scripts/download_models.py
+++ b/scripts/download_models.py
@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""
+Download HuggingFace models for offline use.
+
+Usage: download_models.py [MODELS_DIR]
+
+The model is saved to MODELS_DIR/text2vec-base-chinese. When MODELS_DIR is
+omitted, it defaults to the "models" directory that sits beside this script's
+own directory (scripts/ -> ../models/).
+"""
+import sys
+from pathlib import Path
+
+from sentence_transformers import SentenceTransformer
+
+MODEL_NAME = "shibing624/text2vec-base-chinese"
+MODEL_DIR_NAME = "text2vec-base-chinese"
+
+
+def default_models_dir():
+    """Return the default models directory: scripts/ -> ../models/."""
+    # Resolve first: if __file__ is a bare relative name, its parent is "."
+    # and the parent of "." is still ".", so the models would silently end up
+    # inside the scripts directory instead of next to it.
+    return Path(__file__).resolve().parent.parent / "models"
+
+
+def download_model(models_dir):
+    """Download MODEL_NAME and save it under models_dir.
+
+    Args:
+        models_dir: Directory that will contain the model folder; created
+            (including parents) if it does not exist.
+
+    Returns:
+        Path of the saved model directory.
+    """
+    models_dir = Path(models_dir)
+    models_dir.mkdir(parents=True, exist_ok=True)
+    model_path = models_dir / MODEL_DIR_NAME
+
+    print(f"Downloading model: {MODEL_NAME}")
+    print(f"Saving to: {model_path}")
+
+    model = SentenceTransformer(MODEL_NAME)
+    model.save(str(model_path))
+
+    print(f"Model successfully downloaded to {model_path}")
+    return model_path
+
+
+def main(argv=None):
+    """Run the download; return a process exit code.
+
+    Args:
+        argv: Command-line arguments without the program name; defaults to
+            sys.argv[1:]. At most one argument (MODELS_DIR) is accepted.
+    """
+    args = sys.argv[1:] if argv is None else list(argv)
+    if len(args) > 1:
+        print("usage: download_models.py [MODELS_DIR]", file=sys.stderr)
+        return 2
+
+    models_dir = Path(args[0]) if args else default_models_dir()
+    download_model(models_dir)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
+