add hf models in docker

This commit is contained in:
Fam Zheng 2025-11-04 23:06:33 +00:00
parent de6d175f7e
commit bd3faf8133
4 changed files with 42 additions and 2 deletions

View File

@ -18,6 +18,8 @@ ADD requirements.txt requirements.txt
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
rm -f /usr/lib/python*/EXTERNALLY-MANAGED && \
/root/.local/bin/uv pip install --system --no-cache -r requirements.txt
# Pre-download the embedding model at build time so it is baked into the image.
# download_models.py saves to ../models relative to its own location, and
# api/products/rag_service.py looks for the model under /emblem/models, so the
# script must run from /emblem/scripts. Running it from /tmp would write the
# model to /models, where the service never finds it.
ADD scripts/download_models.py /emblem/scripts/download_models.py
RUN python3 /emblem/scripts/download_models.py && rm -rf /emblem/scripts
ADD api /emblem/api
ADD web /emblem/web
RUN cd /emblem/api && ./manage.py collectstatic --noinput

View File

@ -5,7 +5,7 @@ RAG Service Module for Themblem
"""
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from .models import Article
@ -20,8 +20,21 @@ class CachedLangChainRAG:
"""基于缓存的实时LangChain RAG服务"""
def __init__(self):
# Use pre-downloaded model from container (relative to this file)
import os
from pathlib import Path
# Model path relative to this file: api/products/rag_service.py -> ../../models/
model_path = Path(__file__).parent.parent.parent / "models" / "text2vec-base-chinese"
# Fall back to downloading if local model doesn't exist (for development)
if not model_path.exists():
model_path = "shibing624/text2vec-base-chinese"
else:
model_path = str(model_path)
self.embeddings = HuggingFaceEmbeddings(
model_name="shibing624/text2vec-base-chinese",
model_name=model_path,
model_kwargs={'device': 'cpu'},
encode_kwargs={'normalize_embeddings': True}
)

View File

@ -14,6 +14,7 @@ langchain>=0.1.0
langchain-community>=0.1.0
langchain-core>=0.1.0
langchain-text-splitters>=0.1.0
langchain-huggingface>=0.1.0
sentence-transformers>=2.2.0
faiss-cpu>=1.7.4
beautifulsoup4>=4.12.0

24
scripts/download_models.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""
Download HuggingFace models for offline use.

Usage:
    download_models.py [OUTPUT_DIR]

The embedding model is saved to ``OUTPUT_DIR/text2vec-base-chinese``.
When OUTPUT_DIR is omitted it defaults to ``../models`` relative to this
script (i.e. ``scripts/`` -> ``../models/``). Pass OUTPUT_DIR explicitly when
the script is run from a location other than the project's ``scripts/``
directory, otherwise the model ends up next to wherever the script was copied.
"""
import argparse
from pathlib import Path
from typing import Optional

# HuggingFace hub identifier of the embedding model to fetch.
MODEL_NAME = "shibing624/text2vec-base-chinese"
# Directory name (inside the models directory) the model is saved under.
MODEL_DIRNAME = "text2vec-base-chinese"


def resolve_models_dir(output_dir: Optional[str] = None) -> Path:
    """Return the directory that should contain the downloaded models.

    Args:
        output_dir: Explicit target directory, or ``None`` to use the default
            ``../models`` relative to this script.

    Returns:
        The models directory as a ``Path`` (not created by this function).
    """
    if output_dir is not None:
        return Path(output_dir)
    # Default layout: scripts/download_models.py -> ../models/
    return Path(__file__).parent.parent / "models"


def main(argv=None) -> None:
    """Download the model and save it under the resolved models directory.

    Args:
        argv: Command-line arguments (without the program name); ``None``
            means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Download HuggingFace models for offline use"
    )
    parser.add_argument(
        "output_dir",
        nargs="?",
        default=None,
        help="directory to store models in (default: ../models relative to this script)",
    )
    args = parser.parse_args(argv)

    # Imported here rather than at module level: the import is heavy, and
    # deferring it keeps --help and argument errors fast and lets the path
    # logic be imported without the ML stack installed.
    from sentence_transformers import SentenceTransformer

    models_dir = resolve_models_dir(args.output_dir)
    models_dir.mkdir(parents=True, exist_ok=True)

    model_path = models_dir / MODEL_DIRNAME
    print(f"Downloading model: {MODEL_NAME}")
    print(f"Saving to: {model_path}")

    # Fetch the model from the hub, then write a self-contained copy to disk.
    model = SentenceTransformer(MODEL_NAME)
    model.save(str(model_path))
    print(f"Model successfully downloaded to {model_path}")


if __name__ == "__main__":
    main()