Update Dockerfile

Dockerfile · CHANGED · +45 -81

Summary: the image moves from Python 3.10 with prebuilt llama-cpp-python CPU wheels to Python 3.11; dependencies now install against the PyTorch CPU wheel index, the NLTK data (punkt_tab, stopwords) is fetched with curl at build time instead of via nltk.download(), and Streamlit is wired to Hugging Face's expected port 7860 with an optional healthcheck. Notes on a few of the new steps follow the diff.
@@ -1,99 +1,63 @@
-# We use Python 3.10 because it has the best pre-built wheels for Llama
-FROM python:3.10-slim

+# ---- Base image ----
+FROM python:3.11-slim
+
+# Workdir inside the container
 WORKDIR /app

-#
+# ---- System dependencies ----
 RUN apt-get update && apt-get install -y \
+    build-essential \
     curl \
     git \
-
-
-
-# 2. Upgrade pip (Critical for finding the right wheels)
-RUN pip install --upgrade pip
-
-# 3. Install LLM Engine (Pre-built)
-# This installs the tool on the hard drive. It takes space, but ZERO CPU to run.
-# The --prefer-binary flag ensures we don't try to compile it.
-RUN pip install llama-cpp-python \
-    --no-cache-dir \
-    --prefer-binary \
-    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
-
-# 4. Install other requirements
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-# 5. Download NLTK data
-RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('stopwords')"
-
-COPY . .
-
-# 6. Run the Unified App (Back to app.py!)
-CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
-
-
+    unzip \
+    && rm -rf /var/lib/apt/lists/*

-# # ---- Base image ----
-# FROM python:3.11-slim

-#
-
+# ---- Python deps ----
+COPY requirements.txt ./

-#
-
-
-
-# git \
-# unzip \
-# && rm -rf /var/lib/apt/lists/*
+# Torch / sentence-transformers like having the CPU wheel index explicitly
+RUN pip install --no-cache-dir \
+    --extra-index-url https://download.pytorch.org/whl/cpu \
+    -r requirements.txt

+# ---- NLTK data (punkt + stopwords) ----
+RUN mkdir -p /usr/local/share/nltk_data

-#
-#
+# punkt tokenizer
+# RUN curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip" \
+#     -o /tmp/punkt.zip && \
+#     unzip /tmp/punkt.zip -d /usr/local/share/nltk_data && \
+#     rm /tmp/punkt.zip
+# punkt_tab tokenizer (for NLTK >= 3.9)
+RUN curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip" \
+    -o /tmp/punkt_tab.zip && \
+    unzip /tmp/punkt_tab.zip -d /usr/local/share/nltk_data && \
+    rm /tmp/punkt_tab.zip

-# # Torch / sentence-transformers like having the CPU wheel index explicitly
-# RUN pip install --no-cache-dir \
-#     --extra-index-url https://download.pytorch.org/whl/cpu \
-#     -r requirements.txt

-#
-
+# stopwords corpus
+RUN curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" \
+    -o /tmp/stopwords.zip && \
+    unzip /tmp/stopwords.zip -d /usr/local/share/nltk_data && \
+    rm /tmp/stopwords.zip

-#
-#
-
-# # unzip /tmp/punkt.zip -d /usr/local/share/nltk_data && \
-# # rm /tmp/punkt.zip
-# # punkt_tab tokenizer (for NLTK >= 3.9)
-# RUN curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip" \
-#     -o /tmp/punkt_tab.zip && \
-#     unzip /tmp/punkt_tab.zip -d /usr/local/share/nltk_data && \
-#     rm /tmp/punkt_tab.zip
-
-
-# # stopwords corpus
-# RUN curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" \
-#     -o /tmp/stopwords.zip && \
-#     unzip /tmp/stopwords.zip -d /usr/local/share/nltk_data && \
-#     rm /tmp/stopwords.zip
-
-# # ---- Copy app code ----
-# # If you only want app.py + data, you can narrow this, but copying all is fine.
-# COPY . .
+# ---- Copy app code ----
+# If you only want app.py + data, you can narrow this, but copying all is fine.
+COPY . .

-#
-
-
+# ---- Hugging Face port wiring ----
+ENV PORT=7860
+EXPOSE 7860

-#
-
+# Optional healthcheck; HF will just ignore failures but nice to have
+HEALTHCHECK CMD curl --fail http://localhost:${PORT}/_stcore/health || exit 1

-#
-
-
-
-
-
+# ---- Run Streamlit ----
+ENTRYPOINT ["bash", "-c", "streamlit run app.py \
+    --server.port=${PORT} \
+    --server.address=0.0.0.0 \
+    --server.enableCORS=false \
+    --server.enableXsrfProtection=false"]

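
NLTK data layout: NLTK resolves these resources as tokenizers/punkt_tab and corpora/stopwords under each directory on its search path, while the upstream zips unpack to top-level punkt_tab/ and stopwords/ folders. Unzipping straight into /usr/local/share/nltk_data therefore leaves the data one level short of where nltk.data.find() looks. If resource lookups fail at runtime, here is a sketch of adjusted commands, not part of this PR (same URLs; only the mkdir and unzip -d targets change):

    RUN mkdir -p /usr/local/share/nltk_data/tokenizers /usr/local/share/nltk_data/corpora && \
        curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip" -o /tmp/punkt_tab.zip && \
        unzip /tmp/punkt_tab.zip -d /usr/local/share/nltk_data/tokenizers && \
        curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" -o /tmp/stopwords.zip && \
        unzip /tmp/stopwords.zip -d /usr/local/share/nltk_data/corpora && \
        rm /tmp/punkt_tab.zip /tmp/stopwords.zip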
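
Either way, /usr/local/share/nltk_data is one of the directories NLTK searches by default on Linux, so the app needs no NLTK_DATA variable. A quick smoke test to run inside the built image, in the same python -c style the old Dockerfile used (assumes NLTK >= 3.9, where word_tokenize loads punkt_tab):

    python -c "from nltk.tokenize import word_tokenize; from nltk.corpus import stopwords; print(word_tokenize('Tokenizer smoke test.')); print(stopwords.words('english')[:5])"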
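
Why the ENTRYPOINT wraps the command in bash -c: the JSON exec form does not go through a shell, so a bare ${PORT} would be passed to Streamlit literally; bash -c makes the variable expand at container start. The trailing backslashes are ordinary Dockerfile line continuations, so bash receives a single command string. A hypothetical equivalent in shell form, which /bin/sh -c expands the same way:

    ENTRYPOINT streamlit run app.py --server.port=${PORT} --server.address=0.0.0.0 \
        --server.enableCORS=false --server.enableXsrfProtection=false

Disabling CORS and XSRF protection is the usual accommodation for serving Streamlit behind the Spaces proxy and iframe.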
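
Local sanity check before pushing (the image tag here is arbitrary):

    docker build -t space-test .
    docker run --rm -p 7860:7860 space-test
    # app: http://localhost:7860
    # endpoint probed by the HEALTHCHECK: http://localhost:7860/_stcore/health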