romybeaute commited on
Commit
9a5a218
·
verified ·
1 Parent(s): 9baedc9

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +45 -81
Dockerfile CHANGED
@@ -1,99 +1,63 @@
1
- # We use Python 3.10 because it has the best pre-built wheels for Llama
2
- FROM python:3.10-slim
3
 
 
 
 
 
4
  WORKDIR /app
5
 
6
- # 1. Install minimal tools (Git/Curl) but NO heavy compilers
7
  RUN apt-get update && apt-get install -y \
 
8
  curl \
9
  git \
10
- build-essential \
11
- && rm -rf /var/lib/apt/lists/*
12
-
13
- # 2. Upgrade pip (Critical for finding the right wheels)
14
- RUN pip install --upgrade pip
15
-
16
- # 3. Install LLM Engine (Pre-built)
17
- # This installs the tool on the hard drive. It takes space, but ZERO CPU to run.
18
- # The --prefer-binary flag ensures we don't try to compile it.
19
- RUN pip install llama-cpp-python \
20
- --no-cache-dir \
21
- --prefer-binary \
22
- --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
23
-
24
- # 4. Install other requirements
25
- COPY requirements.txt .
26
- RUN pip install --no-cache-dir -r requirements.txt
27
-
28
- # 5. Download NLTK data
29
- RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('stopwords')"
30
-
31
- COPY . .
32
-
33
- # 6. Run the Unified App (Back to app.py!)
34
- CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
35
-
36
-
37
 
38
- # # ---- Base image ----
39
- # FROM python:3.11-slim
40
 
41
- # # Workdir inside the container
42
- # WORKDIR /app
43
 
44
- # # ---- System dependencies ----
45
- # RUN apt-get update && apt-get install -y \
46
- # build-essential \
47
- # curl \
48
- # git \
49
- # unzip \
50
- # && rm -rf /var/lib/apt/lists/*
51
 
 
 
52
 
53
- # # ---- Python deps ----
54
- # COPY requirements.txt ./
 
 
 
 
 
 
 
 
55
 
56
- # # Torch / sentence-transformers like having the CPU wheel index explicitly
57
- # RUN pip install --no-cache-dir \
58
- # --extra-index-url https://download.pytorch.org/whl/cpu \
59
- # -r requirements.txt
60
 
61
- # # ---- NLTK data (punkt + stopwords) ----
62
- # RUN mkdir -p /usr/local/share/nltk_data
 
 
 
63
 
64
- # # punkt tokenizer
65
- # # RUN curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip" \
66
- # # -o /tmp/punkt.zip && \
67
- # # unzip /tmp/punkt.zip -d /usr/local/share/nltk_data && \
68
- # # rm /tmp/punkt.zip
69
- # # punkt_tab tokenizer (for NLTK >= 3.9)
70
- # RUN curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip" \
71
- # -o /tmp/punkt_tab.zip && \
72
- # unzip /tmp/punkt_tab.zip -d /usr/local/share/nltk_data && \
73
- # rm /tmp/punkt_tab.zip
74
-
75
-
76
- # # stopwords corpus
77
- # RUN curl -L "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" \
78
- # -o /tmp/stopwords.zip && \
79
- # unzip /tmp/stopwords.zip -d /usr/local/share/nltk_data && \
80
- # rm /tmp/stopwords.zip
81
-
82
- # # ---- Copy app code ----
83
- # # If you only want app.py + data, you can narrow this, but copying all is fine.
84
- # COPY . .
85
 
86
- # # ---- Hugging Face port wiring ----
87
- # ENV PORT=7860
88
- # EXPOSE 7860
89
 
90
- # # Optional healthcheck; HF will just ignore failures but nice to have
91
- # HEALTHCHECK CMD curl --fail http://localhost:${PORT}/_stcore/health || exit 1
92
 
93
- # # ---- Run Streamlit ----
94
- # ENTRYPOINT ["bash", "-c", "streamlit run app.py \
95
- # --server.port=${PORT} \
96
- # --server.address=0.0.0.0 \
97
- # --server.enableCORS=false \
98
- # --server.enableXsrfProtection=false"]
99
 
 
 
 
1
 
2
# ---- Base image ----
# Slim Debian-based Python: small image, but still glibc so the prebuilt
# CPU wheels for torch / sentence-transformers install without compiling.
FROM python:3.11-slim

# Workdir inside the container (created automatically if missing)
WORKDIR /app

# ---- System dependencies ----
# build-essential: compiles any sdist-only packages in requirements.txt
# curl / unzip:    fetch and unpack the NLTK data bundles below
# git:             allows pip requirements pinned to git URLs
# --no-install-recommends keeps the layer minimal; the apt list cleanup runs
# in the SAME layer so the package cache never lands in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      curl \
      git \
      unzip \
    && rm -rf /var/lib/apt/lists/*

# ---- Python deps ----
# Copy only the manifest first so the expensive pip layer stays cached until
# requirements.txt itself changes (app-code edits won't re-trigger installs).
COPY requirements.txt ./

# Torch / sentence-transformers like having the CPU wheel index explicitly;
# this avoids accidentally pulling the multi-GB CUDA builds.
RUN pip install --no-cache-dir \
      --extra-index-url https://download.pytorch.org/whl/cpu \
      -r requirements.txt

# ---- NLTK data (punkt_tab + stopwords) ----
# /usr/local/share/nltk_data is on NLTK's default search path, so no
# NLTK_DATA env var is needed at runtime.
RUN mkdir -p /usr/local/share/nltk_data

# punkt_tab tokenizer (required by NLTK >= 3.9; replaces the old punkt.zip)
# curl -f makes HTTP errors fail the build instead of saving an error page.
RUN curl -fL "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip" \
      -o /tmp/punkt_tab.zip && \
    unzip /tmp/punkt_tab.zip -d /usr/local/share/nltk_data && \
    rm /tmp/punkt_tab.zip

# stopwords corpus
RUN curl -fL "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" \
      -o /tmp/stopwords.zip && \
    unzip /tmp/stopwords.zip -d /usr/local/share/nltk_data && \
    rm /tmp/stopwords.zip

# ---- Copy app code ----
# Copied LAST so source edits never invalidate the dependency layers above.
# Keep a .dockerignore (.git, __pycache__, models, .env) to slim the context
# and avoid leaking local secrets into the image.
COPY . .

# ---- Hugging Face port wiring ----
ENV PORT=7860
# EXPOSE is documentation only; HF Spaces routes to 7860 by convention.
EXPOSE 7860

# Optional healthcheck; HF will just ignore failures but nice to have.
# --start-period gives Streamlit (and model loading) time to boot before
# failed probes count against the container.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
  CMD curl -fsS "http://localhost:${PORT}/_stcore/health" || exit 1

# ---- Run Streamlit ----
# bash -c is needed for ${PORT} expansion; `exec` replaces the shell so
# Streamlit runs as PID 1 and receives SIGTERM from `docker stop` for a
# clean shutdown (without it, signals stop at bash and the app is killed).
ENTRYPOINT ["bash", "-c", "exec streamlit run app.py \
    --server.port=${PORT} \
    --server.address=0.0.0.0 \
    --server.enableCORS=false \
    --server.enableXsrfProtection=false"]
63