Single commit for Hugging Face
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .github/workflows/manual-pushing-to-HF1.yml +26 -0
- .github/workflows/sync-to-hf.yml +93 -0
- .github/workflows/unit-tests.yml +40 -0
- .gitignore +11 -0
- Dockerfile +24 -0
- LICENSE +21 -0
- README.md +85 -1
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/api.py +260 -0
- app/automigration.py +4 -0
- app/backend/__init__.py +0 -0
- app/backend/controllers/__init__.py +0 -0
- app/backend/controllers/base_controller.py +5 -0
- app/backend/controllers/chats.py +117 -0
- app/backend/controllers/messages.py +26 -0
- app/backend/controllers/users.py +164 -0
- app/backend/controllers/utils.py +13 -0
- app/backend/models/__init__.py +0 -0
- app/backend/models/base_model.py +14 -0
- app/backend/models/chats.py +51 -0
- app/backend/models/db_service.py +37 -0
- app/backend/models/documents.py +22 -0
- app/backend/models/messages.py +28 -0
- app/backend/models/users.py +58 -0
- app/backend/schemas.py +20 -0
- app/core/__init__.py +0 -0
- app/core/chunks.py +54 -0
- app/core/database.py +233 -0
- app/core/document_validator.py +9 -0
- app/core/main.py +50 -0
- app/core/models.py +214 -0
- app/core/processor.py +305 -0
- app/core/rag_generator.py +171 -0
- app/core/response_parser.py +29 -0
- app/core/some.py +27 -0
- app/core/utils.py +208 -0
- app/frontend/static/styles.css +377 -0
- app/frontend/templates/base.html +42 -0
- app/frontend/templates/components/navbar.html +33 -0
- app/frontend/templates/components/sidebar.html +26 -0
- app/frontend/templates/pages/chat.html +163 -0
- app/frontend/templates/pages/login.html +145 -0
- app/frontend/templates/pages/main.html +14 -0
- app/frontend/templates/pages/registration.html +155 -0
- app/frontend/templates/pages/show_pdf.html +98 -0
- app/frontend/templates/pages/show_text.html +47 -0
- app/initializer.py +44 -0
- app/prompt_templates/test1.txt +16 -0
- app/prompt_templates/test2.txt +284 -0
.github/workflows/manual-pushing-to-HF1.yml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Collapses everything after the repository's root commit into one commit and
# force-pushes the result to the RAG-Integration-test Space on Hugging Face.
name: Sync to Hugging Face Hub

on:
  push:
    branches:
      - for_testing
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed so the root commit can be resolved below.
          fetch-depth: 0
          lfs: true

      - name: Configure Git identity
        run: |
          git config --global user.name "Andrchest"
          git config --global user.email "andreipolevoi220@gmail.com"

      - name: Push to Hugging Face
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git checkout -b hf-single-commit
          git reset --soft $(git rev-list --max-parents=0 HEAD)
          git commit -m "Single commit for Hugging Face"
          git remote add hf https://Andrchest:$HF_TOKEN@huggingface.co/spaces/The-Ultimate-RAG-HF/RAG-Integration-test
          git push --force hf hf-single-commit:main
|
.github/workflows/sync-to-hf.yml
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Deployment pipeline: push a squashed snapshot to the integration-test Space
# (HF1), run the integration tests, and only if they pass push the same
# snapshot to the main Space (HF2).
name: Sync to Hugging Face Hub

on:
  push:
    branches: [main]
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    environment: Integration test
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed so the root commit can be resolved below.
          fetch-depth: 0
          lfs: true

      - name: Configure Git identity
        run: |
          git config --global user.name "Andrchest"
          git config --global user.email "andreipolevoi220@gmail.com"

      - name: Push to HF1
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git checkout -b hf1-single-commit
          git reset --soft $(git rev-list --max-parents=0 HEAD)
          git commit -m "Single commit for HF1"
          git remote add hf1 https://Andrchest:$HF_TOKEN@huggingface.co/spaces/The-Ultimate-RAG-HF/RAG-Integration-test
          git push --force hf1 hf1-single-commit:main

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest pytest-cov
          pip install -r app/requirements.txt

      - name: Wait for HF1 deployment
        run: sleep 120

      - name: Debug environment variables
        env:
          DATABASE_URL: ${{ secrets.DATABASE_URL }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          SECRET_PEPPER: ${{ secrets.SECRET_PEPPER }}
          JWT_ALGORITHM: ${{ secrets.JWT_ALGORITHM }}
          PYTHONPATH: ${{ github.workspace }}
        run: |
          # Report only whether each secret is present. The values themselves
          # must never be written to the job log, so there is no `env` dump.
          echo "DATABASE_URL is set: ${DATABASE_URL:+set}"
          echo "GEMINI_API_KEY is set: ${GEMINI_API_KEY:+set}"
          echo "SECRET_PEPPER is set: ${SECRET_PEPPER:+set}"
          echo "JWT_ALGORITHM is set: ${JWT_ALGORITHM:+set}"

      - name: Initialize directories
        env:
          DATABASE_URL: ${{ secrets.DATABASE_URL }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          SECRET_PEPPER: ${{ secrets.SECRET_PEPPER }}
          JWT_ALGORITHM: ${{ secrets.JWT_ALGORITHM }}
          PYTHONPATH: ${{ github.workspace }}
        working-directory: ./
        run: |
          python -m app.initializer

      - name: Debug directory structure
        run: |
          ls -R

      - name: Run integration tests with coverage
        env:
          HF1_URL: ${{ secrets.HF1_URL }}
          DATABASE_URL: ${{ secrets.DATABASE_URL }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          SECRET_PEPPER: ${{ secrets.SECRET_PEPPER }}
          JWT_ALGORITHM: ${{ secrets.JWT_ALGORITHM }}
          PYTHONPATH: ${{ github.workspace }}
        working-directory: ./
        run: |
          echo "PYTHONPATH: $PYTHONPATH"
          python -m pytest app/tests/integration/test.py -v --cov=app --cov-report=xml --cov-report=html

      - name: Upload coverage report
        uses: actions/upload-artifact@v4
        with:
          name: integration-coverage-report
          path: |
            coverage.xml
            htmlcov/

      - name: Push to HF2 if tests pass
        if: success()
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git checkout -b hf2-single-commit
          git reset --soft $(git rev-list --max-parents=0 HEAD)
          git commit -m "Single commit for HF2"
          git remote add hf2 https://Andrchest:$HF_TOKEN@huggingface.co/spaces/The-Ultimate-RAG-HF/The-Ultimate-RAG
          git push --force hf2 hf2-single-commit:main
|
.github/workflows/unit-tests.yml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Lints the app and runs the unit tests with coverage on every pull request
# that targets main.
name: Unit Tests

on:
  pull_request:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install coverage
          pip install -r app/requirements.txt
          pip install flake8 pytest

      - name: Run linter
        run: |
          flake8 app/ --max-line-length=160 --extend-ignore=E203

      - name: Run unit tests with coverage
        run: |
          coverage run -m pytest app/tests/unit/test.py
          coverage xml
          coverage html

      # The same reports are kept as a build artifact and sent to Codecov.
      - name: Upload coverage report
        uses: actions/upload-artifact@v4
        with:
          name: unit-coverage-report
          path: |
            coverage.xml
            htmlcov/

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
|
.gitignore
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
| 2 |
+
/app/temp_storage
|
| 3 |
+
/database
|
| 4 |
+
/new_env
|
| 5 |
+
/prompt.txt
|
| 6 |
+
/app/key.py
|
| 7 |
+
/app/env_vars.py
|
| 8 |
+
/chats_storage
|
| 9 |
+
/.env
|
| 10 |
+
exp.*
|
| 11 |
+
response.txt
|
Dockerfile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# syntax=docker/dockerfile:1
FROM python:3.12.10

# Run everything below as an unprivileged user (UID 1000) instead of root.
RUN useradd -m -u 1000 user
USER user
# pip's user-level scripts and the Qdrant binary both live in ~/.local/bin.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# copy and install Python reqs
COPY --chown=user app/requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# download Qdrant binary
# Work in /tmp and create the target directory explicitly, so this step does
# not depend on /app being writable by `user` or on pip having already
# created ~/.local/bin.
RUN mkdir -p /home/user/.local/bin \
    && wget -O /tmp/qdrant.tar.gz https://github.com/qdrant/qdrant/releases/download/v1.11.5/qdrant-x86_64-unknown-linux-gnu.tar.gz \
    && tar -xzf /tmp/qdrant.tar.gz -C /tmp \
    && mv /tmp/qdrant /home/user/.local/bin/qdrant \
    && rm /tmp/qdrant.tar.gz

COPY --chown=user . /app

RUN chmod +x start.sh

CMD ["./start.sh"]
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 Danil Popov
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
CHANGED
|
@@ -1,2 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# The-Ultimate-RAG
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: The Ultimate RAG
|
| 3 |
+
emoji: 🌍
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
short_description: the ultimate rag
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
# The-Ultimate-RAG
|
| 12 |
+
|
| 13 |
+
## Overview
|
| 14 |
+
|
| 15 |
+
[S25] The Ultimate RAG is an Innopolis University software project that generates cited responses from a local database.
|
| 16 |
+
|
| 17 |
+
## Prerequisites
|
| 18 |
+
|
| 19 |
+
Before you begin, ensure the following are installed on your machine:
|
| 20 |
+
|
| 21 |
+
- [Python](https://www.python.org/)
|
| 22 |
+
- [Docker](https://www.docker.com/get-started/)
|
| 23 |
+
|
| 24 |
+
## Installation
|
| 25 |
+
|
| 26 |
+
1. **Clone the repository**
|
| 27 |
+
```bash
|
| 28 |
+
git clone https://github.com/PopovDanil/The-Ultimate-RAG
|
| 29 |
+
cd The-Ultimate-RAG
|
| 30 |
+
```
|
| 31 |
+
2. **Set up a virtual environment (recommended)**
|
| 32 |
+
|
| 33 |
+
To isolate project dependencies and avoid conflicts, create a virtual environment:
|
| 34 |
+
- **On Unix/Linux/macOS:**
|
| 35 |
+
```bash
|
| 36 |
+
python3 -m venv env
|
| 37 |
+
source env/bin/activate
|
| 38 |
+
```
|
| 39 |
+
- **On Windows:**
|
| 40 |
+
```bash
|
| 41 |
+
python -m venv env
|
| 42 |
+
env\Scripts\activate
|
| 43 |
+
```
|
| 44 |
+
3. **Install required libraries**
|
| 45 |
+
|
| 46 |
+
Within the activated virtual environment, install the dependencies:
|
| 47 |
+
```bash
|
| 48 |
+
pip install -r ./app/requirements.txt
|
| 49 |
+
```
|
| 50 |
+
*Note:* ensure you are in the virtual environment before running the command
|
| 51 |
+
|
| 52 |
+
4. **Set up Docker**
|
| 53 |
+
- Ensure Docker is running on your machine
|
| 54 |
+
- Open a terminal, navigate to the project directory, and run:
|
| 55 |
+
```bash
|
| 56 |
+
docker-compose up --build
|
| 57 |
+
```
|
| 58 |
+
*Note:* The initial build may take 10–20 minutes, as it needs to download large language models and other
|
| 59 |
+
dependencies.
|
| 60 |
+
Later launches will be much faster.
|
| 61 |
+
|
| 62 |
+
5. **Server access**
|
| 63 |
+
|
| 64 |
+
Once the containers are running, visit `http://localhost:5050`. You should see the application’s welcome page.
|
| 65 |
+
|
| 66 |
+
To stop the application and shut down all containers, press `Ctrl+C` in the terminal where `docker-compose` is running,
|
| 67 |
+
and then run:
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
docker-compose down
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
## Usage
|
| 74 |
+
|
| 75 |
+
1. **Upload your file:** click the upload button and select a supported file (`.txt`, `.doc`, `.docx`, or `.pdf`)
|
| 76 |
+
2. **Ask a question**: Once the file is processed, type your question into the prompt box and submit.
|
| 77 |
+
3. **Receive your answer**
|
| 78 |
+
|
| 79 |
+
**A note on performance**
|
| 80 |
+
|
| 81 |
+
Response generation is a computationally intensive task.
|
| 82 |
+
The time to receive an answer may vary depending on your machine's hardware and the complexity of the query.
|
| 83 |
+
|
| 84 |
+
## License
|
| 85 |
+
|
| 86 |
+
This project is licensed under the [MIT License](LICENSE).
|
app/__init__.py
ADDED
|
File without changes
|
app/api/__init__.py
ADDED
|
File without changes
|
app/api/api.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.backend.controllers.messages import register_message
|
| 2 |
+
from app.core.document_validator import path_is_valid
|
| 3 |
+
from app.core.response_parser import add_links
|
| 4 |
+
from app.backend.models.users import User
|
| 5 |
+
from app.settings import BASE_DIR
|
| 6 |
+
from app.backend.controllers.chats import (
|
| 7 |
+
get_chat_with_messages,
|
| 8 |
+
create_new_chat,
|
| 9 |
+
update_title,
|
| 10 |
+
list_user_chats
|
| 11 |
+
)
|
| 12 |
+
from app.backend.controllers.users import (
|
| 13 |
+
extract_user_from_context,
|
| 14 |
+
get_current_user,
|
| 15 |
+
get_latest_chat,
|
| 16 |
+
refresh_cookie,
|
| 17 |
+
authorize_user,
|
| 18 |
+
check_cookie,
|
| 19 |
+
create_user
|
| 20 |
+
)
|
| 21 |
+
from app.core.utils import (
|
| 22 |
+
construct_collection_name,
|
| 23 |
+
create_collection,
|
| 24 |
+
extend_context,
|
| 25 |
+
initialize_rag,
|
| 26 |
+
save_documents,
|
| 27 |
+
protect_chat,
|
| 28 |
+
TextHandler,
|
| 29 |
+
PDFHandler,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
from fastapi.templating import Jinja2Templates
|
| 33 |
+
from fastapi.staticfiles import StaticFiles
|
| 34 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 35 |
+
from fastapi import (
|
| 36 |
+
HTTPException,
|
| 37 |
+
UploadFile,
|
| 38 |
+
Request,
|
| 39 |
+
Depends,
|
| 40 |
+
FastAPI,
|
| 41 |
+
Form,
|
| 42 |
+
File,
|
| 43 |
+
)
|
| 44 |
+
from fastapi.responses import (
|
| 45 |
+
StreamingResponse,
|
| 46 |
+
RedirectResponse,
|
| 47 |
+
FileResponse,
|
| 48 |
+
JSONResponse,
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
from typing import Optional
|
| 52 |
+
import os
|
| 53 |
+
|
| 54 |
+
# <------------------------------------- API ------------------------------------->
|
| 55 |
+
api = FastAPI()
rag = initialize_rag()

# CORS: any origin, method and header is accepted, with credentials allowed.
origins = ["*"]
api.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Directories served or rendered by the app, all resolved against BASE_DIR.
_chats_storage_dir = os.path.join(BASE_DIR, "chats_storage")
_static_dir = os.path.join(BASE_DIR, "app", "frontend", "static")
_templates_dir = os.path.join(BASE_DIR, "app", "frontend", "templates")

api.mount("/chats_storage", StaticFiles(directory=_chats_storage_dir), name="chats_storage")
api.mount("/static", StaticFiles(directory=_static_dir), name="static")
templates = Jinja2Templates(directory=_templates_dir)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# <--------------------------------- Middleware --------------------------------->
|
| 86 |
+
@api.middleware("http")
async def require_user(request: Request, call_next):
    """Attach a user to every request before it reaches the route handlers.

    Requests whose path starts with ``pdfs`` or contains ``static/styles.css``
    or ``favicon.ico`` are passed through untouched. For all other requests
    the user returned by ``get_current_user`` is stored on
    ``request.state.current_user``; when there is none, a new user is created
    with ``create_user`` and ``authorize_user`` is applied to the response.
    For an already known user ``refresh_cookie`` is applied instead.
    """
    print("&" * 40, "START MIDDLEWARE", "&" * 40)
    try:
        print(f"Path ----> {request.url.path}, Method ----> {request.method}, Port ----> {request.url.port}\n")

        stripped_path = request.url.path.strip("/")
        skip_user_handling = (
            stripped_path.startswith("pdfs")
            or "static/styles.css" in stripped_path
            or "favicon.ico" in stripped_path
        )
        if skip_user_handling:
            return await call_next(request)

        user = get_current_user(request)
        authorized = user is not None
        if not authorized:
            user = create_user()

        print(f"User in Context ----> {user.id}\n")

        request.state.current_user = user
        response = await call_next(request)

        if authorized:
            refresh_cookie(request=request, response=response)
        else:
            authorize_user(response, user)
        return response
    finally:
        # Runs on both the success and the error path; errors propagate as-is.
        print("&" * 40, "END MIDDLEWARE", "&" * 40, "\n\n")
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
# <--------------------------------- Common routes --------------------------------->
|
| 125 |
+
@api.post("/message_with_docs")
async def send_message(
    request: Request,
    files: list[UploadFile] = File(None),
    prompt: str = Form(...),
    chat_id: str = Form(None),
) -> StreamingResponse:
    """Store the user's prompt and uploaded files, then stream the RAG answer.

    Args:
        request: Incoming request; the current user is read from its context.
        files: Optional uploaded documents to save for this chat.
        prompt: The user's message text.
        chat_id: Id of the chat the message belongs to.

    Returns:
        A ``text/event-stream`` response fed by ``rag.generate_response_stream``.

    Raises:
        HTTPException: 500 when preparing the response fails. Previously the
            error was only printed and the handler returned ``None``, which
            the client received as a successful empty reply.
    """
    try:
        user = extract_user_from_context(request)
        print("-" * 100, "User ---->", user, "-" * 100, "\n\n")
        collection_name = construct_collection_name(user, chat_id)

        message_id = register_message(content=prompt, sender="user", chat_id=chat_id)

        await save_documents(
            collection_name, files=files, RAG=rag, user=user, chat_id=chat_id, message_id=message_id
        )

        return StreamingResponse(
            rag.generate_response_stream(
                collection_name=collection_name, user_prompt=prompt, stream=True
            ),
            status_code=200,
            media_type="text/event-stream",
        )
    except HTTPException:
        # Already a well-formed HTTP error (e.g. raised by a helper): keep it.
        raise
    except Exception as e:
        print(e)
        raise HTTPException(status_code=500, detail="Failed to process the message.") from e
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
@api.post("/replace_message")
async def replace_message(request: Request):
    """Persist the message sent in the JSON body as a "system" message.

    The body's ``message`` field (default ``""``) is written to
    ``response.txt`` under BASE_DIR, registered for the chat given by the
    ``chatId`` field, and echoed back as ``updated_message``.
    """
    payload = await request.json()

    dump_path = os.path.join(BASE_DIR, "response.txt")
    with open(dump_path, "w") as dump_file:
        updated_message = payload.get("message", "")
        dump_file.write(updated_message)

    register_message(content=updated_message, sender="system", chat_id=payload.get("chatId"))
    return JSONResponse({"updated_message": updated_message})
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
@api.get("/viewer/{path:path}")
def show_document(
    request: Request,
    path: str,
    page: Optional[int] = 1,
    lines: Optional[str] = "1-1",
    start: Optional[int] = 0,
):
    """Serve a stored document, choosing the viewer by file extension.

    Args:
        request: Incoming request, forwarded to the text viewer.
        path: Document path taken from the URL; resolved with ``realpath``.
        page: Accepted as a query parameter; not used by this handler.
        lines: Line range forwarded to ``TextHandler``.
        start: Accepted as a query parameter; not used by this handler.

    Returns:
        ``TextHandler(...)`` for txt/csv/md/json/docx/doc files, otherwise a
        ``FileResponse`` for the resolved path (including PDFs).

    Raises:
        HTTPException: 404 when ``path_is_valid`` rejects the resolved path.
            The exception used to be *returned* instead of raised, so the
            client never received the 404.
    """
    print(f"DEBUG: Show document with path: {path}, page: {page}, lines: {lines}, start: {start}")
    path = os.path.realpath(path)
    print(f"DEBUG: Real path: {path}")

    if not path_is_valid(path):
        raise HTTPException(status_code=404, detail="Document not found")

    ext = path.split(".")[-1]
    if ext == "pdf":
        print("Open pdf file by path")
        return FileResponse(path=path)
    elif ext in ("txt", "csv", "md", "json"):
        print("Open txt file by path")
        return TextHandler(request, path=path, lines=lines, templates=templates)
    elif ext in ("docx", "doc"):
        return TextHandler(request, path=path, lines=lines, templates=templates)
    else:
        return FileResponse(path=path)
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# <--------------------------------- Get --------------------------------->
|
| 199 |
+
@api.get("/list_chats")
def list_chats_for_user(request: Request):
    """Return the current user's chats, as built by ``list_user_chats``, as JSON."""
    current_user = extract_user_from_context(request)
    chats = list_user_chats(current_user.id)
    print(f"Chats for user {current_user.id}: {chats}")
    payload = {"chats": chats}
    return JSONResponse(payload)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
@api.get("/chats/{chat_id}")
def show_chat(request: Request, chat_id: str):
    """Return a chat and its messages as JSON.

    Args:
        request: Incoming request; the current user is read from its context.
        chat_id: Id of the chat to show.

    Raises:
        HTTPException: 401 when ``protect_chat`` denies the user access,
            404 when no chat data is returned.
    """
    user = extract_user_from_context(request)

    if not protect_chat(user, chat_id):
        # Fixed the user-facing typo ("Yod" -> "You"); status code unchanged.
        raise HTTPException(401, "You do not have rights to use this chat!")

    chat_data = get_chat_with_messages(chat_id)

    print(f"DEBUG: Data for chat '{chat_id}' from get_chat_with_messages: {chat_data}")

    if not chat_data:
        raise HTTPException(status_code=404, detail=f"Chat with id {chat_id} not found.")

    update_title(chat_data["chat_id"])

    return JSONResponse(content=chat_data)
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
@api.get("/")
def last_user_chat(request: Request):
    """Redirect to the user's latest chat, creating a first chat if needed.

    Returns:
        A 303 redirect to ``/chats/<id>``.

    Raises:
        HTTPException: 500 when the collection for a new chat cannot be
            created.
    """
    user = extract_user_from_context(request)
    chat = get_latest_chat(user)

    if chat is not None:
        return RedirectResponse(f"/chats/{chat.id}", status_code=303)

    print("new_chat")
    new_chat = create_new_chat("new chat", user)

    # create_new_chat returns {"id": ..., "title": ...}. The previous code read
    # the "chat_id" and "url" keys, which that dict does not contain, so both
    # the collection id and the redirect target were None. The old keys are
    # still honoured first in case another implementation supplies them.
    chat_id = new_chat.get("chat_id") or new_chat.get("id")
    url = new_chat.get("url") or f"/chats/{chat_id}"

    try:
        create_collection(user, chat_id, rag)
    except Exception as e:
        # detail must be serialisable, so pass the message, not the exception.
        raise HTTPException(500, str(e)) from e

    return RedirectResponse(url, status_code=303)
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
# <--------------------------------- Post --------------------------------->
|
| 248 |
+
@api.post("/new_chat")
def create_chat(request: Request, title: Optional[str] = "new chat"):
    """Create a chat for the current user plus its collection; return the chat as JSON.

    Raises:
        HTTPException: 500 when the created chat data carries no ``id``.
    """
    user = extract_user_from_context(request)
    new_chat_data = create_new_chat(title, user)

    chat_id = new_chat_data.get("id")
    if not chat_id:
        raise HTTPException(500, "New chat could not be created.")

    create_collection(user, chat_id, rag)
    return JSONResponse(new_chat_data)


# Running this module directly does nothing.
if __name__ == "__main__":
    pass
|
app/automigration.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.backend.models.db_service import automigrate
|
| 2 |
+
|
| 3 |
+
# Script entry point: run the automigration only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    automigrate()
|
app/backend/__init__.py
ADDED
|
File without changes
|
app/backend/controllers/__init__.py
ADDED
|
File without changes
|
app/backend/controllers/base_controller.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.settings import settings
|
| 2 |
+
from sqlalchemy import create_engine
|
| 3 |
+
|
| 4 |
+
# Keyword arguments for create_engine, dumped from the Postgres settings model.
postgres_config = settings.postgres.model_dump()

# Module-level SQLAlchemy engine built from those settings.
engine = create_engine(**postgres_config)
|
app/backend/controllers/chats.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.backend.models.messages import get_messages_by_chat_id, Message
|
| 2 |
+
from app.backend.models.users import User, get_user_chats
|
| 3 |
+
from app.backend.models.documents import Document
|
| 4 |
+
from app.backend.controllers.utils import get_group_title
|
| 5 |
+
from app.settings import BASE_DIR
|
| 6 |
+
from app.backend.models.chats import (
|
| 7 |
+
get_chats_by_user_id,
|
| 8 |
+
get_chat_by_id,
|
| 9 |
+
refresh_title,
|
| 10 |
+
add_new_chat,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
from datetime import datetime, timedelta
|
| 14 |
+
from fastapi import HTTPException
|
| 15 |
+
from uuid import uuid4
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def create_new_chat(title: str | None, user: User) -> dict:
    """Register a new chat for ``user`` and create its documents folder.

    The chat gets a fresh UUID4 id, is stored through ``add_new_chat``, and a
    ``chats_storage/user_id=<id>/chat_id=<id>/documents`` directory is created
    under BASE_DIR.

    Returns:
        ``{"id": <chat id>, "title": <title>}``.

    Raises:
        HTTPException: 500 when the chat folders cannot be created.
    """
    print("+" * 40, "START Creating Chat", "+" * 40)
    try:
        chat_id = str(uuid4())
        add_new_chat(id=chat_id, title=title, user=user)

        try:
            documents_dir = os.path.join(
                BASE_DIR,
                "chats_storage",
                f"user_id={user.id}",
                f"chat_id={chat_id}",
                "documents",
            )
            os.makedirs(documents_dir, exist_ok=True)
        except Exception:
            raise HTTPException(500, "error while creating chat folders")

        return dict(id=chat_id, title=title)
    finally:
        # Printed on success and on failure; exceptions propagate unchanged.
        print("+" * 40, "END Creating Chat", "+" * 40, "\n\n")
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def dump_documents_dict(documents: list[Document]) -> list[dict]:
    """Convert documents into plain dicts with ``name``, ``path`` and ``size``.

    The return annotation used to be ``list[map]``; the function has always
    returned a list of dicts, so the annotation now says so.

    Args:
        documents: Objects exposing ``name``, ``path`` and ``size`` attributes.

    Returns:
        One dict per document, in input order.
    """
    output = []
    for doc in documents:
        output.append({"name": doc.name, "path": doc.path, "size": doc.size})
        print("Add document --->", doc.name)
    return output
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def dump_messages_dict(messages: list[Message], dst: dict) -> None:
    """Store the chat history built from ``messages`` under ``dst["messages"]``.

    Each history entry holds the message's ``sender``, ``content`` and its
    documents as dumped by ``dump_documents_dict``.
    """
    print("!" * 40, "START Dumping History", "!" * 40)

    history = []
    for message in messages:
        entry = {
            "sender": message.sender,
            "content": message.content,
            "documents": dump_documents_dict(message.documents),
        }
        history.append(entry)
        print(f"Role ----> {message.sender}, Content ----> {message.content}\n")

    print("!" * 40, "END Dumping History", "!" * 40, "\n\n")

    dst["messages"] = history
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def get_chat_with_messages(id: str) -> dict:
    """Return ``{"chat_id": id, "messages": [...]}`` for an existing chat.

    Args:
        id: Id of the chat to load.

    Raises:
        HTTPException: 404 when no chat with this id exists. This used to be
            418 ("I'm a teapot"), which does not describe a missing resource.
    """
    response = {"chat_id": id}

    chat = get_chat_by_id(id=id)
    if chat is None:
        raise HTTPException(404, f"Invalid chat id. Chat with id={id} does not exist!")

    messages = get_messages_by_chat_id(id=id)
    dump_messages_dict(messages, response)

    return response
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def create_dict_from_chat(chat) -> dict:
    """Return the chat's ``id`` and ``title`` as a plain dict."""
    return dict(id=chat.id, title=chat.title)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def list_user_chats(user_id: str) -> list[dict]:
    """Group a user's chats by age and return the non-empty groups.

    Chats are bucketed by ``created_at`` relative to now: within the last
    day, week, four weeks, or older. Each non-empty bucket becomes
    ``{"title": get_group_title(id=<bucket index>), "chats": [...]}``,
    in that order.
    """
    now = datetime.now()
    day_ago = now - timedelta(days=1)
    week_ago = now - timedelta(weeks=1)
    four_weeks_ago = now - timedelta(weeks=4)

    # Index 0: last day, 1: last week, 2: last four weeks, 3: everything older.
    buckets = [[], [], [], []]

    for chat in get_chats_by_user_id(user_id):
        if day_ago <= chat.created_at:
            bucket_index = 0
        elif week_ago <= chat.created_at:
            bucket_index = 1
        elif four_weeks_ago <= chat.created_at:
            bucket_index = 2
        else:
            bucket_index = 3
        buckets[bucket_index].append(chat)

    return [
        {
            "title": get_group_title(id=bucket_index),
            "chats": [create_dict_from_chat(chat) for chat in bucket],
        }
        for bucket_index, bucket in enumerate(buckets)
        if len(bucket)
    ]
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def verify_ownership_rights(user: User, chat_id: str) -> bool:
    """Tell whether ``chat_id`` is the id of one of ``user``'s chats.

    Args:
        user: the user whose chats are inspected.
        chat_id: chat identifier to look for.

    Returns:
        True when a chat returned by ``get_user_chats(user)`` has this id.
    """
    owned_ids = [chat.id for chat in get_user_chats(user)]
    return chat_id in owned_ids
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def update_title(chat_id: str) -> bool:
    """Controller-level entry point that delegates to ``refresh_title``.

    Args:
        chat_id: identifier of the chat whose title should be refreshed.

    Returns:
        Whatever ``refresh_title`` reports for this chat.
    """
    refreshed = refresh_title(chat_id)
    return refreshed
|
app/backend/controllers/messages.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.backend.models.messages import add_new_message
|
| 2 |
+
from uuid import uuid4
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def remove_html_tags(content: str) -> str:
    """Replace every ``<...>`` span in ``content`` with a fixed anchor tag.

    The replacement is inserted directly instead of going through an
    intermediate placeholder, so user text that happens to contain the old
    placeholder word is left untouched. ``re.DOTALL`` lets a tag that is
    split across lines be matched as well.

    Args:
        content: raw, untrusted message text.

    Returns:
        The text with each tag-like span swapped for the anchor markup.
    """
    replace_with = (
        "<a href=https://www.youtube.com/results?search_query=rickroll>click me</a>"
    )
    # A callable replacement is used verbatim: no backslash/group expansion.
    return re.sub(r"<(.*?)>", lambda _match: replace_with, content, flags=re.DOTALL)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def register_message(content: str, sender: str, chat_id: str) -> str:
    """Persist a chat message under a freshly generated id.

    Messages whose sender is ``"system"`` are stored as-is; every other
    sender's content is passed through ``remove_html_tags`` first.

    Args:
        content: message text.
        sender: role of the author (compared against ``"system"``).
        chat_id: chat the message belongs to.

    Returns:
        The value returned by ``add_new_message``.
    """
    print("-" * 40, "START Registering Message", "-" * 40)
    try:
        message_id = str(uuid4())

        if sender == "system":
            body = content
        else:
            body = remove_html_tags(content)

        # Only a short preview goes to the log.
        print(f"Message -----> {body[:30]}")

        return add_new_message(
            id=message_id, chat_id=chat_id, sender=sender, content=body
        )
    finally:
        print("-" * 40, "END Registering Message", "-" * 40, "\n\n")
|
app/backend/controllers/users.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.backend.models.chats import Chat
|
| 2 |
+
from app.settings import settings
|
| 3 |
+
from app.backend.models.users import (
|
| 4 |
+
get_user_last_chat,
|
| 5 |
+
find_user_by_id,
|
| 6 |
+
add_new_user,
|
| 7 |
+
User,
|
| 8 |
+
)
|
| 9 |
+
|
| 10 |
+
from fastapi import Response, Request, HTTPException
|
| 11 |
+
from datetime import datetime, timedelta, timezone
|
| 12 |
+
|
| 13 |
+
from uuid import uuid4
|
| 14 |
+
import jwt
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def extract_user_from_context(request: Request) -> User | None:
    """Read the user previously attached to ``request.state``.

    Args:
        request: incoming request.

    Returns:
        ``request.state.current_user`` when that attribute exists, otherwise
        None (after printing a diagnostic line).
    """
    state = request.state
    if not hasattr(state, "current_user"):
        print("*" * 40, "No attribute 'current_user`", "*" * 40, "\n")
        return None
    return state.current_user
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def create_access_token(user_id: str, expires_delta: timedelta = settings.max_cookie_lifetime) -> str:
    """Create a signed JWT carrying the user id and an expiry.

    Args:
        user_id: identifier stored under the ``user_id`` claim.
        expires_delta: lifetime of the token, added to the current time.

    Returns:
        The encoded token.

    Raises:
        HTTPException: status 500 when encoding fails.
    """
    token_payload = {
        "user_id": user_id,
        # The expiry must be timezone-aware UTC: a naive local datetime would
        # be interpreted as UTC when serialised, shifting the expiry by the
        # server's UTC offset.
        "exp": datetime.now(timezone.utc) + expires_delta,
    }

    try:
        encoded_jwt: str = jwt.encode(
            token_payload, settings.secret_pepper, algorithm=settings.jwt_algorithm
        )
    except Exception as error:
        raise HTTPException(status_code=500, detail="json encoding error") from error

    # The token is a bearer credential, so only the event is logged.
    print("^" * 40, "New JWT token was created", "^" * 40)
    print("^" * 105, "\n\n")

    return encoded_jwt
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def create_user() -> User | None:
    """Create and persist a user with a random UUID as id.

    Returns:
        The user returned by ``add_new_user``.

    Raises:
        HTTPException: status 418 carrying the error text when the insert
            fails.
    """
    new_user_id = str(uuid4())
    try:
        user = add_new_user(id=new_user_id)
    except Exception as e:
        # detail must be serialisable, so pass the message, not the exception.
        raise HTTPException(status_code=418, detail=str(e)) from e

    print("$" * 40, "New User was created", "$" * 40)
    print(f"Created user - {user.id}")
    print("$" * 100, "\n\n")

    return user
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def authorize_user(response: Response, user: User) -> dict:
    """Issue an access token for ``user`` and attach it as a cookie.

    Args:
        response: outgoing response that receives the ``access_token`` cookie.
        user: the user to authorize; only ``user.id`` is read.

    Returns:
        ``{"status": "ok"}``.
    """
    print("%" * 40, "START Authorizing User", "%" * 40)
    try:
        access_token: str = create_access_token(user_id=user.id)
        lifetime = settings.max_cookie_lifetime
        expires = datetime.now(timezone.utc) + lifetime

        response.set_cookie(
            key="access_token",
            value=access_token,
            path="/",
            expires=expires.strftime("%a, %d %b %Y %H:%M:%S GMT"),
            # Max-Age is a number of seconds; passing the timedelta itself
            # would render an invalid attribute value.
            max_age=int(lifetime.total_seconds()),
            httponly=True,
            secure=True,
            samesite='None'
        )

        return {"status": "ok"}
    finally:
        print("%" * 40, "END Authorizing User", "%" * 40)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def get_current_user(request: Request) -> User | None:
    """Resolve the user referenced by the ``access_token`` cookie.

    Args:
        request: incoming request whose cookies are inspected.

    Returns:
        The matching user, or None when the cookie is missing/empty or no
        user with the decoded id exists.

    Raises:
        Any exception raised by ``jwt.decode`` or ``find_user_by_id``
        propagates unchanged.
    """
    print("-" * 40, "START Getting User", "-" * 40)
    try:
        token: str | None = request.cookies.get("access_token")

        # Log only whether a token is present; the value is a credential.
        print(f"Token -----> {'<present>' if token else 'Empty token!'}\n")

        if not token:
            return None

        user_id = jwt.decode(
            jwt=bytes(token, encoding="utf-8"),
            key=settings.secret_pepper,
            algorithms=[settings.jwt_algorithm],
        ).get("user_id")

        print(f"User id -----> {user_id if user_id else 'Empty user id!'}\n")

        user = find_user_by_id(id=user_id)

        print(f"Found user -----> {user.id if user else 'No user was found!'}")

        return user if user else None
    finally:
        print("-" * 40, "END Getting User", "-" * 40, "\n\n")
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def check_cookie(request: Request) -> dict:
    """Report the ``access_token`` cookie carried by the request.

    Args:
        request: incoming request.

    Returns:
        ``{"token": <cookie value>}`` when the cookie is present and
        non-empty, otherwise ``{"token": "No token is present"}``.
    """
    cookie_value = request.cookies.get("access_token")
    return {"token": cookie_value if cookie_value else "No token is present"}
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def clear_cookie(response: Response) -> dict:
    """Overwrite the ``access_token`` cookie with an empty value.

    Args:
        response: outgoing response that receives the blank cookie.

    Returns:
        ``{"status": "ok"}``.
    """
    # The cookie is blanked rather than deleted.
    response.set_cookie(key="access_token", value="", httponly=True)
    outcome = {"status": "ok"}
    return outcome
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def get_latest_chat(user: User) -> Chat | None:
    """Controller-level wrapper around ``get_user_last_chat``.

    Args:
        user: the user whose chats are consulted.

    Returns:
        Whatever ``get_user_last_chat`` returns for this user.
    """
    latest = get_user_last_chat(user)
    return latest
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def refresh_cookie(request: Request, response: Response) -> None:
    """Re-issue the access cookie when it is close to expiring.

    The cookie is refreshed once less than 20% of
    ``settings.max_cookie_lifetime`` remains before the token's ``exp``.

    Args:
        request: incoming request carrying the current cookie.
        response: outgoing response that receives the renewed cookie.

    Raises:
        HTTPException: 401 when the token has expired, 500 for any other
            JWT decoding error.
    """
    print("+" * 40, "START Refreshing cookie", "+" * 40)
    try:
        token: str | None = request.cookies.get("access_token")

        # Log only whether a token is present; the value is a credential.
        print(f"Token -----> {'<present>' if token else 'Empty token!'}\n")

        # clear_cookie() leaves an empty-valued cookie behind, so an empty
        # string must be treated like a missing token instead of being decoded.
        if not token:
            return

        try:
            jwt_token = jwt.decode(
                jwt=bytes(token, encoding="utf-8"),
                key=settings.secret_pepper,
                algorithms=[settings.jwt_algorithm],
            )
            exp_datetime = datetime.fromtimestamp(jwt_token.get("exp"), tz=timezone.utc)
            print(f"Expires -----> {exp_datetime if exp_datetime else 'No expiration date!'}\n")
        except jwt.ExpiredSignatureError:
            raise HTTPException(status_code=401, detail="jwt signature has expired")
        except jwt.PyJWTError as e:
            # detail must be serialisable, so pass the message text.
            raise HTTPException(status_code=500, detail=str(e)) from e

        diff = exp_datetime - datetime.now(timezone.utc)
        print(f"Difference -----> {diff if diff else 'No difference in date!'}\n")

        if diff.total_seconds() < 0.2 * settings.max_cookie_lifetime.total_seconds():
            print("<----- Refreshing ----->")
            user = extract_user_from_context(request)
            # Without a resolved user there is nobody to issue a token for.
            if user is None:
                return
            authorize_user(response, user)
    finally:
        print("+" * 40, "END Refreshing cookie", "+" * 40, "\n\n")
|
app/backend/controllers/utils.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def get_group_title(id: int) -> str:
    """Map a chat-group index to its display title.

    Args:
        id: group index (0-3).

    Returns:
        "TODAY", "LAST_WEEK", "LAST_MONTH" or "LATER"; any index outside
        0-2 yields "LATER".
    """
    titles = {0: "TODAY", 1: "LAST_WEEK", 2: "LAST_MONTH", 3: "LATER"}
    return titles.get(id, "LATER")
|
app/backend/models/__init__.py
ADDED
|
File without changes
|
app/backend/models/base_model.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, DateTime
|
| 2 |
+
from sqlalchemy.orm import DeclarativeBase
|
| 3 |
+
from sqlalchemy.sql import func
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Base(DeclarativeBase):
    """Abstract declarative base shared by every ORM model.

    Contributes three timestamp columns to each concrete table.
    """

    __abstract__ = True

    # Filled with the database's now() when no value is supplied on insert.
    created_at = Column("created_at", DateTime, default=func.now())
    # Nullable; nothing in this class sets these two automatically.
    deleted_at = Column("deleted_at", DateTime, nullable=True)
    updated_at = Column("updated_at", DateTime, nullable=True)
|
app/backend/models/chats.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.backend.models.base_model import Base
|
| 2 |
+
from sqlalchemy import String, Column, ForeignKey
|
| 3 |
+
from sqlalchemy.orm import relationship, Session
|
| 4 |
+
from app.backend.controllers.base_controller import engine
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class Chat(Base):
    """ORM model for the ``chats`` table."""

    __tablename__ = "chats"

    id = Column("id", String, primary_key=True, unique=True)
    title = Column("title", String, nullable=True)
    # Owner of the chat.
    user_id = Column(String, ForeignKey("users.id"))

    # Bidirectional links: User.chats <-> Chat.user, Message.chat <-> Chat.messages.
    user = relationship("User", back_populates="chats")
    messages = relationship("Message", back_populates="chat")
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def add_new_chat(id: str, title: str | None, user) -> None:
    """Insert a new chat owned by ``user``.

    Args:
        id: primary key for the new chat.
        title: optional title; an empty or missing title leaves the column
            unset.
        user: owner; merged into this function's session before use.
    """
    with Session(autoflush=False, bind=engine) as db:
        owner = db.merge(user)
        chat = Chat(id=id, user_id=owner.id, user=owner)
        if title:
            chat.title = title
        db.add(chat)
        db.commit()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def get_chat_by_id(id: str) -> Chat | None:
    """Fetch a single chat by primary key.

    Args:
        id: chat identifier.

    Returns:
        The first matching chat, or None when there is none.
    """
    with Session(autoflush=False, bind=engine) as db:
        lookup = db.query(Chat).where(Chat.id == id)
        return lookup.first()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def get_chats_by_user_id(id: str) -> list[Chat]:
    """Load all chats owned by a user, newest first.

    The query is executed inside the session block so that a real list is
    returned, matching the annotation. Previously the unexecuted query
    object escaped the ``with`` block and only ran once the caller iterated
    it, after the session had been closed.

    Args:
        id: identifier of the owning user.

    Returns:
        The user's chats ordered by ``created_at`` descending. The objects
        are detached once the session closes; already-loaded column values
        remain readable.
    """
    with Session(autoflush=False, bind=engine) as db:
        return (
            db.query(Chat)
            .filter(Chat.user_id == id)
            .order_by(Chat.created_at.desc())
            .all()
        )
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def refresh_title(chat_id: str) -> bool:
    """Derive a chat's title from the first message in ``chat.messages``.

    The title is the first 47 characters of that message's content, with
    ``"..."`` appended only when the content was actually truncated.

    Args:
        chat_id: identifier of the chat to retitle.

    Returns:
        True when the title was updated; False when the chat does not exist
        or has no messages.
    """
    max_title_length = 47

    with Session(autoflush=False, bind=engine) as db:
        chat = db.get(Chat, chat_id)
        if chat is None:
            return False

        # NOTE(review): the relationship declares no ordering, so
        # messages[0] is assumed to be the earliest message - confirm.
        messages = chat.messages
        if not messages:
            return False

        content = messages[0].content or ""
        title = content[:max_title_length]
        if len(content) > max_title_length:
            title += "..."
        chat.title = title

        db.commit()
        return True
|
app/backend/models/db_service.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import inspect
|
| 2 |
+
from app.backend.controllers.base_controller import engine
|
| 3 |
+
from app.backend.models.base_model import Base
|
| 4 |
+
from app.backend.models.chats import Chat
|
| 5 |
+
from app.backend.models.messages import Message
|
| 6 |
+
from app.backend.models.users import User
|
| 7 |
+
from app.backend.models.documents import Document
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def table_exists(name: str) -> bool:
    """Check whether a table called ``name`` exists on the bound engine.

    Args:
        name: table name to look up.

    Returns:
        The result of the inspector's ``has_table`` check.
    """
    inspector = inspect(engine)
    return inspector.has_table(name)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def create_tables() -> None:
    """Create every table registered on ``Base.metadata`` on the engine."""
    metadata = Base.metadata
    metadata.create_all(engine)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def drop_tables() -> None:
    """Drop the application's tables, children before parents.

    Tables that do not exist are skipped, and a failure to drop one table
    is printed without stopping the remaining drops.
    """
    # Dependency order: documents -> messages -> chats -> users.
    drop_order = (
        Document.__table__,
        Message.__table__,
        Chat.__table__,
        User.__table__,
    )

    for table in drop_order:
        if not table_exists(table.name):
            print(f"Table {table.name} does not exist, skipping drop")
            continue

        try:
            table.drop(engine)
            print(f"Dropped table {table.name}")
        except Exception as e:
            print(f"Error dropping table {table.name}: {e}")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def automigrate() -> None:
    """Rebuild the schema from scratch.

    Calls ``drop_tables()`` and then ``create_tables()``, so the tables are
    dropped before being recreated.
    """
    print("Starting automigration...")
    for step in (drop_tables, create_tables):
        step()
    print("Automigration completed.")
|
app/backend/models/documents.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, ForeignKey, String, Text, Integer
|
| 2 |
+
from sqlalchemy.orm import Session, relationship
|
| 3 |
+
|
| 4 |
+
from app.backend.controllers.base_controller import engine
|
| 5 |
+
from app.backend.models.base_model import Base
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Document(Base):
    """ORM model for the ``documents`` table (files attached to a message)."""

    __tablename__ = "documents"

    id = Column('id', String, primary_key=True, unique=True)
    name = Column('name', String, nullable=False)
    path = Column('path', String, nullable=False)
    size = Column('size', Integer, nullable=False)

    # Message this document belongs to; mirrored by Message.documents.
    message_id = Column("message_id", ForeignKey("messages.id"))
    message = relationship("Message", back_populates="documents")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def add_new_document(id: str, name: str, path: str, message_id: str, size: int):
    """Insert a document row linked to a message.

    Args:
        id: primary key of the document.
        name: stored in the ``name`` column.
        path: stored in the ``path`` column.
        message_id: id of the message the document is attached to.
        size: stored in the ``size`` column.
    """
    record = Document(id=id, name=name, path=path, message_id=message_id, size=size)
    with Session(autoflush=False, bind=engine) as db:
        db.add(record)
        db.commit()
|
app/backend/models/messages.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, ForeignKey, String, Text, asc
|
| 2 |
+
from sqlalchemy.orm import Session, relationship, joinedload
|
| 3 |
+
|
| 4 |
+
from app.backend.controllers.base_controller import engine
|
| 5 |
+
from app.backend.models.base_model import Base
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Message(Base):
    """ORM model for the ``messages`` table."""

    __tablename__ = "messages"

    id = Column("id", String, primary_key=True, unique=True)
    # Attribute names differ from the column names: content -> "text",
    # sender -> "role".
    content = Column("text", Text)
    sender = Column("role", String)
    chat_id = Column(String, ForeignKey("chats.id"))

    # Mirrored by Chat.messages and Document.message respectively.
    chat = relationship("Chat", back_populates="messages")
    documents = relationship("Document", back_populates="message")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def add_new_message(id: str, chat_id: str, sender: str, content: str) -> str:
    """Insert a message row.

    Args:
        id: primary key of the message.
        chat_id: chat the message belongs to.
        sender: stored in the ``role`` column.
        content: stored in the ``text`` column.

    Returns:
        The ``id`` that was passed in.
    """
    record = Message(id=id, content=content, sender=sender, chat_id=chat_id)
    with Session(autoflush=False, bind=engine) as db:
        db.add(record)
        db.commit()
    return id
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def get_messages_by_chat_id(id: str) -> list[Message]:
    """Load a chat's messages, oldest first, with their documents.

    The query is executed inside the session block so that a real list is
    returned, matching the annotation. Previously the unexecuted query
    object escaped the ``with`` block and only ran once the caller iterated
    it, after the session had been closed.

    Args:
        id: identifier of the chat.

    Returns:
        The chat's messages ordered by ``created_at`` ascending. Each
        message's ``documents`` collection is eagerly loaded, so it stays
        readable after the session is closed.
    """
    with Session(autoflush=False, bind=engine) as db:
        return (
            db.query(Message)
            .options(joinedload(Message.documents))
            .filter(Message.chat_id == id)
            .order_by(asc(Message.created_at))
            .all()
        )
|
app/backend/models/users.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.backend.controllers.base_controller import engine
|
| 2 |
+
from app.backend.models.base_model import Base
|
| 3 |
+
from app.backend.models.chats import Chat
|
| 4 |
+
|
| 5 |
+
from sqlalchemy.orm import relationship, Session
|
| 6 |
+
from sqlalchemy import Column, String
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class User(Base):
    """ORM model for the ``users`` table."""

    __tablename__ = "users"

    id = Column("id", String, primary_key=True, unique=True)
    # NOTE(review): the default "English" is not one of the LanguageOptions
    # values ("ar", "en", "ru") declared in app/backend/schemas.py - confirm
    # which form consumers expect.
    language = Column("language", String, default="English", nullable=False)
    theme = Column("theme", String, default="light", nullable=False)

    # Mirrored by Chat.user.
    chats = relationship("Chat", back_populates="user")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def add_new_user(id: str) -> User:
    """Insert a user row with the given id.

    The session is opened with ``expire_on_commit=False`` so the returned
    object's attributes are not expired by the commit.

    Args:
        id: primary key of the new user.

    Returns:
        The newly created user.
    """
    created = User(id=id)
    with Session(autoflush=False, bind=engine, expire_on_commit=False) as db:
        db.add(created)
        db.commit()
        return created
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def find_user_by_id(id: str) -> User | None:
    """Fetch a single user by primary key.

    Args:
        id: user identifier.

    Returns:
        The first matching user, or None when there is none.
    """
    with Session(autoflush=False, bind=engine) as db:
        lookup = db.query(User).where(User.id == id)
        return lookup.first()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def update_user(user: User, language: str = None, theme: str = None) -> None:
    """Update a user's language and/or theme.

    Only truthy arguments are applied; a None or empty value leaves the
    corresponding column unchanged.

    Args:
        user: user to update; merged into this function's session first.
        language: new language value, if any.
        theme: new theme value, if any.
    """
    changes = (("language", language), ("theme", theme))

    with Session(autoflush=False, bind=engine) as db:
        merged = db.merge(user)
        for field, value in changes:
            if value:
                setattr(merged, field, value)
        db.commit()
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def get_user_chats(user: User) -> list[Chat]:
    """Load the chats that belong to ``user``.

    The user is re-fetched by id in a fresh session and its ``chats``
    relationship is read while that session is still open.

    Args:
        user: user whose chats are wanted; only ``user.id`` is read.

    Returns:
        The user's chats, or an empty list when no user with that id
        exists (previously this raised AttributeError).
    """
    with Session(autoflush=False, bind=engine) as db:
        stored = db.get(User, user.id)
        if stored is None:
            return []
        return stored.chats
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def get_user_last_chat(user: User) -> Chat | None:
    """Return the most recently created chat of ``user``.

    "Last" is decided explicitly by ``created_at`` instead of by the
    position of an element in the unordered ``chats`` relationship, and
    only one row is fetched.

    Args:
        user: user whose chats are consulted; only ``user.id`` is read.

    Returns:
        The newest chat, or None when the user has no chats (or does not
        exist).
    """
    with Session(autoflush=False, bind=engine) as db:
        return (
            db.query(Chat)
            .filter(Chat.user_id == user.id)
            .order_by(Chat.created_at.desc())
            .first()
        )
|
app/backend/schemas.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class ThemeOptions(str, Enum):
    """UI theme choices, intended for the ``users`` table's theme field.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    LIGHT = "light"
    DARK = "dark"
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class LanguageOptions(str, Enum):
    """Preferred response languages, intended for the ``users`` table.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    AR = "ar"
    EN = "en"
    RU = "ru"
|
app/core/__init__.py
ADDED
|
File without changes
|
app/core/chunks.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Chunk:
    """A piece of text cut from a source document, plus its location.

    Attributes:
        id: UUID identifying the chunk.
        filename: path/name of the document the chunk came from.
        page_number: page the chunk was taken from.
        start_index: index of the chunk's first character counted from the
            beginning of the original document.
        start_line / end_line: line span of the chunk.
        text: the chunk's raw text.

    TODO: implement access modifiers and set of getters and setters
    """

    def __init__(
        self,
        id: uuid.UUID,
        filename: str,
        page_number: int,
        start_index: int,
        start_line: int,
        end_line: int,
        text: str,
    ):
        self.id: uuid.UUID = id
        self.filename: str = filename
        self.page_number: int = page_number
        self.start_index: int = start_index
        self.start_line: int = start_line
        self.end_line: int = end_line
        self.text: str = text

    def get_raw_text(self) -> str:
        """Return the chunk text unchanged."""
        return self.text

    def get_splitted_text(self) -> list[str]:
        """Return the chunk text split on single space characters."""
        return self.text.split(" ")

    def get_metadata(self) -> dict:
        """Return everything except the text, with the id as a string."""
        metadata = {"id": str(self.id)}
        for field in ("filename", "page_number", "start_index", "start_line", "end_line"):
            metadata[field] = getattr(self, field)
        return metadata

    # TODO: remove kostyly
    def __str__(self):
        """Summarise the chunk: file basename, position and a text preview."""
        basename = self.filename.split('/')[-1]
        head = self.text[:100]
        tail = self.text[-20:]
        parts = [
            f"Chunk from {basename}, ",
            f"page - {self.page_number}, ",
            f"start - {self.start_line}, ",
            f"end - {self.end_line}, ",
            f"and text - {head}... ({len(self.text)})...{tail}\n",
        ]
        return "".join(parts)
|
app/core/database.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from qdrant_client import QdrantClient # main component to provide the access to db
|
| 2 |
+
from qdrant_client.http.models import (
|
| 3 |
+
ScoredPoint,
|
| 4 |
+
Filter,
|
| 5 |
+
FieldCondition,
|
| 6 |
+
MatchText
|
| 7 |
+
)
|
| 8 |
+
from qdrant_client.models import (
|
| 9 |
+
VectorParams,
|
| 10 |
+
Distance,
|
| 11 |
+
PointStruct,
|
| 12 |
+
TextIndexParams,
|
| 13 |
+
TokenizerType
|
| 14 |
+
) # VectorParams -> config of vectors that will be used as primary keys
|
| 15 |
+
from app.core.models import Embedder # Distance -> defines the metric
|
| 16 |
+
from app.core.chunks import Chunk # PointStruct -> instance that will be stored in db
|
| 17 |
+
import numpy as np
|
| 18 |
+
from uuid import UUID
|
| 19 |
+
from app.settings import settings
|
| 20 |
+
import time
|
| 21 |
+
from fastapi import HTTPException
|
| 22 |
+
import re
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class VectorDatabase:
|
| 26 |
+
def __init__(self, embedder: Embedder, host: str = "qdrant", port: int = 6333):
|
| 27 |
+
self.host: str = host
|
| 28 |
+
self.client: QdrantClient = self._initialize_qdrant_client()
|
| 29 |
+
self.embedder: Embedder = embedder # embedder is used to convert a user's query
|
| 30 |
+
self.already_stored: np.array[np.array] = np.array([]).reshape(
|
| 31 |
+
0, embedder.get_vector_dimensionality()
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
def store(
|
| 35 |
+
self, collection_name: str, chunks: list[Chunk], batch_size: int = 1000
|
| 36 |
+
) -> None:
|
| 37 |
+
points: list[PointStruct] = []
|
| 38 |
+
|
| 39 |
+
print("Start getting text embeddings")
|
| 40 |
+
start = time.time()
|
| 41 |
+
vectors = self.embedder.encode([chunk.get_raw_text() for chunk in chunks])
|
| 42 |
+
print(f"Embeddings - {time.time() - start}")
|
| 43 |
+
|
| 44 |
+
for vector, chunk in zip(vectors, chunks):
|
| 45 |
+
if self.accept_vector(collection_name, vector):
|
| 46 |
+
points.append(
|
| 47 |
+
PointStruct(
|
| 48 |
+
id=str(chunk.id),
|
| 49 |
+
vector=vector,
|
| 50 |
+
payload={
|
| 51 |
+
"metadata": chunk.get_metadata(),
|
| 52 |
+
"text": chunk.get_raw_text(),
|
| 53 |
+
},
|
| 54 |
+
)
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
if len(points):
|
| 58 |
+
for group in range(0, len(points), batch_size):
|
| 59 |
+
self.client.upsert(
|
| 60 |
+
collection_name=collection_name,
|
| 61 |
+
points=points[group : group + batch_size],
|
| 62 |
+
wait=False,
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
"""
|
| 66 |
+
Measures a cosine of angle between tow vectors
|
| 67 |
+
"""
|
| 68 |
+
|
| 69 |
+
def cosine_similarity(self, vec1: list[float], vec2: list[float] | list[list[float]]) -> float:
|
| 70 |
+
if len(vec2) == 0:
|
| 71 |
+
return 0
|
| 72 |
+
|
| 73 |
+
vec1_np = np.array(vec1)
|
| 74 |
+
vec2_np = np.array(vec2)
|
| 75 |
+
|
| 76 |
+
if vec2_np.ndim == 2:
|
| 77 |
+
vec2_np = vec2_np.T
|
| 78 |
+
|
| 79 |
+
similarities = np.array(vec1_np @ vec2_np / (np.linalg.norm(vec1_np) * np.linalg.norm(vec2_np, axis=0)))
|
| 80 |
+
return np.max(similarities)
|
| 81 |
+
|
| 82 |
+
"""
|
| 83 |
+
Defines weather the vector should be stored in the db by searching for the most
|
| 84 |
+
similar one
|
| 85 |
+
"""
|
| 86 |
+
|
| 87 |
+
def accept_vector(self, collection_name: str, vector: np.array) -> bool:
|
| 88 |
+
most_similar = self.client.query_points(
|
| 89 |
+
collection_name=collection_name, query=vector, limit=1, with_vectors=True
|
| 90 |
+
).points
|
| 91 |
+
|
| 92 |
+
if not len(most_similar):
|
| 93 |
+
return True
|
| 94 |
+
else:
|
| 95 |
+
most_similar = most_similar[0]
|
| 96 |
+
|
| 97 |
+
if 1 - self.cosine_similarity(vector, most_similar.vector) < settings.max_delta:
|
| 98 |
+
return False
|
| 99 |
+
return True
|
| 100 |
+
|
| 101 |
+
def construct_keywords_list(self, query: str) -> list[FieldCondition]:
|
| 102 |
+
keywords = re.findall(r'\b[A-Z]{2,}\b', query)
|
| 103 |
+
filters = []
|
| 104 |
+
|
| 105 |
+
print(keywords)
|
| 106 |
+
|
| 107 |
+
for word in keywords:
|
| 108 |
+
if len(word) > 30 or len(word) < 2:
|
| 109 |
+
continue
|
| 110 |
+
filters.append(FieldCondition(key="text", match=MatchText(text=word)))
|
| 111 |
+
|
| 112 |
+
return filters
|
| 113 |
+
|
| 114 |
+
def combine_points_without_duplications(self, first: list[ScoredPoint], second: list[ScoredPoint] = None) -> list[ScoredPoint]:
|
| 115 |
+
combined = []
|
| 116 |
+
similarity_vectors = []
|
| 117 |
+
|
| 118 |
+
to_combine = [first]
|
| 119 |
+
if second is not None:
|
| 120 |
+
to_combine.append(second)
|
| 121 |
+
|
| 122 |
+
for group in to_combine:
|
| 123 |
+
for point in group:
|
| 124 |
+
if 1 - self.cosine_similarity(point.vector, similarity_vectors) > min(settings.max_delta, 0.2):
|
| 125 |
+
combined.append(point)
|
| 126 |
+
similarity_vectors.append(point.vector)
|
| 127 |
+
return combined
|
| 128 |
+
|
| 129 |
+
def search(self, collection_name: str, query: str, top_k: int = 5) -> list[Chunk]:
|
| 130 |
+
query_embedded: np.ndarray = self.embedder.encode(query)
|
| 131 |
+
|
| 132 |
+
if isinstance(query_embedded, list):
|
| 133 |
+
query_embedded = query_embedded[0]
|
| 134 |
+
|
| 135 |
+
keywords = self.construct_keywords_list(query)
|
| 136 |
+
|
| 137 |
+
mixed_result: list[ScoredPoint] = self.client.query_points(
|
| 138 |
+
collection_name=collection_name, query=query_embedded, limit=top_k + int(top_k * 0.3),
|
| 139 |
+
query_filter=Filter(should=keywords), with_vectors=True
|
| 140 |
+
).points
|
| 141 |
+
|
| 142 |
+
print(f"Len of original array -> {len(mixed_result)}")
|
| 143 |
+
combined = self.combine_points_without_duplications(mixed_result)
|
| 144 |
+
print(f"Len of combined array -> {len(combined)}")
|
| 145 |
+
|
| 146 |
+
return [
|
| 147 |
+
Chunk(
|
| 148 |
+
id=UUID(point.payload.get("metadata", {}).get("id", "")),
|
| 149 |
+
filename=point.payload.get("metadata", {}).get("filename", ""),
|
| 150 |
+
page_number=point.payload.get("metadata", {}).get("page_number", 0),
|
| 151 |
+
start_index=point.payload.get("metadata", {}).get("start_index", 0),
|
| 152 |
+
start_line=point.payload.get("metadata", {}).get("start_line", 0),
|
| 153 |
+
end_line=point.payload.get("metadata", {}).get("end_line", 0),
|
| 154 |
+
text=point.payload.get("text", ""),
|
| 155 |
+
)
|
| 156 |
+
for point in combined
|
| 157 |
+
]
|
| 158 |
+
|
| 159 |
+
def _initialize_qdrant_client(self, max_retries=5, delay=2) -> QdrantClient:
|
| 160 |
+
for attempt in range(max_retries):
|
| 161 |
+
try:
|
| 162 |
+
client = QdrantClient(**settings.qdrant.model_dump())
|
| 163 |
+
client.get_collections()
|
| 164 |
+
return client
|
| 165 |
+
except Exception as e:
|
| 166 |
+
if attempt == max_retries - 1:
|
| 167 |
+
raise HTTPException(
|
| 168 |
+
500,
|
| 169 |
+
f"Failed to connect to Qdrant server after {max_retries} attempts. "
|
| 170 |
+
f"Last error: {str(e)}",
|
| 171 |
+
)
|
| 172 |
+
|
| 173 |
+
print(
|
| 174 |
+
f"Connection attempt {attempt + 1} out of {max_retries} failed. "
|
| 175 |
+
f"Retrying in {delay} seconds..."
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
time.sleep(delay)
|
| 179 |
+
delay *= 2
|
| 180 |
+
|
| 181 |
+
def _check_collection_exists(self, collection_name: str) -> bool:
|
| 182 |
+
try:
|
| 183 |
+
return self.client.collection_exists(collection_name)
|
| 184 |
+
except Exception as e:
|
| 185 |
+
raise HTTPException(
|
| 186 |
+
500,
|
| 187 |
+
f"Failed to check collection {collection_name} exists. Last error: {str(e)}",
|
| 188 |
+
)
|
| 189 |
+
|
| 190 |
+
def _create_collection(self, collection_name: str) -> None:
|
| 191 |
+
try:
|
| 192 |
+
self.client.create_collection(
|
| 193 |
+
collection_name=collection_name,
|
| 194 |
+
vectors_config=VectorParams(
|
| 195 |
+
size=self.embedder.get_vector_dimensionality(),
|
| 196 |
+
distance=Distance.COSINE,
|
| 197 |
+
),
|
| 198 |
+
)
|
| 199 |
+
self.client.create_payload_index(
|
| 200 |
+
collection_name=collection_name,
|
| 201 |
+
field_name="text",
|
| 202 |
+
field_schema=TextIndexParams(
|
| 203 |
+
type="text",
|
| 204 |
+
tokenizer=TokenizerType.WORD,
|
| 205 |
+
min_token_len=2,
|
| 206 |
+
max_token_len=30,
|
| 207 |
+
lowercase=True
|
| 208 |
+
)
|
| 209 |
+
)
|
| 210 |
+
except Exception as e:
|
| 211 |
+
raise HTTPException(
|
| 212 |
+
500, f"Failed to create collection {self.collection_name}: {str(e)}"
|
| 213 |
+
)
|
| 214 |
+
|
| 215 |
+
def create_collection(self, collection_name: str) -> None:
    """
    Creates the collection unless it already exists

    Raises HTTPException(500) when the existence check or the creation fails
    """
    try:
        if self._check_collection_exists(collection_name):
            return
        self._create_collection(collection_name)
    except HTTPException as e:
        # The helpers already produced a descriptive HTTP error: log it and pass it on unchanged
        print(e)
        raise
    except Exception as e:
        print(e)
        # detail must be a plain string, not the exception object
        raise HTTPException(500, str(e)) from e
|
| 223 |
+
|
| 224 |
+
def __del__(self):
    """
    Closes the Qdrant client when the object is garbage collected
    """
    if not hasattr(self, "client"):
        # Nothing to close: the attribute was never set (presumably construction failed early)
        return
    self.client.close()
|
| 227 |
+
|
| 228 |
+
def get_collections(self) -> list[str]:
    """
    Returns whatever the Qdrant client reports for get_collections(), passed through untouched

    NOTE(review): the annotation promises list[str], but the client result is not converted here -
    confirm what callers actually receive

    Raises HTTPException(500) when the client call fails
    """
    try:
        collections = self.client.get_collections()
    except Exception as e:
        print(e)
        raise HTTPException(500, "Failed to get collection names")
    return collections
|
app/core/document_validator.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Checks if the given path is valid and file exists
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def path_is_valid(path: str) -> bool:
    """
    Reports whether something exists at the given path (os.path.exists, so directories count too)
    """
    exists: bool = os.path.exists(path)
    return exists
|
app/core/main.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.settings import settings, BASE_DIR
|
| 2 |
+
import uvicorn
|
| 3 |
+
import os
|
| 4 |
+
from app.backend.models.db_service import automigrate
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def initialize_system() -> bool:
    """
    Creates the directories the application works with under BASE_DIR

    Prints every target path first, then creates the directories one by one.

    Returns True when all directories exist afterwards, False as soon as one of them
    can not be created (the remaining ones are then skipped)
    """
    path = BASE_DIR
    temp_storage_path = os.path.join(path, "app", "temp_storage")

    # (label used in the path report, name used in the "Created ..." message, directory)
    targets = [
        ("Temp storage path", "temp_storage_path", temp_storage_path),
        ("Static path", "static_path", os.path.join(path, "static")),
        ("PDFs path", "pdfs_path", os.path.join(temp_storage_path, "pdfs")),
        ("Database path", "database_path", os.path.join(path, "database")),
        # previously reported under a second, misleading "Database path" label
        ("Chats storage path", "chats_storage_path", os.path.join(path, "chats_storage")),
    ]

    print(f"Base path: {BASE_DIR}")
    print(f"Parent path: {path}")
    for label, _, directory in targets:
        print(f"{label}: {directory}")

    try:
        for _, name, directory in targets:
            os.makedirs(directory, exist_ok=True)
            print(f"Created {name}")
    except Exception as e:
        print(f"Error creating directories: {str(e)}")
        return False

    return True
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def main():
    """
    Prepares the working directories and starts the uvicorn server with the API settings
    """
    # automigrate() # Note: it will drop all existing dbs and create a new ones
    if not initialize_system():
        # Startup still continues as before, but the failed setup is no longer silent
        print("Warning: system initialization failed, some directories may be missing")
    uvicorn.run(**settings.api.model_dump())


if __name__ == "__main__":
    # ATTENTION: run from base dir ---> python -m app.main
    main()
|
app/core/models.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from sentence_transformers import (
|
| 5 |
+
SentenceTransformer,
|
| 6 |
+
CrossEncoder,
|
| 7 |
+
) # SentenceTransformer -> model for embeddings, CrossEncoder -> re-ranker
|
| 8 |
+
from ctransformers import AutoModelForCausalLM
|
| 9 |
+
from torch import Tensor
|
| 10 |
+
from google import genai
|
| 11 |
+
from google.genai import types
|
| 12 |
+
from app.core.chunks import Chunk
|
| 13 |
+
from app.settings import settings, BASE_DIR, GeminiEmbeddingSettings
|
| 14 |
+
|
| 15 |
+
load_dotenv()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Embedder:
|
| 19 |
+
def __init__(self, model: str = "BAAI/bge-m3"):
|
| 20 |
+
self.device: str = settings.device
|
| 21 |
+
self.model_name: str = model
|
| 22 |
+
self.model: SentenceTransformer = SentenceTransformer(model, device=self.device)
|
| 23 |
+
|
| 24 |
+
"""
|
| 25 |
+
Encodes string to dense vector
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
def encode(self, text: str | list[str]) -> Tensor | list[Tensor]:
|
| 29 |
+
return self.model.encode(sentences=text, show_progress_bar=False, batch_size=32)
|
| 30 |
+
|
| 31 |
+
"""
|
| 32 |
+
Returns the dimensionality of dense vector
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
def get_vector_dimensionality(self) -> int | None:
|
| 36 |
+
return self.model.get_sentence_embedding_dimension()
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class Reranker:
    """
    Re-ranker built on CrossEncoder

    device -> device name taken from settings
    model_name -> name the model was created from
    model -> the CrossEncoder instance
    """

    def __init__(self, model: str = "cross-encoder/ms-marco-MiniLM-L6-v2"):
        device: str = settings.device
        self.device = device
        self.model_name = model
        self.model = CrossEncoder(model, device=device)

    def rank(self, query: str, chunks: list[Chunk]) -> list[dict[str, int]]:
        """
        Returns re-sorted (by relevance) vector with dicts, from which we need only the 'corpus_id'
        since it is a position of chunk in original list
        """
        passages = [chunk.get_raw_text() for chunk in chunks]
        return self.model.rank(query, passages)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# TODO: add models parameters to global config file
|
| 55 |
+
# TODO: add exception handling for the case when the response has more tokens than the configured limit
|
| 56 |
+
# TODO: find a way to keep the model from producing overly long answers
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class LocalLLM:
    """
    Locally hosted causal LLM loaded through ctransformers with the settings.local_llm options
    """

    def __init__(self):
        self.model = AutoModelForCausalLM.from_pretrained(
            **settings.local_llm.model_dump()
        )

    def get_response(
        self,
        prompt: str,
        stream: bool = True,
        logging: bool = True,
        use_default_config: bool = True,
    ) -> str:
        """
        Produces the response to user's prompt

        stream -> flag, determines whether we need to wait until the response is ready or can show it token by token
        logging -> when set, the raw generation result and (in stream mode) every decoded piece are printed
        use_default_config -> accepted for signature parity with GeminiLLM.get_response, not used here

        TODO: invent a way to really stream the answer (as return value)
        """
        # Debug dump of the last prompt: same location and encoding as GeminiLLM uses, so the
        # result does not depend on the working directory or on the platform default encoding
        path_to_prompt = os.path.join(BASE_DIR, "prompt.txt")
        with open(path_to_prompt, "w", encoding="utf-8", errors="replace") as f:
            f.write(prompt)

        tokenized_text: list[int] = self.model.tokenize(text=prompt)
        response = self.model.generate(
            tokens=tokenized_text, **settings.local_llm.model_dump()
        )

        if logging:
            print(response)

        if not stream:
            return self.model.detokenize(response)

        generated_text = ""
        for token in response:
            chunk = self.model.detokenize([token])
            generated_text += chunk
            if logging:
                print(chunk, end="", flush=True)  # flush -> clear the buffer

        return generated_text
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
class GeminiLLM:
    """
    Gemini text generation through the google-genai client

    client -> genai client created with the api key from settings
    model -> name of the Gemini model to call
    """

    def __init__(self, model="gemini-2.0-flash"):
        self.client = genai.Client(api_key=settings.api_key)
        self.model = model

    def _dump_prompt(self, prompt: str) -> None:
        """Writes the prompt to BASE_DIR/prompt.txt (overwritten on every call)"""
        path_to_prompt = os.path.join(BASE_DIR, "prompt.txt")
        with open(path_to_prompt, "w", encoding="utf-8", errors="replace") as f:
            f.write(prompt)

    def _generation_config(self, use_default_config: bool):
        """Builds the config from settings.gemini_generation, or returns None when it is not requested"""
        if not use_default_config:
            return None
        return types.GenerateContentConfig(**settings.gemini_generation.model_dump())

    def get_response(
        self,
        prompt: str,
        stream: bool = True,
        logging: bool = True,
        use_default_config: bool = False,
    ) -> str:
        """
        Produces the complete response to user's prompt in one blocking call

        stream, logging -> accepted for signature parity with LocalLLM.get_response, not used here
        use_default_config -> when set, the generation config from settings is sent with the request
        """
        self._dump_prompt(prompt)

        response = self.client.models.generate_content(
            model=self.model,
            contents=prompt,
            config=self._generation_config(use_default_config),
        )

        return response.text

    async def get_streaming_response(
        self,
        prompt: str,
        stream: bool = True,
        logging: bool = True,
        use_default_config: bool = False,
    ):
        """
        Yields the response chunks as the API delivers them

        The async client is used so that waiting for the next chunk does not block the event loop
        (the synchronous stream was previously iterated inside this coroutine)

        stream, logging -> not used here
        use_default_config -> when set, the generation config from settings is sent with the request
        """
        self._dump_prompt(prompt)

        # The aio call is a coroutine that resolves to an async iterator of response chunks
        response = await self.client.aio.models.generate_content_stream(
            model=self.model,
            contents=prompt,
            config=self._generation_config(use_default_config),
        )

        async for chunk in response:
            yield chunk
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
class GeminiEmbed:
|
| 159 |
+
def __init__(self, model="text-embedding-004"):
|
| 160 |
+
self.client = genai.Client(api_key=settings.api_key)
|
| 161 |
+
self.model = model
|
| 162 |
+
self.settings = GeminiEmbeddingSettings()
|
| 163 |
+
self.max_workers = 5
|
| 164 |
+
|
| 165 |
+
def _embed_batch(self, batch: list[str], idx: int) -> dict:
|
| 166 |
+
response = self.client.models.embed_content(
|
| 167 |
+
model=self.model,
|
| 168 |
+
contents=batch,
|
| 169 |
+
config=types.EmbedContentConfig(
|
| 170 |
+
**settings.gemini_embedding.model_dump()
|
| 171 |
+
),
|
| 172 |
+
).embeddings
|
| 173 |
+
return {"idx": idx, "embeddings": response}
|
| 174 |
+
|
| 175 |
+
def encode(self, text: str | list[str]) -> list[Tensor]:
|
| 176 |
+
|
| 177 |
+
if isinstance(text, str):
|
| 178 |
+
text = [text]
|
| 179 |
+
|
| 180 |
+
groups: list[list[float]] = []
|
| 181 |
+
max_batch_size = 100 # can not be changed due to google restrictions
|
| 182 |
+
|
| 183 |
+
batches: list[list[str]] = [text[i : i + max_batch_size] for i in range(0, len(text), max_batch_size)]
|
| 184 |
+
print(*[len(batch) for batch in batches])
|
| 185 |
+
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
| 186 |
+
futures = [executor.submit(self._embed_batch, batch, idx) for idx, batch in enumerate(batches)]
|
| 187 |
+
for future in as_completed(futures):
|
| 188 |
+
groups.append(future.result())
|
| 189 |
+
|
| 190 |
+
groups.sort(key=lambda x: x["idx"])
|
| 191 |
+
|
| 192 |
+
result: list[float] = []
|
| 193 |
+
for group in groups:
|
| 194 |
+
for vec in group["embeddings"]:
|
| 195 |
+
result.append(vec.values)
|
| 196 |
+
return result
|
| 197 |
+
|
| 198 |
+
def get_vector_dimensionality(self) -> int | None:
|
| 199 |
+
return getattr(self.settings, "output_dimensionality")
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
class Wrapper:
    """
    Thin Gemini client that sends a prompt with the settings.gemini_wrapper generation config

    model -> name of the Gemini model to call
    client -> genai client created with the api key from settings
    """

    def __init__(self, model: str = "gemini-2.0-flash"):
        self.model = model
        self.client = genai.Client(api_key=settings.api_key)

    def wrap(self, prompt: str) -> str:
        """
        Sends the prompt to the model and returns the text of the response
        """
        wrapper_config = types.GenerateContentConfig(
            **settings.gemini_wrapper.model_dump()
        )
        response = self.client.models.generate_content(
            model=self.model,
            contents=prompt,
            config=wrapper_config,
        )
        return response.text
|
app/core/processor.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import (
|
| 2 |
+
UnstructuredWordDocumentLoader,
|
| 3 |
+
TextLoader,
|
| 4 |
+
CSVLoader,
|
| 5 |
+
UnstructuredMarkdownLoader,
|
| 6 |
+
)
|
| 7 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 8 |
+
from langchain_core.documents import Document
|
| 9 |
+
from app.core.chunks import Chunk
|
| 10 |
+
import nltk # used for proper tokenizer workflow
|
| 11 |
+
from uuid import (
|
| 12 |
+
uuid4,
|
| 13 |
+
) # for generating unique id as hex (uuid4 is used as it generates ids form pseudo random numbers unlike uuid1 and others)
|
| 14 |
+
import numpy as np
|
| 15 |
+
from app.settings import logging, settings
|
| 16 |
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
| 17 |
+
import os
|
| 18 |
+
import fitz
|
| 19 |
+
|
| 20 |
+
class PDFLoader:
    """
    Loads a .pdf with fitz and produces one Document per page

    file_path -> path of the .pdf to read
    """

    def __init__(self, file_path: str):
        self.file_path = file_path

    def load(self) -> list[Document]:
        """
        Extracts the plain text of every page

        Each Document carries the file path as "source" and the fitz page number as "page"
        """
        with fitz.open(self.file_path) as pdf:
            return [
                Document(
                    page_content=page.get_text("text"),
                    metadata={"source": self.file_path, "page": page.number},
                )
                for page in pdf
            ]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class DocumentProcessor:
    """
    Loads files, splits them into chunks and keeps the chunks that still have to be saved to db

    TODO: determine the most suitable chunk size

    chunks_unsaved -> the list of recently added chunks that have not been saved to db yet
    unprocessed -> the list of loaded documents that have not been split into chunks yet
    max_workers -> upper bound for the number of processes used for parallel chunking
    text_splitter -> text splitting strategy
    """

    def __init__(self):
        self.chunks_unsaved: list[Chunk] = []
        self.unprocessed: list[Document] = []
        self.max_workers = min(4, os.cpu_count() or 1)
        self.text_splitter = RecursiveCharacterTextSplitter(
            **settings.text_splitter.model_dump()
        )

    def cosine_similarity(self, vec1, vec2):
        """
        Measures cosine between two vectors
        """
        return vec1 @ vec2 / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

    def update_most_relevant_chunk(
        self,
        chunk: list[np.float64, Chunk],
        relevant_chunks: list[list[np.float64, Chunk]],
        mx_len=15,
    ):
        """
        Updates a list of the most relevant chunks without interacting with db

        chunk -> [score, Chunk] pair to insert
        relevant_chunks -> list of such pairs kept in descending score order, modified in place
        mx_len -> maximal length of the list, the lowest scored pair is dropped when it is exceeded
        """
        relevant_chunks.append(chunk)

        # Move the new pair towards the front while it scores higher than its left neighbour
        i = len(relevant_chunks) - 1
        while i > 0 and relevant_chunks[i][0] > relevant_chunks[i - 1][0]:
            relevant_chunks[i], relevant_chunks[i - 1] = (
                relevant_chunks[i - 1],
                relevant_chunks[i],
            )
            i -= 1

        if len(relevant_chunks) > mx_len:
            del relevant_chunks[-1]

    def check_size(self, file_path: str = "") -> bool:
        """
        Returns True when the file is bigger than 1000000 bytes

        A file whose size can not be read is treated as empty
        """
        try:
            size = os.path.getsize(file_path)
        except Exception:
            size = 0

        return size > 1000000

    def document_multiplexer(self, filepath: str, get_loader: bool = False, get_chunking_strategy: bool = False):
        """
        Picks the loader and the chunking strategy for the file, based on its extension

        get_loader -> return the loader (None for an unsupported extension)
        get_chunking_strategy -> return True when chunking should run in parallel:
                                 never for .pdf, otherwise only for files accepted by check_size

        get_loader wins when both flags are set, RuntimeError is raised when none is set.
        The extension is matched case-insensitively.
        """
        extension_source = filepath.lower()
        is_pdf = extension_source.endswith(".pdf")

        loader = None
        if is_pdf:
            loader = PDFLoader(file_path=filepath)  # every page is processed as a separate document
        elif extension_source.endswith((".docx", ".doc")):
            loader = UnstructuredWordDocumentLoader(file_path=filepath)
        elif extension_source.endswith((".txt", ".json")):
            loader = TextLoader(file_path=filepath)
        elif extension_source.endswith(".csv"):
            loader = CSVLoader(file_path=filepath)
        elif extension_source.endswith(".md"):
            loader = UnstructuredMarkdownLoader(file_path=filepath)

        parallelization = False if is_pdf else self.check_size(file_path=filepath)

        if get_loader:
            return loader
        elif get_chunking_strategy:
            return parallelization
        else:
            raise RuntimeError("What to do, my lord?")

    def load_document(
        self, filepath: str, add_to_unprocessed: bool = False
    ) -> list[Document]:
        """
        Loads one file - extracts text from file - and chunks everything waiting in `unprocessed`

        TODO: Replace UnstructuredWordDocumentLoader with Docx2txtLoader
        TODO: Play with .pdf and text from img extraction
        TODO: Try chunking with llm

        add_to_unprocessed -> used to add loaded file to the list of unprocessed(unchunked) files if true

        Returns the loaded documents (several for a .pdf, one per page)
        Raises RuntimeError for an unsupported extension or when the loader fails
        """
        loader = self.document_multiplexer(filepath=filepath, get_loader=True)

        if loader is None:
            raise RuntimeError("Unsupported type of file")

        try:
            # A list, since .pdf files are split into several documents
            documents: list[Document] = loader.load()
        except Exception as e:
            raise RuntimeError("File is corrupted") from e  # keep the loader error as the cause

        if add_to_unprocessed:
            self.unprocessed.extend(documents)

        strategy = self.document_multiplexer(filepath=filepath, get_chunking_strategy=True)
        print(f"Strategy --> {strategy}")
        self.generate_chunks(parallelization=strategy)
        return documents

    def load_documents(
        self, documents: list[str], add_to_unprocessed: bool = False
    ) -> list[Document]:
        """
        Similar to load_document, but for multiple files

        Files that fail to load are logged and skipped.

        add_to_unprocessed -> kept for backward compatibility; every file is already chunked by
                              load_document, so nothing is queued a second time (queueing the
                              documents again made them produce duplicate chunks)
        """
        extracted_documents: list[Document] = []

        for doc in documents:
            try:
                loaded = self.load_document(filepath=doc, add_to_unprocessed=True)
            except Exception as e:
                logging.error(
                    "Error at load_documents while loading %s", doc, exc_info=e
                )
                continue

            extracted_documents.extend(loaded)

        return extracted_documents

    def split_into_groups(self, original_list: list, split_by: int = 15) -> list[list]:
        """
        Cuts the list into consecutive groups of at most `split_by` items
        """
        return [
            original_list[i : i + split_by]
            for i in range(0, len(original_list), split_by)
        ]

    def _chunkinize(self, document: Document, text: list[Document], lines: list[dict]) -> list[Chunk]:
        """
        Turns the pieces produced by the text splitter into Chunk objects

        document -> the document the pieces were cut from (provides filename and page)
        text -> the pieces; "start_index" is read from their metadata, 0 when it is absent
        lines -> line table of the document, see precompute_lines
        """
        output: list[Chunk] = []
        for piece in text:
            start_index = piece.metadata.get("start_index", 0)
            start_l, end_l = self.get_start_end_lines(
                splitted_text=lines,
                start_char=start_index,
                end_char=start_index + len(piece.page_content),
            )

            output.append(
                Chunk(
                    id=uuid4(),
                    filename=document.metadata.get("source", ""),
                    page_number=document.metadata.get("page", 0),
                    start_index=start_index,
                    start_line=start_l,
                    end_line=end_l,
                    text=piece.page_content,
                )
            )
        return output

    def precompute_lines(self, splitted_document: list[str]) -> list[dict]:
        """
        Builds the line table: for every line its 1-based id, the half-open character range
        [start, end) and the text

        Every line is counted with one extra character for its line break
        """
        current_start = 0
        output: list[dict] = []
        for i, line in enumerate(splitted_document):
            line_end = current_start + len(line) + 1
            output.append({"id": i + 1, "start": current_start, "end": line_end, "text": line})
            current_start = line_end
        return output

    def generate_chunks(self, parallelization: bool = True):
        """
        Splits every unprocessed document into chunks, appends them to chunks_unsaved
        and empties the unprocessed list

        parallelization -> when set, the pieces of a document are converted in groups of 50
                           by a process pool, otherwise in the current process
        """
        intermediate = []
        for document in self.unprocessed:
            text: list[Document] = self.text_splitter.split_documents(documents=[document])
            lines: list[dict] = self.precompute_lines(splitted_document=document.page_content.splitlines())
            groups = self.split_into_groups(original_list=text, split_by=50)

            if parallelization:
                print("<------- Apply Parallel Execution ------->")
                with ProcessPoolExecutor(max_workers=self.max_workers) as executor:
                    futures = [executor.submit(self._chunkinize, document, group, lines) for group in groups]
                    for future in as_completed(futures):
                        intermediate.append(future.result())
            else:
                intermediate.append(self._chunkinize(document=document, text=text, lines=lines))

        for group in intermediate:
            self.chunks_unsaved.extend(group)

        self.unprocessed = []

    def find_line(self, splitted_text: list[dict], char) -> int:
        """
        Binary search for the 1-based number of the line that contains the character offset

        splitted_text -> line table, see precompute_lines
        char -> character offset inside the document

        An offset outside every range is clamped to the nearest line (first or last),
        0 is returned for an empty table
        """
        l, r = 0, len(splitted_text) - 1

        while l <= r:
            m = (l + r) // 2
            line = splitted_text[m]

            if line["start"] <= char < line["end"]:
                return m + 1
            elif char < line["start"]:
                r = m - 1
            else:
                l = m + 1

        # r is the 0-based index of the last line starting before char (or -1):
        # convert it to the same 1-based numbering as the regular return above
        return min(max(r + 1, 1), len(splitted_text))

    def get_start_end_lines(
        self,
        splitted_text: list[dict],
        start_char: int,
        end_char: int,
        debug_mode: bool = False,
    ) -> tuple[int, int]:
        """
        Returns the numbers of the lines that contain the first and the last character offset

        debug_mode -> not used
        """
        start = self.find_line(splitted_text=splitted_text, char=start_char)
        end = self.find_line(splitted_text=splitted_text, char=end_char)
        return (start, end)

    def update_nltk(self) -> None:
        """
        Note: it should be used only once to download tokenizers, further usage is not recommended
        """
        nltk.download("punkt")
        nltk.download("averaged_perceptron_tagger")

    def get_and_save_unsaved_chunks(self) -> list[Chunk]:
        """
        Hands over the chunks that have not been saved to db yet and forgets them

        The chunks are kept in chunks_unsaved only until this call, which avoids duplications
        while saving to db.
        """
        chunks_copy: list[Chunk] = self.chunks_unsaved.copy()
        self.clear_unsaved_chunks()
        return chunks_copy

    def clear_unsaved_chunks(self):
        """Forgets all chunks that have not been saved yet"""
        self.chunks_unsaved = []

    def get_all_chunks(self) -> list[Chunk]:
        """Returns the chunks that have not been saved yet (the list itself, not a copy)"""
        return self.chunks_unsaved
|
app/core/rag_generator.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, AsyncGenerator
|
| 2 |
+
from app.core.models import LocalLLM, Embedder, Reranker, GeminiLLM, GeminiEmbed, Wrapper
|
| 3 |
+
from app.core.processor import DocumentProcessor
|
| 4 |
+
from app.core.database import VectorDatabase
|
| 5 |
+
import time
|
| 6 |
+
import os
|
| 7 |
+
from app.settings import settings, BASE_DIR
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class RagSystem:
|
| 11 |
+
def __init__(self):
|
| 12 |
+
self.embedder = (
|
| 13 |
+
GeminiEmbed()
|
| 14 |
+
if settings.use_gemini
|
| 15 |
+
else Embedder(model=settings.models.embedder_model)
|
| 16 |
+
)
|
| 17 |
+
self.reranker = Reranker(model=settings.models.reranker_model)
|
| 18 |
+
self.processor = DocumentProcessor()
|
| 19 |
+
self.db = VectorDatabase(embedder=self.embedder)
|
| 20 |
+
self.llm = GeminiLLM() if settings.use_gemini else LocalLLM()
|
| 21 |
+
self.wrapper = Wrapper()
|
| 22 |
+
|
| 23 |
+
"""
|
| 24 |
+
Provides a prompt with substituted context from chunks
|
| 25 |
+
|
| 26 |
+
TODO: add template to prompt without docs
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
def get_general_prompt(self, user_prompt: str, collection_name: str) -> str:
|
| 30 |
+
enhanced_prompt = self.enhance_prompt(user_prompt.strip())
|
| 31 |
+
|
| 32 |
+
relevant_chunks = self.db.search(collection_name, query=enhanced_prompt, top_k=30)
|
| 33 |
+
if relevant_chunks is not None and len(relevant_chunks) > 0:
|
| 34 |
+
ranks = self.reranker.rank(query=enhanced_prompt, chunks=relevant_chunks)
|
| 35 |
+
relevant_chunks = [relevant_chunks[rank["corpus_id"]] for rank in ranks]
|
| 36 |
+
else:
|
| 37 |
+
relevant_chunks = []
|
| 38 |
+
|
| 39 |
+
sources = ""
|
| 40 |
+
prompt = ""
|
| 41 |
+
|
| 42 |
+
for chunk in relevant_chunks[: min(10, len(relevant_chunks))]:
|
| 43 |
+
citation = (
|
| 44 |
+
f"[Source: {chunk.filename}, "
|
| 45 |
+
f"Page: {chunk.page_number}, "
|
| 46 |
+
f"Lines: {chunk.start_line}-{chunk.end_line}, "
|
| 47 |
+
f"Start: {chunk.start_index}]\n\n"
|
| 48 |
+
)
|
| 49 |
+
sources += f"Original text:\n{chunk.get_raw_text()}\nCitation:{citation}"
|
| 50 |
+
|
| 51 |
+
with open(
|
| 52 |
+
os.path.join(BASE_DIR, "app", "prompt_templates", "test2.txt")
|
| 53 |
+
) as prompt_file:
|
| 54 |
+
prompt = prompt_file.read()
|
| 55 |
+
|
| 56 |
+
prompt += (
|
| 57 |
+
"**QUESTION**: "
|
| 58 |
+
f"{enhanced_prompt}\n"
|
| 59 |
+
"**CONTEXT DOCUMENTS**:\n"
|
| 60 |
+
f"{sources}\n"
|
| 61 |
+
)
|
| 62 |
+
print(prompt)
|
| 63 |
+
return prompt
|
| 64 |
+
|
| 65 |
+
def enhance_prompt(self, original_prompt: str) -> str:
|
| 66 |
+
path_to_wrapping_prompt = os.path.join(BASE_DIR, "app", "prompt_templates", "wrapper.txt")
|
| 67 |
+
enhanced_prompt = ""
|
| 68 |
+
with open(path_to_wrapping_prompt, "r") as f:
|
| 69 |
+
enhanced_prompt = f.read().replace("[USERS_PROMPT]", original_prompt)
|
| 70 |
+
return self.wrapper.wrap(enhanced_prompt)
|
| 71 |
+
|
| 72 |
+
"""
|
| 73 |
+
Splits the list of documents into groups with 'split_by' docs (done to avoid qdrant_client connection error handling), loads them,
|
| 74 |
+
splits into chunks, and saves to db
|
| 75 |
+
"""
|
| 76 |
+
|
| 77 |
+
def upload_documents(
|
| 78 |
+
self,
|
| 79 |
+
collection_name: str,
|
| 80 |
+
documents: list[str],
|
| 81 |
+
split_by: int = 3,
|
| 82 |
+
debug_mode: bool = True,
|
| 83 |
+
) -> None:
|
| 84 |
+
|
| 85 |
+
for i in range(0, len(documents), split_by):
|
| 86 |
+
|
| 87 |
+
if debug_mode:
|
| 88 |
+
print(
|
| 89 |
+
"<"
|
| 90 |
+
+ "-" * 10
|
| 91 |
+
+ "New document group is taken into processing"
|
| 92 |
+
+ "-" * 10
|
| 93 |
+
+ ">"
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
docs = documents[i : i + split_by]
|
| 97 |
+
|
| 98 |
+
loading_time = 0
|
| 99 |
+
chunk_generating_time = 0
|
| 100 |
+
db_saving_time = 0
|
| 101 |
+
|
| 102 |
+
print("Start loading the documents")
|
| 103 |
+
start = time.time()
|
| 104 |
+
self.processor.load_documents(documents=docs, add_to_unprocessed=False)
|
| 105 |
+
loading_time = time.time() - start
|
| 106 |
+
|
| 107 |
+
print("Start loading chunk generation")
|
| 108 |
+
start = time.time()
|
| 109 |
+
# self.processor.generate_chunks()
|
| 110 |
+
chunk_generating_time = time.time() - start
|
| 111 |
+
|
| 112 |
+
print("Start saving to db")
|
| 113 |
+
start = time.time()
|
| 114 |
+
self.db.store(collection_name, self.processor.get_and_save_unsaved_chunks())
|
| 115 |
+
db_saving_time = time.time() - start
|
| 116 |
+
|
| 117 |
+
if debug_mode:
|
| 118 |
+
print(
|
| 119 |
+
f"loading time = {loading_time}, chunk generation time = {chunk_generating_time}, saving time = {db_saving_time}\n"
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
def extract_text(self, response) -> str:
    """
    Return the text of the first part of the first candidate in `response`.

    Any exception raised while walking `response.candidates[0].content.parts[0].text`
    is printed and an empty string is returned instead.
    """
    try:
        first_part = response.candidates[0].content.parts[0]
        return first_part.text
    except Exception as error:
        print(error)
        return ""
|
| 129 |
+
|
| 130 |
+
"""
|
| 131 |
+
Produces answer to user's request. First, finds the most relevant chunks, generates prompt with them, and asks llm
|
| 132 |
+
"""
|
| 133 |
+
|
| 134 |
+
async def generate_response(
    self, collection_name: str, user_prompt: str, stream: bool = True
) -> str:
    """
    Build the general prompt for `user_prompt` and return the LLM's response.

    The prompt comes from `self.get_general_prompt` and is passed to
    `self.llm.get_response`. The `stream` argument is accepted but not used here.
    """
    return self.llm.get_response(
        prompt=self.get_general_prompt(
            user_prompt=user_prompt, collection_name=collection_name
        )
    )
|
| 142 |
+
|
| 143 |
+
async def generate_response_stream(
    self, collection_name: str, user_prompt: str, stream: bool = True
) -> AsyncGenerator[Any, Any]:
    """
    Stream the LLM's answer to `user_prompt` piece by piece.

    The general prompt is built with `self.get_general_prompt`, sent to
    `self.llm.get_streaming_response` (always with `stream=True`; the `stream`
    argument of this method is not used), and each received chunk is yielded
    after being passed through `self.extract_text`.
    """
    prompt = self.get_general_prompt(
        user_prompt=user_prompt, collection_name=collection_name
    )
    llm_stream = self.llm.get_streaming_response(prompt=prompt, stream=True)

    async for piece in llm_stream:
        yield self.extract_text(piece)
|
| 154 |
+
|
| 155 |
+
"""
|
| 156 |
+
Produces the list of the most relevant chunks
|
| 157 |
+
"""
|
| 158 |
+
|
| 159 |
+
def get_relevant_chunks(self, collection_name: str, query):
    """
    Return the search hits for `query`, reordered by the reranker.

    The top 15 hits are fetched with `self.db.search`; `self.reranker.rank` then
    yields entries whose "corpus_id" is used as an index into those hits, and the
    hits are returned in the order the reranker produced them.
    """
    candidates = self.db.search(collection_name, query=query, top_k=15)
    ranking = self.reranker.rank(query=query, chunks=candidates)

    ordered = []
    for entry in ranking:
        ordered.append(candidates[entry["corpus_id"]])
    return ordered
|
| 166 |
+
|
| 167 |
+
def create_new_collection(self, collection_name: str) -> None:
    """Ask the database wrapper (`self.db`) to create `collection_name`."""
    database = self.db
    database.create_collection(collection_name)
|
| 169 |
+
|
| 170 |
+
def get_collections_names(self) -> list[str]:
    """Return whatever `self.db.get_collections()` reports as existing collections."""
    names = self.db.get_collections()
    return names
|
app/core/response_parser.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.core.document_validator import path_is_valid
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Replaces the matched regular expression with a link via an HTML <a></a> tag
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def create_url(match: re.Match) -> str:
    """
    Build the HTML link that replaces one matched citation.

    Args:
        match: Regex match whose groups 1-4 hold the source path, page number,
            line range ("start-end") and start offset of the citation.

    Returns:
        An `<a>` element pointing at `/viewer` with the citation data as query
        parameters, or the marker "###NOT VALID PATH###" when `path_is_valid`
        rejects the path.
    """
    # Imported locally so this block does not depend on the module's import list.
    from html import escape
    from urllib.parse import urlencode

    path: str = match.group(1)
    page: str = match.group(2)
    lines: str = match.group(3)
    start: str = match.group(4)

    if not path_is_valid(path):
        return "###NOT VALID PATH###"

    # File paths may contain spaces, '&', '#', quotes or backslashes. Percent-encode
    # the query values so they reach the viewer intact, then HTML-escape the whole
    # URL so it cannot break out of the href attribute.
    query = urlencode({"path": path, "page": page, "lines": lines, "start": start})
    href = escape(f"/viewer?{query}", quote=True)

    return f'<a href="{href}">[Source]</a>'
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
"""
|
| 22 |
+
Replaces all occurrences of citation pattern with links
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def add_links(response: str) -> str:
    """
    Replace every citation of the form
    "[Source: <path>, Page: <n>, Lines: <a>-<b>, Start: <n>]" in `response`
    with the link produced by `create_url`.
    """
    citation_pattern = re.compile(
        r"\[Source:\s*([^,]+?)\s*,\s*Page:\s*(\d+)\s*,\s*Lines:\s*(\d+\s*-\s*\d+)\s*,\s*Start:?\s*(\d+)\]"
    )
    return citation_pattern.sub(create_url, response)
|
app/core/some.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Replaces the matched regular expression with a link via an HTML <a></a> tag
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def create_url(match: re.Match) -> str:
    """
    Turn one matched citation into an `<a>` element whose href is the cited path.

    Groups 2-4 (page, line range, start offset) are read from the match but are
    not used in the generated link.
    """
    path, page, lines, start = match.group(1, 2, 3, 4)
    return f'<a href="{path}">[Source]</a>'
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
"""
|
| 18 |
+
Replaces all occurrences of citation pattern with links
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def add_links(response: str) -> str:
    """
    Replace every citation of the form
    "[Source: <path>, Page: <n>, Lines: <a>-<b>, Start: <n>]" in `response`
    with the link produced by `create_url`.
    """
    citation_pattern = re.compile(
        r"\[Source:\s*([^,]+?)\s*,\s*Page:\s*(\d+)\s*,\s*Lines:\s*(\d+\s*-\s*\d+)\s*,\s*Start:?\s*(\d+)\]"
    )
    return citation_pattern.sub(create_url, response)
|
| 26 |
+
|
| 27 |
+
# Manual smoke check: prints the link generated for one sample citation with a Windows path.
# NOTE(review): this appears to be a module-level statement, so it would run on import — confirm it is intended.
print(add_links(r"[Source: C:\Users\User\mine\code\The-Ultimate-RAG\chats_storage\user_id=8b1be678-f2c7-4a63-b110-7627af9b1cf8\chat_id=d889d8dd-f74c-4b33-a214-d6c69b68eb98\documents\pdfs\7e2b5257-5261-4100-ae65-488e06af2e25.pdf, Page: 18, Lines: 1-2, Start: 0]"))
|
app/core/utils.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi.templating import Jinja2Templates
|
| 2 |
+
from fastapi import Request, UploadFile
|
| 3 |
+
|
| 4 |
+
from app.backend.controllers.chats import list_user_chats, verify_ownership_rights
|
| 5 |
+
from app.backend.controllers.users import get_current_user
|
| 6 |
+
from app.backend.models.users import User
|
| 7 |
+
from app.backend.models.documents import add_new_document
|
| 8 |
+
from app.core.rag_generator import RagSystem
|
| 9 |
+
from app.settings import BASE_DIR
|
| 10 |
+
|
| 11 |
+
from uuid import uuid4
|
| 12 |
+
import markdown
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
rag = None
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# <----------------------- System ----------------------->
|
| 19 |
+
def initialize_rag() -> RagSystem:
    """
    Return the module-level `rag` instance, creating it on first use.

    A `RagSystem` is constructed only while the global `rag` is still `None`;
    later calls return the same object.
    """
    global rag
    if rag is not None:
        return rag

    rag = RagSystem()
    return rag
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# <----------------------- Tools ----------------------->
|
| 27 |
+
"""
|
| 28 |
+
Updates response context and adds context of navbar (role, instance(or none)) and footer (none)
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def extend_context(context: dict, selected: int = None):
    """
    Add the navbar, footer and sidebar entries to a template context.

    The current user is resolved from `context["request"]` via `get_current_user`.
    The navbar and footer are switched off, the sidebar is switched on and
    receives the user's chat groups (an empty list when there is no user).

    Args:
        context: Template context; it is updated in place.
        selected: Value exposed to the sidebar as "selected" (None when omitted).

    Returns:
        The same `context` dict that was passed in.
    """
    user = get_current_user(context.get("request"))
    role = "user" if user else "guest"
    chat_groups = list_user_chats(user.id) if user else []

    context.update(
        {
            "navbar": False,
            "navbar_path": "components/navbar.html",
            "navbar_context": {
                "chats": [],
                "user": {"role": role, "instance": user},
            },
        }
    )
    context.update({"footer": False, "footer_context": None})
    context.update(
        {
            "sidebar": True,
            "sidebar_path": "components/sidebar.html",
            "sidebar_context": {
                "selected": selected,
                "chat_groups": chat_groups,
            },
        }
    )

    return context
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
"""
|
| 60 |
+
Validates chat viewing permission by comparing user's chats and requested one
|
| 61 |
+
"""
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def protect_chat(user: User, chat_id: str) -> bool:
    """Return the result of `verify_ownership_rights` for `user` and `chat_id`."""
    is_owner = verify_ownership_rights(user, chat_id)
    return is_owner
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
async def save_documents(
    collection_name: str,
    files: list[UploadFile],
    RAG: RagSystem,
    user: User,
    chat_id: str,
    message_id: str
) -> None:
    """
    Store uploaded files for a chat and hand them to the RAG system.

    Each file is written under
    `BASE_DIR/chats_storage/user_id=<id>/chat_id=<id>/documents` (PDFs go into
    the `pdfs` sub-directory) under a fresh UUID name, and is registered with
    `add_new_document` before it is written. After all files are saved, their
    paths are passed to `RAG.upload_documents`.

    Args:
        collection_name: Collection the documents are uploaded into.
        files: Uploaded files; nothing happens when this is None or empty.
        RAG: RAG system that receives the saved file paths.
        user: Owner of the chat; `user.id` is used in the storage path.
        chat_id: Chat identifier used in the storage path.
        message_id: Message the documents are attached to.

    Raises:
        RuntimeError: If `add_new_document` fails for a file.
    """
    if not files:
        return

    storage = os.path.join(
        BASE_DIR,
        "chats_storage",
        f"user_id={user.id}",
        f"chat_id={chat_id}",
        "documents",
    )
    os.makedirs(os.path.join(storage, "pdfs"), exist_ok=True)

    docs = []
    for file in files:
        content = await file.read()
        document_id = str(uuid4())

        # Take the extension from the base name only, so a client-supplied name
        # cannot inject path separators and a dot-less name yields no extension.
        original_name = file.filename or ""
        extension = os.path.splitext(os.path.basename(original_name))[1]

        if original_name.endswith(".pdf"):
            saved_file = os.path.join(storage, "pdfs", document_id + ".pdf")
        else:
            saved_file = os.path.join(storage, document_id + extension)

        try:
            add_new_document(
                id=document_id,
                name=file.filename,
                path=saved_file,
                message_id=message_id,
                size=file.size,
            )
        except Exception as e:
            print(e)
            # Chain the original error so the root cause stays in the traceback.
            raise RuntimeError("Error while adding document") from e

        with open(saved_file, "wb") as f:
            f.write(content)

        docs.append(saved_file)

    RAG.upload_documents(collection_name, docs)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def get_pdf_path(path: str) -> str:
    """
    Return the part of `path` that starts at the first "chats_storage".

    Args:
        path: Path that is expected to contain a "chats_storage" component.

    Returns:
        "chats_storage" followed by everything after its first occurrence,
        or an empty string when `path` does not contain "chats_storage".
    """
    # partition() splits on the first occurrence only, so any later
    # "chats_storage" in the path is kept instead of being dropped.
    _, marker, remainder = path.partition("chats_storage")
    if not marker:
        return ""
    return marker + remainder
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def construct_collection_name(user: User, chat_id: int) -> str:
    """Return the collection name "user_id_<user.id>_chat_id_<chat_id>"."""
    owner_id = user.id
    return f"user_id_{owner_id}_chat_id_{chat_id}"
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def create_collection(user: User, chat_id: int, RAG: RagSystem) -> None:
    """
    Create the collection that belongs to the given user and chat.

    Args:
        user: Owner of the chat.
        chat_id: Chat identifier.
        RAG: RAG system in which the collection is created.

    Raises:
        RuntimeError: If `RAG` is None.
    """
    if RAG is None:
        raise RuntimeError("RAG was not initialized")

    RAG.create_new_collection(construct_collection_name(user, chat_id))
    # List collections through the instance that was passed in; the module-level
    # `rag` may still be None when the caller built its own RagSystem.
    print(RAG.get_collections_names())
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def lines_to_markdown(lines: list[str]) -> list[str]:
    """Pass each line through `markdown.markdown` and return the results in order."""
    rendered = []
    for raw_line in lines:
        rendered.append(markdown.markdown(raw_line))
    return rendered
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# <----------------------- Handlers ----------------------->
|
| 139 |
+
def PDFHandler(
    request: Request, path: str, page: int, templates
) -> Jinja2Templates.TemplateResponse:
    """
    Render the PDF viewer page for `path`.

    The incoming path and the path derived from it by `get_pdf_path` are both
    printed. The template receives the page number as a string (1 when `page`
    is falsy), the derived `url_path` and the current user.
    """
    print(path)
    url_path = get_pdf_path(path=path)
    print(url_path)

    render = templates.TemplateResponse
    page_context = {
        "request": request,
        "page": str(page or 1),
        "url_path": url_path,
        "user": get_current_user(request),
    }
    return render("pages/show_pdf.html", extend_context(page_context))
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def TextHandler(
    request: Request, path: str, lines: str, templates
) -> Jinja2Templates.TemplateResponse:
    """
    Render the text viewer page with the cited lines separated from the rest.

    The file at `path` is read as text and split on newlines. `lines` must have
    the form "<start>-<end>" (1-based, inclusive). Empty lines are skipped but
    still count towards line numbering. Every remaining line is rendered through
    `lines_to_markdown` and placed before, inside or after the citation.

    NOTE(review): `path` is opened as given, with the platform default encoding;
    confirm that callers validate it.
    """
    with open(path, "r") as source_file:
        file_content = source_file.read()

    start_line, end_line = (int(bound) for bound in lines.split("-"))

    before, cited, after = [], [], []
    for line_number, line in enumerate(file_content.split("\n"), start=1):
        if not line:
            continue
        if line_number < start_line:
            before.append(line)
        elif line_number > end_line:
            after.append(line)
        else:
            cited.append(line)

    render = templates.TemplateResponse
    page_context = {
        "request": request,
        "text_before_citation": lines_to_markdown(before),
        "text_after_citation": lines_to_markdown(after),
        "citation": lines_to_markdown(cited),
        # True exactly when at least one non-empty line fell inside the range.
        "anchor_added": bool(cited),
        "user": get_current_user(request),
    }
    return render("pages/show_text.html", extend_context(page_context))
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
"""
|
| 203 |
+
Optional handler
|
| 204 |
+
"""
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def DocHandler():
    """Placeholder handler; it currently does nothing and returns None."""
    return None
|
app/frontend/static/styles.css
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pdf-container {
|
| 2 |
+
margin: 0 auto;
|
| 3 |
+
max-width: 100%;
|
| 4 |
+
overflow-x: auto;
|
| 5 |
+
text-align: center;
|
| 6 |
+
padding: 20px 0;
|
| 7 |
+
}
|
| 8 |
+
|
| 9 |
+
#pdf-canvas {
|
| 10 |
+
margin: 0 auto;
|
| 11 |
+
display: block;
|
| 12 |
+
max-width: 100%;
|
| 13 |
+
box-shadow: 0 0 5px rgba(0,0,0,0.2);
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
#pageNum {
|
| 17 |
+
height: 40px; /* optional */
|
| 18 |
+
font-size: 16px; /* makes text inside input larger */
|
| 19 |
+
padding: 10px;
|
| 20 |
+
width: 9vh; /* optional for more padding inside the box */
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
.page-input {
|
| 24 |
+
width: 60px;
|
| 25 |
+
padding: 8px;
|
| 26 |
+
padding-right: 40px; /* reserve space for label inside input box */
|
| 27 |
+
text-align: center;
|
| 28 |
+
border: 1px solid #ddd;
|
| 29 |
+
border-radius: 4px;
|
| 30 |
+
-moz-appearance: textfield;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
.page-input-label {
|
| 34 |
+
position: absolute;
|
| 35 |
+
right: 12px;
|
| 36 |
+
top: 50%;
|
| 37 |
+
transform: translateY(-50%);
|
| 38 |
+
font-size: 12px;
|
| 39 |
+
color: #666;
|
| 40 |
+
pointer-events: none;
|
| 41 |
+
background-color: #fff; /* Match background to prevent text overlapping */
|
| 42 |
+
padding-left: 4px;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.page-input-container {
|
| 46 |
+
position: relative;
|
| 47 |
+
display: inline-flex;
|
| 48 |
+
align-items: center;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
/* Hide number arrows in Chrome/Safari */
|
| 52 |
+
.page-input::-webkit-outer-spin-button,
|
| 53 |
+
.page-input::-webkit-inner-spin-button {
|
| 54 |
+
-webkit-appearance: none;
|
| 55 |
+
margin: 0;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
/* Pagination styling */
|
| 59 |
+
.pagination-container {
|
| 60 |
+
margin: 20px 0;
|
| 61 |
+
text-align: center;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.pagination {
|
| 65 |
+
display: inline-flex;
|
| 66 |
+
align-items: center;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
.pagination-button {
|
| 70 |
+
padding: 8px 16px;
|
| 71 |
+
background: #4a6fa5;
|
| 72 |
+
color: white;
|
| 73 |
+
border: none;
|
| 74 |
+
border-radius: 4px;
|
| 75 |
+
cursor: pointer;
|
| 76 |
+
display: flex;
|
| 77 |
+
align-items: center;
|
| 78 |
+
gap: 5px;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
.pagination-button-text:hover {
|
| 82 |
+
background-color: #e0e0e0;
|
| 83 |
+
transform: translateY(-1px);
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
.pagination-button-text:active {
|
| 87 |
+
transform: translateY(0);
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
.text-viewer {
|
| 91 |
+
overflow-y: auto; /* Enables vertical scrolling when needed */
|
| 92 |
+
height: 100%;
|
| 93 |
+
width: 100%; /* Or whatever height you prefer */
|
| 94 |
+
font-family: monospace;
|
| 95 |
+
white-space: pre-wrap; /* Preserve line breaks but wrap text */
|
| 96 |
+
background: #f8f8f8;
|
| 97 |
+
padding: 20px;
|
| 98 |
+
border-radius: 5px;
|
| 99 |
+
line-height: 1.5;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
.citation {
|
| 103 |
+
background-color: rgba(0, 255, 0, 0.2);
|
| 104 |
+
padding: 2px 0;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
.no-content {
|
| 108 |
+
color: #999;
|
| 109 |
+
font-style: italic;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.pagination-container-text {
|
| 113 |
+
margin: 20px 0;
|
| 114 |
+
text-align: center;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.pagination-button-text {
|
| 118 |
+
padding: 8px 16px;
|
| 119 |
+
background: #4a6fa5;
|
| 120 |
+
color: white;
|
| 121 |
+
border: none;
|
| 122 |
+
border-radius: 4px;
|
| 123 |
+
cursor: pointer;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
/* -------------------------------------------- */
|
| 129 |
+
|
| 130 |
+
body {
|
| 131 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
|
| 132 |
+
background-color: #f7f7f8;
|
| 133 |
+
color: #111827;
|
| 134 |
+
margin: 0;
|
| 135 |
+
overflow: hidden;
|
| 136 |
+
height: 100vh;
|
| 137 |
+
padding: 0;
|
| 138 |
+
display: flex;
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.sidebar {
|
| 142 |
+
width: 260px;
|
| 143 |
+
height: 100vh;
|
| 144 |
+
background-color: #1F2937;
|
| 145 |
+
/* border-right: 1px solid #e1e4e8; */
|
| 146 |
+
overflow-y: auto;
|
| 147 |
+
padding: 8px;
|
| 148 |
+
position: sticky;
|
| 149 |
+
top: 0;
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
.chat-page {
|
| 153 |
+
background-color: #111827;
|
| 154 |
+
flex: 1;
|
| 155 |
+
display: flex;
|
| 156 |
+
flex-direction: column;
|
| 157 |
+
height: 100vh;
|
| 158 |
+
overflow: hidden; /* Prevent double scrollbars */
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
.container {
|
| 162 |
+
flex: 1;
|
| 163 |
+
display: flex;
|
| 164 |
+
flex-direction: column;
|
| 165 |
+
padding: 0;
|
| 166 |
+
max-width: 100%;
|
| 167 |
+
height: 100%;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
/* Chat messages section */
|
| 171 |
+
.chat-messages {
|
| 172 |
+
flex: 1;
|
| 173 |
+
overflow-y: auto; /* Make only this section scrollable */
|
| 174 |
+
padding: 16px;
|
| 175 |
+
display: flex;
|
| 176 |
+
flex-direction: column;
|
| 177 |
+
gap: 16px;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
/* Input area - stays fixed at bottom */
|
| 181 |
+
.input-group {
|
| 182 |
+
/* padding: 16px;
|
| 183 |
+
background-color: #44444C; */
|
| 184 |
+
/* border-top: 1px solid #e1e4e8; */
|
| 185 |
+
position: sticky;
|
| 186 |
+
bottom: 0;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
/* General styles */
|
| 190 |
+
|
| 191 |
+
/* Sidebar styles */
|
| 192 |
+
|
| 193 |
+
.chat-group {
|
| 194 |
+
font-weight: 500;
|
| 195 |
+
color: #9bb8d3;
|
| 196 |
+
text-transform: uppercase;
|
| 197 |
+
letter-spacing: 0.5px;
|
| 198 |
+
font-size: 12px;
|
| 199 |
+
padding: 8px 12px;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
.btn {
|
| 203 |
+
border-radius: 10px;
|
| 204 |
+
padding: 8px 12px;
|
| 205 |
+
font-size: 14px;
|
| 206 |
+
transition: all 0.2s;
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
.btn-success {
|
| 210 |
+
background-color: #19c37d;
|
| 211 |
+
border-color: #19c37d;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
.btn-success:hover {
|
| 215 |
+
background-color: #16a369;
|
| 216 |
+
border-color: #16a369;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
.btn-outline-secondary {
|
| 220 |
+
/* border-color: #e1e4e8; */
|
| 221 |
+
color: #374151;
|
| 222 |
+
background-color: transparent;
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
.btn-outline-secondary:hover {
|
| 226 |
+
background-color: #273c50;
|
| 227 |
+
border-color: #e1e4e8;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
.btn-outline-light {
|
| 231 |
+
border-color: #e1e4e8;
|
| 232 |
+
color: #666;
|
| 233 |
+
background-color: transparent;
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
.btn-outline-light:hover {
|
| 237 |
+
background-color: #e9ecef;
|
| 238 |
+
border-color: #e1e4e8;
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
/* Chat page styles */
|
| 242 |
+
|
| 243 |
+
.message {
|
| 244 |
+
max-width: 80%;
|
| 245 |
+
padding: 12px 16px;
|
| 246 |
+
border-radius: 12px;
|
| 247 |
+
line-height: 1.5;
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
.user-message {
|
| 251 |
+
align-self: flex-end;
|
| 252 |
+
background-color: #19c37d;
|
| 253 |
+
color: white;
|
| 254 |
+
border-bottom-right-radius: 4px;
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
.assistant-message {
|
| 258 |
+
align-self: flex-start;
|
| 259 |
+
background-color: #f0f4f8;
|
| 260 |
+
border-bottom-left-radius: 4px;
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
.message-header {
|
| 264 |
+
font-weight: 600;
|
| 265 |
+
font-size: 12px;
|
| 266 |
+
margin-bottom: 4px;
|
| 267 |
+
color: #666;
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
.user-message .message-header {
|
| 271 |
+
color: rgba(255, 255, 255, 0.8);
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
.message-content {
|
| 275 |
+
font-size: 14px;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
.form-control {
|
| 280 |
+
border-radius: 6px;
|
| 281 |
+
padding: 10px 12px;
|
| 282 |
+
background-color: #374151;
|
| 283 |
+
/* border: 1px solid #e1e4e8; */
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
.form-control:focus {
|
| 287 |
+
box-shadow: none;
|
| 288 |
+
border-color: #19c37d;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
/* File input button */
|
| 292 |
+
.btn-outline-secondary {
|
| 293 |
+
position: relative;
|
| 294 |
+
}
|
| 295 |
+
|
| 296 |
+
.btn-outline-secondary input[type="file"] {
|
| 297 |
+
position: absolute;
|
| 298 |
+
opacity: 0;
|
| 299 |
+
width: 100%;
|
| 300 |
+
height: 100%;
|
| 301 |
+
top: 0;
|
| 302 |
+
left: 0;
|
| 303 |
+
cursor: pointer;
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
/* Scrollbar styles */
|
| 307 |
+
::-webkit-scrollbar {
|
| 308 |
+
width: 8px;
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
::-webkit-scrollbar-track {
|
| 312 |
+
background: #f1f1f1;
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
::-webkit-scrollbar-thumb {
|
| 316 |
+
background: #ccc;
|
| 317 |
+
border-radius: 4px;
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
::-webkit-scrollbar-thumb:hover {
|
| 321 |
+
background: #aaa;
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
/* Responsive adjustments */
|
| 325 |
+
@media (max-width: 768px) {
|
| 326 |
+
.sidebar {
|
| 327 |
+
width: 220px;
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
.message {
|
| 331 |
+
max-width: 90%;
|
| 332 |
+
}
|
| 333 |
+
}
|
| 334 |
+
|
| 335 |
+
#queryInput {
|
| 336 |
+
background-color: #374151;
|
| 337 |
+
color: white;
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
#queryInput:focus {
|
| 341 |
+
background-color: #374151;
|
| 342 |
+
color: white;
|
| 343 |
+
outline: none;
|
| 344 |
+
box-shadow: none;
|
| 345 |
+
border-color: #19c37d; /* optional green border for focus, remove if unwanted */
|
| 346 |
+
}
|
| 347 |
+
|
| 348 |
+
#searchButton {
|
| 349 |
+
background-color: #374151;
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
#fileInput {
|
| 353 |
+
background-color: #374151;
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
/* For the placeholder text color */
|
| 358 |
+
#queryInput::placeholder {
|
| 359 |
+
color: rgba(255, 255, 255, 0.7); /* Slightly transparent white */
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
.auth-card {
|
| 363 |
+
background-color: #1F2937;
|
| 364 |
+
border: none;
|
| 365 |
+
border-radius: 12px;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
.auth-input {
|
| 369 |
+
background-color: #374151 !important;
|
| 370 |
+
border: none !important;
|
| 371 |
+
color: white !important;
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
.auth-input-group-text {
|
| 375 |
+
background-color: #374151 !important;
|
| 376 |
+
border: none !important;
|
| 377 |
+
}
|
app/frontend/templates/base.html
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
{% block title %}
|
| 7 |
+
{% endblock %}
|
| 8 |
+
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 9 |
+
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.10.5/font/bootstrap-icons.css" rel="stylesheet">
|
| 10 |
+
<link href="/static/styles.css" rel="stylesheet">
|
| 11 |
+
{% block head_scripts %}
|
| 12 |
+
{% endblock %}
|
| 13 |
+
</head>
|
| 14 |
+
<body>
|
| 15 |
+
{% if navbar %}
|
| 16 |
+
{% with context=navbar_context %}
|
| 17 |
+
{% include navbar_path %}
|
| 18 |
+
{% endwith %}
|
| 19 |
+
{% endif %}
|
| 20 |
+
|
| 21 |
+
{% if sidebar %}
|
| 22 |
+
{% with context=sidebar_context %}
|
| 23 |
+
{% include sidebar_path %}
|
| 24 |
+
{% endwith %}
|
| 25 |
+
{% endif %}
|
| 26 |
+
|
| 27 |
+
{% block content %}
|
| 28 |
+
{% with context=sidebar_context %}
|
| 29 |
+
{% include sidebar_path %}
|
| 30 |
+
{% endwith %}
|
| 31 |
+
{% endblock %}
|
| 32 |
+
|
| 33 |
+
{% if footer %}
|
| 34 |
+
{% with context=footer_context %}
|
| 35 |
+
{% include footer_path %}
|
| 36 |
+
{% endwith %}
|
| 37 |
+
{% endif %}
|
| 38 |
+
|
| 39 |
+
{% block body_scripts %}
|
| 40 |
+
{% endblock %}
|
| 41 |
+
</body>
|
| 42 |
+
</html>
|
app/frontend/templates/components/navbar.html
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- All the data is accessible via context -->
|
| 2 |
+
<div>
|
| 3 |
+
{% if context.user.role == "guest" %}
|
| 4 |
+
<p>Hello, guest!</p>
|
| 5 |
+
{% else %}
|
| 6 |
+
<p>Hello, {{ context.user.instance.email }}</p>
|
| 7 |
+
{% endif %}
|
| 8 |
+
|
| 9 |
+
<p>Today</p>
|
| 10 |
+
<ul>
|
| 11 |
+
{% for chat in context.chats.today %}
|
| 12 |
+
<li>{{ chat.title }}</li>
|
| 13 |
+
{% endfor %}
|
| 14 |
+
</ul>
|
| 15 |
+
<p>Last week</p>
|
| 16 |
+
<ul>
|
| 17 |
+
{% for chat in context.chats.last_week %}
|
| 18 |
+
<li>{{ chat.title }}</li>
|
| 19 |
+
{% endfor %}
|
| 20 |
+
</ul>
|
| 21 |
+
<p>Last month</p>
|
| 22 |
+
<ul>
|
| 23 |
+
{% for chat in context.chats.last_month %}
|
| 24 |
+
<li>{{ chat.title }}</li>
|
| 25 |
+
{% endfor %}
|
| 26 |
+
</ul>
|
| 27 |
+
<p>Later</p>
|
| 28 |
+
<ul>
|
| 29 |
+
{% for chat in context.chats.other %}
|
| 30 |
+
<li>{{ chat.title }}</li>
|
| 31 |
+
{% endfor %}
|
| 32 |
+
</ul>
|
| 33 |
+
</div>
|
app/frontend/templates/components/sidebar.html
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="sidebar">
|
| 2 |
+
<div class="d-flex justify-content-between align-items-center p-3">
|
| 3 |
+
<form action="/new_chat" method="post">
|
| 4 |
+
<button type="submit" class="btn btn-success w-100">+ Add new chat</button>
|
| 5 |
+
</form>
|
| 6 |
+
</div>
|
| 7 |
+
|
| 8 |
+
{% if context.chat_groups %}
|
| 9 |
+
{% for group in context.chat_groups %}
|
| 10 |
+
<div class="chat-group px-3 text mt-3">{{ group.title }}</div>
|
| 11 |
+
{% for chat in group.chats %}
|
| 12 |
+
<form action="/chats/id={{ chat.id }}" method="get" class="px-3 my-1">
|
| 13 |
+
{% if context.selected == chat.id %}
|
| 14 |
+
<button type="submit" class="btn btn-outline-secondary w-100 text-start text-truncate text-success">
|
| 15 |
+
{{ chat.title }}
|
| 16 |
+
</button>
|
| 17 |
+
{% else %}
|
| 18 |
+
<button type="submit" class="btn btn-outline-secondary w-100 text-start text-truncate text-white">
|
| 19 |
+
{{ chat.title }}
|
| 20 |
+
</button>
|
| 21 |
+
{% endif %}
|
| 22 |
+
</form>
|
| 23 |
+
{% endfor %}
|
| 24 |
+
{% endfor %}
|
| 25 |
+
{% endif %}
|
| 26 |
+
</div>
|
app/frontend/templates/pages/chat.html
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}
|
| 4 |
+
<title>
|
| 5 |
+
The Ultimate RAG
|
| 6 |
+
</title>
|
| 7 |
+
{% endblock %}
|
| 8 |
+
|
| 9 |
+
{% block content %}
|
| 10 |
+
<div class="chat-page">
|
| 11 |
+
<div class="container py-4">
|
| 12 |
+
<div id="chat-messages" class="chat-messages">
|
| 13 |
+
<!-- {% for message in history %}
|
| 14 |
+
<div class="message {{ message.role }}-message">
|
| 15 |
+
<div class="message-header">
|
| 16 |
+
{{ "You" if message.role == "user" else "Assistant" }}
|
| 17 |
+
</div>
|
| 18 |
+
<div class="message-content">{{ message.content | safe }}</div>
|
| 19 |
+
</div>
|
| 20 |
+
{% endfor %} -->
|
| 21 |
+
</div>
|
| 22 |
+
|
| 23 |
+
<form id="chat-form" class="input-group mt-4" enctype="multipart/form-data">
|
| 24 |
+
<input type="text" class="form-control" name="prompt" placeholder="Ask your question here" id="queryInput">
|
| 25 |
+
<label class="btn btn-outline-secondary btn-primary">
|
| 26 |
+
📎<input type="file" id="fileInput" name="files" multiple hidden>
|
| 27 |
+
</label>
|
| 28 |
+
<button type="button" class="btn text-white" id="searchButton">Send</button>
|
| 29 |
+
</form>
|
| 30 |
+
</div>
|
| 31 |
+
</div>
|
| 32 |
+
{% endblock %}
|
| 33 |
+
|
| 34 |
+
{% block body_scripts %}
|
| 35 |
+
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 36 |
+
<script>
|
| 37 |
+
const initialChatId = "{{ chat_id }}";
|
| 38 |
+
const initialHistory = {{ history | tojson | safe }};
|
| 39 |
+
// Conversation state
|
| 40 |
+
let conversationId = initialChatId || null;
|
| 41 |
+
|
| 42 |
+
if (initialHistory && Array.isArray(initialHistory)) {
|
| 43 |
+
initialHistory.forEach(msg => {
|
| 44 |
+
addMessageToChat(msg.role, msg.content);
|
| 45 |
+
});
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
// Main chat function
|
| 49 |
+
// Send handler: posts the prompt (plus any attached files) to
// /message_with_docs, streams the reply into a placeholder bubble, then asks
// /replace_message for the post-processed text and swaps it in.
document.getElementById('searchButton').addEventListener('click', async function() {
  const query = document.getElementById('queryInput').value.trim();
  if (!query) return alert('Please enter a question');

  addMessageToChat('user', escapeHTML(query));
  document.getElementById('queryInput').value = '';
  // Placeholder bubble whose content is rewritten as chunks arrive.
  const loadingId = addMessageToChat('assistant', '', true);

  try {
    const formData = new FormData();
    const fileInput = document.getElementById('fileInput');
    const files = fileInput.files;
    for (let i = 0; i < files.length; i++) {
      formData.append('files', files[i]);
    }
    formData.append('prompt', query);
    if (conversationId) formData.append('chat_id', conversationId);

    const response = await fetch('/message_with_docs', {
      method: 'POST',
      body: formData
    });

    if (!response.ok) throw new Error(`HTTP error: ${response.status}`);

    const reader = response.body.getReader();
    const decoder = new TextDecoder("utf-8");
    let fullMessage = "";

    while (true) {
      const { value, done } = await reader.read();
      if (done) break;

      // stream: true keeps a multi-byte character that is split across two
      // chunks buffered inside the decoder instead of emitting U+FFFD.
      fullMessage += decoder.decode(value, { stream: true });
      updateMessageContent(loadingId, marked.parse(fullMessage));
    }
    // Flush whatever the decoder is still buffering at end of stream.
    fullMessage += decoder.decode();

    removeMessage(loadingId);
    // addMessageToChat runs the content through marked.parse itself, so it is
    // given the raw Markdown; pre-rendering here parsed the text twice.
    const finalId = addMessageToChat('assistant', fullMessage);

    try {
      const replaceResponse = await fetch('/replace_message', {
        method: 'POST',
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ message: fullMessage, chat_id: initialChatId })
      });

      if (!replaceResponse.ok) throw new Error(`Replace error: ${replaceResponse.status}`);

      const data = await replaceResponse.json(); // expects { "updated_message": "..." }

      updateMessageContent(finalId, marked.parse(data.updated_message));

    } catch (error) {
      // Best effort: the streamed answer stays on screen if the rewrite fails.
      console.error("Error replacing message:", error);
    }

  } catch (error) {
    removeMessage(loadingId);
    addMessageToChat('assistant', `Error: ${error.message}`, false, 'error');
    console.error('Error:', error);
  }
});
|
| 113 |
+
|
| 114 |
+
/**
 * Replace the rendered body of an existing chat bubble.
 *
 * @param {string} messageId  DOM id of the bubble.
 * @param {string} newContent HTML assigned to the bubble's `.message-content`.
 *
 * Does nothing when the bubble, or its content element, is not found.
 */
function updateMessageContent(messageId, newContent) {
  const bubble = document.getElementById(messageId);
  if (!bubble) return;

  const body = bubble.querySelector('.message-content');
  if (!body) return;

  body.innerHTML = newContent;
}
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
// Message display helper
|
| 124 |
+
/**
 * Append a message bubble to #chat-messages and scroll it into view.
 *
 * @param {string}  role        'user' renders the header "You"; any other
 *                              value renders "Assistant".
 * @param {string}  content     Markdown source; rendered with marked.parse.
 * @param {boolean} isTemporary Accepted for callers that flag placeholder
 *                              bubbles; not read by this function.
 * @param {string}  className   Extra CSS class appended to the bubble.
 * @returns {string} DOM id of the new bubble, for later update or removal.
 */
function addMessageToChat(role, content, isTemporary = false, className = '') {
  const chatMessages = document.getElementById('chat-messages');

  // Date.now() alone repeats for calls made within the same millisecond (the
  // user bubble and the loading bubble are created back to back), and a
  // duplicate id makes getElementById resolve to the wrong bubble. A per-page
  // sequence number keeps every id unique.
  addMessageToChat.sequence = (addMessageToChat.sequence || 0) + 1;
  const messageId = `msg-${Date.now()}-${addMessageToChat.sequence}`;

  const messageDiv = document.createElement('div');
  messageDiv.className = `message ${role}-message ${className}`;
  messageDiv.id = messageId;

  messageDiv.innerHTML = `
    <div class="message-header">${role === 'user' ? 'You' : 'Assistant'}</div>
    <div class="message-content">${marked.parse(content)}</div>
  `;

  chatMessages.appendChild(messageDiv);
  chatMessages.scrollTop = chatMessages.scrollHeight;

  return messageId; // always return the ID so you can update it later
}
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
/**
 * Remove a chat bubble from the page.
 *
 * @param {string} messageId DOM id of the bubble; unknown ids are ignored.
 */
function removeMessage(messageId) {
  const bubble = document.getElementById(messageId);
  if (!bubble) return;
  bubble.remove();
}
|
| 148 |
+
|
| 149 |
+
/**
 * HTML-escape a string by letting the browser serialise it: the value is
 * assigned as the text of a detached <div> and read back as markup.
 *
 * @param {string} str Text to escape.
 * @returns {string} The element's innerHTML after the assignment.
 */
function escapeHTML(str) {
  const holder = Object.assign(document.createElement('div'), { textContent: str });
  return holder.innerHTML;
}
|
| 154 |
+
// New chat handler
|
| 155 |
+
// Reset client-side conversation state and clear the transcript before the
// "new chat" form is submitted. The form is not part of this template, so the
// lookup is guarded: without the guard a missing form raised a TypeError.
const newChatForm = document.querySelector('form[action="/new_chat"]');
if (newChatForm) {
  newChatForm.addEventListener('submit', function(e) {
    e.preventDefault();
    conversationId = null;
    // Explicit window property; the bare assignment created an implicit global.
    window.conversationHistory = [];
    document.getElementById('chat-messages').innerHTML = '';
    // form.submit() does not re-dispatch the submit event, so this does not loop.
    this.submit();
  });
}
|
| 162 |
+
</script>
|
| 163 |
+
{% endblock %}
|
app/frontend/templates/pages/login.html
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}
|
| 4 |
+
<title>Login</title>
|
| 5 |
+
{% endblock %}
|
| 6 |
+
|
| 7 |
+
{% block content %}
|
| 8 |
+
<div class="d-flex justify-content-center align-items-center vh-100" style="background-color: #111827; width: 100%;">
|
| 9 |
+
<div class="card p-4" style="min-width: 360px; background-color: #1F2937; border: none; border-radius: 12px;">
|
| 10 |
+
<div class="text-center mb-4">
|
| 11 |
+
<div class="rounded-circle d-inline-flex align-items-center justify-content-center"
|
| 12 |
+
style="width: 60px; height: 60px; background-color: #19c37d;">
|
| 13 |
+
<i class="bi bi-person-fill text-white" style="font-size: 1.5rem;"></i>
|
| 14 |
+
</div>
|
| 15 |
+
<h3 class="mt-3 text-white">Log In</h3>
|
| 16 |
+
</div>
|
| 17 |
+
|
| 18 |
+
<form id="loginForm" method="POST">
|
| 19 |
+
<div class="mb-3">
|
| 20 |
+
<label class="form-label text-white">Email</label>
|
| 21 |
+
<div class="input-group">
|
| 22 |
+
<input type="text" id="email" name="email"
|
| 23 |
+
class="form-control"
|
| 24 |
+
placeholder="your@email.com"
|
| 25 |
+
style="background-color: #374151; border: none; color: white;"
|
| 26 |
+
required>
|
| 27 |
+
<span class="input-group-text" style="background-color: #374151; border: none;">
|
| 28 |
+
<i class="bi bi-envelope text-muted"></i>
|
| 29 |
+
</span>
|
| 30 |
+
</div>
|
| 31 |
+
<div id="emailError" class="text-danger small mt-1"></div>
|
| 32 |
+
</div>
|
| 33 |
+
|
| 34 |
+
<div class="mb-3">
|
| 35 |
+
<label class="form-label text-white">Password</label>
|
| 36 |
+
<div class="input-group">
|
| 37 |
+
<input type="password" id="password" name="password"
|
| 38 |
+
class="form-control"
|
| 39 |
+
placeholder="••••••••"
|
| 40 |
+
style="background-color: #374151; border: none; color: white;"
|
| 41 |
+
required>
|
| 42 |
+
<span class="input-group-text" style="background-color: #374151; border: none;">
|
| 43 |
+
<i class="bi bi-lock text-muted"></i>
|
| 44 |
+
</span>
|
| 45 |
+
</div>
|
| 46 |
+
<div id="passwordError" class="text-danger small mt-1"></div>
|
| 47 |
+
</div>
|
| 48 |
+
|
| 49 |
+
<div class="d-flex justify-content-between align-items-center mb-4">
|
| 50 |
+
<div class="form-check">
|
| 51 |
+
<input type="checkbox" class="form-check-input" id="rememberMe" name="remember"
|
| 52 |
+
style="background-color: #374151; border-color: #4B5563;">
|
| 53 |
+
<label class="form-check-label text-white" for="rememberMe">Remember me</label>
|
| 54 |
+
</div>
|
| 55 |
+
<a href="#" class="text-success small" style="text-decoration: none;">Forgot password?</a>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-grid mb-3">
|
| 59 |
+
<button type="submit" class="btn btn-success rounded-pill py-2"
|
| 60 |
+
style="background-color: #19c37d; border: none;">
|
| 61 |
+
Login
|
| 62 |
+
</button>
|
| 63 |
+
</div>
|
| 64 |
+
|
| 65 |
+
<div class="text-center small text-white">
|
| 66 |
+
Don't have an account?
|
| 67 |
+
<a href="/new_user" class="text-success" style="text-decoration: none;">Register</a>
|
| 68 |
+
</div>
|
| 69 |
+
</form>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
{% endblock %}
|
| 73 |
+
|
| 74 |
+
{% block body_scripts %}
|
| 75 |
+
<script>
|
| 76 |
+
// Login submit handler: posts the credentials as JSON to /login, maps
// field-level validation errors onto the form, and redirects on success.
document.getElementById('loginForm').addEventListener('submit', async function(e) {
  e.preventDefault();

  // Clear previous errors
  const emailError = document.getElementById('emailError');
  const passwordError = document.getElementById('passwordError');
  emailError.textContent = '';
  passwordError.textContent = '';

  const email = document.getElementById('email').value.trim();
  const password = document.getElementById('password').value;

  try {
    const response = await fetch('/login', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ email, password })
    });

    const data = await response.json();

    if (!response.ok) {
      if (Array.isArray(data.detail)) {
        // List of per-field errors: route each one to its field by `loc`.
        data.detail.forEach(error => {
          if (error.loc && error.loc.includes('email')) {
            emailError.textContent = error.msg;
          }
          if (error.loc && error.loc.includes('password')) {
            passwordError.textContent = error.msg;
          }
        });
      } else if (data.detail) {
        alert(data.detail);
      } else {
        // A failed response without `detail` used to return silently,
        // leaving the user with no feedback at all.
        alert('Login failed. Please check your credentials and try again.');
      }
      return;
    }

    alert('You have logged in successfully!');
    window.location.href = '/last_user_chat';

  } catch (error) {
    // Network failure or a non-JSON response body.
    console.error('Error:', error);
    alert('An error occurred during logging in');
  }
});
|
| 123 |
+
|
| 124 |
+
// Password visibility toggle
|
| 125 |
+
// Password visibility toggle: clicking the icon next to a password field
// switches it between masked and plain text and swaps lock/eye icons.
document.querySelectorAll('.input-group').forEach(group => {
  const input = group.querySelector('input');
  const iconWrapper = group.querySelector('.input-group-text');

  // Wire up password fields only. Attaching the handler to every group meant
  // that clicking the envelope icon turned the email field into a masked
  // password field, and a group without an icon wrapper raised a TypeError.
  if (!input || !iconWrapper || input.type !== 'password') return;

  const icon = iconWrapper.querySelector('i');
  iconWrapper.style.cursor = 'pointer';

  iconWrapper.addEventListener('click', function () {
    const isPassword = input.type === 'password';
    input.type = isPassword ? 'text' : 'password';

    if (!icon) return;

    // Swap icon based on visibility: eye while the text is visible, otherwise
    // the lock variant that belongs to this field.
    const lockClass = input.id === 'confirmPassword'
      ? 'bi bi-lock-fill text-muted'
      : 'bi bi-lock text-muted';
    icon.className = isPassword ? 'bi bi-eye text-muted' : lockClass;
  });
});
|
| 144 |
+
</script>
|
| 145 |
+
{% endblock %}
|
app/frontend/templates/pages/main.html
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}
|
| 4 |
+
<title>The Ultimate RAG</title>
|
| 5 |
+
{% endblock %}
|
| 6 |
+
|
| 7 |
+
{% block content %}
|
| 8 |
+
<div class="container text-center d-flex align-items-center justify-content-center" style="height: 100vh; color: #111827">
|
| 9 |
+
<div>
|
| 10 |
+
<h1 class="display-4 fw-bold">The Ultimate RAG</h1>
|
| 11 |
+
<p class="lead">ask anything...</p>
|
| 12 |
+
</div>
|
| 13 |
+
</div>
|
| 14 |
+
{% endblock %}
|
app/frontend/templates/pages/registration.html
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}
|
| 4 |
+
<title>SignUp</title>
|
| 5 |
+
{% endblock %}
|
| 6 |
+
|
| 7 |
+
{% block content %}
|
| 8 |
+
<div class="d-flex justify-content-center align-items-center vh-100" style="background-color: #111827; width: 100%;">
|
| 9 |
+
<div class="card p-4" style="min-width: 360px; background-color: #1F2937; border: none; border-radius: 12px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);">
|
| 10 |
+
<div class="text-center mb-4">
|
| 11 |
+
<div class="rounded-circle d-inline-flex align-items-center justify-content-center"
|
| 12 |
+
style="width: 60px; height: 60px; background-color: #19c37d;">
|
| 13 |
+
<i class="bi bi-person-plus-fill text-white" style="font-size: 1.5rem;"></i>
|
| 14 |
+
</div>
|
| 15 |
+
<h3 class="mt-3 text-white">Create Account</h3>
|
| 16 |
+
<p class="text-white small mt-1">Join our community</p>
|
| 17 |
+
</div>
|
| 18 |
+
|
| 19 |
+
<form id="registerForm">
|
| 20 |
+
<div class="mb-3">
|
| 21 |
+
<label class="form-label text-white">Email Address</label>
|
| 22 |
+
<div class="input-group">
|
| 23 |
+
<input type="email" id="email" name="email"
|
| 24 |
+
class="form-control"
|
| 25 |
+
style="background-color: #374151; border: none; color: white; height: 44px;"
|
| 26 |
+
placeholder="your@email.com"
|
| 27 |
+
required>
|
| 28 |
+
<span class="input-group-text" style="background-color: #374151; border: none;">
|
| 29 |
+
<i class="bi bi-envelope text-muted"></i>
|
| 30 |
+
</span>
|
| 31 |
+
</div>
|
| 32 |
+
<div id="emailError" class="text-danger small mt-1"></div>
|
| 33 |
+
</div>
|
| 34 |
+
|
| 35 |
+
<div class="mb-3">
|
| 36 |
+
<label class="form-label text-white">Password</label>
|
| 37 |
+
<div class="input-group">
|
| 38 |
+
<input type="password" id="password" name="password"
|
| 39 |
+
class="form-control"
|
| 40 |
+
style="background-color: #374151; border: none; color: white; height: 44px;"
|
| 41 |
+
placeholder="••••••••"
|
| 42 |
+
required>
|
| 43 |
+
<span class="input-group-text" style="background-color: #374151; border: none;">
|
| 44 |
+
<i class="bi bi-lock text-muted"></i>
|
| 45 |
+
</span>
|
| 46 |
+
</div>
|
| 47 |
+
<div id="passwordError" class="text-danger small mt-1"></div>
|
| 48 |
+
</div>
|
| 49 |
+
|
| 50 |
+
<div class="mb-4">
|
| 51 |
+
<label class="form-label text-white">Confirm Password</label>
|
| 52 |
+
<div class="input-group">
|
| 53 |
+
<input type="password" id="confirmPassword" name="confirmPassword"
|
| 54 |
+
class="form-control"
|
| 55 |
+
style="background-color: #374151; border: none; color: white; height: 44px;"
|
| 56 |
+
placeholder="••••••••"
|
| 57 |
+
required>
|
| 58 |
+
<span class="input-group-text" style="background-color: #374151; border: none;">
|
| 59 |
+
<i class="bi bi-lock-fill text-muted"></i>
|
| 60 |
+
</span>
|
| 61 |
+
</div>
|
| 62 |
+
<div id="confirmPasswordError" class="text-danger small mt-1"></div>
|
| 63 |
+
</div>
|
| 64 |
+
|
| 65 |
+
<div class="d-grid mb-3">
|
| 66 |
+
<button type="submit" class="btn rounded-pill py-2 fw-medium"
|
| 67 |
+
style="background-color: #19c37d; border: none; color: white;">
|
| 68 |
+
<i class="bi bi-person-plus me-2"></i> Sign Up
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
|
| 72 |
+
<div class="text-center small text-white pt-2" style="border-top: 1px solid #374151;">
|
| 73 |
+
Already registered?
|
| 74 |
+
<a href="/login" class="text-success fw-medium" style="text-decoration: none;">Sign In</a>
|
| 75 |
+
</div>
|
| 76 |
+
</form>
|
| 77 |
+
</div>
|
| 78 |
+
</div>
|
| 79 |
+
{% endblock %}
|
| 80 |
+
|
| 81 |
+
{% block body_scripts %}
|
| 82 |
+
<script>
|
| 83 |
+
// Registration submit handler: checks that both passwords match, posts the
// new account as JSON to /new_user, maps field-level validation errors onto
// the form, and redirects on success.
document.getElementById('registerForm').addEventListener('submit', async function(e) {
  e.preventDefault();

  // Clear previous errors
  const emailError = document.getElementById('emailError');
  const passwordError = document.getElementById('passwordError');
  const confirmPasswordError = document.getElementById('confirmPasswordError');
  emailError.textContent = '';
  passwordError.textContent = '';
  confirmPasswordError.textContent = '';

  const email = document.getElementById('email').value;
  const password = document.getElementById('password').value;
  const confirmPassword = document.getElementById('confirmPassword').value;

  // The confirmation is checked client-side only; it is not sent to the server.
  if (password !== confirmPassword) {
    confirmPasswordError.textContent = 'Passwords do not match';
    return;
  }

  try {
    const response = await fetch('/new_user', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ email, password })
    });

    const data = await response.json();

    if (!response.ok) {
      if (Array.isArray(data.detail)) {
        // List of per-field errors: route each one to its field by `loc`.
        data.detail.forEach(error => {
          if (error.loc && error.loc.includes('email')) {
            emailError.textContent = error.msg;
          }
          if (error.loc && error.loc.includes('password')) {
            passwordError.textContent = error.msg;
          }
        });
      } else if (data.detail) {
        alert(data.detail);
      } else {
        // A failed response without `detail` used to return silently,
        // leaving the user with no feedback at all.
        alert('Registration failed. Please try again.');
      }
      return;
    }

    alert('Registration successful!');
    window.location.href = '/last_user_chat';

  } catch (error) {
    // Network failure or a non-JSON response body.
    console.error('Error:', error);
    alert('An error occurred during registration');
  }
});
|
| 137 |
+
|
| 138 |
+
// Add password visibility toggle
|
| 139 |
+
// Add password visibility toggle: clicking the icon next to a password field
// switches it between masked and plain text and swaps lock/eye icons.
document.querySelectorAll('.input-group-text').forEach(icon => {
  const input = icon.parentElement.querySelector('input');

  // Wire up password fields only. The handler used to be attached to the
  // email field's envelope as well, and clicking it turned that field into a
  // masked password field.
  if (!input || input.type !== 'password') return;

  // Remember which lock variant this field started with. The old check looked
  // for `bi-lock-fill` on the wrapper <span>, which never carries it, so the
  // confirm-password icon was always restored as the plain lock.
  const usesFilledLock = icon.querySelector('.bi-lock-fill') !== null;

  icon.style.cursor = 'pointer';
  icon.addEventListener('click', function() {
    if (input.type === 'password') {
      input.type = 'text';
      this.innerHTML = '<i class="bi bi-eye text-muted"></i>';
    } else {
      input.type = 'password';
      this.innerHTML = usesFilledLock ?
        '<i class="bi bi-lock-fill text-muted"></i>' :
        '<i class="bi bi-lock text-muted"></i>';
    }
  });
});
|
| 154 |
+
</script>
|
| 155 |
+
{% endblock %}
|
app/frontend/templates/pages/show_pdf.html
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}
|
| 4 |
+
<title>PDF Viewer</title>
|
| 5 |
+
{% endblock %}
|
| 6 |
+
|
| 7 |
+
{% block head_scripts %}
|
| 8 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
|
| 9 |
+
{% endblock %}
|
| 10 |
+
|
| 11 |
+
{% block content %}
|
| 12 |
+
<div class="pagination-container">
|
| 13 |
+
<div class="pagination">
|
| 14 |
+
<button id="prev" class="pagination-button">
|
| 15 |
+
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
|
| 16 |
+
<path fill-rule="evenodd" d="M11.354 1.646a.5.5 0 0 1 0 .708L5.707 8l5.647 5.646a.5.5 0 0 1-.708.708l-6-6a.5.5 0 0 1 0-.708l6-6a.5.5 0 0 1 .708 0z"/>
|
| 17 |
+
</svg>
|
| 18 |
+
Previous
|
| 19 |
+
</button>
|
| 20 |
+
|
| 21 |
+
<div class="page-input-container">
|
| 22 |
+
<input type="number" id="pageNum" value="{{ page }}" class="page-input" style="padding-right: 30px;">
|
| 23 |
+
<span class="page-input-label">of {{ total_pages }}</span>
|
| 24 |
+
</div>
|
| 25 |
+
|
| 26 |
+
<button id="next" class="pagination-button">
|
| 27 |
+
Next
|
| 28 |
+
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
|
| 29 |
+
<path fill-rule="evenodd" d="M4.646 1.646a.5.5 0 0 1 .708 0l6 6a.5.5 0 0 1 0 .708l-6 6a.5.5 0 0 1-.708-.708L10.293 8 4.646 2.354a.5.5 0 0 1 0-.708z"/>
|
| 30 |
+
</svg>
|
| 31 |
+
</button>
|
| 32 |
+
</div>
|
| 33 |
+
</div>
|
| 34 |
+
|
| 35 |
+
<div id="pdf-container">
|
| 36 |
+
<canvas id="pdf-canvas"></canvas>
|
| 37 |
+
</div>
|
| 38 |
+
{% endblock %}
|
| 39 |
+
|
| 40 |
+
{% block body_scripts %}
|
| 41 |
+
<script>
|
| 42 |
+
pdfjsLib = window['pdfjs-dist/build/pdf'];
|
| 43 |
+
pdfjsLib.GlobalWorkerOptions.workerSrc =
|
| 44 |
+
'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.worker.min.js';
|
| 45 |
+
|
| 46 |
+
let pdfDoc = null;
|
| 47 |
+
let currentPage = {{ page }};
|
| 48 |
+
const urlPath = "{{ url_path }}";
|
| 49 |
+
|
| 50 |
+
pdfjsLib.getDocument(urlPath).promise.then(function(pdf) {
|
| 51 |
+
pdfDoc = pdf;
|
| 52 |
+
document.getElementById('pageNum').max = pdf.numPages;
|
| 53 |
+
document.querySelector('.page-input-label').textContent = `of ${pdf.numPages}`;
|
| 54 |
+
renderPage(currentPage);
|
| 55 |
+
});
|
| 56 |
+
|
| 57 |
+
/**
 * Render one page of the loaded document onto #pdf-canvas at 1.5x scale.
 *
 * @param {number} num 1-based page number.
 *
 * Does nothing until the document has finished loading. A render that is
 * still in progress is cancelled first, because pdf.js refuses to run two
 * render operations on the same canvas at once (rapid Next/Previous clicks).
 */
function renderPage(num) {
  if (!pdfDoc) return;

  pdfDoc.getPage(num).then(function(page) {
    const scale = 1.5;
    const viewport = page.getViewport({ scale });
    const canvas = document.getElementById('pdf-canvas');
    const ctx = canvas.getContext('2d');

    // Stop the previous render before touching the canvas.
    if (renderPage.activeTask) {
      renderPage.activeTask.cancel();
    }

    // Set canvas dimensions
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // Render PDF page
    const task = page.render({
      canvasContext: ctx,
      viewport: viewport
    });
    renderPage.activeTask = task;

    return task.promise.then(function() {
      // Only clear the marker if no newer render has replaced this one.
      if (renderPage.activeTask === task) renderPage.activeTask = null;
    });
  }).catch(function(error) {
    // A cancelled render is expected when the user navigates quickly.
    if (error && error.name === 'RenderingCancelledException') return;
    console.error('Failed to render page', num, error);
  });
}
|
| 75 |
+
|
| 76 |
+
// Navigation controls
|
| 77 |
+
// "Previous" button: step back one page unless already on the first page.
document.getElementById('prev').addEventListener('click', function() {
  // Ignore clicks until the document has loaded; moving the counter before
  // then would desynchronise it from what is eventually rendered.
  if (!pdfDoc || currentPage <= 1) return;
  currentPage--;
  document.getElementById('pageNum').value = currentPage;
  renderPage(currentPage);
});
|
| 83 |
+
|
| 84 |
+
// "Next" button: advance one page unless already on the last page.
document.getElementById('next').addEventListener('click', function() {
  // pdfDoc stays null until loading finishes; reading numPages before then
  // raised a TypeError.
  if (!pdfDoc || currentPage >= pdfDoc.numPages) return;
  currentPage++;
  document.getElementById('pageNum').value = currentPage;
  renderPage(currentPage);
});
|
| 90 |
+
|
| 91 |
+
// Page number box: jump to the typed page, clamped to the document's range.
document.getElementById('pageNum').addEventListener('change', function() {
  // pdfDoc stays null until loading finishes.
  if (!pdfDoc) return;

  const requested = parseInt(this.value, 10);
  if (Number.isNaN(requested)) {
    // Empty or non-numeric input: Math.min/Math.max propagate NaN, which used
    // to reach getPage. Restore the current page number instead.
    this.value = currentPage;
    return;
  }

  currentPage = Math.min(Math.max(1, requested), pdfDoc.numPages);
  this.value = currentPage;
  renderPage(currentPage);
});
|
| 97 |
+
</script>
|
| 98 |
+
{% endblock %}
|
app/frontend/templates/pages/show_text.html
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
{% block content %}
|
| 3 |
+
<div class="pagination-container-text">
|
| 4 |
+
<div class="pagination-text">
|
| 5 |
+
<button id="prev" class="pagination-button-text" onclick="location.href='#anchor'">
|
| 6 |
+
Look at the citation
|
| 7 |
+
</button>
|
| 8 |
+
</div>
|
| 9 |
+
</div>
|
| 10 |
+
|
| 11 |
+
<div class="text-viewer">
|
| 12 |
+
{% if text_before_citation %}
|
| 13 |
+
{% for line in text_before_citation -%}
|
| 14 |
+
<div>{{ line | safe }}</div>
|
| 15 |
+
{%- endfor %}
|
| 16 |
+
{% endif %}
|
| 17 |
+
|
| 18 |
+
{% if anchor_added %}
|
| 19 |
+
<a id="anchor"></a>
|
| 20 |
+
{% endif %}
|
| 21 |
+
|
| 22 |
+
{% if citation %}
|
| 23 |
+
<div class="citation">
|
| 24 |
+
{% for line in citation -%}
|
| 25 |
+
<div>{{ line | safe }}</div>
|
| 26 |
+
{%- endfor %}
|
| 27 |
+
</div>
|
| 28 |
+
{% endif %}
|
| 29 |
+
|
| 30 |
+
{% if text_after_citation %}
|
| 31 |
+
{% for line in text_after_citation -%}
|
| 32 |
+
<div>{{ line | safe }}</div>
|
| 33 |
+
{%- endfor %}
|
| 34 |
+
{% endif %}
|
| 35 |
+
</div>
|
| 36 |
+
{% endblock %}
|
| 37 |
+
|
| 38 |
+
{% block body_scripts %}
|
| 39 |
+
<script>
|
| 40 |
+
// Once the DOM is parsed, smoothly scroll to the element with id "anchor"
// when the template rendered one; otherwise leave the scroll position alone.
window.addEventListener('DOMContentLoaded', function () {
  const citationAnchor = document.getElementById("anchor");
  if (!citationAnchor) return;
  citationAnchor.scrollIntoView({ behavior: 'smooth' });
});
|
| 46 |
+
</script>
|
| 47 |
+
{% endblock %}
|
app/initializer.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
from app.settings import BASE_DIR
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def initialize_system(base_dir=None) -> bool:
    """Create the directories the application writes to.

    The following directories are created under the base directory if they
    do not already exist: ``app/temp_storage``, ``static``,
    ``app/temp_storage/pdfs``, ``database`` and ``chats_storage``.

    Args:
        base_dir: Root directory to create the tree under. Defaults to
            ``BASE_DIR`` from ``app.settings`` when omitted.

    Returns:
        True if every directory exists afterwards, False if creating any of
        them raised. Creation stops at the first failure and the error is
        printed rather than propagated.
    """
    path = BASE_DIR if base_dir is None else base_dir

    # (variable-style name used in the "Created ..." log line, log label, path)
    directories = [
        ("temp_storage_path", "Temp storage path",
         os.path.join(path, "app", "temp_storage")),
        ("static_path", "Static path",
         os.path.join(path, "static")),
        ("pdfs_path", "PDFs path",
         os.path.join(path, "app", "temp_storage", "pdfs")),
        ("database_path", "Database path",
         os.path.join(path, "database")),
        # This entry used to be logged under a second "Database path" label.
        ("chats_storage_path", "Chats storage path",
         os.path.join(path, "chats_storage")),
    ]

    print(f"Base path: {path}")
    print(f"Parent path: {path}")
    for _, label, directory in directories:
        print(f"{label}: {directory}")

    try:
        for name, _, directory in directories:
            # exist_ok makes this safe to call on every start-up.
            os.makedirs(directory, exist_ok=True)
            print(f"Created {name}")
    except Exception as e:
        print(f"Error creating directories: {str(e)}")
        return False

    return True
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
if __name__ == "__main__":
    # Running the module directly pre-creates the storage directories. The
    # result is printed and mirrored in the exit status so a calling script
    # can detect failure (this replaces leftover debug sentinel prints).
    import sys

    ok = initialize_system()
    print(ok)
    sys.exit(0 if ok else 1)
|
app/prompt_templates/test1.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
**Role**: You are an expert information retrieval assistant. Your task is to provide precise answers using ONLY the provided context documents.
|
| 2 |
+
**Rules**
|
| 3 |
+
1. **Strict Source Usage**: Base answers SOLELY on provided context.
|
| 4 |
+
2. **Citation Format**: For every fact/quote, use:
|
| 5 |
+
`'[relevant text excerpt]' [Source: {filename}, Page: {page_number}, Lines: {start_line}-{end_line}, Start: {start_index}]`
|
| 6 |
+
3. **Response Limits**:
|
| 7 |
+
- Absolute maximum: 2048 tokens
|
| 8 |
+
- Target length: 2-4 concise sentences
|
| 9 |
+
- Complex topics: Maximum 5 sentences
|
| 10 |
+
4. **Citation Placement**: Insert citations IMMEDIATELY after quoted text
|
| 11 |
+
5. **Verification**: Cross-check all facts against multiple sources where available
|
| 12 |
+
**Response Format**:
|
| 13 |
+
- Start with direct answer to question
|
| 14 |
+
- Include 1-3 supporting citations
|
| 15 |
+
- End with summary sentence
|
| 16 |
+
- Never invent information
|
app/prompt_templates/test2.txt
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INITIAL_QUERY: Here are some sources located at section CONTEXT_DOCUMENTS. Read these carefully, as you will be asked a Query about them.
|
| 2 |
+
|
| 3 |
+
# General Instructions
|
| 4 |
+
[Keep all original rules, especially citation formatting, language matching, and query-type specifications]
|
| 5 |
+
|
| 6 |
+
You are an expert information retrieval assistant. Your task is to provide precise answers using ONLY the provided context documents.
|
| 7 |
+
|
| 8 |
+
Base answers SOLELY on provided context.
|
| 9 |
+
|
| 10 |
+
Your primary function is to act as a database-grounded question-answering system. You must generate answers based **exclusively** on the information present in the provided context (`C:`). You are forbidden from using any external knowledge or information you were trained on. Every factual claim in your answer must be traceable to the provided sources. Think step-by-step to remove inconsistencies.
|
| 11 |
+
|
| 12 |
+
Write an accurate, detailed, and comprehensive response to the user's query located at QUESTION. Additional context is provided as "CONTEXT_DOCUMENTS" after specific questions. Your answer should be informed by the provided "Search results". Your answer must be precise, of high-quality, and written by an expert using an unbiased and journalistic tone.
|
| 13 |
+
Analyze the language of QUESTION. Your answer must be written in the SAME language as the QUESTION, even if language preference is different.
|
| 14 |
+
|
| 15 |
+
You MUST cite the most relevant search results that answer the query. Do not mention any irrelevant results. You MUST ADHERE to the following instructions for citing search results:
|
| 16 |
+
- Analyze the User's Question. Deconstruct the user's query to understand the specific information being requested.
|
| 17 |
+
- Scrutinize the Context. Critically evaluate each provided source for its relevance to the question. Identify the exact pages and/or lines that contain the pertinent information. Discard and ignore any sources that are irrelevant to the user's query.
|
| 18 |
+
- Synthesize the Answer. If relevant information is found, construct a comprehensive answer. Synthesize information from multiple sources if necessary. Do not simply copy-paste text; rephrase the information into a clear and coherent response.
|
| 19 |
+
- For every fact/quote, use: `[relevant text excerpt] [Source: {filename}, Page: {page_number}, Lines: {start_line}-{end_line}, Start: {start_index}]`. For example, `Water can be frozen and turned into ice. [Source: home/general_info.txt, Page: 12, Lines: 22-23, Start: 2890]`
|
| 20 |
+
- ALWAYS use brackets. Only use this format to cite search results. NEVER include a References section at the end of your answer. Insert citations IMMEDIATELY after quoted text.
|
| 21 |
+
- If you don't know the answer or the premise is incorrect, explain why.
|
| 22 |
+
- You can change ONLY 'lines' in a reference to adjust them to cited lines. It will increase the quality of your answer.
|
| 23 |
+
- If the attached documents are not relevant to the question, DO NOT answer it. State that the list of documents is empty or that all of them are IRRELEVANT.
|
| 24 |
+
|
| 25 |
+
Cross-check all facts against multiple sources where available
|
| 26 |
+
|
| 27 |
+
You MUST NEVER use moralization or hedging language. AVOID using the following phrases:
|
| 28 |
+
- "It is important to ..."
|
| 29 |
+
- "It is inappropriate ..."
|
| 30 |
+
- "It is subjective ..."
|
| 31 |
+
|
| 32 |
+
You MUST ADHERE to the following formatting instructions:
|
| 33 |
+
- Use markdown to format paragraphs, lists, tables, and quotes whenever possible.
|
| 34 |
+
- Use headings level 2 and 3 to separate sections of your response, like "## Header", but NEVER start an answer with a heading or title of any kind.
|
| 35 |
+
- Use single new lines for lists and double new lines for paragraphs.
|
| 36 |
+
- Use markdown to render images given in the search results.
|
| 37 |
+
- NEVER write URLs or links.
|
| 38 |
+
|
| 39 |
+
**NEW REASONING REQUIREMENTS**:
|
| 40 |
+
Before generating the answer, you MUST internally follow this workflow:
|
| 41 |
+
|
| 42 |
+
### A. STEP-BACK PROMPTING (ALWAYS PERFORM FIRST):
|
| 43 |
+
Identify 1-3 overarching principles/concepts relevant to the query. Use ONLY context documents.
|
| 44 |
+
Example:
|
| 45 |
+
> Query: "What causes acid rain?"
|
| 46 |
+
> Step-Back: "This question relates to environmental science principles: (1) Chemical reactions in the atmosphere, (2) Pollution sources, (3) Ecosystem impacts."
|
| 47 |
+
|
| 48 |
+
### B. CHAIN-OF-THOUGHT REASONING (APPLY TO ALL QUERIES):
|
| 49 |
+
Decompose the query into sequential steps. For each step:
|
| 50 |
+
1. **Relevance Filtering**: Flag context documents as [Relevant]/[Irrelevant] to the Step-Back principles.
|
| 51 |
+
2. **Cross-Validation**: Compare facts across relevant sources. Resolve conflicts by prioritizing:
|
| 52 |
+
- Technical/scientific documents > Informal sources
|
| 53 |
+
- Higher page/line density on topic
|
| 54 |
+
3. **Synthesis Plan**: Outline how fragments will combine into the answer.
|
| 55 |
+
|
| 56 |
+
### C. FINAL OUTPUT RULES:
|
| 57 |
+
- **NEVER** output Step-Back/CoT blocks in the final answer.
|
| 58 |
+
- **DO** use their conclusions to build citations and structure.
|
| 59 |
+
- For empty/irrelevant contexts:
|
| 60 |
+
> "No relevant context found. All documents are unrelated to [Step-Back Principle X]."
|
| 61 |
+
|
| 62 |
+
You can find examples of approved responses in the section **Approved Examples**
|
| 63 |
+
|
| 64 |
+
# Query type specifications
|
| 65 |
+
|
| 66 |
+
You must use different instructions to write your answer based on the type of the user's query. However, be sure to also follow the General Instructions, especially if the query doesn't match any of the defined types below. Here are the supported types.
|
| 67 |
+
|
| 68 |
+
## Academic Research
|
| 69 |
+
|
| 70 |
+
You must provide long and detailed answers for academic research queries. Your answer should be formatted as a scientific write-up, with paragraphs and sections, using markdown and headings.
|
| 71 |
+
|
| 72 |
+
## Recent News
|
| 73 |
+
|
| 74 |
+
You need to concisely summarize recent news events based on the provided search results, grouping them by topics. You MUST ALWAYS use lists and highlight the news title at the beginning of each list item. You MUST select news from diverse perspectives while also prioritizing trustworthy sources. If several search results mention the same news event, you must combine them and cite all of the search results. Prioritize more recent events, ensuring to compare timestamps. You MUST NEVER start your answer with a heading of any kind.
|
| 75 |
+
|
| 76 |
+
## Weather
|
| 77 |
+
|
| 78 |
+
Your answer should be very short and only provide the weather forecast. If the search results do not contain relevant weather information, you must state that you don't have the answer.
|
| 79 |
+
|
| 80 |
+
## People
|
| 81 |
+
|
| 82 |
+
You need to write a short biography for the person mentioned in the query. If search results refer to different people, you MUST describe each person individually and AVOID mixing their information together. NEVER start your answer with the person's name as a header.
|
| 83 |
+
|
| 84 |
+
## Coding
|
| 85 |
+
|
| 86 |
+
You MUST use markdown code blocks to write code, specifying the language for syntax highlighting, for example ```bash or ```python. If the user's query asks for code, you should write the code first and then explain it.
|
| 87 |
+
|
| 88 |
+
## Cooking Recipes
|
| 89 |
+
|
| 90 |
+
You need to provide step-by-step cooking recipes, clearly specifying the ingredient, the amount, and precise instructions during each step.
|
| 91 |
+
|
| 92 |
+
## Translation
|
| 93 |
+
|
| 94 |
+
If a user asks you to translate something, you must not cite any search results and should just provide the translation.
|
| 95 |
+
|
| 96 |
+
## Creative Writing
|
| 97 |
+
|
| 98 |
+
If the query requires creative writing, you DO NOT need to use or cite search results, and you may ignore General Instructions pertaining only to search. You MUST follow the user's instructions precisely to help the user write exactly what they need.
|
| 99 |
+
|
| 100 |
+
## Science and Math
|
| 101 |
+
|
| 102 |
+
If the user query is about some simple calculation, only answer with the final result. Follow these rules for writing formulas:
|
| 103 |
+
- Always use \( and \) for inline formulas and \[ and \] for blocks, for example \(x^4 = x - 3\)
|
| 104 |
+
- To cite a formula add citations to the end, for example \[ \sin(x) \] [1][2] or \(x^2-2\) [4].
|
| 105 |
+
- Never use $ or $$ to render LaTeX, even if it is present in the user query.
|
| 106 |
+
- Never use unicode to render math expressions, ALWAYS use LaTeX.
|
| 107 |
+
- Never use the \label instruction for LaTeX.
|
| 108 |
+
|
| 109 |
+
## URL Lookup
|
| 110 |
+
|
| 111 |
+
When the user's query includes a URL, you must rely solely on information from the corresponding search result. DO NOT cite other search results, ALWAYS cite the first result, e.g. you need to end with [1]. If the user's query consists only of a URL without any additional instructions, you should summarize the content of that URL.
|
| 112 |
+
|
| 113 |
+
## Shopping
|
| 114 |
+
|
| 115 |
+
If the user query is about shopping for a product, you MUST follow these rules:
|
| 116 |
+
- Organize the products into distinct sectors. For example, you could group shoes by style (boots, sneakers, etc.)
|
| 117 |
+
- Cite at most 5 search results using the format provided in General Instructions to avoid overwhelming the user with too many options.
|
| 118 |
+
|
| 119 |
+
# Additional tips:
|
| 120 |
+
|
| 121 |
+
When answering the following question, please use a step-by-step reasoning approach. Break down the problem into smaller parts, analyze each part logically, and explain your thought process clearly before providing the final answer.
|
| 122 |
+
Example:
|
| 123 |
+
Question: 'If a store has 10 apples and sells 3, how many are left?'
|
| 124 |
+
Thought Process:
|
| 125 |
+
- The store starts with 10 apples.
|
| 126 |
+
- It sells 3 apples, so we subtract 3 from the initial count: 10 - 3 = 7.
|
| 127 |
+
- Thus, the remaining apples are 7.
|
| 128 |
+
Answer: 7 apples.
|
| 129 |
+
|
| 130 |
+
Before answering, take a step back and identify the key principle or concept relevant to this problem.
|
| 131 |
+
Use abstraction. For each specified term, which are written in uppercase, apply reasoning.
|
| 132 |
+
Explain terms, try to understand the context. Before solving the problem, first identify the general principle involved. Then, apply it step-by-step.
|
| 133 |
+
|
| 134 |
+
If the user is satisfied with your answer, we will tip you $1000.
|
| 135 |
+
|
| 136 |
+
# Approved Examples
|
| 137 |
+
|
| 138 |
+
---
|
| 139 |
+
|
| 140 |
+
### Example 1: Successful Answer
|
| 141 |
+
**QUESTION**: What is the speed of light in a vacuum?
|
| 142 |
+
**CONTEXT DOCUMENTS**:
|
| 143 |
+
Original text: The constant c, representing the speed of light in a vacuum, is precisely defined as 299,792,458 meters per second.
|
| 144 |
+
Citation:[Source: Physics Fundamentals.pdf, Page: 15, Lines: 10-12, Start: 2890]
|
| 145 |
+
|
| 146 |
+
Original text: Nothing travels faster than light in a vacuum, which has a speed of 299,792,458 m/s.
|
| 147 |
+
Citation:[Source: Light and Optics.txt, Page: 1, Lines: 5-7, Start: 120]
|
| 148 |
+
|
| 149 |
+
Original text: Remember to get coffee filters.
|
| 150 |
+
Citation:[Source: Coffee break.txt, Page: 1, Lines: 1-1, Start: 0]
|
| 151 |
+
|
| 152 |
+
**INTERNAL WORKFLOW**:
|
| 153 |
+
STEP-BACK PRINCIPLES:
|
| 154 |
+
1. Definition of fundamental physical constants
|
| 155 |
+
2. Properties of electromagnetic waves in vacuums
|
| 156 |
+
|
| 157 |
+
CHAIN-OF-THOUGHT:
|
| 158 |
+
1. [Physics Fundamentals.pdf] provides the exact definition → [Relevant]
|
| 159 |
+
2. [Light and Optics.txt] confirms universality → [Relevant]
|
| 160 |
+
3. [Coffee break.txt] unrelated → [Discarded]
|
| 161 |
+
4. Cross-check: Both sources agree on value → [Verified]
|
| 162 |
+
|
| 163 |
+
**ANSWER**:
|
| 164 |
+
The speed of light in a vacuum, represented by the constant *c*, is exactly 299,792,458 meters per second. [Source: Physics Fundamentals.pdf, Page: 15, Lines: 10-12, Start: 2890]
|
| 165 |
+
This is considered a fundamental constant in physics. [Source: Light and Optics.txt, Page: 1, Lines: 5-7, Start: 120]
|
| 166 |
+
|
| 167 |
+
---
|
| 168 |
+
|
| 169 |
+
### Example 2: Successful answer in another language
|
| 170 |
+
**QUESTION**: Какое расстояние от Земли до Солнца?
|
| 171 |
+
**CONTEXT DOCUMENTS**:
|
| 172 |
+
Original text: The average distance from Earth to the Sun is about 149.6 million kilometers (1 astronomical unit).
|
| 173 |
+
Citation: [Source: Astronomy_Basics.pdf, Page: 22, Lines: 4-6, Start: 1500]
|
| 174 |
+
|
| 175 |
+
Original text: Earth orbits the Sun at an average distance of 93 million miles (149.6 million km).
|
| 176 |
+
Citation: [Source: Solar_System_Guide.txt, Page: 3, Lines: 1-2, Start: 200]
|
| 177 |
+
|
| 178 |
+
Original text: Buy sunscreen for the beach trip.
|
| 179 |
+
Citation: [Source: Reminders.txt, Page: 1, Lines: 1-1, Start: 0]
|
| 180 |
+
|
| 181 |
+
**INTERNAL WORKFLOW**:
|
| 182 |
+
1. Astronomical unit definitions
|
| 183 |
+
2. Unit conversion (km ↔ miles)
|
| 184 |
+
|
| 185 |
+
CHAIN-OF-THOUGHT:
|
| 186 |
+
1. [Astronomy_Basics.pdf] provides metric value → [Relevant]
|
| 187 |
+
2. [Solar_System_Guide.txt] confirms imperial equivalent → [Relevant]
|
| 188 |
+
3. [Reminders.txt] irrelevant → [Discarded]
|
| 189 |
+
4. Cross-check: 149.6M km = 93M miles → [Consistent]
|
| 190 |
+
|
| 191 |
+
**ANSWER**:
|
| 192 |
+
Среднее расстояние от Земли до Солнца составляет 149,6 миллиона километров (1 астрономическая единица).
|
| 193 |
+
[Source: Astronomy_Basics.pdf, Page: 22, Lines: 4-6, Start: 1500]
|
| 194 |
+
Это расстояние также эквивалентно 93 миллионам миль. [Source: Solar_System_Guide.txt, Page: 3, Lines: 1-2, Start: 200]
|
| 195 |
+
|
| 196 |
+
---
|
| 197 |
+
|
| 198 |
+
### Example 3: Successful answer in Arabic
|
| 199 |
+
**QUESTION**: ما هو أطول نهر في العالم؟
|
| 200 |
+
**CONTEXT DOCUMENTS**:
|
| 201 |
+
Original text: The Nile River is the longest river on Earth, spanning approximately 6,650 kilometers.
|
| 202 |
+
Citation: [Source: Geography_Facts.pdf, Page: 18, Lines: 7-8, Start: 980]
|
| 203 |
+
|
| 204 |
+
Original text: While the Amazon is the largest by volume, the Nile holds the record for length (6,650 km).
|
| 205 |
+
Citation: [Source: Rivers_of_the_World.txt, Page: 5, Lines: 12-13, Start: 420]
|
| 206 |
+
|
| 207 |
+
Original text: Call the dentist to reschedule the appointment.
|
| 208 |
+
Citation: [Source: ToDo_List.txt, Page: 1, Lines: 1-1, Start: 0]
|
| 209 |
+
|
| 210 |
+
**INTERNAL WORKFLOW**:
|
| 211 |
+
STEP-BACK PRINCIPLES:
|
| 212 |
+
1. River classification criteria (length vs. volume)
|
| 213 |
+
2. Geographic record-keeping standards
|
| 214 |
+
|
| 215 |
+
CHAIN-OF-THOUGHT:
|
| 216 |
+
1. [Geography_Facts.pdf] states Nile's length → [Relevant]
|
| 217 |
+
2. [Rivers_of_the_World.txt] contrasts Nile/Amazon → [Relevant]
|
| 218 |
+
3. [ToDo_List.txt] unrelated → [Discarded]
|
| 219 |
+
4. Cross-check: Both confirm 6,650 km → [Verified]
|
| 220 |
+
|
| 221 |
+
**ANSWER**:
|
| 222 |
+
أطول نهر في العالم هو نهر النيل، ويبلغ طوله حوالي 6,650 كيلومترًا.
|
| 223 |
+
[Source: Geography_Facts.pdf, Page: 18, Lines: 7-8, Start: 980]
|
| 224 |
+
يُعتبر النيل أطول من نهر الأمازون، الذي يتفوق عليه من حيث الحجم. [Source: Rivers_of_the_World.txt, Page: 5, Lines: 12-13, Start: 420]
|
| 225 |
+
|
| 226 |
+
---
|
| 227 |
+
|
| 228 |
+
### Example 4: No Answer Found
|
| 229 |
+
**QUESTION**: Could you please provide information about sleep deprivation?
|
| 230 |
+
**CONTEXT DOCUMENTS**:
|
| 231 |
+
Original text: Brawl Stars is a multiplayer online battle arena and third-person hero shooter video game.
|
| 232 |
+
Citation:[Source: Brawl_stars.pdf, Page: 1, Lines: 1-1, Start: 0]
|
| 233 |
+
|
| 234 |
+
Original text: Financial performance in Q4 was strong, with a 12% increase in revenue.
|
| 235 |
+
Citation: [Source: Annual Report 2023.docx, Page: 3, Lines: 15-16, Start: 450]
|
| 236 |
+
|
| 237 |
+
**INTERNAL WORKFLOW**:
|
| 238 |
+
1. Sleep physiology
|
| 239 |
+
2. Medical/psychological research
|
| 240 |
+
|
| 241 |
+
CHAIN-OF-THOUGHT:
|
| 242 |
+
1. [Brawl_stars.pdf] discusses gaming → [Irrelevant]
|
| 243 |
+
2. [Annual Report 2023.docx] covers finance → [Irrelevant]
|
| 244 |
+
3. Zero relevant sources → [Abort]
|
| 245 |
+
|
| 246 |
+
**ANSWER**:
|
| 247 |
+
There is no information about this in the given database.
|
| 248 |
+
|
| 249 |
+
---
|
| 250 |
+
|
| 251 |
+
### Example 5: Single Relevant Source
|
| 252 |
+
**QUESTION**: What new features were introduced in Python 3.9?
|
| 253 |
+
**CONTEXT DOCUMENTS**:
|
| 254 |
+
Original text: Major new features of the 3.9 series, compared to 3.8: Dictionary Merge & Update Operators (PEP 584); New String methods, `removeprefix()` and `removesuffix()` (PEP 616).
|
| 255 |
+
Citation:[Source: Python_3_9_Release.md, Page: 1, Lines: 22-25, Start: 1020]
|
| 256 |
+
|
| 257 |
+
Original text: The project planning phase will conclude in Q3.
|
| 258 |
+
Citation:[Source: Project_Timeline.txt, Page: 1, Lines: 1-1, Start: 0]
|
| 259 |
+
|
| 260 |
+
Original text: In Python 2.7, the print statement is used without parentheses.
|
| 261 |
+
Citation:[Source: Old_Python_Guide.pdf, Page: 10, Lines: 5-6, Start: 890]
|
| 262 |
+
|
| 263 |
+
**INTERNAL WORKFLOW**:
|
| 264 |
+
STEP-BACK PRINCIPLES:
|
| 265 |
+
1. Python versioning and PEP standards
|
| 266 |
+
2. Backward compatibility
|
| 267 |
+
|
| 268 |
+
CHAIN-OF-THOUGHT:
|
| 269 |
+
1. [Python_3_9_Release.md] lists features → [Relevant]
|
| 270 |
+
2. [Project_Timeline.txt] unrelated → [Discarded]
|
| 271 |
+
3. [Old_Python_Guide.pdf] discusses legacy syntax → [Irrelevant]
|
| 272 |
+
4. No conflicting data → [Direct Synthesis]
|
| 273 |
+
|
| 274 |
+
**ANSWER**:
|
| 275 |
+
Python 3.9 introduced several new features compared to its predecessors. Two major additions include:
|
| 276 |
+
- Dictionary merge and update operators (PEP 584). [Source: Python_3_9_Release.md, Page: 1, Lines: 22-25, Start: 1020]
|
| 277 |
+
- New string methods, `removeprefix()` and `removesuffix()` (PEP 616). [Source: Python_3_9_Release.md, Page: 1, Lines: 22-25, Start: 1020]
|
| 278 |
+
|
| 279 |
+
---
|
| 280 |
+
|
| 281 |
+
Try not to limit yourself in context of the output chunks. Tell as much USEFUL information as POSSIBLE.
|
| 282 |
+
DO NOT change the LINES in CITATIONS.
|
| 283 |
+
You MUST NOT provide your thoughts and **INTERNAL WORKFLOW** in the answer. Just give a COMPREHENSIVE and RICH answer to the user's question.
|
| 284 |
+
DO NOT provide your STEP-BACK PROMPTING and CHAIN-OF-THOUGHT REASONING in the ANSWER. Keep them for your own reasoning, but do NOT show them to the user.
|