FauziIsyrinApridal committed
Commit 82031c8 · 1 Parent(s): 498342f
Fix the input placeholder and remove chunking
Files changed:
- app/chat.py +1 -1
- app/document_processor.py +18 -10
app/chat.py
CHANGED
@@ -50,7 +50,7 @@ def display_chat_history(chain):
 
     # Input teks biasa
     user_input_obj = st.chat_input(
-        "Masukkan pertanyaan
+        "Masukkan pertanyaan",
         key="chat_input_field"
     )
 
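For context, here is a minimal sketch (not code from this repository) of how the value returned by st.chat_input is typically consumed in a Streamlit chat loop. The echo response and everything outside the two lines shown in the diff are illustrative assumptions.

import streamlit as st

# Sketch only: handle the text submitted through st.chat_input.
# In app/chat.py the submitted question would instead go to the chain
# that display_chat_history(chain) receives.
user_input_obj = st.chat_input(
    "Masukkan pertanyaan",       # placeholder string fixed by this commit
    key="chat_input_field"
)

if user_input_obj is not None:   # st.chat_input returns None until the user submits
    with st.chat_message("user"):
        st.write(user_input_obj)
    with st.chat_message("assistant"):
        st.write(f"Placeholder answer for: {user_input_obj}")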
app/document_processor.py
CHANGED
@@ -3,8 +3,9 @@ from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 import os
 import tempfile
-import zipfile
 import streamlit as st
+from langchain.schema import Document
+
 
 def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
     """Save vector store to Supabase storage as separate files."""
@@ -94,18 +95,25 @@ def load_vector_store_from_supabase(supabase, bucket_name, file_prefix="vector_s
         st.error(f"Error loading from Supabase: {e}")
         return None
 
+
 def process_documents(docs):
     embeddings = HuggingFaceEmbeddings(
         model_name="LazarusNLP/all-indo-e5-small-v4",
         model_kwargs={"device": "cpu"},
         encode_kwargs={"normalize_embeddings": True}
     )
-    [9 removed lines: the old chunking code, not shown in the rendered diff]
+
+    combined_docs = []
+    for doc in docs:
+        # Gunakan page_content langsung tanpa split
+        text = doc.page_content if hasattr(doc, "page_content") else doc
+        metadata = doc.metadata if hasattr(doc, "metadata") else {}
+
+        combined_doc = Document(
+            page_content=text,
+            metadata=metadata
+        )
+        combined_docs.append(combined_doc)
+
+    vector_store = FAISS.from_documents(combined_docs, embeddings)
+    return vector_store
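A minimal usage sketch of the new process_documents(): it now wraps each input in a LangChain Document as-is, with no text splitting, and indexes whole documents in FAISS. The import path, sample texts, and the similarity_search query below are illustrative assumptions, not part of the commit.

from langchain.schema import Document
from app.document_processor import process_documents  # assumed module path

# Sketch only: build the vector store from ready-made Document objects.
docs = [
    Document(page_content="Text of the first document ...", metadata={"source": "doc1.pdf"}),
    Document(page_content="Text of the second document ...", metadata={"source": "doc2.pdf"}),
]

vector_store = process_documents(docs)

# Each FAISS entry is now an entire document, since the splitter was removed.
hits = vector_store.similarity_search("example question", k=2)
for hit in hits:
    print(hit.metadata.get("source"), "->", hit.page_content[:80])

Because chunking is gone, each document is embedded as a single vector; very long texts may be truncated at the embedding model's sequence limit, a trade-off worth keeping in mind with this change.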