import gradio as gr
import chromadb
import pandas as pd
from sentence_transformers import SentenceTransformer
import re
import numpy

# --- 1. SETUP MODELS AND DATABASE ---

# Names are kept in one place so swapping the model or collection is a
# one-line change.
# Alternative model (currently disabled): 'rufimelo/bert-large-portuguese-cased-sts'
EMBEDDING_MODEL_NAME = 'jmbrito/ptbr-similarity-e5-small'
COLLECTION_NAME = "transcript_comparison_app"

print("Loading embedding model...")
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# The collection is configured for cosine distance; the search functions
# convert each returned distance to a similarity with `1 - distance`.
client = chromadb.Client()
collection = client.get_or_create_collection(
    name=COLLECTION_NAME,
    metadata={"hnsw:space": "cosine"},
)
print("ChromaDB collection ready.")


# --- 2. NEW: DEFINE AND PRE-COMPUTE GUIDELINE PROFILES ---

# Positive example phrases for each guideline to test.
# Keys are the guideline names (they also populate the profile dropdown in the
# UI); values are Portuguese example sentences that express the guideline.
# The phrases of each guideline are embedded and averaged into a single
# "profile" vector at startup.
GUIDELINE_PROFILES = {
    "Agent Empathy": [
        "Sinto muito por esse transtorno.",
        "Eu entendo completamente sua frustração.",
        "Imagino como isso deve ser chato, vamos resolver.",
        "Lamento que você tenha passado por isso.",
        "Compreendo sua situação e peço desculpas pelo ocorrido."
    ],
    "Problem Resolution Offer": [
        "Para resolver isso, posso te oferecer duas opções.",
        "Temos algumas alternativas para solucionar seu problema.",
        "A solução que posso propor é a seguinte.",
        "Vamos encontrar uma forma de resolver isso para você."
    ],
    "Polite Closing": [
        "Obrigado por sua ligação, tenha um ótimo dia.",
        "Agradecemos seu contato.",
        "Se precisar de mais alguma coisa, é só ligar.",
        "Tenha uma excelente semana."
    ]
}

# Build one profile vector per guideline at startup: encode all of the
# guideline's example phrases and take their element-wise mean (axis 0).
print("Computing guideline profile embeddings...")
profile_embeddings = {
    name: numpy.mean(embedding_model.encode(example_phrases), axis=0)
    for name, example_phrases in GUIDELINE_PROFILES.items()
}
print("✅ Guideline profiles are ready.")


# --- 3. CORE FUNCTIONS ---

def index_transcript(transcript_text):
    """Split a transcript into sentence-level chunks and index them in ChromaDB.

    Chunks already stored in the collection are replaced, so the collection
    only ever holds the most recently indexed transcript.

    Args:
        transcript_text: Raw transcript text pasted by the user.

    Returns:
        A ``(status_message, preview)`` tuple: a human-readable status string
        and a DataFrame with a single "Indexed Chunks" column listing what was
        stored. The DataFrame is empty when nothing was indexed.
    """
    if not transcript_text or not transcript_text.strip():
        return "Please paste a transcript before indexing.", pd.DataFrame()

    # Split on whitespace that follows sentence-ending punctuation, then drop
    # fragments of 5 characters or fewer (stray tokens, lone punctuation).
    pieces = (piece.strip() for piece in re.split(r'(?<=[.!?])\s+', transcript_text))
    chunks = [piece for piece in pieces if len(piece) > 5]
    if not chunks:
        # Bail out before touching the collection: an empty batch cannot be
        # added, and deleting first would wipe the existing index for nothing.
        return "No sentences longer than 5 characters were found to index.", pd.DataFrame()

    # Embed before deleting so a failure here leaves the previous index intact.
    embeddings = embedding_model.encode(chunks).tolist()
    ids = [f"chunk_{i}" for i in range(len(chunks))]

    # Clear whatever was indexed previously so stale chunks never match.
    if collection.count() > 0:
        collection.delete(ids=collection.get()['ids'])
    collection.add(embeddings=embeddings, documents=chunks, ids=ids)

    indexed_df = pd.DataFrame({"Indexed Chunks": chunks})
    return f"✅ Indexed {len(chunks)} chunks successfully!", indexed_df

def search_with_single_query(query):
    """Search the indexed chunks with the embedding of a single free-text query.

    Args:
        query: Free-text query typed by the user.

    Returns:
        A DataFrame with "Similarity" (``1 - distance`` formatted to two
        decimals) and "Matching Chunk" columns holding up to 3 best matches.
        An empty DataFrame is returned when the query is blank or nothing has
        been indexed yet.
    """
    if not query or not query.strip():
        return pd.DataFrame()

    # Never ask for more neighbours than the collection holds; an empty or
    # very small index would otherwise make the query fail or warn.
    indexed_count = collection.count()
    if indexed_count == 0:
        return pd.DataFrame()

    query_embedding = embedding_model.encode(query).tolist()
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=min(3, indexed_count),
    )

    # One query embedding was sent, so the results live at index 0.
    documents = results['documents'][0]
    distances = results['distances'][0]
    similarities = [f"{1 - dist:.2f}" for dist in distances]
    return pd.DataFrame({"Similarity": similarities, "Matching Chunk": documents})

def search_with_profile(guideline_name):
    """Search the indexed chunks with a pre-computed guideline profile embedding.

    Args:
        guideline_name: Key into ``profile_embeddings`` selecting the profile.

    Returns:
        A DataFrame with "Similarity" (``1 - distance`` formatted to two
        decimals) and "Matching Chunk" columns holding up to 3 best matches.
        An empty DataFrame is returned when no guideline is selected or
        nothing has been indexed yet.

    Raises:
        KeyError: If ``guideline_name`` is not a known profile.
    """
    if not guideline_name:
        return pd.DataFrame()

    profile_embedding = profile_embeddings[guideline_name].tolist()

    # Never ask for more neighbours than the collection holds; an empty or
    # very small index would otherwise make the query fail or warn.
    indexed_count = collection.count()
    if indexed_count == 0:
        return pd.DataFrame()

    results = collection.query(
        query_embeddings=[profile_embedding],
        n_results=min(3, indexed_count),
    )

    # One query embedding was sent, so the results live at index 0.
    documents = results['documents'][0]
    distances = results['distances'][0]
    similarities = [f"{1 - dist:.2f}" for dist in distances]
    return pd.DataFrame({"Similarity": similarities, "Matching Chunk": documents})


# --- 4. GRADIO INTERFACE FOR COMPARISON ---

# Sample Portuguese customer-service dialogue used as the default value of the
# transcript textbox. It deliberately reuses a few phrases verbatim from
# GUIDELINE_PROFILES (e.g. "Lamento que você tenha passado por isso.").
sample_transcript = """Atendente: Olá, bem-vindo à EletroMax. Meu nome é Sofia, em que posso ajudar?
Cliente: Oi, Sofia. Eu comprei uma cafeteira no site de vocês na semana passada, e ela simplesmente parou de funcionar.
Atendente: Puxa, que chato isso. Lamento que você tenha passado por isso. Pode me informar o número do pedido para eu localizar sua compra?
Cliente: Claro, o número é 11223344. Estou bem decepcionado, usei a cafeteira só duas vezes.
Atendente: Entendo perfeitamente sua frustração. Para resolver isso, posso te oferecer duas opções.
Cliente: Prefiro receber um novo.
Atendente: Combinado. Obrigado por sua ligação, tenha um ótimo dia.
"""

# Page layout: an indexing panel on top, then the two search methods side by
# side so their results for the same indexed transcript can be compared.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Search Method Comparison")
    gr.Markdown("Index a transcript once, then search using both methods to compare the results.")

    with gr.Row():
        # Indexing panel: transcript input, index button, status and preview
        with gr.Column(scale=1):
            transcript_input = gr.Textbox(lines=15, label="Paste Transcript Here", value=sample_transcript)
            index_button = gr.Button("Index Transcript", variant="primary")
            index_status = gr.Label()
            indexed_preview = gr.DataFrame(headers=["Indexed Chunks"], label="Indexed Data Preview")

    gr.HTML("<hr>") # Add a horizontal line for separation

    with gr.Row():
        # Column for the simple, single query search
        with gr.Column():
            gr.Markdown("### Method 1: Single Query Search")
            query_input = gr.Textbox(label="Enter a Simple Query", placeholder="Ex: o agente foi empático?")
            search_button_single = gr.Button("Search Single Query")
            results_output_single = gr.DataFrame(label="Single Query Results")

        # Column for the profile-based search; the dropdown offers the
        # guideline names defined in GUIDELINE_PROFILES
        with gr.Column():
            gr.Markdown("### Method 2: Guideline Profile Search")
            profile_input = gr.Dropdown(choices=list(GUIDELINE_PROFILES.keys()), label="Select a Guideline Profile")
            search_button_profile = gr.Button("Search with Profile", variant="primary")
            results_output_profile = gr.DataFrame(label="Profile Search Results")

    # Wire up the components: each button calls its function with the listed
    # inputs and writes the return value(s) to the listed outputs.
    # index_transcript returns two values, one per output component.
    index_button.click(fn=index_transcript, inputs=[transcript_input], outputs=[index_status, indexed_preview])
    search_button_single.click(fn=search_with_single_query, inputs=[query_input], outputs=[results_output_single])
    search_button_profile.click(fn=search_with_profile, inputs=[profile_input], outputs=[results_output_profile])

# Launches the app as soon as this module is executed (there is no
# `if __name__ == "__main__":` guard).
demo.launch()