# app.py
from fastapi import FastAPI, File, UploadFile, Form
from typing import List
import openai
import json
import pandas as pd
import re

# Import our agent's tools
import tools

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

# Shared OpenAI client, created once at import time and reused by every request.
# No API key is passed explicitly, so the SDK falls back to its own default
# configuration lookup (presumably the OPENAI_API_KEY environment variable,
# which this deployment is expected to supply via Hugging Face Secrets —
# confirm when running elsewhere).
client = openai.OpenAI()

# Hand the same client instance to the tools module so its helpers reuse it.
tools.set_openai_client(client)

@app.get("/")
async def read_root():
    """Return a fixed status message confirming that the API is reachable."""
    status_payload = {"message": "Data Analyst Agent API is running!"}
    return status_payload

# Matches an absolute http(s) URL. Whitespace, angle brackets and double quotes
# terminate the match because they commonly delimit a URL inside prose.
_URL_PATTERN = re.compile(r"https?://[^\s<>\"]+", re.IGNORECASE)

# Matches a Markdown fenced code block with an optional language tag. The
# closing fence is optional so that a truncated LLM reply is still unwrapped.
_CODE_FENCE_PATTERN = re.compile(r"```[^\n`]*\n(.*?)(?:```|\Z)", re.DOTALL)

# Characters that usually belong to the surrounding sentence, not to the URL.
_TRAILING_PUNCTUATION = ".,;:!?'"


def _extract_first_url(text):
    """Return the first http(s) URL found in *text*, or None if there is none.

    Sentence punctuation directly after the URL is dropped. A closing bracket
    is dropped only when it has no matching opener inside the URL, so links
    such as ".../wiki/Foo_(bar)" are returned intact.
    """
    match = _URL_PATTERN.search(text)
    if match is None:
        return None
    url = match.group(0).rstrip(_TRAILING_PUNCTUATION)
    for opener, closer in (("(", ")"), ("[", "]")):
        if url.endswith(closer) and url.count(opener) < url.count(closer):
            url = url[:-1].rstrip(_TRAILING_PUNCTUATION)
    return url


def _strip_code_fences(text):
    """Return the Python source contained in an LLM reply.

    When the reply contains fenced blocks, only their contents are kept (any
    language tag and surrounding prose are discarded). Otherwise the reply is
    returned with stray fence markers removed.
    """
    fenced_blocks = _CODE_FENCE_PATTERN.findall(text)
    if fenced_blocks:
        return "\n".join(block.strip("\n") for block in fenced_blocks).strip()
    return text.strip().replace("```python", "").replace("```", "").strip()


@app.post("/api/")
async def analyze_data(
    questions_file: UploadFile = File(..., alias="questions.txt"),
    files: List[UploadFile] = File([], alias="files"),
):
    """Answer the uploaded questions by scraping a table and running generated code.

    The request is treated as a scraping task when the questions mention
    "scrape" and contain "http". In that case the handler fetches the page via
    `tools.get_dynamic_html`, asks `tools.choose_best_table_from_html` which
    table to use, loads it with `tools.extract_table_to_dataframe`, asks the
    LLM for an analysis script and executes it with
    `tools.run_python_code_on_dataframe`.

    Args:
        questions_file: Uploaded text file (form field "questions.txt")
            containing the task description and questions.
        files: Additional uploads (form field "files"). Accepted for
            interface compatibility; not used by this handler.

    Returns:
        On success, a list with the non-empty lines printed by the generated
        script. On any failure, ``{"error": <message>}``. For requests that
        are not scraping tasks, ``{"response": "This is a non-scraping task."}``.
    """
    raw_questions = await questions_file.read()
    try:
        # "utf-8-sig" also accepts plain UTF-8 but drops a leading BOM, which
        # would otherwise stop the first numbered question from matching below.
        questions_text = raw_questions.decode("utf-8-sig")
    except UnicodeDecodeError:
        return {"error": "questions.txt must be UTF-8 encoded text."}

    lowered_questions = questions_text.lower()
    if "scrape" not in lowered_questions or "http" not in lowered_questions:
        return {"response": "This is a non-scraping task."}

    # --- STEPS 1-3: FETCH THE PAGE, CHOOSE A TABLE, LOAD IT AS A DATAFRAME ---
    # NOTE(review): the URL comes straight from the uploaded text; confirm that
    # outbound fetches are restricted appropriately for this deployment.
    url = _extract_first_url(questions_text)
    if not url:
        return {"error": "URL not found."}

    html_content = await tools.get_dynamic_html(url)
    # NOTE(review): any page whose HTML contains the word "Error" is treated as
    # a failed fetch. Tightening this needs the exact error format returned by
    # `tools.get_dynamic_html` — confirm before changing.
    if isinstance(html_content, str) and "Error" in html_content:
        return {"error": html_content}

    choice_json_str = tools.choose_best_table_from_html(html_content, questions_text)
    try:
        choice = json.loads(choice_json_str)
    except (TypeError, ValueError):
        # TypeError: the helper returned a non-string; ValueError covers
        # json.JSONDecodeError for malformed JSON.
        return {"error": "Failed to decode LLM table choice."}
    if not isinstance(choice, dict):
        return {"error": "Failed to decode LLM table choice."}
    table_index = choice.get("index")
    if table_index is None:
        return {"error": "LLM failed to choose table."}

    df = tools.extract_table_to_dataframe(html_content, table_index)
    # The helper signals failure by returning a message string.
    if isinstance(df, str):
        return {"error": df}

    # --- STEP 4: GENERATE & EXECUTE PYTHON CODE ---
    print("Step 4: Generating Python code for analysis.")

    df_head = df.head().to_string()
    df_info = f"Here is the head of the pandas DataFrame, named 'df':\n{df_head}"

    # Send only the numbered questions to the code generator so that the
    # scraping instructions around them do not distract it. Trailing
    # whitespace (e.g. "\r" from CRLF uploads) is removed from each question.
    analysis_questions = [
        question.rstrip()
        for question in re.findall(r"^\d+\.\s.*", questions_text, re.MULTILINE)
    ]
    # Fall back to the full text when no numbered questions are found.
    cleaned_questions_text = "\n".join(analysis_questions) or questions_text

    print(f"--- Cleaned Questions for Code Gen ---\n{cleaned_questions_text}\n--------------------------------------")

    system_prompt = """
        You are an expert Python data analyst. Your only job is to write a Python script.
        A pandas DataFrame `df` and libraries `pd`, `re`, `plt`, `sns`, `np`, `io`, `base64`, `LinearRegression` are pre-loaded.

        CRITICAL:
        - DO NOT import libraries.
        - DO NOT load data.
        - Your output must be ONLY raw Python code.
        - Clean the `df` DataFrame.
        - For each question, `print()` the answer.
        - For plots, print a base64 data URI.
        """

    user_prompt = f"{df_info}\n\nAnswer these questions with a Python script:\n\n{cleaned_questions_text}"

    try:
        completion = client.chat.completions.create(
            model="gpt-5-nano",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        response_content = completion.choices[0].message.content
        if not response_content:
            # The message content can be absent; report that explicitly rather
            # than failing later with an opaque attribute error.
            return {"error": "An error occurred during code generation or execution: the model returned an empty response"}

        python_code = _strip_code_fences(response_content)

        print(f"--- Generated Python Code ---\n{python_code}\n-----------------------------")

        print("Step 5: Executing generated code.")
        # NOTE(review): this runs LLM-generated code derived from user-supplied
        # input; confirm that `tools.run_python_code_on_dataframe` isolates it.
        execution_result = tools.run_python_code_on_dataframe(df, python_code)

        # Each non-blank printed line is reported as one answer.
        return [line for line in execution_result.strip().split("\n") if line.strip()]

    except Exception as e:
        # Request boundary: report the failure to the caller instead of a 500.
        return {"error": f"An error occurred during code generation or execution: {str(e)}"}