|
|
""" |
|
|
Structured output parsing using LlamaIndex Pydantic Programs. |
|
|
Ensures consistent image formatting in agent responses. |
|
|
|
|
|
HACKATHON OPTIMIZED: Uses regex extraction instead of LLM calls for speed. |
|
|
""" |
|
|
from typing import List, Optional |
|
|
import re |
|
|
from pydantic import BaseModel, Field |
|
|
|
|
|
|
|
|
class BirdIdentificationResponse(BaseModel):
    """Structured response for bird identification using LlamaIndex Pydantic."""

    # NOTE: Field descriptions below are runtime artifacts — pydantic embeds
    # them in the generated JSON schema, so they must not be reworded casually.

    # Main response text; parse_agent_response stores the full raw agent reply here.
    summary: str = Field(
        description="Main response text with bird identification, facts, or information"
    )
    # Common (not scientific) name; None when no species could be extracted.
    species_name: Optional[str] = Field(
        default=None,
        description="Common name of the bird species (e.g., 'Northern Cardinal')"
    )
    # Image URLs pulled out of the response text by regex (see extract_urls_from_text).
    image_urls: List[str] = Field(
        default_factory=list,
        description="List of image URLs to display for this bird"
    )
    # Audio URLs (direct files and xeno-canto recording links).
    audio_urls: List[str] = Field(
        default_factory=list,
        description="List of audio URLs (bird calls/songs)"
    )
    # Classifier confidence in [0.0, 1.0]; currently always None in this module
    # (parse_agent_response passes confidence_score=None).
    confidence_score: Optional[float] = Field(
        default=None,
        description="Confidence score from classifier (0.0-1.0)"
    )
|
|
|
|
|
|
|
|
def extract_urls_from_text(text: str) -> tuple[List[str], List[str]]:
    """
    Extract image and audio URLs from text using regex.

    Handles URLs embedded in markdown, JSON, and plain text. Supports both
    extension-based URLs (.jpg, .png, ...) and domain-based ones (Unsplash
    images, xeno-canto recordings), which carry no file extension.

    Args:
        text: Raw agent response text to scan.

    Returns:
        tuple: (image_urls, audio_urls), each deduplicated while preserving
        first-seen order.
    """
    # ')', '}', ']' and whitespace terminate a match so markdown links like
    # ![alt](url) and JSON values do not leak their closing delimiter.
    image_pattern_ext = r'https?://[^\s)}\]]+?\.(?:jpg|jpeg|png|gif|webp|svg)(?:\?[^\s)}\]]*)?'
    # Unsplash serves images from extension-less URLs.
    image_pattern_unsplash = r'https?://images\.unsplash\.com/[^\s)}\]]*'
    audio_pattern_files = r'https?://[^\s)}\]]+?\.(?:mp3|wav|ogg|m4a)(?:\?[^\s)}\]]*)?'
    # xeno-canto page / direct-download URLs are extension-less as well.
    audio_pattern_xenocanto = r'https?://xeno-canto\.org/\d+(?:/download)?'

    print(f"[EXTRACT_URLS] Searching text of length {len(text)}")

    raw_image_urls_ext = re.findall(image_pattern_ext, text, re.IGNORECASE)
    raw_image_urls_unsplash = re.findall(image_pattern_unsplash, text, re.IGNORECASE)
    raw_audio_urls_files = re.findall(audio_pattern_files, text, re.IGNORECASE)
    # dict.fromkeys dedupes while keeping first-seen order; the previous
    # list(set(...)) reshuffled the output nondeterministically between runs.
    audio_urls_xenocanto = list(dict.fromkeys(re.findall(audio_pattern_xenocanto, text, re.IGNORECASE)))

    raw_image_urls = raw_image_urls_ext + raw_image_urls_unsplash

    print(f"[EXTRACT_URLS] Found {len(raw_image_urls_ext)} extension-based image URLs")
    print(f"[EXTRACT_URLS] Found {len(raw_image_urls_unsplash)} Unsplash image URLs")
    print(f"[EXTRACT_URLS] Found {len(raw_audio_urls_files)} audio file URLs")
    print(f"[EXTRACT_URLS] Found {len(audio_urls_xenocanto)} xeno-canto URLs")

    def clean_url(url: str) -> Optional[str]:
        """Strip trailing punctuation; return None if the result is not http(s).

        (Was annotated -> str while returning None on rejection.)
        """
        cleaned = url.rstrip('",;)')
        if cleaned.startswith(('http://', 'https://')):
            return cleaned
        print(f"[EXTRACT_URLS] ⚠️ Rejected malformed URL after cleaning: {cleaned}")
        return None

    image_urls = [u for u in (clean_url(url) for url in raw_image_urls) if u is not None]
    image_urls = list(dict.fromkeys(image_urls))

    audio_urls_files = [u for u in (clean_url(url) for url in raw_audio_urls_files) if u is not None]
    audio_urls_files = list(dict.fromkeys(audio_urls_files))

    audio_urls = audio_urls_files + audio_urls_xenocanto

    print(f"[EXTRACT_URLS] ✅ Cleaned image URLs ({len(image_urls)}): {image_urls}")
    print(f"[EXTRACT_URLS] ✅ Cleaned audio URLs ({len(audio_urls)}): {audio_urls}")

    return image_urls, audio_urls
|
|
|
|
|
|
|
|
def extract_species_name(text: str) -> Optional[str]:
    """
    Best-effort extraction of a bird species name from the response text.

    Scans for common trigger phrases followed by 1-4 capitalized words and
    returns the first capture, or None when nothing matches.
    """
    # Each alternative captures a run of capitalized words after its trigger.
    candidate_patterns = (
        r'identified as[:\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
        r'species[:\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
        r'This is (?:a |an )?([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
    )
    # First pattern that matches wins; patterns are tried in declaration order.
    return next(
        (m.group(1) for m in (re.search(p, text) for p in candidate_patterns) if m),
        None,
    )
|
|
|
|
|
|
|
|
async def parse_agent_response(
    raw_response: str,
    provider: str,
    api_key: str,
    model: str
) -> str:
    """
    Parse agent response into structured format and reformat with guaranteed markdown.

    OPTIMIZED FOR HACKATHON: Uses regex extraction instead of an LLM call.
    Still uses the LlamaIndex Pydantic model for structured data.

    Args:
        raw_response: The agent's raw text response
        provider: LLM provider ("openai", "anthropic", "huggingface") — unused
            in this optimized version, kept for interface compatibility
        api_key: API key (unused in optimized version)
        model: Model name (unused in optimized version)

    Returns:
        Formatted markdown response with guaranteed image syntax; falls back
        to raw_response when nothing was extracted or parsing fails.
    """
    try:
        print("[STRUCTURED OUTPUT] Starting parsing...")
        print(f"[STRUCTURED OUTPUT] Raw response length: {len(raw_response)} characters")
        print(f"[STRUCTURED OUTPUT] First 500 chars: {raw_response[:500]}")
        print(f"[STRUCTURED OUTPUT] Last 500 chars: {raw_response[-500:]}")

        image_urls, audio_urls = extract_urls_from_text(raw_response)
        print(f"[STRUCTURED OUTPUT] Found {len(image_urls)} images, {len(audio_urls)} audio files")

        species_name = extract_species_name(raw_response)

        structured = BirdIdentificationResponse(
            summary=raw_response,
            species_name=species_name,
            image_urls=image_urls,
            audio_urls=audio_urls,
            confidence_score=None  # classifier confidence not available here
        )

        # Nothing extracted -> hand back the agent text untouched.
        if not structured.image_urls and not structured.audio_urls:
            print("[STRUCTURED OUTPUT] No images or audio found, returning original")
            return raw_response

        formatted_parts = []

        # Strip extracted URLs (and their markdown image wrappers) out of the
        # summary so they are not rendered twice.
        clean_summary = raw_response
        for url in image_urls:
            clean_summary = re.sub(rf'!\[([^\]]*)\]\({re.escape(url)}\)', '', clean_summary)
            clean_summary = clean_summary.replace(url, '')
        for url in audio_urls:
            clean_summary = clean_summary.replace(url, '')
        formatted_parts.append(clean_summary.strip())

        if structured.image_urls:
            formatted_parts.append("\n### Images\n")
            for idx, url in enumerate(structured.image_urls, 1):
                alt_text = structured.species_name or f"Bird {idx}"
                # BUG FIX: this f-string was empty (alt_text went unused), so no
                # image markdown was ever emitted despite the docstring's promise.
                img_markdown = f"![{alt_text}]({url})"
                print(f"[STRUCTURED OUTPUT] Generated image markdown: {img_markdown}")
                formatted_parts.append(img_markdown)

        if structured.audio_urls:
            formatted_parts.append("\n### Audio Recordings\n")
            for idx, url in enumerate(structured.audio_urls, 1):
                # Link to the xeno-canto recording page rather than the raw download.
                display_url = url.replace("/download", "") if "xeno-canto.org" in url else url
                formatted_parts.append(f"🎧 [Listen to recording {idx}]({display_url})")

        result = "\n\n".join(formatted_parts)
        print("[STRUCTURED OUTPUT] ✅ Successfully formatted response")
        print(f"[STRUCTURED OUTPUT] Final markdown length: {len(result)} characters")
        print(f"[STRUCTURED OUTPUT] Final markdown (last 500 chars): {result[-500:]}")
        return result

    except Exception as e:
        # Never let formatting break the user-visible answer; best-effort fallback.
        print(f"[STRUCTURED OUTPUT] ❌ Parsing failed: {e}")
        return raw_response
|
|
|