File size: 2,660 Bytes
507be68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import pytest
import sys
import os

# Make the parent directory of this test file (the project root) importable,
# so that the `modules` package used below can be resolved.
sys.path.append(
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir)
    )
)

from modules.tools import search_trials
from modules.utils import load_environment

# Mark as integration test since it loads the DB
@pytest.mark.integration
def test_hybrid_search_integration():
    """
    Integration test for Hybrid Search.

    Verifies that the search_trials tool can retrieve results using the hybrid
    retriever. The test data is discovered dynamically so that it does not
    depend on specific records being present:

    1. Run a broad "cancer" query and pull the first ``ID: NCT...`` out of the
       returned text.
    2. Search for that exact ID and assert it comes back.
    3. Pull the first ``Sponsor: ...`` value out of the broad results,
       normalize it, and assert that a sponsor-filtered search returns it.

    The test is skipped (not silently passed) when the broad results contain
    no study ID or no usable sponsor to test against.
    """
    import re

    load_environment()

    # Test 1: Dynamic ID Search
    # First, find a valid ID from a broad search
    print("\n🔍 Finding a valid ID for testing...")
    broad_results = search_trials.invoke({"query": "cancer"})

    # Extract an ID from the results
    match = re.search(r"ID: (NCT\d+)", broad_results)
    if not match:
        pytest.skip("Could not find any studies in DB to test against.")

    target_id = match.group(1)
    print(f"🎯 Found target ID: {target_id}. Now testing exact search...")

    # Now search for that specific ID
    results_id = search_trials.invoke({"query": target_id})

    assert "Found" in results_id, f"Exact ID search for {target_id} returned no results"
    assert target_id in results_id, f"Hybrid search failed to retrieve exact ID {target_id}"

    # Test 2: Dynamic sponsor search
    # Extract sponsor from the first result to ensure we test with valid data.
    # `.` does not match a newline, so this captures the rest of the line and
    # also works when the sponsor is on the very last line of the output.
    sponsor_match = re.search(r"Sponsor: (.*)", broad_results)
    target_sponsor = sponsor_match.group(1).strip() if sponsor_match else ""
    if not target_sponsor:
        # Report a skip rather than returning: a bare return would show the
        # test as passed even though the sponsor search was never exercised.
        # An empty sponsor must also be rejected here, because `"" in text`
        # is always true and would make the final assertion meaningless.
        print("⚠️ Could not extract sponsor from results. Skipping hybrid test.")
        pytest.skip("Could not extract a sponsor from the search results.")

    # Normalize the sponsor name before passing it as the filter. If
    # normalization leaves it unchanged we still search with it as-is.
    from modules.utils import normalize_sponsor
    simple_sponsor = normalize_sponsor(target_sponsor)

    print(f"\n🔍 Testing Hybrid Search with dynamic sponsor: '{simple_sponsor}' (Original: {target_sponsor})")

    # Use a generic query so that the sponsor filter is what selects the study
    results_hybrid = search_trials.invoke({"query": "study", "sponsor": simple_sponsor})

    assert "Found" in results_hybrid, f"Should find results for valid sponsor {simple_sponsor}"
    assert target_sponsor in results_hybrid or simple_sponsor in results_hybrid, (
        f"Sponsor-filtered results mention neither '{target_sponsor}' nor '{simple_sponsor}'"
    )