Spaces:
Paused
Paused
| import sys | |
| import os | |
| import io, asyncio | |
| import pytest | |
| import time | |
| from litellm import mock_completion | |
| from unittest.mock import MagicMock, AsyncMock, patch | |
| sys.path.insert(0, os.path.abspath("../..")) | |
| import litellm | |
| from litellm.proxy.guardrails.guardrail_hooks.presidio import _OPTIONAL_PresidioPIIMasking, PresidioPerRequestConfig | |
| from litellm.types.guardrails import PiiEntityType, PiiAction | |
| from litellm.proxy._types import UserAPIKeyAuth | |
| from litellm.caching.caching import DualCache | |
| from litellm.exceptions import BlockedPiiEntityError | |
| from litellm.types.utils import CallTypes as LitellmCallTypes | |
| async def test_presidio_with_entities_config(): | |
| """Test for Presidio guardrail with entities config - requires actual Presidio API""" | |
| # Setup the guardrail with specific entities config | |
| litellm._turn_on_debug() | |
| pii_entities_config = { | |
| PiiEntityType.CREDIT_CARD: PiiAction.MASK, | |
| PiiEntityType.EMAIL_ADDRESS: PiiAction.MASK, | |
| } | |
| presidio_guardrail = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config=pii_entities_config, | |
| presidio_analyzer_api_base=os.environ.get("PRESIDIO_ANALYZER_API_BASE"), | |
| presidio_anonymizer_api_base=os.environ.get("PRESIDIO_ANONYMIZER_API_BASE") | |
| ) | |
| # Test text with different PII types | |
| test_text = "My credit card number is 4111-1111-1111-1111, my email is test@example.com, and my phone is 555-123-4567" | |
| # Test the analyze request configuration | |
| analyze_request = presidio_guardrail._get_presidio_analyze_request_payload( | |
| text=test_text, | |
| presidio_config=None, | |
| request_data={} | |
| ) | |
| # Verify entities were passed correctly | |
| assert "entities" in analyze_request | |
| assert set(analyze_request["entities"]) == set(pii_entities_config.keys()) | |
| # Test the check_pii method - this will call the actual Presidio API | |
| redacted_text = await presidio_guardrail.check_pii( | |
| text=test_text, | |
| output_parse_pii=True, | |
| presidio_config=None, | |
| request_data={} | |
| ) | |
| # Verify PII has been masked/replaced/redacted in the result | |
| assert "4111-1111-1111-1111" not in redacted_text | |
| assert "test@example.com" not in redacted_text | |
| # Since this entity is not in the config, it should not be masked | |
| assert "555-123-4567" in redacted_text | |
| # The specific replacements will vary based on Presidio's implementation | |
| print(f"Redacted text: {redacted_text}") | |
| async def test_presidio_apply_guardrail(): | |
| """Test for Presidio guardrail apply guardrail - requires actual Presidio API""" | |
| litellm._turn_on_debug() | |
| presidio_guardrail = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config={}, | |
| presidio_analyzer_api_base=os.environ.get("PRESIDIO_ANALYZER_API_BASE"), | |
| presidio_anonymizer_api_base=os.environ.get("PRESIDIO_ANONYMIZER_API_BASE") | |
| ) | |
| response = await presidio_guardrail.apply_guardrail( | |
| text="My credit card number is 4111-1111-1111-1111 and my email is test@example.com", | |
| language="en", | |
| ) | |
| print("response from apply guardrail for presidio: ", response) | |
| # assert tthe default config masks the credit card and email | |
| assert "4111-1111-1111-1111" not in response | |
| assert "test@example.com" not in response | |
| async def test_presidio_with_blocked_entities(): | |
| """Test for Presidio guardrail with blocked entities - requires actual Presidio API""" | |
| # Setup the guardrail with specific entities config - BLOCK for credit card | |
| litellm._turn_on_debug() | |
| pii_entities_config = { | |
| PiiEntityType.CREDIT_CARD: PiiAction.BLOCK, # This entity should cause a block | |
| PiiEntityType.EMAIL_ADDRESS: PiiAction.MASK, # This entity should be masked | |
| } | |
| presidio_guardrail = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config=pii_entities_config, | |
| presidio_analyzer_api_base=os.environ.get("PRESIDIO_ANALYZER_API_BASE"), | |
| presidio_anonymizer_api_base=os.environ.get("PRESIDIO_ANONYMIZER_API_BASE") | |
| ) | |
| # Test text with blocked PII type | |
| test_text = "My credit card number is 4111-1111-1111-1111 and my email is test@example.com" | |
| # Verify the analyze request configuration | |
| analyze_request = presidio_guardrail._get_presidio_analyze_request_payload( | |
| text=test_text, | |
| presidio_config=None, | |
| request_data={} | |
| ) | |
| # Verify entities were passed correctly | |
| assert "entities" in analyze_request | |
| assert set(analyze_request["entities"]) == set(pii_entities_config.keys()) | |
| # Test that BlockedPiiEntityError is raised when check_pii is called | |
| with pytest.raises(BlockedPiiEntityError) as excinfo: | |
| await presidio_guardrail.check_pii( | |
| text=test_text, | |
| output_parse_pii=True, | |
| presidio_config=None, | |
| request_data={} | |
| ) | |
| # Verify the error contains the correct entity type | |
| assert excinfo.value.entity_type == PiiEntityType.CREDIT_CARD | |
| assert excinfo.value.guardrail_name == presidio_guardrail.guardrail_name | |
| async def test_presidio_pre_call_hook_with_blocked_entities(): | |
| """Test for Presidio guardrail pre-call hook with blocked entities on a chat completion request""" | |
| # Setup the guardrail with specific entities config | |
| pii_entities_config = { | |
| PiiEntityType.CREDIT_CARD: PiiAction.BLOCK, # This entity should cause a block | |
| PiiEntityType.EMAIL_ADDRESS: PiiAction.MASK, # This entity should be masked | |
| } | |
| presidio_guardrail = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config=pii_entities_config, | |
| presidio_analyzer_api_base=os.environ.get("PRESIDIO_ANALYZER_API_BASE"), | |
| presidio_anonymizer_api_base=os.environ.get("PRESIDIO_ANONYMIZER_API_BASE") | |
| ) | |
| # Create a sample chat completion request with PII data | |
| data = { | |
| "messages": [ | |
| {"role": "system", "content": "You are a helpful assistant."}, | |
| {"role": "user", "content": "My credit card is 4111-1111-1111-1111 and my email is test@example.com."} | |
| ], | |
| "model": "gpt-3.5-turbo" | |
| } | |
| # Mock objects needed for the pre-call hook | |
| user_api_key_dict = UserAPIKeyAuth(api_key="test_key") | |
| cache = DualCache() | |
| # Call the pre-call hook and expect BlockedPiiEntityError | |
| with pytest.raises(BlockedPiiEntityError) as excinfo: | |
| await presidio_guardrail.async_pre_call_hook( | |
| user_api_key_dict=user_api_key_dict, | |
| cache=cache, | |
| data=data, | |
| call_type="completion" | |
| ) | |
| print(f"got error: {excinfo}") | |
| # Verify the error contains the correct entity type | |
| assert excinfo.value.entity_type == PiiEntityType.CREDIT_CARD | |
| assert excinfo.value.guardrail_name == presidio_guardrail.guardrail_name | |
| async def test_presidio_pre_call_hook_with_different_call_types(call_type): | |
| """Test for Presidio guardrail pre-call hook with both completion and acompletion call types""" | |
| # Setup the guardrail with specific entities config | |
| pii_entities_config = { | |
| PiiEntityType.CREDIT_CARD: PiiAction.MASK, | |
| PiiEntityType.EMAIL_ADDRESS: PiiAction.MASK, | |
| } | |
| presidio_guardrail = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config=pii_entities_config, | |
| presidio_analyzer_api_base=os.environ.get("PRESIDIO_ANALYZER_API_BASE"), | |
| presidio_anonymizer_api_base=os.environ.get("PRESIDIO_ANONYMIZER_API_BASE") | |
| ) | |
| # Create a sample request with PII data | |
| data = { | |
| "messages": [ | |
| {"role": "system", "content": "You are a helpful assistant."}, | |
| {"role": "user", "content": "My credit card is 4111-1111-1111-1111 and my email is test@example.com. My phone number is 555-123-4567"} | |
| ], | |
| "model": "gpt-3.5-turbo" | |
| } | |
| # Mock objects needed for the pre-call hook | |
| user_api_key_dict = UserAPIKeyAuth(api_key="test_key") | |
| cache = DualCache() | |
| # Call the pre-call hook with the specified call type | |
| modified_data = await presidio_guardrail.async_pre_call_hook( | |
| user_api_key_dict=user_api_key_dict, | |
| cache=cache, | |
| data=data, | |
| call_type=call_type | |
| ) | |
| # Verify the messages have been modified to mask PII | |
| assert modified_data["messages"][0]["content"] == "You are a helpful assistant." # System prompt should be unchanged | |
| user_message = modified_data["messages"][1]["content"] | |
| assert "4111-1111-1111-1111" not in user_message | |
| assert "test@example.com" not in user_message | |
| # Since this entity is not in the config, it should not be masked | |
| assert "555-123-4567" in user_message | |
| print(f"Modified user message for call_type={call_type}: {user_message}") | |
| def test_validate_environment_missing_http(base_url): | |
| pii_masking = _OPTIONAL_PresidioPIIMasking(mock_testing=True) | |
| # Use patch.dict to temporarily modify environment variables only for this test | |
| env_vars = { | |
| "PRESIDIO_ANALYZER_API_BASE": f"{base_url}/analyze", | |
| "PRESIDIO_ANONYMIZER_API_BASE": f"{base_url}/anonymize" | |
| } | |
| with patch.dict(os.environ, env_vars): | |
| pii_masking.validate_environment() | |
| expected_url = base_url | |
| if not (base_url.startswith("https://") or base_url.startswith("http://")): | |
| expected_url = "http://" + base_url | |
| assert ( | |
| pii_masking.presidio_anonymizer_api_base == f"{expected_url}/anonymize/" | |
| ), "Got={}, Expected={}".format( | |
| pii_masking.presidio_anonymizer_api_base, f"{expected_url}/anonymize/" | |
| ) | |
| assert pii_masking.presidio_analyzer_api_base == f"{expected_url}/analyze/" | |
| async def test_output_parsing(): | |
| """ | |
| - have presidio pii masking - mask an input message | |
| - make llm completion call | |
| - have presidio pii masking - output parse message | |
| - assert that no masked tokens are in the input message | |
| """ | |
| litellm.set_verbose = True | |
| litellm.output_parse_pii = True | |
| pii_masking = _OPTIONAL_PresidioPIIMasking(mock_testing=True) | |
| initial_message = [ | |
| { | |
| "role": "user", | |
| "content": "hello world, my name is Jane Doe. My number is: 034453334", | |
| } | |
| ] | |
| filtered_message = [ | |
| { | |
| "role": "user", | |
| "content": "hello world, my name is <PERSON>. My number is: <PHONE_NUMBER>", | |
| } | |
| ] | |
| pii_masking.pii_tokens = {"<PERSON>": "Jane Doe", "<PHONE_NUMBER>": "034453334"} | |
| response = mock_completion( | |
| model="gpt-3.5-turbo", | |
| messages=filtered_message, | |
| mock_response="Hello <PERSON>! How can I assist you today?", | |
| ) | |
| new_response = await pii_masking.async_post_call_success_hook( | |
| user_api_key_dict=UserAPIKeyAuth(), | |
| data={ | |
| "messages": [{"role": "system", "content": "You are an helpfull assistant"}] | |
| }, | |
| response=response, | |
| ) | |
| assert ( | |
| new_response.choices[0].message.content | |
| == "Hello Jane Doe! How can I assist you today?" | |
| ) | |
| # asyncio.run(test_output_parsing()) | |
| ### UNIT TESTS FOR PRESIDIO PII MASKING ### | |
| input_a_anonymizer_results = { | |
| "text": "hello world, my name is <PERSON>. My number is: <PHONE_NUMBER>", | |
| "items": [ | |
| { | |
| "start": 48, | |
| "end": 62, | |
| "entity_type": "PHONE_NUMBER", | |
| "text": "<PHONE_NUMBER>", | |
| "operator": "replace", | |
| }, | |
| { | |
| "start": 24, | |
| "end": 32, | |
| "entity_type": "PERSON", | |
| "text": "<PERSON>", | |
| "operator": "replace", | |
| }, | |
| ], | |
| } | |
| input_b_anonymizer_results = { | |
| "text": "My name is <PERSON>, who are you? Say my name in your response", | |
| "items": [ | |
| { | |
| "start": 11, | |
| "end": 19, | |
| "entity_type": "PERSON", | |
| "text": "<PERSON>", | |
| "operator": "replace", | |
| } | |
| ], | |
| } | |
| # Test if PII masking works with input A | |
| async def test_presidio_pii_masking_input_a(): | |
| """ | |
| Tests to see if correct parts of sentence anonymized | |
| """ | |
| pii_masking = _OPTIONAL_PresidioPIIMasking( | |
| mock_testing=True, mock_redacted_text=input_a_anonymizer_results | |
| ) | |
| _api_key = "sk-12345" | |
| user_api_key_dict = UserAPIKeyAuth(api_key=_api_key) | |
| local_cache = DualCache() | |
| new_data = await pii_masking.async_pre_call_hook( | |
| user_api_key_dict=user_api_key_dict, | |
| cache=local_cache, | |
| data={ | |
| "messages": [ | |
| { | |
| "role": "user", | |
| "content": "hello world, my name is Jane Doe. My number is: 23r323r23r2wwkl", | |
| } | |
| ] | |
| }, | |
| call_type="completion", | |
| ) | |
| assert "<PERSON>" in new_data["messages"][0]["content"] | |
| assert "<PHONE_NUMBER>" in new_data["messages"][0]["content"] | |
| # Test if PII masking works with input B (also test if the response != A's response) | |
| async def test_presidio_pii_masking_input_b(): | |
| """ | |
| Tests to see if correct parts of sentence anonymized | |
| """ | |
| pii_masking = _OPTIONAL_PresidioPIIMasking( | |
| mock_testing=True, mock_redacted_text=input_b_anonymizer_results | |
| ) | |
| _api_key = "sk-12345" | |
| user_api_key_dict = UserAPIKeyAuth(api_key=_api_key) | |
| local_cache = DualCache() | |
| new_data = await pii_masking.async_pre_call_hook( | |
| user_api_key_dict=user_api_key_dict, | |
| cache=local_cache, | |
| data={ | |
| "messages": [ | |
| { | |
| "role": "user", | |
| "content": "My name is Jane Doe, who are you? Say my name in your response", | |
| } | |
| ] | |
| }, | |
| call_type="completion", | |
| ) | |
| assert "<PERSON>" in new_data["messages"][0]["content"] | |
| assert "<PHONE_NUMBER>" not in new_data["messages"][0]["content"] | |
| async def test_presidio_pii_masking_logging_output_only_no_pre_api_hook(): | |
| from litellm.types.guardrails import GuardrailEventHooks | |
| pii_masking = _OPTIONAL_PresidioPIIMasking( | |
| logging_only=True, | |
| mock_testing=True, | |
| mock_redacted_text=input_b_anonymizer_results, | |
| ) | |
| _api_key = "sk-12345" | |
| user_api_key_dict = UserAPIKeyAuth(api_key=_api_key) | |
| local_cache = DualCache() | |
| test_messages = [ | |
| { | |
| "role": "user", | |
| "content": "My name is Jane Doe, who are you? Say my name in your response", | |
| } | |
| ] | |
| assert ( | |
| pii_masking.should_run_guardrail( | |
| data={"messages": test_messages}, | |
| event_type=GuardrailEventHooks.pre_call, | |
| ) | |
| is False | |
| ) | |
| async def test_presidio_pii_masking_logging_output_only_logged_response_guardrails_config(): | |
| from typing import Dict, List, Optional | |
| import litellm | |
| from litellm.proxy.guardrails.init_guardrails import initialize_guardrails | |
| from litellm.types.guardrails import ( | |
| GuardrailItem, | |
| GuardrailItemSpec, | |
| GuardrailEventHooks, | |
| ) | |
| litellm.set_verbose = True | |
| # Environment variables are now patched via the decorator instead of setting them directly | |
| guardrails_config: List[Dict[str, GuardrailItemSpec]] = [ | |
| { | |
| "pii_masking": { | |
| "callbacks": ["presidio"], | |
| "default_on": True, | |
| "logging_only": True, | |
| } | |
| } | |
| ] | |
| litellm_settings = {"guardrails": guardrails_config} | |
| assert len(litellm.guardrail_name_config_map) == 0 | |
| initialize_guardrails( | |
| guardrails_config=guardrails_config, | |
| premium_user=True, | |
| config_file_path="", | |
| litellm_settings=litellm_settings, | |
| ) | |
| assert len(litellm.guardrail_name_config_map) == 1 | |
| pii_masking_obj: Optional[_OPTIONAL_PresidioPIIMasking] = None | |
| for callback in litellm.callbacks: | |
| print(f"CALLBACK: {callback}") | |
| if isinstance(callback, _OPTIONAL_PresidioPIIMasking): | |
| pii_masking_obj = callback | |
| assert pii_masking_obj is not None | |
| assert hasattr(pii_masking_obj, "logging_only") | |
| assert pii_masking_obj.event_hook == GuardrailEventHooks.logging_only | |
| assert pii_masking_obj.should_run_guardrail( | |
| data={}, event_type=GuardrailEventHooks.logging_only | |
| ) | |
| async def test_presidio_language_configuration(): | |
| """Test that presidio_language parameter is properly set and used in analyze requests""" | |
| litellm._turn_on_debug() | |
| # Test with German language using mock testing to avoid API calls | |
| presidio_guardrail_de = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config={}, | |
| presidio_language="de", | |
| mock_testing=True # This bypasses the API validation | |
| ) | |
| test_text = "Meine Telefonnummer ist +49 30 12345678" | |
| # Test the analyze request configuration | |
| analyze_request = presidio_guardrail_de._get_presidio_analyze_request_payload( | |
| text=test_text, | |
| presidio_config=None, | |
| request_data={} | |
| ) | |
| # Verify the language is set to German | |
| assert analyze_request["language"] == "de" | |
| assert analyze_request["text"] == test_text | |
| # Test with Spanish language | |
| presidio_guardrail_es = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config={}, | |
| presidio_language="es", | |
| mock_testing=True | |
| ) | |
| test_text_es = "Mi número de teléfono es +34 912 345 678" | |
| analyze_request_es = presidio_guardrail_es._get_presidio_analyze_request_payload( | |
| text=test_text_es, | |
| presidio_config=None, | |
| request_data={} | |
| ) | |
| # Verify the language is set to Spanish | |
| assert analyze_request_es["language"] == "es" | |
| assert analyze_request_es["text"] == test_text_es | |
| # Test default language (English) when not specified | |
| presidio_guardrail_default = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config={}, | |
| mock_testing=True | |
| ) | |
| test_text_en = "My phone number is +1 555-123-4567" | |
| analyze_request_default = presidio_guardrail_default._get_presidio_analyze_request_payload( | |
| text=test_text_en, | |
| presidio_config=None, | |
| request_data={} | |
| ) | |
| # Verify the language defaults to English | |
| assert analyze_request_default["language"] == "en" | |
| assert analyze_request_default["text"] == test_text_en | |
| async def test_presidio_language_configuration_with_per_request_override(): | |
| """Test that per-request language configuration overrides the default configured language""" | |
| litellm._turn_on_debug() | |
| # Set up guardrail with German as default language | |
| presidio_guardrail = _OPTIONAL_PresidioPIIMasking( | |
| pii_entities_config={}, | |
| presidio_language="de", | |
| mock_testing=True | |
| ) | |
| test_text = "Test text with PII" | |
| # Test with per-request config overriding the default language | |
| presidio_config = PresidioPerRequestConfig(language="fr") | |
| analyze_request = presidio_guardrail._get_presidio_analyze_request_payload( | |
| text=test_text, | |
| presidio_config=presidio_config, | |
| request_data={} | |
| ) | |
| # Verify the per-request language (French) overrides the default (German) | |
| assert analyze_request["language"] == "fr" | |
| assert analyze_request["text"] == test_text | |
| # Test without per-request config - should use default language | |
| analyze_request_default = presidio_guardrail._get_presidio_analyze_request_payload( | |
| text=test_text, | |
| presidio_config=None, | |
| request_data={} | |
| ) | |
| # Verify the default language (German) is used | |
| assert analyze_request_default["language"] == "de" | |
| assert analyze_request_default["text"] == test_text | |