diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py index cb34f36e4f..6496bd3397 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py @@ -1,5 +1,10 @@ +import base64 import sentry_sdk -from sentry_sdk.ai.utils import set_data_normalized +from sentry_sdk.ai.utils import ( + normalize_message_roles, + set_data_normalized, + truncate_and_annotate_messages, +) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.utils import safe_serialize @@ -29,6 +34,7 @@ UserPromptPart, TextPart, ThinkingPart, + BinaryContent, ) except ImportError: # Fallback if these classes are not available @@ -38,6 +44,7 @@ UserPromptPart = None TextPart = None ThinkingPart = None + BinaryContent = None def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> None: @@ -107,6 +114,15 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non for item in part.content: if isinstance(item, str): content.append({"type": "text", "text": item}) + elif BinaryContent and isinstance(item, BinaryContent): + content.append( + { + "type": "blob", + "modality": item.media_type.split("/")[0], + "mime_type": item.media_type, + "content": f"data:{item.media_type};base64,{base64.b64encode(item.data).decode('utf-8')}", + } + ) else: content.append(safe_serialize(item)) else: @@ -124,8 +140,13 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non formatted_messages.append(message) if formatted_messages: + normalized_messages = normalize_message_roles(formatted_messages) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages( + normalized_messages, span, scope + ) set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, formatted_messages, unpack=False + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) except Exception: # If we 
fail to format messages, just skip it diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index 629b3d1206..a22125e33d 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -1,5 +1,10 @@ import sentry_sdk -from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized +from sentry_sdk.ai.utils import ( + get_start_span_function, + normalize_message_roles, + set_data_normalized, + truncate_and_annotate_messages, +) from sentry_sdk.consts import OP, SPANDATA from ..consts import SPAN_ORIGIN @@ -102,8 +107,13 @@ def invoke_agent_span( ) if messages: + normalized_messages = normalize_message_roles(messages) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages( + normalized_messages, span, scope + ) set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) return span diff --git a/sentry_sdk/integrations/pydantic_ai/spans/utils.py b/sentry_sdk/integrations/pydantic_ai/spans/utils.py index c70afd5f31..89fef172e1 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/utils.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/utils.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Union + from typing import Union, Dict, Any, List from pydantic_ai.usage import RequestUsage, RunUsage # type: ignore diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 049bcde39c..a7587a6fcd 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -1,12 +1,17 @@ import asyncio +import json import pytest +from unittest.mock import MagicMock from typing import Annotated from pydantic import Field +import sentry_sdk 
from sentry_sdk.integrations.pydantic_ai import PydanticAIIntegration +from sentry_sdk.integrations.pydantic_ai.spans.ai_client import _set_input_messages from pydantic_ai import Agent +from pydantic_ai.messages import BinaryContent, UserPromptPart from pydantic_ai.models.test import TestModel from pydantic_ai.exceptions import ModelRetry, UnexpectedModelBehavior
@@ -2604,3 +2609,153 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_events)
 
     # Should not crash
     assert transaction is not None
+
+
+def _get_messages_from_span(span_data):
+    """Return the ``gen_ai.request.messages`` span attribute as Python objects.
+
+    The value is decoded with ``json.loads`` when it is stored as a string
+    and returned unchanged otherwise.
+    """
+    messages_data = span_data["gen_ai.request.messages"]
+    if isinstance(messages_data, str):
+        return json.loads(messages_data)
+    return messages_data
+
+
+def _iter_content_items(messages_data):
+    """Yield each dict-shaped content part of every message.
+
+    Content is not guaranteed to be a list of dicts: the integration appends
+    ``safe_serialize(item)`` for prompt items it does not recognize, and a
+    message may carry no list-valued ``content`` at all. Such entries are
+    skipped so callers can use ``dict`` methods on every yielded item.
+    """
+    for msg in messages_data:
+        content = msg.get("content") if isinstance(msg, dict) else None
+        if not isinstance(content, list):
+            continue
+        for content_item in content:
+            if isinstance(content_item, dict):
+                yield content_item
+
+
+def _find_binary_content(messages_data, expected_modality, expected_mime_type):
+    """Check the first ``blob`` content part and report whether one exists.
+
+    Returns ``True`` after asserting modality, MIME type and payload of the
+    first blob found, ``False`` when no message contains a blob.
+    """
+    for content_item in _iter_content_items(messages_data):
+        if content_item.get("type") != "blob":
+            continue
+        assert content_item["modality"] == expected_modality
+        assert content_item["mime_type"] == expected_mime_type
+        assert "content" in content_item
+        content_str = str(content_item["content"])
+        # The payload is either the data URL or a "[Filtered]" placeholder.
+        assert (
+            f"data:{expected_mime_type};base64," in content_str
+            or "[Filtered]" in content_str
+        )
+        return True
+    return False
+
+
+@pytest.mark.asyncio
+async def test_binary_content_encoding_image(sentry_init, capture_events):
+    """Test that BinaryContent with image data is properly encoded in messages."""
+    sentry_init(
+        integrations=[PydanticAIIntegration()],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+
+    events = capture_events()
+
+    with sentry_sdk.start_transaction(op="test", name="test"):
+        # Leaving the ``with`` block finishes the span; no manual finish().
+        with sentry_sdk.start_span(op="test_span") as span:
+            binary_content = BinaryContent(
+                data=b"fake_image_data_12345", media_type="image/png"
+            )
+            user_part = UserPromptPart(
+                content=["Look at this image:", binary_content]
+            )
+            mock_msg = MagicMock()
+            mock_msg.parts = [user_part]
+            mock_msg.instructions = None
+
+            _set_input_messages(span, [mock_msg])
+
+    (event,) = events
+    span_data = event["spans"][0]["data"]
+    messages_data = _get_messages_from_span(span_data)
+    assert _find_binary_content(messages_data, "image", "image/png")
+
+
+@pytest.mark.asyncio
+async def test_binary_content_encoding_mixed_content(sentry_init, capture_events):
+    """Test that BinaryContent mixed with text content is properly handled."""
+    sentry_init(
+        integrations=[PydanticAIIntegration()],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+
+    events = capture_events()
+
+    with sentry_sdk.start_transaction(op="test", name="test"):
+        with sentry_sdk.start_span(op="test_span") as span:
+            binary_content = BinaryContent(
+                data=b"fake_image_bytes", media_type="image/jpeg"
+            )
+            user_part = UserPromptPart(
+                content=["Here is an image:", binary_content, "What do you see?"]
+            )
+            mock_msg = MagicMock()
+            mock_msg.parts = [user_part]
+            mock_msg.instructions = None
+
+            _set_input_messages(span, [mock_msg])
+
+    (event,) = events
+    span_data = event["spans"][0]["data"]
+    messages_data = _get_messages_from_span(span_data)
+
+    # Verify both text and binary content are present
+    found_text = any(
+        content_item.get("type") == "text"
+        for content_item in _iter_content_items(messages_data)
+    )
+    assert found_text, "Text content should be found"
+    assert _find_binary_content(messages_data, "image", "image/jpeg")
+
+
+@pytest.mark.asyncio
+async def test_binary_content_in_agent_run(sentry_init, capture_events):
+    """Test that BinaryContent in actual agent run is properly captured in spans."""
+    agent = Agent("test", name="test_binary_agent")
+
+    sentry_init(
+        integrations=[PydanticAIIntegration()],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+
+    events = capture_events()
+    binary_content = BinaryContent(
+        data=b"fake_image_data_for_testing", media_type="image/png"
+    )
+    await agent.run(["Analyze this image:", binary_content])
+
+    (transaction,) = events
+    chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"]
+    assert len(chat_spans) >= 1
+
+    # Assert unconditionally and on the structured blob entry: a substring
+    # check for "image" would already match the prompt text itself.
+    chat_span = chat_spans[0]
+    assert "gen_ai.request.messages" in chat_span["data"]
+    messages_data = _get_messages_from_span(chat_span["data"])
+    assert _find_binary_content(messages_data, "image", "image/png")