Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 65 additions & 1 deletion sentry_sdk/integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
safe_serialize,
)

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict

if TYPE_CHECKING:
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
Expand Down Expand Up @@ -177,6 +177,68 @@ def _calculate_token_usage(
)


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
e.g:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,...",
"detail": "high"
}
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
"""

def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
if item.get("type") == "image_url":
image_url = item.get("image_url") or {}
if image_url.get("url", "").startswith("data:"):
return {
"type": "blob",
"modality": "image",
"mime_type": item["image_url"]["url"].split(";base64,")[0],
"content": item["image_url"]["url"].split(";base64,")[1],
}
else:
return {
"type": "uri",
"uri": item["image_url"]["url"],
}
return item

for message in messages:
content = message.get("content")
if isinstance(content, list):
message["content"] = [_map_item(item) for item in content]
return messages


def _set_input_data(
span: "Span",
kwargs: "dict[str, Any]",
Expand All @@ -198,6 +260,8 @@ def _set_input_data(
and integration.include_prompts
):
normalized_messages = normalize_message_roles(messages)
normalized_messages = _convert_message_parts(normalized_messages)

scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
if messages_data is not None:
Expand Down
72 changes: 72 additions & 0 deletions tests/integrations/openai/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from sentry_sdk.integrations.openai import (
OpenAIIntegration,
_calculate_token_usage,
_convert_message_parts,
)
from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES
from sentry_sdk._types import AnnotatedValue
Expand Down Expand Up @@ -1509,6 +1510,77 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
assert "ai" not in roles


def test_convert_message_parts_image_url_to_blob():
    """A base64 `data:` image_url part is rewritten as a blob part; text parts pass through."""
    expected_text_part = {
        "text": "How many ponies do you see in the image?",
        "type": "text",
    }
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
            "detail": "high",
        },
    }
    user_message = {
        "role": "user",
        # Pass a copy of the text part so the comparison below is against an
        # independent expected value.
        "content": [dict(expected_text_part), image_part],
    }

    converted = _convert_message_parts([user_message])

    # Message-level shape is preserved
    assert len(converted) == 1
    result = converted[0]
    assert result["role"] == "user"
    parts = result["content"]
    assert isinstance(parts, list)
    assert len(parts) == 2

    # The text part comes back as it went in
    assert parts[0] == expected_text_part

    # The image part is now a blob carrying the split-up data URL
    blob_part = parts[1]
    assert blob_part["type"] == "blob"
    assert blob_part["modality"] == "image"
    assert blob_part["mime_type"] == "data:image/jpeg"
    assert blob_part["content"] == "/9j/4AAQSkZJRg=="
    # The nested OpenAI `image_url` object must be gone from the converted part
    assert "image_url" not in blob_part


def test_convert_message_parts_image_url_to_uri():
    """An image_url part pointing at a regular (non-`data:`) URL is rewritten as a uri part."""
    remote_url = "https://example.com/image.jpg"
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": remote_url,
            "detail": "low",
        },
    }

    converted = _convert_message_parts([{"role": "user", "content": [image_part]}])

    assert len(converted) == 1
    uri_part = converted[0]["content"][0]
    assert uri_part["type"] == "uri"
    assert uri_part["uri"] == remote_url
    # The nested OpenAI `image_url` object must be gone from the converted part
    assert "image_url" not in uri_part


def test_openai_message_truncation(sentry_init, capture_events):
"""Test that large messages are truncated properly in OpenAI integration."""
sentry_init(
Expand Down
Loading