langfuse.media

This module contains the LangfuseMedia class, which is used to wrap media objects for upload to Langfuse.

  1"""This module contains the LangfuseMedia class, which is used to wrap media objects for upload to Langfuse."""
  2
  3import base64
  4import hashlib
  5import logging
  6import os
  7from typing import Optional, cast, Tuple
  8
  9from langfuse.api import MediaContentType
 10from langfuse.types import ParsedMediaReference
 11
 12
 13class LangfuseMedia:
 14    """A class for wrapping media objects for upload to Langfuse.
 15
 16    This class handles the preparation and formatting of media content for Langfuse,
 17    supporting both base64 data URIs and raw content bytes.
 18
 19    Args:
 20        obj (Optional[object]): The source object to be wrapped. Can be accessed via the `obj` attribute.
 21        base64_data_uri (Optional[str]): A base64-encoded data URI containing the media content
 22            and content type (e.g., "...").
 23        content_type (Optional[str]): The MIME type of the media content when providing raw bytes.
 24        content_bytes (Optional[bytes]): Raw bytes of the media content.
 25        file_path (Optional[str]): The path to the file containing the media content. For relative paths,
 26            the current working directory is used.
 27
 28    Raises:
 29        ValueError: If neither base64_data_uri or the combination of content_bytes
 30            and content_type is provided.
 31    """
 32
 33    obj: object
 34
 35    _log = logging.getLogger(__name__)
 36    _content_bytes: Optional[bytes]
 37    _content_type: Optional[MediaContentType]
 38    _source: Optional[str]
 39    _media_id: Optional[str]
 40
 41    def __init__(
 42        self,
 43        *,
 44        obj: Optional[object] = None,
 45        base64_data_uri: Optional[str] = None,
 46        content_type: Optional[MediaContentType] = None,
 47        content_bytes: Optional[bytes] = None,
 48        file_path: Optional[str] = None,
 49    ):
 50        """Initialize a LangfuseMedia object.
 51
 52        Args:
 53            obj: The object to wrap.
 54
 55            base64_data_uri: A base64-encoded data URI containing the media content
 56                and content type (e.g., "...").
 57            content_type: The MIME type of the media content when providing raw bytes or reading from a file.
 58            content_bytes: Raw bytes of the media content.
 59            file_path: The path to the file containing the media content. For relative paths,
 60                the current working directory is used.
 61        """
 62        self.obj = obj
 63        self._media_id = None
 64
 65        if base64_data_uri is not None:
 66            parsed_data = self._parse_base64_data_uri(base64_data_uri)
 67            self._content_bytes, self._content_type = parsed_data
 68            self._source = "base64_data_uri"
 69
 70        elif content_bytes is not None and content_type is not None:
 71            self._content_type = content_type
 72            self._content_bytes = content_bytes
 73            self._source = "bytes"
 74        elif (
 75            file_path is not None
 76            and content_type is not None
 77            and os.path.exists(file_path)
 78        ):
 79            self._content_bytes = self._read_file(file_path)
 80            self._content_type = content_type if self._content_bytes else None
 81            self._source = "file" if self._content_bytes else None
 82        else:
 83            self._log.error(
 84                "base64_data_uri, or content_bytes and content_type, or file_path must be provided to LangfuseMedia"
 85            )
 86
 87            self._content_bytes = None
 88            self._content_type = None
 89            self._source = None
 90
 91    def _read_file(self, file_path: str) -> Optional[bytes]:
 92        try:
 93            with open(file_path, "rb") as file:
 94                return file.read()
 95        except Exception as e:
 96            self._log.error(f"Error reading file at path {file_path}", exc_info=e)
 97
 98            return None
 99
100    @property
101    def _content_length(self) -> Optional[int]:
102        return len(self._content_bytes) if self._content_bytes else None
103
104    @property
105    def _content_sha256_hash(self) -> Optional[str]:
106        if self._content_bytes is None:
107            return None
108
109        sha256_hash_bytes = hashlib.sha256(self._content_bytes).digest()
110
111        return base64.b64encode(sha256_hash_bytes).decode("utf-8")
112
113    @property
114    def _reference_string(self) -> Optional[str]:
115        if self._content_type is None or self._source is None or self._media_id is None:
116            return None
117
118        return f"@@@langfuseMedia:type={self._content_type}|id={self._media_id}|source={self._source}@@@"
119
120    @staticmethod
121    def parse_reference_string(reference_string: str) -> ParsedMediaReference:
122        """Parse a media reference string into a ParsedMediaReference.
123
124        Example reference string:
125            "@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"
126
127        Args:
128            reference_string: The reference string to parse.
129
130        Returns:
131            A TypedDict with the media_id, source, and content_type.
132
133        Raises:
134            ValueError: If the reference string is empty or not a string.
135            ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
136            ValueError: If the reference string does not end with "@@@".
137            ValueError: If the reference string is missing required fields.
138        """
139        if not reference_string:
140            raise ValueError("Reference string is empty")
141
142        if not isinstance(reference_string, str):
143            raise ValueError("Reference string is not a string")
144
145        if not reference_string.startswith("@@@langfuseMedia:type="):
146            raise ValueError(
147                "Reference string does not start with '@@@langfuseMedia:type='"
148            )
149
150        if not reference_string.endswith("@@@"):
151            raise ValueError("Reference string does not end with '@@@'")
152
153        content = reference_string[len("@@@langfuseMedia:") :].rstrip("@@@")
154
155        # Split into key-value pairs
156        pairs = content.split("|")
157        parsed_data = {}
158
159        for pair in pairs:
160            key, value = pair.split("=", 1)
161            parsed_data[key] = value
162
163        # Verify all required fields are present
164        if not all(key in parsed_data for key in ["type", "id", "source"]):
165            raise ValueError("Missing required fields in reference string")
166
167        return ParsedMediaReference(
168            media_id=parsed_data["id"],
169            source=parsed_data["source"],
170            content_type=parsed_data["type"],
171        )
172
173    def _parse_base64_data_uri(
174        self, data: str
175    ) -> Tuple[Optional[bytes], Optional[MediaContentType]]:
176        # Example data URI: ...
177        try:
178            if not data or not isinstance(data, str):
179                raise ValueError("Data URI is not a string")
180
181            if not data.startswith("data:"):
182                raise ValueError("Data URI does not start with 'data:'")
183
184            header, actual_data = data[5:].split(",", 1)
185            if not header or not actual_data:
186                raise ValueError("Invalid URI")
187
188            # Split header into parts and check for base64
189            header_parts = header.split(";")
190            if "base64" not in header_parts:
191                raise ValueError("Data is not base64 encoded")
192
193            # Content type is the first part
194            content_type = header_parts[0]
195            if not content_type:
196                raise ValueError("Content type is empty")
197
198            return base64.b64decode(actual_data), cast(MediaContentType, content_type)
199
200        except Exception as e:
201            self._log.error("Error parsing base64 data URI", exc_info=e)
202
203            return None, None
class LangfuseMedia:
 14class LangfuseMedia:
 15    """A class for wrapping media objects for upload to Langfuse.
 16
 17    This class handles the preparation and formatting of media content for Langfuse,
 18    supporting both base64 data URIs and raw content bytes.
 19
 20    Args:
 21        obj (Optional[object]): The source object to be wrapped. Can be accessed via the `obj` attribute.
 22        base64_data_uri (Optional[str]): A base64-encoded data URI containing the media content
 23            and content type (e.g., "...").
 24        content_type (Optional[str]): The MIME type of the media content when providing raw bytes.
 25        content_bytes (Optional[bytes]): Raw bytes of the media content.
 26        file_path (Optional[str]): The path to the file containing the media content. For relative paths,
 27            the current working directory is used.
 28
 29    Raises:
 30        ValueError: If neither base64_data_uri or the combination of content_bytes
 31            and content_type is provided.
 32    """
 33
 34    obj: object
 35
 36    _log = logging.getLogger(__name__)
 37    _content_bytes: Optional[bytes]
 38    _content_type: Optional[MediaContentType]
 39    _source: Optional[str]
 40    _media_id: Optional[str]
 41
 42    def __init__(
 43        self,
 44        *,
 45        obj: Optional[object] = None,
 46        base64_data_uri: Optional[str] = None,
 47        content_type: Optional[MediaContentType] = None,
 48        content_bytes: Optional[bytes] = None,
 49        file_path: Optional[str] = None,
 50    ):
 51        """Initialize a LangfuseMedia object.
 52
 53        Args:
 54            obj: The object to wrap.
 55
 56            base64_data_uri: A base64-encoded data URI containing the media content
 57                and content type (e.g., "...").
 58            content_type: The MIME type of the media content when providing raw bytes or reading from a file.
 59            content_bytes: Raw bytes of the media content.
 60            file_path: The path to the file containing the media content. For relative paths,
 61                the current working directory is used.
 62        """
 63        self.obj = obj
 64        self._media_id = None
 65
 66        if base64_data_uri is not None:
 67            parsed_data = self._parse_base64_data_uri(base64_data_uri)
 68            self._content_bytes, self._content_type = parsed_data
 69            self._source = "base64_data_uri"
 70
 71        elif content_bytes is not None and content_type is not None:
 72            self._content_type = content_type
 73            self._content_bytes = content_bytes
 74            self._source = "bytes"
 75        elif (
 76            file_path is not None
 77            and content_type is not None
 78            and os.path.exists(file_path)
 79        ):
 80            self._content_bytes = self._read_file(file_path)
 81            self._content_type = content_type if self._content_bytes else None
 82            self._source = "file" if self._content_bytes else None
 83        else:
 84            self._log.error(
 85                "base64_data_uri, or content_bytes and content_type, or file_path must be provided to LangfuseMedia"
 86            )
 87
 88            self._content_bytes = None
 89            self._content_type = None
 90            self._source = None
 91
 92    def _read_file(self, file_path: str) -> Optional[bytes]:
 93        try:
 94            with open(file_path, "rb") as file:
 95                return file.read()
 96        except Exception as e:
 97            self._log.error(f"Error reading file at path {file_path}", exc_info=e)
 98
 99            return None
100
101    @property
102    def _content_length(self) -> Optional[int]:
103        return len(self._content_bytes) if self._content_bytes else None
104
105    @property
106    def _content_sha256_hash(self) -> Optional[str]:
107        if self._content_bytes is None:
108            return None
109
110        sha256_hash_bytes = hashlib.sha256(self._content_bytes).digest()
111
112        return base64.b64encode(sha256_hash_bytes).decode("utf-8")
113
114    @property
115    def _reference_string(self) -> Optional[str]:
116        if self._content_type is None or self._source is None or self._media_id is None:
117            return None
118
119        return f"@@@langfuseMedia:type={self._content_type}|id={self._media_id}|source={self._source}@@@"
120
121    @staticmethod
122    def parse_reference_string(reference_string: str) -> ParsedMediaReference:
123        """Parse a media reference string into a ParsedMediaReference.
124
125        Example reference string:
126            "@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"
127
128        Args:
129            reference_string: The reference string to parse.
130
131        Returns:
132            A TypedDict with the media_id, source, and content_type.
133
134        Raises:
135            ValueError: If the reference string is empty or not a string.
136            ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
137            ValueError: If the reference string does not end with "@@@".
138            ValueError: If the reference string is missing required fields.
139        """
140        if not reference_string:
141            raise ValueError("Reference string is empty")
142
143        if not isinstance(reference_string, str):
144            raise ValueError("Reference string is not a string")
145
146        if not reference_string.startswith("@@@langfuseMedia:type="):
147            raise ValueError(
148                "Reference string does not start with '@@@langfuseMedia:type='"
149            )
150
151        if not reference_string.endswith("@@@"):
152            raise ValueError("Reference string does not end with '@@@'")
153
154        content = reference_string[len("@@@langfuseMedia:") :].rstrip("@@@")
155
156        # Split into key-value pairs
157        pairs = content.split("|")
158        parsed_data = {}
159
160        for pair in pairs:
161            key, value = pair.split("=", 1)
162            parsed_data[key] = value
163
164        # Verify all required fields are present
165        if not all(key in parsed_data for key in ["type", "id", "source"]):
166            raise ValueError("Missing required fields in reference string")
167
168        return ParsedMediaReference(
169            media_id=parsed_data["id"],
170            source=parsed_data["source"],
171            content_type=parsed_data["type"],
172        )
173
174    def _parse_base64_data_uri(
175        self, data: str
176    ) -> Tuple[Optional[bytes], Optional[MediaContentType]]:
177        # Example data URI: ...
178        try:
179            if not data or not isinstance(data, str):
180                raise ValueError("Data URI is not a string")
181
182            if not data.startswith("data:"):
183                raise ValueError("Data URI does not start with 'data:'")
184
185            header, actual_data = data[5:].split(",", 1)
186            if not header or not actual_data:
187                raise ValueError("Invalid URI")
188
189            # Split header into parts and check for base64
190            header_parts = header.split(";")
191            if "base64" not in header_parts:
192                raise ValueError("Data is not base64 encoded")
193
194            # Content type is the first part
195            content_type = header_parts[0]
196            if not content_type:
197                raise ValueError("Content type is empty")
198
199            return base64.b64decode(actual_data), cast(MediaContentType, content_type)
200
201        except Exception as e:
202            self._log.error("Error parsing base64 data URI", exc_info=e)
203
204            return None, None

A class for wrapping media objects for upload to Langfuse.

This class handles the preparation and formatting of media content for Langfuse, supporting both base64 data URIs and raw content bytes.

Arguments:
  • obj (Optional[object]): The source object to be wrapped. Can be accessed via the obj attribute.
  • base64_data_uri (Optional[str]): A base64-encoded data URI containing the media content and content type (e.g., "...").
  • content_type (Optional[str]): The MIME type of the media content when providing raw bytes or reading from a file.
  • content_bytes (Optional[bytes]): Raw bytes of the media content.
  • file_path (Optional[str]): The path to the file containing the media content. For relative paths, the current working directory is used.
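Note:
  • The constructor does not raise a ValueError. If neither base64_data_uri, nor the combination of content_bytes and content_type, nor an existing file_path together with content_type is provided, an error is logged and the media content, content type, and source are left as None.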
LangfuseMedia( *, obj: Optional[object] = None, base64_data_uri: Optional[str] = None, content_type: Optional[Literal['image/png', 'image/jpeg', 'image/jpg', 'image/webp', 'audio/mpeg', 'audio/mp3', 'audio/wav', 'text/plain', 'application/pdf']] = None, content_bytes: Optional[bytes] = None, file_path: Optional[str] = None)
42    def __init__(
43        self,
44        *,
45        obj: Optional[object] = None,
46        base64_data_uri: Optional[str] = None,
47        content_type: Optional[MediaContentType] = None,
48        content_bytes: Optional[bytes] = None,
49        file_path: Optional[str] = None,
50    ):
51        """Initialize a LangfuseMedia object.
52
53        Args:
54            obj: The object to wrap.
55
56            base64_data_uri: A base64-encoded data URI containing the media content
57                and content type (e.g., "...").
58            content_type: The MIME type of the media content when providing raw bytes or reading from a file.
59            content_bytes: Raw bytes of the media content.
60            file_path: The path to the file containing the media content. For relative paths,
61                the current working directory is used.
62        """
63        self.obj = obj
64        self._media_id = None
65
66        if base64_data_uri is not None:
67            parsed_data = self._parse_base64_data_uri(base64_data_uri)
68            self._content_bytes, self._content_type = parsed_data
69            self._source = "base64_data_uri"
70
71        elif content_bytes is not None and content_type is not None:
72            self._content_type = content_type
73            self._content_bytes = content_bytes
74            self._source = "bytes"
75        elif (
76            file_path is not None
77            and content_type is not None
78            and os.path.exists(file_path)
79        ):
80            self._content_bytes = self._read_file(file_path)
81            self._content_type = content_type if self._content_bytes else None
82            self._source = "file" if self._content_bytes else None
83        else:
84            self._log.error(
85                "base64_data_uri, or content_bytes and content_type, or file_path must be provided to LangfuseMedia"
86            )
87
88            self._content_bytes = None
89            self._content_type = None
90            self._source = None

Initialize a LangfuseMedia object.

Arguments:
  • obj: The object to wrap.
  • base64_data_uri: A base64-encoded data URI containing the media content and content type (e.g., "...").
  • content_type: The MIME type of the media content when providing raw bytes or reading from a file.
  • content_bytes: Raw bytes of the media content.
  • file_path: The path to the file containing the media content. For relative paths, the current working directory is used.
obj: object
@staticmethod
def parse_reference_string(reference_string: str) -> langfuse.types.ParsedMediaReference:
121    @staticmethod
122    def parse_reference_string(reference_string: str) -> ParsedMediaReference:
123        """Parse a media reference string into a ParsedMediaReference.
124
125        Example reference string:
126            "@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"
127
128        Args:
129            reference_string: The reference string to parse.
130
131        Returns:
132            A TypedDict with the media_id, source, and content_type.
133
134        Raises:
135            ValueError: If the reference string is empty or not a string.
136            ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
137            ValueError: If the reference string does not end with "@@@".
138            ValueError: If the reference string is missing required fields.
139        """
140        if not reference_string:
141            raise ValueError("Reference string is empty")
142
143        if not isinstance(reference_string, str):
144            raise ValueError("Reference string is not a string")
145
146        if not reference_string.startswith("@@@langfuseMedia:type="):
147            raise ValueError(
148                "Reference string does not start with '@@@langfuseMedia:type='"
149            )
150
151        if not reference_string.endswith("@@@"):
152            raise ValueError("Reference string does not end with '@@@'")
153
154        content = reference_string[len("@@@langfuseMedia:") :].rstrip("@@@")
155
156        # Split into key-value pairs
157        pairs = content.split("|")
158        parsed_data = {}
159
160        for pair in pairs:
161            key, value = pair.split("=", 1)
162            parsed_data[key] = value
163
164        # Verify all required fields are present
165        if not all(key in parsed_data for key in ["type", "id", "source"]):
166            raise ValueError("Missing required fields in reference string")
167
168        return ParsedMediaReference(
169            media_id=parsed_data["id"],
170            source=parsed_data["source"],
171            content_type=parsed_data["type"],
172        )

Parse a media reference string into a ParsedMediaReference.

Example reference string:

"@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"

Arguments:
  • reference_string: The reference string to parse.
Returns:

A TypedDict with the media_id, source, and content_type.

Raises:
  • ValueError: If the reference string is empty or not a string.
  • ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
  • ValueError: If the reference string does not end with "@@@".
  • ValueError: If the reference string is missing required fields.