langfuse.media
This module contains the LangfuseMedia class, which is used to wrap media objects for upload to Langfuse.
"""This module contains the LangfuseMedia class, which is used to wrap media objects for upload to Langfuse."""

import base64
import hashlib
import logging
import os
from typing import Optional, cast, Tuple

from langfuse.api import MediaContentType
from langfuse.types import ParsedMediaReference


class LangfuseMedia:
    """A class for wrapping media objects for upload to Langfuse.

    This class handles the preparation and formatting of media content for Langfuse,
    supporting base64 data URIs, raw content bytes, and files read from disk.

    Args:
        obj (Optional[object]): The source object to be wrapped. Can be accessed via the `obj` attribute.
        base64_data_uri (Optional[str]): A base64-encoded data URI containing the media content
            and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
        content_type (Optional[str]): The MIME type of the media content when providing raw bytes
            or reading from a file.
        content_bytes (Optional[bytes]): Raw bytes of the media content.
        file_path (Optional[str]): The path to the file containing the media content. For relative paths,
            the current working directory is used.

    Note:
        No ValueError is raised for missing or unparsable input. An error is logged
        and the content bytes, content type and source are left as None.
    """

    obj: object

    _log = logging.getLogger(__name__)
    _content_bytes: Optional[bytes]
    _content_type: Optional[MediaContentType]
    _source: Optional[str]
    _media_id: Optional[str]

    def __init__(
        self,
        *,
        obj: Optional[object] = None,
        base64_data_uri: Optional[str] = None,
        content_type: Optional[MediaContentType] = None,
        content_bytes: Optional[bytes] = None,
        file_path: Optional[str] = None,
    ):
        """Initialize a LangfuseMedia object.

        The first usable input wins, in this order: base64_data_uri, then
        content_bytes with content_type, then file_path with content_type.

        Args:
            obj: The object to wrap.

            base64_data_uri: A base64-encoded data URI containing the media content
                and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
            content_type: The MIME type of the media content when providing raw bytes or reading from a file.
            content_bytes: Raw bytes of the media content.
            file_path: The path to the file containing the media content. For relative paths,
                the current working directory is used.
        """
        self.obj = obj
        self._media_id = None

        if base64_data_uri is not None:
            parsed_data = self._parse_base64_data_uri(base64_data_uri)
            self._content_bytes, self._content_type = parsed_data
            # The parser returns (None, None) on failure; only record a source
            # when parsing succeeded, mirroring the file branch below.
            self._source = (
                "base64_data_uri" if self._content_type is not None else None
            )

        elif content_bytes is not None and content_type is not None:
            self._content_type = content_type
            self._content_bytes = content_bytes
            self._source = "bytes"
        elif (
            file_path is not None
            and content_type is not None
            and os.path.exists(file_path)
        ):
            self._content_bytes = self._read_file(file_path)
            # A failed or empty read leaves the object without type and source.
            self._content_type = content_type if self._content_bytes else None
            self._source = "file" if self._content_bytes else None
        else:
            self._log.error(
                "base64_data_uri, or content_bytes and content_type, or file_path must be provided to LangfuseMedia"
            )

            self._content_bytes = None
            self._content_type = None
            self._source = None

    def _read_file(self, file_path: str) -> Optional[bytes]:
        """Return the binary content of file_path, or None if reading fails (the error is logged)."""
        try:
            with open(file_path, "rb") as file:
                return file.read()
        except Exception as e:
            self._log.error(f"Error reading file at path {file_path}", exc_info=e)

            return None

    @property
    def _content_length(self) -> Optional[int]:
        """Number of content bytes; None when the content is missing or empty."""
        return len(self._content_bytes) if self._content_bytes else None

    @property
    def _content_sha256_hash(self) -> Optional[str]:
        """Base64-encoded SHA-256 digest of the content bytes, or None without content."""
        if self._content_bytes is None:
            return None

        sha256_hash_bytes = hashlib.sha256(self._content_bytes).digest()

        return base64.b64encode(sha256_hash_bytes).decode("utf-8")

    @property
    def _reference_string(self) -> Optional[str]:
        """Reference string for this media, or None until type, source and media id are all set."""
        if self._content_type is None or self._source is None or self._media_id is None:
            return None

        return f"@@@langfuseMedia:type={self._content_type}|id={self._media_id}|source={self._source}@@@"

    @staticmethod
    def parse_reference_string(reference_string: str) -> ParsedMediaReference:
        """Parse a media reference string into a ParsedMediaReference.

        Example reference string:
            "@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"

        Args:
            reference_string: The reference string to parse.

        Returns:
            A TypedDict with the media_id, source, and content_type.

        Raises:
            ValueError: If the reference string is empty or not a string.
            ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
            ValueError: If the reference string does not end with "@@@".
            ValueError: If a "|"-separated segment is not a key=value pair.
            ValueError: If the reference string is missing required fields.
        """
        if not reference_string:
            raise ValueError("Reference string is empty")

        if not isinstance(reference_string, str):
            raise ValueError("Reference string is not a string")

        if not reference_string.startswith("@@@langfuseMedia:type="):
            raise ValueError(
                "Reference string does not start with '@@@langfuseMedia:type='"
            )

        if not reference_string.endswith("@@@"):
            raise ValueError("Reference string does not end with '@@@'")

        # Slice off exactly the prefix and the 3-character terminator. rstrip("@@@")
        # would strip every trailing "@", including ones that belong to the last value.
        content = reference_string[len("@@@langfuseMedia:") : -len("@@@")]

        # Split into key-value pairs
        parsed_data = {}

        for pair in content.split("|"):
            key, separator, value = pair.partition("=")
            if not separator:
                raise ValueError(
                    f"Invalid key-value pair in reference string: {pair!r}"
                )
            parsed_data[key] = value

        # Verify all required fields are present
        if not all(key in parsed_data for key in ["type", "id", "source"]):
            raise ValueError("Missing required fields in reference string")

        return ParsedMediaReference(
            media_id=parsed_data["id"],
            source=parsed_data["source"],
            content_type=parsed_data["type"],
        )

    def _parse_base64_data_uri(
        self, data: str
    ) -> Tuple[Optional[bytes], Optional[MediaContentType]]:
        """Decode a base64 data URI into (content bytes, content type).

        Any parsing or decoding error is logged and reported as (None, None).
        """
        # Example data URI: data:image/jpeg;base64,/9j/4AAQ...
        try:
            if not data or not isinstance(data, str):
                raise ValueError("Data URI is not a string")

            if not data.startswith("data:"):
                raise ValueError("Data URI does not start with 'data:'")

            # A URI without a comma fails the unpacking and lands in the handler below.
            header, actual_data = data[5:].split(",", 1)
            if not header or not actual_data:
                raise ValueError("Invalid URI")

            # Split header into parts and check for base64
            header_parts = header.split(";")
            if "base64" not in header_parts:
                raise ValueError("Data is not base64 encoded")

            # Content type is the first part
            content_type = header_parts[0]
            if not content_type:
                raise ValueError("Content type is empty")

            return base64.b64decode(actual_data), cast(MediaContentType, content_type)

        except Exception as e:
            self._log.error("Error parsing base64 data URI", exc_info=e)

            return None, None
class LangfuseMedia:
class LangfuseMedia:
    """A class for wrapping media objects for upload to Langfuse.

    This class handles the preparation and formatting of media content for Langfuse,
    supporting base64 data URIs, raw content bytes, and files read from disk.

    Args:
        obj (Optional[object]): The source object to be wrapped. Can be accessed via the `obj` attribute.
        base64_data_uri (Optional[str]): A base64-encoded data URI containing the media content
            and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
        content_type (Optional[str]): The MIME type of the media content when providing raw bytes
            or reading from a file.
        content_bytes (Optional[bytes]): Raw bytes of the media content.
        file_path (Optional[str]): The path to the file containing the media content. For relative paths,
            the current working directory is used.

    Note:
        No ValueError is raised for missing or unparsable input. An error is logged
        and the content bytes, content type and source are left as None.
    """

    obj: object

    _log = logging.getLogger(__name__)
    _content_bytes: Optional[bytes]
    _content_type: Optional[MediaContentType]
    _source: Optional[str]
    _media_id: Optional[str]

    def __init__(
        self,
        *,
        obj: Optional[object] = None,
        base64_data_uri: Optional[str] = None,
        content_type: Optional[MediaContentType] = None,
        content_bytes: Optional[bytes] = None,
        file_path: Optional[str] = None,
    ):
        """Initialize a LangfuseMedia object.

        The first usable input wins, in this order: base64_data_uri, then
        content_bytes with content_type, then file_path with content_type.

        Args:
            obj: The object to wrap.

            base64_data_uri: A base64-encoded data URI containing the media content
                and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
            content_type: The MIME type of the media content when providing raw bytes or reading from a file.
            content_bytes: Raw bytes of the media content.
            file_path: The path to the file containing the media content. For relative paths,
                the current working directory is used.
        """
        self.obj = obj
        self._media_id = None

        if base64_data_uri is not None:
            parsed_data = self._parse_base64_data_uri(base64_data_uri)
            self._content_bytes, self._content_type = parsed_data
            # The parser returns (None, None) on failure; only record a source
            # when parsing succeeded, mirroring the file branch below.
            self._source = (
                "base64_data_uri" if self._content_type is not None else None
            )

        elif content_bytes is not None and content_type is not None:
            self._content_type = content_type
            self._content_bytes = content_bytes
            self._source = "bytes"
        elif (
            file_path is not None
            and content_type is not None
            and os.path.exists(file_path)
        ):
            self._content_bytes = self._read_file(file_path)
            # A failed or empty read leaves the object without type and source.
            self._content_type = content_type if self._content_bytes else None
            self._source = "file" if self._content_bytes else None
        else:
            self._log.error(
                "base64_data_uri, or content_bytes and content_type, or file_path must be provided to LangfuseMedia"
            )

            self._content_bytes = None
            self._content_type = None
            self._source = None

    def _read_file(self, file_path: str) -> Optional[bytes]:
        """Return the binary content of file_path, or None if reading fails (the error is logged)."""
        try:
            with open(file_path, "rb") as file:
                return file.read()
        except Exception as e:
            self._log.error(f"Error reading file at path {file_path}", exc_info=e)

            return None

    @property
    def _content_length(self) -> Optional[int]:
        """Number of content bytes; None when the content is missing or empty."""
        return len(self._content_bytes) if self._content_bytes else None

    @property
    def _content_sha256_hash(self) -> Optional[str]:
        """Base64-encoded SHA-256 digest of the content bytes, or None without content."""
        if self._content_bytes is None:
            return None

        sha256_hash_bytes = hashlib.sha256(self._content_bytes).digest()

        return base64.b64encode(sha256_hash_bytes).decode("utf-8")

    @property
    def _reference_string(self) -> Optional[str]:
        """Reference string for this media, or None until type, source and media id are all set."""
        if self._content_type is None or self._source is None or self._media_id is None:
            return None

        return f"@@@langfuseMedia:type={self._content_type}|id={self._media_id}|source={self._source}@@@"

    @staticmethod
    def parse_reference_string(reference_string: str) -> ParsedMediaReference:
        """Parse a media reference string into a ParsedMediaReference.

        Example reference string:
            "@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"

        Args:
            reference_string: The reference string to parse.

        Returns:
            A TypedDict with the media_id, source, and content_type.

        Raises:
            ValueError: If the reference string is empty or not a string.
            ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
            ValueError: If the reference string does not end with "@@@".
            ValueError: If a "|"-separated segment is not a key=value pair.
            ValueError: If the reference string is missing required fields.
        """
        if not reference_string:
            raise ValueError("Reference string is empty")

        if not isinstance(reference_string, str):
            raise ValueError("Reference string is not a string")

        if not reference_string.startswith("@@@langfuseMedia:type="):
            raise ValueError(
                "Reference string does not start with '@@@langfuseMedia:type='"
            )

        if not reference_string.endswith("@@@"):
            raise ValueError("Reference string does not end with '@@@'")

        # Slice off exactly the prefix and the 3-character terminator. rstrip("@@@")
        # would strip every trailing "@", including ones that belong to the last value.
        content = reference_string[len("@@@langfuseMedia:") : -len("@@@")]

        # Split into key-value pairs
        parsed_data = {}

        for pair in content.split("|"):
            key, separator, value = pair.partition("=")
            if not separator:
                raise ValueError(
                    f"Invalid key-value pair in reference string: {pair!r}"
                )
            parsed_data[key] = value

        # Verify all required fields are present
        if not all(key in parsed_data for key in ["type", "id", "source"]):
            raise ValueError("Missing required fields in reference string")

        return ParsedMediaReference(
            media_id=parsed_data["id"],
            source=parsed_data["source"],
            content_type=parsed_data["type"],
        )

    def _parse_base64_data_uri(
        self, data: str
    ) -> Tuple[Optional[bytes], Optional[MediaContentType]]:
        """Decode a base64 data URI into (content bytes, content type).

        Any parsing or decoding error is logged and reported as (None, None).
        """
        # Example data URI: data:image/jpeg;base64,/9j/4AAQ...
        try:
            if not data or not isinstance(data, str):
                raise ValueError("Data URI is not a string")

            if not data.startswith("data:"):
                raise ValueError("Data URI does not start with 'data:'")

            # A URI without a comma fails the unpacking and lands in the handler below.
            header, actual_data = data[5:].split(",", 1)
            if not header or not actual_data:
                raise ValueError("Invalid URI")

            # Split header into parts and check for base64
            header_parts = header.split(";")
            if "base64" not in header_parts:
                raise ValueError("Data is not base64 encoded")

            # Content type is the first part
            content_type = header_parts[0]
            if not content_type:
                raise ValueError("Content type is empty")

            return base64.b64decode(actual_data), cast(MediaContentType, content_type)

        except Exception as e:
            self._log.error("Error parsing base64 data URI", exc_info=e)

            return None, None
A class for wrapping media objects for upload to Langfuse.
This class handles the preparation and formatting of media content for Langfuse, supporting both base64 data URIs and raw content bytes.
Arguments:
- obj (Optional[object]): The source object to be wrapped. Can be accessed via the `obj` attribute.
- base64_data_uri (Optional[str]): A base64-encoded data URI containing the media content and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
- content_type (Optional[str]): The MIME type of the media content when providing raw bytes or reading from a file.
- content_bytes (Optional[bytes]): Raw bytes of the media content.
- file_path (Optional[str]): The path to the file containing the media content. For relative paths, the current working directory is used.
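Note:
- No ValueError is raised when neither base64_data_uri, nor content_bytes with content_type, nor an existing file_path with content_type is provided; the constructor logs an error and leaves the content bytes, content type and source unset (None).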
LangfuseMedia( *, obj: Optional[object] = None, base64_data_uri: Optional[str] = None, content_type: Optional[Literal['image/png', 'image/jpeg', 'image/jpg', 'image/webp', 'audio/mpeg', 'audio/mp3', 'audio/wav', 'text/plain', 'application/pdf']] = None, content_bytes: Optional[bytes] = None, file_path: Optional[str] = None)
def __init__(
    self,
    *,
    obj: Optional[object] = None,
    base64_data_uri: Optional[str] = None,
    content_type: Optional[MediaContentType] = None,
    content_bytes: Optional[bytes] = None,
    file_path: Optional[str] = None,
):
    """Initialize a LangfuseMedia object.

    The first usable input wins, in this order: base64_data_uri, then
    content_bytes with content_type, then file_path with content_type.
    When none of them is usable, an error is logged and the content bytes,
    content type and source are set to None.

    Args:
        obj: The object to wrap.

        base64_data_uri: A base64-encoded data URI containing the media content
            and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
        content_type: The MIME type of the media content when providing raw bytes or reading from a file.
        content_bytes: Raw bytes of the media content.
        file_path: The path to the file containing the media content. For relative paths,
            the current working directory is used.
    """
    self.obj = obj
    self._media_id = None

    if base64_data_uri is not None:
        parsed_data = self._parse_base64_data_uri(base64_data_uri)
        self._content_bytes, self._content_type = parsed_data
        # Only record a source when the parser produced a content type, so a
        # failed parse leaves the object empty like the file branch below does.
        self._source = (
            "base64_data_uri" if self._content_type is not None else None
        )

    elif content_bytes is not None and content_type is not None:
        self._content_type = content_type
        self._content_bytes = content_bytes
        self._source = "bytes"
    elif (
        file_path is not None
        and content_type is not None
        and os.path.exists(file_path)
    ):
        self._content_bytes = self._read_file(file_path)
        # A failed or empty read leaves the object without type and source.
        self._content_type = content_type if self._content_bytes else None
        self._source = "file" if self._content_bytes else None
    else:
        self._log.error(
            "base64_data_uri, or content_bytes and content_type, or file_path must be provided to LangfuseMedia"
        )

        self._content_bytes = None
        self._content_type = None
        self._source = None
Initialize a LangfuseMedia object.
Arguments:
- obj: The object to wrap.
- base64_data_uri: A base64-encoded data URI containing the media content and content type (e.g., "data:image/jpeg;base64,/9j/4AAQ...").
- content_type: The MIME type of the media content when providing raw bytes or reading from a file.
- content_bytes: Raw bytes of the media content.
- file_path: The path to the file containing the media content. For relative paths, the current working directory is used.
@staticmethod
def parse_reference_string(reference_string: str) -> langfuse.types.ParsedMediaReference:
@staticmethod
def parse_reference_string(reference_string: str) -> ParsedMediaReference:
    """Parse a media reference string into a ParsedMediaReference.

    Example reference string:
        "@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"

    Args:
        reference_string: The reference string to parse.

    Returns:
        A TypedDict with the media_id, source, and content_type.

    Raises:
        ValueError: If the reference string is empty or not a string.
        ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
        ValueError: If the reference string does not end with "@@@".
        ValueError: If a "|"-separated segment is not a key=value pair.
        ValueError: If the reference string is missing required fields.
    """
    if not reference_string:
        raise ValueError("Reference string is empty")

    if not isinstance(reference_string, str):
        raise ValueError("Reference string is not a string")

    if not reference_string.startswith("@@@langfuseMedia:type="):
        raise ValueError(
            "Reference string does not start with '@@@langfuseMedia:type='"
        )

    if not reference_string.endswith("@@@"):
        raise ValueError("Reference string does not end with '@@@'")

    # Slice off exactly the prefix and the 3-character terminator. rstrip("@@@")
    # would strip every trailing "@", including ones that belong to the last value.
    content = reference_string[len("@@@langfuseMedia:") : -len("@@@")]

    # Split into key-value pairs
    parsed_data = {}

    for pair in content.split("|"):
        key, separator, value = pair.partition("=")
        if not separator:
            raise ValueError(
                f"Invalid key-value pair in reference string: {pair!r}"
            )
        parsed_data[key] = value

    # Verify all required fields are present
    if not all(key in parsed_data for key in ["type", "id", "source"]):
        raise ValueError("Missing required fields in reference string")

    return ParsedMediaReference(
        media_id=parsed_data["id"],
        source=parsed_data["source"],
        content_type=parsed_data["type"],
    )
Parse a media reference string into a ParsedMediaReference.
Example reference string:
"@@@langfuseMedia:type=image/jpeg|id=some-uuid|source=base64_data_uri@@@"
Arguments:
- reference_string: The reference string to parse.
Returns:
A TypedDict with the media_id, source, and content_type.
Raises:
- ValueError: If the reference string is empty or not a string.
- ValueError: If the reference string does not start with "@@@langfuseMedia:type=".
- ValueError: If the reference string does not end with "@@@".
- ValueError: If the reference string is missing required fields.