langfuse


Langfuse Python SDK


Installation

Important

The SDK was rewritten in v3 and released in June 2025. Refer to the v3 migration guide for instructions on updating your code.

```bash
pip install langfuse
```

Docs

Please see our docs for detailed information on this SDK.
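
Before the API reference below, here is a minimal sketch of a first integration using the `observe` decorator and `get_client` helper exported by this package. The decorated function and the echoed "LLM call" are placeholders, and credentials are assumed to be provided via the LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY environment variables.

```python
from langfuse import get_client, observe


# @observe() wraps the function in a span so each call is traced.
@observe()
def answer(question: str) -> str:
    # Placeholder for a real LLM call in your application.
    return f"Echo: {question}"


answer("What is Langfuse?")

# Flush buffered spans before a short-lived script exits.
get_client().flush()
```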

 1""".. include:: ../README.md"""
 2
 3from langfuse.batch_evaluation import (
 4    BatchEvaluationResult,
 5    BatchEvaluationResumeToken,
 6    CompositeEvaluatorFunction,
 7    EvaluatorInputs,
 8    EvaluatorStats,
 9    MapperFunction,
10)
11from langfuse.experiment import Evaluation
12
13from ._client import client as _client_module
14from ._client.attributes import LangfuseOtelSpanAttributes
15from ._client.constants import ObservationTypeLiteral
16from ._client.get_client import get_client
17from ._client.observe import observe
18from ._client.propagation import propagate_attributes
19from ._client.span import (
20    LangfuseAgent,
21    LangfuseChain,
22    LangfuseEmbedding,
23    LangfuseEvaluator,
24    LangfuseEvent,
25    LangfuseGeneration,
26    LangfuseGuardrail,
27    LangfuseRetriever,
28    LangfuseSpan,
29    LangfuseTool,
30)
31
32Langfuse = _client_module.Langfuse
33
34__all__ = [
35    "Langfuse",
36    "get_client",
37    "observe",
38    "propagate_attributes",
39    "ObservationTypeLiteral",
40    "LangfuseSpan",
41    "LangfuseGeneration",
42    "LangfuseEvent",
43    "LangfuseOtelSpanAttributes",
44    "LangfuseAgent",
45    "LangfuseTool",
46    "LangfuseChain",
47    "LangfuseEmbedding",
48    "LangfuseEvaluator",
49    "LangfuseRetriever",
50    "LangfuseGuardrail",
51    "Evaluation",
52    "EvaluatorInputs",
53    "MapperFunction",
54    "CompositeEvaluatorFunction",
55    "EvaluatorStats",
56    "BatchEvaluationResumeToken",
57    "BatchEvaluationResult",
58    "experiment",
59    "api",
60]
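
The names in `__all__` above are the public surface of the SDK. As a rough sketch of how they fit together (the keys shown are placeholders), the `Langfuse` client is typically initialized once and later retrieved as a per-public-key singleton via `get_client`, as described in the class documentation that follows:

```python
from langfuse import Langfuse, get_client

# Initialize once, e.g. at application startup (placeholder credentials).
Langfuse(public_key="pk-lf-...", secret_key="sk-lf-...")

# Elsewhere in the codebase, get_client() returns the already-initialized
# client, so spans created here share the same trace context and exporter.
client = get_client()
with client.start_as_current_span(name="startup-check") as span:
    span.update(output="ok")
```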
class Langfuse:
 135    """Main client for Langfuse tracing and platform features.
 136
 137    This class provides an interface for creating and managing traces, spans,
 138    and generations in Langfuse as well as interacting with the Langfuse API.
 139
 140    The client features a thread-safe singleton pattern for each unique public API key,
 141    ensuring consistent trace context propagation across your application. It implements
 142    efficient batching of spans with configurable flush settings and includes background
 143    thread management for media uploads and score ingestion.
 144
 145    Configuration is flexible through either direct parameters or environment variables,
 146    with graceful fallbacks and runtime configuration updates.
 147
 148    Attributes:
 149        api: Synchronous API client for Langfuse backend communication
 150        async_api: Asynchronous API client for Langfuse backend communication
 151        _otel_tracer: Internal LangfuseTracer instance managing OpenTelemetry components
 152
 153    Parameters:
 154        public_key (Optional[str]): Your Langfuse public API key. Can also be set via LANGFUSE_PUBLIC_KEY environment variable.
 155        secret_key (Optional[str]): Your Langfuse secret API key. Can also be set via LANGFUSE_SECRET_KEY environment variable.
 156        base_url (Optional[str]): The Langfuse API base URL. Defaults to "https://cloud.langfuse.com". Can also be set via LANGFUSE_BASE_URL environment variable.
 157        host (Optional[str]): Deprecated. Use base_url instead. The Langfuse API host URL. Defaults to "https://cloud.langfuse.com".
 158        timeout (Optional[int]): Timeout in seconds for API requests. Defaults to 5 seconds.
 159        httpx_client (Optional[httpx.Client]): Custom httpx client for making non-tracing HTTP requests. If not provided, a default client will be created.
 160        debug (bool): Enable debug logging. Defaults to False. Can also be set via LANGFUSE_DEBUG environment variable.
 161        tracing_enabled (Optional[bool]): Enable or disable tracing. Defaults to True. Can also be set via LANGFUSE_TRACING_ENABLED environment variable.
 162        flush_at (Optional[int]): Number of spans to batch before sending to the API. Defaults to 512. Can also be set via LANGFUSE_FLUSH_AT environment variable.
 163        flush_interval (Optional[float]): Time in seconds between batch flushes. Defaults to 5 seconds. Can also be set via LANGFUSE_FLUSH_INTERVAL environment variable.
 164        environment (Optional[str]): Environment name for tracing. Default is 'default'. Can also be set via LANGFUSE_TRACING_ENVIRONMENT environment variable. Can be any lowercase alphanumeric string with hyphens and underscores that does not start with 'langfuse'.
 165        release (Optional[str]): Release version/hash of your application. Used for grouping analytics by release.
 166        media_upload_thread_count (Optional[int]): Number of background threads for handling media uploads. Defaults to 1. Can also be set via LANGFUSE_MEDIA_UPLOAD_THREAD_COUNT environment variable.
 167        sample_rate (Optional[float]): Sampling rate for traces (0.0 to 1.0). Defaults to 1.0 (100% of traces are sampled). Can also be set via LANGFUSE_SAMPLE_RATE environment variable.
 168        mask (Optional[MaskFunction]): Function to mask sensitive data in traces before sending to the API.
 169        blocked_instrumentation_scopes (Optional[List[str]]): List of instrumentation scope names to block from being exported to Langfuse. Spans from these scopes will be filtered out before being sent to the API. Useful for filtering out spans from specific libraries or frameworks. For exported spans, you can see the instrumentation scope name in the span metadata in Langfuse (`metadata.scope.name`)
 170        additional_headers (Optional[Dict[str, str]]): Additional headers to include in all API requests and OTLPSpanExporter requests. These headers will be merged with default headers. Note: If httpx_client is provided, additional_headers must be set directly on your custom httpx_client as well.
 171        tracer_provider (Optional[TracerProvider]): OpenTelemetry TracerProvider to use for Langfuse. Setting a dedicated provider is useful to keep Langfuse tracing disconnected from other OpenTelemetry-span-emitting libraries. Note: to track active spans, the context is still shared between TracerProviders, which may lead to broken trace trees.
 172
 173    Example:
 174        ```python
 175        from langfuse import Langfuse
 176
 177        # Initialize the client (reads from env vars if not provided)
 178        langfuse = Langfuse(
 179            public_key="your-public-key",
 180            secret_key="your-secret-key",
 181            base_url="https://cloud.langfuse.com",  # Optional, default shown
 182        )
 183
 184        # Create a trace span
 185        with langfuse.start_as_current_span(name="process-query") as span:
 186            # Your application code here
 187
 188            # Create a nested generation span for an LLM call
 189            with span.start_as_current_generation(
 190                name="generate-response",
 191                model="gpt-4",
 192                input={"query": "Tell me about AI"},
 193                model_parameters={"temperature": 0.7, "max_tokens": 500}
 194            ) as generation:
 195                # Generate response here
 196                response = "AI is a field of computer science..."
 197
 198                generation.update(
 199                    output=response,
 200                    usage_details={"prompt_tokens": 10, "completion_tokens": 50},
 201                    cost_details={"total_cost": 0.0023}
 202                )
 203
 204                # Score the generation (supports NUMERIC, BOOLEAN, CATEGORICAL)
 205                generation.score(name="relevance", value=0.95, data_type="NUMERIC")
 206        ```
 207    """
 208
 209    _resources: Optional[LangfuseResourceManager] = None
 210    _mask: Optional[MaskFunction] = None
 211    _otel_tracer: otel_trace_api.Tracer
 212
 213    def __init__(
 214        self,
 215        *,
 216        public_key: Optional[str] = None,
 217        secret_key: Optional[str] = None,
 218        base_url: Optional[str] = None,
 219        host: Optional[str] = None,
 220        timeout: Optional[int] = None,
 221        httpx_client: Optional[httpx.Client] = None,
 222        debug: bool = False,
 223        tracing_enabled: Optional[bool] = True,
 224        flush_at: Optional[int] = None,
 225        flush_interval: Optional[float] = None,
 226        environment: Optional[str] = None,
 227        release: Optional[str] = None,
 228        media_upload_thread_count: Optional[int] = None,
 229        sample_rate: Optional[float] = None,
 230        mask: Optional[MaskFunction] = None,
 231        blocked_instrumentation_scopes: Optional[List[str]] = None,
 232        additional_headers: Optional[Dict[str, str]] = None,
 233        tracer_provider: Optional[TracerProvider] = None,
 234    ):
 235        self._base_url = (
 236            base_url
 237            or os.environ.get(LANGFUSE_BASE_URL)
 238            or host
 239            or os.environ.get(LANGFUSE_HOST, "https://cloud.langfuse.com")
 240        )
 241        self._environment = environment or cast(
 242            str, os.environ.get(LANGFUSE_TRACING_ENVIRONMENT)
 243        )
 244        self._project_id: Optional[str] = None
 245        sample_rate = sample_rate or float(os.environ.get(LANGFUSE_SAMPLE_RATE, 1.0))
 246        if not 0.0 <= sample_rate <= 1.0:
 247            raise ValueError(
 248                f"Sample rate must be between 0.0 and 1.0, got {sample_rate}"
 249            )
 250
 251        timeout = timeout or int(os.environ.get(LANGFUSE_TIMEOUT, 5))
 252
 253        self._tracing_enabled = (
 254            tracing_enabled
 255            and os.environ.get(LANGFUSE_TRACING_ENABLED, "true").lower() != "false"
 256        )
 257        if not self._tracing_enabled:
 258            langfuse_logger.info(
 259                "Configuration: Langfuse tracing is explicitly disabled. No data will be sent to the Langfuse API."
 260            )
 261
 262        debug = (
 263            debug if debug else (os.getenv(LANGFUSE_DEBUG, "false").lower() == "true")
 264        )
 265        if debug:
 266            logging.basicConfig(
 267                format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 268            )
 269            langfuse_logger.setLevel(logging.DEBUG)
 270
 271        public_key = public_key or os.environ.get(LANGFUSE_PUBLIC_KEY)
 272        if public_key is None:
 273            langfuse_logger.warning(
 274                "Authentication error: Langfuse client initialized without public_key. Client will be disabled. "
 275                "Provide a public_key parameter or set LANGFUSE_PUBLIC_KEY environment variable. "
 276            )
 277            self._otel_tracer = otel_trace_api.NoOpTracer()
 278            return
 279
 280        secret_key = secret_key or os.environ.get(LANGFUSE_SECRET_KEY)
 281        if secret_key is None:
 282            langfuse_logger.warning(
 283                "Authentication error: Langfuse client initialized without secret_key. Client will be disabled. "
 284                "Provide a secret_key parameter or set LANGFUSE_SECRET_KEY environment variable. "
 285            )
 286            self._otel_tracer = otel_trace_api.NoOpTracer()
 287            return
 288
 289        if os.environ.get("OTEL_SDK_DISABLED", "false").lower() == "true":
 290            langfuse_logger.warning(
 291                "OTEL_SDK_DISABLED is set. Langfuse tracing will be disabled and no traces will appear in the UI."
 292            )
 293
 294        # Initialize api and tracer if requirements are met
 295        self._resources = LangfuseResourceManager(
 296            public_key=public_key,
 297            secret_key=secret_key,
 298            base_url=self._base_url,
 299            timeout=timeout,
 300            environment=self._environment,
 301            release=release,
 302            flush_at=flush_at,
 303            flush_interval=flush_interval,
 304            httpx_client=httpx_client,
 305            media_upload_thread_count=media_upload_thread_count,
 306            sample_rate=sample_rate,
 307            mask=mask,
 308            tracing_enabled=self._tracing_enabled,
 309            blocked_instrumentation_scopes=blocked_instrumentation_scopes,
 310            additional_headers=additional_headers,
 311            tracer_provider=tracer_provider,
 312        )
 313        self._mask = self._resources.mask
 314
 315        self._otel_tracer = (
 316            self._resources.tracer
 317            if self._tracing_enabled and self._resources.tracer is not None
 318            else otel_trace_api.NoOpTracer()
 319        )
 320        self.api = self._resources.api
 321        self.async_api = self._resources.async_api
 322
 323    def start_span(
 324        self,
 325        *,
 326        trace_context: Optional[TraceContext] = None,
 327        name: str,
 328        input: Optional[Any] = None,
 329        output: Optional[Any] = None,
 330        metadata: Optional[Any] = None,
 331        version: Optional[str] = None,
 332        level: Optional[SpanLevel] = None,
 333        status_message: Optional[str] = None,
 334    ) -> LangfuseSpan:
 335        """Create a new span for tracing a unit of work.
 336
 337        This method creates a new span but does not set it as the current span in the
 338        context. To create and use a span within a context, use start_as_current_span().
 339
 340        The created span will be the child of the current span in the context.
 341
 342        Args:
 343            trace_context: Optional context for connecting to an existing trace
 344            name: Name of the span (e.g., function or operation name)
 345            input: Input data for the operation (can be any JSON-serializable object)
 346            output: Output data from the operation (can be any JSON-serializable object)
 347            metadata: Additional metadata to associate with the span
 348            version: Version identifier for the code or component
 349            level: Importance level of the span (info, warning, error)
 350            status_message: Optional status message for the span
 351
 352        Returns:
 353            A LangfuseSpan object that must be ended with .end() when the operation completes
 354
 355        Example:
 356            ```python
 357            span = langfuse.start_span(name="process-data")
 358            try:
 359                # Do work
 360                span.update(output="result")
 361            finally:
 362                span.end()
 363            ```
 364        """
 365        return self.start_observation(
 366            trace_context=trace_context,
 367            name=name,
 368            as_type="span",
 369            input=input,
 370            output=output,
 371            metadata=metadata,
 372            version=version,
 373            level=level,
 374            status_message=status_message,
 375        )
 376
 377    def start_as_current_span(
 378        self,
 379        *,
 380        trace_context: Optional[TraceContext] = None,
 381        name: str,
 382        input: Optional[Any] = None,
 383        output: Optional[Any] = None,
 384        metadata: Optional[Any] = None,
 385        version: Optional[str] = None,
 386        level: Optional[SpanLevel] = None,
 387        status_message: Optional[str] = None,
 388        end_on_exit: Optional[bool] = None,
 389    ) -> _AgnosticContextManager[LangfuseSpan]:
 390        """Create a new span and set it as the current span in a context manager.
 391
 392        This method creates a new span and sets it as the current span within a context
 393        manager. Use this method with a 'with' statement to automatically handle span
 394        lifecycle within a code block.
 395
 396        The created span will be the child of the current span in the context.
 397
 398        Args:
 399            trace_context: Optional context for connecting to an existing trace
 400            name: Name of the span (e.g., function or operation name)
 401            input: Input data for the operation (can be any JSON-serializable object)
 402            output: Output data from the operation (can be any JSON-serializable object)
 403            metadata: Additional metadata to associate with the span
 404            version: Version identifier for the code or component
 405            level: Importance level of the span (info, warning, error)
 406            status_message: Optional status message for the span
 407            end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
 408
 409        Returns:
 410            A context manager that yields a LangfuseSpan
 411
 412        Example:
 413            ```python
 414            with langfuse.start_as_current_span(name="process-query") as span:
 415                # Do work
 416                result = process_data()
 417                span.update(output=result)
 418
 419                # Create a child span automatically
 420                with span.start_as_current_span(name="sub-operation") as child_span:
 421                    # Do sub-operation work
 422                    child_span.update(output="sub-result")
 423            ```
 424        """
 425        return self.start_as_current_observation(
 426            trace_context=trace_context,
 427            name=name,
 428            as_type="span",
 429            input=input,
 430            output=output,
 431            metadata=metadata,
 432            version=version,
 433            level=level,
 434            status_message=status_message,
 435            end_on_exit=end_on_exit,
 436        )
 437
 438    @overload
 439    def start_observation(
 440        self,
 441        *,
 442        trace_context: Optional[TraceContext] = None,
 443        name: str,
 444        as_type: Literal["generation"],
 445        input: Optional[Any] = None,
 446        output: Optional[Any] = None,
 447        metadata: Optional[Any] = None,
 448        version: Optional[str] = None,
 449        level: Optional[SpanLevel] = None,
 450        status_message: Optional[str] = None,
 451        completion_start_time: Optional[datetime] = None,
 452        model: Optional[str] = None,
 453        model_parameters: Optional[Dict[str, MapValue]] = None,
 454        usage_details: Optional[Dict[str, int]] = None,
 455        cost_details: Optional[Dict[str, float]] = None,
 456        prompt: Optional[PromptClient] = None,
 457    ) -> LangfuseGeneration: ...
 458
 459    @overload
 460    def start_observation(
 461        self,
 462        *,
 463        trace_context: Optional[TraceContext] = None,
 464        name: str,
 465        as_type: Literal["span"] = "span",
 466        input: Optional[Any] = None,
 467        output: Optional[Any] = None,
 468        metadata: Optional[Any] = None,
 469        version: Optional[str] = None,
 470        level: Optional[SpanLevel] = None,
 471        status_message: Optional[str] = None,
 472    ) -> LangfuseSpan: ...
 473
 474    @overload
 475    def start_observation(
 476        self,
 477        *,
 478        trace_context: Optional[TraceContext] = None,
 479        name: str,
 480        as_type: Literal["agent"],
 481        input: Optional[Any] = None,
 482        output: Optional[Any] = None,
 483        metadata: Optional[Any] = None,
 484        version: Optional[str] = None,
 485        level: Optional[SpanLevel] = None,
 486        status_message: Optional[str] = None,
 487    ) -> LangfuseAgent: ...
 488
 489    @overload
 490    def start_observation(
 491        self,
 492        *,
 493        trace_context: Optional[TraceContext] = None,
 494        name: str,
 495        as_type: Literal["tool"],
 496        input: Optional[Any] = None,
 497        output: Optional[Any] = None,
 498        metadata: Optional[Any] = None,
 499        version: Optional[str] = None,
 500        level: Optional[SpanLevel] = None,
 501        status_message: Optional[str] = None,
 502    ) -> LangfuseTool: ...
 503
 504    @overload
 505    def start_observation(
 506        self,
 507        *,
 508        trace_context: Optional[TraceContext] = None,
 509        name: str,
 510        as_type: Literal["chain"],
 511        input: Optional[Any] = None,
 512        output: Optional[Any] = None,
 513        metadata: Optional[Any] = None,
 514        version: Optional[str] = None,
 515        level: Optional[SpanLevel] = None,
 516        status_message: Optional[str] = None,
 517    ) -> LangfuseChain: ...
 518
 519    @overload
 520    def start_observation(
 521        self,
 522        *,
 523        trace_context: Optional[TraceContext] = None,
 524        name: str,
 525        as_type: Literal["retriever"],
 526        input: Optional[Any] = None,
 527        output: Optional[Any] = None,
 528        metadata: Optional[Any] = None,
 529        version: Optional[str] = None,
 530        level: Optional[SpanLevel] = None,
 531        status_message: Optional[str] = None,
 532    ) -> LangfuseRetriever: ...
 533
 534    @overload
 535    def start_observation(
 536        self,
 537        *,
 538        trace_context: Optional[TraceContext] = None,
 539        name: str,
 540        as_type: Literal["evaluator"],
 541        input: Optional[Any] = None,
 542        output: Optional[Any] = None,
 543        metadata: Optional[Any] = None,
 544        version: Optional[str] = None,
 545        level: Optional[SpanLevel] = None,
 546        status_message: Optional[str] = None,
 547    ) -> LangfuseEvaluator: ...
 548
 549    @overload
 550    def start_observation(
 551        self,
 552        *,
 553        trace_context: Optional[TraceContext] = None,
 554        name: str,
 555        as_type: Literal["embedding"],
 556        input: Optional[Any] = None,
 557        output: Optional[Any] = None,
 558        metadata: Optional[Any] = None,
 559        version: Optional[str] = None,
 560        level: Optional[SpanLevel] = None,
 561        status_message: Optional[str] = None,
 562        completion_start_time: Optional[datetime] = None,
 563        model: Optional[str] = None,
 564        model_parameters: Optional[Dict[str, MapValue]] = None,
 565        usage_details: Optional[Dict[str, int]] = None,
 566        cost_details: Optional[Dict[str, float]] = None,
 567        prompt: Optional[PromptClient] = None,
 568    ) -> LangfuseEmbedding: ...
 569
 570    @overload
 571    def start_observation(
 572        self,
 573        *,
 574        trace_context: Optional[TraceContext] = None,
 575        name: str,
 576        as_type: Literal["guardrail"],
 577        input: Optional[Any] = None,
 578        output: Optional[Any] = None,
 579        metadata: Optional[Any] = None,
 580        version: Optional[str] = None,
 581        level: Optional[SpanLevel] = None,
 582        status_message: Optional[str] = None,
 583    ) -> LangfuseGuardrail: ...
 584
 585    def start_observation(
 586        self,
 587        *,
 588        trace_context: Optional[TraceContext] = None,
 589        name: str,
 590        as_type: ObservationTypeLiteralNoEvent = "span",
 591        input: Optional[Any] = None,
 592        output: Optional[Any] = None,
 593        metadata: Optional[Any] = None,
 594        version: Optional[str] = None,
 595        level: Optional[SpanLevel] = None,
 596        status_message: Optional[str] = None,
 597        completion_start_time: Optional[datetime] = None,
 598        model: Optional[str] = None,
 599        model_parameters: Optional[Dict[str, MapValue]] = None,
 600        usage_details: Optional[Dict[str, int]] = None,
 601        cost_details: Optional[Dict[str, float]] = None,
 602        prompt: Optional[PromptClient] = None,
 603    ) -> Union[
 604        LangfuseSpan,
 605        LangfuseGeneration,
 606        LangfuseAgent,
 607        LangfuseTool,
 608        LangfuseChain,
 609        LangfuseRetriever,
 610        LangfuseEvaluator,
 611        LangfuseEmbedding,
 612        LangfuseGuardrail,
 613    ]:
 614        """Create a new observation of the specified type.
 615
 616        This method creates a new observation but does not set it as the current span in the
 617        context. To create and use an observation within a context, use start_as_current_observation().
 618
 619        Args:
 620            trace_context: Optional context for connecting to an existing trace
 621            name: Name of the observation
 622            as_type: Type of observation to create (defaults to "span")
 623            input: Input data for the operation
 624            output: Output data from the operation
 625            metadata: Additional metadata to associate with the observation
 626            version: Version identifier for the code or component
 627            level: Importance level of the observation
 628            status_message: Optional status message for the observation
 629            completion_start_time: When the model started generating (for generation types)
 630            model: Name/identifier of the AI model used (for generation types)
 631            model_parameters: Parameters used for the model (for generation types)
 632            usage_details: Token usage information (for generation types)
 633            cost_details: Cost information (for generation types)
 634            prompt: Associated prompt template (for generation types)
 635
 636        Returns:
 637            An observation object of the appropriate type that must be ended with .end()
 638        """
 639        if trace_context:
 640            trace_id = trace_context.get("trace_id", None)
 641            parent_span_id = trace_context.get("parent_span_id", None)
 642
 643            if trace_id:
 644                remote_parent_span = self._create_remote_parent_span(
 645                    trace_id=trace_id, parent_span_id=parent_span_id
 646                )
 647
 648                with otel_trace_api.use_span(
 649                    cast(otel_trace_api.Span, remote_parent_span)
 650                ):
 651                    otel_span = self._otel_tracer.start_span(name=name)
 652                    otel_span.set_attribute(LangfuseOtelSpanAttributes.AS_ROOT, True)
 653
 654                    return self._create_observation_from_otel_span(
 655                        otel_span=otel_span,
 656                        as_type=as_type,
 657                        input=input,
 658                        output=output,
 659                        metadata=metadata,
 660                        version=version,
 661                        level=level,
 662                        status_message=status_message,
 663                        completion_start_time=completion_start_time,
 664                        model=model,
 665                        model_parameters=model_parameters,
 666                        usage_details=usage_details,
 667                        cost_details=cost_details,
 668                        prompt=prompt,
 669                    )
 670
 671        otel_span = self._otel_tracer.start_span(name=name)
 672
 673        return self._create_observation_from_otel_span(
 674            otel_span=otel_span,
 675            as_type=as_type,
 676            input=input,
 677            output=output,
 678            metadata=metadata,
 679            version=version,
 680            level=level,
 681            status_message=status_message,
 682            completion_start_time=completion_start_time,
 683            model=model,
 684            model_parameters=model_parameters,
 685            usage_details=usage_details,
 686            cost_details=cost_details,
 687            prompt=prompt,
 688        )
 689
 690    def _create_observation_from_otel_span(
 691        self,
 692        *,
 693        otel_span: otel_trace_api.Span,
 694        as_type: ObservationTypeLiteralNoEvent,
 695        input: Optional[Any] = None,
 696        output: Optional[Any] = None,
 697        metadata: Optional[Any] = None,
 698        version: Optional[str] = None,
 699        level: Optional[SpanLevel] = None,
 700        status_message: Optional[str] = None,
 701        completion_start_time: Optional[datetime] = None,
 702        model: Optional[str] = None,
 703        model_parameters: Optional[Dict[str, MapValue]] = None,
 704        usage_details: Optional[Dict[str, int]] = None,
 705        cost_details: Optional[Dict[str, float]] = None,
 706        prompt: Optional[PromptClient] = None,
 707    ) -> Union[
 708        LangfuseSpan,
 709        LangfuseGeneration,
 710        LangfuseAgent,
 711        LangfuseTool,
 712        LangfuseChain,
 713        LangfuseRetriever,
 714        LangfuseEvaluator,
 715        LangfuseEmbedding,
 716        LangfuseGuardrail,
 717    ]:
 718        """Create the appropriate observation type from an OTEL span."""
 719        if as_type in get_observation_types_list(ObservationTypeGenerationLike):
 720            observation_class = self._get_span_class(as_type)
 721            # Type ignore used instead of overloading the internal _get_span_class helper:
 722            # it may return LangfuseEvent, and the observation classes take different arguments.
 723            return observation_class(  # type: ignore[return-value,call-arg]
 724                otel_span=otel_span,
 725                langfuse_client=self,
 726                environment=self._environment,
 727                input=input,
 728                output=output,
 729                metadata=metadata,
 730                version=version,
 731                level=level,
 732                status_message=status_message,
 733                completion_start_time=completion_start_time,
 734                model=model,
 735                model_parameters=model_parameters,
 736                usage_details=usage_details,
 737                cost_details=cost_details,
 738                prompt=prompt,
 739            )
 740        else:
 741            # For other types (e.g. span, guardrail), create appropriate class without generation properties
 742            observation_class = self._get_span_class(as_type)
 743            # Type ignore used instead of overloading the internal _get_span_class helper:
 744            # it may return LangfuseEvent, and the observation classes take different arguments.
 745            return observation_class(  # type: ignore[return-value,call-arg]
 746                otel_span=otel_span,
 747                langfuse_client=self,
 748                environment=self._environment,
 749                input=input,
 750                output=output,
 751                metadata=metadata,
 752                version=version,
 753                level=level,
 754                status_message=status_message,
 755            )
 756            # span._observation_type = as_type
 757            # span._otel_span.set_attribute("langfuse.observation.type", as_type)
 758            # return span
 759
 760    def start_generation(
 761        self,
 762        *,
 763        trace_context: Optional[TraceContext] = None,
 764        name: str,
 765        input: Optional[Any] = None,
 766        output: Optional[Any] = None,
 767        metadata: Optional[Any] = None,
 768        version: Optional[str] = None,
 769        level: Optional[SpanLevel] = None,
 770        status_message: Optional[str] = None,
 771        completion_start_time: Optional[datetime] = None,
 772        model: Optional[str] = None,
 773        model_parameters: Optional[Dict[str, MapValue]] = None,
 774        usage_details: Optional[Dict[str, int]] = None,
 775        cost_details: Optional[Dict[str, float]] = None,
 776        prompt: Optional[PromptClient] = None,
 777    ) -> LangfuseGeneration:
 778        """Create a new generation span for model generations.
 779
 780        DEPRECATED: This method is deprecated and will be removed in a future version.
 781        Use start_observation(as_type='generation') instead.
 782
 783        This method creates a specialized span for tracking model generations.
 784        It includes additional fields specific to model generations such as model name,
 785        token usage, and cost details.
 786
 787        The created generation span will be the child of the current span in the context.
 788
 789        Args:
 790            trace_context: Optional context for connecting to an existing trace
 791            name: Name of the generation operation
 792            input: Input data for the model (e.g., prompts)
 793            output: Output from the model (e.g., completions)
 794            metadata: Additional metadata to associate with the generation
 795            version: Version identifier for the model or component
 796            level: Importance level of the generation (info, warning, error)
 797            status_message: Optional status message for the generation
 798            completion_start_time: When the model started generating the response
 799            model: Name/identifier of the AI model used (e.g., "gpt-4")
 800            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
 801            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
 802            cost_details: Cost information for the model call
 803            prompt: Associated prompt template from Langfuse prompt management
 804
 805        Returns:
 806            A LangfuseGeneration object that must be ended with .end() when complete
 807
 808        Example:
 809            ```python
 810            generation = langfuse.start_generation(
 811                name="answer-generation",
 812                model="gpt-4",
 813                input={"prompt": "Explain quantum computing"},
 814                model_parameters={"temperature": 0.7}
 815            )
 816            try:
 817                # Call model API
 818                response = llm.generate(...)
 819
 820                generation.update(
 821                    output=response.text,
 822                    usage_details={
 823                        "prompt_tokens": response.usage.prompt_tokens,
 824                        "completion_tokens": response.usage.completion_tokens
 825                    }
 826                )
 827            finally:
 828                generation.end()
 829            ```
 830        """
 831        warnings.warn(
 832            "start_generation is deprecated and will be removed in a future version. "
 833            "Use start_observation(as_type='generation') instead.",
 834            DeprecationWarning,
 835            stacklevel=2,
 836        )
 837        return self.start_observation(
 838            trace_context=trace_context,
 839            name=name,
 840            as_type="generation",
 841            input=input,
 842            output=output,
 843            metadata=metadata,
 844            version=version,
 845            level=level,
 846            status_message=status_message,
 847            completion_start_time=completion_start_time,
 848            model=model,
 849            model_parameters=model_parameters,
 850            usage_details=usage_details,
 851            cost_details=cost_details,
 852            prompt=prompt,
 853        )
 854
 855    def start_as_current_generation(
 856        self,
 857        *,
 858        trace_context: Optional[TraceContext] = None,
 859        name: str,
 860        input: Optional[Any] = None,
 861        output: Optional[Any] = None,
 862        metadata: Optional[Any] = None,
 863        version: Optional[str] = None,
 864        level: Optional[SpanLevel] = None,
 865        status_message: Optional[str] = None,
 866        completion_start_time: Optional[datetime] = None,
 867        model: Optional[str] = None,
 868        model_parameters: Optional[Dict[str, MapValue]] = None,
 869        usage_details: Optional[Dict[str, int]] = None,
 870        cost_details: Optional[Dict[str, float]] = None,
 871        prompt: Optional[PromptClient] = None,
 872        end_on_exit: Optional[bool] = None,
 873    ) -> _AgnosticContextManager[LangfuseGeneration]:
 874        """Create a new generation span and set it as the current span in a context manager.
 875
 876        DEPRECATED: This method is deprecated and will be removed in a future version.
 877        Use start_as_current_observation(as_type='generation') instead.
 878
 879        This method creates a specialized span for model generations and sets it as the
 880        current span within a context manager. Use this method with a 'with' statement to
 881        automatically handle the generation span lifecycle within a code block.
 882
 883        The created generation span will be the child of the current span in the context.
 884
 885        Args:
 886            trace_context: Optional context for connecting to an existing trace
 887            name: Name of the generation operation
 888            input: Input data for the model (e.g., prompts)
 889            output: Output from the model (e.g., completions)
 890            metadata: Additional metadata to associate with the generation
 891            version: Version identifier for the model or component
 892            level: Importance level of the generation (info, warning, error)
 893            status_message: Optional status message for the generation
 894            completion_start_time: When the model started generating the response
 895            model: Name/identifier of the AI model used (e.g., "gpt-4")
 896            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
 897            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
 898            cost_details: Cost information for the model call
 899            prompt: Associated prompt template from Langfuse prompt management
 900            end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
 901
 902        Returns:
 903            A context manager that yields a LangfuseGeneration
 904
 905        Example:
 906            ```python
 907            with langfuse.start_as_current_generation(
 908                name="answer-generation",
 909                model="gpt-4",
 910                input={"prompt": "Explain quantum computing"}
 911            ) as generation:
 912                # Call model API
 913                response = llm.generate(...)
 914
 915                # Update with results
 916                generation.update(
 917                    output=response.text,
 918                    usage_details={
 919                        "prompt_tokens": response.usage.prompt_tokens,
 920                        "completion_tokens": response.usage.completion_tokens
 921                    }
 922                )
 923            ```
 924        """
 925        warnings.warn(
 926            "start_as_current_generation is deprecated and will be removed in a future version. "
 927            "Use start_as_current_observation(as_type='generation') instead.",
 928            DeprecationWarning,
 929            stacklevel=2,
 930        )
 931        return self.start_as_current_observation(
 932            trace_context=trace_context,
 933            name=name,
 934            as_type="generation",
 935            input=input,
 936            output=output,
 937            metadata=metadata,
 938            version=version,
 939            level=level,
 940            status_message=status_message,
 941            completion_start_time=completion_start_time,
 942            model=model,
 943            model_parameters=model_parameters,
 944            usage_details=usage_details,
 945            cost_details=cost_details,
 946            prompt=prompt,
 947            end_on_exit=end_on_exit,
 948        )
 949
 950    @overload
 951    def start_as_current_observation(
 952        self,
 953        *,
 954        trace_context: Optional[TraceContext] = None,
 955        name: str,
 956        as_type: Literal["generation"],
 957        input: Optional[Any] = None,
 958        output: Optional[Any] = None,
 959        metadata: Optional[Any] = None,
 960        version: Optional[str] = None,
 961        level: Optional[SpanLevel] = None,
 962        status_message: Optional[str] = None,
 963        completion_start_time: Optional[datetime] = None,
 964        model: Optional[str] = None,
 965        model_parameters: Optional[Dict[str, MapValue]] = None,
 966        usage_details: Optional[Dict[str, int]] = None,
 967        cost_details: Optional[Dict[str, float]] = None,
 968        prompt: Optional[PromptClient] = None,
 969        end_on_exit: Optional[bool] = None,
 970    ) -> _AgnosticContextManager[LangfuseGeneration]: ...
 971
 972    @overload
 973    def start_as_current_observation(
 974        self,
 975        *,
 976        trace_context: Optional[TraceContext] = None,
 977        name: str,
 978        as_type: Literal["span"] = "span",
 979        input: Optional[Any] = None,
 980        output: Optional[Any] = None,
 981        metadata: Optional[Any] = None,
 982        version: Optional[str] = None,
 983        level: Optional[SpanLevel] = None,
 984        status_message: Optional[str] = None,
 985        end_on_exit: Optional[bool] = None,
 986    ) -> _AgnosticContextManager[LangfuseSpan]: ...
 987
 988    @overload
 989    def start_as_current_observation(
 990        self,
 991        *,
 992        trace_context: Optional[TraceContext] = None,
 993        name: str,
 994        as_type: Literal["agent"],
 995        input: Optional[Any] = None,
 996        output: Optional[Any] = None,
 997        metadata: Optional[Any] = None,
 998        version: Optional[str] = None,
 999        level: Optional[SpanLevel] = None,
1000        status_message: Optional[str] = None,
1001        end_on_exit: Optional[bool] = None,
1002    ) -> _AgnosticContextManager[LangfuseAgent]: ...
1003
1004    @overload
1005    def start_as_current_observation(
1006        self,
1007        *,
1008        trace_context: Optional[TraceContext] = None,
1009        name: str,
1010        as_type: Literal["tool"],
1011        input: Optional[Any] = None,
1012        output: Optional[Any] = None,
1013        metadata: Optional[Any] = None,
1014        version: Optional[str] = None,
1015        level: Optional[SpanLevel] = None,
1016        status_message: Optional[str] = None,
1017        end_on_exit: Optional[bool] = None,
1018    ) -> _AgnosticContextManager[LangfuseTool]: ...
1019
1020    @overload
1021    def start_as_current_observation(
1022        self,
1023        *,
1024        trace_context: Optional[TraceContext] = None,
1025        name: str,
1026        as_type: Literal["chain"],
1027        input: Optional[Any] = None,
1028        output: Optional[Any] = None,
1029        metadata: Optional[Any] = None,
1030        version: Optional[str] = None,
1031        level: Optional[SpanLevel] = None,
1032        status_message: Optional[str] = None,
1033        end_on_exit: Optional[bool] = None,
1034    ) -> _AgnosticContextManager[LangfuseChain]: ...
1035
1036    @overload
1037    def start_as_current_observation(
1038        self,
1039        *,
1040        trace_context: Optional[TraceContext] = None,
1041        name: str,
1042        as_type: Literal["retriever"],
1043        input: Optional[Any] = None,
1044        output: Optional[Any] = None,
1045        metadata: Optional[Any] = None,
1046        version: Optional[str] = None,
1047        level: Optional[SpanLevel] = None,
1048        status_message: Optional[str] = None,
1049        end_on_exit: Optional[bool] = None,
1050    ) -> _AgnosticContextManager[LangfuseRetriever]: ...
1051
1052    @overload
1053    def start_as_current_observation(
1054        self,
1055        *,
1056        trace_context: Optional[TraceContext] = None,
1057        name: str,
1058        as_type: Literal["evaluator"],
1059        input: Optional[Any] = None,
1060        output: Optional[Any] = None,
1061        metadata: Optional[Any] = None,
1062        version: Optional[str] = None,
1063        level: Optional[SpanLevel] = None,
1064        status_message: Optional[str] = None,
1065        end_on_exit: Optional[bool] = None,
1066    ) -> _AgnosticContextManager[LangfuseEvaluator]: ...
1067
1068    @overload
1069    def start_as_current_observation(
1070        self,
1071        *,
1072        trace_context: Optional[TraceContext] = None,
1073        name: str,
1074        as_type: Literal["embedding"],
1075        input: Optional[Any] = None,
1076        output: Optional[Any] = None,
1077        metadata: Optional[Any] = None,
1078        version: Optional[str] = None,
1079        level: Optional[SpanLevel] = None,
1080        status_message: Optional[str] = None,
1081        completion_start_time: Optional[datetime] = None,
1082        model: Optional[str] = None,
1083        model_parameters: Optional[Dict[str, MapValue]] = None,
1084        usage_details: Optional[Dict[str, int]] = None,
1085        cost_details: Optional[Dict[str, float]] = None,
1086        prompt: Optional[PromptClient] = None,
1087        end_on_exit: Optional[bool] = None,
1088    ) -> _AgnosticContextManager[LangfuseEmbedding]: ...
1089
1090    @overload
1091    def start_as_current_observation(
1092        self,
1093        *,
1094        trace_context: Optional[TraceContext] = None,
1095        name: str,
1096        as_type: Literal["guardrail"],
1097        input: Optional[Any] = None,
1098        output: Optional[Any] = None,
1099        metadata: Optional[Any] = None,
1100        version: Optional[str] = None,
1101        level: Optional[SpanLevel] = None,
1102        status_message: Optional[str] = None,
1103        end_on_exit: Optional[bool] = None,
1104    ) -> _AgnosticContextManager[LangfuseGuardrail]: ...
1105
1106    def start_as_current_observation(
1107        self,
1108        *,
1109        trace_context: Optional[TraceContext] = None,
1110        name: str,
1111        as_type: ObservationTypeLiteralNoEvent = "span",
1112        input: Optional[Any] = None,
1113        output: Optional[Any] = None,
1114        metadata: Optional[Any] = None,
1115        version: Optional[str] = None,
1116        level: Optional[SpanLevel] = None,
1117        status_message: Optional[str] = None,
1118        completion_start_time: Optional[datetime] = None,
1119        model: Optional[str] = None,
1120        model_parameters: Optional[Dict[str, MapValue]] = None,
1121        usage_details: Optional[Dict[str, int]] = None,
1122        cost_details: Optional[Dict[str, float]] = None,
1123        prompt: Optional[PromptClient] = None,
1124        end_on_exit: Optional[bool] = None,
1125    ) -> Union[
1126        _AgnosticContextManager[LangfuseGeneration],
1127        _AgnosticContextManager[LangfuseSpan],
1128        _AgnosticContextManager[LangfuseAgent],
1129        _AgnosticContextManager[LangfuseTool],
1130        _AgnosticContextManager[LangfuseChain],
1131        _AgnosticContextManager[LangfuseRetriever],
1132        _AgnosticContextManager[LangfuseEvaluator],
1133        _AgnosticContextManager[LangfuseEmbedding],
1134        _AgnosticContextManager[LangfuseGuardrail],
1135    ]:
1136        """Create a new observation and set it as the current span in a context manager.
1137
1138        This method creates a new observation of the specified type and sets it as the
1139        current span within a context manager. Use this method with a 'with' statement to
1140        automatically handle the observation lifecycle within a code block.
1141
1142        The created observation will be the child of the current span in the context.
1143
1144        Args:
1145            trace_context: Optional context for connecting to an existing trace
1146            name: Name of the observation (e.g., function or operation name)
1147            as_type: Type of observation to create (defaults to "span")
1148            input: Input data for the operation (can be any JSON-serializable object)
1149            output: Output data from the operation (can be any JSON-serializable object)
1150            metadata: Additional metadata to associate with the observation
1151            version: Version identifier for the code or component
1152            level: Importance level of the observation (info, warning, error)
1153            status_message: Optional status message for the observation
1154            end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
1155
1156            The following parameters are available when as_type is "generation" or "embedding":
1157            completion_start_time: When the model started generating the response
1158            model: Name/identifier of the AI model used (e.g., "gpt-4")
1159            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1160            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1161            cost_details: Cost information for the model call
1162            prompt: Associated prompt template from Langfuse prompt management
1163
1164        Returns:
1165            A context manager that yields the appropriate observation type based on as_type
1166
1167        Example:
1168            ```python
1169            # Create a span
1170            with langfuse.start_as_current_observation(name="process-query", as_type="span") as span:
1171                # Do work
1172                result = process_data()
1173                span.update(output=result)
1174
1175                # Create a child span automatically
1176                with span.start_as_current_span(name="sub-operation") as child_span:
1177                    # Do sub-operation work
1178                    child_span.update(output="sub-result")
1179
1180            # Create a tool observation
1181            with langfuse.start_as_current_observation(name="web-search", as_type="tool") as tool:
1182                # Do tool work
1183                results = search_web(query)
1184                tool.update(output=results)
1185
1186            # Create a generation observation
1187            with langfuse.start_as_current_observation(
1188                name="answer-generation",
1189                as_type="generation",
1190                model="gpt-4"
1191            ) as generation:
1192                # Generate answer
1193                response = llm.generate(...)
1194                generation.update(output=response)
1195            ```
1196        """
1197        if as_type in get_observation_types_list(ObservationTypeGenerationLike):
1198            if trace_context:
1199                trace_id = trace_context.get("trace_id", None)
1200                parent_span_id = trace_context.get("parent_span_id", None)
1201
1202                if trace_id:
1203                    remote_parent_span = self._create_remote_parent_span(
1204                        trace_id=trace_id, parent_span_id=parent_span_id
1205                    )
1206
1207                    return cast(
1208                        Union[
1209                            _AgnosticContextManager[LangfuseGeneration],
1210                            _AgnosticContextManager[LangfuseEmbedding],
1211                        ],
1212                        self._create_span_with_parent_context(
1213                            as_type=as_type,
1214                            name=name,
1215                            remote_parent_span=remote_parent_span,
1216                            parent=None,
1217                            end_on_exit=end_on_exit,
1218                            input=input,
1219                            output=output,
1220                            metadata=metadata,
1221                            version=version,
1222                            level=level,
1223                            status_message=status_message,
1224                            completion_start_time=completion_start_time,
1225                            model=model,
1226                            model_parameters=model_parameters,
1227                            usage_details=usage_details,
1228                            cost_details=cost_details,
1229                            prompt=prompt,
1230                        ),
1231                    )
1232
1233            return cast(
1234                Union[
1235                    _AgnosticContextManager[LangfuseGeneration],
1236                    _AgnosticContextManager[LangfuseEmbedding],
1237                ],
1238                self._start_as_current_otel_span_with_processed_media(
1239                    as_type=as_type,
1240                    name=name,
1241                    end_on_exit=end_on_exit,
1242                    input=input,
1243                    output=output,
1244                    metadata=metadata,
1245                    version=version,
1246                    level=level,
1247                    status_message=status_message,
1248                    completion_start_time=completion_start_time,
1249                    model=model,
1250                    model_parameters=model_parameters,
1251                    usage_details=usage_details,
1252                    cost_details=cost_details,
1253                    prompt=prompt,
1254                ),
1255            )
1256
1257        if as_type in get_observation_types_list(ObservationTypeSpanLike):
1258            if trace_context:
1259                trace_id = trace_context.get("trace_id", None)
1260                parent_span_id = trace_context.get("parent_span_id", None)
1261
1262                if trace_id:
1263                    remote_parent_span = self._create_remote_parent_span(
1264                        trace_id=trace_id, parent_span_id=parent_span_id
1265                    )
1266
1267                    return cast(
1268                        Union[
1269                            _AgnosticContextManager[LangfuseSpan],
1270                            _AgnosticContextManager[LangfuseAgent],
1271                            _AgnosticContextManager[LangfuseTool],
1272                            _AgnosticContextManager[LangfuseChain],
1273                            _AgnosticContextManager[LangfuseRetriever],
1274                            _AgnosticContextManager[LangfuseEvaluator],
1275                            _AgnosticContextManager[LangfuseGuardrail],
1276                        ],
1277                        self._create_span_with_parent_context(
1278                            as_type=as_type,
1279                            name=name,
1280                            remote_parent_span=remote_parent_span,
1281                            parent=None,
1282                            end_on_exit=end_on_exit,
1283                            input=input,
1284                            output=output,
1285                            metadata=metadata,
1286                            version=version,
1287                            level=level,
1288                            status_message=status_message,
1289                        ),
1290                    )
1291
1292            return cast(
1293                Union[
1294                    _AgnosticContextManager[LangfuseSpan],
1295                    _AgnosticContextManager[LangfuseAgent],
1296                    _AgnosticContextManager[LangfuseTool],
1297                    _AgnosticContextManager[LangfuseChain],
1298                    _AgnosticContextManager[LangfuseRetriever],
1299                    _AgnosticContextManager[LangfuseEvaluator],
1300                    _AgnosticContextManager[LangfuseGuardrail],
1301                ],
1302                self._start_as_current_otel_span_with_processed_media(
1303                    as_type=as_type,
1304                    name=name,
1305                    end_on_exit=end_on_exit,
1306                    input=input,
1307                    output=output,
1308                    metadata=metadata,
1309                    version=version,
1310                    level=level,
1311                    status_message=status_message,
1312                ),
1313            )
1314
1315        # This should never be reached since all valid types are handled above
1316        langfuse_logger.warning(
1317            f"Unknown observation type: {as_type}, falling back to span"
1318        )
1319        return self._start_as_current_otel_span_with_processed_media(
1320            as_type="span",
1321            name=name,
1322            end_on_exit=end_on_exit,
1323            input=input,
1324            output=output,
1325            metadata=metadata,
1326            version=version,
1327            level=level,
1328            status_message=status_message,
1329        )
1330
1331    def _get_span_class(
1332        self,
1333        as_type: ObservationTypeLiteral,
1334    ) -> Union[
1335        Type[LangfuseAgent],
1336        Type[LangfuseTool],
1337        Type[LangfuseChain],
1338        Type[LangfuseRetriever],
1339        Type[LangfuseEvaluator],
1340        Type[LangfuseEmbedding],
1341        Type[LangfuseGuardrail],
1342        Type[LangfuseGeneration],
1343        Type[LangfuseEvent],
1344        Type[LangfuseSpan],
1345    ]:
1346        """Get the appropriate span class based on as_type."""
1347        normalized_type = as_type.lower()
1348
1349        if normalized_type == "agent":
1350            return LangfuseAgent
1351        elif normalized_type == "tool":
1352            return LangfuseTool
1353        elif normalized_type == "chain":
1354            return LangfuseChain
1355        elif normalized_type == "retriever":
1356            return LangfuseRetriever
1357        elif normalized_type == "evaluator":
1358            return LangfuseEvaluator
1359        elif normalized_type == "embedding":
1360            return LangfuseEmbedding
1361        elif normalized_type == "guardrail":
1362            return LangfuseGuardrail
1363        elif normalized_type == "generation":
1364            return LangfuseGeneration
1365        elif normalized_type == "event":
1366            return LangfuseEvent
1367        elif normalized_type == "span":
1368            return LangfuseSpan
1369        else:
1370            return LangfuseSpan
1371
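`_get_span_class` is the single place where an `as_type` string is mapped to its wrapper class, so the typed context managers returned above always match the requested observation type. A minimal sketch of how this surfaces in the public API, assuming the v3 client's `start_as_current_observation` method and a client obtained via `get_client()`:

```python
from langfuse import get_client

langfuse = get_client()

# as_type="retriever" resolves to LangfuseRetriever via the mapping above;
# unknown strings fall back to LangfuseSpan.
with langfuse.start_as_current_observation(name="vector-lookup", as_type="retriever") as obs:
    obs.update(output={"matches": 3})  # hypothetical payload
```
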
1372    @_agnosticcontextmanager
1373    def _create_span_with_parent_context(
1374        self,
1375        *,
1376        name: str,
1377        parent: Optional[otel_trace_api.Span] = None,
1378        remote_parent_span: Optional[otel_trace_api.Span] = None,
1379        as_type: ObservationTypeLiteralNoEvent,
1380        end_on_exit: Optional[bool] = None,
1381        input: Optional[Any] = None,
1382        output: Optional[Any] = None,
1383        metadata: Optional[Any] = None,
1384        version: Optional[str] = None,
1385        level: Optional[SpanLevel] = None,
1386        status_message: Optional[str] = None,
1387        completion_start_time: Optional[datetime] = None,
1388        model: Optional[str] = None,
1389        model_parameters: Optional[Dict[str, MapValue]] = None,
1390        usage_details: Optional[Dict[str, int]] = None,
1391        cost_details: Optional[Dict[str, float]] = None,
1392        prompt: Optional[PromptClient] = None,
1393    ) -> Any:
1394        parent_span = parent or cast(otel_trace_api.Span, remote_parent_span)
1395
1396        with otel_trace_api.use_span(parent_span):
1397            with self._start_as_current_otel_span_with_processed_media(
1398                name=name,
1399                as_type=as_type,
1400                end_on_exit=end_on_exit,
1401                input=input,
1402                output=output,
1403                metadata=metadata,
1404                version=version,
1405                level=level,
1406                status_message=status_message,
1407                completion_start_time=completion_start_time,
1408                model=model,
1409                model_parameters=model_parameters,
1410                usage_details=usage_details,
1411                cost_details=cost_details,
1412                prompt=prompt,
1413            ) as langfuse_span:
1414                if remote_parent_span is not None:
1415                    langfuse_span._otel_span.set_attribute(
1416                        LangfuseOtelSpanAttributes.AS_ROOT, True
1417                    )
1418
1419                yield langfuse_span
1420
1421    @_agnosticcontextmanager
1422    def _start_as_current_otel_span_with_processed_media(
1423        self,
1424        *,
1425        name: str,
1426        as_type: Optional[ObservationTypeLiteralNoEvent] = None,
1427        end_on_exit: Optional[bool] = None,
1428        input: Optional[Any] = None,
1429        output: Optional[Any] = None,
1430        metadata: Optional[Any] = None,
1431        version: Optional[str] = None,
1432        level: Optional[SpanLevel] = None,
1433        status_message: Optional[str] = None,
1434        completion_start_time: Optional[datetime] = None,
1435        model: Optional[str] = None,
1436        model_parameters: Optional[Dict[str, MapValue]] = None,
1437        usage_details: Optional[Dict[str, int]] = None,
1438        cost_details: Optional[Dict[str, float]] = None,
1439        prompt: Optional[PromptClient] = None,
1440    ) -> Any:
1441        with self._otel_tracer.start_as_current_span(
1442            name=name,
1443            end_on_exit=end_on_exit if end_on_exit is not None else True,
1444        ) as otel_span:
1445            span_class = self._get_span_class(
1446                as_type or "generation"
1447            )  # fall back to "generation" if no as_type was given
1448            common_args = {
1449                "otel_span": otel_span,
1450                "langfuse_client": self,
1451                "environment": self._environment,
1452                "input": input,
1453                "output": output,
1454                "metadata": metadata,
1455                "version": version,
1456                "level": level,
1457                "status_message": status_message,
1458            }
1459
1460            if span_class in [
1461                LangfuseGeneration,
1462                LangfuseEmbedding,
1463            ]:
1464                common_args.update(
1465                    {
1466                        "completion_start_time": completion_start_time,
1467                        "model": model,
1468                        "model_parameters": model_parameters,
1469                        "usage_details": usage_details,
1470                        "cost_details": cost_details,
1471                        "prompt": prompt,
1472                    }
1473                )
1474            # For span-like types (span, agent, tool, chain, retriever, evaluator, guardrail), no generation properties needed
1475
1476            yield span_class(**common_args)  # type: ignore[arg-type]
1477
1478    def _get_current_otel_span(self) -> Optional[otel_trace_api.Span]:
1479        current_span = otel_trace_api.get_current_span()
1480
1481        if current_span is otel_trace_api.INVALID_SPAN:
1482            langfuse_logger.warning(
1483                "Context error: No active span in current context. Operations that depend on an active span will be skipped. "
1484                "Ensure spans are created with start_as_current_span() or that you're operating within an active span context."
1485            )
1486            return None
1487
1488        return current_span
1489
1490    def update_current_generation(
1491        self,
1492        *,
1493        name: Optional[str] = None,
1494        input: Optional[Any] = None,
1495        output: Optional[Any] = None,
1496        metadata: Optional[Any] = None,
1497        version: Optional[str] = None,
1498        level: Optional[SpanLevel] = None,
1499        status_message: Optional[str] = None,
1500        completion_start_time: Optional[datetime] = None,
1501        model: Optional[str] = None,
1502        model_parameters: Optional[Dict[str, MapValue]] = None,
1503        usage_details: Optional[Dict[str, int]] = None,
1504        cost_details: Optional[Dict[str, float]] = None,
1505        prompt: Optional[PromptClient] = None,
1506    ) -> None:
1507        """Update the current active generation span with new information.
1508
1509        This method updates the current generation span in the active context with
1510        additional information. It's useful for adding output, usage stats, or other
1511        details that become available during or after model generation.
1512
1513        Args:
1514            name: The generation name
1515            input: Updated input data for the model
1516            output: Output from the model (e.g., completions)
1517            metadata: Additional metadata to associate with the generation
1518            version: Version identifier for the model or component
1519            level: Importance level of the generation (info, warning, error)
1520            status_message: Optional status message for the generation
1521            completion_start_time: When the model started generating the response
1522            model: Name/identifier of the AI model used (e.g., "gpt-4")
1523            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1524            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1525            cost_details: Cost information for the model call
1526            prompt: Associated prompt template from Langfuse prompt management
1527
1528        Example:
1529            ```python
1530            with langfuse.start_as_current_generation(name="answer-query") as generation:
1531                # Initial setup and API call
1532                response = llm.generate(...)
1533
1534                # Update with results that weren't available at creation time
1535                langfuse.update_current_generation(
1536                    output=response.text,
1537                    usage_details={
1538                        "prompt_tokens": response.usage.prompt_tokens,
1539                        "completion_tokens": response.usage.completion_tokens
1540                    }
1541                )
1542            ```
1543        """
1544        if not self._tracing_enabled:
1545            langfuse_logger.debug(
1546                "Operation skipped: update_current_generation - Tracing is disabled or client is in no-op mode."
1547            )
1548            return
1549
1550        current_otel_span = self._get_current_otel_span()
1551
1552        if current_otel_span is not None:
1553            generation = LangfuseGeneration(
1554                otel_span=current_otel_span, langfuse_client=self
1555            )
1556
1557            if name:
1558                current_otel_span.update_name(name)
1559
1560            generation.update(
1561                input=input,
1562                output=output,
1563                metadata=metadata,
1564                version=version,
1565                level=level,
1566                status_message=status_message,
1567                completion_start_time=completion_start_time,
1568                model=model,
1569                model_parameters=model_parameters,
1570                usage_details=usage_details,
1571                cost_details=cost_details,
1572                prompt=prompt,
1573            )
1574
1575    def update_current_span(
1576        self,
1577        *,
1578        name: Optional[str] = None,
1579        input: Optional[Any] = None,
1580        output: Optional[Any] = None,
1581        metadata: Optional[Any] = None,
1582        version: Optional[str] = None,
1583        level: Optional[SpanLevel] = None,
1584        status_message: Optional[str] = None,
1585    ) -> None:
1586        """Update the current active span with new information.
1587
1588        This method updates the current span in the active context with
1589        additional information. It's useful for adding outputs or metadata
1590        that become available during execution.
1591
1592        Args:
1593            name: The span name
1594            input: Updated input data for the operation
1595            output: Output data from the operation
1596            metadata: Additional metadata to associate with the span
1597            version: Version identifier for the code or component
1598            level: Importance level of the span (info, warning, error)
1599            status_message: Optional status message for the span
1600
1601        Example:
1602            ```python
1603            with langfuse.start_as_current_span(name="process-data") as span:
1604                # Initial processing
1605                result = process_first_part()
1606
1607                # Update with intermediate results
1608                langfuse.update_current_span(metadata={"intermediate_result": result})
1609
1610                # Continue processing
1611                final_result = process_second_part(result)
1612
1613                # Final update
1614                langfuse.update_current_span(output=final_result)
1615            ```
1616        """
1617        if not self._tracing_enabled:
1618            langfuse_logger.debug(
1619                "Operation skipped: update_current_span - Tracing is disabled or client is in no-op mode."
1620            )
1621            return
1622
1623        current_otel_span = self._get_current_otel_span()
1624
1625        if current_otel_span is not None:
1626            span = LangfuseSpan(
1627                otel_span=current_otel_span,
1628                langfuse_client=self,
1629                environment=self._environment,
1630            )
1631
1632            if name:
1633                current_otel_span.update_name(name)
1634
1635            span.update(
1636                input=input,
1637                output=output,
1638                metadata=metadata,
1639                version=version,
1640                level=level,
1641                status_message=status_message,
1642            )
1643
1644    def update_current_trace(
1645        self,
1646        *,
1647        name: Optional[str] = None,
1648        user_id: Optional[str] = None,
1649        session_id: Optional[str] = None,
1650        version: Optional[str] = None,
1651        input: Optional[Any] = None,
1652        output: Optional[Any] = None,
1653        metadata: Optional[Any] = None,
1654        tags: Optional[List[str]] = None,
1655        public: Optional[bool] = None,
1656    ) -> None:
1657        """Update the current trace with additional information.
1658
1659        Args:
1660            name: Updated name for the Langfuse trace
1661            user_id: ID of the user who initiated the Langfuse trace
1662            session_id: Session identifier for grouping related Langfuse traces
1663            version: Version identifier for the application or service
1664            input: Input data for the overall Langfuse trace
1665            output: Output data from the overall Langfuse trace
1666            metadata: Additional metadata to associate with the Langfuse trace
1667            tags: List of tags to categorize the Langfuse trace
1668            public: Whether the Langfuse trace should be publicly accessible
1669
1670        See Also:
1671            :func:`langfuse.propagate_attributes`: Recommended replacement for setting trace-level attributes (e.g. user_id, session_id) on all spans in the current context
1672        """
1673        if not self._tracing_enabled:
1674            langfuse_logger.debug(
1675                "Operation skipped: update_current_trace - Tracing is disabled or client is in no-op mode."
1676            )
1677            return
1678
1679        current_otel_span = self._get_current_otel_span()
1680
1681        if current_otel_span is not None and current_otel_span.is_recording():
1682            existing_observation_type = current_otel_span.attributes.get(  # type: ignore[attr-defined]
1683                LangfuseOtelSpanAttributes.OBSERVATION_TYPE, "span"
1684            )
1685            # We need to preserve the class to keep the correct observation type
1686            span_class = self._get_span_class(existing_observation_type)
1687            span = span_class(
1688                otel_span=current_otel_span,
1689                langfuse_client=self,
1690                environment=self._environment,
1691            )
1692
1693            span.update_trace(
1694                name=name,
1695                user_id=user_id,
1696                session_id=session_id,
1697                version=version,
1698                input=input,
1699                output=output,
1700                metadata=metadata,
1701                tags=tags,
1702                public=public,
1703            )
1704
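The docstring above has no usage example; a minimal sketch of updating trace-level attributes from within an active span, assuming a client obtained via `get_client()` (identifiers are placeholders):

```python
from langfuse import get_client

langfuse = get_client()

with langfuse.start_as_current_span(name="handle-request") as span:
    # Attach trace-level attributes that only become known while processing
    langfuse.update_current_trace(
        user_id="user-123",                      # placeholder user ID
        session_id="session-456",                # placeholder session ID
        tags=["production", "chat"],
        metadata={"request_source": "mobile-app"},
    )
```
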
1705    def create_event(
1706        self,
1707        *,
1708        trace_context: Optional[TraceContext] = None,
1709        name: str,
1710        input: Optional[Any] = None,
1711        output: Optional[Any] = None,
1712        metadata: Optional[Any] = None,
1713        version: Optional[str] = None,
1714        level: Optional[SpanLevel] = None,
1715        status_message: Optional[str] = None,
1716    ) -> LangfuseEvent:
1717        """Create a new Langfuse observation of type 'EVENT'.
1718
1719        The created Langfuse Event observation will be the child of the current span in the context.
1720
1721        Args:
1722            trace_context: Optional context for connecting to an existing trace
1723            name: Name of the event (e.g., function or operation name)
1724            input: Input data for the operation (can be any JSON-serializable object)
1725            output: Output data from the operation (can be any JSON-serializable object)
1726            metadata: Additional metadata to associate with the event
1727            version: Version identifier for the code or component
1728            level: Importance level of the event (info, warning, error)
1729            status_message: Optional status message for the event
1730
1731        Returns:
1732            The Langfuse Event object
1733
1734        Example:
1735            ```python
1736            event = langfuse.create_event(name="process-event")
1737            ```
1738        """
1739        timestamp = time_ns()
1740
1741        if trace_context:
1742            trace_id = trace_context.get("trace_id", None)
1743            parent_span_id = trace_context.get("parent_span_id", None)
1744
1745            if trace_id:
1746                remote_parent_span = self._create_remote_parent_span(
1747                    trace_id=trace_id, parent_span_id=parent_span_id
1748                )
1749
1750                with otel_trace_api.use_span(
1751                    cast(otel_trace_api.Span, remote_parent_span)
1752                ):
1753                    otel_span = self._otel_tracer.start_span(
1754                        name=name, start_time=timestamp
1755                    )
1756                    otel_span.set_attribute(LangfuseOtelSpanAttributes.AS_ROOT, True)
1757
1758                    return cast(
1759                        LangfuseEvent,
1760                        LangfuseEvent(
1761                            otel_span=otel_span,
1762                            langfuse_client=self,
1763                            environment=self._environment,
1764                            input=input,
1765                            output=output,
1766                            metadata=metadata,
1767                            version=version,
1768                            level=level,
1769                            status_message=status_message,
1770                        ).end(end_time=timestamp),
1771                    )
1772
1773        otel_span = self._otel_tracer.start_span(name=name, start_time=timestamp)
1774
1775        return cast(
1776            LangfuseEvent,
1777            LangfuseEvent(
1778                otel_span=otel_span,
1779                langfuse_client=self,
1780                environment=self._environment,
1781                input=input,
1782                output=output,
1783                metadata=metadata,
1784                version=version,
1785                level=level,
1786                status_message=status_message,
1787            ).end(end_time=timestamp),
1788        )
1789
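Beyond the one-line example in the docstring, `trace_context` lets an event be attached to an already-known trace; a hedged sketch (the trace ID and payloads are placeholders):

```python
# Child event of the current span
langfuse.create_event(
    name="cache-hit",
    metadata={"cache_key": "user:123:profile"},  # hypothetical metadata
)

# Event attached to an existing trace via its 32-char lowercase hex trace ID
langfuse.create_event(
    name="external-callback-received",
    trace_context={"trace_id": "abcdef1234567890abcdef1234567890"},
    input={"callback_id": "cb-42"},
)
```
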
1790    def _create_remote_parent_span(
1791        self, *, trace_id: str, parent_span_id: Optional[str]
1792    ) -> Any:
1793        if not self._is_valid_trace_id(trace_id):
1794            langfuse_logger.warning(
1795                f"Passed trace ID '{trace_id}' is not a valid 32 lowercase hex char Langfuse trace id. Ignoring trace ID."
1796            )
1797
1798        if parent_span_id and not self._is_valid_span_id(parent_span_id):
1799            langfuse_logger.warning(
1800                f"Passed span ID '{parent_span_id}' is not a valid 16 lowercase hex char Langfuse span id. Ignoring parent span ID."
1801            )
1802
1803        int_trace_id = int(trace_id, 16)
1804        int_parent_span_id = (
1805            int(parent_span_id, 16)
1806            if parent_span_id
1807            else RandomIdGenerator().generate_span_id()
1808        )
1809
1810        span_context = otel_trace_api.SpanContext(
1811            trace_id=int_trace_id,
1812            span_id=int_parent_span_id,
1813            trace_flags=otel_trace_api.TraceFlags(0x01),  # mark span as sampled
1814            is_remote=False,
1815        )
1816
1817        return otel_trace_api.NonRecordingSpan(span_context)
1818
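`_create_remote_parent_span` is internal, but it is what makes the public `trace_context` parameter work: the given trace ID (and optional parent span ID) is wrapped in a non-recording parent span so new observations join the existing trace. A minimal sketch of the intended usage, assuming the trace ID was produced by another service or derived deterministically from a seed:

```python
# Deterministic 32-char trace ID for a known external identifier
upstream_trace_id = langfuse.create_trace_id(seed="order-789")

with langfuse.start_as_current_span(
    name="fulfil-order",
    trace_context={"trace_id": upstream_trace_id},
) as span:
    # This span and its children are attached to the upstream trace
    ...
```
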
1819    def _is_valid_trace_id(self, trace_id: str) -> bool:
1820        pattern = r"^[0-9a-f]{32}$"
1821
1822        return bool(re.match(pattern, trace_id))
1823
1824    def _is_valid_span_id(self, span_id: str) -> bool:
1825        pattern = r"^[0-9a-f]{16}$"
1826
1827        return bool(re.match(pattern, span_id))
1828
1829    def _create_observation_id(self, *, seed: Optional[str] = None) -> str:
1830        """Create a unique observation ID for use with Langfuse.
1831
1832        This method generates a unique observation ID (span ID in OpenTelemetry terms)
1833        for use with various Langfuse APIs. It can either generate a random ID or
1834        create a deterministic ID based on a seed string.
1835
1836        Observation IDs must be 16 lowercase hexadecimal characters, representing 8 bytes.
1837        This method ensures the generated ID meets this requirement. If you need to
1838        correlate an external ID with a Langfuse observation ID, use the external ID as
1839        the seed to get a valid, deterministic observation ID.
1840
1841        Args:
1842            seed: Optional string to use as a seed for deterministic ID generation.
1843                 If provided, the same seed will always produce the same ID.
1844                 If not provided, a random ID will be generated.
1845
1846        Returns:
1847            A 16-character lowercase hexadecimal string representing the observation ID.
1848
1849        Example:
1850            ```python
1851            # Generate a random observation ID
1852            obs_id = langfuse.create_observation_id()
1853
1854            # Generate a deterministic ID based on a seed
1855            user_obs_id = langfuse.create_observation_id(seed="user-123-feedback")
1856
1857            # Correlate an external item ID with a Langfuse observation ID
1858            item_id = "item-789012"
1859            correlated_obs_id = langfuse.create_observation_id(seed=item_id)
1860
1861            # Use the ID with Langfuse APIs
1862            langfuse.create_score(
1863                name="relevance",
1864                value=0.95,
1865                trace_id=trace_id,
1866                observation_id=obs_id
1867            )
1868            ```
1869        """
1870        if not seed:
1871            span_id_int = RandomIdGenerator().generate_span_id()
1872
1873            return self._format_otel_span_id(span_id_int)
1874
1875        return sha256(seed.encode("utf-8")).digest()[:8].hex()
1876
1877    @staticmethod
1878    def create_trace_id(*, seed: Optional[str] = None) -> str:
1879        """Create a unique trace ID for use with Langfuse.
1880
1881        This method generates a unique trace ID for use with various Langfuse APIs.
1882        It can either generate a random ID or create a deterministic ID based on
1883        a seed string.
1884
1885        Trace IDs must be 32 lowercase hexadecimal characters, representing 16 bytes.
1886        This method ensures the generated ID meets this requirement. If you need to
1887        correlate an external ID with a Langfuse trace ID, use the external ID as the
1888        seed to get a valid, deterministic Langfuse trace ID.
1889
1890        Args:
1891            seed: Optional string to use as a seed for deterministic ID generation.
1892                 If provided, the same seed will always produce the same ID.
1893                 If not provided, a random ID will be generated.
1894
1895        Returns:
1896            A 32-character lowercase hexadecimal string representing the Langfuse trace ID.
1897
1898        Example:
1899            ```python
1900            # Generate a random trace ID
1901            trace_id = langfuse.create_trace_id()
1902
1903            # Generate a deterministic ID based on a seed
1904            session_trace_id = langfuse.create_trace_id(seed="session-456")
1905
1906            # Correlate an external ID with a Langfuse trace ID
1907            external_id = "external-system-123456"
1908            correlated_trace_id = langfuse.create_trace_id(seed=external_id)
1909
1910            # Use the ID with trace context
1911            with langfuse.start_as_current_span(
1912                name="process-request",
1913                trace_context={"trace_id": trace_id}
1914            ) as span:
1915                # Operation will be part of the specific trace
1916                pass
1917            ```
1918        """
1919        if not seed:
1920            trace_id_int = RandomIdGenerator().generate_trace_id()
1921
1922            return Langfuse._format_otel_trace_id(trace_id_int)
1923
1924        return sha256(seed.encode("utf-8")).digest()[:16].hex()
1925
1926    def _get_otel_trace_id(self, otel_span: otel_trace_api.Span) -> str:
1927        span_context = otel_span.get_span_context()
1928
1929        return self._format_otel_trace_id(span_context.trace_id)
1930
1931    def _get_otel_span_id(self, otel_span: otel_trace_api.Span) -> str:
1932        span_context = otel_span.get_span_context()
1933
1934        return self._format_otel_span_id(span_context.span_id)
1935
1936    @staticmethod
1937    def _format_otel_span_id(span_id_int: int) -> str:
1938        """Format an integer span ID to a 16-character lowercase hex string.
1939
1940        Internal method to convert an OpenTelemetry integer span ID to the standard
1941        W3C Trace Context format (16-character lowercase hex string).
1942
1943        Args:
1944            span_id_int: 64-bit integer representing a span ID
1945
1946        Returns:
1947            A 16-character lowercase hexadecimal string
1948        """
1949        return format(span_id_int, "016x")
1950
1951    @staticmethod
1952    def _format_otel_trace_id(trace_id_int: int) -> str:
1953        """Format an integer trace ID to a 32-character lowercase hex string.
1954
1955        Internal method to convert an OpenTelemetry integer trace ID to the standard
1956        W3C Trace Context format (32-character lowercase hex string).
1957
1958        Args:
1959            trace_id_int: 128-bit integer representing a trace ID
1960
1961        Returns:
1962            A 32-character lowercase hexadecimal string
1963        """
1964        return format(trace_id_int, "032x")
1965
1966    @overload
1967    def create_score(
1968        self,
1969        *,
1970        name: str,
1971        value: float,
1972        session_id: Optional[str] = None,
1973        dataset_run_id: Optional[str] = None,
1974        trace_id: Optional[str] = None,
1975        observation_id: Optional[str] = None,
1976        score_id: Optional[str] = None,
1977        data_type: Optional[Literal["NUMERIC", "BOOLEAN"]] = None,
1978        comment: Optional[str] = None,
1979        config_id: Optional[str] = None,
1980        metadata: Optional[Any] = None,
1981        timestamp: Optional[datetime] = None,
1982    ) -> None: ...
1983
1984    @overload
1985    def create_score(
1986        self,
1987        *,
1988        name: str,
1989        value: str,
1990        session_id: Optional[str] = None,
1991        dataset_run_id: Optional[str] = None,
1992        trace_id: Optional[str] = None,
1993        score_id: Optional[str] = None,
1994        observation_id: Optional[str] = None,
1995        data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
1996        comment: Optional[str] = None,
1997        config_id: Optional[str] = None,
1998        metadata: Optional[Any] = None,
1999        timestamp: Optional[datetime] = None,
2000    ) -> None: ...
2001
2002    def create_score(
2003        self,
2004        *,
2005        name: str,
2006        value: Union[float, str],
2007        session_id: Optional[str] = None,
2008        dataset_run_id: Optional[str] = None,
2009        trace_id: Optional[str] = None,
2010        observation_id: Optional[str] = None,
2011        score_id: Optional[str] = None,
2012        data_type: Optional[ScoreDataType] = None,
2013        comment: Optional[str] = None,
2014        config_id: Optional[str] = None,
2015        metadata: Optional[Any] = None,
2016        timestamp: Optional[datetime] = None,
2017    ) -> None:
2018        """Create a score for a specific trace or observation.
2019
2020        This method creates a score for evaluating a Langfuse trace or observation. Scores can be
2021        used to track quality metrics, user feedback, or automated evaluations.
2022
2023        Args:
2024            name: Name of the score (e.g., "relevance", "accuracy")
2025            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
2026            session_id: ID of the Langfuse session to associate the score with
2027            dataset_run_id: ID of the Langfuse dataset run to associate the score with
2028            trace_id: ID of the Langfuse trace to associate the score with
2029            observation_id: Optional ID of the specific observation to score. Trace ID must be provided too.
2030            score_id: Optional custom ID for the score (auto-generated if not provided)
2031            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
2032            comment: Optional comment or explanation for the score
2033            config_id: Optional ID of a score config defined in Langfuse
2034            metadata: Optional metadata to be attached to the score
2035            timestamp: Optional timestamp for the score (defaults to current UTC time)
2036
2037        Example:
2038            ```python
2039            # Create a numeric score for accuracy
2040            langfuse.create_score(
2041                name="accuracy",
2042                value=0.92,
2043                trace_id="abcdef1234567890abcdef1234567890",
2044                data_type="NUMERIC",
2045                comment="High accuracy with minor irrelevant details"
2046            )
2047
2048            # Create a categorical score for sentiment
2049            langfuse.create_score(
2050                name="sentiment",
2051                value="positive",
2052                trace_id="abcdef1234567890abcdef1234567890",
2053                observation_id="abcdef1234567890",
2054                data_type="CATEGORICAL"
2055            )
2056            ```
2057        """
2058        if not self._tracing_enabled:
2059            return
2060
2061        score_id = score_id or self._create_observation_id()
2062
2063        try:
2064            new_body = ScoreBody(
2065                id=score_id,
2066                sessionId=session_id,
2067                datasetRunId=dataset_run_id,
2068                traceId=trace_id,
2069                observationId=observation_id,
2070                name=name,
2071                value=value,
2072                dataType=data_type,  # type: ignore
2073                comment=comment,
2074                configId=config_id,
2075                environment=self._environment,
2076                metadata=metadata,
2077            )
2078
2079            event = {
2080                "id": self.create_trace_id(),
2081                "type": "score-create",
2082                "timestamp": timestamp or _get_timestamp(),
2083                "body": new_body,
2084            }
2085
2086            if self._resources is not None:
2087                # Force the score to be sampled if it was created for a legacy trace ID (i.e. not 32 lowercase hex chars)
2088                force_sample = (
2089                    not self._is_valid_trace_id(trace_id) if trace_id else True
2090                )
2091
2092                self._resources.add_score_task(
2093                    event,
2094                    force_sample=force_sample,
2095                )
2096
2097        except Exception as e:
2098            langfuse_logger.exception(
2099                f"Error creating score: Failed to process score event for trace_id={trace_id}, name={name}. Error: {e}"
2100            )
2101
2102    @overload
2103    def score_current_span(
2104        self,
2105        *,
2106        name: str,
2107        value: float,
2108        score_id: Optional[str] = None,
2109        data_type: Optional[Literal["NUMERIC", "BOOLEAN"]] = None,
2110        comment: Optional[str] = None,
2111        config_id: Optional[str] = None,
2112        metadata: Optional[Any] = None,
2113    ) -> None: ...
2114
2115    @overload
2116    def score_current_span(
2117        self,
2118        *,
2119        name: str,
2120        value: str,
2121        score_id: Optional[str] = None,
2122        data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
2123        comment: Optional[str] = None,
2124        config_id: Optional[str] = None,
2125        metadata: Optional[Any] = None,
2126    ) -> None: ...
2127
2128    def score_current_span(
2129        self,
2130        *,
2131        name: str,
2132        value: Union[float, str],
2133        score_id: Optional[str] = None,
2134        data_type: Optional[ScoreDataType] = None,
2135        comment: Optional[str] = None,
2136        config_id: Optional[str] = None,
2137        metadata: Optional[Any] = None,
2138    ) -> None:
2139        """Create a score for the current active span.
2140
2141        This method scores the currently active span in the context. It's a convenient
2142        way to score the current operation without needing to know its trace and span IDs.
2143
2144        Args:
2145            name: Name of the score (e.g., "relevance", "accuracy")
2146            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
2147            score_id: Optional custom ID for the score (auto-generated if not provided)
2148            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
2149            comment: Optional comment or explanation for the score
2150            config_id: Optional ID of a score config defined in Langfuse
2151            metadata: Optional metadata to be attached to the score
2152
2153        Example:
2154            ```python
2155            with langfuse.start_as_current_generation(name="answer-query") as generation:
2156                # Generate answer
2157                response = generate_answer(...)
2158                generation.update(output=response)
2159
2160                # Score the generation
2161                langfuse.score_current_span(
2162                    name="relevance",
2163                    value=0.85,
2164                    data_type="NUMERIC",
2165                    comment="Mostly relevant but contains some tangential information",
2166                    metadata={"model": "gpt-4", "prompt_version": "v2"}
2167                )
2168            ```
2169        """
2170        current_span = self._get_current_otel_span()
2171
2172        if current_span is not None:
2173            trace_id = self._get_otel_trace_id(current_span)
2174            observation_id = self._get_otel_span_id(current_span)
2175
2176            langfuse_logger.info(
2177                f"Score: Creating score name='{name}' value={value} for current span ({observation_id}) in trace {trace_id}"
2178            )
2179
2180            self.create_score(
2181                trace_id=trace_id,
2182                observation_id=observation_id,
2183                name=name,
2184                value=cast(str, value),
2185                score_id=score_id,
2186                data_type=cast(Literal["CATEGORICAL"], data_type),
2187                comment=comment,
2188                config_id=config_id,
2189                metadata=metadata,
2190            )
2191
2192    @overload
2193    def score_current_trace(
2194        self,
2195        *,
2196        name: str,
2197        value: float,
2198        score_id: Optional[str] = None,
2199        data_type: Optional[Literal["NUMERIC", "BOOLEAN"]] = None,
2200        comment: Optional[str] = None,
2201        config_id: Optional[str] = None,
2202        metadata: Optional[Any] = None,
2203    ) -> None: ...
2204
2205    @overload
2206    def score_current_trace(
2207        self,
2208        *,
2209        name: str,
2210        value: str,
2211        score_id: Optional[str] = None,
2212        data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
2213        comment: Optional[str] = None,
2214        config_id: Optional[str] = None,
2215        metadata: Optional[Any] = None,
2216    ) -> None: ...
2217
2218    def score_current_trace(
2219        self,
2220        *,
2221        name: str,
2222        value: Union[float, str],
2223        score_id: Optional[str] = None,
2224        data_type: Optional[ScoreDataType] = None,
2225        comment: Optional[str] = None,
2226        config_id: Optional[str] = None,
2227        metadata: Optional[Any] = None,
2228    ) -> None:
2229        """Create a score for the current trace.
2230
2231        This method scores the trace of the currently active span. Unlike score_current_span,
2232        this method associates the score with the entire trace rather than a specific span.
2233        It's useful for scoring overall performance or quality of the entire operation.
2234
2235        Args:
2236            name: Name of the score (e.g., "user_satisfaction", "overall_quality")
2237            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
2238            score_id: Optional custom ID for the score (auto-generated if not provided)
2239            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
2240            comment: Optional comment or explanation for the score
2241            config_id: Optional ID of a score config defined in Langfuse
2242            metadata: Optional metadata to be attached to the score
2243
2244        Example:
2245            ```python
2246            with langfuse.start_as_current_span(name="process-user-request") as span:
2247                # Process request
2248                result = process_complete_request()
2249                span.update(output=result)
2250
2251                # Score the overall trace
2252                langfuse.score_current_trace(
2253                    name="overall_quality",
2254                    value=0.95,
2255                    data_type="NUMERIC",
2256                    comment="High quality end-to-end response",
2257                    metadata={"evaluator": "gpt-4", "criteria": "comprehensive"}
2258                )
2259            ```
2260        """
2261        current_span = self._get_current_otel_span()
2262
2263        if current_span is not None:
2264            trace_id = self._get_otel_trace_id(current_span)
2265
2266            langfuse_logger.info(
2267                f"Score: Creating score name='{name}' value={value} for entire trace {trace_id}"
2268            )
2269
2270            self.create_score(
2271                trace_id=trace_id,
2272                name=name,
2273                value=cast(str, value),
2274                score_id=score_id,
2275                data_type=cast(Literal["CATEGORICAL"], data_type),
2276                comment=comment,
2277                config_id=config_id,
2278                metadata=metadata,
2279            )
2280
2281    def flush(self) -> None:
2282        """Force flush all pending spans and events to the Langfuse API.
2283
2284        This method manually flushes any pending spans, scores, and other events to the
2285        Langfuse API. It's useful in scenarios where you want to ensure all data is sent
2286        before proceeding, without waiting for the automatic flush interval.
2287
2288        Example:
2289            ```python
2290            # Record some spans and scores
2291            with langfuse.start_as_current_span(name="operation") as span:
2292                # Do work...
2293                pass
2294
2295            # Ensure all data is sent to Langfuse before proceeding
2296            langfuse.flush()
2297
2298            # Continue with other work
2299            ```
2300        """
2301        if self._resources is not None:
2302            self._resources.flush()
2303
2304    def shutdown(self) -> None:
2305        """Shut down the Langfuse client and flush all pending data.
2306
2307        This method cleanly shuts down the Langfuse client, ensuring all pending data
2308        is flushed to the API and all background threads are properly terminated.
2309
2310        It's important to call this method when your application is shutting down to
2311        prevent data loss and resource leaks. For most applications, using the client
2312        as a context manager or relying on the automatic shutdown via atexit is sufficient.
2313
2314        Example:
2315            ```python
2316            # Initialize Langfuse
2317            langfuse = Langfuse(public_key="...", secret_key="...")
2318
2319            # Use Langfuse throughout your application
2320            # ...
2321
2322            # When application is shutting down
2323            langfuse.shutdown()
2324            ```
2325        """
2326        if self._resources is not None:
2327            self._resources.shutdown()
2328
2329    def get_current_trace_id(self) -> Optional[str]:
2330        """Get the trace ID of the current active span.
2331
2332        This method retrieves the trace ID from the currently active span in the context.
2333        It can be used to get the trace ID for referencing in logs, external systems,
2334        or for creating related operations.
2335
2336        Returns:
2337            The current trace ID as a 32-character lowercase hexadecimal string,
2338            or None if there is no active span.
2339
2340        Example:
2341            ```python
2342            with langfuse.start_as_current_span(name="process-request") as span:
2343                # Get the current trace ID for reference
2344                trace_id = langfuse.get_current_trace_id()
2345
2346                # Use it for external correlation
2347                log.info(f"Processing request with trace_id: {trace_id}")
2348
2349                # Or pass to another system
2350                external_system.process(data, trace_id=trace_id)
2351            ```
2352        """
2353        if not self._tracing_enabled:
2354            langfuse_logger.debug(
2355                "Operation skipped: get_current_trace_id - Tracing is disabled or client is in no-op mode."
2356            )
2357            return None
2358
2359        current_otel_span = self._get_current_otel_span()
2360
2361        return self._get_otel_trace_id(current_otel_span) if current_otel_span else None
2362
2363    def get_current_observation_id(self) -> Optional[str]:
2364        """Get the observation ID (span ID) of the current active span.
2365
2366        This method retrieves the observation ID from the currently active span in the context.
2367        It can be used to get the observation ID for referencing in logs, external systems,
2368        or for creating scores or other related operations.
2369
2370        Returns:
2371            The current observation ID as a 16-character lowercase hexadecimal string,
2372            or None if there is no active span.
2373
2374        Example:
2375            ```python
2376            with langfuse.start_as_current_span(name="process-user-query") as span:
2377                # Get the current observation ID
2378                observation_id = langfuse.get_current_observation_id()
2379
2380                # Store it for later reference
2381                cache.set(f"query_{query_id}_observation", observation_id)
2382
2383                # Process the query...
2384            ```
2385        """
2386        if not self._tracing_enabled:
2387            langfuse_logger.debug(
2388                "Operation skipped: get_current_observation_id - Tracing is disabled or client is in no-op mode."
2389            )
2390            return None
2391
2392        current_otel_span = self._get_current_otel_span()
2393
2394        return self._get_otel_span_id(current_otel_span) if current_otel_span else None
2395
2396    def _get_project_id(self) -> Optional[str]:
2397        """Fetch and return the current project ID. Cached across requests. Returns None if no project ID is found for the configured API keys."""
2398        if not self._project_id:
2399            proj = self.api.projects.get()
2400            if not proj.data or not proj.data[0].id:
2401                return None
2402
2403            self._project_id = proj.data[0].id
2404
2405        return self._project_id
2406
2407    def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]:
2408        """Get the URL to view a trace in the Langfuse UI.
2409
2410        This method generates a URL that links directly to a trace in the Langfuse UI.
2411        It's useful for providing links in logs, notifications, or debugging tools.
2412
2413        Args:
2414            trace_id: Optional trace ID to generate a URL for. If not provided,
2415                     the trace ID of the current active span will be used.
2416
2417        Returns:
2418            A URL string pointing to the trace in the Langfuse UI,
2419            or None if the project ID couldn't be retrieved or no trace ID is available.
2420
2421        Example:
2422            ```python
2423            # Get URL for the current trace
2424            with langfuse.start_as_current_span(name="process-request") as span:
2425                trace_url = langfuse.get_trace_url()
2426                log.info(f"Processing trace: {trace_url}")
2427
2428            # Get URL for a specific trace
2429            specific_trace_url = langfuse.get_trace_url(trace_id="1234567890abcdef1234567890abcdef")
2430            send_notification(f"Review needed for trace: {specific_trace_url}")
2431            ```
2432        """
2433        final_trace_id = trace_id or self.get_current_trace_id()
2434        if not final_trace_id:
2435            return None
2436
2437        project_id = self._get_project_id()
2438
2439        return (
2440            f"{self._base_url}/project/{project_id}/traces/{final_trace_id}"
2441            if project_id and final_trace_id
2442            else None
2443        )
2444
2445    def get_dataset(
2446        self, name: str, *, fetch_items_page_size: Optional[int] = 50
2447    ) -> "DatasetClient":
2448        """Fetch a dataset by its name.
2449
2450        Args:
2451            name (str): The name of the dataset to fetch.
2452            fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50.
2453
2454        Returns:
2455            DatasetClient: The dataset with the given name.
2456        """
2457        try:
2458            langfuse_logger.debug(f"Getting dataset {name}")
2459            dataset = self.api.datasets.get(dataset_name=self._url_encode(name))
2460
2461            dataset_items = []
2462            page = 1
2463
2464            while True:
2465                new_items = self.api.dataset_items.list(
2466                    dataset_name=self._url_encode(name, is_url_param=True),
2467                    page=page,
2468                    limit=fetch_items_page_size,
2469                )
2470                dataset_items.extend(new_items.data)
2471
2472                if new_items.meta.total_pages <= page:
2473                    break
2474
2475                page += 1
2476
2477            items = [DatasetItemClient(i, langfuse=self) for i in dataset_items]
2478
2479            return DatasetClient(dataset, items=items)
2480
2481        except Error as e:
2482            handle_fern_exception(e)
2483            raise e
2484
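No example is given above; a minimal sketch of fetching a dataset and iterating its items (the dataset name is a placeholder):

```python
dataset = langfuse.get_dataset("my-eval-dataset", fetch_items_page_size=100)

for item in dataset.items:
    print(item.input, item.expected_output)
```
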
2485    def get_dataset_run(
2486        self, *, dataset_name: str, run_name: str
2487    ) -> DatasetRunWithItems:
2488        """Fetch a dataset run by dataset name and run name.
2489
2490        Args:
2491            dataset_name (str): The name of the dataset.
2492            run_name (str): The name of the run.
2493
2494        Returns:
2495            DatasetRunWithItems: The dataset run with its items.
2496        """
2497        try:
2498            return self.api.datasets.get_run(
2499                dataset_name=self._url_encode(dataset_name),
2500                run_name=self._url_encode(run_name),
2501                request_options=None,
2502            )
2503        except Error as e:
2504            handle_fern_exception(e)
2505            raise e
2506
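A short sketch of fetching a single run by name (dataset and run names are placeholders):

```python
run = langfuse.get_dataset_run(
    dataset_name="my-eval-dataset",
    run_name="gpt-4-baseline",
)
print(run.id, run.name)
```
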
2507    def get_dataset_runs(
2508        self,
2509        *,
2510        dataset_name: str,
2511        page: Optional[int] = None,
2512        limit: Optional[int] = None,
2513    ) -> PaginatedDatasetRuns:
2514        """Fetch all runs for a dataset.
2515
2516        Args:
2517            dataset_name (str): The name of the dataset.
2518            page (Optional[int]): Page number, starts at 1.
2519            limit (Optional[int]): Limit of items per page.
2520
2521        Returns:
2522            PaginatedDatasetRuns: Paginated list of dataset runs.
2523        """
2524        try:
2525            return self.api.datasets.get_runs(
2526                dataset_name=self._url_encode(dataset_name),
2527                page=page,
2528                limit=limit,
2529                request_options=None,
2530            )
2531        except Error as e:
2532            handle_fern_exception(e)
2533            raise e
2534
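A short sketch of paging through the runs of a dataset (names are placeholders):

```python
runs = langfuse.get_dataset_runs(dataset_name="my-eval-dataset", page=1, limit=20)

for run in runs.data:
    print(run.name, run.created_at)
```
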
2535    def delete_dataset_run(
2536        self, *, dataset_name: str, run_name: str
2537    ) -> DeleteDatasetRunResponse:
2538        """Delete a dataset run and all its run items. This action is irreversible.
2539
2540        Args:
2541            dataset_name (str): The name of the dataset.
2542            run_name (str): The name of the run.
2543
2544        Returns:
2545            DeleteDatasetRunResponse: Confirmation of deletion.
2546        """
2547        try:
2548            return self.api.datasets.delete_run(
2549                dataset_name=self._url_encode(dataset_name),
2550                run_name=self._url_encode(run_name),
2551                request_options=None,
2552            )
2553        except Error as e:
2554            handle_fern_exception(e)
2555            raise e
2556
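A short sketch of deleting a run; note that this action is irreversible (names are placeholders):

```python
langfuse.delete_dataset_run(
    dataset_name="my-eval-dataset",
    run_name="stale-baseline-run",
)
```
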
2557    def run_experiment(
2558        self,
2559        *,
2560        name: str,
2561        run_name: Optional[str] = None,
2562        description: Optional[str] = None,
2563        data: ExperimentData,
2564        task: TaskFunction,
2565        evaluators: List[EvaluatorFunction] = [],
2566        composite_evaluator: Optional[CompositeEvaluatorFunction] = None,
2567        run_evaluators: List[RunEvaluatorFunction] = [],
2568        max_concurrency: int = 50,
2569        metadata: Optional[Dict[str, str]] = None,
2570    ) -> ExperimentResult:
2571        """Run an experiment on a dataset with automatic tracing and evaluation.
2572
2573        This method executes a task function on each item in the provided dataset,
2574        automatically traces all executions with Langfuse for observability, runs
2575        item-level and run-level evaluators on the outputs, and returns comprehensive
2576        results with evaluation metrics.
2577
2578        The experiment system provides:
2579        - Automatic tracing of all task executions
2580        - Concurrent processing with configurable limits
2581        - Comprehensive error handling that isolates failures
2582        - Integration with Langfuse datasets for experiment tracking
2583        - Flexible evaluation framework supporting both sync and async evaluators
2584
2585        Args:
2586            name: Human-readable name for the experiment. Used for identification
2587                in the Langfuse UI.
2588            run_name: Optional exact name for the experiment run. If provided, it is
2589                used verbatim as the dataset run name when `data` contains Langfuse dataset items.
2590                If not provided, it defaults to the experiment name with an ISO timestamp appended.
2591            description: Optional description explaining the experiment's purpose,
2592                methodology, or expected outcomes.
2593            data: Array of data items to process. Can be either:
2594                - List of dict-like items with 'input', 'expected_output', 'metadata' keys
2595                - List of Langfuse DatasetItem objects from dataset.items
2596            task: Function that processes each data item and returns output.
2597                Must accept 'item' as keyword argument and can return sync or async results.
2598                The task function signature should be: task(*, item, **kwargs) -> Any
2599            evaluators: List of functions to evaluate each item's output individually.
2600                Each evaluator receives input, output, expected_output, and metadata.
2601                Can return single Evaluation dict or list of Evaluation dicts.
2602            composite_evaluator: Optional function that creates composite scores from item-level evaluations.
2603                Receives the same inputs as item-level evaluators (input, output, expected_output, metadata)
2604                plus the list of evaluations from item-level evaluators. Useful for weighted averages,
2605                pass/fail decisions based on multiple criteria, or custom scoring logic combining multiple metrics.
2606            run_evaluators: List of functions to evaluate the entire experiment run.
2607                Each run evaluator receives all item_results and can compute aggregate metrics.
2608                Useful for calculating averages, distributions, or cross-item comparisons.
2609            max_concurrency: Maximum number of concurrent task executions (default: 50).
2610                Controls the number of items processed simultaneously. Adjust based on
2611                API rate limits and system resources.
2612            metadata: Optional metadata dictionary to attach to all experiment traces.
2613                This metadata will be included in every trace created during the experiment.
2614                If `data` consists of Langfuse dataset items, the metadata is also attached to the dataset run.
2615
2616        Returns:
2617            ExperimentResult containing:
2618            - run_name: The experiment run name. This equals the dataset run name if the experiment ran on a Langfuse dataset.
2619            - item_results: List of results for each processed item with outputs and evaluations
2620            - run_evaluations: List of aggregate evaluation results for the entire run
2621            - dataset_run_id: ID of the dataset run (if using Langfuse datasets)
2622            - dataset_run_url: Direct URL to view results in Langfuse UI (if applicable)
2623
2624        Raises:
2625            ValueError: If required parameters are missing or invalid
2626            Exception: If experiment setup fails (individual item failures are handled gracefully)
2627
2628        Examples:
2629            Basic experiment with local data:
2630            ```python
2631            def summarize_text(*, item, **kwargs):
2632                return f"Summary: {item['input'][:50]}..."
2633
2634            def length_evaluator(*, input, output, expected_output=None, **kwargs):
2635                return {
2636                    "name": "output_length",
2637                    "value": len(output),
2638                    "comment": f"Output contains {len(output)} characters"
2639                }
2640
2641            result = langfuse.run_experiment(
2642                name="Text Summarization Test",
2643                description="Evaluate summarization quality and length",
2644                data=[
2645                    {"input": "Long article text...", "expected_output": "Expected summary"},
2646                    {"input": "Another article...", "expected_output": "Another summary"}
2647                ],
2648                task=summarize_text,
2649                evaluators=[length_evaluator]
2650            )
2651
2652            print(f"Processed {len(result.item_results)} items")
2653            for item_result in result.item_results:
2654                print(f"Input: {item_result.item['input']}")
2655                print(f"Output: {item_result.output}")
2656                print(f"Evaluations: {item_result.evaluations}")
2657            ```
2658
2659            Advanced experiment with async task and multiple evaluators:
2660            ```python
2661            async def llm_task(*, item, **kwargs):
2662                # Simulate async LLM call
2663                response = await openai_client.chat.completions.create(
2664                    model="gpt-4",
2665                    messages=[{"role": "user", "content": item["input"]}]
2666                )
2667                return response.choices[0].message.content
2668
2669            def accuracy_evaluator(*, input, output, expected_output=None, **kwargs):
2670                if expected_output and expected_output.lower() in output.lower():
2671                    return {"name": "accuracy", "value": 1.0, "comment": "Correct answer"}
2672                return {"name": "accuracy", "value": 0.0, "comment": "Incorrect answer"}
2673
2674            def toxicity_evaluator(*, input, output, expected_output=None, **kwargs):
2675                # Simulate toxicity check
2676                toxicity_score = check_toxicity(output)  # Your toxicity checker
2677                return {
2678                    "name": "toxicity",
2679                    "value": toxicity_score,
2680                    "comment": f"Toxicity level: {'high' if toxicity_score > 0.7 else 'low'}"
2681                }
2682
2683            def average_accuracy(*, item_results, **kwargs):
2684                accuracies = [
2685                    eval.value for result in item_results
2686                    for eval in result.evaluations
2687                    if eval.name == "accuracy"
2688                ]
2689                return {
2690                    "name": "average_accuracy",
2691                    "value": sum(accuracies) / len(accuracies) if accuracies else 0,
2692                    "comment": f"Average accuracy across {len(accuracies)} items"
2693                }
2694
2695            result = langfuse.run_experiment(
2696                name="LLM Safety and Accuracy Test",
2697                description="Evaluate model accuracy and safety across diverse prompts",
2698                data=test_dataset,  # Your dataset items
2699                task=llm_task,
2700                evaluators=[accuracy_evaluator, toxicity_evaluator],
2701                run_evaluators=[average_accuracy],
2702                max_concurrency=5,  # Limit concurrent API calls
2703                metadata={"model": "gpt-4", "temperature": 0.7}
2704            )
2705            ```
2706
2707            Using with Langfuse datasets:
2708            ```python
2709            # Get dataset from Langfuse
2710            dataset = langfuse.get_dataset("my-eval-dataset")
2711
2712            result = dataset.run_experiment(
2713                name="Production Model Evaluation",
2714                description="Monthly evaluation of production model performance",
2715                task=my_production_task,
2716                evaluators=[accuracy_evaluator, latency_evaluator]
2717            )
2718
2719            # Results automatically linked to dataset in Langfuse UI
2720            print(f"View results: {result.dataset_run_url}")
2721            ```
2722
2723        Note:
2724            - Task and evaluator functions can be either synchronous or asynchronous
2725            - Individual item failures are logged but don't stop the experiment
2726            - All executions are automatically traced and visible in Langfuse UI
2727            - When using Langfuse datasets, results are automatically linked for easy comparison
2728            - This method works in both sync and async contexts (Jupyter notebooks, web apps, etc.)
2729            - Async execution is handled automatically with smart event loop detection
2730        """
2731        return cast(
2732            ExperimentResult,
2733            run_async_safely(
2734                self._run_experiment_async(
2735                    name=name,
2736                    run_name=self._create_experiment_run_name(
2737                        name=name, run_name=run_name
2738                    ),
2739                    description=description,
2740                    data=data,
2741                    task=task,
2742                    evaluators=evaluators or [],
2743                    composite_evaluator=composite_evaluator,
2744                    run_evaluators=run_evaluators or [],
2745                    max_concurrency=max_concurrency,
2746                    metadata=metadata,
2747                ),
2748            ),
2749        )
2750
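The `composite_evaluator` parameter documented above is not exercised by the docstring examples. Below is a minimal sketch of how it could be wired up, assuming a `langfuse` client instance; the evaluator names, weights, and task are illustrative, and evaluations are accessed by attribute as in the `average_accuracy` example.

```python
from langfuse import Langfuse

langfuse = Langfuse()  # reads LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY from the environment

def exact_match_evaluator(*, input, output, expected_output=None, **kwargs):
    return {"name": "exact_match", "value": 1.0 if output == expected_output else 0.0}

def length_evaluator(*, input, output, expected_output=None, **kwargs):
    return {"name": "output_length", "value": len(output)}

def weighted_composite(*, input, output, expected_output, metadata, evaluations, **kwargs):
    # Combine the item-level evaluations into one weighted score.
    weights = {"exact_match": 0.9, "output_length": 0.1}  # illustrative weights
    total = sum(
        e.value * weights.get(e.name, 0)
        for e in evaluations
        if isinstance(e.value, (int, float))
    )
    return {"name": "composite_score", "value": total}

result = langfuse.run_experiment(
    name="Composite scoring demo",
    data=[{"input": "Capital of France?", "expected_output": "Paris"}],
    task=lambda *, item, **kwargs: "Paris",
    evaluators=[exact_match_evaluator, length_evaluator],
    composite_evaluator=weighted_composite,
)
```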
2751    async def _run_experiment_async(
2752        self,
2753        *,
2754        name: str,
2755        run_name: str,
2756        description: Optional[str],
2757        data: ExperimentData,
2758        task: TaskFunction,
2759        evaluators: List[EvaluatorFunction],
2760        composite_evaluator: Optional[CompositeEvaluatorFunction],
2761        run_evaluators: List[RunEvaluatorFunction],
2762        max_concurrency: int,
2763        metadata: Optional[Dict[str, Any]] = None,
2764    ) -> ExperimentResult:
2765        langfuse_logger.debug(
2766            f"Starting experiment '{name}' run '{run_name}' with {len(data)} items"
2767        )
2768
2769        # Set up concurrency control
2770        semaphore = asyncio.Semaphore(max_concurrency)
2771
2772        # Process all items
2773        async def process_item(item: ExperimentItem) -> ExperimentItemResult:
2774            async with semaphore:
2775                return await self._process_experiment_item(
2776                    item,
2777                    task,
2778                    evaluators,
2779                    composite_evaluator,
2780                    name,
2781                    run_name,
2782                    description,
2783                    metadata,
2784                )
2785
2786        # Run all items concurrently
2787        tasks = [process_item(item) for item in data]
2788        item_results = await asyncio.gather(*tasks, return_exceptions=True)
2789
2790        # Filter out any exceptions and log errors
2791        valid_results: List[ExperimentItemResult] = []
2792        for i, result in enumerate(item_results):
2793            if isinstance(result, Exception):
2794                langfuse_logger.error(f"Item {i} failed: {result}")
2795            elif isinstance(result, ExperimentItemResult):
2796                valid_results.append(result)  # type: ignore
2797
2798        # Run experiment-level evaluators
2799        run_evaluations: List[Evaluation] = []
2800        for run_evaluator in run_evaluators:
2801            try:
2802                evaluations = await _run_evaluator(
2803                    run_evaluator, item_results=valid_results
2804                )
2805                run_evaluations.extend(evaluations)
2806            except Exception as e:
2807                langfuse_logger.error(f"Run evaluator failed: {e}")
2808
2809        # Generate dataset run URL if applicable
2810        dataset_run_id = valid_results[0].dataset_run_id if valid_results else None
2811        dataset_run_url = None
2812        if dataset_run_id and data:
2813            try:
2814                # Check if the first item has dataset_id (for DatasetItem objects)
2815                first_item = data[0]
2816                dataset_id = None
2817
2818                if hasattr(first_item, "dataset_id"):
2819                    dataset_id = getattr(first_item, "dataset_id", None)
2820
2821                if dataset_id:
2822                    project_id = self._get_project_id()
2823
2824                    if project_id:
2825                        dataset_run_url = f"{self._base_url}/project/{project_id}/datasets/{dataset_id}/runs/{dataset_run_id}"
2826
2827            except Exception:
2828                pass  # URL generation is optional
2829
2830        # Store run-level evaluations as scores
2831        for evaluation in run_evaluations:
2832            try:
2833                if dataset_run_id:
2834                    self.create_score(
2835                        dataset_run_id=dataset_run_id,
2836                        name=evaluation.name or "<unknown>",
2837                        value=evaluation.value,  # type: ignore
2838                        comment=evaluation.comment,
2839                        metadata=evaluation.metadata,
2840                        data_type=evaluation.data_type,  # type: ignore
2841                        config_id=evaluation.config_id,
2842                    )
2843
2844            except Exception as e:
2845                langfuse_logger.error(f"Failed to store run evaluation: {e}")
2846
2847        # Flush scores and traces
2848        self.flush()
2849
2850        return ExperimentResult(
2851            name=name,
2852            run_name=run_name,
2853            description=description,
2854            item_results=valid_results,
2855            run_evaluations=run_evaluations,
2856            dataset_run_id=dataset_run_id,
2857            dataset_run_url=dataset_run_url,
2858        )
2859
2860    async def _process_experiment_item(
2861        self,
2862        item: ExperimentItem,
2863        task: Callable,
2864        evaluators: List[Callable],
2865        composite_evaluator: Optional[CompositeEvaluatorFunction],
2866        experiment_name: str,
2867        experiment_run_name: str,
2868        experiment_description: Optional[str],
2869        experiment_metadata: Optional[Dict[str, Any]] = None,
2870    ) -> ExperimentItemResult:
2871        span_name = "experiment-item-run"
2872
2873        with self.start_as_current_span(name=span_name) as span:
2874            try:
2875                input_data = (
2876                    item.get("input")
2877                    if isinstance(item, dict)
2878                    else getattr(item, "input", None)
2879                )
2880
2881                if input_data is None:
2882                    raise ValueError("Experiment Item is missing input. Skipping item.")
2883
2884                expected_output = (
2885                    item.get("expected_output")
2886                    if isinstance(item, dict)
2887                    else getattr(item, "expected_output", None)
2888                )
2889
2890                item_metadata = (
2891                    item.get("metadata")
2892                    if isinstance(item, dict)
2893                    else getattr(item, "metadata", None)
2894                )
2895
2896                final_observation_metadata = {
2897                    "experiment_name": experiment_name,
2898                    "experiment_run_name": experiment_run_name,
2899                    **(experiment_metadata or {}),
2900                }
2901
2902                trace_id = span.trace_id
2903                dataset_id = None
2904                dataset_item_id = None
2905                dataset_run_id = None
2906
2907                # Link to dataset run if this is a dataset item
2908                if hasattr(item, "id") and hasattr(item, "dataset_id"):
2909                    try:
2910                        # Use sync API to avoid event loop issues when run_async_safely
2911                        # creates multiple event loops across different threads
2912                        dataset_run_item = await asyncio.to_thread(
2913                            self.api.dataset_run_items.create,
2914                            request=CreateDatasetRunItemRequest(
2915                                runName=experiment_run_name,
2916                                runDescription=experiment_description,
2917                                metadata=experiment_metadata,
2918                                datasetItemId=item.id,  # type: ignore
2919                                traceId=trace_id,
2920                                observationId=span.id,
2921                            ),
2922                        )
2923
2924                        dataset_run_id = dataset_run_item.dataset_run_id
2925
2926                    except Exception as e:
2927                        langfuse_logger.error(f"Failed to create dataset run item: {e}")
2928
2929                if (
2930                    not isinstance(item, dict)
2931                    and hasattr(item, "dataset_id")
2932                    and hasattr(item, "id")
2933                ):
2934                    dataset_id = item.dataset_id
2935                    dataset_item_id = item.id
2936
2937                    final_observation_metadata.update(
2938                        {"dataset_id": dataset_id, "dataset_item_id": dataset_item_id}
2939                    )
2940
2941                if isinstance(item_metadata, dict):
2942                    final_observation_metadata.update(item_metadata)
2943
2944                experiment_id = dataset_run_id or self._create_observation_id()
2945                experiment_item_id = (
2946                    dataset_item_id or get_sha256_hash_hex(_serialize(input_data))[:16]
2947                )
2948                span._otel_span.set_attributes(
2949                    {
2950                        k: v
2951                        for k, v in {
2952                            LangfuseOtelSpanAttributes.ENVIRONMENT: LANGFUSE_SDK_EXPERIMENT_ENVIRONMENT,
2953                            LangfuseOtelSpanAttributes.EXPERIMENT_DESCRIPTION: experiment_description,
2954                            LangfuseOtelSpanAttributes.EXPERIMENT_ITEM_EXPECTED_OUTPUT: _serialize(
2955                                expected_output
2956                            ),
2957                        }.items()
2958                        if v is not None
2959                    }
2960                )
2961
2962                propagated_experiment_attributes = PropagatedExperimentAttributes(
2963                    experiment_id=experiment_id,
2964                    experiment_name=experiment_run_name,
2965                    experiment_metadata=_serialize(experiment_metadata),
2966                    experiment_dataset_id=dataset_id,
2967                    experiment_item_id=experiment_item_id,
2968                    experiment_item_metadata=_serialize(item_metadata),
2969                    experiment_item_root_observation_id=span.id,
2970                )
2971
2972                with _propagate_attributes(experiment=propagated_experiment_attributes):
2973                    output = await _run_task(task, item)
2974
2975                span.update(
2976                    input=input_data,
2977                    output=output,
2978                    metadata=final_observation_metadata,
2979                )
2980
2981            except Exception as e:
2982                span.update(
2983                    output=f"Error: {str(e)}", level="ERROR", status_message=str(e)
2984                )
2985                raise e
2986
2987            # Run evaluators
2988            evaluations = []
2989
2990            for evaluator in evaluators:
2991                try:
2992                    eval_metadata: Optional[Dict[str, Any]] = None
2993
2994                    if isinstance(item, dict):
2995                        eval_metadata = item.get("metadata")
2996                    elif hasattr(item, "metadata"):
2997                        eval_metadata = item.metadata
2998
2999                    with _propagate_attributes(
3000                        experiment=propagated_experiment_attributes
3001                    ):
3002                        eval_results = await _run_evaluator(
3003                            evaluator,
3004                            input=input_data,
3005                            output=output,
3006                            expected_output=expected_output,
3007                            metadata=eval_metadata,
3008                        )
3009                        evaluations.extend(eval_results)
3010
3011                        # Store evaluations as scores
3012                        for evaluation in eval_results:
3013                            self.create_score(
3014                                trace_id=trace_id,
3015                                observation_id=span.id,
3016                                name=evaluation.name,
3017                                value=evaluation.value,  # type: ignore
3018                                comment=evaluation.comment,
3019                                metadata=evaluation.metadata,
3020                                config_id=evaluation.config_id,
3021                                data_type=evaluation.data_type,  # type: ignore
3022                            )
3023
3024                except Exception as e:
3025                    langfuse_logger.error(f"Evaluator failed: {e}")
3026
3027            # Run composite evaluator if provided and we have evaluations
3028            if composite_evaluator and evaluations:
3029                try:
3030                    composite_eval_metadata: Optional[Dict[str, Any]] = None
3031                    if isinstance(item, dict):
3032                        composite_eval_metadata = item.get("metadata")
3033                    elif hasattr(item, "metadata"):
3034                        composite_eval_metadata = item.metadata
3035
3036                    with _propagate_attributes(
3037                        experiment=propagated_experiment_attributes
3038                    ):
3039                        result = composite_evaluator(
3040                            input=input_data,
3041                            output=output,
3042                            expected_output=expected_output,
3043                            metadata=composite_eval_metadata,
3044                            evaluations=evaluations,
3045                        )
3046
3047                        # Handle async composite evaluators
3048                        if asyncio.iscoroutine(result):
3049                            result = await result
3050
3051                        # Normalize to list
3052                        composite_evals: List[Evaluation] = []
3053                        if isinstance(result, (dict, Evaluation)):
3054                            composite_evals = [result]  # type: ignore
3055                        elif isinstance(result, list):
3056                            composite_evals = result  # type: ignore
3057
3058                        # Store composite evaluations as scores and add to evaluations list
3059                        for composite_evaluation in composite_evals:
3060                            self.create_score(
3061                                trace_id=trace_id,
3062                                observation_id=span.id,
3063                                name=composite_evaluation.name,
3064                                value=composite_evaluation.value,  # type: ignore
3065                                comment=composite_evaluation.comment,
3066                                metadata=composite_evaluation.metadata,
3067                                config_id=composite_evaluation.config_id,
3068                                data_type=composite_evaluation.data_type,  # type: ignore
3069                            )
3070                            evaluations.append(composite_evaluation)
3071
3072                except Exception as e:
3073                    langfuse_logger.error(f"Composite evaluator failed: {e}")
3074
3075            return ExperimentItemResult(
3076                item=item,
3077                output=output,
3078                evaluations=evaluations,
3079                trace_id=trace_id,
3080                dataset_run_id=dataset_run_id,
3081            )
3082
3083    def _create_experiment_run_name(
3084        self, *, name: Optional[str] = None, run_name: Optional[str] = None
3085    ) -> str:
3086        if run_name:
3087            return run_name
3088
3089        iso_timestamp = _get_timestamp().isoformat().replace("+00:00", "Z")
3090
3091        return f"{name} - {iso_timestamp}"
3092
3093    def run_batched_evaluation(
3094        self,
3095        *,
3096        scope: Literal["traces", "observations"],
3097        mapper: MapperFunction,
3098        filter: Optional[str] = None,
3099        fetch_batch_size: int = 50,
3100        max_items: Optional[int] = None,
3101        max_retries: int = 3,
3102        evaluators: List[EvaluatorFunction],
3103        composite_evaluator: Optional[CompositeEvaluatorFunction] = None,
3104        max_concurrency: int = 50,
3105        metadata: Optional[Dict[str, Any]] = None,
3106        resume_from: Optional[BatchEvaluationResumeToken] = None,
3107        verbose: bool = False,
3108    ) -> BatchEvaluationResult:
3109        """Fetch traces or observations and run evaluations on each item.
3110
3111        This method provides a powerful way to evaluate existing data in Langfuse at scale.
3112        It fetches items based on filters, transforms them using a mapper function, runs
3113        evaluators on each item, and creates scores that are linked back to the original
3114        entities. This is ideal for:
3115
3116        - Running evaluations on production traces after deployment
3117        - Backtesting new evaluation metrics on historical data
3118        - Batch scoring of observations for quality monitoring
3119        - Periodic evaluation runs on recent data
3120
3121        The method uses a streaming/pipeline approach to process items in batches, making
3122        it memory-efficient for large datasets. It includes comprehensive error handling,
3123        retry logic, and resume capability for long-running evaluations.
3124
3125        Args:
3126            scope: The type of items to evaluate. Must be one of:
3127                - "traces": Evaluate complete traces with all their observations
3128                - "observations": Evaluate individual observations (spans, generations, events)
3129            mapper: Function that transforms API response objects into evaluator inputs.
3130                Receives a trace/observation object and returns an EvaluatorInputs
3131                instance with input, output, expected_output, and metadata fields.
3132                Can be sync or async.
3133            evaluators: List of evaluation functions to run on each item. Each evaluator
3134                receives the mapped inputs and returns Evaluation object(s). Evaluator
3135                failures are logged but don't stop the batch evaluation.
3136            filter: Optional JSON filter string for querying items (same format as Langfuse API). Examples:
3137                - '{"tags": ["production"]}'
3138                - '{"user_id": "user123", "timestamp": {"operator": ">", "value": "2024-01-01"}}'
3139                Default: None (fetches all items).
3140            fetch_batch_size: Number of items to fetch per API call and hold in memory.
3141                Larger values may be faster but use more memory. Default: 50.
3142            max_items: Maximum total number of items to process. If None, processes all
3143                items matching the filter. Useful for testing or limiting evaluation runs.
3144                Default: None (process all).
3145            max_concurrency: Maximum number of items to evaluate concurrently. Controls
3146                parallelism and resource usage. Default: 50.
3147            composite_evaluator: Optional function that creates a composite score from
3148                item-level evaluations. Receives the original item and its evaluations,
3149                returns a single Evaluation. Useful for weighted averages or combined metrics.
3150                Default: None.
3151            metadata: Optional metadata dict to add to all created scores. Useful for
3152                tracking evaluation runs, versions, or other context. Default: None.
3153            max_retries: Maximum number of retry attempts for failed batch fetches.
3154                Uses exponential backoff (1s, 2s, 4s). Default: 3.
3155            verbose: If True, logs progress information to console. Useful for monitoring
3156                long-running evaluations. Default: False.
3157            resume_from: Optional resume token from a previous incomplete run. Allows
3158                continuing evaluation after interruption or failure. Default: None.
3159
3160
3161        Returns:
3162            BatchEvaluationResult containing:
3163                - total_items_fetched: Number of items fetched from API
3164                - total_items_processed: Number of items successfully evaluated
3165                - total_items_failed: Number of items that failed evaluation
3166                - total_scores_created: Scores created by item-level evaluators
3167                - total_composite_scores_created: Scores created by composite evaluator
3168                - total_evaluations_failed: Individual evaluator failures
3169                - evaluator_stats: Per-evaluator statistics (success rate, scores created)
3170                - resume_token: Token for resuming if incomplete (None if completed)
3171                - completed: True if all items processed
3172                - duration_seconds: Total execution time
3173                - failed_item_ids: IDs of items that failed
3174                - error_summary: Error types and counts
3175                - has_more_items: True if max_items reached but more exist
3176
3177        Raises:
3178            ValueError: If invalid scope is provided.
3179
3180        Examples:
3181            Basic trace evaluation:
3182            ```python
3183            from langfuse import Langfuse, EvaluatorInputs, Evaluation
3184
3185            client = Langfuse()
3186
3187            # Define mapper to extract fields from traces
3188            def trace_mapper(trace):
3189                return EvaluatorInputs(
3190                    input=trace.input,
3191                    output=trace.output,
3192                    expected_output=None,
3193                    metadata={"trace_id": trace.id}
3194                )
3195
3196            # Define evaluator
3197            def length_evaluator(*, input, output, expected_output, metadata):
3198                return Evaluation(
3199                    name="output_length",
3200                    value=len(output) if output else 0
3201                )
3202
3203            # Run batch evaluation
3204            result = client.run_batched_evaluation(
3205                scope="traces",
3206                mapper=trace_mapper,
3207                evaluators=[length_evaluator],
3208                filter='{"tags": ["production"]}',
3209                max_items=1000,
3210                verbose=True
3211            )
3212
3213            print(f"Processed {result.total_items_processed} traces")
3214            print(f"Created {result.total_scores_created} scores")
3215            ```
3216
3217            Evaluation with composite scorer:
3218            ```python
3219            def accuracy_evaluator(*, input, output, expected_output, metadata):
3220                # ... evaluation logic
3221                return Evaluation(name="accuracy", value=0.85)
3222
3223            def relevance_evaluator(*, input, output, expected_output, metadata):
3224                # ... evaluation logic
3225                return Evaluation(name="relevance", value=0.92)
3226
3227            def composite_evaluator(*, item, evaluations):
3228                # Weighted average of evaluations
3229                weights = {"accuracy": 0.6, "relevance": 0.4}
3230                total = sum(
3231                    e.value * weights.get(e.name, 0)
3232                    for e in evaluations
3233                    if isinstance(e.value, (int, float))
3234                )
3235                return Evaluation(
3236                    name="composite_score",
3237                    value=total,
3238                    comment=f"Weighted average of {len(evaluations)} metrics"
3239                )
3240
3241            result = client.run_batched_evaluation(
3242                scope="traces",
3243                mapper=trace_mapper,
3244                evaluators=[accuracy_evaluator, relevance_evaluator],
3245                composite_evaluator=composite_evaluator,
3246                filter='{"user_id": "important_user"}',
3247                verbose=True
3248            )
3249            ```
3250
3251            Handling incomplete runs with resume:
3252            ```python
3253            # Initial run that may fail or timeout
3254            result = client.run_batched_evaluation(
3255                scope="observations",
3256                mapper=obs_mapper,
3257                evaluators=[my_evaluator],
3258                max_items=10000,
3259                verbose=True
3260            )
3261
3262            # Check if incomplete
3263            if not result.completed and result.resume_token:
3264                print(f"Processed {result.resume_token.items_processed} items before interruption")
3265
3266                # Resume from where it left off
3267                result = client.run_batched_evaluation(
3268                    scope="observations",
3269                    mapper=obs_mapper,
3270                    evaluators=[my_evaluator],
3271                    resume_from=result.resume_token,
3272                    verbose=True
3273                )
3274
3275            print(f"Total items processed: {result.total_items_processed}")
3276            ```
3277
3278            Monitoring evaluator performance:
3279            ```python
3280            result = client.run_batched_evaluation(...)
3281
3282            for stats in result.evaluator_stats:
3283                success_rate = stats.successful_runs / stats.total_runs
3284                print(f"{stats.name}:")
3285                print(f"  Success rate: {success_rate:.1%}")
3286                print(f"  Scores created: {stats.total_scores_created}")
3287
3288                if stats.failed_runs > 0:
3289                    print(f"  ⚠️  Failed {stats.failed_runs} times")
3290            ```
3291
3292        Note:
3293            - Evaluator failures are logged but don't stop the batch evaluation
3294            - Individual item failures are tracked but don't stop processing
3295            - Fetch failures are retried with exponential backoff
3296            - All scores are automatically flushed to Langfuse at the end
3297            - The resume mechanism uses timestamp-based filtering to avoid duplicates
3298        """
3299        runner = BatchEvaluationRunner(self)
3300
3301        return cast(
3302            BatchEvaluationResult,
3303            run_async_safely(
3304                runner.run_async(
3305                    scope=scope,
3306                    mapper=mapper,
3307                    evaluators=evaluators,
3308                    filter=filter,
3309                    fetch_batch_size=fetch_batch_size,
3310                    max_items=max_items,
3311                    max_concurrency=max_concurrency,
3312                    composite_evaluator=composite_evaluator,
3313                    metadata=metadata,
3314                    max_retries=max_retries,
3315                    verbose=verbose,
3316                    resume_from=resume_from,
3317                )
3318            ),
3319        )
3320
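The resume example above references an `obs_mapper` without defining it. A minimal sketch, assuming the observation objects returned by the API expose `input`, `output`, and `id` attributes (adjust the field access to wherever your observations actually carry their data):

```python
from langfuse import EvaluatorInputs

def obs_mapper(observation):
    # Map a fetched observation to the inputs expected by the evaluators.
    return EvaluatorInputs(
        input=observation.input,
        output=observation.output,
        expected_output=None,
        metadata={"observation_id": observation.id},
    )
```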
3321    def auth_check(self) -> bool:
3322        """Check if the provided credentials (public and secret key) are valid.
3323
3324        Raises:
3325            Exception: If no projects were found for the provided credentials.
3326
3327        Note:
3328            This method is blocking. It is discouraged to use it in production code.
3329        """
3330        try:
3331            projects = self.api.projects.get()
3332            langfuse_logger.debug(
3333                f"Auth check successful, found {len(projects.data)} projects"
3334            )
3335            if len(projects.data) == 0:
3336                raise Exception(
3337                    "Auth check failed, no project found for the keys provided."
3338                )
3339            return True
3340
3341        except AttributeError as e:
3342            langfuse_logger.warning(
3343                f"Auth check failed: Client not properly initialized. Error: {e}"
3344            )
3345            return False
3346
3347        except Error as e:
3348            handle_fern_exception(e)
3349            raise e
3350
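A minimal startup-time sketch using `auth_check`; since the call is blocking, it is shown here as a one-off check at application start rather than inside request handling:

```python
from langfuse import Langfuse

langfuse = Langfuse()  # credentials via LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY

if not langfuse.auth_check():
    raise RuntimeError("Langfuse client could not be initialized with the provided credentials.")
```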
3351    def create_dataset(
3352        self,
3353        *,
3354        name: str,
3355        description: Optional[str] = None,
3356        metadata: Optional[Any] = None,
3357        input_schema: Optional[Any] = None,
3358        expected_output_schema: Optional[Any] = None,
3359    ) -> Dataset:
3360        """Create a dataset with the given name on Langfuse.
3361
3362        Args:
3363            name: Name of the dataset to create.
3364            description: Description of the dataset. Defaults to None.
3365            metadata: Additional metadata. Defaults to None.
3366            input_schema: JSON Schema for validating dataset item inputs. When set, all new items will be validated against this schema.
3367            expected_output_schema: JSON Schema for validating dataset item expected outputs. When set, all new items will be validated against this schema.
3368
3369        Returns:
3370            Dataset: The created dataset as returned by the Langfuse API.
3371        """
3372        try:
3373            body = CreateDatasetRequest(
3374                name=name,
3375                description=description,
3376                metadata=metadata,
3377                inputSchema=input_schema,
3378                expectedOutputSchema=expected_output_schema,
3379            )
3380            langfuse_logger.debug(f"Creating dataset {body}")
3381
3382            return self.api.datasets.create(request=body)
3383
3384        except Error as e:
3385            handle_fern_exception(e)
3386            raise e
3387
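A usage sketch for `create_dataset` including the optional item schemas; the dataset name and JSON Schema contents are illustrative, and a `langfuse` client is assumed:

```python
from langfuse import Langfuse

langfuse = Langfuse()

dataset = langfuse.create_dataset(
    name="capital_cities",
    description="Country-to-capital QA pairs",
    metadata={"owner": "eval-team"},
    input_schema={
        "type": "object",
        "properties": {"country": {"type": "string"}},
        "required": ["country"],
    },
    expected_output_schema={"type": "string"},
)
```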
3388    def create_dataset_item(
3389        self,
3390        *,
3391        dataset_name: str,
3392        input: Optional[Any] = None,
3393        expected_output: Optional[Any] = None,
3394        metadata: Optional[Any] = None,
3395        source_trace_id: Optional[str] = None,
3396        source_observation_id: Optional[str] = None,
3397        status: Optional[DatasetStatus] = None,
3398        id: Optional[str] = None,
3399    ) -> DatasetItem:
3400        """Create a dataset item.
3401
3402        Upserts if an item with id already exists.
3403
3404        Args:
3405            dataset_name: Name of the dataset in which the dataset item should be created.
3406            input: Input data. Defaults to None. Can contain any dict, list or scalar.
3407            expected_output: Expected output data. Defaults to None. Can contain any dict, list or scalar.
3408            metadata: Additional metadata. Defaults to None. Can contain any dict, list or scalar.
3409            source_trace_id: Id of the source trace. Defaults to None.
3410            source_observation_id: Id of the source observation. Defaults to None.
3411            status: Status of the dataset item. Defaults to ACTIVE for newly created items.
3412            id: Id of the dataset item. Defaults to None. Provide your own id if you want to dedupe dataset items. Id needs to be globally unique and cannot be reused across datasets.
3413
3414        Returns:
3415            DatasetItem: The created dataset item as returned by the Langfuse API.
3416
3417        Example:
3418            ```python
3419            from langfuse import Langfuse
3420
3421            langfuse = Langfuse()
3422
3423            # Uploading items to the Langfuse dataset named "capital_cities"
3424            langfuse.create_dataset_item(
3425                dataset_name="capital_cities",
3426                input={"input": {"country": "Italy"}},
3427                expected_output={"expected_output": "Rome"},
3428                metadata={"foo": "bar"}
3429            )
3430            ```
3431        """
3432        try:
3433            body = CreateDatasetItemRequest(
3434                datasetName=dataset_name,
3435                input=input,
3436                expectedOutput=expected_output,
3437                metadata=metadata,
3438                sourceTraceId=source_trace_id,
3439                sourceObservationId=source_observation_id,
3440                status=status,
3441                id=id,
3442            )
3443            langfuse_logger.debug(f"Creating dataset item {body}")
3444            return self.api.dataset_items.create(request=body)
3445        except Error as e:
3446            handle_fern_exception(e)
3447            raise e
3448
3449    def resolve_media_references(
3450        self,
3451        *,
3452        obj: Any,
3453        resolve_with: Literal["base64_data_uri"],
3454        max_depth: int = 10,
3455        content_fetch_timeout_seconds: int = 5,
3456    ) -> Any:
3457        """Replace media reference strings in an object with base64 data URIs.
3458
3459        This method recursively traverses an object (up to max_depth) looking for media reference strings
3460        in the format "@@@langfuseMedia:...@@@". When found, it (synchronously) fetches the actual media content using
3461        the provided Langfuse client and replaces the reference string with a base64 data URI.
3462
3463        If fetching media content fails for a reference string, a warning is logged and the reference
3464        string is left unchanged.
3465
3466        Args:
3467            obj: The object to process. Can be a primitive value, array, or nested object.
3468                If the object has a __dict__ attribute, a dict will be returned instead of the original object type.
3469            resolve_with: The representation of the media content to replace the media reference string with.
3470                Currently only "base64_data_uri" is supported.
3471            max_depth: int: The maximum depth to traverse the object. Default is 10.
3472            content_fetch_timeout_seconds: int: The timeout in seconds for fetching media content. Default is 5.
3473
3474        Returns:
3475            A deep copy of the input object with all media references replaced with base64 data URIs where possible.
3476            If the input object has a __dict__ attribute, a dict will be returned instead of the original object type.
3477
3478        Example:
3479            obj = {
3480                "image": "@@@langfuseMedia:type=image/jpeg|id=123|source=bytes@@@",
3481                "nested": {
3482                    "pdf": "@@@langfuseMedia:type=application/pdf|id=456|source=bytes@@@"
3483                }
3484            }
3485
3486            result = langfuse.resolve_media_references(obj=obj, resolve_with="base64_data_uri")
3487
3488            # Result:
3489            # {
3490            #     "image": "data:image/jpeg;base64,/9j/4AAQSkZJRg...",
3491            #     "nested": {
3492            #         "pdf": "data:application/pdf;base64,JVBERi0xLjcK..."
3493            #     }
3494            # }
3495        """
3496        return LangfuseMedia.resolve_media_references(
3497            langfuse_client=self,
3498            obj=obj,
3499            resolve_with=resolve_with,
3500            max_depth=max_depth,
3501            content_fetch_timeout_seconds=content_fetch_timeout_seconds,
3502        )
3503
3504    @overload
3505    def get_prompt(
3506        self,
3507        name: str,
3508        *,
3509        version: Optional[int] = None,
3510        label: Optional[str] = None,
3511        type: Literal["chat"],
3512        cache_ttl_seconds: Optional[int] = None,
3513        fallback: Optional[List[ChatMessageDict]] = None,
3514        max_retries: Optional[int] = None,
3515        fetch_timeout_seconds: Optional[int] = None,
3516    ) -> ChatPromptClient: ...
3517
3518    @overload
3519    def get_prompt(
3520        self,
3521        name: str,
3522        *,
3523        version: Optional[int] = None,
3524        label: Optional[str] = None,
3525        type: Literal["text"] = "text",
3526        cache_ttl_seconds: Optional[int] = None,
3527        fallback: Optional[str] = None,
3528        max_retries: Optional[int] = None,
3529        fetch_timeout_seconds: Optional[int] = None,
3530    ) -> TextPromptClient: ...
3531
3532    def get_prompt(
3533        self,
3534        name: str,
3535        *,
3536        version: Optional[int] = None,
3537        label: Optional[str] = None,
3538        type: Literal["chat", "text"] = "text",
3539        cache_ttl_seconds: Optional[int] = None,
3540        fallback: Union[Optional[List[ChatMessageDict]], Optional[str]] = None,
3541        max_retries: Optional[int] = None,
3542        fetch_timeout_seconds: Optional[int] = None,
3543    ) -> PromptClient:
3544        """Get a prompt.
3545
3546        This method attempts to fetch the requested prompt from the local cache. If the prompt is not found
3547        in the cache or if the cached prompt has expired, it will try to fetch the prompt from the server again
3548        and update the cache. If fetching the new prompt fails, and there is an expired prompt in the cache, it will
3549        return the expired prompt as a fallback.
3550
3551        Args:
3552            name (str): The name of the prompt to retrieve.
3553
3554        Keyword Args:
3555            version (Optional[int]): The version of the prompt to retrieve. If neither version nor label is specified, the `production` label is returned. Specify either version or label, not both.
3556            label (Optional[str]): The label of the prompt to retrieve. If neither version nor label is specified, the `production` label is returned. Specify either version or label, not both.
3557            cache_ttl_seconds (Optional[int]): Time-to-live in seconds for caching the prompt. Must be specified as a
3558            keyword argument. If not set, defaults to 60 seconds. Disables caching if set to 0.
3559            type (Literal["chat", "text"]): The type of the prompt to retrieve. Defaults to "text".
3560            fallback (Union[Optional[List[ChatMessageDict]], Optional[str]]): The prompt content to return if fetching the prompt fails. Important on the first call where no cached prompt is available. Follows Langfuse prompt formatting with double curly braces for variables. Defaults to None.
3561            max_retries (Optional[int]): The maximum number of retries in case of API/network errors. Defaults to 2. The maximum value is 4. Retries have an exponential backoff with a maximum delay of 10 seconds.
3562            fetch_timeout_seconds (Optional[int]): The timeout in seconds for fetching the prompt. Defaults to the default timeout set on the SDK, which is 5 seconds by default.
3563
3564        Returns:
3565            The prompt object retrieved from the cache or directly fetched if not cached or expired of type
3566            - TextPromptClient, if type argument is 'text'.
3567            - ChatPromptClient, if type argument is 'chat'.
3568
3569        Raises:
3570            Exception: Propagates any exceptions raised during the fetching of a new prompt, unless there is an
3571            expired prompt in the cache, in which case it logs a warning and returns the expired prompt.
3572        """
3573        if self._resources is None:
3574            raise Error(
3575                "SDK is not correctly initialized. Check the init logs for more details."
3576            )
3577        if version is not None and label is not None:
3578            raise ValueError("Cannot specify both version and label at the same time.")
3579
3580        if not name:
3581            raise ValueError("Prompt name cannot be empty.")
3582
3583        cache_key = PromptCache.generate_cache_key(name, version=version, label=label)
3584        bounded_max_retries = self._get_bounded_max_retries(
3585            max_retries, default_max_retries=2, max_retries_upper_bound=4
3586        )
3587
3588        langfuse_logger.debug(f"Getting prompt '{cache_key}'")
3589        cached_prompt = self._resources.prompt_cache.get(cache_key)
3590
3591        if cached_prompt is None or cache_ttl_seconds == 0:
3592            langfuse_logger.debug(
3593                f"Prompt '{cache_key}' not found in cache or caching disabled."
3594            )
3595            try:
3596                return self._fetch_prompt_and_update_cache(
3597                    name,
3598                    version=version,
3599                    label=label,
3600                    ttl_seconds=cache_ttl_seconds,
3601                    max_retries=bounded_max_retries,
3602                    fetch_timeout_seconds=fetch_timeout_seconds,
3603                )
3604            except Exception as e:
3605                if fallback:
3606                    langfuse_logger.warning(
3607                        f"Returning fallback prompt for '{cache_key}' due to fetch error: {e}"
3608                    )
3609
3610                    fallback_client_args: Dict[str, Any] = {
3611                        "name": name,
3612                        "prompt": fallback,
3613                        "type": type,
3614                        "version": version or 0,
3615                        "config": {},
3616                        "labels": [label] if label else [],
3617                        "tags": [],
3618                    }
3619
3620                    if type == "text":
3621                        return TextPromptClient(
3622                            prompt=Prompt_Text(**fallback_client_args),
3623                            is_fallback=True,
3624                        )
3625
3626                    if type == "chat":
3627                        return ChatPromptClient(
3628                            prompt=Prompt_Chat(**fallback_client_args),
3629                            is_fallback=True,
3630                        )
3631
3632                raise e
3633
3634        if cached_prompt.is_expired():
3635            langfuse_logger.debug(f"Stale prompt '{cache_key}' found in cache.")
3636            try:
3637                # refresh prompt in background thread, refresh_prompt deduplicates tasks
3638                langfuse_logger.debug(f"Refreshing prompt '{cache_key}' in background.")
3639
3640                def refresh_task() -> None:
3641                    self._fetch_prompt_and_update_cache(
3642                        name,
3643                        version=version,
3644                        label=label,
3645                        ttl_seconds=cache_ttl_seconds,
3646                        max_retries=bounded_max_retries,
3647                        fetch_timeout_seconds=fetch_timeout_seconds,
3648                    )
3649
3650                self._resources.prompt_cache.add_refresh_prompt_task(
3651                    cache_key,
3652                    refresh_task,
3653                )
3654                langfuse_logger.debug(
3655                    f"Returning stale prompt '{cache_key}' from cache."
3656                )
3657                # return stale prompt
3658                return cached_prompt.value
3659
3660            except Exception as e:
3661                langfuse_logger.warning(
3662                    f"Error when refreshing cached prompt '{cache_key}', returning cached version. Error: {e}"
3663                )
3664                # creation of refresh prompt task failed, return stale prompt
3665                return cached_prompt.value
3666
3667        return cached_prompt.value
3668
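A usage sketch for `get_prompt` with a fallback and a custom cache TTL; the prompt name and fallback text are illustrative, and `compile()` is assumed here as the prompt client's variable-substitution helper:

```python
from langfuse import Langfuse

langfuse = Langfuse()

prompt = langfuse.get_prompt(
    "movie-critic",                 # illustrative prompt name
    label="production",
    cache_ttl_seconds=300,
    fallback="Summarize the following text: {{text}}",  # used if the fetch fails
)

# compile() substitutes {{variables}} in the prompt template
compiled = prompt.compile(text="A long movie review ...")
```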
3669    def _fetch_prompt_and_update_cache(
3670        self,
3671        name: str,
3672        *,
3673        version: Optional[int] = None,
3674        label: Optional[str] = None,
3675        ttl_seconds: Optional[int] = None,
3676        max_retries: int,
3677        fetch_timeout_seconds: Optional[int],
3678    ) -> PromptClient:
3679        cache_key = PromptCache.generate_cache_key(name, version=version, label=label)
3680        langfuse_logger.debug(f"Fetching prompt '{cache_key}' from server...")
3681
3682        try:
3683
3684            @backoff.on_exception(
3685                backoff.constant, Exception, max_tries=max_retries + 1, logger=None
3686            )
3687            def fetch_prompts() -> Any:
3688                return self.api.prompts.get(
3689                    self._url_encode(name),
3690                    version=version,
3691                    label=label,
3692                    request_options={
3693                        "timeout_in_seconds": fetch_timeout_seconds,
3694                    }
3695                    if fetch_timeout_seconds is not None
3696                    else None,
3697                )
3698
3699            prompt_response = fetch_prompts()
3700
3701            prompt: PromptClient
3702            if prompt_response.type == "chat":
3703                prompt = ChatPromptClient(prompt_response)
3704            else:
3705                prompt = TextPromptClient(prompt_response)
3706
3707            if self._resources is not None:
3708                self._resources.prompt_cache.set(cache_key, prompt, ttl_seconds)
3709
3710            return prompt
3711
3712        except NotFoundError as not_found_error:
3713            langfuse_logger.warning(
3714                f"Prompt '{cache_key}' not found during refresh, evicting from cache."
3715            )
3716            if self._resources is not None:
3717                self._resources.prompt_cache.delete(cache_key)
3718            raise not_found_error
3719
3720        except Exception as e:
3721            langfuse_logger.error(
3722                f"Error while fetching prompt '{cache_key}': {str(e)}"
3723            )
3724            raise e
3725
3726    def _get_bounded_max_retries(
3727        self,
3728        max_retries: Optional[int],
3729        *,
3730        default_max_retries: int = 2,
3731        max_retries_upper_bound: int = 4,
3732    ) -> int:
3733        if max_retries is None:
3734            return default_max_retries
3735
3736        bounded_max_retries = min(
3737            max(max_retries, 0),
3738            max_retries_upper_bound,
3739        )
3740
3741        return bounded_max_retries
3742
3743    @overload
3744    def create_prompt(
3745        self,
3746        *,
3747        name: str,
3748        prompt: List[Union[ChatMessageDict, ChatMessageWithPlaceholdersDict]],
3749        labels: List[str] = [],
3750        tags: Optional[List[str]] = None,
3751        type: Optional[Literal["chat"]],
3752        config: Optional[Any] = None,
3753        commit_message: Optional[str] = None,
3754    ) -> ChatPromptClient: ...
3755
3756    @overload
3757    def create_prompt(
3758        self,
3759        *,
3760        name: str,
3761        prompt: str,
3762        labels: List[str] = [],
3763        tags: Optional[List[str]] = None,
3764        type: Optional[Literal["text"]] = "text",
3765        config: Optional[Any] = None,
3766        commit_message: Optional[str] = None,
3767    ) -> TextPromptClient: ...
3768
3769    def create_prompt(
3770        self,
3771        *,
3772        name: str,
3773        prompt: Union[
3774            str, List[Union[ChatMessageDict, ChatMessageWithPlaceholdersDict]]
3775        ],
3776        labels: List[str] = [],
3777        tags: Optional[List[str]] = None,
3778        type: Optional[Literal["chat", "text"]] = "text",
3779        config: Optional[Any] = None,
3780        commit_message: Optional[str] = None,
3781    ) -> PromptClient:
3782        """Create a new prompt in Langfuse.
3783
3784        Keyword Args:
3785            name : The name of the prompt to be created.
3786            prompt : The content of the prompt to be created.
3787            is_active [DEPRECATED] : A flag indicating whether the prompt is active or not. This is deprecated and will be removed in a future release. Please use the 'production' label instead.
3788            labels: The labels of the prompt. Defaults to None. To create a default-served prompt, add the 'production' label.
3789            tags: The tags of the prompt. Defaults to None. Will be applied to all versions of the prompt.
3790            config: Additional structured data to be saved with the prompt. Defaults to None.
3791            type: The type of the prompt to be created. "chat" vs. "text". Defaults to "text".
3792            commit_message: Optional string describing the change.
3793
3794        Returns:
3795            TextPromptClient: The prompt if type argument is 'text'.
3796            ChatPromptClient: The prompt if type argument is 'chat'.
3797        """
3798        try:
3799            langfuse_logger.debug(f"Creating prompt {name=}, {labels=}")
3800
3801            if type == "chat":
3802                if not isinstance(prompt, list):
3803                    raise ValueError(
3804                        "For 'chat' type, 'prompt' must be a list of chat messages with role and content attributes."
3805                    )
3806                request: Union[CreatePromptRequest_Chat, CreatePromptRequest_Text] = (
3807                    CreatePromptRequest_Chat(
3808                        name=name,
3809                        prompt=cast(Any, prompt),
3810                        labels=labels,
3811                        tags=tags,
3812                        config=config or {},
3813                        commitMessage=commit_message,
3814                        type="chat",
3815                    )
3816                )
3817                server_prompt = self.api.prompts.create(request=request)
3818
3819                if self._resources is not None:
3820                    self._resources.prompt_cache.invalidate(name)
3821
3822                return ChatPromptClient(prompt=cast(Prompt_Chat, server_prompt))
3823
3824            if not isinstance(prompt, str):
3825                raise ValueError("For 'text' type, 'prompt' must be a string.")
3826
3827            request = CreatePromptRequest_Text(
3828                name=name,
3829                prompt=prompt,
3830                labels=labels,
3831                tags=tags,
3832                config=config or {},
3833                commitMessage=commit_message,
3834                type="text",
3835            )
3836
3837            server_prompt = self.api.prompts.create(request=request)
3838
3839            if self._resources is not None:
3840                self._resources.prompt_cache.invalidate(name)
3841
3842            return TextPromptClient(prompt=cast(Prompt_Text, server_prompt))
3843
3844        except Error as e:
3845            handle_fern_exception(e)
3846            raise e
3847
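A usage sketch for `create_prompt` creating a chat prompt served under the `production` label; the message content, config, and prompt name are illustrative:

```python
from langfuse import Langfuse

langfuse = Langfuse()

chat_prompt = langfuse.create_prompt(
    name="support-agent",
    type="chat",
    prompt=[
        {"role": "system", "content": "You are a helpful support agent."},
        {"role": "user", "content": "{{user_question}}"},
    ],
    labels=["production"],           # served by default under this label
    config={"model": "gpt-4o", "temperature": 0.2},
    commit_message="Initial version",
)
```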
3848    def update_prompt(
3849        self,
3850        *,
3851        name: str,
3852        version: int,
3853        new_labels: List[str] = [],
3854    ) -> Any:
3855        """Update an existing prompt version in Langfuse. The Langfuse SDK prompt cache is invalidated for all prompts with the specified name.
3856
3857        Args:
3858            name (str): The name of the prompt to update.
3859            version (int): The version number of the prompt to update.
3860            new_labels (List[str], optional): New labels to assign to the prompt version. Labels are unique across versions. The "latest" label is reserved and managed by Langfuse. Defaults to [].
3861
3862        Returns:
3863            Prompt: The updated prompt from the Langfuse API.
3864
3865        """
3866        updated_prompt = self.api.prompt_version.update(
3867            name=self._url_encode(name),
3868            version=version,
3869            new_labels=new_labels,
3870        )
3871
3872        if self._resources is not None:
3873            self._resources.prompt_cache.invalidate(name)
3874
3875        return updated_prompt
3876
3877    def _url_encode(self, url: str, *, is_url_param: Optional[bool] = False) -> str:
3878        # httpx ≥ 0.28 does its own WHATWG-compliant quoting (e.g. encodes bare
3879        # “%”, “?”, “#”, “|”, … in query/path parts). Re-quoting here would
3880        # double-encode, so we skip when the value is about to be sent straight
3881        # to httpx (`is_url_param=True`) and the installed version is ≥ 0.28.
3882        if is_url_param and Version(httpx.__version__) >= Version("0.28.0"):
3883            return url
3884
3885        # urllib.parse.quote does not escape slashes "/" by default;
3886        # we pass safe="" to force escaping of slashes.
3887        # This is necessary for prompts in prompt folders
3888        return urllib.parse.quote(url, safe="")
3889
3890    def clear_prompt_cache(self) -> None:
3891        """Clear the entire prompt cache, removing all cached prompts.
3892
3893        This method is useful when you want to force a complete refresh of all
3894        cached prompts, for example after major updates or when you need to
3895        ensure the latest versions are fetched from the server.
3896        """
3897        if self._resources is not None:
3898            self._resources.prompt_cache.clear()

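A minimal usage sketch for the prompt management methods above (the prompt name, content, and labels are illustrative; it assumes the returned prompt client exposes its version number as prompt.version):

# Create a text prompt with a "staging" label
prompt = langfuse.create_prompt(
    name="movie-critic",
    prompt="Review the movie {{title}} as a critic.",
    labels=["staging"],
    type="text",
)

# Promote the new version to "production"
langfuse.update_prompt(
    name="movie-critic",
    version=prompt.version,
    new_labels=["production"],
)

# Drop all cached prompts so subsequent fetches hit the server
langfuse.clear_prompt_cache()
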
Main client for Langfuse tracing and platform features.

This class provides an interface for creating and managing traces, spans, and generations in Langfuse as well as interacting with the Langfuse API.

The client features a thread-safe singleton pattern for each unique public API key, ensuring consistent trace context propagation across your application. It implements efficient batching of spans with configurable flush settings and includes background thread management for media uploads and score ingestion.

Configuration is flexible through either direct parameters or environment variables, with graceful fallbacks and runtime configuration updates.

Attributes:
  • api: Synchronous API client for Langfuse backend communication
  • async_api: Asynchronous API client for Langfuse backend communication
  • _otel_tracer: Internal LangfuseTracer instance managing OpenTelemetry components
Arguments:
  • public_key (Optional[str]): Your Langfuse public API key. Can also be set via LANGFUSE_PUBLIC_KEY environment variable.
  • secret_key (Optional[str]): Your Langfuse secret API key. Can also be set via LANGFUSE_SECRET_KEY environment variable.
  • base_url (Optional[str]): The Langfuse API base URL. Defaults to "https://cloud.langfuse.com". Can also be set via LANGFUSE_BASE_URL environment variable.
  • host (Optional[str]): Deprecated. Use base_url instead. The Langfuse API host URL. Defaults to "https://cloud.langfuse.com".
  • timeout (Optional[int]): Timeout in seconds for API requests. Defaults to 5 seconds.
  • httpx_client (Optional[httpx.Client]): Custom httpx client for making non-tracing HTTP requests. If not provided, a default client will be created.
  • debug (bool): Enable debug logging. Defaults to False. Can also be set via LANGFUSE_DEBUG environment variable.
  • tracing_enabled (Optional[bool]): Enable or disable tracing. Defaults to True. Can also be set via LANGFUSE_TRACING_ENABLED environment variable.
  • flush_at (Optional[int]): Number of spans to batch before sending to the API. Defaults to 512. Can also be set via LANGFUSE_FLUSH_AT environment variable.
  • flush_interval (Optional[float]): Time in seconds between batch flushes. Defaults to 5 seconds. Can also be set via LANGFUSE_FLUSH_INTERVAL environment variable.
  • environment (Optional[str]): Environment name for tracing. Default is 'default'. Can also be set via LANGFUSE_TRACING_ENVIRONMENT environment variable. Can be any lowercase alphanumeric string with hyphens and underscores that does not start with 'langfuse'.
  • release (Optional[str]): Release version/hash of your application. Used for grouping analytics by release.
  • media_upload_thread_count (Optional[int]): Number of background threads for handling media uploads. Defaults to 1. Can also be set via LANGFUSE_MEDIA_UPLOAD_THREAD_COUNT environment variable.
  • sample_rate (Optional[float]): Sampling rate for traces (0.0 to 1.0). Defaults to 1.0 (100% of traces are sampled). Can also be set via LANGFUSE_SAMPLE_RATE environment variable.
  • mask (Optional[MaskFunction]): Function to mask sensitive data in traces before sending to the API.
  • blocked_instrumentation_scopes (Optional[List[str]]): List of instrumentation scope names to block from being exported to Langfuse. Spans from these scopes will be filtered out before being sent to the API. Useful for filtering out spans from specific libraries or frameworks. For exported spans, you can see the instrumentation scope name in the span metadata in Langfuse (metadata.scope.name)
  • additional_headers (Optional[Dict[str, str]]): Additional headers to include in all API requests and OTLPSpanExporter requests. These headers will be merged with default headers. Note: If httpx_client is provided, additional_headers must be set directly on your custom httpx_client as well.
  • tracer_provider (Optional[TracerProvider]): OpenTelemetry TracerProvider to use for Langfuse. Setting this can be useful to keep Langfuse tracing disconnected from other OpenTelemetry-span-emitting libraries. Note: To track active spans, the context is still shared between TracerProviders. This may lead to broken trace trees.
Example:
from langfuse import Langfuse

# Initialize the client (reads from env vars if not provided)
langfuse = Langfuse(
    public_key="your-public-key",
    secret_key="your-secret-key",
    base_url="https://cloud.langfuse.com",  # Optional, default shown
)

# Create a trace span
with langfuse.start_as_current_span(name="process-query") as span:
    # Your application code here

    # Create a nested generation span for an LLM call
    with span.start_as_current_generation(
        name="generate-response",
        model="gpt-4",
        input={"query": "Tell me about AI"},
        model_parameters={"temperature": 0.7, "max_tokens": 500}
    ) as generation:
        # Generate response here
        response = "AI is a field of computer science..."

        generation.update(
            output=response,
            usage_details={"prompt_tokens": 10, "completion_tokens": 50},
            cost_details={"total_cost": 0.0023}
        )

        # Score the generation (supports NUMERIC, BOOLEAN, CATEGORICAL)
        generation.score(name="relevance", value=0.95, data_type="NUMERIC")
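The client can also be configured entirely through environment variables (a sketch; the key values below are placeholders):

import os

os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
os.environ["LANGFUSE_BASE_URL"] = "https://cloud.langfuse.com"

langfuse = Langfuse()  # reads credentials and base URL from the environment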
Langfuse( *, public_key: Optional[str] = None, secret_key: Optional[str] = None, base_url: Optional[str] = None, host: Optional[str] = None, timeout: Optional[int] = None, httpx_client: Optional[httpx.Client] = None, debug: bool = False, tracing_enabled: Optional[bool] = True, flush_at: Optional[int] = None, flush_interval: Optional[float] = None, environment: Optional[str] = None, release: Optional[str] = None, media_upload_thread_count: Optional[int] = None, sample_rate: Optional[float] = None, mask: Optional[langfuse.types.MaskFunction] = None, blocked_instrumentation_scopes: Optional[List[str]] = None, additional_headers: Optional[Dict[str, str]] = None, tracer_provider: Optional[opentelemetry.sdk.trace.TracerProvider] = None)
213    def __init__(
214        self,
215        *,
216        public_key: Optional[str] = None,
217        secret_key: Optional[str] = None,
218        base_url: Optional[str] = None,
219        host: Optional[str] = None,
220        timeout: Optional[int] = None,
221        httpx_client: Optional[httpx.Client] = None,
222        debug: bool = False,
223        tracing_enabled: Optional[bool] = True,
224        flush_at: Optional[int] = None,
225        flush_interval: Optional[float] = None,
226        environment: Optional[str] = None,
227        release: Optional[str] = None,
228        media_upload_thread_count: Optional[int] = None,
229        sample_rate: Optional[float] = None,
230        mask: Optional[MaskFunction] = None,
231        blocked_instrumentation_scopes: Optional[List[str]] = None,
232        additional_headers: Optional[Dict[str, str]] = None,
233        tracer_provider: Optional[TracerProvider] = None,
234    ):
235        self._base_url = (
236            base_url
237            or os.environ.get(LANGFUSE_BASE_URL)
238            or host
239            or os.environ.get(LANGFUSE_HOST, "https://cloud.langfuse.com")
240        )
241        self._environment = environment or cast(
242            str, os.environ.get(LANGFUSE_TRACING_ENVIRONMENT)
243        )
244        self._project_id: Optional[str] = None
245        sample_rate = sample_rate or float(os.environ.get(LANGFUSE_SAMPLE_RATE, 1.0))
246        if not 0.0 <= sample_rate <= 1.0:
247            raise ValueError(
248                f"Sample rate must be between 0.0 and 1.0, got {sample_rate}"
249            )
250
251        timeout = timeout or int(os.environ.get(LANGFUSE_TIMEOUT, 5))
252
253        self._tracing_enabled = (
254            tracing_enabled
255            and os.environ.get(LANGFUSE_TRACING_ENABLED, "true").lower() != "false"
256        )
257        if not self._tracing_enabled:
258            langfuse_logger.info(
259                "Configuration: Langfuse tracing is explicitly disabled. No data will be sent to the Langfuse API."
260            )
261
262        debug = (
263            debug if debug else (os.getenv(LANGFUSE_DEBUG, "false").lower() == "true")
264        )
265        if debug:
266            logging.basicConfig(
267                format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
268            )
269            langfuse_logger.setLevel(logging.DEBUG)
270
271        public_key = public_key or os.environ.get(LANGFUSE_PUBLIC_KEY)
272        if public_key is None:
273            langfuse_logger.warning(
274                "Authentication error: Langfuse client initialized without public_key. Client will be disabled. "
275                "Provide a public_key parameter or set LANGFUSE_PUBLIC_KEY environment variable. "
276            )
277            self._otel_tracer = otel_trace_api.NoOpTracer()
278            return
279
280        secret_key = secret_key or os.environ.get(LANGFUSE_SECRET_KEY)
281        if secret_key is None:
282            langfuse_logger.warning(
283                "Authentication error: Langfuse client initialized without secret_key. Client will be disabled. "
284                "Provide a secret_key parameter or set LANGFUSE_SECRET_KEY environment variable. "
285            )
286            self._otel_tracer = otel_trace_api.NoOpTracer()
287            return
288
289        if os.environ.get("OTEL_SDK_DISABLED", "false").lower() == "true":
290            langfuse_logger.warning(
291                "OTEL_SDK_DISABLED is set. Langfuse tracing will be disabled and no traces will appear in the UI."
292            )
293
294        # Initialize api and tracer if requirements are met
295        self._resources = LangfuseResourceManager(
296            public_key=public_key,
297            secret_key=secret_key,
298            base_url=self._base_url,
299            timeout=timeout,
300            environment=self._environment,
301            release=release,
302            flush_at=flush_at,
303            flush_interval=flush_interval,
304            httpx_client=httpx_client,
305            media_upload_thread_count=media_upload_thread_count,
306            sample_rate=sample_rate,
307            mask=mask,
308            tracing_enabled=self._tracing_enabled,
309            blocked_instrumentation_scopes=blocked_instrumentation_scopes,
310            additional_headers=additional_headers,
311            tracer_provider=tracer_provider,
312        )
313        self._mask = self._resources.mask
314
315        self._otel_tracer = (
316            self._resources.tracer
317            if self._tracing_enabled and self._resources.tracer is not None
318            else otel_trace_api.NoOpTracer()
319        )
320        self.api = self._resources.api
321        self.async_api = self._resources.async_api
api
async_api
def start_span( self, *, trace_context: Optional[langfuse.types.TraceContext] = None, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None) -> LangfuseSpan:
323    def start_span(
324        self,
325        *,
326        trace_context: Optional[TraceContext] = None,
327        name: str,
328        input: Optional[Any] = None,
329        output: Optional[Any] = None,
330        metadata: Optional[Any] = None,
331        version: Optional[str] = None,
332        level: Optional[SpanLevel] = None,
333        status_message: Optional[str] = None,
334    ) -> LangfuseSpan:
335        """Create a new span for tracing a unit of work.
336
337        This method creates a new span but does not set it as the current span in the
338        context. To create and use a span within a context, use start_as_current_span().
339
340        The created span will be the child of the current span in the context.
341
342        Args:
343            trace_context: Optional context for connecting to an existing trace
344            name: Name of the span (e.g., function or operation name)
345            input: Input data for the operation (can be any JSON-serializable object)
346            output: Output data from the operation (can be any JSON-serializable object)
347            metadata: Additional metadata to associate with the span
348            version: Version identifier for the code or component
349            level: Importance level of the span (DEBUG, DEFAULT, WARNING, ERROR)
350            status_message: Optional status message for the span
351
352        Returns:
353            A LangfuseSpan object that must be ended with .end() when the operation completes
354
355        Example:
356            ```python
357            span = langfuse.start_span(name="process-data")
358            try:
359                # Do work
360                span.update(output="result")
361            finally:
362                span.end()
363            ```
364        """
365        return self.start_observation(
366            trace_context=trace_context,
367            name=name,
368            as_type="span",
369            input=input,
370            output=output,
371            metadata=metadata,
372            version=version,
373            level=level,
374            status_message=status_message,
375        )

Create a new span for tracing a unit of work.

This method creates a new span but does not set it as the current span in the context. To create and use a span within a context, use start_as_current_span().

The created span will be the child of the current span in the context.

Arguments:
  • trace_context: Optional context for connecting to an existing trace
  • name: Name of the span (e.g., function or operation name)
  • input: Input data for the operation (can be any JSON-serializable object)
  • output: Output data from the operation (can be any JSON-serializable object)
  • metadata: Additional metadata to associate with the span
  • version: Version identifier for the code or component
  • level: Importance level of the span (DEBUG, DEFAULT, WARNING, ERROR)
  • status_message: Optional status message for the span
Returns:

A LangfuseSpan object that must be ended with .end() when the operation completes

Example:
span = langfuse.start_span(name="process-data")
try:
    # Do work
    span.update(output="result")
finally:
    span.end()
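To attach the span to an existing trace rather than the current context, a trace_context can be passed (a sketch; the hex IDs below are placeholders):

span = langfuse.start_span(
    name="process-data",
    trace_context={
        "trace_id": "0123456789abcdef0123456789abcdef",  # 32-char hex trace ID (placeholder)
        "parent_span_id": "0123456789abcdef",  # 16-char hex span ID (placeholder)
    },
)
try:
    # Do work
    span.update(output="result")
finally:
    span.end()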
def start_as_current_span( self, *, trace_context: Optional[langfuse.types.TraceContext] = None, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, end_on_exit: Optional[bool] = None) -> opentelemetry.util._decorator._AgnosticContextManager[LangfuseSpan]:
377    def start_as_current_span(
378        self,
379        *,
380        trace_context: Optional[TraceContext] = None,
381        name: str,
382        input: Optional[Any] = None,
383        output: Optional[Any] = None,
384        metadata: Optional[Any] = None,
385        version: Optional[str] = None,
386        level: Optional[SpanLevel] = None,
387        status_message: Optional[str] = None,
388        end_on_exit: Optional[bool] = None,
389    ) -> _AgnosticContextManager[LangfuseSpan]:
390        """Create a new span and set it as the current span in a context manager.
391
392        This method creates a new span and sets it as the current span within a context
393        manager. Use this method with a 'with' statement to automatically handle span
394        lifecycle within a code block.
395
396        The created span will be the child of the current span in the context.
397
398        Args:
399            trace_context: Optional context for connecting to an existing trace
400            name: Name of the span (e.g., function or operation name)
401            input: Input data for the operation (can be any JSON-serializable object)
402            output: Output data from the operation (can be any JSON-serializable object)
403            metadata: Additional metadata to associate with the span
404            version: Version identifier for the code or component
405            level: Importance level of the span (DEBUG, DEFAULT, WARNING, ERROR)
406            status_message: Optional status message for the span
407            end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
408
409        Returns:
410            A context manager that yields a LangfuseSpan
411
412        Example:
413            ```python
414            with langfuse.start_as_current_span(name="process-query") as span:
415                # Do work
416                result = process_data()
417                span.update(output=result)
418
419                # Create a child span automatically
420                with span.start_as_current_span(name="sub-operation") as child_span:
421                    # Do sub-operation work
422                    child_span.update(output="sub-result")
423            ```
424        """
425        return self.start_as_current_observation(
426            trace_context=trace_context,
427            name=name,
428            as_type="span",
429            input=input,
430            output=output,
431            metadata=metadata,
432            version=version,
433            level=level,
434            status_message=status_message,
435            end_on_exit=end_on_exit,
436        )

Create a new span and set it as the current span in a context manager.

This method creates a new span and sets it as the current span within a context manager. Use this method with a 'with' statement to automatically handle span lifecycle within a code block.

The created span will be the child of the current span in the context.

Arguments:
  • trace_context: Optional context for connecting to an existing trace
  • name: Name of the span (e.g., function or operation name)
  • input: Input data for the operation (can be any JSON-serializable object)
  • output: Output data from the operation (can be any JSON-serializable object)
  • metadata: Additional metadata to associate with the span
  • version: Version identifier for the code or component
  • level: Importance level of the span (DEBUG, DEFAULT, WARNING, ERROR)
  • status_message: Optional status message for the span
  • end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
Returns:

A context manager that yields a LangfuseSpan

Example:
with langfuse.start_as_current_span(name="process-query") as span:
    # Do work
    result = process_data()
    span.update(output=result)

    # Create a child span automatically
    with span.start_as_current_span(name="sub-operation") as child_span:
        # Do sub-operation work
        child_span.update(output="sub-result")
def start_observation( self, *, trace_context: Optional[langfuse.types.TraceContext] = None, name: str, as_type: Union[Literal['generation', 'embedding'], Literal['span', 'agent', 'tool', 'chain', 'retriever', 'evaluator', 'guardrail']] = 'span', input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None) -> Union[LangfuseSpan, LangfuseGeneration, LangfuseAgent, LangfuseTool, LangfuseChain, LangfuseRetriever, LangfuseEvaluator, LangfuseEmbedding, LangfuseGuardrail]:
585    def start_observation(
586        self,
587        *,
588        trace_context: Optional[TraceContext] = None,
589        name: str,
590        as_type: ObservationTypeLiteralNoEvent = "span",
591        input: Optional[Any] = None,
592        output: Optional[Any] = None,
593        metadata: Optional[Any] = None,
594        version: Optional[str] = None,
595        level: Optional[SpanLevel] = None,
596        status_message: Optional[str] = None,
597        completion_start_time: Optional[datetime] = None,
598        model: Optional[str] = None,
599        model_parameters: Optional[Dict[str, MapValue]] = None,
600        usage_details: Optional[Dict[str, int]] = None,
601        cost_details: Optional[Dict[str, float]] = None,
602        prompt: Optional[PromptClient] = None,
603    ) -> Union[
604        LangfuseSpan,
605        LangfuseGeneration,
606        LangfuseAgent,
607        LangfuseTool,
608        LangfuseChain,
609        LangfuseRetriever,
610        LangfuseEvaluator,
611        LangfuseEmbedding,
612        LangfuseGuardrail,
613    ]:
614        """Create a new observation of the specified type.
615
616        This method creates a new observation but does not set it as the current span in the
617        context. To create and use an observation within a context, use start_as_current_observation().
618
619        Args:
620            trace_context: Optional context for connecting to an existing trace
621            name: Name of the observation
622            as_type: Type of observation to create (defaults to "span")
623            input: Input data for the operation
624            output: Output data from the operation
625            metadata: Additional metadata to associate with the observation
626            version: Version identifier for the code or component
627            level: Importance level of the observation
628            status_message: Optional status message for the observation
629            completion_start_time: When the model started generating (for generation types)
630            model: Name/identifier of the AI model used (for generation types)
631            model_parameters: Parameters used for the model (for generation types)
632            usage_details: Token usage information (for generation types)
633            cost_details: Cost information (for generation types)
634            prompt: Associated prompt template (for generation types)
635
636        Returns:
637            An observation object of the appropriate type that must be ended with .end()
638        """
639        if trace_context:
640            trace_id = trace_context.get("trace_id", None)
641            parent_span_id = trace_context.get("parent_span_id", None)
642
643            if trace_id:
644                remote_parent_span = self._create_remote_parent_span(
645                    trace_id=trace_id, parent_span_id=parent_span_id
646                )
647
648                with otel_trace_api.use_span(
649                    cast(otel_trace_api.Span, remote_parent_span)
650                ):
651                    otel_span = self._otel_tracer.start_span(name=name)
652                    otel_span.set_attribute(LangfuseOtelSpanAttributes.AS_ROOT, True)
653
654                    return self._create_observation_from_otel_span(
655                        otel_span=otel_span,
656                        as_type=as_type,
657                        input=input,
658                        output=output,
659                        metadata=metadata,
660                        version=version,
661                        level=level,
662                        status_message=status_message,
663                        completion_start_time=completion_start_time,
664                        model=model,
665                        model_parameters=model_parameters,
666                        usage_details=usage_details,
667                        cost_details=cost_details,
668                        prompt=prompt,
669                    )
670
671        otel_span = self._otel_tracer.start_span(name=name)
672
673        return self._create_observation_from_otel_span(
674            otel_span=otel_span,
675            as_type=as_type,
676            input=input,
677            output=output,
678            metadata=metadata,
679            version=version,
680            level=level,
681            status_message=status_message,
682            completion_start_time=completion_start_time,
683            model=model,
684            model_parameters=model_parameters,
685            usage_details=usage_details,
686            cost_details=cost_details,
687            prompt=prompt,
688        )

Create a new observation of the specified type.

This method creates a new observation but does not set it as the current span in the context. To create and use an observation within a context, use start_as_current_observation().

Arguments:
  • trace_context: Optional context for connecting to an existing trace
  • name: Name of the observation
  • as_type: Type of observation to create (defaults to "span")
  • input: Input data for the operation
  • output: Output data from the operation
  • metadata: Additional metadata to associate with the observation
  • version: Version identifier for the code or component
  • level: Importance level of the observation
  • status_message: Optional status message for the observation
  • completion_start_time: When the model started generating (for generation types)
  • model: Name/identifier of the AI model used (for generation types)
  • model_parameters: Parameters used for the model (for generation types)
  • usage_details: Token usage information (for generation types)
  • cost_details: Cost information (for generation types)
  • prompt: Associated prompt template (for generation types)
Returns:

An observation object of the appropriate type that must be ended with .end()

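Example (an illustrative sketch; the retriever name and the commented-out retrieval call are hypothetical):

retrieval = langfuse.start_observation(
    name="fetch-context",
    as_type="retriever",
    input={"query": "What is Langfuse?"},
)
try:
    # documents = vector_store.similarity_search(query)  # hypothetical retrieval call
    retrieval.update(output=["doc-1", "doc-2"])
finally:
    retrieval.end()
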
def start_generation( self, *, trace_context: Optional[langfuse.types.TraceContext] = None, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None) -> LangfuseGeneration:
760    def start_generation(
761        self,
762        *,
763        trace_context: Optional[TraceContext] = None,
764        name: str,
765        input: Optional[Any] = None,
766        output: Optional[Any] = None,
767        metadata: Optional[Any] = None,
768        version: Optional[str] = None,
769        level: Optional[SpanLevel] = None,
770        status_message: Optional[str] = None,
771        completion_start_time: Optional[datetime] = None,
772        model: Optional[str] = None,
773        model_parameters: Optional[Dict[str, MapValue]] = None,
774        usage_details: Optional[Dict[str, int]] = None,
775        cost_details: Optional[Dict[str, float]] = None,
776        prompt: Optional[PromptClient] = None,
777    ) -> LangfuseGeneration:
778        """Create a new generation span for model generations.
779
780        DEPRECATED: This method is deprecated and will be removed in a future version.
781        Use start_observation(as_type='generation') instead.
782
783        This method creates a specialized span for tracking model generations.
784        It includes additional fields specific to model generations such as model name,
785        token usage, and cost details.
786
787        The created generation span will be the child of the current span in the context.
788
789        Args:
790            trace_context: Optional context for connecting to an existing trace
791            name: Name of the generation operation
792            input: Input data for the model (e.g., prompts)
793            output: Output from the model (e.g., completions)
794            metadata: Additional metadata to associate with the generation
795            version: Version identifier for the model or component
796            level: Importance level of the generation (DEBUG, DEFAULT, WARNING, ERROR)
797            status_message: Optional status message for the generation
798            completion_start_time: When the model started generating the response
799            model: Name/identifier of the AI model used (e.g., "gpt-4")
800            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
801            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
802            cost_details: Cost information for the model call
803            prompt: Associated prompt template from Langfuse prompt management
804
805        Returns:
806            A LangfuseGeneration object that must be ended with .end() when complete
807
808        Example:
809            ```python
810            generation = langfuse.start_generation(
811                name="answer-generation",
812                model="gpt-4",
813                input={"prompt": "Explain quantum computing"},
814                model_parameters={"temperature": 0.7}
815            )
816            try:
817                # Call model API
818                response = llm.generate(...)
819
820                generation.update(
821                    output=response.text,
822                    usage_details={
823                        "prompt_tokens": response.usage.prompt_tokens,
824                        "completion_tokens": response.usage.completion_tokens
825                    }
826                )
827            finally:
828                generation.end()
829            ```
830        """
831        warnings.warn(
832            "start_generation is deprecated and will be removed in a future version. "
833            "Use start_observation(as_type='generation') instead.",
834            DeprecationWarning,
835            stacklevel=2,
836        )
837        return self.start_observation(
838            trace_context=trace_context,
839            name=name,
840            as_type="generation",
841            input=input,
842            output=output,
843            metadata=metadata,
844            version=version,
845            level=level,
846            status_message=status_message,
847            completion_start_time=completion_start_time,
848            model=model,
849            model_parameters=model_parameters,
850            usage_details=usage_details,
851            cost_details=cost_details,
852            prompt=prompt,
853        )

Create a new generation span for model generations.

DEPRECATED: This method is deprecated and will be removed in a future version. Use start_observation(as_type='generation') instead.

This method creates a specialized span for tracking model generations. It includes additional fields specific to model generations such as model name, token usage, and cost details.

The created generation span will be the child of the current span in the context.

Arguments:
  • trace_context: Optional context for connecting to an existing trace
  • name: Name of the generation operation
  • input: Input data for the model (e.g., prompts)
  • output: Output from the model (e.g., completions)
  • metadata: Additional metadata to associate with the generation
  • version: Version identifier for the model or component
  • level: Importance level of the generation (DEBUG, DEFAULT, WARNING, ERROR)
  • status_message: Optional status message for the generation
  • completion_start_time: When the model started generating the response
  • model: Name/identifier of the AI model used (e.g., "gpt-4")
  • model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
  • usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
  • cost_details: Cost information for the model call
  • prompt: Associated prompt template from Langfuse prompt management
Returns:

A LangfuseGeneration object that must be ended with .end() when complete

Example:
generation = langfuse.start_generation(
    name="answer-generation",
    model="gpt-4",
    input={"prompt": "Explain quantum computing"},
    model_parameters={"temperature": 0.7}
)
try:
    # Call model API
    response = llm.generate(...)

    generation.update(
        output=response.text,
        usage_details={
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens
        }
    )
finally:
    generation.end()
def start_as_current_generation( self, *, trace_context: Optional[langfuse.types.TraceContext] = None, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None, end_on_exit: Optional[bool] = None) -> opentelemetry.util._decorator._AgnosticContextManager[LangfuseGeneration]:
855    def start_as_current_generation(
856        self,
857        *,
858        trace_context: Optional[TraceContext] = None,
859        name: str,
860        input: Optional[Any] = None,
861        output: Optional[Any] = None,
862        metadata: Optional[Any] = None,
863        version: Optional[str] = None,
864        level: Optional[SpanLevel] = None,
865        status_message: Optional[str] = None,
866        completion_start_time: Optional[datetime] = None,
867        model: Optional[str] = None,
868        model_parameters: Optional[Dict[str, MapValue]] = None,
869        usage_details: Optional[Dict[str, int]] = None,
870        cost_details: Optional[Dict[str, float]] = None,
871        prompt: Optional[PromptClient] = None,
872        end_on_exit: Optional[bool] = None,
873    ) -> _AgnosticContextManager[LangfuseGeneration]:
874        """Create a new generation span and set it as the current span in a context manager.
875
876        DEPRECATED: This method is deprecated and will be removed in a future version.
877        Use start_as_current_observation(as_type='generation') instead.
878
879        This method creates a specialized span for model generations and sets it as the
880        current span within a context manager. Use this method with a 'with' statement to
881        automatically handle the generation span lifecycle within a code block.
882
883        The created generation span will be the child of the current span in the context.
884
885        Args:
886            trace_context: Optional context for connecting to an existing trace
887            name: Name of the generation operation
888            input: Input data for the model (e.g., prompts)
889            output: Output from the model (e.g., completions)
890            metadata: Additional metadata to associate with the generation
891            version: Version identifier for the model or component
892            level: Importance level of the generation (DEBUG, DEFAULT, WARNING, ERROR)
893            status_message: Optional status message for the generation
894            completion_start_time: When the model started generating the response
895            model: Name/identifier of the AI model used (e.g., "gpt-4")
896            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
897            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
898            cost_details: Cost information for the model call
899            prompt: Associated prompt template from Langfuse prompt management
900            end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
901
902        Returns:
903            A context manager that yields a LangfuseGeneration
904
905        Example:
906            ```python
907            with langfuse.start_as_current_generation(
908                name="answer-generation",
909                model="gpt-4",
910                input={"prompt": "Explain quantum computing"}
911            ) as generation:
912                # Call model API
913                response = llm.generate(...)
914
915                # Update with results
916                generation.update(
917                    output=response.text,
918                    usage_details={
919                        "prompt_tokens": response.usage.prompt_tokens,
920                        "completion_tokens": response.usage.completion_tokens
921                    }
922                )
923            ```
924        """
925        warnings.warn(
926            "start_as_current_generation is deprecated and will be removed in a future version. "
927            "Use start_as_current_observation(as_type='generation') instead.",
928            DeprecationWarning,
929            stacklevel=2,
930        )
931        return self.start_as_current_observation(
932            trace_context=trace_context,
933            name=name,
934            as_type="generation",
935            input=input,
936            output=output,
937            metadata=metadata,
938            version=version,
939            level=level,
940            status_message=status_message,
941            completion_start_time=completion_start_time,
942            model=model,
943            model_parameters=model_parameters,
944            usage_details=usage_details,
945            cost_details=cost_details,
946            prompt=prompt,
947            end_on_exit=end_on_exit,
948        )

Create a new generation span and set it as the current span in a context manager.

DEPRECATED: This method is deprecated and will be removed in a future version. Use start_as_current_observation(as_type='generation') instead.

This method creates a specialized span for model generations and sets it as the current span within a context manager. Use this method with a 'with' statement to automatically handle the generation span lifecycle within a code block.

The created generation span will be the child of the current span in the context.

Arguments:
  • trace_context: Optional context for connecting to an existing trace
  • name: Name of the generation operation
  • input: Input data for the model (e.g., prompts)
  • output: Output from the model (e.g., completions)
  • metadata: Additional metadata to associate with the generation
  • version: Version identifier for the model or component
  • level: Importance level of the generation (DEBUG, DEFAULT, WARNING, ERROR)
  • status_message: Optional status message for the generation
  • completion_start_time: When the model started generating the response
  • model: Name/identifier of the AI model used (e.g., "gpt-4")
  • model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
  • usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
  • cost_details: Cost information for the model call
  • prompt: Associated prompt template from Langfuse prompt management
  • end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
Returns:

A context manager that yields a LangfuseGeneration

Example:
with langfuse.start_as_current_generation(
    name="answer-generation",
    model="gpt-4",
    input={"prompt": "Explain quantum computing"}
) as generation:
    # Call model API
    response = llm.generate(...)

    # Update with results
    generation.update(
        output=response.text,
        usage_details={
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens
        }
    )
def start_as_current_observation( self, *, trace_context: Optional[langfuse.types.TraceContext] = None, name: str, as_type: Union[Literal['generation', 'embedding'], Literal['span', 'agent', 'tool', 'chain', 'retriever', 'evaluator', 'guardrail']] = 'span', input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None, end_on_exit: Optional[bool] = None) -> Union[opentelemetry.util._decorator._AgnosticContextManager[LangfuseGeneration], opentelemetry.util._decorator._AgnosticContextManager[LangfuseSpan], opentelemetry.util._decorator._AgnosticContextManager[LangfuseAgent], opentelemetry.util._decorator._AgnosticContextManager[LangfuseTool], opentelemetry.util._decorator._AgnosticContextManager[LangfuseChain], opentelemetry.util._decorator._AgnosticContextManager[LangfuseRetriever], opentelemetry.util._decorator._AgnosticContextManager[LangfuseEvaluator], opentelemetry.util._decorator._AgnosticContextManager[LangfuseEmbedding], opentelemetry.util._decorator._AgnosticContextManager[LangfuseGuardrail]]:
1106    def start_as_current_observation(
1107        self,
1108        *,
1109        trace_context: Optional[TraceContext] = None,
1110        name: str,
1111        as_type: ObservationTypeLiteralNoEvent = "span",
1112        input: Optional[Any] = None,
1113        output: Optional[Any] = None,
1114        metadata: Optional[Any] = None,
1115        version: Optional[str] = None,
1116        level: Optional[SpanLevel] = None,
1117        status_message: Optional[str] = None,
1118        completion_start_time: Optional[datetime] = None,
1119        model: Optional[str] = None,
1120        model_parameters: Optional[Dict[str, MapValue]] = None,
1121        usage_details: Optional[Dict[str, int]] = None,
1122        cost_details: Optional[Dict[str, float]] = None,
1123        prompt: Optional[PromptClient] = None,
1124        end_on_exit: Optional[bool] = None,
1125    ) -> Union[
1126        _AgnosticContextManager[LangfuseGeneration],
1127        _AgnosticContextManager[LangfuseSpan],
1128        _AgnosticContextManager[LangfuseAgent],
1129        _AgnosticContextManager[LangfuseTool],
1130        _AgnosticContextManager[LangfuseChain],
1131        _AgnosticContextManager[LangfuseRetriever],
1132        _AgnosticContextManager[LangfuseEvaluator],
1133        _AgnosticContextManager[LangfuseEmbedding],
1134        _AgnosticContextManager[LangfuseGuardrail],
1135    ]:
1136        """Create a new observation and set it as the current span in a context manager.
1137
1138        This method creates a new observation of the specified type and sets it as the
1139        current span within a context manager. Use this method with a 'with' statement to
1140        automatically handle the observation lifecycle within a code block.
1141
1142        The created observation will be the child of the current span in the context.
1143
1144        Args:
1145            trace_context: Optional context for connecting to an existing trace
1146            name: Name of the observation (e.g., function or operation name)
1147            as_type: Type of observation to create (defaults to "span")
1148            input: Input data for the operation (can be any JSON-serializable object)
1149            output: Output data from the operation (can be any JSON-serializable object)
1150            metadata: Additional metadata to associate with the observation
1151            version: Version identifier for the code or component
1152            level: Importance level of the observation (DEBUG, DEFAULT, WARNING, ERROR)
1153            status_message: Optional status message for the observation
1154            end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
1155
1156            The following parameters are available when as_type is "generation" or "embedding":
1157            completion_start_time: When the model started generating the response
1158            model: Name/identifier of the AI model used (e.g., "gpt-4")
1159            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1160            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1161            cost_details: Cost information for the model call
1162            prompt: Associated prompt template from Langfuse prompt management
1163
1164        Returns:
1165            A context manager that yields the appropriate observation type based on as_type
1166
1167        Example:
1168            ```python
1169            # Create a span
1170            with langfuse.start_as_current_observation(name="process-query", as_type="span") as span:
1171                # Do work
1172                result = process_data()
1173                span.update(output=result)
1174
1175                # Create a child span automatically
1176                with span.start_as_current_span(name="sub-operation") as child_span:
1177                    # Do sub-operation work
1178                    child_span.update(output="sub-result")
1179
1180            # Create a tool observation
1181            with langfuse.start_as_current_observation(name="web-search", as_type="tool") as tool:
1182                # Do tool work
1183                results = search_web(query)
1184                tool.update(output=results)
1185
1186            # Create a generation observation
1187            with langfuse.start_as_current_observation(
1188                name="answer-generation",
1189                as_type="generation",
1190                model="gpt-4"
1191            ) as generation:
1192                # Generate answer
1193                response = llm.generate(...)
1194                generation.update(output=response)
1195            ```
1196        """
1197        if as_type in get_observation_types_list(ObservationTypeGenerationLike):
1198            if trace_context:
1199                trace_id = trace_context.get("trace_id", None)
1200                parent_span_id = trace_context.get("parent_span_id", None)
1201
1202                if trace_id:
1203                    remote_parent_span = self._create_remote_parent_span(
1204                        trace_id=trace_id, parent_span_id=parent_span_id
1205                    )
1206
1207                    return cast(
1208                        Union[
1209                            _AgnosticContextManager[LangfuseGeneration],
1210                            _AgnosticContextManager[LangfuseEmbedding],
1211                        ],
1212                        self._create_span_with_parent_context(
1213                            as_type=as_type,
1214                            name=name,
1215                            remote_parent_span=remote_parent_span,
1216                            parent=None,
1217                            end_on_exit=end_on_exit,
1218                            input=input,
1219                            output=output,
1220                            metadata=metadata,
1221                            version=version,
1222                            level=level,
1223                            status_message=status_message,
1224                            completion_start_time=completion_start_time,
1225                            model=model,
1226                            model_parameters=model_parameters,
1227                            usage_details=usage_details,
1228                            cost_details=cost_details,
1229                            prompt=prompt,
1230                        ),
1231                    )
1232
1233            return cast(
1234                Union[
1235                    _AgnosticContextManager[LangfuseGeneration],
1236                    _AgnosticContextManager[LangfuseEmbedding],
1237                ],
1238                self._start_as_current_otel_span_with_processed_media(
1239                    as_type=as_type,
1240                    name=name,
1241                    end_on_exit=end_on_exit,
1242                    input=input,
1243                    output=output,
1244                    metadata=metadata,
1245                    version=version,
1246                    level=level,
1247                    status_message=status_message,
1248                    completion_start_time=completion_start_time,
1249                    model=model,
1250                    model_parameters=model_parameters,
1251                    usage_details=usage_details,
1252                    cost_details=cost_details,
1253                    prompt=prompt,
1254                ),
1255            )
1256
1257        if as_type in get_observation_types_list(ObservationTypeSpanLike):
1258            if trace_context:
1259                trace_id = trace_context.get("trace_id", None)
1260                parent_span_id = trace_context.get("parent_span_id", None)
1261
1262                if trace_id:
1263                    remote_parent_span = self._create_remote_parent_span(
1264                        trace_id=trace_id, parent_span_id=parent_span_id
1265                    )
1266
1267                    return cast(
1268                        Union[
1269                            _AgnosticContextManager[LangfuseSpan],
1270                            _AgnosticContextManager[LangfuseAgent],
1271                            _AgnosticContextManager[LangfuseTool],
1272                            _AgnosticContextManager[LangfuseChain],
1273                            _AgnosticContextManager[LangfuseRetriever],
1274                            _AgnosticContextManager[LangfuseEvaluator],
1275                            _AgnosticContextManager[LangfuseGuardrail],
1276                        ],
1277                        self._create_span_with_parent_context(
1278                            as_type=as_type,
1279                            name=name,
1280                            remote_parent_span=remote_parent_span,
1281                            parent=None,
1282                            end_on_exit=end_on_exit,
1283                            input=input,
1284                            output=output,
1285                            metadata=metadata,
1286                            version=version,
1287                            level=level,
1288                            status_message=status_message,
1289                        ),
1290                    )
1291
1292            return cast(
1293                Union[
1294                    _AgnosticContextManager[LangfuseSpan],
1295                    _AgnosticContextManager[LangfuseAgent],
1296                    _AgnosticContextManager[LangfuseTool],
1297                    _AgnosticContextManager[LangfuseChain],
1298                    _AgnosticContextManager[LangfuseRetriever],
1299                    _AgnosticContextManager[LangfuseEvaluator],
1300                    _AgnosticContextManager[LangfuseGuardrail],
1301                ],
1302                self._start_as_current_otel_span_with_processed_media(
1303                    as_type=as_type,
1304                    name=name,
1305                    end_on_exit=end_on_exit,
1306                    input=input,
1307                    output=output,
1308                    metadata=metadata,
1309                    version=version,
1310                    level=level,
1311                    status_message=status_message,
1312                ),
1313            )
1314
1315        # This should never be reached since all valid types are handled above
1316        langfuse_logger.warning(
1317            f"Unknown observation type: {as_type}, falling back to span"
1318        )
1319        return self._start_as_current_otel_span_with_processed_media(
1320            as_type="span",
1321            name=name,
1322            end_on_exit=end_on_exit,
1323            input=input,
1324            output=output,
1325            metadata=metadata,
1326            version=version,
1327            level=level,
1328            status_message=status_message,
1329        )

Create a new observation and set it as the current span in a context manager.

This method creates a new observation of the specified type and sets it as the current span within a context manager. Use this method with a 'with' statement to automatically handle the observation lifecycle within a code block.

The created observation will be the child of the current span in the context.

Arguments:
  • trace_context: Optional context for connecting to an existing trace
  • name: Name of the observation (e.g., function or operation name)
  • as_type: Type of observation to create (defaults to "span")
  • input: Input data for the operation (can be any JSON-serializable object)
  • output: Output data from the operation (can be any JSON-serializable object)
  • metadata: Additional metadata to associate with the observation
  • version: Version identifier for the code or component
  • level: Importance level of the observation (DEBUG, DEFAULT, WARNING, ERROR)
  • status_message: Optional status message for the observation
  • end_on_exit (default: True): Whether to end the span automatically when leaving the context manager. If False, the span must be manually ended to avoid memory leaks.
The following parameters are available when as_type is "generation" or "embedding":
  • completion_start_time: When the model started generating the response
  • model: Name/identifier of the AI model used (e.g., "gpt-4")
  • model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
  • usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
  • cost_details: Cost information for the model call
  • prompt: Associated prompt template from Langfuse prompt management
Returns:

A context manager that yields the appropriate observation type based on as_type

Example:
# Create a span
with langfuse.start_as_current_observation(name="process-query", as_type="span") as span:
    # Do work
    result = process_data()
    span.update(output=result)

    # Create a child span automatically
    with span.start_as_current_span(name="sub-operation") as child_span:
        # Do sub-operation work
        child_span.update(output="sub-result")

# Create a tool observation
with langfuse.start_as_current_observation(name="web-search", as_type="tool") as tool:
    # Do tool work
    results = search_web(query)
    tool.update(output=results)

# Create a generation observation
with langfuse.start_as_current_observation(
    name="answer-generation",
    as_type="generation",
    model="gpt-4"
) as generation:
    # Generate answer
    response = llm.generate(...)
    generation.update(output=response)
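
Below is a minimal sketch of the end_on_exit=False path described in the arguments above: the observation stays open after the with block and must be ended explicitly. The fetch_documents helper and its query string are hypothetical.

```python
from langfuse import get_client

langfuse = get_client()

# Keep the observation open beyond the context manager and end it manually.
with langfuse.start_as_current_observation(
    name="long-running-retrieval",
    as_type="retriever",
    end_on_exit=False,  # observation is NOT ended when the block exits
) as retriever:
    documents = fetch_documents("vector database query")  # hypothetical helper

# ... later, once the remaining work has finished:
retriever.update(output=documents)
retriever.end()  # must be called explicitly to avoid a leaked span
```
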
def update_current_generation( self, *, name: Optional[str] = None, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None) -> None:
1490    def update_current_generation(
1491        self,
1492        *,
1493        name: Optional[str] = None,
1494        input: Optional[Any] = None,
1495        output: Optional[Any] = None,
1496        metadata: Optional[Any] = None,
1497        version: Optional[str] = None,
1498        level: Optional[SpanLevel] = None,
1499        status_message: Optional[str] = None,
1500        completion_start_time: Optional[datetime] = None,
1501        model: Optional[str] = None,
1502        model_parameters: Optional[Dict[str, MapValue]] = None,
1503        usage_details: Optional[Dict[str, int]] = None,
1504        cost_details: Optional[Dict[str, float]] = None,
1505        prompt: Optional[PromptClient] = None,
1506    ) -> None:
1507        """Update the current active generation span with new information.
1508
1509        This method updates the current generation span in the active context with
1510        additional information. It's useful for adding output, usage stats, or other
1511        details that become available during or after model generation.
1512
1513        Args:
1514            name: The generation name
1515            input: Updated input data for the model
1516            output: Output from the model (e.g., completions)
1517            metadata: Additional metadata to associate with the generation
1518            version: Version identifier for the model or component
1519            level: Importance level of the generation (info, warning, error)
1520            status_message: Optional status message for the generation
1521            completion_start_time: When the model started generating the response
1522            model: Name/identifier of the AI model used (e.g., "gpt-4")
1523            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1524            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1525            cost_details: Cost information for the model call
1526            prompt: Associated prompt template from Langfuse prompt management
1527
1528        Example:
1529            ```python
1530            with langfuse.start_as_current_generation(name="answer-query") as generation:
1531                # Initial setup and API call
1532                response = llm.generate(...)
1533
1534                # Update with results that weren't available at creation time
1535                langfuse.update_current_generation(
1536                    output=response.text,
1537                    usage_details={
1538                        "prompt_tokens": response.usage.prompt_tokens,
1539                        "completion_tokens": response.usage.completion_tokens
1540                    }
1541                )
1542            ```
1543        """
1544        if not self._tracing_enabled:
1545            langfuse_logger.debug(
1546                "Operation skipped: update_current_generation - Tracing is disabled or client is in no-op mode."
1547            )
1548            return
1549
1550        current_otel_span = self._get_current_otel_span()
1551
1552        if current_otel_span is not None:
1553            generation = LangfuseGeneration(
1554                otel_span=current_otel_span, langfuse_client=self
1555            )
1556
1557            if name:
1558                current_otel_span.update_name(name)
1559
1560            generation.update(
1561                input=input,
1562                output=output,
1563                metadata=metadata,
1564                version=version,
1565                level=level,
1566                status_message=status_message,
1567                completion_start_time=completion_start_time,
1568                model=model,
1569                model_parameters=model_parameters,
1570                usage_details=usage_details,
1571                cost_details=cost_details,
1572                prompt=prompt,
1573            )

Update the current active generation span with new information.

This method updates the current generation span in the active context with additional information. It's useful for adding output, usage stats, or other details that become available during or after model generation.

Arguments:
  • name: The generation name
  • input: Updated input data for the model
  • output: Output from the model (e.g., completions)
  • metadata: Additional metadata to associate with the generation
  • version: Version identifier for the model or component
  • level: Importance level of the generation (info, warning, error)
  • status_message: Optional status message for the generation
  • completion_start_time: When the model started generating the response
  • model: Name/identifier of the AI model used (e.g., "gpt-4")
  • model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
  • usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
  • cost_details: Cost information for the model call
  • prompt: Associated prompt template from Langfuse prompt management
Example:
with langfuse.start_as_current_generation(name="answer-query") as generation:
    # Initial setup and API call
    response = llm.generate(...)

    # Update with results that weren't available at creation time
    langfuse.update_current_generation(
        output=response.text,
        usage_details={
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens
        }
    )
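
For reference, a hedged sketch of updating the generation created by the @observe decorator exported by this SDK (assuming its as_type parameter); the model name, completion, and token counts are placeholders.

```python
from langfuse import get_client, observe

langfuse = get_client()

@observe(as_type="generation")
def answer(question: str) -> str:
    # ... call the model of your choice here ...
    completion = "Paris is the capital of France."  # placeholder response

    # Enrich the generation created by the decorator with details that only
    # become available after the model call.
    langfuse.update_current_generation(
        model="gpt-4",  # placeholder model name
        output=completion,
        usage_details={"prompt_tokens": 12, "completion_tokens": 9},
    )
    return completion
```
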
def update_current_span( self, *, name: Optional[str] = None, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None) -> None:
1575    def update_current_span(
1576        self,
1577        *,
1578        name: Optional[str] = None,
1579        input: Optional[Any] = None,
1580        output: Optional[Any] = None,
1581        metadata: Optional[Any] = None,
1582        version: Optional[str] = None,
1583        level: Optional[SpanLevel] = None,
1584        status_message: Optional[str] = None,
1585    ) -> None:
1586        """Update the current active span with new information.
1587
1588        This method updates the current span in the active context with
1589        additional information. It's useful for adding outputs or metadata
1590        that become available during execution.
1591
1592        Args:
1593            name: The span name
1594            input: Updated input data for the operation
1595            output: Output data from the operation
1596            metadata: Additional metadata to associate with the span
1597            version: Version identifier for the code or component
1598            level: Importance level of the span (info, warning, error)
1599            status_message: Optional status message for the span
1600
1601        Example:
1602            ```python
1603            with langfuse.start_as_current_span(name="process-data") as span:
1604                # Initial processing
1605                result = process_first_part()
1606
1607                # Update with intermediate results
1608                langfuse.update_current_span(metadata={"intermediate_result": result})
1609
1610                # Continue processing
1611                final_result = process_second_part(result)
1612
1613                # Final update
1614                langfuse.update_current_span(output=final_result)
1615            ```
1616        """
1617        if not self._tracing_enabled:
1618            langfuse_logger.debug(
1619                "Operation skipped: update_current_span - Tracing is disabled or client is in no-op mode."
1620            )
1621            return
1622
1623        current_otel_span = self._get_current_otel_span()
1624
1625        if current_otel_span is not None:
1626            span = LangfuseSpan(
1627                otel_span=current_otel_span,
1628                langfuse_client=self,
1629                environment=self._environment,
1630            )
1631
1632            if name:
1633                current_otel_span.update_name(name)
1634
1635            span.update(
1636                input=input,
1637                output=output,
1638                metadata=metadata,
1639                version=version,
1640                level=level,
1641                status_message=status_message,
1642            )

Update the current active span with new information.

This method updates the current span in the active context with additional information. It's useful for adding outputs or metadata that become available during execution.

Arguments:
  • name: The span name
  • input: Updated input data for the operation
  • output: Output data from the operation
  • metadata: Additional metadata to associate with the span
  • version: Version identifier for the code or component
  • level: Importance level of the span (info, warning, error)
  • status_message: Optional status message for the span
Example:
with langfuse.start_as_current_span(name="process-data") as span:
    # Initial processing
    result = process_first_part()

    # Update with intermediate results
    langfuse.update_current_span(metadata={"intermediate_result": result})

    # Continue processing
    final_result = process_second_part(result)

    # Final update
    langfuse.update_current_span(output=final_result)
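
A short sketch of the main use case implied above: helper code deeper in the call stack can enrich the active span without holding a reference to the span object. All names are illustrative.

```python
from langfuse import get_client

langfuse = get_client()

def enrich_result(result: dict) -> dict:
    # This helper has no reference to the span object, but it can still
    # attach metadata to whatever span is currently active in the context.
    langfuse.update_current_span(metadata={"enrichment_version": "v2"})
    result["enriched"] = True
    return result

with langfuse.start_as_current_span(name="handle-request") as span:
    result = enrich_result({"status": "ok"})
    span.update(output=result)
```
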
def update_current_trace( self, *, name: Optional[str] = None, user_id: Optional[str] = None, session_id: Optional[str] = None, version: Optional[str] = None, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, tags: Optional[List[str]] = None, public: Optional[bool] = None) -> None:
1644    def update_current_trace(
1645        self,
1646        *,
1647        name: Optional[str] = None,
1648        user_id: Optional[str] = None,
1649        session_id: Optional[str] = None,
1650        version: Optional[str] = None,
1651        input: Optional[Any] = None,
1652        output: Optional[Any] = None,
1653        metadata: Optional[Any] = None,
1654        tags: Optional[List[str]] = None,
1655        public: Optional[bool] = None,
1656    ) -> None:
1657        """Update the current trace with additional information.
1658
1659        Args:
1660            name: Updated name for the Langfuse trace
1661            user_id: ID of the user who initiated the Langfuse trace
1662            session_id: Session identifier for grouping related Langfuse traces
1663            version: Version identifier for the application or service
1664            input: Input data for the overall Langfuse trace
1665            output: Output data from the overall Langfuse trace
1666            metadata: Additional metadata to associate with the Langfuse trace
1667            tags: List of tags to categorize the Langfuse trace
1668            public: Whether the Langfuse trace should be publicly accessible
1669
1670        See Also:
1671            :func:`langfuse.propagate_attributes`: Recommended replacement
1672        """
1673        if not self._tracing_enabled:
1674            langfuse_logger.debug(
1675                "Operation skipped: update_current_trace - Tracing is disabled or client is in no-op mode."
1676            )
1677            return
1678
1679        current_otel_span = self._get_current_otel_span()
1680
1681        if current_otel_span is not None and current_otel_span.is_recording():
1682            existing_observation_type = current_otel_span.attributes.get(  # type: ignore[attr-defined]
1683                LangfuseOtelSpanAttributes.OBSERVATION_TYPE, "span"
1684            )
1685            # We need to preserve the class to keep the correct observation type
1686            span_class = self._get_span_class(existing_observation_type)
1687            span = span_class(
1688                otel_span=current_otel_span,
1689                langfuse_client=self,
1690                environment=self._environment,
1691            )
1692
1693            span.update_trace(
1694                name=name,
1695                user_id=user_id,
1696                session_id=session_id,
1697                version=version,
1698                input=input,
1699                output=output,
1700                metadata=metadata,
1701                tags=tags,
1702                public=public,
1703            )

Update the current trace with additional information.

Arguments:
  • name: Updated name for the Langfuse trace
  • user_id: ID of the user who initiated the Langfuse trace
  • session_id: Session identifier for grouping related Langfuse traces
  • version: Version identifier for the application or service
  • input: Input data for the overall Langfuse trace
  • output: Output data from the overall Langfuse trace
  • metadata: Additional metadata to associate with the Langfuse trace
  • tags: List of tags to categorize the Langfuse trace
  • public: Whether the Langfuse trace should be publicly accessible
See Also:

langfuse.propagate_attributes(): Recommended replacement
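
Since this method has no inline example, here is a minimal sketch using only the parameters documented above; the user, session, and tag values are placeholders.

```python
from langfuse import get_client

langfuse = get_client()

with langfuse.start_as_current_span(name="chat-request") as span:
    # Attach trace-level attributes from anywhere inside the active context.
    langfuse.update_current_trace(
        user_id="user_123",            # placeholder user ID
        session_id="session_abc",      # placeholder session ID
        tags=["chat", "production"],
        metadata={"client_version": "1.4.2"},
    )
    span.update(output="done")
```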

def create_event( self, *, trace_context: Optional[langfuse.types.TraceContext] = None, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None) -> LangfuseEvent:
1705    def create_event(
1706        self,
1707        *,
1708        trace_context: Optional[TraceContext] = None,
1709        name: str,
1710        input: Optional[Any] = None,
1711        output: Optional[Any] = None,
1712        metadata: Optional[Any] = None,
1713        version: Optional[str] = None,
1714        level: Optional[SpanLevel] = None,
1715        status_message: Optional[str] = None,
1716    ) -> LangfuseEvent:
1717        """Create a new Langfuse observation of type 'EVENT'.
1718
1719        The created Langfuse Event observation will be the child of the current span in the context.
1720
1721        Args:
1722            trace_context: Optional context for connecting to an existing trace
1723            name: Name of the span (e.g., function or operation name)
1724            input: Input data for the operation (can be any JSON-serializable object)
1725            output: Output data from the operation (can be any JSON-serializable object)
1726            metadata: Additional metadata to associate with the span
1727            version: Version identifier for the code or component
1728            level: Importance level of the span (info, warning, error)
1729            status_message: Optional status message for the span
1730
1731        Returns:
1732            The Langfuse Event object
1733
1734        Example:
1735            ```python
1736            event = langfuse.create_event(name="process-event")
1737            ```
1738        """
1739        timestamp = time_ns()
1740
1741        if trace_context:
1742            trace_id = trace_context.get("trace_id", None)
1743            parent_span_id = trace_context.get("parent_span_id", None)
1744
1745            if trace_id:
1746                remote_parent_span = self._create_remote_parent_span(
1747                    trace_id=trace_id, parent_span_id=parent_span_id
1748                )
1749
1750                with otel_trace_api.use_span(
1751                    cast(otel_trace_api.Span, remote_parent_span)
1752                ):
1753                    otel_span = self._otel_tracer.start_span(
1754                        name=name, start_time=timestamp
1755                    )
1756                    otel_span.set_attribute(LangfuseOtelSpanAttributes.AS_ROOT, True)
1757
1758                    return cast(
1759                        LangfuseEvent,
1760                        LangfuseEvent(
1761                            otel_span=otel_span,
1762                            langfuse_client=self,
1763                            environment=self._environment,
1764                            input=input,
1765                            output=output,
1766                            metadata=metadata,
1767                            version=version,
1768                            level=level,
1769                            status_message=status_message,
1770                        ).end(end_time=timestamp),
1771                    )
1772
1773        otel_span = self._otel_tracer.start_span(name=name, start_time=timestamp)
1774
1775        return cast(
1776            LangfuseEvent,
1777            LangfuseEvent(
1778                otel_span=otel_span,
1779                langfuse_client=self,
1780                environment=self._environment,
1781                input=input,
1782                output=output,
1783                metadata=metadata,
1784                version=version,
1785                level=level,
1786                status_message=status_message,
1787            ).end(end_time=timestamp),
1788        )

Create a new Langfuse observation of type 'EVENT'.

The created Langfuse Event observation will be the child of the current span in the context.

Arguments:
  • trace_context: Optional context for connecting to an existing trace
  • name: Name of the event (e.g., function or operation name)
  • input: Input data for the operation (can be any JSON-serializable object)
  • output: Output data from the operation (can be any JSON-serializable object)
  • metadata: Additional metadata to associate with the event
  • version: Version identifier for the code or component
  • level: Importance level of the event (info, warning, error)
  • status_message: Optional status message for the event
Returns:

The Langfuse Event object

Example:
event = langfuse.create_event(name="process-event")
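
A slightly fuller sketch than the example above, using only parameters documented for this method; the names and payloads are placeholders.

```python
from langfuse import get_client

langfuse = get_client()

with langfuse.start_as_current_span(name="checkout") as span:
    # Record a point-in-time event as a child of the current span.
    langfuse.create_event(
        name="cache-miss",
        input={"key": "user_123:cart"},
        output={"fallback": "database"},
        level="WARNING",
        metadata={"cache": "redis"},
    )
    span.update(output={"status": "ok"})
```
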
@staticmethod
def create_trace_id(*, seed: Optional[str] = None) -> str:
1877    @staticmethod
1878    def create_trace_id(*, seed: Optional[str] = None) -> str:
1879        """Create a unique trace ID for use with Langfuse.
1880
1881        This method generates a unique trace ID for use with various Langfuse APIs.
1882        It can either generate a random ID or create a deterministic ID based on
1883        a seed string.
1884
1885        Trace IDs must be 32 lowercase hexadecimal characters, representing 16 bytes.
1886        This method ensures the generated ID meets this requirement. If you need to
1887        correlate an external ID with a Langfuse trace ID, use the external ID as the
1888        seed to get a valid, deterministic Langfuse trace ID.
1889
1890        Args:
1891            seed: Optional string to use as a seed for deterministic ID generation.
1892                 If provided, the same seed will always produce the same ID.
1893                 If not provided, a random ID will be generated.
1894
1895        Returns:
1896            A 32-character lowercase hexadecimal string representing the Langfuse trace ID.
1897
1898        Example:
1899            ```python
1900            # Generate a random trace ID
1901            trace_id = langfuse.create_trace_id()
1902
1903            # Generate a deterministic ID based on a seed
1904            session_trace_id = langfuse.create_trace_id(seed="session-456")
1905
1906            # Correlate an external ID with a Langfuse trace ID
1907            external_id = "external-system-123456"
1908            correlated_trace_id = langfuse.create_trace_id(seed=external_id)
1909
1910            # Use the ID with trace context
1911            with langfuse.start_as_current_span(
1912                name="process-request",
1913                trace_context={"trace_id": trace_id}
1914            ) as span:
1915                # Operation will be part of the specific trace
1916                pass
1917            ```
1918        """
1919        if not seed:
1920            trace_id_int = RandomIdGenerator().generate_trace_id()
1921
1922            return Langfuse._format_otel_trace_id(trace_id_int)
1923
1924        return sha256(seed.encode("utf-8")).digest()[:16].hex()

Create a unique trace ID for use with Langfuse.

This method generates a unique trace ID for use with various Langfuse APIs. It can either generate a random ID or create a deterministic ID based on a seed string.

Trace IDs must be 32 lowercase hexadecimal characters, representing 16 bytes. This method ensures the generated ID meets this requirement. If you need to correlate an external ID with a Langfuse trace ID, use the external ID as the seed to get a valid, deterministic Langfuse trace ID.

Arguments:
  • seed: Optional string to use as a seed for deterministic ID generation. If provided, the same seed will always produce the same ID. If not provided, a random ID will be generated.
Returns:

A 32-character lowercase hexadecimal string representing the Langfuse trace ID.

Example:
# Generate a random trace ID
trace_id = langfuse.create_trace_id()

# Generate a deterministic ID based on a seed
session_trace_id = langfuse.create_trace_id(seed="session-456")

# Correlate an external ID with a Langfuse trace ID
external_id = "external-system-123456"
correlated_trace_id = langfuse.create_trace_id(seed=external_id)

# Use the ID with trace context
with langfuse.start_as_current_span(
    name="process-request",
    trace_context={"trace_id": trace_id}
) as span:
    # Operation will be part of the specific trace
    pass
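
A hedged sketch combining the seeded trace ID with create_score (documented below) to attach delayed feedback to the correlated trace; the external ID and score values are placeholders.

```python
from langfuse import get_client

langfuse = get_client()

# Derive a deterministic Langfuse trace ID from an external request ID, so a
# score arriving later (e.g. from a feedback webhook) lands on the right trace.
external_request_id = "req_8f14e45f"  # placeholder external ID
trace_id = langfuse.create_trace_id(seed=external_request_id)

langfuse.create_score(
    name="user_feedback",
    value=1.0,
    trace_id=trace_id,
    data_type="NUMERIC",
    comment="Thumbs up from the feedback widget",
)
```
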
def create_score( self, *, name: str, value: Union[float, str], session_id: Optional[str] = None, dataset_run_id: Optional[str] = None, trace_id: Optional[str] = None, observation_id: Optional[str] = None, score_id: Optional[str] = None, data_type: Optional[Literal['NUMERIC', 'CATEGORICAL', 'BOOLEAN']] = None, comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, timestamp: Optional[datetime.datetime] = None) -> None:
2002    def create_score(
2003        self,
2004        *,
2005        name: str,
2006        value: Union[float, str],
2007        session_id: Optional[str] = None,
2008        dataset_run_id: Optional[str] = None,
2009        trace_id: Optional[str] = None,
2010        observation_id: Optional[str] = None,
2011        score_id: Optional[str] = None,
2012        data_type: Optional[ScoreDataType] = None,
2013        comment: Optional[str] = None,
2014        config_id: Optional[str] = None,
2015        metadata: Optional[Any] = None,
2016        timestamp: Optional[datetime] = None,
2017    ) -> None:
2018        """Create a score for a specific trace or observation.
2019
2020        This method creates a score for evaluating a Langfuse trace or observation. Scores can be
2021        used to track quality metrics, user feedback, or automated evaluations.
2022
2023        Args:
2024            name: Name of the score (e.g., "relevance", "accuracy")
2025            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
2026            session_id: ID of the Langfuse session to associate the score with
2027            dataset_run_id: ID of the Langfuse dataset run to associate the score with
2028            trace_id: ID of the Langfuse trace to associate the score with
2029            observation_id: Optional ID of the specific observation to score. Trace ID must be provided too.
2030            score_id: Optional custom ID for the score (auto-generated if not provided)
2031            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
2032            comment: Optional comment or explanation for the score
2033            config_id: Optional ID of a score config defined in Langfuse
2034            metadata: Optional metadata to be attached to the score
2035            timestamp: Optional timestamp for the score (defaults to current UTC time)
2036
2037        Example:
2038            ```python
2039            # Create a numeric score for accuracy
2040            langfuse.create_score(
2041                name="accuracy",
2042                value=0.92,
2043                trace_id="abcdef1234567890abcdef1234567890",
2044                data_type="NUMERIC",
2045                comment="High accuracy with minor irrelevant details"
2046            )
2047
2048            # Create a categorical score for sentiment
2049            langfuse.create_score(
2050                name="sentiment",
2051                value="positive",
2052                trace_id="abcdef1234567890abcdef1234567890",
2053                observation_id="abcdef1234567890",
2054                data_type="CATEGORICAL"
2055            )
2056            ```
2057        """
2058        if not self._tracing_enabled:
2059            return
2060
2061        score_id = score_id or self._create_observation_id()
2062
2063        try:
2064            new_body = ScoreBody(
2065                id=score_id,
2066                sessionId=session_id,
2067                datasetRunId=dataset_run_id,
2068                traceId=trace_id,
2069                observationId=observation_id,
2070                name=name,
2071                value=value,
2072                dataType=data_type,  # type: ignore
2073                comment=comment,
2074                configId=config_id,
2075                environment=self._environment,
2076                metadata=metadata,
2077            )
2078
2079            event = {
2080                "id": self.create_trace_id(),
2081                "type": "score-create",
2082                "timestamp": timestamp or _get_timestamp(),
2083                "body": new_body,
2084            }
2085
2086            if self._resources is not None:
2087                # Force the score to be in sample if it was for a legacy trace ID, i.e. non-32 hexchar
2088                force_sample = (
2089                    not self._is_valid_trace_id(trace_id) if trace_id else True
2090                )
2091
2092                self._resources.add_score_task(
2093                    event,
2094                    force_sample=force_sample,
2095                )
2096
2097        except Exception as e:
2098            langfuse_logger.exception(
2099                f"Error creating score: Failed to process score event for trace_id={trace_id}, name={name}. Error: {e}"
2100            )

Create a score for a specific trace or observation.

This method creates a score for evaluating a Langfuse trace or observation. Scores can be used to track quality metrics, user feedback, or automated evaluations.

Arguments:
  • name: Name of the score (e.g., "relevance", "accuracy")
  • value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
  • session_id: ID of the Langfuse session to associate the score with
  • dataset_run_id: ID of the Langfuse dataset run to associate the score with
  • trace_id: ID of the Langfuse trace to associate the score with
  • observation_id: Optional ID of the specific observation to score. Trace ID must be provided too.
  • score_id: Optional custom ID for the score (auto-generated if not provided)
  • data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
  • comment: Optional comment or explanation for the score
  • config_id: Optional ID of a score config defined in Langfuse
  • metadata: Optional metadata to be attached to the score
  • timestamp: Optional timestamp for the score (defaults to current UTC time)
Example:
# Create a numeric score for accuracy
langfuse.create_score(
    name="accuracy",
    value=0.92,
    trace_id="abcdef1234567890abcdef1234567890",
    data_type="NUMERIC",
    comment="High accuracy with minor irrelevant details"
)

# Create a categorical score for sentiment
langfuse.create_score(
    name="sentiment",
    value="positive",
    trace_id="abcdef1234567890abcdef1234567890",
    observation_id="abcdef1234567890",
    data_type="CATEGORICAL"
)
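
A small sketch of the session- and dataset-run-level variants implied by the session_id and dataset_run_id parameters above; the IDs are placeholders.

```python
from langfuse import get_client

langfuse = get_client()

# Score an entire session rather than a single trace.
langfuse.create_score(
    name="session_satisfaction",
    value="positive",
    session_id="session_abc",      # placeholder session ID
    data_type="CATEGORICAL",
)

# Score a dataset run, e.g. from an offline evaluation job.
langfuse.create_score(
    name="run_pass_rate",
    value=0.87,
    dataset_run_id="run_123",      # placeholder dataset run ID
    data_type="NUMERIC",
)
```
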
def score_current_span( self, *, name: str, value: Union[float, str], score_id: Optional[str] = None, data_type: Optional[Literal['NUMERIC', 'CATEGORICAL', 'BOOLEAN']] = None, comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None) -> None:
2128    def score_current_span(
2129        self,
2130        *,
2131        name: str,
2132        value: Union[float, str],
2133        score_id: Optional[str] = None,
2134        data_type: Optional[ScoreDataType] = None,
2135        comment: Optional[str] = None,
2136        config_id: Optional[str] = None,
2137        metadata: Optional[Any] = None,
2138    ) -> None:
2139        """Create a score for the current active span.
2140
2141        This method scores the currently active span in the context. It's a convenient
2142        way to score the current operation without needing to know its trace and span IDs.
2143
2144        Args:
2145            name: Name of the score (e.g., "relevance", "accuracy")
2146            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
2147            score_id: Optional custom ID for the score (auto-generated if not provided)
2148            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
2149            comment: Optional comment or explanation for the score
2150            config_id: Optional ID of a score config defined in Langfuse
2151            metadata: Optional metadata to be attached to the score
2152
2153        Example:
2154            ```python
2155            with langfuse.start_as_current_generation(name="answer-query") as generation:
2156                # Generate answer
2157                response = generate_answer(...)
2158                generation.update(output=response)
2159
2160                # Score the generation
2161                langfuse.score_current_span(
2162                    name="relevance",
2163                    value=0.85,
2164                    data_type="NUMERIC",
2165                    comment="Mostly relevant but contains some tangential information",
2166                    metadata={"model": "gpt-4", "prompt_version": "v2"}
2167                )
2168            ```
2169        """
2170        current_span = self._get_current_otel_span()
2171
2172        if current_span is not None:
2173            trace_id = self._get_otel_trace_id(current_span)
2174            observation_id = self._get_otel_span_id(current_span)
2175
2176            langfuse_logger.info(
2177                f"Score: Creating score name='{name}' value={value} for current span ({observation_id}) in trace {trace_id}"
2178            )
2179
2180            self.create_score(
2181                trace_id=trace_id,
2182                observation_id=observation_id,
2183                name=name,
2184                value=cast(str, value),
2185                score_id=score_id,
2186                data_type=cast(Literal["CATEGORICAL"], data_type),
2187                comment=comment,
2188                config_id=config_id,
2189                metadata=metadata,
2190            )

Create a score for the current active span.

This method scores the currently active span in the context. It's a convenient way to score the current operation without needing to know its trace and span IDs.

Arguments:
  • name: Name of the score (e.g., "relevance", "accuracy")
  • value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
  • score_id: Optional custom ID for the score (auto-generated if not provided)
  • data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
  • comment: Optional comment or explanation for the score
  • config_id: Optional ID of a score config defined in Langfuse
  • metadata: Optional metadata to be attached to the score
Example:
with langfuse.start_as_current_generation(name="answer-query") as generation:
    # Generate answer
    response = generate_answer(...)
    generation.update(output=response)

    # Score the generation
    langfuse.score_current_span(
        name="relevance",
        value=0.85,
        data_type="NUMERIC",
        comment="Mostly relevant but contains some tangential information",
        metadata={"model": "gpt-4", "prompt_version": "v2"}
    )
def score_current_trace( self, *, name: str, value: Union[float, str], score_id: Optional[str] = None, data_type: Optional[Literal['NUMERIC', 'CATEGORICAL', 'BOOLEAN']] = None, comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None) -> None:
2218    def score_current_trace(
2219        self,
2220        *,
2221        name: str,
2222        value: Union[float, str],
2223        score_id: Optional[str] = None,
2224        data_type: Optional[ScoreDataType] = None,
2225        comment: Optional[str] = None,
2226        config_id: Optional[str] = None,
2227        metadata: Optional[Any] = None,
2228    ) -> None:
2229        """Create a score for the current trace.
2230
2231        This method scores the trace of the currently active span. Unlike score_current_span,
2232        this method associates the score with the entire trace rather than a specific span.
2233        It's useful for scoring overall performance or quality of the entire operation.
2234
2235        Args:
2236            name: Name of the score (e.g., "user_satisfaction", "overall_quality")
2237            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
2238            score_id: Optional custom ID for the score (auto-generated if not provided)
2239            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
2240            comment: Optional comment or explanation for the score
2241            config_id: Optional ID of a score config defined in Langfuse
2242            metadata: Optional metadata to be attached to the score
2243
2244        Example:
2245            ```python
2246            with langfuse.start_as_current_span(name="process-user-request") as span:
2247                # Process request
2248                result = process_complete_request()
2249                span.update(output=result)
2250
2251                # Score the overall trace
2252                langfuse.score_current_trace(
2253                    name="overall_quality",
2254                    value=0.95,
2255                    data_type="NUMERIC",
2256                    comment="High quality end-to-end response",
2257                    metadata={"evaluator": "gpt-4", "criteria": "comprehensive"}
2258                )
2259            ```
2260        """
2261        current_span = self._get_current_otel_span()
2262
2263        if current_span is not None:
2264            trace_id = self._get_otel_trace_id(current_span)
2265
2266            langfuse_logger.info(
2267                f"Score: Creating score name='{name}' value={value} for entire trace {trace_id}"
2268            )
2269
2270            self.create_score(
2271                trace_id=trace_id,
2272                name=name,
2273                value=cast(str, value),
2274                score_id=score_id,
2275                data_type=cast(Literal["CATEGORICAL"], data_type),
2276                comment=comment,
2277                config_id=config_id,
2278                metadata=metadata,
2279            )

Create a score for the current trace.

This method scores the trace of the currently active span. Unlike score_current_span, this method associates the score with the entire trace rather than a specific span. It's useful for scoring overall performance or quality of the entire operation.

Arguments:
  • name: Name of the score (e.g., "user_satisfaction", "overall_quality")
  • value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
  • score_id: Optional custom ID for the score (auto-generated if not provided)
  • data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
  • comment: Optional comment or explanation for the score
  • config_id: Optional ID of a score config defined in Langfuse
  • metadata: Optional metadata to be attached to the score
Example:
with langfuse.start_as_current_span(name="process-user-request") as span:
    # Process request
    result = process_complete_request()
    span.update(output=result)

    # Score the overall trace
    langfuse.score_current_trace(
        name="overall_quality",
        value=0.95,
        data_type="NUMERIC",
        comment="High quality end-to-end response",
        metadata={"evaluator": "gpt-4", "criteria": "comprehensive"}
    )
def flush(self) -> None:
2281    def flush(self) -> None:
2282        """Force flush all pending spans and events to the Langfuse API.
2283
2284        This method manually flushes any pending spans, scores, and other events to the
2285        Langfuse API. It's useful in scenarios where you want to ensure all data is sent
2286        before proceeding, without waiting for the automatic flush interval.
2287
2288        Example:
2289            ```python
2290            # Record some spans and scores
2291            with langfuse.start_as_current_span(name="operation") as span:
2292                # Do work...
2293                pass
2294
2295            # Ensure all data is sent to Langfuse before proceeding
2296            langfuse.flush()
2297
2298            # Continue with other work
2299            ```
2300        """
2301        if self._resources is not None:
2302            self._resources.flush()

Force flush all pending spans and events to the Langfuse API.

This method manually flushes any pending spans, scores, and other events to the Langfuse API. It's useful in scenarios where you want to ensure all data is sent before proceeding, without waiting for the automatic flush interval.

Example:
# Record some spans and scores
with langfuse.start_as_current_span(name="operation") as span:
    # Do work...
    pass

# Ensure all data is sent to Langfuse before proceeding
langfuse.flush()

# Continue with other work
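
A short sketch of the short-lived-process pattern this method is meant for (e.g. serverless handlers); the handler itself is illustrative.

```python
from langfuse import get_client

langfuse = get_client()

def handler(event: dict) -> dict:
    with langfuse.start_as_current_span(name="handle-event") as span:
        span.update(output={"status": "ok"})

    # Flush before returning so buffered spans are not lost when the
    # process is frozen or terminated.
    langfuse.flush()
    return {"status": "ok"}
```
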
def shutdown(self) -> None:
2304    def shutdown(self) -> None:
2305        """Shut down the Langfuse client and flush all pending data.
2306
2307        This method cleanly shuts down the Langfuse client, ensuring all pending data
2308        is flushed to the API and all background threads are properly terminated.
2309
2310        It's important to call this method when your application is shutting down to
2311        prevent data loss and resource leaks. For most applications, using the client
2312        as a context manager or relying on the automatic shutdown via atexit is sufficient.
2313
2314        Example:
2315            ```python
2316            # Initialize Langfuse
2317            langfuse = Langfuse(public_key="...", secret_key="...")
2318
2319            # Use Langfuse throughout your application
2320            # ...
2321
2322            # When application is shutting down
2323            langfuse.shutdown()
2324            ```
2325        """
2326        if self._resources is not None:
2327            self._resources.shutdown()

Shut down the Langfuse client and flush all pending data.

This method cleanly shuts down the Langfuse client, ensuring all pending data is flushed to the API and all background threads are properly terminated.

It's important to call this method when your application is shutting down to prevent data loss and resource leaks. For most applications, using the client as a context manager or relying on the automatic shutdown via atexit is sufficient.

Example:
# Initialize Langfuse
langfuse = Langfuse(public_key="...", secret_key="...")

# Use Langfuse throughout your application
# ...

# When application is shutting down
langfuse.shutdown()
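
A minimal sketch of an explicit shutdown via try/finally, for environments where the automatic atexit hook may not run reliably; credentials are assumed to come from environment variables.

```python
from langfuse import Langfuse

langfuse = Langfuse()  # reads keys from LANGFUSE_* environment variables

try:
    with langfuse.start_as_current_span(name="batch-job") as span:
        span.update(output="processed 1000 records")
finally:
    # Guarantee a final flush and clean thread termination even on errors.
    langfuse.shutdown()
```
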
def get_current_trace_id(self) -> Optional[str]:
2329    def get_current_trace_id(self) -> Optional[str]:
2330        """Get the trace ID of the current active span.
2331
2332        This method retrieves the trace ID from the currently active span in the context.
2333        It can be used to get the trace ID for referencing in logs, external systems,
2334        or for creating related operations.
2335
2336        Returns:
2337            The current trace ID as a 32-character lowercase hexadecimal string,
2338            or None if there is no active span.
2339
2340        Example:
2341            ```python
2342            with langfuse.start_as_current_span(name="process-request") as span:
2343                # Get the current trace ID for reference
2344                trace_id = langfuse.get_current_trace_id()
2345
2346                # Use it for external correlation
2347                log.info(f"Processing request with trace_id: {trace_id}")
2348
2349                # Or pass to another system
2350                external_system.process(data, trace_id=trace_id)
2351            ```
2352        """
2353        if not self._tracing_enabled:
2354            langfuse_logger.debug(
2355                "Operation skipped: get_current_trace_id - Tracing is disabled or client is in no-op mode."
2356            )
2357            return None
2358
2359        current_otel_span = self._get_current_otel_span()
2360
2361        return self._get_otel_trace_id(current_otel_span) if current_otel_span else None

Get the trace ID of the current active span.

This method retrieves the trace ID from the currently active span in the context. It can be used to get the trace ID for referencing in logs, external systems, or for creating related operations.

Returns:

The current trace ID as a 32-character lowercase hexadecimal string, or None if there is no active span.

Example:
with langfuse.start_as_current_span(name="process-request") as span:
    # Get the current trace ID for reference
    trace_id = langfuse.get_current_trace_id()

    # Use it for external correlation
    log.info(f"Processing request with trace_id: {trace_id}")

    # Or pass to another system
    external_system.process(data, trace_id=trace_id)
def get_current_observation_id(self) -> Optional[str]:
2363    def get_current_observation_id(self) -> Optional[str]:
2364        """Get the observation ID (span ID) of the current active span.
2365
2366        This method retrieves the observation ID from the currently active span in the context.
2367        It can be used to get the observation ID for referencing in logs, external systems,
2368        or for creating scores or other related operations.
2369
2370        Returns:
2371            The current observation ID as a 16-character lowercase hexadecimal string,
2372            or None if there is no active span.
2373
2374        Example:
2375            ```python
2376            with langfuse.start_as_current_span(name="process-user-query") as span:
2377                # Get the current observation ID
2378                observation_id = langfuse.get_current_observation_id()
2379
2380                # Store it for later reference
2381                cache.set(f"query_{query_id}_observation", observation_id)
2382
2383                # Process the query...
2384            ```
2385        """
2386        if not self._tracing_enabled:
2387            langfuse_logger.debug(
2388                "Operation skipped: get_current_observation_id - Tracing is disabled or client is in no-op mode."
2389            )
2390            return None
2391
2392        current_otel_span = self._get_current_otel_span()
2393
2394        return self._get_otel_span_id(current_otel_span) if current_otel_span else None

Get the observation ID (span ID) of the current active span.

This method retrieves the observation ID from the currently active span in the context. It can be used to get the observation ID for referencing in logs, external systems, or for creating scores or other related operations.

Returns:

The current observation ID as a 16-character lowercase hexadecimal string, or None if there is no active span.

Example:
with langfuse.start_as_current_span(name="process-user-query") as span:
    # Get the current observation ID
    observation_id = langfuse.get_current_observation_id()

    # Store it for later reference
    cache.set(f"query_{query_id}_observation", observation_id)

    # Process the query...
def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]:
2407    def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]:
2408        """Get the URL to view a trace in the Langfuse UI.
2409
2410        This method generates a URL that links directly to a trace in the Langfuse UI.
2411        It's useful for providing links in logs, notifications, or debugging tools.
2412
2413        Args:
2414            trace_id: Optional trace ID to generate a URL for. If not provided,
2415                     the trace ID of the current active span will be used.
2416
2417        Returns:
2418            A URL string pointing to the trace in the Langfuse UI,
2419            or None if the project ID couldn't be retrieved or no trace ID is available.
2420
2421        Example:
2422            ```python
2423            # Get URL for the current trace
2424            with langfuse.start_as_current_span(name="process-request") as span:
2425                trace_url = langfuse.get_trace_url()
2426                log.info(f"Processing trace: {trace_url}")
2427
2428            # Get URL for a specific trace
2429            specific_trace_url = langfuse.get_trace_url(trace_id="1234567890abcdef1234567890abcdef")
2430            send_notification(f"Review needed for trace: {specific_trace_url}")
2431            ```
2432        """
2433        final_trace_id = trace_id or self.get_current_trace_id()
2434        if not final_trace_id:
2435            return None
2436
2437        project_id = self._get_project_id()
2438
2439        return (
2440            f"{self._base_url}/project/{project_id}/traces/{final_trace_id}"
2441            if project_id and final_trace_id
2442            else None
2443        )

Get the URL to view a trace in the Langfuse UI.

This method generates a URL that links directly to a trace in the Langfuse UI. It's useful for providing links in logs, notifications, or debugging tools.

Arguments:
  • trace_id: Optional trace ID to generate a URL for. If not provided, the trace ID of the current active span will be used.
Returns:

A URL string pointing to the trace in the Langfuse UI, or None if the project ID couldn't be retrieved or no trace ID is available.

Example:
# Get URL for the current trace
with langfuse.start_as_current_span(name="process-request") as span:
    trace_url = langfuse.get_trace_url()
    log.info(f"Processing trace: {trace_url}")

# Get URL for a specific trace
specific_trace_url = langfuse.get_trace_url(trace_id="1234567890abcdef1234567890abcdef")
send_notification(f"Review needed for trace: {specific_trace_url}")
def get_dataset( self, name: str, *, fetch_items_page_size: Optional[int] = 50) -> langfuse._client.datasets.DatasetClient:
2445    def get_dataset(
2446        self, name: str, *, fetch_items_page_size: Optional[int] = 50
2447    ) -> "DatasetClient":
2448        """Fetch a dataset by its name.
2449
2450        Args:
2451            name (str): The name of the dataset to fetch.
2452            fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50.
2453
2454        Returns:
2455            DatasetClient: The dataset with the given name.
2456        """
2457        try:
2458            langfuse_logger.debug(f"Getting datasets {name}")
2459            dataset = self.api.datasets.get(dataset_name=self._url_encode(name))
2460
2461            dataset_items = []
2462            page = 1
2463
2464            while True:
2465                new_items = self.api.dataset_items.list(
2466                    dataset_name=self._url_encode(name, is_url_param=True),
2467                    page=page,
2468                    limit=fetch_items_page_size,
2469                )
2470                dataset_items.extend(new_items.data)
2471
2472                if new_items.meta.total_pages <= page:
2473                    break
2474
2475                page += 1
2476
2477            items = [DatasetItemClient(i, langfuse=self) for i in dataset_items]
2478
2479            return DatasetClient(dataset, items=items)
2480
2481        except Error as e:
2482            handle_fern_exception(e)
2483            raise e

Fetch a dataset by its name.

Arguments:
  • name (str): The name of the dataset to fetch.
  • fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50.
Returns:

DatasetClient: The dataset with the given name.
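
A minimal usage sketch, assuming the standard dataset item fields (input, expected_output) and a placeholder dataset name.

```python
from langfuse import get_client

langfuse = get_client()

# Fetch a dataset and inspect its items.
dataset = langfuse.get_dataset("capital-cities", fetch_items_page_size=100)

for item in dataset.items:
    print(item.id, item.input, item.expected_output)
```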

def get_dataset_run( self, *, dataset_name: str, run_name: str) -> langfuse.api.DatasetRunWithItems:
2485    def get_dataset_run(
2486        self, *, dataset_name: str, run_name: str
2487    ) -> DatasetRunWithItems:
2488        """Fetch a dataset run by dataset name and run name.
2489
2490        Args:
2491            dataset_name (str): The name of the dataset.
2492            run_name (str): The name of the run.
2493
2494        Returns:
2495            DatasetRunWithItems: The dataset run with its items.
2496        """
2497        try:
2498            return self.api.datasets.get_run(
2499                dataset_name=self._url_encode(dataset_name),
2500                run_name=self._url_encode(run_name),
2501                request_options=None,
2502            )
2503        except Error as e:
2504            handle_fern_exception(e)
2505            raise e

Fetch a dataset run by dataset name and run name.

Arguments:
  • dataset_name (str): The name of the dataset.
  • run_name (str): The name of the run.
Returns:

DatasetRunWithItems: The dataset run with its items.
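
A minimal usage sketch with placeholder dataset and run names.

```python
from langfuse import get_client

langfuse = get_client()

# Fetch a single run of a dataset, including its run items.
run = langfuse.get_dataset_run(
    dataset_name="capital-cities",   # placeholder dataset name
    run_name="gpt-4-baseline",       # placeholder run name
)
print(run.name)
```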

def get_dataset_runs( self, *, dataset_name: str, page: Optional[int] = None, limit: Optional[int] = None) -> langfuse.api.PaginatedDatasetRuns:
2507    def get_dataset_runs(
2508        self,
2509        *,
2510        dataset_name: str,
2511        page: Optional[int] = None,
2512        limit: Optional[int] = None,
2513    ) -> PaginatedDatasetRuns:
2514        """Fetch all runs for a dataset.
2515
2516        Args:
2517            dataset_name (str): The name of the dataset.
2518            page (Optional[int]): Page number, starts at 1.
2519            limit (Optional[int]): Limit of items per page.
2520
2521        Returns:
2522            PaginatedDatasetRuns: Paginated list of dataset runs.
2523        """
2524        try:
2525            return self.api.datasets.get_runs(
2526                dataset_name=self._url_encode(dataset_name),
2527                page=page,
2528                limit=limit,
2529                request_options=None,
2530            )
2531        except Error as e:
2532            handle_fern_exception(e)
2533            raise e

Fetch all runs for a dataset.

Arguments:
  • dataset_name (str): The name of the dataset.
  • page (Optional[int]): Page number, starts at 1.
  • limit (Optional[int]): Limit of items per page.
Returns:

PaginatedDatasetRuns: Paginated list of dataset runs.
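
A hedged pagination sketch, assuming the same data/meta response shape used for dataset items in get_dataset above; the dataset name is a placeholder.

```python
from langfuse import get_client

langfuse = get_client()

# List runs for a dataset one page at a time.
page = 1
while True:
    runs = langfuse.get_dataset_runs(
        dataset_name="capital-cities",  # placeholder dataset name
        page=page,
        limit=50,
    )
    for run in runs.data:
        print(run.name)

    if page >= runs.meta.total_pages:
        break
    page += 1
```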

def delete_dataset_run( self, *, dataset_name: str, run_name: str) -> langfuse.api.DeleteDatasetRunResponse:
2535    def delete_dataset_run(
2536        self, *, dataset_name: str, run_name: str
2537    ) -> DeleteDatasetRunResponse:
2538        """Delete a dataset run and all its run items. This action is irreversible.
2539
2540        Args:
2541            dataset_name (str): The name of the dataset.
2542            run_name (str): The name of the run.
2543
2544        Returns:
2545            DeleteDatasetRunResponse: Confirmation of deletion.
2546        """
2547        try:
2548            return self.api.datasets.delete_run(
2549                dataset_name=self._url_encode(dataset_name),
2550                run_name=self._url_encode(run_name),
2551                request_options=None,
2552            )
2553        except Error as e:
2554            handle_fern_exception(e)
2555            raise e

Delete a dataset run and all its run items. This action is irreversible.

Arguments:
  • dataset_name (str): The name of the dataset.
  • run_name (str): The name of the run.
Returns:

DeleteDatasetRunResponse: Confirmation of deletion.
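
A minimal sketch with placeholder names; note again that the deletion is irreversible.

```python
from langfuse import get_client

langfuse = get_client()

# Irreversible: removes the run and all of its run items.
response = langfuse.delete_dataset_run(
    dataset_name="capital-cities",   # placeholder dataset name
    run_name="gpt-4-baseline",       # placeholder run name
)
print(response)
```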

def run_experiment( self, *, name: str, run_name: Optional[str] = None, description: Optional[str] = None, data: Union[List[langfuse.experiment.LocalExperimentItem], List[langfuse._client.datasets.DatasetItemClient]], task: langfuse.experiment.TaskFunction, evaluators: List[langfuse.experiment.EvaluatorFunction] = [], composite_evaluator: Optional[CompositeEvaluatorFunction] = None, run_evaluators: List[langfuse.experiment.RunEvaluatorFunction] = [], max_concurrency: int = 50, metadata: Optional[Dict[str, str]] = None) -> langfuse.experiment.ExperimentResult:
2557    def run_experiment(
2558        self,
2559        *,
2560        name: str,
2561        run_name: Optional[str] = None,
2562        description: Optional[str] = None,
2563        data: ExperimentData,
2564        task: TaskFunction,
2565        evaluators: List[EvaluatorFunction] = [],
2566        composite_evaluator: Optional[CompositeEvaluatorFunction] = None,
2567        run_evaluators: List[RunEvaluatorFunction] = [],
2568        max_concurrency: int = 50,
2569        metadata: Optional[Dict[str, str]] = None,
2570    ) -> ExperimentResult:
2571        """Run an experiment on a dataset with automatic tracing and evaluation.
2572
2573        This method executes a task function on each item in the provided dataset,
2574        automatically traces all executions with Langfuse for observability, runs
2575        item-level and run-level evaluators on the outputs, and returns comprehensive
2576        results with evaluation metrics.
2577
2578        The experiment system provides:
2579        - Automatic tracing of all task executions
2580        - Concurrent processing with configurable limits
2581        - Comprehensive error handling that isolates failures
2582        - Integration with Langfuse datasets for experiment tracking
2583        - Flexible evaluation framework supporting both sync and async evaluators
2584
2585        Args:
2586            name: Human-readable name for the experiment. Used for identification
2587                in the Langfuse UI.
2588            run_name: Optional exact name for the experiment run. If provided, this will be
2589                used as the exact dataset run name if the `data` contains Langfuse dataset items.
2590                If not provided, this will default to the experiment name appended with an ISO timestamp.
2591            description: Optional description explaining the experiment's purpose,
2592                methodology, or expected outcomes.
2593            data: Array of data items to process. Can be either:
2594                - List of dict-like items with 'input', 'expected_output', 'metadata' keys
2595                - List of Langfuse DatasetItem objects from dataset.items
2596            task: Function that processes each data item and returns output.
2597                Must accept 'item' as keyword argument and can return sync or async results.
2598                The task function signature should be: task(*, item, **kwargs) -> Any
2599            evaluators: List of functions to evaluate each item's output individually.
2600                Each evaluator receives input, output, expected_output, and metadata.
2601                Can return single Evaluation dict or list of Evaluation dicts.
2602            composite_evaluator: Optional function that creates composite scores from item-level evaluations.
2603                Receives the same inputs as item-level evaluators (input, output, expected_output, metadata)
2604                plus the list of evaluations from item-level evaluators. Useful for weighted averages,
2605                pass/fail decisions based on multiple criteria, or custom scoring logic combining multiple metrics.
2606            run_evaluators: List of functions to evaluate the entire experiment run.
2607                Each run evaluator receives all item_results and can compute aggregate metrics.
2608                Useful for calculating averages, distributions, or cross-item comparisons.
2609            max_concurrency: Maximum number of concurrent task executions (default: 50).
2610                Controls the number of items processed simultaneously. Adjust based on
2611                API rate limits and system resources.
2612            metadata: Optional metadata dictionary to attach to all experiment traces.
2613                This metadata will be included in every trace created during the experiment.
2614                If `data` are Langfuse dataset items, the metadata will be attached to the dataset run, too.
2615
2616        Returns:
2617            ExperimentResult containing:
2618            - run_name: The experiment run name. This is equal to the dataset run name if experiment was on Langfuse dataset.
2619            - item_results: List of results for each processed item with outputs and evaluations
2620            - run_evaluations: List of aggregate evaluation results for the entire run
2621            - dataset_run_id: ID of the dataset run (if using Langfuse datasets)
2622            - dataset_run_url: Direct URL to view results in Langfuse UI (if applicable)
2623
2624        Raises:
2625            ValueError: If required parameters are missing or invalid
2626            Exception: If experiment setup fails (individual item failures are handled gracefully)
2627
2628        Examples:
2629            Basic experiment with local data:
2630            ```python
2631            def summarize_text(*, item, **kwargs):
2632                return f"Summary: {item['input'][:50]}..."
2633
2634            def length_evaluator(*, input, output, expected_output=None, **kwargs):
2635                return {
2636                    "name": "output_length",
2637                    "value": len(output),
2638                    "comment": f"Output contains {len(output)} characters"
2639                }
2640
2641            result = langfuse.run_experiment(
2642                name="Text Summarization Test",
2643                description="Evaluate summarization quality and length",
2644                data=[
2645                    {"input": "Long article text...", "expected_output": "Expected summary"},
2646                    {"input": "Another article...", "expected_output": "Another summary"}
2647                ],
2648                task=summarize_text,
2649                evaluators=[length_evaluator]
2650            )
2651
2652            print(f"Processed {len(result.item_results)} items")
2653            for item_result in result.item_results:
2654                print(f"Input: {item_result.item['input']}")
2655                print(f"Output: {item_result.output}")
2656                print(f"Evaluations: {item_result.evaluations}")
2657            ```
2658
2659            Advanced experiment with async task and multiple evaluators:
2660            ```python
2661            async def llm_task(*, item, **kwargs):
2662                # Simulate async LLM call
2663                response = await openai_client.chat.completions.create(
2664                    model="gpt-4",
2665                    messages=[{"role": "user", "content": item["input"]}]
2666                )
2667                return response.choices[0].message.content
2668
2669            def accuracy_evaluator(*, input, output, expected_output=None, **kwargs):
2670                if expected_output and expected_output.lower() in output.lower():
2671                    return {"name": "accuracy", "value": 1.0, "comment": "Correct answer"}
2672                return {"name": "accuracy", "value": 0.0, "comment": "Incorrect answer"}
2673
2674            def toxicity_evaluator(*, input, output, expected_output=None, **kwargs):
2675                # Simulate toxicity check
2676                toxicity_score = check_toxicity(output)  # Your toxicity checker
2677                return {
2678                    "name": "toxicity",
2679                    "value": toxicity_score,
2680                    "comment": f"Toxicity level: {'high' if toxicity_score > 0.7 else 'low'}"
2681                }
2682
2683            def average_accuracy(*, item_results, **kwargs):
2684                accuracies = [
2685                    eval.value for result in item_results
2686                    for eval in result.evaluations
2687                    if eval.name == "accuracy"
2688                ]
2689                return {
2690                    "name": "average_accuracy",
2691                    "value": sum(accuracies) / len(accuracies) if accuracies else 0,
2692                    "comment": f"Average accuracy across {len(accuracies)} items"
2693                }
2694
2695            result = langfuse.run_experiment(
2696                name="LLM Safety and Accuracy Test",
2697                description="Evaluate model accuracy and safety across diverse prompts",
2698                data=test_dataset,  # Your dataset items
2699                task=llm_task,
2700                evaluators=[accuracy_evaluator, toxicity_evaluator],
2701                run_evaluators=[average_accuracy],
2702                max_concurrency=5,  # Limit concurrent API calls
2703                metadata={"model": "gpt-4", "temperature": 0.7}
2704            )
2705            ```
2706
2707            Using with Langfuse datasets:
2708            ```python
2709            # Get dataset from Langfuse
2710            dataset = langfuse.get_dataset("my-eval-dataset")
2711
2712            result = dataset.run_experiment(
2713                name="Production Model Evaluation",
2714                description="Monthly evaluation of production model performance",
2715                task=my_production_task,
2716                evaluators=[accuracy_evaluator, latency_evaluator]
2717            )
2718
2719            # Results automatically linked to dataset in Langfuse UI
2720            print(f"View results: {result.dataset_run_url}")
2721            ```
2722
2723        Note:
2724            - Task and evaluator functions can be either synchronous or asynchronous
2725            - Individual item failures are logged but don't stop the experiment
2726            - All executions are automatically traced and visible in Langfuse UI
2727            - When using Langfuse datasets, results are automatically linked for easy comparison
2728            - This method works in both sync and async contexts (Jupyter notebooks, web apps, etc.)
2729            - Async execution is handled automatically with smart event loop detection
2730        """
2731        return cast(
2732            ExperimentResult,
2733            run_async_safely(
2734                self._run_experiment_async(
2735                    name=name,
2736                    run_name=self._create_experiment_run_name(
2737                        name=name, run_name=run_name
2738                    ),
2739                    description=description,
2740                    data=data,
2741                    task=task,
2742                    evaluators=evaluators or [],
2743                    composite_evaluator=composite_evaluator,
2744                    run_evaluators=run_evaluators or [],
2745                    max_concurrency=max_concurrency,
2746                    metadata=metadata,
2747                ),
2748            ),
2749        )

Run an experiment on a dataset with automatic tracing and evaluation.

This method executes a task function on each item in the provided dataset, automatically traces all executions with Langfuse for observability, runs item-level and run-level evaluators on the outputs, and returns comprehensive results with evaluation metrics.

The experiment system provides:

  • Automatic tracing of all task executions
  • Concurrent processing with configurable limits
  • Comprehensive error handling that isolates failures
  • Integration with Langfuse datasets for experiment tracking
  • Flexible evaluation framework supporting both sync and async evaluators
Arguments:
  • name: Human-readable name for the experiment. Used for identification in the Langfuse UI.
  • run_name: Optional exact name for the experiment run. If provided, this will be used as the exact dataset run name if the data contains Langfuse dataset items. If not provided, this will default to the experiment name appended with an ISO timestamp.
  • description: Optional description explaining the experiment's purpose, methodology, or expected outcomes.
  • data: Array of data items to process. Can be either:
    • List of dict-like items with 'input', 'expected_output', 'metadata' keys
    • List of Langfuse DatasetItem objects from dataset.items
  • task: Function that processes each data item and returns output. Must accept 'item' as keyword argument and can return sync or async results. The task function signature should be: task(*, item, **kwargs) -> Any
  • evaluators: List of functions to evaluate each item's output individually. Each evaluator receives input, output, expected_output, and metadata. Can return single Evaluation dict or list of Evaluation dicts.
  • composite_evaluator: Optional function that creates composite scores from item-level evaluations. Receives the same inputs as item-level evaluators (input, output, expected_output, metadata) plus the list of evaluations from item-level evaluators. Useful for weighted averages, pass/fail decisions based on multiple criteria, or custom scoring logic combining multiple metrics.
  • run_evaluators: List of functions to evaluate the entire experiment run. Each run evaluator receives all item_results and can compute aggregate metrics. Useful for calculating averages, distributions, or cross-item comparisons.
  • max_concurrency: Maximum number of concurrent task executions (default: 50). Controls the number of items processed simultaneously. Adjust based on API rate limits and system resources.
  • metadata: Optional metadata dictionary to attach to all experiment traces. This metadata will be included in every trace created during the experiment. If data are Langfuse dataset items, the metadata will be attached to the dataset run, too.
Returns:

ExperimentResult containing:

  • run_name: The experiment run name. This is equal to the dataset run name if the experiment ran on a Langfuse dataset.
  • item_results: List of results for each processed item with outputs and evaluations
  • run_evaluations: List of aggregate evaluation results for the entire run
  • dataset_run_id: ID of the dataset run (if using Langfuse datasets)
  • dataset_run_url: Direct URL to view results in Langfuse UI (if applicable)
Raises:
  • ValueError: If required parameters are missing or invalid
  • Exception: If experiment setup fails (individual item failures are handled gracefully)
Examples:

Basic experiment with local data:

def summarize_text(*, item, **kwargs):
    return f"Summary: {item['input'][:50]}..."

def length_evaluator(*, input, output, expected_output=None, **kwargs):
    return {
        "name": "output_length",
        "value": len(output),
        "comment": f"Output contains {len(output)} characters"
    }

result = langfuse.run_experiment(
    name="Text Summarization Test",
    description="Evaluate summarization quality and length",
    data=[
        {"input": "Long article text...", "expected_output": "Expected summary"},
        {"input": "Another article...", "expected_output": "Another summary"}
    ],
    task=summarize_text,
    evaluators=[length_evaluator]
)

print(f"Processed {len(result.item_results)} items")
for item_result in result.item_results:
    print(f"Input: {item_result.item['input']}")
    print(f"Output: {item_result.output}")
    print(f"Evaluations: {item_result.evaluations}")

Advanced experiment with async task and multiple evaluators:

async def llm_task(*, item, **kwargs):
    # Simulate async LLM call
    response = await openai_client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": item["input"]}]
    )
    return response.choices[0].message.content

def accuracy_evaluator(*, input, output, expected_output=None, **kwargs):
    if expected_output and expected_output.lower() in output.lower():
        return {"name": "accuracy", "value": 1.0, "comment": "Correct answer"}
    return {"name": "accuracy", "value": 0.0, "comment": "Incorrect answer"}

def toxicity_evaluator(*, input, output, expected_output=None, **kwargs):
    # Simulate toxicity check
    toxicity_score = check_toxicity(output)  # Your toxicity checker
    return {
        "name": "toxicity",
        "value": toxicity_score,
        "comment": f"Toxicity level: {'high' if toxicity_score > 0.7 else 'low'}"
    }

def average_accuracy(*, item_results, **kwargs):
    accuracies = [
        eval.value for result in item_results
        for eval in result.evaluations
        if eval.name == "accuracy"
    ]
    return {
        "name": "average_accuracy",
        "value": sum(accuracies) / len(accuracies) if accuracies else 0,
        "comment": f"Average accuracy across {len(accuracies)} items"
    }

result = langfuse.run_experiment(
    name="LLM Safety and Accuracy Test",
    description="Evaluate model accuracy and safety across diverse prompts",
    data=test_dataset,  # Your dataset items
    task=llm_task,
    evaluators=[accuracy_evaluator, toxicity_evaluator],
    run_evaluators=[average_accuracy],
    max_concurrency=5,  # Limit concurrent API calls
    metadata={"model": "gpt-4", "temperature": 0.7}
)

Using with Langfuse datasets:

# Get dataset from Langfuse
dataset = langfuse.get_dataset("my-eval-dataset")

result = dataset.run_experiment(
    name="Production Model Evaluation",
    description="Monthly evaluation of production model performance",
    task=my_production_task,
    evaluators=[accuracy_evaluator, latency_evaluator]
)

# Results automatically linked to dataset in Langfuse UI
print(f"View results: {result['dataset_run_url']}")
Note:
  • Task and evaluator functions can be either synchronous or asynchronous
  • Individual item failures are logged but don't stop the experiment
  • All executions are automatically traced and visible in Langfuse UI
  • When using Langfuse datasets, results are automatically linked for easy comparison
  • This method works in both sync and async contexts (Jupyter notebooks, web apps, etc.)
  • Async execution is handled automatically with smart event loop detection
def run_batched_evaluation( self, *, scope: Literal['traces', 'observations'], mapper: MapperFunction, filter: Optional[str] = None, fetch_batch_size: int = 50, max_items: Optional[int] = None, max_retries: int = 3, evaluators: List[langfuse.experiment.EvaluatorFunction], composite_evaluator: Optional[CompositeEvaluatorFunction] = None, max_concurrency: int = 50, metadata: Optional[Dict[str, Any]] = None, resume_from: Optional[BatchEvaluationResumeToken] = None, verbose: bool = False) -> BatchEvaluationResult:
3093    def run_batched_evaluation(
3094        self,
3095        *,
3096        scope: Literal["traces", "observations"],
3097        mapper: MapperFunction,
3098        filter: Optional[str] = None,
3099        fetch_batch_size: int = 50,
3100        max_items: Optional[int] = None,
3101        max_retries: int = 3,
3102        evaluators: List[EvaluatorFunction],
3103        composite_evaluator: Optional[CompositeEvaluatorFunction] = None,
3104        max_concurrency: int = 50,
3105        metadata: Optional[Dict[str, Any]] = None,
3106        resume_from: Optional[BatchEvaluationResumeToken] = None,
3107        verbose: bool = False,
3108    ) -> BatchEvaluationResult:
3109        """Fetch traces or observations and run evaluations on each item.
3110
3111        This method provides a powerful way to evaluate existing data in Langfuse at scale.
3112        It fetches items based on filters, transforms them using a mapper function, runs
3113        evaluators on each item, and creates scores that are linked back to the original
3114        entities. This is ideal for:
3115
3116        - Running evaluations on production traces after deployment
3117        - Backtesting new evaluation metrics on historical data
3118        - Batch scoring of observations for quality monitoring
3119        - Periodic evaluation runs on recent data
3120
3121        The method uses a streaming/pipeline approach to process items in batches, making
3122        it memory-efficient for large datasets. It includes comprehensive error handling,
3123        retry logic, and resume capability for long-running evaluations.
3124
3125        Args:
3126            scope: The type of items to evaluate. Must be one of:
3127                - "traces": Evaluate complete traces with all their observations
3128                - "observations": Evaluate individual observations (spans, generations, events)
3129            mapper: Function that transforms API response objects into evaluator inputs.
3130                Receives a trace/observation object and returns an EvaluatorInputs
3131                instance with input, output, expected_output, and metadata fields.
3132                Can be sync or async.
3133            evaluators: List of evaluation functions to run on each item. Each evaluator
3134                receives the mapped inputs and returns Evaluation object(s). Evaluator
3135                failures are logged but don't stop the batch evaluation.
3136            filter: Optional JSON filter string for querying items (same format as Langfuse API). Examples:
3137                - '{"tags": ["production"]}'
3138                - '{"user_id": "user123", "timestamp": {"operator": ">", "value": "2024-01-01"}}'
3139                Default: None (fetches all items).
3140            fetch_batch_size: Number of items to fetch per API call and hold in memory.
3141                Larger values may be faster but use more memory. Default: 50.
3142            max_items: Maximum total number of items to process. If None, processes all
3143                items matching the filter. Useful for testing or limiting evaluation runs.
3144                Default: None (process all).
3145            max_concurrency: Maximum number of items to evaluate concurrently. Controls
3146                parallelism and resource usage. Default: 50.
3147            composite_evaluator: Optional function that creates a composite score from
3148                item-level evaluations. Receives the original item and its evaluations,
3149                returns a single Evaluation. Useful for weighted averages or combined metrics.
3150                Default: None.
3151            metadata: Optional metadata dict to add to all created scores. Useful for
3152                tracking evaluation runs, versions, or other context. Default: None.
3153            max_retries: Maximum number of retry attempts for failed batch fetches.
3154                Uses exponential backoff (1s, 2s, 4s). Default: 3.
3155            verbose: If True, logs progress information to console. Useful for monitoring
3156                long-running evaluations. Default: False.
3157            resume_from: Optional resume token from a previous incomplete run. Allows
3158                continuing evaluation after interruption or failure. Default: None.
3159
3160
3161        Returns:
3162            BatchEvaluationResult containing:
3163                - total_items_fetched: Number of items fetched from API
3164                - total_items_processed: Number of items successfully evaluated
3165                - total_items_failed: Number of items that failed evaluation
3166                - total_scores_created: Scores created by item-level evaluators
3167                - total_composite_scores_created: Scores created by composite evaluator
3168                - total_evaluations_failed: Individual evaluator failures
3169                - evaluator_stats: Per-evaluator statistics (success rate, scores created)
3170                - resume_token: Token for resuming if incomplete (None if completed)
3171                - completed: True if all items processed
3172                - duration_seconds: Total execution time
3173                - failed_item_ids: IDs of items that failed
3174                - error_summary: Error types and counts
3175                - has_more_items: True if max_items reached but more exist
3176
3177        Raises:
3178            ValueError: If invalid scope is provided.
3179
3180        Examples:
3181            Basic trace evaluation:
3182            ```python
3183            from langfuse import Langfuse, EvaluatorInputs, Evaluation
3184
3185            client = Langfuse()
3186
3187            # Define mapper to extract fields from traces
3188            def trace_mapper(trace):
3189                return EvaluatorInputs(
3190                    input=trace.input,
3191                    output=trace.output,
3192                    expected_output=None,
3193                    metadata={"trace_id": trace.id}
3194                )
3195
3196            # Define evaluator
3197            def length_evaluator(*, input, output, expected_output, metadata):
3198                return Evaluation(
3199                    name="output_length",
3200                    value=len(output) if output else 0
3201                )
3202
3203            # Run batch evaluation
3204            result = client.run_batched_evaluation(
3205                scope="traces",
3206                mapper=trace_mapper,
3207                evaluators=[length_evaluator],
3208                filter='{"tags": ["production"]}',
3209                max_items=1000,
3210                verbose=True
3211            )
3212
3213            print(f"Processed {result.total_items_processed} traces")
3214            print(f"Created {result.total_scores_created} scores")
3215            ```
3216
3217            Evaluation with composite scorer:
3218            ```python
3219            def accuracy_evaluator(*, input, output, expected_output, metadata):
3220                # ... evaluation logic
3221                return Evaluation(name="accuracy", value=0.85)
3222
3223            def relevance_evaluator(*, input, output, expected_output, metadata):
3224                # ... evaluation logic
3225                return Evaluation(name="relevance", value=0.92)
3226
3227            def composite_evaluator(*, item, evaluations):
3228                # Weighted average of evaluations
3229                weights = {"accuracy": 0.6, "relevance": 0.4}
3230                total = sum(
3231                    e.value * weights.get(e.name, 0)
3232                    for e in evaluations
3233                    if isinstance(e.value, (int, float))
3234                )
3235                return Evaluation(
3236                    name="composite_score",
3237                    value=total,
3238                    comment=f"Weighted average of {len(evaluations)} metrics"
3239                )
3240
3241            result = client.run_batched_evaluation(
3242                scope="traces",
3243                mapper=trace_mapper,
3244                evaluators=[accuracy_evaluator, relevance_evaluator],
3245                composite_evaluator=composite_evaluator,
3246                filter='{"user_id": "important_user"}',
3247                verbose=True
3248            )
3249            ```
3250
3251            Handling incomplete runs with resume:
3252            ```python
3253            # Initial run that may fail or timeout
3254            result = client.run_batched_evaluation(
3255                scope="observations",
3256                mapper=obs_mapper,
3257                evaluators=[my_evaluator],
3258                max_items=10000,
3259                verbose=True
3260            )
3261
3262            # Check if incomplete
3263            if not result.completed and result.resume_token:
3264                print(f"Processed {result.resume_token.items_processed} items before interruption")
3265
3266                # Resume from where it left off
3267                result = client.run_batched_evaluation(
3268                    scope="observations",
3269                    mapper=obs_mapper,
3270                    evaluators=[my_evaluator],
3271                    resume_from=result.resume_token,
3272                    verbose=True
3273                )
3274
3275            print(f"Total items processed: {result.total_items_processed}")
3276            ```
3277
3278            Monitoring evaluator performance:
3279            ```python
3280            result = client.run_batched_evaluation(...)
3281
3282            for stats in result.evaluator_stats:
3283                success_rate = stats.successful_runs / stats.total_runs
3284                print(f"{stats.name}:")
3285                print(f"  Success rate: {success_rate:.1%}")
3286                print(f"  Scores created: {stats.total_scores_created}")
3287
3288                if stats.failed_runs > 0:
3289                    print(f"  ⚠️  Failed {stats.failed_runs} times")
3290            ```
3291
3292        Note:
3293            - Evaluator failures are logged but don't stop the batch evaluation
3294            - Individual item failures are tracked but don't stop processing
3295            - Fetch failures are retried with exponential backoff
3296            - All scores are automatically flushed to Langfuse at the end
3297            - The resume mechanism uses timestamp-based filtering to avoid duplicates
3298        """
3299        runner = BatchEvaluationRunner(self)
3300
3301        return cast(
3302            BatchEvaluationResult,
3303            run_async_safely(
3304                runner.run_async(
3305                    scope=scope,
3306                    mapper=mapper,
3307                    evaluators=evaluators,
3308                    filter=filter,
3309                    fetch_batch_size=fetch_batch_size,
3310                    max_items=max_items,
3311                    max_concurrency=max_concurrency,
3312                    composite_evaluator=composite_evaluator,
3313                    metadata=metadata,
3314                    max_retries=max_retries,
3315                    verbose=verbose,
3316                    resume_from=resume_from,
3317                )
3318            ),
3319        )

Fetch traces or observations and run evaluations on each item.

This method provides a powerful way to evaluate existing data in Langfuse at scale. It fetches items based on filters, transforms them using a mapper function, runs evaluators on each item, and creates scores that are linked back to the original entities. This is ideal for:

  • Running evaluations on production traces after deployment
  • Backtesting new evaluation metrics on historical data
  • Batch scoring of observations for quality monitoring
  • Periodic evaluation runs on recent data

The method uses a streaming/pipeline approach to process items in batches, making it memory-efficient for large datasets. It includes comprehensive error handling, retry logic, and resume capability for long-running evaluations.

Arguments:
  • scope: The type of items to evaluate. Must be one of:
    • "traces": Evaluate complete traces with all their observations
    • "observations": Evaluate individual observations (spans, generations, events)
  • mapper: Function that transforms API response objects into evaluator inputs. Receives a trace/observation object and returns an EvaluatorInputs instance with input, output, expected_output, and metadata fields. Can be sync or async.
  • evaluators: List of evaluation functions to run on each item. Each evaluator receives the mapped inputs and returns Evaluation object(s). Evaluator failures are logged but don't stop the batch evaluation.
  • filter: Optional JSON filter string for querying items (same format as Langfuse API). Default: None (fetches all items). Examples:
    • '{"tags": ["production"]}'
    • '{"user_id": "user123", "timestamp": {"operator": ">", "value": "2024-01-01"}}'
  • fetch_batch_size: Number of items to fetch per API call and hold in memory. Larger values may be faster but use more memory. Default: 50.
  • max_items: Maximum total number of items to process. If None, processes all items matching the filter. Useful for testing or limiting evaluation runs. Default: None (process all).
  • max_concurrency: Maximum number of items to evaluate concurrently. Controls parallelism and resource usage. Default: 50.
  • composite_evaluator: Optional function that creates a composite score from item-level evaluations. Receives the original item and its evaluations, returns a single Evaluation. Useful for weighted averages or combined metrics. Default: None.
  • metadata: Optional metadata dict to add to all created scores. Useful for tracking evaluation runs, versions, or other context. Default: None.
  • max_retries: Maximum number of retry attempts for failed batch fetches. Uses exponential backoff (1s, 2s, 4s). Default: 3.
  • verbose: If True, logs progress information to console. Useful for monitoring long-running evaluations. Default: False.
  • resume_from: Optional resume token from a previous incomplete run. Allows continuing evaluation after interruption or failure. Default: None.
Returns:

BatchEvaluationResult containing:

  • total_items_fetched: Number of items fetched from API
  • total_items_processed: Number of items successfully evaluated
  • total_items_failed: Number of items that failed evaluation
  • total_scores_created: Scores created by item-level evaluators
  • total_composite_scores_created: Scores created by composite evaluator
  • total_evaluations_failed: Individual evaluator failures
  • evaluator_stats: Per-evaluator statistics (success rate, scores created)
  • resume_token: Token for resuming if incomplete (None if completed)
  • completed: True if all items processed
  • duration_seconds: Total execution time
  • failed_item_ids: IDs of items that failed
  • error_summary: Error types and counts
  • has_more_items: True if max_items reached but more exist

Raises:
  • ValueError: If invalid scope is provided.
Examples:

Basic trace evaluation:

from langfuse import Langfuse, EvaluatorInputs, Evaluation

client = Langfuse()

# Define mapper to extract fields from traces
def trace_mapper(trace):
    return EvaluatorInputs(
        input=trace.input,
        output=trace.output,
        expected_output=None,
        metadata={"trace_id": trace.id}
    )

# Define evaluator
def length_evaluator(*, input, output, expected_output, metadata):
    return Evaluation(
        name="output_length",
        value=len(output) if output else 0
    )

# Run batch evaluation
result = client.run_batched_evaluation(
    scope="traces",
    mapper=trace_mapper,
    evaluators=[length_evaluator],
    filter='{"tags": ["production"]}',
    max_items=1000,
    verbose=True
)

print(f"Processed {result.total_items_processed} traces")
print(f"Created {result.total_scores_created} scores")

Evaluation with composite scorer:

def accuracy_evaluator(*, input, output, expected_output, metadata):
    # ... evaluation logic
    return Evaluation(name="accuracy", value=0.85)

def relevance_evaluator(*, input, output, expected_output, metadata):
    # ... evaluation logic
    return Evaluation(name="relevance", value=0.92)

def composite_evaluator(*, item, evaluations):
    # Weighted average of evaluations
    weights = {"accuracy": 0.6, "relevance": 0.4}
    total = sum(
        e.value * weights.get(e.name, 0)
        for e in evaluations
        if isinstance(e.value, (int, float))
    )
    return Evaluation(
        name="composite_score",
        value=total,
        comment=f"Weighted average of {len(evaluations)} metrics"
    )

result = client.run_batched_evaluation(
    scope="traces",
    mapper=trace_mapper,
    evaluators=[accuracy_evaluator, relevance_evaluator],
    composite_evaluator=composite_evaluator,
    filter='{"user_id": "important_user"}',
    verbose=True
)

Handling incomplete runs with resume:

# Initial run that may fail or timeout
result = client.run_batched_evaluation(
    scope="observations",
    mapper=obs_mapper,
    evaluators=[my_evaluator],
    max_items=10000,
    verbose=True
)

# Check if incomplete
if not result.completed and result.resume_token:
    print(f"Processed {result.resume_token.items_processed} items before interruption")

    # Resume from where it left off
    result = client.run_batched_evaluation(
        scope="observations",
        mapper=obs_mapper,
        evaluators=[my_evaluator],
        resume_from=result.resume_token,
        verbose=True
    )

print(f"Total items processed: {result.total_items_processed}")

Monitoring evaluator performance:

result = client.run_batched_evaluation(...)

for stats in result.evaluator_stats:
    success_rate = stats.successful_runs / stats.total_runs
    print(f"{stats.name}:")
    print(f"  Success rate: {success_rate:.1%}")
    print(f"  Scores created: {stats.total_scores_created}")

    if stats.failed_runs > 0:
        print(f"  âš ī¸  Failed {stats.failed_runs} times")
Note:
  • Evaluator failures are logged but don't stop the batch evaluation
  • Individual item failures are tracked but don't stop processing
  • Fetch failures are retried with exponential backoff
  • All scores are automatically flushed to Langfuse at the end
  • The resume mechanism uses timestamp-based filtering to avoid duplicates
def auth_check(self) -> bool:
3321    def auth_check(self) -> bool:
3322        """Check if the provided credentials (public and secret key) are valid.
3323
3324        Raises:
3325            Exception: If no projects were found for the provided credentials.
3326
3327        Note:
3328            This method is blocking. It is discouraged to use it in production code.
3329        """
3330        try:
3331            projects = self.api.projects.get()
3332            langfuse_logger.debug(
3333                f"Auth check successful, found {len(projects.data)} projects"
3334            )
3335            if len(projects.data) == 0:
3336                raise Exception(
3337                    "Auth check failed, no project found for the keys provided."
3338                )
3339            return True
3340
3341        except AttributeError as e:
3342            langfuse_logger.warning(
3343                f"Auth check failed: Client not properly initialized. Error: {e}"
3344            )
3345            return False
3346
3347        except Error as e:
3348            handle_fern_exception(e)
3349            raise e

Check if the provided credentials (public and secret key) are valid.

Raises:
  • Exception: If no projects were found for the provided credentials.
Note:

This method is blocking; using it in production code is discouraged.
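A minimal startup check could look like the sketch below; it assumes credentials are provided via the standard LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY environment variables.

```python
from langfuse import Langfuse

# Credentials are read from LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY
langfuse = Langfuse()

# Verify credentials once at startup; the call blocks, so keep it out of hot paths
if langfuse.auth_check():
    print("Langfuse credentials are valid")
```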

def create_dataset( self, *, name: str, description: Optional[str] = None, metadata: Optional[Any] = None, input_schema: Optional[Any] = None, expected_output_schema: Optional[Any] = None) -> langfuse.api.Dataset:
3351    def create_dataset(
3352        self,
3353        *,
3354        name: str,
3355        description: Optional[str] = None,
3356        metadata: Optional[Any] = None,
3357        input_schema: Optional[Any] = None,
3358        expected_output_schema: Optional[Any] = None,
3359    ) -> Dataset:
3360        """Create a dataset with the given name on Langfuse.
3361
3362        Args:
3363            name: Name of the dataset to create.
3364            description: Description of the dataset. Defaults to None.
3365            metadata: Additional metadata. Defaults to None.
3366            input_schema: JSON Schema for validating dataset item inputs. When set, all new items will be validated against this schema.
3367            expected_output_schema: JSON Schema for validating dataset item expected outputs. When set, all new items will be validated against this schema.
3368
3369        Returns:
3370            Dataset: The created dataset as returned by the Langfuse API.
3371        """
3372        try:
3373            body = CreateDatasetRequest(
3374                name=name,
3375                description=description,
3376                metadata=metadata,
3377                inputSchema=input_schema,
3378                expectedOutputSchema=expected_output_schema,
3379            )
3380            langfuse_logger.debug(f"Creating datasets {body}")
3381
3382            return self.api.datasets.create(request=body)
3383
3384        except Error as e:
3385            handle_fern_exception(e)
3386            raise e

Create a dataset with the given name on Langfuse.

Arguments:
  • name: Name of the dataset to create.
  • description: Description of the dataset. Defaults to None.
  • metadata: Additional metadata. Defaults to None.
  • input_schema: JSON Schema for validating dataset item inputs. When set, all new items will be validated against this schema.
  • expected_output_schema: JSON Schema for validating dataset item expected outputs. When set, all new items will be validated against this schema.
Returns:

Dataset: The created dataset as returned by the Langfuse API.
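For illustration, the sketch below creates a dataset with a JSON Schema for item inputs; the dataset name, metadata, and schema contents are hypothetical.

```python
from langfuse import Langfuse

langfuse = Langfuse()

dataset = langfuse.create_dataset(
    name="capital_cities",                      # hypothetical dataset name
    description="Country-to-capital questions",
    metadata={"owner": "evals-team"},           # hypothetical metadata
    input_schema={                              # hypothetical JSON Schema; new items are validated against it
        "type": "object",
        "properties": {"country": {"type": "string"}},
        "required": ["country"],
    },
)
```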

def create_dataset_item( self, *, dataset_name: str, input: Optional[Any] = None, expected_output: Optional[Any] = None, metadata: Optional[Any] = None, source_trace_id: Optional[str] = None, source_observation_id: Optional[str] = None, status: Optional[langfuse.api.DatasetStatus] = None, id: Optional[str] = None) -> langfuse.api.DatasetItem:
3388    def create_dataset_item(
3389        self,
3390        *,
3391        dataset_name: str,
3392        input: Optional[Any] = None,
3393        expected_output: Optional[Any] = None,
3394        metadata: Optional[Any] = None,
3395        source_trace_id: Optional[str] = None,
3396        source_observation_id: Optional[str] = None,
3397        status: Optional[DatasetStatus] = None,
3398        id: Optional[str] = None,
3399    ) -> DatasetItem:
3400        """Create a dataset item.
3401
3402        Upserts if an item with id already exists.
3403
3404        Args:
3405            dataset_name: Name of the dataset in which the dataset item should be created.
3406            input: Input data. Defaults to None. Can contain any dict, list or scalar.
3407            expected_output: Expected output data. Defaults to None. Can contain any dict, list or scalar.
3408            metadata: Additional metadata. Defaults to None. Can contain any dict, list or scalar.
3409            source_trace_id: Id of the source trace. Defaults to None.
3410            source_observation_id: Id of the source observation. Defaults to None.
3411            status: Status of the dataset item. Defaults to ACTIVE for newly created items.
3412            id: Id of the dataset item. Defaults to None. Provide your own id if you want to dedupe dataset items. Id needs to be globally unique and cannot be reused across datasets.
3413
3414        Returns:
3415            DatasetItem: The created dataset item as returned by the Langfuse API.
3416
3417        Example:
3418            ```python
3419            from langfuse import Langfuse
3420
3421            langfuse = Langfuse()
3422
3423            # Uploading items to the Langfuse dataset named "capital_cities"
3424            langfuse.create_dataset_item(
3425                dataset_name="capital_cities",
3426                input={"input": {"country": "Italy"}},
3427                expected_output={"expected_output": "Rome"},
3428                metadata={"foo": "bar"}
3429            )
3430            ```
3431        """
3432        try:
3433            body = CreateDatasetItemRequest(
3434                datasetName=dataset_name,
3435                input=input,
3436                expectedOutput=expected_output,
3437                metadata=metadata,
3438                sourceTraceId=source_trace_id,
3439                sourceObservationId=source_observation_id,
3440                status=status,
3441                id=id,
3442            )
3443            langfuse_logger.debug(f"Creating dataset item {body}")
3444            return self.api.dataset_items.create(request=body)
3445        except Error as e:
3446            handle_fern_exception(e)
3447            raise e

Create a dataset item.

Upserts if an item with id already exists.

Arguments:
  • dataset_name: Name of the dataset in which the dataset item should be created.
  • input: Input data. Defaults to None. Can contain any dict, list or scalar.
  • expected_output: Expected output data. Defaults to None. Can contain any dict, list or scalar.
  • metadata: Additional metadata. Defaults to None. Can contain any dict, list or scalar.
  • source_trace_id: Id of the source trace. Defaults to None.
  • source_observation_id: Id of the source observation. Defaults to None.
  • status: Status of the dataset item. Defaults to ACTIVE for newly created items.
  • id: Id of the dataset item. Defaults to None. Provide your own id if you want to dedupe dataset items. Id needs to be globally unique and cannot be reused across datasets.
Returns:

DatasetItem: The created dataset item as returned by the Langfuse API.

Example:
from langfuse import Langfuse

langfuse = Langfuse()

# Uploading items to the Langfuse dataset named "capital_cities"
langfuse.create_dataset_item(
    dataset_name="capital_cities",
    input={"input": {"country": "Italy"}},
    expected_output={"expected_output": "Rome"},
    metadata={"foo": "bar"}
)
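Dataset items can also reference an existing trace via source_trace_id and source_observation_id; continuing the example above, the IDs below are placeholders.

```python
# Link a dataset item to an existing production trace (IDs are placeholders)
langfuse.create_dataset_item(
    dataset_name="capital_cities",
    input={"input": {"country": "France"}},
    expected_output={"expected_output": "Paris"},
    source_trace_id="trace-id-from-production",
    source_observation_id="observation-id-from-production",
)
```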
def resolve_media_references( self, *, obj: Any, resolve_with: Literal['base64_data_uri'], max_depth: int = 10, content_fetch_timeout_seconds: int = 5) -> Any:
3449    def resolve_media_references(
3450        self,
3451        *,
3452        obj: Any,
3453        resolve_with: Literal["base64_data_uri"],
3454        max_depth: int = 10,
3455        content_fetch_timeout_seconds: int = 5,
3456    ) -> Any:
3457        """Replace media reference strings in an object with base64 data URIs.
3458
3459        This method recursively traverses an object (up to max_depth) looking for media reference strings
3460        in the format "@@@langfuseMedia:...@@@". When found, it (synchronously) fetches the actual media content using
3461        the provided Langfuse client and replaces the reference string with a base64 data URI.
3462
3463        If fetching media content fails for a reference string, a warning is logged and the reference
3464        string is left unchanged.
3465
3466        Args:
3467            obj: The object to process. Can be a primitive value, array, or nested object.
3468                If the object has a __dict__ attribute, a dict will be returned instead of the original object type.
3469            resolve_with: The representation of the media content to replace the media reference string with.
3470                Currently only "base64_data_uri" is supported.
3471            max_depth: int: The maximum depth to traverse the object. Default is 10.
3472            content_fetch_timeout_seconds: int: The timeout in seconds for fetching media content. Default is 5.
3473
3474        Returns:
3475            A deep copy of the input object with all media references replaced with base64 data URIs where possible.
3476            If the input object has a __dict__ attribute, a dict will be returned instead of the original object type.
3477
3478        Example:
3479            obj = {
3480                "image": "@@@langfuseMedia:type=image/jpeg|id=123|source=bytes@@@",
3481                "nested": {
3482                    "pdf": "@@@langfuseMedia:type=application/pdf|id=456|source=bytes@@@"
3483                }
3484            }
3485
3486            result = await LangfuseMedia.resolve_media_references(obj, langfuse_client)
3487
3488            # Result:
3489            # {
3490            #     "image": "data:image/jpeg;base64,/9j/4AAQSkZJRg...",
3491            #     "nested": {
3492            #         "pdf": "data:application/pdf;base64,JVBERi0xLjcK..."
3493            #     }
3494            # }
3495        """
3496        return LangfuseMedia.resolve_media_references(
3497            langfuse_client=self,
3498            obj=obj,
3499            resolve_with=resolve_with,
3500            max_depth=max_depth,
3501            content_fetch_timeout_seconds=content_fetch_timeout_seconds,
3502        )

Replace media reference strings in an object with base64 data URIs.

This method recursively traverses an object (up to max_depth) looking for media reference strings in the format "@@@langfuseMedia:...@@@". When found, it (synchronously) fetches the actual media content using the provided Langfuse client and replaces the reference string with a base64 data URI.

If fetching media content fails for a reference string, a warning is logged and the reference string is left unchanged.

Arguments:
  • obj: The object to process. Can be a primitive value, array, or nested object. If the object has a __dict__ attribute, a dict will be returned instead of the original object type.
  • resolve_with: The representation of the media content to replace the media reference string with. Currently only "base64_data_uri" is supported.
  • max_depth: int: The maximum depth to traverse the object. Default is 10.
  • content_fetch_timeout_seconds: int: The timeout in seconds for fetching media content. Default is 5.
Returns:

A deep copy of the input object with all media references replaced with base64 data URIs where possible. If the input object has a __dict__ attribute, a dict will be returned instead of the original object type.

Example:

obj = { "image": "@@@langfuseMedia:type=image/jpeg|id=123|source=bytes@@@", "nested": { "pdf": "@@@langfuseMedia:type=application/pdf|id=456|source=bytes@@@" } }

result = await LangfuseMedia.resolve_media_references(obj, langfuse_client)

Result:

{

"image": "data:image/jpeg;base64,/9j/4AAQSkZJRg...",

"nested": {

"pdf": "data:application/pdf;base64,JVBERi0xLjcK..."

}

}
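The same resolution is also available on the client instance itself, as shown in this minimal sketch using the media reference format from the example above:

```python
from langfuse import Langfuse

langfuse = Langfuse()

obj = {"image": "@@@langfuseMedia:type=image/jpeg|id=123|source=bytes@@@"}

# Returns a deep copy with media references replaced by base64 data URIs where possible
resolved = langfuse.resolve_media_references(
    obj=obj,
    resolve_with="base64_data_uri",
)
```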

def get_prompt( self, name: str, *, version: Optional[int] = None, label: Optional[str] = None, type: Literal['chat', 'text'] = 'text', cache_ttl_seconds: Optional[int] = None, fallback: Union[List[langfuse.model.ChatMessageDict], NoneType, str] = None, max_retries: Optional[int] = None, fetch_timeout_seconds: Optional[int] = None) -> Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient]:
3532    def get_prompt(
3533        self,
3534        name: str,
3535        *,
3536        version: Optional[int] = None,
3537        label: Optional[str] = None,
3538        type: Literal["chat", "text"] = "text",
3539        cache_ttl_seconds: Optional[int] = None,
3540        fallback: Union[Optional[List[ChatMessageDict]], Optional[str]] = None,
3541        max_retries: Optional[int] = None,
3542        fetch_timeout_seconds: Optional[int] = None,
3543    ) -> PromptClient:
3544        """Get a prompt.
3545
3546        This method attempts to fetch the requested prompt from the local cache. If the prompt is not found
3547        in the cache or if the cached prompt has expired, it will try to fetch the prompt from the server again
3548        and update the cache. If fetching the new prompt fails, and there is an expired prompt in the cache, it will
3549        return the expired prompt as a fallback.
3550
3551        Args:
3552            name (str): The name of the prompt to retrieve.
3553
3554        Keyword Args:
3555            version (Optional[int]): The version of the prompt to retrieve. If no label and version is specified, the `production` label is returned. Specify either version or label, not both.
3556            label: Optional[str]: The label of the prompt to retrieve. If no label and version is specified, the `production` label is returned. Specify either version or label, not both.
3557            cache_ttl_seconds: Optional[int]: Time-to-live in seconds for caching the prompt. Must be specified as a
3558            keyword argument. If not set, defaults to 60 seconds. Disables caching if set to 0.
3559            type: Literal["chat", "text"]: The type of the prompt to retrieve. Defaults to "text".
3560            fallback: Union[Optional[List[ChatMessageDict]], Optional[str]]: The prompt string to return if fetching the prompt fails. Important on the first call where no cached prompt is available. Follows Langfuse prompt formatting with double curly braces for variables. Defaults to None.
3561            max_retries: Optional[int]: The maximum number of retries in case of API/network errors. Defaults to 2. The maximum value is 4. Retries have an exponential backoff with a maximum delay of 10 seconds.
3562            fetch_timeout_seconds: Optional[int]: The timeout in seconds for fetching the prompt. Defaults to the default timeout set on the SDK, which is 5 seconds.
3563
3564        Returns:
3565            The prompt object retrieved from the cache or directly fetched if not cached or expired of type
3566            - TextPromptClient, if type argument is 'text'.
3567            - ChatPromptClient, if type argument is 'chat'.
3568
3569        Raises:
3570            Exception: Propagates any exceptions raised during the fetching of a new prompt, unless there is an
3571            expired prompt in the cache, in which case it logs a warning and returns the expired prompt.
3572        """
3573        if self._resources is None:
3574            raise Error(
3575                "SDK is not correctly initialized. Check the init logs for more details."
3576            )
3577        if version is not None and label is not None:
3578            raise ValueError("Cannot specify both version and label at the same time.")
3579
3580        if not name:
3581            raise ValueError("Prompt name cannot be empty.")
3582
3583        cache_key = PromptCache.generate_cache_key(name, version=version, label=label)
3584        bounded_max_retries = self._get_bounded_max_retries(
3585            max_retries, default_max_retries=2, max_retries_upper_bound=4
3586        )
3587
3588        langfuse_logger.debug(f"Getting prompt '{cache_key}'")
3589        cached_prompt = self._resources.prompt_cache.get(cache_key)
3590
3591        if cached_prompt is None or cache_ttl_seconds == 0:
3592            langfuse_logger.debug(
3593                f"Prompt '{cache_key}' not found in cache or caching disabled."
3594            )
3595            try:
3596                return self._fetch_prompt_and_update_cache(
3597                    name,
3598                    version=version,
3599                    label=label,
3600                    ttl_seconds=cache_ttl_seconds,
3601                    max_retries=bounded_max_retries,
3602                    fetch_timeout_seconds=fetch_timeout_seconds,
3603                )
3604            except Exception as e:
3605                if fallback:
3606                    langfuse_logger.warning(
3607                        f"Returning fallback prompt for '{cache_key}' due to fetch error: {e}"
3608                    )
3609
3610                    fallback_client_args: Dict[str, Any] = {
3611                        "name": name,
3612                        "prompt": fallback,
3613                        "type": type,
3614                        "version": version or 0,
3615                        "config": {},
3616                        "labels": [label] if label else [],
3617                        "tags": [],
3618                    }
3619
3620                    if type == "text":
3621                        return TextPromptClient(
3622                            prompt=Prompt_Text(**fallback_client_args),
3623                            is_fallback=True,
3624                        )
3625
3626                    if type == "chat":
3627                        return ChatPromptClient(
3628                            prompt=Prompt_Chat(**fallback_client_args),
3629                            is_fallback=True,
3630                        )
3631
3632                raise e
3633
3634        if cached_prompt.is_expired():
3635            langfuse_logger.debug(f"Stale prompt '{cache_key}' found in cache.")
3636            try:
3637                # refresh prompt in background thread, refresh_prompt deduplicates tasks
3638                langfuse_logger.debug(f"Refreshing prompt '{cache_key}' in background.")
3639
3640                def refresh_task() -> None:
3641                    self._fetch_prompt_and_update_cache(
3642                        name,
3643                        version=version,
3644                        label=label,
3645                        ttl_seconds=cache_ttl_seconds,
3646                        max_retries=bounded_max_retries,
3647                        fetch_timeout_seconds=fetch_timeout_seconds,
3648                    )
3649
3650                self._resources.prompt_cache.add_refresh_prompt_task(
3651                    cache_key,
3652                    refresh_task,
3653                )
3654                langfuse_logger.debug(
3655                    f"Returning stale prompt '{cache_key}' from cache."
3656                )
3657                # return stale prompt
3658                return cached_prompt.value
3659
3660            except Exception as e:
3661                langfuse_logger.warning(
3662                    f"Error when refreshing cached prompt '{cache_key}', returning cached version. Error: {e}"
3663                )
3664                # creation of refresh prompt task failed, return stale prompt
3665                return cached_prompt.value
3666
3667        return cached_prompt.value

Get a prompt.

This method attempts to fetch the requested prompt from the local cache. If the prompt is not found in the cache or if the cached prompt has expired, it will try to fetch the prompt from the server again and update the cache. If fetching the new prompt fails, and there is an expired prompt in the cache, it will return the expired prompt as a fallback.

Arguments:
  • name (str): The name of the prompt to retrieve.
Keyword Args:

  • version (Optional[int]): The version of the prompt to retrieve. If no label and version is specified, the production label is returned. Specify either version or label, not both.
  • label (Optional[str]): The label of the prompt to retrieve. If no label and version is specified, the production label is returned. Specify either version or label, not both.
  • cache_ttl_seconds (Optional[int]): Time-to-live in seconds for caching the prompt. Must be specified as a keyword argument. If not set, defaults to 60 seconds. Disables caching if set to 0.
  • type (Literal["chat", "text"]): The type of the prompt to retrieve. Defaults to "text".
  • fallback (Union[Optional[List[ChatMessageDict]], Optional[str]]): The prompt string to return if fetching the prompt fails. Important on the first call where no cached prompt is available. Follows Langfuse prompt formatting with double curly braces for variables. Defaults to None.
  • max_retries (Optional[int]): The maximum number of retries in case of API/network errors. Defaults to 2. The maximum value is 4. Retries have an exponential backoff with a maximum delay of 10 seconds.
  • fetch_timeout_seconds (Optional[int]): The timeout in seconds for fetching the prompt. Defaults to the SDK-wide default timeout, which is 5 seconds.

Returns:

The prompt object retrieved from the cache or directly fetched if not cached or expired of type

  • TextPromptClient, if type argument is 'text'.
  • ChatPromptClient, if type argument is 'chat'.
Raises:
  • Exception: Propagates any exceptions raised during the fetching of a new prompt, unless there is an expired prompt in the cache, in which case it logs a warning and returns the expired prompt.
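A short usage sketch; the prompt name and the {{movie}} variable are hypothetical, and compile substitutes the double-curly-brace variables of the fetched prompt.

```python
from langfuse import Langfuse

langfuse = Langfuse()

# Fetch the production-labeled text prompt, with a fallback for the first call
prompt = langfuse.get_prompt(
    "movie-critic",                                        # hypothetical prompt name
    type="text",
    cache_ttl_seconds=300,
    fallback="Review the movie {{movie}} in one paragraph.",
)

# Substitute the {{movie}} variable defined in the prompt template
review_prompt = prompt.compile(movie="Dune: Part Two")
```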
def create_prompt( self, *, name: str, prompt: Union[str, List[Union[langfuse.model.ChatMessageDict, langfuse.model.ChatMessageWithPlaceholdersDict_Message, langfuse.model.ChatMessageWithPlaceholdersDict_Placeholder]]], labels: List[str] = [], tags: Optional[List[str]] = None, type: Optional[Literal['chat', 'text']] = 'text', config: Optional[Any] = None, commit_message: Optional[str] = None) -> Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient]:
3769    def create_prompt(
3770        self,
3771        *,
3772        name: str,
3773        prompt: Union[
3774            str, List[Union[ChatMessageDict, ChatMessageWithPlaceholdersDict]]
3775        ],
3776        labels: List[str] = [],
3777        tags: Optional[List[str]] = None,
3778        type: Optional[Literal["chat", "text"]] = "text",
3779        config: Optional[Any] = None,
3780        commit_message: Optional[str] = None,
3781    ) -> PromptClient:
3782        """Create a new prompt in Langfuse.
3783
3784        Keyword Args:
3785            name : The name of the prompt to be created.
3786            prompt : The content of the prompt to be created.
3787            is_active [DEPRECATED] : A flag indicating whether the prompt is active or not. This is deprecated and will be removed in a future release. Please use the 'production' label instead.
3788            labels: The labels of the prompt. Defaults to None. To create a default-served prompt, add the 'production' label.
3789            tags: The tags of the prompt. Defaults to None. Will be applied to all versions of the prompt.
3790            config: Additional structured data to be saved with the prompt. Defaults to None.
3791            type: The type of the prompt to be created. "chat" vs. "text". Defaults to "text".
3792            commit_message: Optional string describing the change.
3793
3794        Returns:
3795            TextPromptClient: The prompt if type argument is 'text'.
3796            ChatPromptClient: The prompt if type argument is 'chat'.
3797        """
3798        try:
3799            langfuse_logger.debug(f"Creating prompt {name=}, {labels=}")
3800
3801            if type == "chat":
3802                if not isinstance(prompt, list):
3803                    raise ValueError(
3804                        "For 'chat' type, 'prompt' must be a list of chat messages with role and content attributes."
3805                    )
3806                request: Union[CreatePromptRequest_Chat, CreatePromptRequest_Text] = (
3807                    CreatePromptRequest_Chat(
3808                        name=name,
3809                        prompt=cast(Any, prompt),
3810                        labels=labels,
3811                        tags=tags,
3812                        config=config or {},
3813                        commitMessage=commit_message,
3814                        type="chat",
3815                    )
3816                )
3817                server_prompt = self.api.prompts.create(request=request)
3818
3819                if self._resources is not None:
3820                    self._resources.prompt_cache.invalidate(name)
3821
3822                return ChatPromptClient(prompt=cast(Prompt_Chat, server_prompt))
3823
3824            if not isinstance(prompt, str):
3825                raise ValueError("For 'text' type, 'prompt' must be a string.")
3826
3827            request = CreatePromptRequest_Text(
3828                name=name,
3829                prompt=prompt,
3830                labels=labels,
3831                tags=tags,
3832                config=config or {},
3833                commitMessage=commit_message,
3834                type="text",
3835            )
3836
3837            server_prompt = self.api.prompts.create(request=request)
3838
3839            if self._resources is not None:
3840                self._resources.prompt_cache.invalidate(name)
3841
3842            return TextPromptClient(prompt=cast(Prompt_Text, server_prompt))
3843
3844        except Error as e:
3845            handle_fern_exception(e)
3846            raise e

Create a new prompt in Langfuse.

Keyword Args:
  • name: The name of the prompt to be created.
  • prompt: The content of the prompt to be created.
  • is_active [DEPRECATED]: A flag indicating whether the prompt is active or not. This is deprecated and will be removed in a future release. Please use the 'production' label instead.
  • labels: The labels of the prompt. Defaults to None. To create a default-served prompt, add the 'production' label.
  • tags: The tags of the prompt. Defaults to None. Will be applied to all versions of the prompt.
  • config: Additional structured data to be saved with the prompt. Defaults to None.
  • type: The type of the prompt to be created, "chat" or "text". Defaults to "text".
  • commit_message: Optional string describing the change.

Returns:

  • TextPromptClient: The prompt if type argument is 'text'.
  • ChatPromptClient: The prompt if type argument is 'chat'.
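
A short sketch of creating both prompt types (names, contents, and config values are illustrative):

```python
# Text prompt served by default via the 'production' label
langfuse.create_prompt(
    name="movie-critic",
    type="text",
    prompt="Summarize the movie {{movie}} in one sentence.",
    labels=["production"],
)

# Chat prompt with an arbitrary structured config attached
langfuse.create_prompt(
    name="movie-critic-chat",
    type="chat",
    prompt=[{"role": "system", "content": "You are a concise film critic."}],
    labels=["production"],
    config={"model": "gpt-4o", "temperature": 0.7},
    commit_message="initial version",
)
```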

def update_prompt(self, *, name: str, version: int, new_labels: List[str] = []) -> Any:
3848    def update_prompt(
3849        self,
3850        *,
3851        name: str,
3852        version: int,
3853        new_labels: List[str] = [],
3854    ) -> Any:
3855        """Update an existing prompt version in Langfuse. The Langfuse SDK prompt cache is invalidated for all prompts with the specified name.
3856
3857        Args:
3858            name (str): The name of the prompt to update.
3859            version (int): The version number of the prompt to update.
3860            new_labels (List[str], optional): New labels to assign to the prompt version. Labels are unique across versions. The "latest" label is reserved and managed by Langfuse. Defaults to [].
3861
3862        Returns:
3863            Prompt: The updated prompt from the Langfuse API.
3864
3865        """
3866        updated_prompt = self.api.prompt_version.update(
3867            name=self._url_encode(name),
3868            version=version,
3869            new_labels=new_labels,
3870        )
3871
3872        if self._resources is not None:
3873            self._resources.prompt_cache.invalidate(name)
3874
3875        return updated_prompt

Update an existing prompt version in Langfuse. The Langfuse SDK prompt cache is invalidated for all prompts with the specified name.

Arguments:
  • name (str): The name of the prompt to update.
  • version (int): The version number of the prompt to update.
  • new_labels (List[str], optional): New labels to assign to the prompt version. Labels are unique across versions. The "latest" label is reserved and managed by Langfuse. Defaults to [].
Returns:

Prompt: The updated prompt from the Langfuse API.
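
A minimal sketch (the prompt name and version are illustrative):

```python
# Promote version 3 to production; the SDK prompt cache for this name is invalidated.
updated = langfuse.update_prompt(
    name="movie-critic",
    version=3,
    new_labels=["production"],
)
```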

def clear_prompt_cache(self) -> None:
3890    def clear_prompt_cache(self) -> None:
3891        """Clear the entire prompt cache, removing all cached prompts.
3892
3893        This method is useful when you want to force a complete refresh of all
3894        cached prompts, for example after major updates or when you need to
3895        ensure the latest versions are fetched from the server.
3896        """
3897        if self._resources is not None:
3898            self._resources.prompt_cache.clear()

Clear the entire prompt cache, removing all cached prompts.

This method is useful when you want to force a complete refresh of all cached prompts, for example after major updates or when you need to ensure the latest versions are fetched from the server.
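
For completeness, a one-line sketch of forcing a full refresh (the prompt name is illustrative):

```python
langfuse.clear_prompt_cache()                  # drop all cached prompts
prompt = langfuse.get_prompt("movie-critic")   # next call fetches fresh from the API
```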

def get_client(*, public_key: Optional[str] = None) -> Langfuse:
 61def get_client(*, public_key: Optional[str] = None) -> Langfuse:
 62    """Get or create a Langfuse client instance.
 63
 64    Returns an existing Langfuse client or creates a new one if none exists. In multi-project setups,
 65    providing a public_key is required. Multi-project support is experimental - see Langfuse docs.
 66
 67    Behavior:
 68    - Single project: Returns existing client or creates new one
 69    - Multi-project: Requires public_key to return specific client
 70    - No public_key in multi-project: Returns disabled client to prevent data leakage
 71
 72    The function uses a singleton pattern per public_key to conserve resources and maintain state.
 73
 74    Args:
 75        public_key (Optional[str]): Project identifier
 76            - With key: Returns client for that project
 77            - Without key: Returns single client or disabled client if multiple exist
 78
 79    Returns:
 80        Langfuse: Client instance in one of three states:
 81            1. Client for specified public_key
 82            2. Default client for single-project setup
 83            3. Disabled client when multiple projects exist without key
 84
 85    Security:
 86        Disables tracing when multiple projects exist without explicit key to prevent
 87        cross-project data leakage. Multi-project setups are experimental.
 88
 89    Example:
 90        ```python
 91        # Single project
 92        client = get_client()  # Default client
 93
 94        # In multi-project usage:
 95        client_a = get_client(public_key="project_a_key")  # Returns project A's client
 96        client_b = get_client(public_key="project_b_key")  # Returns project B's client
 97
 98        # Without specific key in multi-project setup:
 99        client = get_client()  # Returns disabled client for safety
100        ```
101    """
102    with LangfuseResourceManager._lock:
103        active_instances = LangfuseResourceManager._instances
104
105        # If no explicit public_key provided, check execution context
106        if not public_key:
107            public_key = _current_public_key.get(None)
108
109        if not public_key:
110            if len(active_instances) == 0:
111                # No clients initialized yet, create default instance
112                return Langfuse()
113
114            if len(active_instances) == 1:
115                # Only one client exists, safe to use without specifying key
116                instance = list(active_instances.values())[0]
117
118                # Initialize with the credentials bound to the instance
119                # This is important if the original instance was instantiated
120                # via constructor arguments
121                return _create_client_from_instance(instance)
122
123            else:
124                # Multiple clients exist but no key specified - disable tracing
125                # to prevent cross-project data leakage
126                langfuse_logger.warning(
127                    "No 'langfuse_public_key' passed to decorated function, but multiple langfuse clients are instantiated in current process. Skipping tracing for this function to avoid cross-project leakage."
128                )
129                return Langfuse(
130                    tracing_enabled=False, public_key="fake", secret_key="fake"
131                )
132
133        else:
134            # Specific key provided, look up existing instance
135            target_instance: Optional[LangfuseResourceManager] = active_instances.get(
136                public_key, None
137            )
138
139            if target_instance is None:
140                # No instance found with this key - client not initialized properly
141                langfuse_logger.warning(
142                    f"No Langfuse client with public key {public_key} has been initialized. Skipping tracing for decorated function."
143                )
144                return Langfuse(
145                    tracing_enabled=False, public_key="fake", secret_key="fake"
146                )
147
148            # target_instance is guaranteed to be not None at this point
149            return _create_client_from_instance(target_instance, public_key)

Get or create a Langfuse client instance.

Returns an existing Langfuse client or creates a new one if none exists. In multi-project setups, providing a public_key is required. Multi-project support is experimental - see Langfuse docs.

Behavior:

  • Single project: Returns existing client or creates new one
  • Multi-project: Requires public_key to return specific client
  • No public_key in multi-project: Returns disabled client to prevent data leakage

The function uses a singleton pattern per public_key to conserve resources and maintain state.

Arguments:
  • public_key (Optional[str]): Project identifier
    • With key: Returns client for that project
    • Without key: Returns single client or disabled client if multiple exist
Returns:

Langfuse: Client instance in one of three states:
  1. Client for the specified public_key
  2. Default client for a single-project setup
  3. Disabled client when multiple projects exist without a key

Security:

Disables tracing when multiple projects exist without explicit key to prevent cross-project data leakage. Multi-project setups are experimental.

Example:
# Single project
client = get_client()  # Default client

# In multi-project usage:
client_a = get_client(public_key="project_a_key")  # Returns project A's client
client_b = get_client(public_key="project_b_key")  # Returns project B's client

# Without specific key in multi-project setup:
client = get_client()  # Returns disabled client for safety
def observe( func: Optional[~F] = None, *, name: Optional[str] = None, as_type: Union[Literal['generation', 'embedding'], Literal['span', 'agent', 'tool', 'chain', 'retriever', 'evaluator', 'guardrail'], NoneType] = None, capture_input: Optional[bool] = None, capture_output: Optional[bool] = None, transform_to_string: Optional[Callable[[Iterable], str]] = None) -> Union[~F, Callable[[~F], ~F]]:
 90    def observe(
 91        self,
 92        func: Optional[F] = None,
 93        *,
 94        name: Optional[str] = None,
 95        as_type: Optional[ObservationTypeLiteralNoEvent] = None,
 96        capture_input: Optional[bool] = None,
 97        capture_output: Optional[bool] = None,
 98        transform_to_string: Optional[Callable[[Iterable], str]] = None,
 99    ) -> Union[F, Callable[[F], F]]:
100        """Wrap a function to create and manage Langfuse tracing around its execution, supporting both synchronous and asynchronous functions.
101
102        This decorator provides seamless integration of Langfuse observability into your codebase. It automatically creates
103        spans or generations around function execution, capturing timing, inputs/outputs, and error states. The decorator
104        intelligently handles both synchronous and asynchronous functions, preserving function signatures and type hints.
105
106        Using OpenTelemetry's distributed tracing system, it maintains proper trace context propagation throughout your application,
107        enabling you to see hierarchical traces of function calls with detailed performance metrics and function-specific details.
108
109        Args:
110            func (Optional[Callable]): The function to decorate. When used with parentheses @observe(), this will be None.
111            name (Optional[str]): Custom name for the created trace or span. If not provided, the function name is used.
112            as_type (Optional[Literal]): Set the observation type. Supported values:
113                    "generation", "span", "agent", "tool", "chain", "retriever", "embedding", "evaluator", "guardrail".
114                    Observation types are highlighted in the Langfuse UI for filtering and visualization.
115                    The types "generation" and "embedding" create a span on which additional attributes such as model metrics
116                    can be set.
117
118        Returns:
119            Callable: A wrapped version of the original function that automatically creates and manages Langfuse spans.
120
121        Example:
122            For general function tracing with automatic naming:
123            ```python
124            @observe()
125            def process_user_request(user_id, query):
126                # Function is automatically traced with name "process_user_request"
127                return get_response(query)
128            ```
129
130            For language model generation tracking:
131            ```python
132            @observe(name="answer-generation", as_type="generation")
133            async def generate_answer(query):
134                # Creates a generation-type span with extended LLM metrics
135                response = await openai.chat.completions.create(
136                    model="gpt-4",
137                    messages=[{"role": "user", "content": query}]
138                )
139                return response.choices[0].message.content
140            ```
141
142            For trace context propagation between functions:
143            ```python
144            @observe()
145            def main_process():
146                # Parent span is created
147                return sub_process()  # Child span automatically connected to parent
148
149            @observe()
150            def sub_process():
151                # Automatically becomes a child span of main_process
152                return "result"
153            ```
154
155        Raises:
156            Exception: Propagates any exceptions from the wrapped function after logging them in the trace.
157
158        Notes:
159            - The decorator preserves the original function's signature, docstring, and return type.
160            - Proper parent-child relationships between spans are automatically maintained.
161            - Special keyword arguments can be passed to control tracing:
162              - langfuse_trace_id: Explicitly set the trace ID for this function call
163              - langfuse_parent_observation_id: Explicitly set the parent span ID
164              - langfuse_public_key: Use a specific Langfuse project (when multiple clients exist)
165            - For async functions, the decorator returns an async function wrapper.
166            - For sync functions, the decorator returns a synchronous wrapper.
167        """
168        valid_types = set(get_observation_types_list(ObservationTypeLiteralNoEvent))
169        if as_type is not None and as_type not in valid_types:
170            self._log.warning(
171                f"Invalid as_type '{as_type}'. Valid types are: {', '.join(sorted(valid_types))}. Defaulting to 'span'."
172            )
173            as_type = "span"
174
175        function_io_capture_enabled = os.environ.get(
176            LANGFUSE_OBSERVE_DECORATOR_IO_CAPTURE_ENABLED, "True"
177        ).lower() not in ("false", "0")
178
179        should_capture_input = (
180            capture_input if capture_input is not None else function_io_capture_enabled
181        )
182
183        should_capture_output = (
184            capture_output
185            if capture_output is not None
186            else function_io_capture_enabled
187        )
188
189        def decorator(func: F) -> F:
190            return (
191                self._async_observe(
192                    func,
193                    name=name,
194                    as_type=as_type,
195                    capture_input=should_capture_input,
196                    capture_output=should_capture_output,
197                    transform_to_string=transform_to_string,
198                )
199                if asyncio.iscoroutinefunction(func)
200                else self._sync_observe(
201                    func,
202                    name=name,
203                    as_type=as_type,
204                    capture_input=should_capture_input,
205                    capture_output=should_capture_output,
206                    transform_to_string=transform_to_string,
207                )
208            )
209
210        """Handle decorator with or without parentheses.
211
212        This logic enables the decorator to work both with and without parentheses:
213        - @observe - Python passes the function directly to the decorator
214        - @observe() - Python calls the decorator first, which must return a function decorator
215
216        When called without arguments (@observe), the func parameter contains the function to decorate,
217        so we directly apply the decorator to it. When called with parentheses (@observe()),
218        func is None, so we return the decorator function itself for Python to apply in the next step.
219        """
220        if func is None:
221            return decorator
222        else:
223            return decorator(func)

Wrap a function to create and manage Langfuse tracing around its execution, supporting both synchronous and asynchronous functions.

This decorator provides seamless integration of Langfuse observability into your codebase. It automatically creates spans or generations around function execution, capturing timing, inputs/outputs, and error states. The decorator intelligently handles both synchronous and asynchronous functions, preserving function signatures and type hints.

Using OpenTelemetry's distributed tracing system, it maintains proper trace context propagation throughout your application, enabling you to see hierarchical traces of function calls with detailed performance metrics and function-specific details.

Arguments:
  • func (Optional[Callable]): The function to decorate. When used with parentheses @observe(), this will be None.
  • name (Optional[str]): Custom name for the created trace or span. If not provided, the function name is used.
  • as_type (Optional[Literal]): Set the observation type. Supported values: "generation", "span", "agent", "tool", "chain", "retriever", "embedding", "evaluator", "guardrail". Observation types are highlighted in the Langfuse UI for filtering and visualization. The types "generation" and "embedding" create a span on which additional attributes such as model metrics can be set.
Returns:

Callable: A wrapped version of the original function that automatically creates and manages Langfuse spans.

Example:

For general function tracing with automatic naming:

@observe()
def process_user_request(user_id, query):
    # Function is automatically traced with name "process_user_request"
    return get_response(query)

For language model generation tracking:

@observe(name="answer-generation", as_type="generation")
async def generate_answer(query):
    # Creates a generation-type span with extended LLM metrics
    response = await openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": query}]
    )
    return response.choices[0].message.content

For trace context propagation between functions:

@observe()
def main_process():
    # Parent span is created
    return sub_process()  # Child span automatically connected to parent

@observe()
def sub_process():
    # Automatically becomes a child span of main_process
    return "result"
Raises:
  • Exception: Propagates any exceptions from the wrapped function after logging them in the trace.
Notes:
  • The decorator preserves the original function's signature, docstring, and return type.
  • Proper parent-child relationships between spans are automatically maintained.
  • Special keyword arguments can be passed to control tracing (see the sketch after these notes):
    • langfuse_trace_id: Explicitly set the trace ID for this function call
    • langfuse_parent_observation_id: Explicitly set the parent span ID
    • langfuse_public_key: Use a specific Langfuse project (when multiple clients exist)
  • For async functions, the decorator returns an async function wrapper.
  • For sync functions, the decorator returns a synchronous wrapper.
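
A minimal sketch of the special tracing kwargs listed in the notes above (the trace ID, public key, and function are illustrative; these kwargs are expected to be consumed by the decorator rather than forwarded to the function):

```python
from langfuse import observe

@observe()
def handle_request(query: str) -> str:
    return query.upper()

handle_request(
    "hello",
    langfuse_trace_id="7f2b1c0e4a5d4e0f8b6a2c1d3e4f5a6b",  # hypothetical 32-char hex trace ID
    langfuse_public_key="<project-public-key>",             # route to a specific client in multi-project setups
)
```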
def propagate_attributes( *, user_id: Optional[str] = None, session_id: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, version: Optional[str] = None, tags: Optional[List[str]] = None, trace_name: Optional[str] = None, as_baggage: bool = False) -> opentelemetry.util._decorator._AgnosticContextManager[typing.Any]:
 76def propagate_attributes(
 77    *,
 78    user_id: Optional[str] = None,
 79    session_id: Optional[str] = None,
 80    metadata: Optional[Dict[str, str]] = None,
 81    version: Optional[str] = None,
 82    tags: Optional[List[str]] = None,
 83    trace_name: Optional[str] = None,
 84    as_baggage: bool = False,
 85) -> _AgnosticContextManager[Any]:
 86    """Propagate trace-level attributes to all spans created within this context.
 87
 88    This context manager sets attributes on the currently active span AND automatically
 89    propagates them to all new child spans created within the context. This is the
 90    recommended way to set trace-level attributes like user_id, session_id, and metadata
 91    dimensions that should be consistently applied across all observations in a trace.
 92
 93    **IMPORTANT**: Call this as early as possible within your trace/workflow. Only the
 94    currently active span and spans created after entering this context will have these
 95    attributes. Pre-existing spans will NOT be retroactively updated.
 96
 97    **Why this matters**: Langfuse aggregation queries (e.g., total cost by user_id,
 98    filtering by session_id) only include observations that have the attribute set.
 99    If you call `propagate_attributes` late in your workflow, earlier spans won't be
100    included in aggregations for that attribute.
101
102    Args:
103        user_id: User identifier to associate with all spans in this context.
104            Must be US-ASCII string, ≤200 characters. Use this to track which user
105            generated each trace and enable e.g. per-user cost/performance analysis.
106        session_id: Session identifier to associate with all spans in this context.
107            Must be US-ASCII string, ≤200 characters. Use this to group related traces
108            within a user session (e.g., a conversation thread, multi-turn interaction).
109        metadata: Additional key-value metadata to propagate to all spans.
110            - Keys and values must be US-ASCII strings
111            - All values must be ≤200 characters
112            - Use for dimensions like internal correlating identifiers
113            - AVOID: large payloads, sensitive data, non-string values (will be dropped with warning)
114        version: Version identifier for parts of your application that are independently versioned, e.g. agents
115        tags: List of tags to categorize the group of observations
116        trace_name: Name to assign to the trace. Must be US-ASCII string, ≤200 characters.
117            Use this to set a consistent trace name for all spans created within this context.
118        as_baggage: If True, propagates attributes using OpenTelemetry baggage for
119            cross-process/service propagation. **Security warning**: When enabled,
120            attribute values are added to HTTP headers on ALL outbound requests.
121            Only enable if values are safe to transmit via HTTP headers and you need
122            cross-service tracing. Default: False.
123
124    Returns:
125        Context manager that propagates attributes to all child spans.
126
127    Example:
128        Basic usage with user and session tracking:
129
130        ```python
131        from langfuse import Langfuse
132
133        langfuse = Langfuse()
134
135        # Set attributes early in the trace
136        with langfuse.start_as_current_span(name="user_workflow") as span:
137            with langfuse.propagate_attributes(
138                user_id="user_123",
139                session_id="session_abc",
140                metadata={"experiment": "variant_a", "environment": "production"}
141            ):
142                # All spans created here will have user_id, session_id, and metadata
143                with langfuse.start_span(name="llm_call") as llm_span:
144                    # This span inherits: user_id, session_id, experiment, environment
145                    ...
146
147                with langfuse.start_generation(name="completion") as gen:
148                    # This span also inherits all attributes
149                    ...
150        ```
151
152        Late propagation (anti-pattern):
153
154        ```python
155        with langfuse.start_as_current_span(name="workflow") as span:
156            # These spans WON'T have user_id
157            early_span = langfuse.start_span(name="early_work")
158            early_span.end()
159
160            # Set attributes in the middle
161            with langfuse.propagate_attributes(user_id="user_123"):
162                # Only spans created AFTER this point will have user_id
163                late_span = langfuse.start_span(name="late_work")
164                late_span.end()
165
166            # Result: Aggregations by user_id will miss "early_work" span
167        ```
168
169        Cross-service propagation with baggage (advanced):
170
171        ```python
172        # Service A - originating service
173        with langfuse.start_as_current_span(name="api_request"):
174            with langfuse.propagate_attributes(
175                user_id="user_123",
176                session_id="session_abc",
177                as_baggage=True  # Propagate via HTTP headers
178            ):
179                # Make HTTP request to Service B
180                response = requests.get("https://service-b.example.com/api")
181                # user_id and session_id are now in HTTP headers
182
183        # Service B - downstream service
184        # OpenTelemetry will automatically extract baggage from HTTP headers
185        # and propagate to spans in Service B
186        ```
187
188    Note:
189        - **Validation**: All attribute values (user_id, session_id, metadata values)
190          must be strings ≤200 characters. Invalid values will be dropped with a
191          warning logged. Ensure values meet constraints before calling.
192        - **OpenTelemetry**: This uses OpenTelemetry context propagation under the hood,
193          making it compatible with other OTel-instrumented libraries.
194
195    Raises:
196        No exceptions are raised. Invalid values are logged as warnings and dropped.
197    """
198    return _propagate_attributes(
199        user_id=user_id,
200        session_id=session_id,
201        metadata=metadata,
202        version=version,
203        tags=tags,
204        trace_name=trace_name,
205        as_baggage=as_baggage,
206    )

Propagate trace-level attributes to all spans created within this context.

This context manager sets attributes on the currently active span AND automatically propagates them to all new child spans created within the context. This is the recommended way to set trace-level attributes like user_id, session_id, and metadata dimensions that should be consistently applied across all observations in a trace.

IMPORTANT: Call this as early as possible within your trace/workflow. Only the currently active span and spans created after entering this context will have these attributes. Pre-existing spans will NOT be retroactively updated.

Why this matters: Langfuse aggregation queries (e.g., total cost by user_id, filtering by session_id) only include observations that have the attribute set. If you call propagate_attributes late in your workflow, earlier spans won't be included in aggregations for that attribute.

Arguments:
  • user_id: User identifier to associate with all spans in this context. Must be US-ASCII string, ≤200 characters. Use this to track which user generated each trace and enable e.g. per-user cost/performance analysis.
  • session_id: Session identifier to associate with all spans in this context. Must be US-ASCII string, ≤200 characters. Use this to group related traces within a user session (e.g., a conversation thread, multi-turn interaction).
  • metadata: Additional key-value metadata to propagate to all spans.
    • Keys and values must be US-ASCII strings
    • All values must be ≤200 characters
    • Use for dimensions like internal correlating identifiers
    • AVOID: large payloads, sensitive data, non-string values (will be dropped with warning)
  • version: Version identifier for parts of your application that are independently versioned, e.g. agents
  • tags: List of tags to categorize the group of observations
  • trace_name: Name to assign to the trace. Must be US-ASCII string, ≤200 characters. Use this to set a consistent trace name for all spans created within this context.
  • as_baggage: If True, propagates attributes using OpenTelemetry baggage for cross-process/service propagation. Security warning: When enabled, attribute values are added to HTTP headers on ALL outbound requests. Only enable if values are safe to transmit via HTTP headers and you need cross-service tracing. Default: False.
Returns:

Context manager that propagates attributes to all child spans.

Example:

Basic usage with user and session tracking:

from langfuse import Langfuse

langfuse = Langfuse()

# Set attributes early in the trace
with langfuse.start_as_current_span(name="user_workflow") as span:
    with langfuse.propagate_attributes(
        user_id="user_123",
        session_id="session_abc",
        metadata={"experiment": "variant_a", "environment": "production"}
    ):
        # All spans created here will have user_id, session_id, and metadata
        with langfuse.start_span(name="llm_call") as llm_span:
            # This span inherits: user_id, session_id, experiment, environment
            ...

        with langfuse.start_generation(name="completion") as gen:
            # This span also inherits all attributes
            ...

Late propagation (anti-pattern):

with langfuse.start_as_current_span(name="workflow") as span:
    # These spans WON'T have user_id
    early_span = langfuse.start_span(name="early_work")
    early_span.end()

    # Set attributes in the middle
    with langfuse.propagate_attributes(user_id="user_123"):
        # Only spans created AFTER this point will have user_id
        late_span = langfuse.start_span(name="late_work")
        late_span.end()

    # Result: Aggregations by user_id will miss "early_work" span

Cross-service propagation with baggage (advanced):

# Service A - originating service
with langfuse.start_as_current_span(name="api_request"):
    with langfuse.propagate_attributes(
        user_id="user_123",
        session_id="session_abc",
        as_baggage=True  # Propagate via HTTP headers
    ):
        # Make HTTP request to Service B
        response = requests.get("https://service-b.example.com/api")
        # user_id and session_id are now in HTTP headers

# Service B - downstream service
# OpenTelemetry will automatically extract baggage from HTTP headers
# and propagate to spans in Service B
Note:
  • Validation: All attribute values (user_id, session_id, metadata values) must be strings ≤200 characters. Invalid values will be dropped with a warning logged. Ensure values meet constraints before calling.
  • OpenTelemetry: This uses OpenTelemetry context propagation under the hood, making it compatible with other OTel-instrumented libraries.
Raises:
  • No exceptions are raised. Invalid values are logged as warnings and dropped.
ObservationTypeLiteral = typing.Union[typing.Literal['generation', 'embedding'], typing.Literal['span', 'agent', 'tool', 'chain', 'retriever', 'evaluator', 'guardrail'], typing.Literal['event']]
class LangfuseSpan(langfuse._client.span.LangfuseObservationWrapper):
1166class LangfuseSpan(LangfuseObservationWrapper):
1167    """Standard span implementation for general operations in Langfuse.
1168
1169    This class represents a general-purpose span that can be used to trace
1170    any operation in your application. It extends the base LangfuseObservationWrapper
1171    with specific methods for creating child spans, generations, and updating
1172    span-specific attributes. If possible, use a more specific type for
1173    better observability and insights.
1174    """
1175
1176    def __init__(
1177        self,
1178        *,
1179        otel_span: otel_trace_api.Span,
1180        langfuse_client: "Langfuse",
1181        input: Optional[Any] = None,
1182        output: Optional[Any] = None,
1183        metadata: Optional[Any] = None,
1184        environment: Optional[str] = None,
1185        version: Optional[str] = None,
1186        level: Optional[SpanLevel] = None,
1187        status_message: Optional[str] = None,
1188    ):
1189        """Initialize a new LangfuseSpan.
1190
1191        Args:
1192            otel_span: The OpenTelemetry span to wrap
1193            langfuse_client: Reference to the parent Langfuse client
1194            input: Input data for the span (any JSON-serializable object)
1195            output: Output data from the span (any JSON-serializable object)
1196            metadata: Additional metadata to associate with the span
1197            environment: The tracing environment
1198            version: Version identifier for the code or component
1199            level: Importance level of the span (info, warning, error)
1200            status_message: Optional status message for the span
1201        """
1202        super().__init__(
1203            otel_span=otel_span,
1204            as_type="span",
1205            langfuse_client=langfuse_client,
1206            input=input,
1207            output=output,
1208            metadata=metadata,
1209            environment=environment,
1210            version=version,
1211            level=level,
1212            status_message=status_message,
1213        )
1214
1215    def start_span(
1216        self,
1217        name: str,
1218        input: Optional[Any] = None,
1219        output: Optional[Any] = None,
1220        metadata: Optional[Any] = None,
1221        version: Optional[str] = None,
1222        level: Optional[SpanLevel] = None,
1223        status_message: Optional[str] = None,
1224    ) -> "LangfuseSpan":
1225        """Create a new child span.
1226
1227        This method creates a new child span with this span as the parent.
1228        Unlike start_as_current_span(), this method does not set the new span
1229        as the current span in the context.
1230
1231        Args:
1232            name: Name of the span (e.g., function or operation name)
1233            input: Input data for the operation
1234            output: Output data from the operation
1235            metadata: Additional metadata to associate with the span
1236            version: Version identifier for the code or component
1237            level: Importance level of the span (info, warning, error)
1238            status_message: Optional status message for the span
1239
1240        Returns:
1241            A new LangfuseSpan that must be ended with .end() when complete
1242
1243        Example:
1244            ```python
1245            parent_span = langfuse.start_span(name="process-request")
1246            try:
1247                # Create a child span
1248                child_span = parent_span.start_span(name="validate-input")
1249                try:
1250                    # Do validation work
1251                    validation_result = validate(request_data)
1252                    child_span.update(output=validation_result)
1253                finally:
1254                    child_span.end()
1255
1256                # Continue with parent span
1257                result = process_validated_data(validation_result)
1258                parent_span.update(output=result)
1259            finally:
1260                parent_span.end()
1261            ```
1262        """
1263        return self.start_observation(
1264            name=name,
1265            as_type="span",
1266            input=input,
1267            output=output,
1268            metadata=metadata,
1269            version=version,
1270            level=level,
1271            status_message=status_message,
1272        )
1273
1274    def start_as_current_span(
1275        self,
1276        *,
1277        name: str,
1278        input: Optional[Any] = None,
1279        output: Optional[Any] = None,
1280        metadata: Optional[Any] = None,
1281        version: Optional[str] = None,
1282        level: Optional[SpanLevel] = None,
1283        status_message: Optional[str] = None,
1284    ) -> _AgnosticContextManager["LangfuseSpan"]:
1285        """[DEPRECATED] Create a new child span and set it as the current span in a context manager.
1286
1287        DEPRECATED: This method is deprecated and will be removed in a future version.
1288        Use start_as_current_observation(as_type='span') instead.
1289
1290        This method creates a new child span and sets it as the current span within
1291        a context manager. It should be used with a 'with' statement to automatically
1292        manage the span's lifecycle.
1293
1294        Args:
1295            name: Name of the span (e.g., function or operation name)
1296            input: Input data for the operation
1297            output: Output data from the operation
1298            metadata: Additional metadata to associate with the span
1299            version: Version identifier for the code or component
1300            level: Importance level of the span (info, warning, error)
1301            status_message: Optional status message for the span
1302
1303        Returns:
1304            A context manager that yields a new LangfuseSpan
1305
1306        Example:
1307            ```python
1308            with langfuse.start_as_current_span(name="process-request") as parent_span:
1309                # Parent span is active here
1310
1311                # Create a child span with context management
1312                with parent_span.start_as_current_span(name="validate-input") as child_span:
1313                    # Child span is active here
1314                    validation_result = validate(request_data)
1315                    child_span.update(output=validation_result)
1316
1317                # Back to parent span context
1318                result = process_validated_data(validation_result)
1319                parent_span.update(output=result)
1320            ```
1321        """
1322        warnings.warn(
1323            "start_as_current_span is deprecated and will be removed in a future version. "
1324            "Use start_as_current_observation(as_type='span') instead.",
1325            DeprecationWarning,
1326            stacklevel=2,
1327        )
1328        return self.start_as_current_observation(
1329            name=name,
1330            as_type="span",
1331            input=input,
1332            output=output,
1333            metadata=metadata,
1334            version=version,
1335            level=level,
1336            status_message=status_message,
1337        )
1338
1339    def start_generation(
1340        self,
1341        *,
1342        name: str,
1343        input: Optional[Any] = None,
1344        output: Optional[Any] = None,
1345        metadata: Optional[Any] = None,
1346        version: Optional[str] = None,
1347        level: Optional[SpanLevel] = None,
1348        status_message: Optional[str] = None,
1349        completion_start_time: Optional[datetime] = None,
1350        model: Optional[str] = None,
1351        model_parameters: Optional[Dict[str, MapValue]] = None,
1352        usage_details: Optional[Dict[str, int]] = None,
1353        cost_details: Optional[Dict[str, float]] = None,
1354        prompt: Optional[PromptClient] = None,
1355    ) -> "LangfuseGeneration":
1356        """[DEPRECATED] Create a new child generation span.
1357
1358        DEPRECATED: This method is deprecated and will be removed in a future version.
1359        Use start_observation(as_type='generation') instead.
1360
1361        This method creates a new child generation span with this span as the parent.
1362        Generation spans are specialized for AI/LLM operations and include additional
1363        fields for model information, usage stats, and costs.
1364
1365        Unlike start_as_current_generation(), this method does not set the new span
1366        as the current span in the context.
1367
1368        Args:
1369            name: Name of the generation operation
1370            input: Input data for the model (e.g., prompts)
1371            output: Output from the model (e.g., completions)
1372            metadata: Additional metadata to associate with the generation
1373            version: Version identifier for the model or component
1374            level: Importance level of the generation (info, warning, error)
1375            status_message: Optional status message for the generation
1376            completion_start_time: When the model started generating the response
1377            model: Name/identifier of the AI model used (e.g., "gpt-4")
1378            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1379            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1380            cost_details: Cost information for the model call
1381            prompt: Associated prompt template from Langfuse prompt management
1382
1383        Returns:
1384            A new LangfuseGeneration that must be ended with .end() when complete
1385
1386        Example:
1387            ```python
1388            span = langfuse.start_span(name="process-query")
1389            try:
1390                # Create a generation child span
1391                generation = span.start_generation(
1392                    name="generate-answer",
1393                    model="gpt-4",
1394                    input={"prompt": "Explain quantum computing"}
1395                )
1396                try:
1397                    # Call model API
1398                    response = llm.generate(...)
1399
1400                    generation.update(
1401                        output=response.text,
1402                        usage_details={
1403                            "prompt_tokens": response.usage.prompt_tokens,
1404                            "completion_tokens": response.usage.completion_tokens
1405                        }
1406                    )
1407                finally:
1408                    generation.end()
1409
1410                # Continue with parent span
1411                span.update(output={"answer": response.text, "source": "gpt-4"})
1412            finally:
1413                span.end()
1414            ```
1415        """
1416        warnings.warn(
1417            "start_generation is deprecated and will be removed in a future version. "
1418            "Use start_observation(as_type='generation') instead.",
1419            DeprecationWarning,
1420            stacklevel=2,
1421        )
1422        return self.start_observation(
1423            name=name,
1424            as_type="generation",
1425            input=input,
1426            output=output,
1427            metadata=metadata,
1428            version=version,
1429            level=level,
1430            status_message=status_message,
1431            completion_start_time=completion_start_time,
1432            model=model,
1433            model_parameters=model_parameters,
1434            usage_details=usage_details,
1435            cost_details=cost_details,
1436            prompt=prompt,
1437        )
1438
1439    def start_as_current_generation(
1440        self,
1441        *,
1442        name: str,
1443        input: Optional[Any] = None,
1444        output: Optional[Any] = None,
1445        metadata: Optional[Any] = None,
1446        version: Optional[str] = None,
1447        level: Optional[SpanLevel] = None,
1448        status_message: Optional[str] = None,
1449        completion_start_time: Optional[datetime] = None,
1450        model: Optional[str] = None,
1451        model_parameters: Optional[Dict[str, MapValue]] = None,
1452        usage_details: Optional[Dict[str, int]] = None,
1453        cost_details: Optional[Dict[str, float]] = None,
1454        prompt: Optional[PromptClient] = None,
1455    ) -> _AgnosticContextManager["LangfuseGeneration"]:
1456        """[DEPRECATED] Create a new child generation span and set it as the current span in a context manager.
1457
1458        DEPRECATED: This method is deprecated and will be removed in a future version.
1459        Use start_as_current_observation(as_type='generation') instead.
1460
1461        This method creates a new child generation span and sets it as the current span
1462        within a context manager. Generation spans are specialized for AI/LLM operations
1463        and include additional fields for model information, usage stats, and costs.
1464
1465        Args:
1466            name: Name of the generation operation
1467            input: Input data for the model (e.g., prompts)
1468            output: Output from the model (e.g., completions)
1469            metadata: Additional metadata to associate with the generation
1470            version: Version identifier for the model or component
1471            level: Importance level of the generation (info, warning, error)
1472            status_message: Optional status message for the generation
1473            completion_start_time: When the model started generating the response
1474            model: Name/identifier of the AI model used (e.g., "gpt-4")
1475            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1476            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1477            cost_details: Cost information for the model call
1478            prompt: Associated prompt template from Langfuse prompt management
1479
1480        Returns:
1481            A context manager that yields a new LangfuseGeneration
1482
1483        Example:
1484            ```python
1485            with langfuse.start_as_current_span(name="process-request") as span:
1486                # Prepare data
1487                query = preprocess_user_query(user_input)
1488
1489                # Create a generation span with context management
1490                with span.start_as_current_generation(
1491                    name="generate-answer",
1492                    model="gpt-4",
1493                    input={"query": query}
1494                ) as generation:
1495                    # Generation span is active here
1496                    response = llm.generate(query)
1497
1498                    # Update with results
1499                    generation.update(
1500                        output=response.text,
1501                        usage_details={
1502                            "prompt_tokens": response.usage.prompt_tokens,
1503                            "completion_tokens": response.usage.completion_tokens
1504                        }
1505                    )
1506
1507                # Back to parent span context
1508                span.update(output={"answer": response.text, "source": "gpt-4"})
1509            ```
1510        """
1511        warnings.warn(
1512            "start_as_current_generation is deprecated and will be removed in a future version. "
1513            "Use start_as_current_observation(as_type='generation') instead.",
1514            DeprecationWarning,
1515            stacklevel=2,
1516        )
1517        return self.start_as_current_observation(
1518            name=name,
1519            as_type="generation",
1520            input=input,
1521            output=output,
1522            metadata=metadata,
1523            version=version,
1524            level=level,
1525            status_message=status_message,
1526            completion_start_time=completion_start_time,
1527            model=model,
1528            model_parameters=model_parameters,
1529            usage_details=usage_details,
1530            cost_details=cost_details,
1531            prompt=prompt,
1532        )
1533
1534    def create_event(
1535        self,
1536        *,
1537        name: str,
1538        input: Optional[Any] = None,
1539        output: Optional[Any] = None,
1540        metadata: Optional[Any] = None,
1541        version: Optional[str] = None,
1542        level: Optional[SpanLevel] = None,
1543        status_message: Optional[str] = None,
1544    ) -> "LangfuseEvent":
1545        """Create a new Langfuse observation of type 'EVENT'.
1546
1547        Args:
1548            name: Name of the span (e.g., function or operation name)
1549            input: Input data for the operation (can be any JSON-serializable object)
1550            output: Output data from the operation (can be any JSON-serializable object)
1551            metadata: Additional metadata to associate with the span
1552            version: Version identifier for the code or component
1553            level: Importance level of the span (info, warning, error)
1554            status_message: Optional status message for the span
1555
1556        Returns:
1557            The LangfuseEvent object
1558
1559        Example:
1560            ```python
1561            event = langfuse.create_event(name="process-event")
1562            ```
1563        """
1564        timestamp = time_ns()
1565
1566        with otel_trace_api.use_span(self._otel_span):
1567            new_otel_span = self._langfuse_client._otel_tracer.start_span(
1568                name=name, start_time=timestamp
1569            )
1570
1571        return cast(
1572            "LangfuseEvent",
1573            LangfuseEvent(
1574                otel_span=new_otel_span,
1575                langfuse_client=self._langfuse_client,
1576                input=input,
1577                output=output,
1578                metadata=metadata,
1579                environment=self._environment,
1580                version=version,
1581                level=level,
1582                status_message=status_message,
1583            ).end(end_time=timestamp),
1584        )

Standard span implementation for general operations in Langfuse.

This class represents a general-purpose span that can be used to trace any operation in your application. It extends the base LangfuseObservationWrapper with specific methods for creating child spans, generations, and updating span-specific attributes. If possible, use a more specific type for better observability and insights.
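
A short sketch of working with a LangfuseSpan obtained from the client (names and payloads are illustrative; start_observation, create_event, update, and end are the wrapper methods shown in the source and examples below):

```python
span = langfuse.start_span(name="handle-request", input={"query": "hello"})
try:
    # Child observation with a more specific type for better insights
    retrieval = span.start_observation(name="fetch-docs", as_type="retriever")
    retrieval.end()

    # Point-in-time event attached to this span
    span.create_event(name="cache-hit", metadata={"key": "docs:hello"})

    span.update(output={"status": "ok"})
finally:
    span.end()
```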

LangfuseSpan( *, otel_span: opentelemetry.trace.span.Span, langfuse_client: Langfuse, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, environment: Optional[str] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None)
1176    def __init__(
1177        self,
1178        *,
1179        otel_span: otel_trace_api.Span,
1180        langfuse_client: "Langfuse",
1181        input: Optional[Any] = None,
1182        output: Optional[Any] = None,
1183        metadata: Optional[Any] = None,
1184        environment: Optional[str] = None,
1185        version: Optional[str] = None,
1186        level: Optional[SpanLevel] = None,
1187        status_message: Optional[str] = None,
1188    ):
1189        """Initialize a new LangfuseSpan.
1190
1191        Args:
1192            otel_span: The OpenTelemetry span to wrap
1193            langfuse_client: Reference to the parent Langfuse client
1194            input: Input data for the span (any JSON-serializable object)
1195            output: Output data from the span (any JSON-serializable object)
1196            metadata: Additional metadata to associate with the span
1197            environment: The tracing environment
1198            version: Version identifier for the code or component
1199            level: Importance level of the span (info, warning, error)
1200            status_message: Optional status message for the span
1201        """
1202        super().__init__(
1203            otel_span=otel_span,
1204            as_type="span",
1205            langfuse_client=langfuse_client,
1206            input=input,
1207            output=output,
1208            metadata=metadata,
1209            environment=environment,
1210            version=version,
1211            level=level,
1212            status_message=status_message,
1213        )

Initialize a new LangfuseSpan.

Arguments:
  • otel_span: The OpenTelemetry span to wrap
  • langfuse_client: Reference to the parent Langfuse client
  • input: Input data for the span (any JSON-serializable object)
  • output: Output data from the span (any JSON-serializable object)
  • metadata: Additional metadata to associate with the span
  • environment: The tracing environment
  • version: Version identifier for the code or component
  • level: Importance level of the span (info, warning, error)
  • status_message: Optional status message for the span
def start_span( self, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None) -> LangfuseSpan:
1215    def start_span(
1216        self,
1217        name: str,
1218        input: Optional[Any] = None,
1219        output: Optional[Any] = None,
1220        metadata: Optional[Any] = None,
1221        version: Optional[str] = None,
1222        level: Optional[SpanLevel] = None,
1223        status_message: Optional[str] = None,
1224    ) -> "LangfuseSpan":
1225        """Create a new child span.
1226
1227        This method creates a new child span with this span as the parent.
1228        Unlike start_as_current_span(), this method does not set the new span
1229        as the current span in the context.
1230
1231        Args:
1232            name: Name of the span (e.g., function or operation name)
1233            input: Input data for the operation
1234            output: Output data from the operation
1235            metadata: Additional metadata to associate with the span
1236            version: Version identifier for the code or component
1237            level: Importance level of the span (info, warning, error)
1238            status_message: Optional status message for the span
1239
1240        Returns:
1241            A new LangfuseSpan that must be ended with .end() when complete
1242
1243        Example:
1244            ```python
1245            parent_span = langfuse.start_span(name="process-request")
1246            try:
1247                # Create a child span
1248                child_span = parent_span.start_span(name="validate-input")
1249                try:
1250                    # Do validation work
1251                    validation_result = validate(request_data)
1252                    child_span.update(output=validation_result)
1253                finally:
1254                    child_span.end()
1255
1256                # Continue with parent span
1257                result = process_validated_data(validation_result)
1258                parent_span.update(output=result)
1259            finally:
1260                parent_span.end()
1261            ```
1262        """
1263        return self.start_observation(
1264            name=name,
1265            as_type="span",
1266            input=input,
1267            output=output,
1268            metadata=metadata,
1269            version=version,
1270            level=level,
1271            status_message=status_message,
1272        )

Create a new child span.

This method creates a new child span with this span as the parent. Unlike start_as_current_span(), this method does not set the new span as the current span in the context.

Arguments:
  • name: Name of the span (e.g., function or operation name)
  • input: Input data for the operation
  • output: Output data from the operation
  • metadata: Additional metadata to associate with the span
  • version: Version identifier for the code or component
  • level: Importance level of the span (DEBUG, DEFAULT, WARNING, or ERROR)
  • status_message: Optional status message for the span
Returns:

A new LangfuseSpan that must be ended with .end() when complete

Example:
parent_span = langfuse.start_span(name="process-request")
try:
    # Create a child span
    child_span = parent_span.start_span(name="validate-input")
    try:
        # Do validation work
        validation_result = validate(request_data)
        child_span.update(output=validation_result)
    finally:
        child_span.end()

    # Continue with parent span
    result = process_validated_data(validation_result)
    parent_span.update(output=result)
finally:
    parent_span.end()
def start_as_current_span( self, *, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None) -> opentelemetry.util._decorator._AgnosticContextManager[LangfuseSpan]:
1274    def start_as_current_span(
1275        self,
1276        *,
1277        name: str,
1278        input: Optional[Any] = None,
1279        output: Optional[Any] = None,
1280        metadata: Optional[Any] = None,
1281        version: Optional[str] = None,
1282        level: Optional[SpanLevel] = None,
1283        status_message: Optional[str] = None,
1284    ) -> _AgnosticContextManager["LangfuseSpan"]:
1285        """[DEPRECATED] Create a new child span and set it as the current span in a context manager.
1286
1287        DEPRECATED: This method is deprecated and will be removed in a future version.
1288        Use start_as_current_observation(as_type='span') instead.
1289
1290        This method creates a new child span and sets it as the current span within
1291        a context manager. It should be used with a 'with' statement to automatically
1292        manage the span's lifecycle.
1293
1294        Args:
1295            name: Name of the span (e.g., function or operation name)
1296            input: Input data for the operation
1297            output: Output data from the operation
1298            metadata: Additional metadata to associate with the span
1299            version: Version identifier for the code or component
1300            level: Importance level of the span (info, warning, error)
1301            status_message: Optional status message for the span
1302
1303        Returns:
1304            A context manager that yields a new LangfuseSpan
1305
1306        Example:
1307            ```python
1308            with langfuse.start_as_current_span(name="process-request") as parent_span:
1309                # Parent span is active here
1310
1311                # Create a child span with context management
1312                with parent_span.start_as_current_span(name="validate-input") as child_span:
1313                    # Child span is active here
1314                    validation_result = validate(request_data)
1315                    child_span.update(output=validation_result)
1316
1317                # Back to parent span context
1318                result = process_validated_data(validation_result)
1319                parent_span.update(output=result)
1320            ```
1321        """
1322        warnings.warn(
1323            "start_as_current_span is deprecated and will be removed in a future version. "
1324            "Use start_as_current_observation(as_type='span') instead.",
1325            DeprecationWarning,
1326            stacklevel=2,
1327        )
1328        return self.start_as_current_observation(
1329            name=name,
1330            as_type="span",
1331            input=input,
1332            output=output,
1333            metadata=metadata,
1334            version=version,
1335            level=level,
1336            status_message=status_message,
1337        )

[DEPRECATED] Create a new child span and set it as the current span in a context manager.

DEPRECATED: This method is deprecated and will be removed in a future version. Use start_as_current_observation(as_type='span') instead.

This method creates a new child span and sets it as the current span within a context manager. It should be used with a 'with' statement to automatically manage the span's lifecycle.

Arguments:
  • name: Name of the span (e.g., function or operation name)
  • input: Input data for the operation
  • output: Output data from the operation
  • metadata: Additional metadata to associate with the span
  • version: Version identifier for the code or component
  • level: Importance level of the span (DEBUG, DEFAULT, WARNING, or ERROR)
  • status_message: Optional status message for the span
Returns:

A context manager that yields a new LangfuseSpan

Example:
with langfuse.start_as_current_span(name="process-request") as parent_span:
    # Parent span is active here

    # Create a child span with context management
    with parent_span.start_as_current_span(name="validate-input") as child_span:
        # Child span is active here
        validation_result = validate(request_data)
        child_span.update(output=validation_result)

    # Back to parent span context
    result = process_validated_data(validation_result)
    parent_span.update(output=result)
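Because this method is deprecated, new code should call start_as_current_observation with as_type="span", which the example above maps onto directly. A minimal sketch of the replacement call, reusing the placeholder validate and request_data from the example:

with parent_span.start_as_current_observation(name="validate-input", as_type="span") as child_span:
    validation_result = validate(request_data)
    child_span.update(output=validation_result)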
def start_generation( self, *, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None) -> LangfuseGeneration:
1339    def start_generation(
1340        self,
1341        *,
1342        name: str,
1343        input: Optional[Any] = None,
1344        output: Optional[Any] = None,
1345        metadata: Optional[Any] = None,
1346        version: Optional[str] = None,
1347        level: Optional[SpanLevel] = None,
1348        status_message: Optional[str] = None,
1349        completion_start_time: Optional[datetime] = None,
1350        model: Optional[str] = None,
1351        model_parameters: Optional[Dict[str, MapValue]] = None,
1352        usage_details: Optional[Dict[str, int]] = None,
1353        cost_details: Optional[Dict[str, float]] = None,
1354        prompt: Optional[PromptClient] = None,
1355    ) -> "LangfuseGeneration":
1356        """[DEPRECATED] Create a new child generation span.
1357
1358        DEPRECATED: This method is deprecated and will be removed in a future version.
1359        Use start_observation(as_type='generation') instead.
1360
1361        This method creates a new child generation span with this span as the parent.
1362        Generation spans are specialized for AI/LLM operations and include additional
1363        fields for model information, usage stats, and costs.
1364
1365        Unlike start_as_current_generation(), this method does not set the new span
1366        as the current span in the context.
1367
1368        Args:
1369            name: Name of the generation operation
1370            input: Input data for the model (e.g., prompts)
1371            output: Output from the model (e.g., completions)
1372            metadata: Additional metadata to associate with the generation
1373            version: Version identifier for the model or component
1374            level: Importance level of the generation (info, warning, error)
1375            status_message: Optional status message for the generation
1376            completion_start_time: When the model started generating the response
1377            model: Name/identifier of the AI model used (e.g., "gpt-4")
1378            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1379            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1380            cost_details: Cost information for the model call
1381            prompt: Associated prompt template from Langfuse prompt management
1382
1383        Returns:
1384            A new LangfuseGeneration that must be ended with .end() when complete
1385
1386        Example:
1387            ```python
1388            span = langfuse.start_span(name="process-query")
1389            try:
1390                # Create a generation child span
1391                generation = span.start_generation(
1392                    name="generate-answer",
1393                    model="gpt-4",
1394                    input={"prompt": "Explain quantum computing"}
1395                )
1396                try:
1397                    # Call model API
1398                    response = llm.generate(...)
1399
1400                    generation.update(
1401                        output=response.text,
1402                        usage_details={
1403                            "prompt_tokens": response.usage.prompt_tokens,
1404                            "completion_tokens": response.usage.completion_tokens
1405                        }
1406                    )
1407                finally:
1408                    generation.end()
1409
1410                # Continue with parent span
1411                span.update(output={"answer": response.text, "source": "gpt-4"})
1412            finally:
1413                span.end()
1414            ```
1415        """
1416        warnings.warn(
1417            "start_generation is deprecated and will be removed in a future version. "
1418            "Use start_observation(as_type='generation') instead.",
1419            DeprecationWarning,
1420            stacklevel=2,
1421        )
1422        return self.start_observation(
1423            name=name,
1424            as_type="generation",
1425            input=input,
1426            output=output,
1427            metadata=metadata,
1428            version=version,
1429            level=level,
1430            status_message=status_message,
1431            completion_start_time=completion_start_time,
1432            model=model,
1433            model_parameters=model_parameters,
1434            usage_details=usage_details,
1435            cost_details=cost_details,
1436            prompt=prompt,
1437        )

[DEPRECATED] Create a new child generation span.

DEPRECATED: This method is deprecated and will be removed in a future version. Use start_observation(as_type='generation') instead.

This method creates a new child generation span with this span as the parent. Generation spans are specialized for AI/LLM operations and include additional fields for model information, usage stats, and costs.

Unlike start_as_current_generation(), this method does not set the new span as the current span in the context.

Arguments:
  • name: Name of the generation operation
  • input: Input data for the model (e.g., prompts)
  • output: Output from the model (e.g., completions)
  • metadata: Additional metadata to associate with the generation
  • version: Version identifier for the model or component
  • level: Importance level of the generation (DEBUG, DEFAULT, WARNING, or ERROR)
  • status_message: Optional status message for the generation
  • completion_start_time: When the model started generating the response
  • model: Name/identifier of the AI model used (e.g., "gpt-4")
  • model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
  • usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
  • cost_details: Cost information for the model call
  • prompt: Associated prompt template from Langfuse prompt management
Returns:

A new LangfuseGeneration that must be ended with .end() when complete

Example:
span = langfuse.start_span(name="process-query")
try:
    # Create a generation child span
    generation = span.start_generation(
        name="generate-answer",
        model="gpt-4",
        input={"prompt": "Explain quantum computing"}
    )
    try:
        # Call model API
        response = llm.generate(...)

        generation.update(
            output=response.text,
            usage_details={
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens
            }
        )
    finally:
        generation.end()

    # Continue with parent span
    span.update(output={"answer": response.text, "source": "gpt-4"})
finally:
    span.end()
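The non-deprecated equivalent is start_observation with as_type="generation", which accepts the same model-related fields. A minimal sketch of the replacement, keeping span and llm.generate(...) as placeholders from the example above:

generation = span.start_observation(
    name="generate-answer",
    as_type="generation",
    model="gpt-4",
    input={"prompt": "Explain quantum computing"},
)
try:
    response = llm.generate(...)
    generation.update(output=response.text)
finally:
    generation.end()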
def start_as_current_generation( self, *, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None) -> opentelemetry.util._decorator._AgnosticContextManager[LangfuseGeneration]:
1439    def start_as_current_generation(
1440        self,
1441        *,
1442        name: str,
1443        input: Optional[Any] = None,
1444        output: Optional[Any] = None,
1445        metadata: Optional[Any] = None,
1446        version: Optional[str] = None,
1447        level: Optional[SpanLevel] = None,
1448        status_message: Optional[str] = None,
1449        completion_start_time: Optional[datetime] = None,
1450        model: Optional[str] = None,
1451        model_parameters: Optional[Dict[str, MapValue]] = None,
1452        usage_details: Optional[Dict[str, int]] = None,
1453        cost_details: Optional[Dict[str, float]] = None,
1454        prompt: Optional[PromptClient] = None,
1455    ) -> _AgnosticContextManager["LangfuseGeneration"]:
1456        """[DEPRECATED] Create a new child generation span and set it as the current span in a context manager.
1457
1458        DEPRECATED: This method is deprecated and will be removed in a future version.
1459        Use start_as_current_observation(as_type='generation') instead.
1460
1461        This method creates a new child generation span and sets it as the current span
1462        within a context manager. Generation spans are specialized for AI/LLM operations
1463        and include additional fields for model information, usage stats, and costs.
1464
1465        Args:
1466            name: Name of the generation operation
1467            input: Input data for the model (e.g., prompts)
1468            output: Output from the model (e.g., completions)
1469            metadata: Additional metadata to associate with the generation
1470            version: Version identifier for the model or component
1471            level: Importance level of the generation (info, warning, error)
1472            status_message: Optional status message for the generation
1473            completion_start_time: When the model started generating the response
1474            model: Name/identifier of the AI model used (e.g., "gpt-4")
1475            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1476            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1477            cost_details: Cost information for the model call
1478            prompt: Associated prompt template from Langfuse prompt management
1479
1480        Returns:
1481            A context manager that yields a new LangfuseGeneration
1482
1483        Example:
1484            ```python
1485            with langfuse.start_as_current_span(name="process-request") as span:
1486                # Prepare data
1487                query = preprocess_user_query(user_input)
1488
1489                # Create a generation span with context management
1490                with span.start_as_current_generation(
1491                    name="generate-answer",
1492                    model="gpt-4",
1493                    input={"query": query}
1494                ) as generation:
1495                    # Generation span is active here
1496                    response = llm.generate(query)
1497
1498                    # Update with results
1499                    generation.update(
1500                        output=response.text,
1501                        usage_details={
1502                            "prompt_tokens": response.usage.prompt_tokens,
1503                            "completion_tokens": response.usage.completion_tokens
1504                        }
1505                    )
1506
1507                # Back to parent span context
1508                span.update(output={"answer": response.text, "source": "gpt-4"})
1509            ```
1510        """
1511        warnings.warn(
1512            "start_as_current_generation is deprecated and will be removed in a future version. "
1513            "Use start_as_current_observation(as_type='generation') instead.",
1514            DeprecationWarning,
1515            stacklevel=2,
1516        )
1517        return self.start_as_current_observation(
1518            name=name,
1519            as_type="generation",
1520            input=input,
1521            output=output,
1522            metadata=metadata,
1523            version=version,
1524            level=level,
1525            status_message=status_message,
1526            completion_start_time=completion_start_time,
1527            model=model,
1528            model_parameters=model_parameters,
1529            usage_details=usage_details,
1530            cost_details=cost_details,
1531            prompt=prompt,
1532        )

[DEPRECATED] Create a new child generation span and set it as the current span in a context manager.

DEPRECATED: This method is deprecated and will be removed in a future version. Use start_as_current_observation(as_type='generation') instead.

This method creates a new child generation span and sets it as the current span within a context manager. Generation spans are specialized for AI/LLM operations and include additional fields for model information, usage stats, and costs.

Arguments:
  • name: Name of the generation operation
  • input: Input data for the model (e.g., prompts)
  • output: Output from the model (e.g., completions)
  • metadata: Additional metadata to associate with the generation
  • version: Version identifier for the model or component
  • level: Importance level of the generation (DEBUG, DEFAULT, WARNING, or ERROR)
  • status_message: Optional status message for the generation
  • completion_start_time: When the model started generating the response
  • model: Name/identifier of the AI model used (e.g., "gpt-4")
  • model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
  • usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
  • cost_details: Cost information for the model call
  • prompt: Associated prompt template from Langfuse prompt management
Returns:

A context manager that yields a new LangfuseGeneration

Example:
with langfuse.start_as_current_span(name="process-request") as span:
    # Prepare data
    query = preprocess_user_query(user_input)

    # Create a generation span with context management
    with span.start_as_current_generation(
        name="generate-answer",
        model="gpt-4",
        input={"query": query}
    ) as generation:
        # Generation span is active here
        response = llm.generate(query)

        # Update with results
        generation.update(
            output=response.text,
            usage_details={
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens
            }
        )

    # Back to parent span context
    span.update(output={"answer": response.text, "source": "gpt-4"})
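For new code, the same pattern is available through start_as_current_observation with as_type="generation". A minimal sketch of the replacement, reusing the placeholder llm and query from the example above:

with span.start_as_current_observation(
    name="generate-answer",
    as_type="generation",
    model="gpt-4",
    input={"query": query},
) as generation:
    response = llm.generate(query)
    generation.update(
        output=response.text,
        usage_details={
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
        },
    )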
def create_event( self, *, name: str, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None) -> LangfuseEvent:
1534    def create_event(
1535        self,
1536        *,
1537        name: str,
1538        input: Optional[Any] = None,
1539        output: Optional[Any] = None,
1540        metadata: Optional[Any] = None,
1541        version: Optional[str] = None,
1542        level: Optional[SpanLevel] = None,
1543        status_message: Optional[str] = None,
1544    ) -> "LangfuseEvent":
1545        """Create a new Langfuse observation of type 'EVENT'.
1546
1547        Args:
1548            name: Name of the span (e.g., function or operation name)
1549            input: Input data for the operation (can be any JSON-serializable object)
1550            output: Output data from the operation (can be any JSON-serializable object)
1551            metadata: Additional metadata to associate with the span
1552            version: Version identifier for the code or component
1553            level: Importance level of the span (info, warning, error)
1554            status_message: Optional status message for the span
1555
1556        Returns:
1557            The LangfuseEvent object
1558
1559        Example:
1560            ```python
1561            event = langfuse.create_event(name="process-event")
1562            ```
1563        """
1564        timestamp = time_ns()
1565
1566        with otel_trace_api.use_span(self._otel_span):
1567            new_otel_span = self._langfuse_client._otel_tracer.start_span(
1568                name=name, start_time=timestamp
1569            )
1570
1571        return cast(
1572            "LangfuseEvent",
1573            LangfuseEvent(
1574                otel_span=new_otel_span,
1575                langfuse_client=self._langfuse_client,
1576                input=input,
1577                output=output,
1578                metadata=metadata,
1579                environment=self._environment,
1580                version=version,
1581                level=level,
1582                status_message=status_message,
1583            ).end(end_time=timestamp),
1584        )

Create a new Langfuse observation of type 'EVENT'.

Arguments:
  • name: Name of the span (e.g., function or operation name)
  • input: Input data for the operation (can be any JSON-serializable object)
  • output: Output data from the operation (can be any JSON-serializable object)
  • metadata: Additional metadata to associate with the span
  • version: Version identifier for the code or component
  • level: Importance level of the span (DEBUG, DEFAULT, WARNING, or ERROR)
  • status_message: Optional status message for the span
Returns:

The LangfuseEvent object

Example:
event = langfuse.create_event(name="process-event")
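Note that the returned LangfuseEvent is ended immediately at its creation timestamp (see the source above), so no .end() call is needed on it. A slightly fuller sketch that records an event as a child of an existing span; the payload, level, and status message are illustrative:

span = langfuse.start_span(name="handle-request")
try:
    span.create_event(
        name="cache-miss",
        metadata={"cache_key": "user:42"},
        level="WARNING",
        status_message="Falling back to database",
    )
    # ... continue handling the request ...
finally:
    span.end()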
class LangfuseGeneration(langfuse._client.span.LangfuseObservationWrapper):
1587class LangfuseGeneration(LangfuseObservationWrapper):
1588    """Specialized span implementation for AI model generations in Langfuse.
1589
1590    This class represents a generation span specifically designed for tracking
1591    AI/LLM operations. It extends the base LangfuseObservationWrapper with specialized
1592    attributes for model details, token usage, and costs.
1593    """
1594
1595    def __init__(
1596        self,
1597        *,
1598        otel_span: otel_trace_api.Span,
1599        langfuse_client: "Langfuse",
1600        input: Optional[Any] = None,
1601        output: Optional[Any] = None,
1602        metadata: Optional[Any] = None,
1603        environment: Optional[str] = None,
1604        version: Optional[str] = None,
1605        level: Optional[SpanLevel] = None,
1606        status_message: Optional[str] = None,
1607        completion_start_time: Optional[datetime] = None,
1608        model: Optional[str] = None,
1609        model_parameters: Optional[Dict[str, MapValue]] = None,
1610        usage_details: Optional[Dict[str, int]] = None,
1611        cost_details: Optional[Dict[str, float]] = None,
1612        prompt: Optional[PromptClient] = None,
1613    ):
1614        """Initialize a new LangfuseGeneration span.
1615
1616        Args:
1617            otel_span: The OpenTelemetry span to wrap
1618            langfuse_client: Reference to the parent Langfuse client
1619            input: Input data for the generation (e.g., prompts)
1620            output: Output from the generation (e.g., completions)
1621            metadata: Additional metadata to associate with the generation
1622            environment: The tracing environment
1623            version: Version identifier for the model or component
1624            level: Importance level of the generation (info, warning, error)
1625            status_message: Optional status message for the generation
1626            completion_start_time: When the model started generating the response
1627            model: Name/identifier of the AI model used (e.g., "gpt-4")
1628            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1629            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1630            cost_details: Cost information for the model call
1631            prompt: Associated prompt template from Langfuse prompt management
1632        """
1633        super().__init__(
1634            as_type="generation",
1635            otel_span=otel_span,
1636            langfuse_client=langfuse_client,
1637            input=input,
1638            output=output,
1639            metadata=metadata,
1640            environment=environment,
1641            version=version,
1642            level=level,
1643            status_message=status_message,
1644            completion_start_time=completion_start_time,
1645            model=model,
1646            model_parameters=model_parameters,
1647            usage_details=usage_details,
1648            cost_details=cost_details,
1649            prompt=prompt,
1650        )

Specialized span implementation for AI model generations in Langfuse.

This class represents a generation span specifically designed for tracking AI/LLM operations. It extends the base LangfuseObservationWrapper with specialized attributes for model details, token usage, and costs.

LangfuseGeneration( *, otel_span: opentelemetry.trace.span.Span, langfuse_client: Langfuse, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, environment: Optional[str] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None)
1595    def __init__(
1596        self,
1597        *,
1598        otel_span: otel_trace_api.Span,
1599        langfuse_client: "Langfuse",
1600        input: Optional[Any] = None,
1601        output: Optional[Any] = None,
1602        metadata: Optional[Any] = None,
1603        environment: Optional[str] = None,
1604        version: Optional[str] = None,
1605        level: Optional[SpanLevel] = None,
1606        status_message: Optional[str] = None,
1607        completion_start_time: Optional[datetime] = None,
1608        model: Optional[str] = None,
1609        model_parameters: Optional[Dict[str, MapValue]] = None,
1610        usage_details: Optional[Dict[str, int]] = None,
1611        cost_details: Optional[Dict[str, float]] = None,
1612        prompt: Optional[PromptClient] = None,
1613    ):
1614        """Initialize a new LangfuseGeneration span.
1615
1616        Args:
1617            otel_span: The OpenTelemetry span to wrap
1618            langfuse_client: Reference to the parent Langfuse client
1619            input: Input data for the generation (e.g., prompts)
1620            output: Output from the generation (e.g., completions)
1621            metadata: Additional metadata to associate with the generation
1622            environment: The tracing environment
1623            version: Version identifier for the model or component
1624            level: Importance level of the generation (info, warning, error)
1625            status_message: Optional status message for the generation
1626            completion_start_time: When the model started generating the response
1627            model: Name/identifier of the AI model used (e.g., "gpt-4")
1628            model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
1629            usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
1630            cost_details: Cost information for the model call
1631            prompt: Associated prompt template from Langfuse prompt management
1632        """
1633        super().__init__(
1634            as_type="generation",
1635            otel_span=otel_span,
1636            langfuse_client=langfuse_client,
1637            input=input,
1638            output=output,
1639            metadata=metadata,
1640            environment=environment,
1641            version=version,
1642            level=level,
1643            status_message=status_message,
1644            completion_start_time=completion_start_time,
1645            model=model,
1646            model_parameters=model_parameters,
1647            usage_details=usage_details,
1648            cost_details=cost_details,
1649            prompt=prompt,
1650        )

Initialize a new LangfuseGeneration span.

Arguments:
  • otel_span: The OpenTelemetry span to wrap
  • langfuse_client: Reference to the parent Langfuse client
  • input: Input data for the generation (e.g., prompts)
  • output: Output from the generation (e.g., completions)
  • metadata: Additional metadata to associate with the generation
  • environment: The tracing environment
  • version: Version identifier for the model or component
  • level: Importance level of the generation (DEBUG, DEFAULT, WARNING, or ERROR)
  • status_message: Optional status message for the generation
  • completion_start_time: When the model started generating the response
  • model: Name/identifier of the AI model used (e.g., "gpt-4")
  • model_parameters: Parameters used for the model (e.g., temperature, max_tokens)
  • usage_details: Token usage information (e.g., prompt_tokens, completion_tokens)
  • cost_details: Cost information for the model call
  • prompt: Associated prompt template from Langfuse prompt management
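Like LangfuseSpan, this class is normally not instantiated directly; a generation-type observation started from a parent span (or from the client) returns a LangfuseGeneration. A minimal sketch under that assumption, with illustrative model name, token counts, and cost figure:

span = langfuse.start_span(name="summarize-request")
generation = span.start_observation(
    name="summarize",
    as_type="generation",
    model="gpt-4",
    model_parameters={"temperature": 0.2},
    input={"prompt": "Summarize the meeting notes"},
)
generation.update(
    output="<summary text>",
    usage_details={"prompt_tokens": 180, "completion_tokens": 55},
    cost_details={"total": 0.0042},
)
generation.end()
span.end()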
class LangfuseEvent(langfuse._client.span.LangfuseObservationWrapper):
1653class LangfuseEvent(LangfuseObservationWrapper):
1654    """Specialized span implementation for Langfuse Events."""
1655
1656    def __init__(
1657        self,
1658        *,
1659        otel_span: otel_trace_api.Span,
1660        langfuse_client: "Langfuse",
1661        input: Optional[Any] = None,
1662        output: Optional[Any] = None,
1663        metadata: Optional[Any] = None,
1664        environment: Optional[str] = None,
1665        version: Optional[str] = None,
1666        level: Optional[SpanLevel] = None,
1667        status_message: Optional[str] = None,
1668    ):
1669        """Initialize a new LangfuseEvent span.
1670
1671        Args:
1672            otel_span: The OpenTelemetry span to wrap
1673            langfuse_client: Reference to the parent Langfuse client
1674            input: Input data for the event
1675            output: Output from the event
1676            metadata: Additional metadata to associate with the generation
1677            environment: The tracing environment
1678            version: Version identifier for the model or component
1679            level: Importance level of the generation (info, warning, error)
1680            status_message: Optional status message for the generation
1681        """
1682        super().__init__(
1683            otel_span=otel_span,
1684            as_type="event",
1685            langfuse_client=langfuse_client,
1686            input=input,
1687            output=output,
1688            metadata=metadata,
1689            environment=environment,
1690            version=version,
1691            level=level,
1692            status_message=status_message,
1693        )
1694
1695    def update(
1696        self,
1697        *,
1698        name: Optional[str] = None,
1699        input: Optional[Any] = None,
1700        output: Optional[Any] = None,
1701        metadata: Optional[Any] = None,
1702        version: Optional[str] = None,
1703        level: Optional[SpanLevel] = None,
1704        status_message: Optional[str] = None,
1705        completion_start_time: Optional[datetime] = None,
1706        model: Optional[str] = None,
1707        model_parameters: Optional[Dict[str, MapValue]] = None,
1708        usage_details: Optional[Dict[str, int]] = None,
1709        cost_details: Optional[Dict[str, float]] = None,
1710        prompt: Optional[PromptClient] = None,
1711        **kwargs: Any,
1712    ) -> "LangfuseEvent":
1713        """Update is not allowed for LangfuseEvent because events cannot be updated.
1714
1715        This method logs a warning and returns self without making changes.
1716
1717        Returns:
1718            self: Returns the unchanged LangfuseEvent instance
1719        """
1720        langfuse_logger.warning(
1721            "Attempted to update LangfuseEvent observation. Events cannot be updated after creation."
1722        )
1723        return self

Specialized span implementation for Langfuse Events.

LangfuseEvent( *, otel_span: opentelemetry.trace.span.Span, langfuse_client: Langfuse, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, environment: Optional[str] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None)
1656    def __init__(
1657        self,
1658        *,
1659        otel_span: otel_trace_api.Span,
1660        langfuse_client: "Langfuse",
1661        input: Optional[Any] = None,
1662        output: Optional[Any] = None,
1663        metadata: Optional[Any] = None,
1664        environment: Optional[str] = None,
1665        version: Optional[str] = None,
1666        level: Optional[SpanLevel] = None,
1667        status_message: Optional[str] = None,
1668    ):
1669        """Initialize a new LangfuseEvent span.
1670
1671        Args:
1672            otel_span: The OpenTelemetry span to wrap
1673            langfuse_client: Reference to the parent Langfuse client
1674            input: Input data for the event
1675            output: Output from the event
1676            metadata: Additional metadata to associate with the generation
1677            environment: The tracing environment
1678            version: Version identifier for the model or component
1679            level: Importance level of the generation (info, warning, error)
1680            status_message: Optional status message for the generation
1681        """
1682        super().__init__(
1683            otel_span=otel_span,
1684            as_type="event",
1685            langfuse_client=langfuse_client,
1686            input=input,
1687            output=output,
1688            metadata=metadata,
1689            environment=environment,
1690            version=version,
1691            level=level,
1692            status_message=status_message,
1693        )

Initialize a new LangfuseEvent span.

Arguments:
  • otel_span: The OpenTelemetry span to wrap
  • langfuse_client: Reference to the parent Langfuse client
  • input: Input data for the event
  • output: Output from the event
  • metadata: Additional metadata to associate with the event
  • environment: The tracing environment
  • version: Version identifier for the code or component
  • level: Importance level of the event (DEBUG, DEFAULT, WARNING, or ERROR)
  • status_message: Optional status message for the event
def update( self, *, name: Optional[str] = None, input: Optional[Any] = None, output: Optional[Any] = None, metadata: Optional[Any] = None, version: Optional[str] = None, level: Optional[Literal['DEBUG', 'DEFAULT', 'WARNING', 'ERROR']] = None, status_message: Optional[str] = None, completion_start_time: Optional[datetime.datetime] = None, model: Optional[str] = None, model_parameters: Optional[Dict[str, Union[str, NoneType, int, bool, List[str]]]] = None, usage_details: Optional[Dict[str, int]] = None, cost_details: Optional[Dict[str, float]] = None, prompt: Union[langfuse.model.TextPromptClient, langfuse.model.ChatPromptClient, NoneType] = None, **kwargs: Any) -> LangfuseEvent:
1695    def update(
1696        self,
1697        *,
1698        name: Optional[str] = None,
1699        input: Optional[Any] = None,
1700        output: Optional[Any] = None,
1701        metadata: Optional[Any] = None,
1702        version: Optional[str] = None,
1703        level: Optional[SpanLevel] = None,
1704        status_message: Optional[str] = None,
1705        completion_start_time: Optional[datetime] = None,
1706        model: Optional[str] = None,
1707        model_parameters: Optional[Dict[str, MapValue]] = None,
1708        usage_details: Optional[Dict[str, int]] = None,
1709        cost_details: Optional[Dict[str, float]] = None,
1710        prompt: Optional[PromptClient] = None,
1711        **kwargs: Any,
1712    ) -> "LangfuseEvent":
1713        """Update is not allowed for LangfuseEvent because events cannot be updated.
1714
1715        This method logs a warning and returns self without making changes.
1716
1717        Returns:
1718            self: Returns the unchanged LangfuseEvent instance
1719        """
1720        langfuse_logger.warning(
1721            "Attempted to update LangfuseEvent observation. Events cannot be updated after creation."
1722        )
1723        return self

Update is not allowed for LangfuseEvent because events cannot be updated.

This method logs a warning and returns self without making changes.

Returns:

self: Returns the unchanged LangfuseEvent instance
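Because events are immutable, all fields have to be supplied when the event is created; calling update() afterwards only logs a warning and returns the same instance. A short sketch, assuming span is an existing LangfuseSpan:

event = span.create_event(name="retry", metadata={"attempt": 2})

# Logs a warning and leaves the event unchanged:
event = event.update(metadata={"attempt": 3})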

class LangfuseOtelSpanAttributes:
27class LangfuseOtelSpanAttributes:
28    # Langfuse-Trace attributes
29    TRACE_NAME = "langfuse.trace.name"
30    TRACE_USER_ID = "user.id"
31    TRACE_SESSION_ID = "session.id"
32    TRACE_TAGS = "langfuse.trace.tags"
33    TRACE_PUBLIC = "langfuse.trace.public"
34    TRACE_METADATA = "langfuse.trace.metadata"
35    TRACE_INPUT = "langfuse.trace.input"
36    TRACE_OUTPUT = "langfuse.trace.output"
37
38    # Langfuse-observation attributes
39    OBSERVATION_TYPE = "langfuse.observation.type"
40    OBSERVATION_METADATA = "langfuse.observation.metadata"
41    OBSERVATION_LEVEL = "langfuse.observation.level"
42    OBSERVATION_STATUS_MESSAGE = "langfuse.observation.status_message"
43    OBSERVATION_INPUT = "langfuse.observation.input"
44    OBSERVATION_OUTPUT = "langfuse.observation.output"
45
46    # Langfuse-observation of type Generation attributes
47    OBSERVATION_COMPLETION_START_TIME = "langfuse.observation.completion_start_time"
48    OBSERVATION_MODEL = "langfuse.observation.model.name"
49    OBSERVATION_MODEL_PARAMETERS = "langfuse.observation.model.parameters"
50    OBSERVATION_USAGE_DETAILS = "langfuse.observation.usage_details"
51    OBSERVATION_COST_DETAILS = "langfuse.observation.cost_details"
52    OBSERVATION_PROMPT_NAME = "langfuse.observation.prompt.name"
53    OBSERVATION_PROMPT_VERSION = "langfuse.observation.prompt.version"
54
55    # General
56    ENVIRONMENT = "langfuse.environment"
57    RELEASE = "langfuse.release"
58    VERSION = "langfuse.version"
59
60    # Internal
61    AS_ROOT = "langfuse.internal.as_root"
62
63    # Experiments
64    EXPERIMENT_ID = "langfuse.experiment.id"
65    EXPERIMENT_NAME = "langfuse.experiment.name"
66    EXPERIMENT_DESCRIPTION = "langfuse.experiment.description"
67    EXPERIMENT_METADATA = "langfuse.experiment.metadata"
68    EXPERIMENT_DATASET_ID = "langfuse.experiment.dataset.id"
69    EXPERIMENT_ITEM_ID = "langfuse.experiment.item.id"
70    EXPERIMENT_ITEM_EXPECTED_OUTPUT = "langfuse.experiment.item.expected_output"
71    EXPERIMENT_ITEM_METADATA = "langfuse.experiment.item.metadata"
72    EXPERIMENT_ITEM_ROOT_OBSERVATION_ID = "langfuse.experiment.item.root_observation_id"
TRACE_NAME = 'langfuse.trace.name'
TRACE_USER_ID = 'user.id'
TRACE_SESSION_ID = 'session.id'
TRACE_TAGS = 'langfuse.trace.tags'
TRACE_PUBLIC = 'langfuse.trace.public'
TRACE_METADATA = 'langfuse.trace.metadata'
TRACE_INPUT = 'langfuse.trace.input'
TRACE_OUTPUT = 'langfuse.trace.output'
OBSERVATION_TYPE = 'langfuse.observation.type'
OBSERVATION_METADATA = 'langfuse.observation.metadata'
OBSERVATION_LEVEL = 'langfuse.observation.level'
OBSERVATION_STATUS_MESSAGE = 'langfuse.observation.status_message'
OBSERVATION_INPUT = 'langfuse.observation.input'
OBSERVATION_OUTPUT = 'langfuse.observation.output'
OBSERVATION_COMPLETION_START_TIME = 'langfuse.observation.completion_start_time'
OBSERVATION_MODEL = 'langfuse.observation.model.name'
OBSERVATION_MODEL_PARAMETERS = 'langfuse.observation.model.parameters'
OBSERVATION_USAGE_DETAILS = 'langfuse.observation.usage_details'
OBSERVATION_COST_DETAILS = 'langfuse.observation.cost_details'
OBSERVATION_PROMPT_NAME = 'langfuse.observation.prompt.name'
OBSERVATION_PROMPT_VERSION = 'langfuse.observation.prompt.version'
ENVIRONMENT = 'langfuse.environment'
RELEASE = 'langfuse.release'
VERSION = 'langfuse.version'
AS_ROOT = 'langfuse.internal.as_root'
EXPERIMENT_ID = 'langfuse.experiment.id'
EXPERIMENT_NAME = 'langfuse.experiment.name'
EXPERIMENT_DESCRIPTION = 'langfuse.experiment.description'
EXPERIMENT_METADATA = 'langfuse.experiment.metadata'
EXPERIMENT_DATASET_ID = 'langfuse.experiment.dataset.id'
EXPERIMENT_ITEM_ID = 'langfuse.experiment.item.id'
EXPERIMENT_ITEM_EXPECTED_OUTPUT = 'langfuse.experiment.item.expected_output'
EXPERIMENT_ITEM_METADATA = 'langfuse.experiment.item.metadata'
EXPERIMENT_ITEM_ROOT_OBSERVATION_ID = 'langfuse.experiment.item.root_observation_id'
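These constants are the OpenTelemetry attribute keys under which the SDK stores Langfuse-specific data. Since they are plain strings, they can also be referenced when inspecting exported spans or when annotating spans created with the raw OTel API. A sketch of the latter; whether such a span is picked up by Langfuse depends on how the exporter is configured, so treat this as illustrative only:

from opentelemetry import trace

from langfuse import LangfuseOtelSpanAttributes

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("manual-span") as otel_span:
    otel_span.set_attribute(LangfuseOtelSpanAttributes.TRACE_NAME, "manual-trace")
    otel_span.set_attribute(LangfuseOtelSpanAttributes.ENVIRONMENT, "staging")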
class LangfuseAgent(langfuse._client.span.LangfuseObservationWrapper):
1726class LangfuseAgent(LangfuseObservationWrapper):
1727    """Agent observation for reasoning blocks that act on tools using LLM guidance."""
1728
1729    def __init__(self, **kwargs: Any) -> None:
1730        """Initialize a new LangfuseAgent span."""
1731        kwargs["as_type"] = "agent"
1732        super().__init__(**kwargs)

Agent observation for reasoning blocks that act on tools using LLM guidance.

LangfuseAgent(**kwargs: Any)
1729    def __init__(self, **kwargs: Any) -> None:
1730        """Initialize a new LangfuseAgent span."""
1731        kwargs["as_type"] = "agent"
1732        super().__init__(**kwargs)

Initialize a new LangfuseAgent span.

class LangfuseTool(langfuse._client.span.LangfuseObservationWrapper):
1735class LangfuseTool(LangfuseObservationWrapper):
1736    """Tool observation representing external tool calls, e.g., calling a weather API."""
1737
1738    def __init__(self, **kwargs: Any) -> None:
1739        """Initialize a new LangfuseTool span."""
1740        kwargs["as_type"] = "tool"
1741        super().__init__(**kwargs)

Tool observation representing external tool calls, e.g., calling a weather API.

LangfuseTool(**kwargs: Any)
1738    def __init__(self, **kwargs: Any) -> None:
1739        """Initialize a new LangfuseTool span."""
1740        kwargs["as_type"] = "tool"
1741        super().__init__(**kwargs)

Initialize a new LangfuseTool span.

class LangfuseChain(langfuse._client.span.LangfuseObservationWrapper):
1744class LangfuseChain(LangfuseObservationWrapper):
1745    """Chain observation for connecting LLM application steps, e.g. passing context from retriever to LLM."""
1746
1747    def __init__(self, **kwargs: Any) -> None:
1748        """Initialize a new LangfuseChain span."""
1749        kwargs["as_type"] = "chain"
1750        super().__init__(**kwargs)

Chain observation for connecting LLM application steps, e.g. passing context from retriever to LLM.

LangfuseChain(**kwargs: Any)
1747    def __init__(self, **kwargs: Any) -> None:
1748        """Initialize a new LangfuseChain span."""
1749        kwargs["as_type"] = "chain"
1750        super().__init__(**kwargs)

Initialize a new LangfuseChain span.

class LangfuseEmbedding(langfuse._client.span.LangfuseObservationWrapper):
1762class LangfuseEmbedding(LangfuseObservationWrapper):
1763    """Embedding observation for LLM embedding calls, typically used before retrieval."""
1764
1765    def __init__(self, **kwargs: Any) -> None:
1766        """Initialize a new LangfuseEmbedding span."""
1767        kwargs["as_type"] = "embedding"
1768        super().__init__(**kwargs)

Embedding observation for LLM embedding calls, typically used before retrieval.

LangfuseEmbedding(**kwargs: Any)
1765    def __init__(self, **kwargs: Any) -> None:
1766        """Initialize a new LangfuseEmbedding span."""
1767        kwargs["as_type"] = "embedding"
1768        super().__init__(**kwargs)

Initialize a new LangfuseEmbedding span.

class LangfuseEvaluator(langfuse._client.span.LangfuseObservationWrapper):
1771class LangfuseEvaluator(LangfuseObservationWrapper):
1772    """Evaluator observation for assessing relevance, correctness, or helpfulness of LLM outputs."""
1773
1774    def __init__(self, **kwargs: Any) -> None:
1775        """Initialize a new LangfuseEvaluator span."""
1776        kwargs["as_type"] = "evaluator"
1777        super().__init__(**kwargs)

Evaluator observation for assessing relevance, correctness, or helpfulness of LLM outputs.

LangfuseEvaluator(**kwargs: Any)
1774    def __init__(self, **kwargs: Any) -> None:
1775        """Initialize a new LangfuseEvaluator span."""
1776        kwargs["as_type"] = "evaluator"
1777        super().__init__(**kwargs)

Initialize a new LangfuseEvaluator span.

class LangfuseRetriever(langfuse._client.span.LangfuseObservationWrapper):
1753class LangfuseRetriever(LangfuseObservationWrapper):
1754    """Retriever observation for data retrieval steps, e.g. vector store or database queries."""
1755
1756    def __init__(self, **kwargs: Any) -> None:
1757        """Initialize a new LangfuseRetriever span."""
1758        kwargs["as_type"] = "retriever"
1759        super().__init__(**kwargs)

Retriever observation for data retrieval steps, e.g. vector store or database queries.

LangfuseRetriever(**kwargs: Any)
1756    def __init__(self, **kwargs: Any) -> None:
1757        """Initialize a new LangfuseRetriever span."""
1758        kwargs["as_type"] = "retriever"
1759        super().__init__(**kwargs)

Initialize a new LangfuseRetriever span.

class LangfuseGuardrail(langfuse._client.span.LangfuseObservationWrapper):
1780class LangfuseGuardrail(LangfuseObservationWrapper):
1781    """Guardrail observation for protection e.g. against jailbreaks or offensive content."""
1782
1783    def __init__(self, **kwargs: Any) -> None:
1784        """Initialize a new LangfuseGuardrail span."""
1785        kwargs["as_type"] = "guardrail"
1786        super().__init__(**kwargs)

Guardrail observation for protection, e.g., against jailbreaks or offensive content.

LangfuseGuardrail(**kwargs: Any)
1783    def __init__(self, **kwargs: Any) -> None:
1784        """Initialize a new LangfuseGuardrail span."""
1785        kwargs["as_type"] = "guardrail"
1786        super().__init__(**kwargs)

Initialize a new LangfuseGuardrail span.
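LangfuseAgent, LangfuseTool, LangfuseChain, LangfuseRetriever, LangfuseEmbedding, LangfuseEvaluator, and LangfuseGuardrail are thin typed wrappers that only fix the observation type; they are normally obtained by starting an observation with the matching as_type value rather than by calling their constructors. A sketch under that assumption (it presumes start_observation accepts these as_type values, which mirror the as_type each wrapper sets internally; names and payloads are illustrative):

root = langfuse.start_span(name="answer-question")

retriever = root.start_observation(
    name="vector-search", as_type="retriever", input={"query": "refund policy"}
)
retriever.update(output={"documents_found": 3})
retriever.end()

tool = root.start_observation(
    name="weather-api", as_type="tool", input={"city": "Berlin"}
)
tool.update(output={"temperature_c": 21})
tool.end()

root.end()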

class Evaluation:
 97class Evaluation:
 98    """Represents an evaluation result for an experiment item or an entire experiment run.
 99
100    This class provides a strongly-typed way to create evaluation results in evaluator functions.
101    Users must use keyword arguments when instantiating this class.
102
103    Attributes:
104        name: Unique identifier for the evaluation metric. Should be descriptive
105            and consistent across runs (e.g., "accuracy", "bleu_score", "toxicity").
106            Used for aggregation and comparison across experiment runs.
107        value: The evaluation score or result. Can be:
108            - Numeric (int/float): For quantitative metrics like accuracy (0.85), BLEU (0.42)
109            - String: For categorical results like "positive", "negative", "neutral"
110            - Boolean: For binary assessments like "passes_safety_check"
111        comment: Optional human-readable explanation of the evaluation result.
112            Useful for providing context, explaining scoring rationale, or noting
113            special conditions. Displayed in Langfuse UI for interpretability.
114        metadata: Optional structured metadata about the evaluation process.
115            Can include confidence scores, intermediate calculations, model versions,
116            or any other relevant technical details.
117        data_type: Optional score data type. Required if value is not NUMERIC.
118            One of NUMERIC, CATEGORICAL, or BOOLEAN. Defaults to NUMERIC.
119        config_id: Optional Langfuse score config ID.
120
121    Examples:
122        Basic accuracy evaluation:
123        ```python
124        from langfuse import Evaluation
125
126        def accuracy_evaluator(*, input, output, expected_output=None, **kwargs):
127            if not expected_output:
128                return Evaluation(name="accuracy", value=None, comment="No expected output")
129
130            is_correct = output.strip().lower() == expected_output.strip().lower()
131            return Evaluation(
132                name="accuracy",
133                value=1.0 if is_correct else 0.0,
134                comment="Correct answer" if is_correct else "Incorrect answer"
135            )
136        ```
137
138        Multi-metric evaluator:
139        ```python
140        def comprehensive_evaluator(*, input, output, expected_output=None, **kwargs):
141            return [
142                Evaluation(name="length", value=len(output), comment=f"Output length: {len(output)} chars"),
143                Evaluation(name="has_greeting", value="hello" in output.lower(), comment="Contains greeting"),
144                Evaluation(
145                    name="quality",
146                    value=0.85,
147                    comment="High quality response",
148                    metadata={"confidence": 0.92, "model": "gpt-4"}
149                )
150            ]
151        ```
152
153        Categorical evaluation:
154        ```python
155        def sentiment_evaluator(*, input, output, **kwargs):
156            sentiment = analyze_sentiment(output)  # Returns "positive", "negative", or "neutral"
157            return Evaluation(
158                name="sentiment",
159                value=sentiment,
160                comment=f"Response expresses {sentiment} sentiment",
161                data_type="CATEGORICAL"
162            )
163        ```
164
165        Failed evaluation with error handling:
166        ```python
167        def external_api_evaluator(*, input, output, **kwargs):
168            try:
169                score = external_api.evaluate(output)
170                return Evaluation(name="external_score", value=score)
171            except Exception as e:
172                return Evaluation(
173                    name="external_score",
174                    value=None,
175                    comment=f"API unavailable: {e}",
176                    metadata={"error": str(e), "retry_count": 3}
177                )
178        ```
179
180    Note:
181        All arguments must be passed as keywords. Positional arguments are not allowed
182        to ensure code clarity and prevent errors from argument reordering.
183    """
184
185    def __init__(
186        self,
187        *,
188        name: str,
189        value: Union[int, float, str, bool],
190        comment: Optional[str] = None,
191        metadata: Optional[Dict[str, Any]] = None,
192        data_type: Optional[ScoreDataType] = None,
193        config_id: Optional[str] = None,
194    ):
195        """Initialize an Evaluation with the provided data.
196
197        Args:
198            name: Unique identifier for the evaluation metric.
199            value: The evaluation score or result.
200            comment: Optional human-readable explanation of the result.
201            metadata: Optional structured metadata about the evaluation process.
202            data_type: Optional score data type (NUMERIC, CATEGORICAL, or BOOLEAN).
203            config_id: Optional Langfuse score config ID.
204
205        Note:
206            All arguments must be provided as keywords. Positional arguments will raise a TypeError.
207        """
208        self.name = name
209        self.value = value
210        self.comment = comment
211        self.metadata = metadata
212        self.data_type = data_type
213        self.config_id = config_id

Represents an evaluation result for an experiment item or an entire experiment run.

This class provides a strongly-typed way to create evaluation results in evaluator functions. Users must use keyword arguments when instantiating this class.

Attributes:
  • name: Unique identifier for the evaluation metric. Should be descriptive and consistent across runs (e.g., "accuracy", "bleu_score", "toxicity"). Used for aggregation and comparison across experiment runs.
  • value: The evaluation score or result. Can be:
    • Numeric (int/float): For quantitative metrics like accuracy (0.85), BLEU (0.42)
    • String: For categorical results like "positive", "negative", "neutral"
    • Boolean: For binary assessments like "passes_safety_check"
  • comment: Optional human-readable explanation of the evaluation result. Useful for providing context, explaining scoring rationale, or noting special conditions. Displayed in Langfuse UI for interpretability.
  • metadata: Optional structured metadata about the evaluation process. Can include confidence scores, intermediate calculations, model versions, or any other relevant technical details.
  • data_type: Optional score data type. Required if value is not NUMERIC. One of NUMERIC, CATEGORICAL, or BOOLEAN. Defaults to NUMERIC.
  • config_id: Optional Langfuse score config ID.
Examples:

Basic accuracy evaluation:

from langfuse import Evaluation

def accuracy_evaluator(*, input, output, expected_output=None, **kwargs):
    if not expected_output:
        return Evaluation(name="accuracy", value=None, comment="No expected output")

    is_correct = output.strip().lower() == expected_output.strip().lower()
    return Evaluation(
        name="accuracy",
        value=1.0 if is_correct else 0.0,
        comment="Correct answer" if is_correct else "Incorrect answer"
    )

Multi-metric evaluator:

def comprehensive_evaluator(*, input, output, expected_output=None, **kwargs):
    return [
        Evaluation(name="length", value=len(output), comment=f"Output length: {len(output)} chars"),
        Evaluation(name="has_greeting", value="hello" in output.lower(), comment="Contains greeting"),
        Evaluation(
            name="quality",
            value=0.85,
            comment="High quality response",
            metadata={"confidence": 0.92, "model": "gpt-4"}
        )
    ]

Categorical evaluation:

def sentiment_evaluator(*, input, output, **kwargs):
    sentiment = analyze_sentiment(output)  # Returns "positive", "negative", or "neutral"
    return Evaluation(
        name="sentiment",
        value=sentiment,
        comment=f"Response expresses {sentiment} sentiment",
        data_type="CATEGORICAL"
    )

Failed evaluation with error handling:

def external_api_evaluator(*, input, output, **kwargs):
    try:
        score = external_api.evaluate(output)
        return Evaluation(name="external_score", value=score)
    except Exception as e:
        return Evaluation(
            name="external_score",
            value=None,
            comment=f"API unavailable: {e}",
            metadata={"error": str(e), "retry_count": 3}
        )
Note:

All arguments must be passed as keywords. Positional arguments are not allowed to ensure code clarity and prevent errors from argument reordering.

Evaluation( *, name: str, value: Union[int, float, str, bool], comment: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, data_type: Optional[langfuse.api.ScoreDataType] = None, config_id: Optional[str] = None)
185    def __init__(
186        self,
187        *,
188        name: str,
189        value: Union[int, float, str, bool],
190        comment: Optional[str] = None,
191        metadata: Optional[Dict[str, Any]] = None,
192        data_type: Optional[ScoreDataType] = None,
193        config_id: Optional[str] = None,
194    ):
195        """Initialize an Evaluation with the provided data.
196
197        Args:
198            name: Unique identifier for the evaluation metric.
199            value: The evaluation score or result.
200            comment: Optional human-readable explanation of the result.
201            metadata: Optional structured metadata about the evaluation process.
202            data_type: Optional score data type (NUMERIC, CATEGORICAL, or BOOLEAN).
203            config_id: Optional Langfuse score config ID.
204
205        Note:
206            All arguments must be provided as keywords. Positional arguments will raise a TypeError.
207        """
208        self.name = name
209        self.value = value
210        self.comment = comment
211        self.metadata = metadata
212        self.data_type = data_type
213        self.config_id = config_id

Initialize an Evaluation with the provided data.

Arguments:
  • name: Unique identifier for the evaluation metric.
  • value: The evaluation score or result.
  • comment: Optional human-readable explanation of the result.
  • metadata: Optional structured metadata about the evaluation process.
  • data_type: Optional score data type (NUMERIC, CATEGORICAL, or BOOLEAN).
  • config_id: Optional Langfuse score config ID.
Note:

All arguments must be provided as keywords. Positional arguments will raise a TypeError.

name
value
comment
metadata
data_type
config_id
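
To make the keyword-only constructor concrete, here is a brief, hedged sketch of the three value shapes described above; the metric names, values, and metadata are illustrative only, not an SDK convention:

```python
from langfuse import Evaluation

# Numeric score: data_type defaults to NUMERIC.
accuracy = Evaluation(name="accuracy", value=0.92, comment="Close match to reference")

# String value: data_type must be set because the value is not numeric.
sentiment = Evaluation(name="sentiment", value="positive", data_type="CATEGORICAL")

# Boolean value with extra context in metadata.
safety = Evaluation(
    name="passes_safety_check",
    value=True,
    data_type="BOOLEAN",
    metadata={"checker_version": "v2"},  # illustrative metadata
)
```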
class EvaluatorInputs:
 39class EvaluatorInputs:
 40    """Input data structure for evaluators, returned by mapper functions.
 41
 42    This class provides a strongly-typed container for transforming API response
 43    objects (traces, observations) into the standardized format expected
 44    by evaluator functions. It ensures consistent access to input, output, expected
 45    output, and metadata regardless of the source entity type.
 46
 47    Attributes:
 48        input: The input data that was provided to generate the output being evaluated.
 49            For traces, this might be the initial prompt or request. For observations,
 50            this could be the span's input. The exact meaning depends on your use case.
 51        output: The actual output that was produced and needs to be evaluated.
 52            For traces, this is typically the final response. For observations,
 53            this might be the generation output or span result.
 54        expected_output: Optional ground truth or expected result for comparison.
 55            Used by evaluators to assess correctness. May be None if no ground truth
 56            is available for the entity being evaluated.
 57        metadata: Optional structured metadata providing additional context for evaluation.
 58            Can include information about the entity, execution context, user attributes,
 59            or any other relevant data that evaluators might use.
 60
 61    Examples:
 62        Simple mapper for traces:
 63        ```python
 64        from langfuse import EvaluatorInputs
 65
 66        def trace_mapper(trace):
 67            return EvaluatorInputs(
 68                input=trace.input,
 69                output=trace.output,
 70                expected_output=None,  # No ground truth available
 71                metadata={"user_id": trace.user_id, "tags": trace.tags}
 72            )
 73        ```
 74
 75        Mapper for observations extracting specific fields:
 76        ```python
 77        def observation_mapper(observation):
 78            # Extract input/output from observation's data
 79            input_data = observation.input if hasattr(observation, 'input') else None
 80            output_data = observation.output if hasattr(observation, 'output') else None
 81
 82            return EvaluatorInputs(
 83                input=input_data,
 84                output=output_data,
 85                expected_output=None,
 86                metadata={
 87                    "observation_type": observation.type,
 88                    "model": observation.model,
 89                    "latency_ms": observation.end_time - observation.start_time
 90                }
 91            )
 92        ```
 93        ```
 94
 95    Note:
 96        All arguments must be passed as keywords when instantiating this class.
 97    """
 98
 99    def __init__(
100        self,
101        *,
102        input: Any,
103        output: Any,
104        expected_output: Any = None,
105        metadata: Optional[Dict[str, Any]] = None,
106    ):
107        """Initialize EvaluatorInputs with the provided data.
108
109        Args:
110            input: The input data for evaluation.
111            output: The output data to be evaluated.
112            expected_output: Optional ground truth for comparison.
113            metadata: Optional additional context for evaluation.
114
115        Note:
116            All arguments must be provided as keywords.
117        """
118        self.input = input
119        self.output = output
120        self.expected_output = expected_output
121        self.metadata = metadata

Input data structure for evaluators, returned by mapper functions.

This class provides a strongly-typed container for transforming API response objects (traces, observations) into the standardized format expected by evaluator functions. It ensures consistent access to input, output, expected output, and metadata regardless of the source entity type.

Attributes:
  • input: The input data that was provided to generate the output being evaluated. For traces, this might be the initial prompt or request. For observations, this could be the span's input. The exact meaning depends on your use case.
  • output: The actual output that was produced and needs to be evaluated. For traces, this is typically the final response. For observations, this might be the generation output or span result.
  • expected_output: Optional ground truth or expected result for comparison. Used by evaluators to assess correctness. May be None if no ground truth is available for the entity being evaluated.
  • metadata: Optional structured metadata providing additional context for evaluation. Can include information about the entity, execution context, user attributes, or any other relevant data that evaluators might use.
Examples:

Simple mapper for traces:

from langfuse import EvaluatorInputs

def trace_mapper(trace):
    return EvaluatorInputs(
        input=trace.input,
        output=trace.output,
        expected_output=None,  # No ground truth available
        metadata={"user_id": trace.user_id, "tags": trace.tags}
    )

Mapper for observations extracting specific fields:

def observation_mapper(observation):
    # Extract input/output from observation's data
    input_data = observation.input if hasattr(observation, 'input') else None
    output_data = observation.output if hasattr(observation, 'output') else None

    return EvaluatorInputs(
        input=input_data,
        output=output_data,
        expected_output=None,
        metadata={
            "observation_type": observation.type,
            "model": observation.model,
            "latency_ms": observation.end_time - observation.start_time
        }
    )


Note:

All arguments must be passed as keywords when instantiating this class.

EvaluatorInputs( *, input: Any, output: Any, expected_output: Any = None, metadata: Optional[Dict[str, Any]] = None)
 99    def __init__(
100        self,
101        *,
102        input: Any,
103        output: Any,
104        expected_output: Any = None,
105        metadata: Optional[Dict[str, Any]] = None,
106    ):
107        """Initialize EvaluatorInputs with the provided data.
108
109        Args:
110            input: The input data for evaluation.
111            output: The output data to be evaluated.
112            expected_output: Optional ground truth for comparison.
113            metadata: Optional additional context for evaluation.
114
115        Note:
116            All arguments must be provided as keywords.
117        """
118        self.input = input
119        self.output = output
120        self.expected_output = expected_output
121        self.metadata = metadata

Initialize EvaluatorInputs with the provided data.

Arguments:
  • input: The input data for evaluation.
  • output: The output data to be evaluated.
  • expected_output: Optional ground truth for comparison.
  • metadata: Optional additional context for evaluation.
Note:

All arguments must be provided as keywords.

input
output
expected_output
metadata
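
As a small illustration of the container in practice, the sketch below builds EvaluatorInputs defensively from a trace-like object; the getattr fallbacks and the specific field names are assumptions for illustration, not part of the SDK API:

```python
from langfuse import EvaluatorInputs

def safe_trace_mapper(trace):
    # Fall back to None when a field is missing so downstream evaluators can handle the gap.
    return EvaluatorInputs(
        input=getattr(trace, "input", None),
        output=getattr(trace, "output", None),
        expected_output=None,  # no ground truth available for live traces
        metadata={
            "trace_id": getattr(trace, "id", None),
            "user_id": getattr(trace, "user_id", None),
        },
    )
```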
class MapperFunction(typing.Protocol):
124class MapperFunction(Protocol):
125    """Protocol defining the interface for mapper functions in batch evaluation.
126
127    Mapper functions transform API response objects (traces or observations)
128    into the standardized EvaluatorInputs format that evaluators expect. This abstraction
129    allows you to define how to extract and structure evaluation data from different
130    entity types.
131
132    Mapper functions must:
133    - Accept a single item parameter (trace, observation)
134    - Return an EvaluatorInputs instance with input, output, expected_output, metadata
135    - Can be either synchronous or asynchronous
136    - Should handle missing or malformed data gracefully
137    """
138
139    def __call__(
140        self,
141        *,
142        item: Union["TraceWithFullDetails", "ObservationsView"],
143        **kwargs: Dict[str, Any],
144    ) -> Union[EvaluatorInputs, Awaitable[EvaluatorInputs]]:
145        """Transform an API response object into evaluator inputs.
146
147        This method defines how to extract evaluation-relevant data from the raw
148        API response object. The implementation should map entity-specific fields
149        to the standardized input/output/expected_output/metadata structure.
150
151        Args:
152            item: The API response object to transform. The type depends on the scope:
153                - TraceWithFullDetails: When evaluating traces
154                - ObservationsView: When evaluating observations
155
156        Returns:
157            EvaluatorInputs: A structured container with:
158                - input: The input data that generated the output
159                - output: The output to be evaluated
160                - expected_output: Optional ground truth for comparison
161                - metadata: Optional additional context
162
163            Can return either a direct EvaluatorInputs instance or an awaitable
164            (for async mappers that need to fetch additional data).
165
166        Examples:
167            Basic trace mapper:
168            ```python
169            def map_trace(trace):
170                return EvaluatorInputs(
171                    input=trace.input,
172                    output=trace.output,
173                    expected_output=None,
174                    metadata={"trace_id": trace.id, "user": trace.user_id}
175                )
176            ```
177
178            Observation mapper with conditional logic:
179            ```python
180            def map_observation(observation):
181                # Extract fields based on observation type
182                if observation.type == "GENERATION":
183                    input_data = observation.input
184                    output_data = observation.output
185                else:
186                    # For other types, use different fields
187                    input_data = observation.metadata.get("input")
188                    output_data = observation.metadata.get("output")
189
190                return EvaluatorInputs(
191                    input=input_data,
192                    output=output_data,
193                    expected_output=None,
194                    metadata={"obs_id": observation.id, "type": observation.type}
195                )
196            ```
197
198            Async mapper (if additional processing needed):
199            ```python
200            async def map_trace_async(trace):
201                # Could do async processing here if needed
202                processed_output = await some_async_transformation(trace.output)
203
204                return EvaluatorInputs(
205                    input=trace.input,
206                    output=processed_output,
207                    expected_output=None,
208                    metadata={"trace_id": trace.id}
209                )
210            ```
211        """
212        ...

Protocol defining the interface for mapper functions in batch evaluation.

Mapper functions transform API response objects (traces or observations) into the standardized EvaluatorInputs format that evaluators expect. This abstraction allows you to define how to extract and structure evaluation data from different entity types.

Mapper functions:

  • Must accept a single item parameter (a trace or an observation)
  • Must return an EvaluatorInputs instance with input, output, expected_output, and metadata
  • Can be either synchronous or asynchronous (see the sketch below)
  • Should handle missing or malformed data gracefully
MapperFunction(*args, **kwargs)
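
The __call__ printed above is keyword-only and allows async implementations. Below is a hedged sketch of an async mapper written against that printed signature; the attributes read from item are illustrative assumptions:

```python
from typing import Any

from langfuse import EvaluatorInputs

async def async_item_mapper(*, item: Any, **kwargs: Any) -> EvaluatorInputs:
    # Keyword-only `item`, mirroring the printed __call__ signature; async is allowed.
    # An await point could fetch extra context here before building the inputs.
    return EvaluatorInputs(
        input=getattr(item, "input", None),
        output=getattr(item, "output", None),
        expected_output=None,
        metadata={"observation_type": getattr(item, "type", None)},  # illustrative field
    )
```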
class CompositeEvaluatorFunction(typing.Protocol):
215class CompositeEvaluatorFunction(Protocol):
216    """Protocol defining the interface for composite evaluator functions.
217
218    Composite evaluators create aggregate scores from multiple item-level evaluations.
219    This is commonly used to compute weighted averages, combined metrics, or other
220    composite assessments based on individual evaluation results.
221
222    Composite evaluators:
223    - Accept the same inputs as item-level evaluators (input, output, expected_output, metadata)
224      plus the list of evaluations
225    - Return either a single Evaluation, a list of Evaluations, or a dict
226    - Can be either synchronous or asynchronous
227    - Have access to both raw item data and evaluation results
228    """
229
230    def __call__(
231        self,
232        *,
233        input: Optional[Any] = None,
234        output: Optional[Any] = None,
235        expected_output: Optional[Any] = None,
236        metadata: Optional[Dict[str, Any]] = None,
237        evaluations: List[Evaluation],
238        **kwargs: Dict[str, Any],
239    ) -> Union[
240        Evaluation,
241        List[Evaluation],
242        Dict[str, Any],
243        Awaitable[Evaluation],
244        Awaitable[List[Evaluation]],
245        Awaitable[Dict[str, Any]],
246    ]:
247        r"""Create a composite evaluation from item-level evaluation results.
248
249        This method combines multiple evaluation scores into a single composite metric.
250        Common use cases include weighted averages, pass/fail decisions based on multiple
251        criteria, or custom scoring logic that considers multiple dimensions.
252
253        Args:
254            input: The input data that was provided to the system being evaluated.
255            output: The output generated by the system being evaluated.
256            expected_output: The expected/reference output for comparison (if available).
257            metadata: Additional metadata about the evaluation context.
258            evaluations: List of evaluation results from item-level evaluators.
259                Each evaluation contains name, value, comment, and metadata.
260
261        Returns:
262            Can return any of:
263            - Evaluation: A single composite evaluation result
264            - List[Evaluation]: Multiple composite evaluations
265            - Dict: A dict that will be converted to an Evaluation
266                - name: Identifier for the composite metric (e.g., "composite_score")
267                - value: The computed composite value
268                - comment: Optional explanation of how the score was computed
269                - metadata: Optional details about the composition logic
270
271            Can return either a direct Evaluation instance or an awaitable
272            (for async composite evaluators).
273
274        Examples:
275            Simple weighted average:
276            ```python
277            def weighted_composite(*, input, output, expected_output, metadata, evaluations):
278                weights = {
279                    "accuracy": 0.5,
280                    "relevance": 0.3,
281                    "safety": 0.2
282                }
283
284                total_score = 0.0
285                total_weight = 0.0
286
287                for eval in evaluations:
288                    if eval.name in weights and isinstance(eval.value, (int, float)):
289                        total_score += eval.value * weights[eval.name]
290                        total_weight += weights[eval.name]
291
292                final_score = total_score / total_weight if total_weight > 0 else 0.0
293
294                return Evaluation(
295                    name="composite_score",
296                    value=final_score,
297                    comment=f"Weighted average of {len(evaluations)} metrics"
298                )
299            ```
300
301            Pass/fail composite based on thresholds:
302            ```python
303            def pass_fail_composite(*, input, output, expected_output, metadata, evaluations):
304                # Must pass all criteria
305                thresholds = {
306                    "accuracy": 0.7,
307                    "safety": 0.9,
308                    "relevance": 0.6
309                }
310
311                passes = True
312                failing_metrics = []
313
314                for metric, threshold in thresholds.items():
315                    eval_result = next((e for e in evaluations if e.name == metric), None)
316                    if eval_result and isinstance(eval_result.value, (int, float)):
317                        if eval_result.value < threshold:
318                            passes = False
319                            failing_metrics.append(metric)
320
321                return Evaluation(
322                    name="passes_all_checks",
323                    value=passes,
324                    comment=f"Failed: {', '.join(failing_metrics)}" if failing_metrics else "All checks passed",
325                    data_type="BOOLEAN"
326                )
327            ```
328
329            Async composite with external scoring:
330            ```python
331            async def llm_composite(*, input, output, expected_output, metadata, evaluations):
332                # Use LLM to synthesize multiple evaluation results
333                eval_summary = "\n".join(
334                    f"- {e.name}: {e.value}" for e in evaluations
335                )
336
337                prompt = f"Given these evaluation scores:\n{eval_summary}\n"
338                prompt += f"For the output: {output}\n"
339                prompt += "Provide an overall quality score from 0-1."
340
341                response = await openai.chat.completions.create(
342                    model="gpt-4",
343                    messages=[{"role": "user", "content": prompt}]
344                )
345
346                score = float(response.choices[0].message.content.strip())
347
348                return Evaluation(
349                    name="llm_composite_score",
350                    value=score,
351                    comment="LLM-synthesized composite score"
352                )
353            ```
354
355            Context-aware composite:
356            ```python
357            def context_composite(*, input, output, expected_output, metadata, evaluations):
358                # Adjust weighting based on metadata
359                base_weights = {"accuracy": 0.5, "speed": 0.3, "cost": 0.2}
360
361                # If metadata indicates high importance, prioritize accuracy
362                if metadata and metadata.get('importance') == 'high':
363                    weights = {"accuracy": 0.7, "speed": 0.2, "cost": 0.1}
364                else:
365                    weights = base_weights
366
367                total = sum(
368                    e.value * weights.get(e.name, 0)
369                    for e in evaluations
370                    if isinstance(e.value, (int, float))
371                )
372
373                return Evaluation(
374                    name="weighted_composite",
375                    value=total,
376                    comment="Context-aware weighted composite"
377                )
378            ```
379        """
380        ...

Protocol defining the interface for composite evaluator functions.

Composite evaluators create aggregate scores from multiple item-level evaluations. This is commonly used to compute weighted averages, combined metrics, or other composite assessments based on individual evaluation results.

Composite evaluators:

  • Accept the same inputs as item-level evaluators (input, output, expected_output, metadata) plus the list of evaluations
  • Return either a single Evaluation, a list of Evaluations, or a dict
  • Can be either synchronous or asynchronous
  • Have access to both raw item data and evaluation results
CompositeEvaluatorFunction(*args, **kwargs)
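
As a compact counterpart to the longer examples in the docstring, the sketch below shows a composite evaluator conforming to the keyword-only signature above; the metric name "mean_score" and the averaging rule are illustrative choices, not an SDK convention:

```python
from typing import Any, Dict, List, Optional

from langfuse import Evaluation

def mean_composite(
    *,
    input: Optional[Any] = None,
    output: Optional[Any] = None,
    expected_output: Optional[Any] = None,
    metadata: Optional[Dict[str, Any]] = None,
    evaluations: List[Evaluation],
    **kwargs: Any,
) -> Evaluation:
    # Average all numeric item-level scores into a single composite value.
    numeric = [e.value for e in evaluations if isinstance(e.value, (int, float))]
    mean = sum(numeric) / len(numeric) if numeric else 0.0
    return Evaluation(
        name="mean_score",
        value=mean,
        comment=f"Mean of {len(numeric)} numeric scores",
    )
```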
class EvaluatorStats:
383class EvaluatorStats:
384    """Statistics for a single evaluator's performance during batch evaluation.
385
386    This class tracks detailed metrics about how a specific evaluator performed
387    across all items in a batch evaluation run. It helps identify evaluator issues,
388    understand reliability, and optimize evaluation pipelines.
389
390    Attributes:
391        name: The name of the evaluator function (extracted from __name__).
392        total_runs: Total number of times the evaluator was invoked.
393        successful_runs: Number of times the evaluator completed successfully.
394        failed_runs: Number of times the evaluator raised an exception or failed.
395        total_scores_created: Total number of evaluation scores created by this evaluator.
396            Can be higher than successful_runs if the evaluator returns multiple scores.
397
398    Examples:
399        Accessing evaluator stats from batch evaluation result:
400        ```python
401        result = client.run_batched_evaluation(...)
402
403        for stats in result.evaluator_stats:
404            print(f"Evaluator: {stats.name}")
405            print(f"  Success rate: {stats.successful_runs / stats.total_runs:.1%}")
406            print(f"  Scores created: {stats.total_scores_created}")
407
408            if stats.failed_runs > 0:
409                print(f"  ⚠️  Failed {stats.failed_runs} times")
410        ```
411
412        Identifying problematic evaluators:
413        ```python
414        result = client.run_batched_evaluation(...)
415
416        # Find evaluators with high failure rates
417        for stats in result.evaluator_stats:
418            failure_rate = stats.failed_runs / stats.total_runs
419            if failure_rate > 0.1:  # More than 10% failures
420                print(f"⚠️  {stats.name} has {failure_rate:.1%} failure rate")
421                print(f"    Consider debugging or removing this evaluator")
422        ```
423
424    Note:
425        All arguments must be passed as keywords when instantiating this class.
426    """
427
428    def __init__(
429        self,
430        *,
431        name: str,
432        total_runs: int = 0,
433        successful_runs: int = 0,
434        failed_runs: int = 0,
435        total_scores_created: int = 0,
436    ):
437        """Initialize EvaluatorStats with the provided metrics.
438
439        Args:
440            name: The evaluator function name.
441            total_runs: Total number of evaluator invocations.
442            successful_runs: Number of successful completions.
443            failed_runs: Number of failures.
444            total_scores_created: Total scores created by this evaluator.
445
446        Note:
447            All arguments must be provided as keywords.
448        """
449        self.name = name
450        self.total_runs = total_runs
451        self.successful_runs = successful_runs
452        self.failed_runs = failed_runs
453        self.total_scores_created = total_scores_created

Statistics for a single evaluator's performance during batch evaluation.

This class tracks detailed metrics about how a specific evaluator performed across all items in a batch evaluation run. It helps identify evaluator issues, understand reliability, and optimize evaluation pipelines.

Attributes:
  • name: The name of the evaluator function (extracted from __name__).
  • total_runs: Total number of times the evaluator was invoked.
  • successful_runs: Number of times the evaluator completed successfully.
  • failed_runs: Number of times the evaluator raised an exception or failed.
  • total_scores_created: Total number of evaluation scores created by this evaluator. Can be higher than successful_runs if the evaluator returns multiple scores.
Examples:

Accessing evaluator stats from batch evaluation result:

result = client.run_batched_evaluation(...)

for stats in result.evaluator_stats:
    print(f"Evaluator: {stats.name}")
    print(f"  Success rate: {stats.successful_runs / stats.total_runs:.1%}")
    print(f"  Scores created: {stats.total_scores_created}")

    if stats.failed_runs > 0:
        print(f"  âš ī¸  Failed {stats.failed_runs} times")

Identifying problematic evaluators:

result = client.run_batched_evaluation(...)

# Find evaluators with high failure rates
for stats in result.evaluator_stats:
    failure_rate = stats.failed_runs / stats.total_runs
    if failure_rate > 0.1:  # More than 10% failures
        print(f"âš ī¸  {stats.name} has {failure_rate:.1%} failure rate")
        print(f"    Consider debugging or removing this evaluator")
Note:

All arguments must be passed as keywords when instantiating this class.

EvaluatorStats( *, name: str, total_runs: int = 0, successful_runs: int = 0, failed_runs: int = 0, total_scores_created: int = 0)
428    def __init__(
429        self,
430        *,
431        name: str,
432        total_runs: int = 0,
433        successful_runs: int = 0,
434        failed_runs: int = 0,
435        total_scores_created: int = 0,
436    ):
437        """Initialize EvaluatorStats with the provided metrics.
438
439        Args:
440            name: The evaluator function name.
441            total_runs: Total number of evaluator invocations.
442            successful_runs: Number of successful completions.
443            failed_runs: Number of failures.
444            total_scores_created: Total scores created by this evaluator.
445
446        Note:
447            All arguments must be provided as keywords.
448        """
449        self.name = name
450        self.total_runs = total_runs
451        self.successful_runs = successful_runs
452        self.failed_runs = failed_runs
453        self.total_scores_created = total_scores_created

Initialize EvaluatorStats with the provided metrics.

Arguments:
  • name: The evaluator function name.
  • total_runs: Total number of evaluator invocations.
  • successful_runs: Number of successful completions.
  • failed_runs: Number of failures.
  • total_scores_created: Total scores created by this evaluator.
Note:

All arguments must be provided as keywords.

name
total_runs
successful_runs
failed_runs
total_scores_created
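
A small helper sketch for consuming these statistics; unlike the docstring examples above it guards against total_runs being zero. The helper name and the summary dictionary layout are assumptions for illustration:

```python
from typing import Dict, List

from langfuse import EvaluatorStats

def summarize_evaluator_stats(evaluator_stats: List[EvaluatorStats]) -> Dict[str, dict]:
    # Build a compact per-evaluator summary, guarding against evaluators that never ran.
    summary: Dict[str, dict] = {}
    for stats in evaluator_stats:
        success_rate = (
            stats.successful_runs / stats.total_runs if stats.total_runs > 0 else 0.0
        )
        summary[stats.name] = {
            "success_rate": success_rate,
            "failed_runs": stats.failed_runs,
            "scores_created": stats.total_scores_created,
        }
    return summary

# e.g. summarize_evaluator_stats(result.evaluator_stats) after a batch run
```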
class BatchEvaluationResumeToken:
456class BatchEvaluationResumeToken:
457    """Token for resuming a failed batch evaluation run.
458
459    This class encapsulates all the information needed to resume a batch evaluation
460    that was interrupted or failed partway through. It uses timestamp-based filtering
461    to avoid re-processing items that were already evaluated, even if the underlying
462    dataset changed between runs.
463
464    Attributes:
465        scope: The type of items being evaluated ("traces", "observations").
466        filter: The original JSON filter string used to query items.
467        last_processed_timestamp: ISO 8601 timestamp of the last successfully processed item.
468            Used to construct a filter that only fetches items after this timestamp.
469        last_processed_id: The ID of the last successfully processed item, for reference.
470        items_processed: Count of items successfully processed before interruption.
471
472    Examples:
473        Resuming a failed batch evaluation:
474        ```python
475        # Initial run that fails partway through
476        try:
477            result = client.run_batched_evaluation(
478                scope="traces",
479                mapper=my_mapper,
480                evaluators=[evaluator1, evaluator2],
481                filter='{"tags": ["production"]}',
482                max_items=10000
483            )
484        except Exception as e:
485            print(f"Evaluation failed: {e}")
486
487            # Save the resume token
488            if result.resume_token:
489                # Store resume token for later (e.g., in a file or database)
490                import json
491                with open("resume_token.json", "w") as f:
492                    json.dump({
493                        "scope": result.resume_token.scope,
494                        "filter": result.resume_token.filter,
495                        "last_timestamp": result.resume_token.last_processed_timestamp,
496                        "last_id": result.resume_token.last_processed_id,
497                        "items_done": result.resume_token.items_processed
498                    }, f)
499
500        # Later, resume from where it left off
501        with open("resume_token.json") as f:
502            token_data = json.load(f)
503
504        resume_token = BatchEvaluationResumeToken(
505            scope=token_data["scope"],
506            filter=token_data["filter"],
507            last_processed_timestamp=token_data["last_timestamp"],
508            last_processed_id=token_data["last_id"],
509            items_processed=token_data["items_done"]
510        )
511
512        # Resume the evaluation
513        result = client.run_batched_evaluation(
514            scope="traces",
515            mapper=my_mapper,
516            evaluators=[evaluator1, evaluator2],
517            resume_from=resume_token
518        )
519
520        print(f"Processed {result.total_items_processed} additional items")
521        ```
522
523        Handling partial completion:
524        ```python
525        result = client.run_batched_evaluation(...)
526
527        if not result.completed:
528            print(f"Evaluation incomplete. Processed {result.resume_token.items_processed} items")
529            print(f"Last item: {result.resume_token.last_processed_id}")
530            print(f"Resume from: {result.resume_token.last_processed_timestamp}")
531
532            # Optionally retry automatically
533            if result.resume_token:
534                print("Retrying...")
535                result = client.run_batched_evaluation(
536                    scope=result.resume_token.scope,
537                    mapper=my_mapper,
538                    evaluators=my_evaluators,
539                    resume_from=result.resume_token
540                )
541        ```
542
543    Note:
544        All arguments must be passed as keywords when instantiating this class.
545        The timestamp-based approach means that items created after the initial run
546        but before the timestamp will be skipped. This is intentional to avoid
547        duplicates and ensure consistent evaluation.
548    """
549
550    def __init__(
551        self,
552        *,
553        scope: str,
554        filter: Optional[str],
555        last_processed_timestamp: str,
556        last_processed_id: str,
557        items_processed: int,
558    ):
559        """Initialize BatchEvaluationResumeToken with the provided state.
560
561        Args:
562            scope: The scope type ("traces", "observations").
563            filter: The original JSON filter string.
564            last_processed_timestamp: ISO 8601 timestamp of last processed item.
565            last_processed_id: ID of last processed item.
566            items_processed: Count of items processed before interruption.
567
568        Note:
569            All arguments must be provided as keywords.
570        """
571        self.scope = scope
572        self.filter = filter
573        self.last_processed_timestamp = last_processed_timestamp
574        self.last_processed_id = last_processed_id
575        self.items_processed = items_processed

Token for resuming a failed batch evaluation run.

This class encapsulates all the information needed to resume a batch evaluation that was interrupted or failed partway through. It uses timestamp-based filtering to avoid re-processing items that were already evaluated, even if the underlying dataset changed between runs.

Attributes:
  • scope: The type of items being evaluated ("traces", "observations").
  • filter: The original JSON filter string used to query items.
  • last_processed_timestamp: ISO 8601 timestamp of the last successfully processed item. Used to construct a filter that only fetches items after this timestamp.
  • last_processed_id: The ID of the last successfully processed item, for reference.
  • items_processed: Count of items successfully processed before interruption.
Examples:

Resuming a failed batch evaluation:

# Initial run that fails partway through
try:
    result = client.run_batched_evaluation(
        scope="traces",
        mapper=my_mapper,
        evaluators=[evaluator1, evaluator2],
        filter='{"tags": ["production"]}',
        max_items=10000
    )
except Exception as e:
    print(f"Evaluation failed: {e}")

    # Save the resume token
    if result.resume_token:
        # Store resume token for later (e.g., in a file or database)
        import json
        with open("resume_token.json", "w") as f:
            json.dump({
                "scope": result.resume_token.scope,
                "filter": result.resume_token.filter,
                "last_timestamp": result.resume_token.last_processed_timestamp,
                "last_id": result.resume_token.last_processed_id,
                "items_done": result.resume_token.items_processed
            }, f)

# Later, resume from where it left off
with open("resume_token.json") as f:
    token_data = json.load(f)

resume_token = BatchEvaluationResumeToken(
    scope=token_data["scope"],
    filter=token_data["filter"],
    last_processed_timestamp=token_data["last_timestamp"],
    last_processed_id=token_data["last_id"],
    items_processed=token_data["items_done"]
)

# Resume the evaluation
result = client.run_batched_evaluation(
    scope="traces",
    mapper=my_mapper,
    evaluators=[evaluator1, evaluator2],
    resume_from=resume_token
)

print(f"Processed {result.total_items_processed} additional items")

Handling partial completion:

result = client.run_batched_evaluation(...)

if not result.completed:
    print(f"Evaluation incomplete. Processed {result.resume_token.items_processed} items")
    print(f"Last item: {result.resume_token.last_processed_id}")
    print(f"Resume from: {result.resume_token.last_processed_timestamp}")

    # Optionally retry automatically
    if result.resume_token:
        print("Retrying...")
        result = client.run_batched_evaluation(
            scope=result.resume_token.scope,
            mapper=my_mapper,
            evaluators=my_evaluators,
            resume_from=result.resume_token
        )
Note:

All arguments must be passed as keywords when instantiating this class. The timestamp-based approach means that items created after the initial run but before the timestamp will be skipped. This is intentional to avoid duplicates and ensure consistent evaluation.

BatchEvaluationResumeToken( *, scope: str, filter: Optional[str], last_processed_timestamp: str, last_processed_id: str, items_processed: int)
550    def __init__(
551        self,
552        *,
553        scope: str,
554        filter: Optional[str],
555        last_processed_timestamp: str,
556        last_processed_id: str,
557        items_processed: int,
558    ):
559        """Initialize BatchEvaluationResumeToken with the provided state.
560
561        Args:
562            scope: The scope type ("traces", "observations").
563            filter: The original JSON filter string.
564            last_processed_timestamp: ISO 8601 timestamp of last processed item.
565            last_processed_id: ID of last processed item.
566            items_processed: Count of items processed before interruption.
567
568        Note:
569            All arguments must be provided as keywords.
570        """
571        self.scope = scope
572        self.filter = filter
573        self.last_processed_timestamp = last_processed_timestamp
574        self.last_processed_id = last_processed_id
575        self.items_processed = items_processed

Initialize BatchEvaluationResumeToken with the provided state.

Arguments:
  • scope: The scope type ("traces", "observations").
  • filter: The original JSON filter string.
  • last_processed_timestamp: ISO 8601 timestamp of last processed item.
  • last_processed_id: ID of last processed item.
  • items_processed: Count of items processed before interruption.
Note:

All arguments must be provided as keywords.

scope
filter
last_processed_timestamp
last_processed_id
items_processed
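
Because the instance attributes mirror the constructor's keyword names, the JSON persistence shown in the examples above can be condensed; this is a hedged sketch that assumes no extra attributes have been attached to the token instance:

```python
import json

from langfuse import BatchEvaluationResumeToken

def save_resume_token(token: BatchEvaluationResumeToken, path: str) -> None:
    # The instance attributes match the constructor's keyword names, so vars() round-trips.
    with open(path, "w") as f:
        json.dump(vars(token), f)

def load_resume_token(path: str) -> BatchEvaluationResumeToken:
    with open(path) as f:
        return BatchEvaluationResumeToken(**json.load(f))
```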
class BatchEvaluationResult:
578class BatchEvaluationResult:
579    r"""Complete result structure for batch evaluation execution.
580
581    This class encapsulates comprehensive statistics and metadata about a batch
582    evaluation run, including counts, evaluator-specific metrics, timing information,
583    error details, and resume capability.
584
585    Attributes:
586        total_items_fetched: Total number of items fetched from the API.
587        total_items_processed: Number of items successfully evaluated.
588        total_items_failed: Number of items that failed during evaluation.
589        total_scores_created: Total scores created by all item-level evaluators.
590        total_composite_scores_created: Scores created by the composite evaluator.
591        total_evaluations_failed: Number of individual evaluator failures across all items.
592        evaluator_stats: List of per-evaluator statistics (success/failure rates, scores created).
593        resume_token: Token for resuming if evaluation was interrupted (None if completed).
594        completed: True if all items were processed, False if stopped early or failed.
595        duration_seconds: Total time taken to execute the batch evaluation.
596        failed_item_ids: List of IDs for items that failed evaluation.
597        error_summary: Dictionary mapping error types to occurrence counts.
598        has_more_items: True if max_items limit was reached but more items exist.
599        item_evaluations: Dictionary mapping item IDs to their evaluation results (both regular and composite).
600
601    Examples:
602        Basic result inspection:
603        ```python
604        result = client.run_batched_evaluation(...)
605
606        print(f"Processed: {result.total_items_processed}/{result.total_items_fetched}")
607        print(f"Scores created: {result.total_scores_created}")
608        print(f"Duration: {result.duration_seconds:.2f}s")
609        print(f"Success rate: {result.total_items_processed / result.total_items_fetched:.1%}")
610        ```
611
612        Detailed analysis with evaluator stats:
613        ```python
614        result = client.run_batched_evaluation(...)
615
616        print(f"\n📊 Batch Evaluation Results")
617        print(f"{'='*50}")
618        print(f"Items processed: {result.total_items_processed}")
619        print(f"Items failed: {result.total_items_failed}")
620        print(f"Scores created: {result.total_scores_created}")
621
622        if result.total_composite_scores_created > 0:
623            print(f"Composite scores: {result.total_composite_scores_created}")
624
625        print(f"\n📈 Evaluator Performance:")
626        for stats in result.evaluator_stats:
627            success_rate = stats.successful_runs / stats.total_runs if stats.total_runs > 0 else 0
628            print(f"\n  {stats.name}:")
629            print(f"    Success rate: {success_rate:.1%}")
630            print(f"    Scores created: {stats.total_scores_created}")
631            if stats.failed_runs > 0:
632                print(f"    ⚠️  Failures: {stats.failed_runs}")
633
634        if result.error_summary:
635            print(f"\n⚠️  Errors encountered:")
636            for error_type, count in result.error_summary.items():
637                print(f"    {error_type}: {count}")
638        ```
639
640        Handling incomplete runs:
641        ```python
642        result = client.run_batched_evaluation(...)
643
644        if not result.completed:
645            print("⚠️  Evaluation incomplete!")
646
647            if result.resume_token:
648                print(f"Processed {result.resume_token.items_processed} items before failure")
649                print(f"Use resume_from parameter to continue from:")
650                print(f"  Timestamp: {result.resume_token.last_processed_timestamp}")
651                print(f"  Last ID: {result.resume_token.last_processed_id}")
652
653        if result.has_more_items:
654            print(f"ℹ️  More items available beyond max_items limit")
655        ```
656
657        Performance monitoring:
658        ```python
659        result = client.run_batched_evaluation(...)
660
661        items_per_second = result.total_items_processed / result.duration_seconds
662        avg_scores_per_item = result.total_scores_created / result.total_items_processed
663
664        print(f"Performance metrics:")
665        print(f"  Throughput: {items_per_second:.2f} items/second")
666        print(f"  Avg scores/item: {avg_scores_per_item:.2f}")
667        print(f"  Total duration: {result.duration_seconds:.2f}s")
668
669        if result.total_evaluations_failed > 0:
670            failure_rate = result.total_evaluations_failed / (
671                result.total_items_processed * len(result.evaluator_stats)
672            )
673            print(f"  Evaluation failure rate: {failure_rate:.1%}")
674        ```
675
676    Note:
677        All arguments must be passed as keywords when instantiating this class.
678    """
679
680    def __init__(
681        self,
682        *,
683        total_items_fetched: int,
684        total_items_processed: int,
685        total_items_failed: int,
686        total_scores_created: int,
687        total_composite_scores_created: int,
688        total_evaluations_failed: int,
689        evaluator_stats: List[EvaluatorStats],
690        resume_token: Optional[BatchEvaluationResumeToken],
691        completed: bool,
692        duration_seconds: float,
693        failed_item_ids: List[str],
694        error_summary: Dict[str, int],
695        has_more_items: bool,
696        item_evaluations: Dict[str, List["Evaluation"]],
697    ):
698        """Initialize BatchEvaluationResult with comprehensive statistics.
699
700        Args:
701            total_items_fetched: Total items fetched from API.
702            total_items_processed: Items successfully evaluated.
703            total_items_failed: Items that failed evaluation.
704            total_scores_created: Scores from item-level evaluators.
705            total_composite_scores_created: Scores from composite evaluator.
706            total_evaluations_failed: Individual evaluator failures.
707            evaluator_stats: Per-evaluator statistics.
708            resume_token: Token for resuming (None if completed).
709            completed: Whether all items were processed.
710            duration_seconds: Total execution time.
711            failed_item_ids: IDs of failed items.
712            error_summary: Error types and counts.
713            has_more_items: Whether more items exist beyond max_items.
714            item_evaluations: Dictionary mapping item IDs to their evaluation results.
715
716        Note:
717            All arguments must be provided as keywords.
718        """
719        self.total_items_fetched = total_items_fetched
720        self.total_items_processed = total_items_processed
721        self.total_items_failed = total_items_failed
722        self.total_scores_created = total_scores_created
723        self.total_composite_scores_created = total_composite_scores_created
724        self.total_evaluations_failed = total_evaluations_failed
725        self.evaluator_stats = evaluator_stats
726        self.resume_token = resume_token
727        self.completed = completed
728        self.duration_seconds = duration_seconds
729        self.failed_item_ids = failed_item_ids
730        self.error_summary = error_summary
731        self.has_more_items = has_more_items
732        self.item_evaluations = item_evaluations
733
734    def __str__(self) -> str:
735        """Return a formatted string representation of the batch evaluation results.
736
737        Returns:
738            A multi-line string with a summary of the evaluation results.
739        """
740        lines = []
741        lines.append("=" * 60)
742        lines.append("Batch Evaluation Results")
743        lines.append("=" * 60)
744
745        # Summary statistics
746        lines.append(f"\nStatus: {'Completed' if self.completed else 'Incomplete'}")
747        lines.append(f"Duration: {self.duration_seconds:.2f}s")
748        lines.append(f"\nItems fetched: {self.total_items_fetched}")
749        lines.append(f"Items processed: {self.total_items_processed}")
750
751        if self.total_items_failed > 0:
752            lines.append(f"Items failed: {self.total_items_failed}")
753
754        # Success rate
755        if self.total_items_fetched > 0:
756            success_rate = self.total_items_processed / self.total_items_fetched * 100
757            lines.append(f"Success rate: {success_rate:.1f}%")
758
759        # Scores created
760        lines.append(f"\nScores created: {self.total_scores_created}")
761        if self.total_composite_scores_created > 0:
762            lines.append(f"Composite scores: {self.total_composite_scores_created}")
763
764        total_scores = self.total_scores_created + self.total_composite_scores_created
765        lines.append(f"Total scores: {total_scores}")
766
767        # Evaluator statistics
768        if self.evaluator_stats:
769            lines.append("\nEvaluator Performance:")
770            for stats in self.evaluator_stats:
771                lines.append(f"  {stats.name}:")
772                if stats.total_runs > 0:
773                    success_rate = (
774                        stats.successful_runs / stats.total_runs * 100
775                        if stats.total_runs > 0
776                        else 0
777                    )
778                    lines.append(
779                        f"    Runs: {stats.successful_runs}/{stats.total_runs} "
780                        f"({success_rate:.1f}% success)"
781                    )
782                    lines.append(f"    Scores created: {stats.total_scores_created}")
783                    if stats.failed_runs > 0:
784                        lines.append(f"    Failed runs: {stats.failed_runs}")
785
786        # Performance metrics
787        if self.total_items_processed > 0 and self.duration_seconds > 0:
788            items_per_sec = self.total_items_processed / self.duration_seconds
789            lines.append("\nPerformance:")
790            lines.append(f"  Throughput: {items_per_sec:.2f} items/second")
791            if self.total_scores_created > 0:
792                avg_scores = self.total_scores_created / self.total_items_processed
793                lines.append(f"  Avg scores per item: {avg_scores:.2f}")
794
795        # Errors and warnings
796        if self.error_summary:
797            lines.append("\nErrors encountered:")
798            for error_type, count in self.error_summary.items():
799                lines.append(f"  {error_type}: {count}")
800
801        # Incomplete run information
802        if not self.completed:
803            lines.append("\nWarning: Evaluation incomplete")
804            if self.resume_token:
805                lines.append(
806                    f"  Last processed: {self.resume_token.last_processed_timestamp}"
807                )
808                lines.append(f"  Items processed: {self.resume_token.items_processed}")
809                lines.append("  Use resume_from parameter to continue")
810
811        if self.has_more_items:
812            lines.append("\nNote: More items available beyond max_items limit")
813
814        lines.append("=" * 60)
815        return "\n".join(lines)

Complete result structure for batch evaluation execution.

This class encapsulates comprehensive statistics and metadata about a batch evaluation run, including counts, evaluator-specific metrics, timing information, error details, and resume capability.

Attributes:
  • total_items_fetched: Total number of items fetched from the API.
  • total_items_processed: Number of items successfully evaluated.
  • total_items_failed: Number of items that failed during evaluation.
  • total_scores_created: Total scores created by all item-level evaluators.
  • total_composite_scores_created: Scores created by the composite evaluator.
  • total_evaluations_failed: Number of individual evaluator failures across all items.
  • evaluator_stats: List of per-evaluator statistics (success/failure rates, scores created).
  • resume_token: Token for resuming if evaluation was interrupted (None if completed).
  • completed: True if all items were processed, False if stopped early or failed.
  • duration_seconds: Total time taken to execute the batch evaluation.
  • failed_item_ids: List of IDs for items that failed evaluation.
  • error_summary: Dictionary mapping error types to occurrence counts.
  • has_more_items: True if max_items limit was reached but more items exist.
  • item_evaluations: Dictionary mapping item IDs to their evaluation results (both regular and composite).
Examples:

Basic result inspection:

result = client.run_batched_evaluation(...)

print(f"Processed: {result.total_items_processed}/{result.total_items_fetched}")
print(f"Scores created: {result.total_scores_created}")
print(f"Duration: {result.duration_seconds:.2f}s")
print(f"Success rate: {result.total_items_processed / result.total_items_fetched:.1%}")

Detailed analysis with evaluator stats:

result = client.run_batched_evaluation(...)

print(f"\n📊 Batch Evaluation Results")
print(f"{'='*50}")
print(f"Items processed: {result.total_items_processed}")
print(f"Items failed: {result.total_items_failed}")
print(f"Scores created: {result.total_scores_created}")

if result.total_composite_scores_created > 0:
    print(f"Composite scores: {result.total_composite_scores_created}")

print(f"\n📈 Evaluator Performance:")
for stats in result.evaluator_stats:
    success_rate = stats.successful_runs / stats.total_runs if stats.total_runs > 0 else 0
    print(f"\n  {stats.name}:")
    print(f"    Success rate: {success_rate:.1%}")
    print(f"    Scores created: {stats.total_scores_created}")
    if stats.failed_runs > 0:
        print(f"    âš ī¸  Failures: {stats.failed_runs}")

if result.error_summary:
    print(f"\nâš ī¸  Errors encountered:")
    for error_type, count in result.error_summary.items():
        print(f"    {error_type}: {count}")

Handling incomplete runs:

result = client.run_batched_evaluation(...)

if not result.completed:
    print("âš ī¸  Evaluation incomplete!")

    if result.resume_token:
        print(f"Processed {result.resume_token.items_processed} items before failure")
        print(f"Use resume_from parameter to continue from:")
        print(f"  Timestamp: {result.resume_token.last_processed_timestamp}")
        print(f"  Last ID: {result.resume_token.last_processed_id}")

if result.has_more_items:
    print(f"â„šī¸  More items available beyond max_items limit")

Performance monitoring:

result = client.run_batched_evaluation(...)

items_per_second = result.total_items_processed / result.duration_seconds
avg_scores_per_item = result.total_scores_created / result.total_items_processed

print(f"Performance metrics:")
print(f"  Throughput: {items_per_second:.2f} items/second")
print(f"  Avg scores/item: {avg_scores_per_item:.2f}")
print(f"  Total duration: {result.duration_seconds:.2f}s")

if result.total_evaluations_failed > 0:
    failure_rate = result.total_evaluations_failed / (
        result.total_items_processed * len(result.evaluator_stats)
    )
    print(f"  Evaluation failure rate: {failure_rate:.1%}")

Note:

All arguments must be passed as keywords when instantiating this class.

BatchEvaluationResult( *, total_items_fetched: int, total_items_processed: int, total_items_failed: int, total_scores_created: int, total_composite_scores_created: int, total_evaluations_failed: int, evaluator_stats: List[EvaluatorStats], resume_token: Optional[BatchEvaluationResumeToken], completed: bool, duration_seconds: float, failed_item_ids: List[str], error_summary: Dict[str, int], has_more_items: bool, item_evaluations: Dict[str, List[Evaluation]])
680    def __init__(
681        self,
682        *,
683        total_items_fetched: int,
684        total_items_processed: int,
685        total_items_failed: int,
686        total_scores_created: int,
687        total_composite_scores_created: int,
688        total_evaluations_failed: int,
689        evaluator_stats: List[EvaluatorStats],
690        resume_token: Optional[BatchEvaluationResumeToken],
691        completed: bool,
692        duration_seconds: float,
693        failed_item_ids: List[str],
694        error_summary: Dict[str, int],
695        has_more_items: bool,
696        item_evaluations: Dict[str, List["Evaluation"]],
697    ):
698        """Initialize BatchEvaluationResult with comprehensive statistics.
699
700        Args:
701            total_items_fetched: Total items fetched from API.
702            total_items_processed: Items successfully evaluated.
703            total_items_failed: Items that failed evaluation.
704            total_scores_created: Scores from item-level evaluators.
705            total_composite_scores_created: Scores from composite evaluator.
706            total_evaluations_failed: Individual evaluator failures.
707            evaluator_stats: Per-evaluator statistics.
708            resume_token: Token for resuming (None if completed).
709            completed: Whether all items were processed.
710            duration_seconds: Total execution time.
711            failed_item_ids: IDs of failed items.
712            error_summary: Error types and counts.
713            has_more_items: Whether more items exist beyond max_items.
714            item_evaluations: Dictionary mapping item IDs to their evaluation results.
715
716        Note:
717            All arguments must be provided as keywords.
718        """
719        self.total_items_fetched = total_items_fetched
720        self.total_items_processed = total_items_processed
721        self.total_items_failed = total_items_failed
722        self.total_scores_created = total_scores_created
723        self.total_composite_scores_created = total_composite_scores_created
724        self.total_evaluations_failed = total_evaluations_failed
725        self.evaluator_stats = evaluator_stats
726        self.resume_token = resume_token
727        self.completed = completed
728        self.duration_seconds = duration_seconds
729        self.failed_item_ids = failed_item_ids
730        self.error_summary = error_summary
731        self.has_more_items = has_more_items
732        self.item_evaluations = item_evaluations

Initialize BatchEvaluationResult with comprehensive statistics.

Arguments:
  • total_items_fetched: Total items fetched from API.
  • total_items_processed: Items successfully evaluated.
  • total_items_failed: Items that failed evaluation.
  • total_scores_created: Scores from item-level evaluators.
  • total_composite_scores_created: Scores from composite evaluator.
  • total_evaluations_failed: Individual evaluator failures.
  • evaluator_stats: Per-evaluator statistics.
  • resume_token: Token for resuming (None if completed).
  • completed: Whether all items were processed.
  • duration_seconds: Total execution time.
  • failed_item_ids: IDs of failed items.
  • error_summary: Error types and counts.
  • has_more_items: Whether more items exist beyond max_items.
  • item_evaluations: Dictionary mapping item IDs to their evaluation results.

Note:

All arguments must be provided as keywords.

total_items_fetched
total_items_processed
total_items_failed
total_scores_created
total_composite_scores_created
total_evaluations_failed
evaluator_stats
resume_token
completed
duration_seconds
failed_item_ids
error_summary
has_more_items
item_evaluations