diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 53fede752..946400a5d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -45,9 +45,7 @@ uv run mypy langfuse --no-error-summary ### Update openapi spec -1. Generate Fern Python SDK in [langfuse](https://github.com/langfuse/langfuse) and copy the files generated in `generated/python` into the `langfuse/api` folder in this repo. -2. Execute the linter by running `uv run ruff format .` -3. Rebuild and deploy the package to PyPi. +When the OpenAPI spec changes in the [langfuse](https://github.com/langfuse/langfuse) repo, a PR updating the generated API client is automatically opened in this repo. ### Publish release diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py index 85ec83a4e..04d8fae2c 100644 --- a/langfuse/_client/client.py +++ b/langfuse/_client/client.py @@ -1747,7 +1747,7 @@ def create_score( trace_id: Optional[str] = None, score_id: Optional[str] = None, observation_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -1777,13 +1777,13 @@ def create_score( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT) session_id: ID of the Langfuse session to associate the score with dataset_run_id: ID of the Langfuse dataset run to associate the score with trace_id: ID of the Langfuse trace to associate the score with observation_id: Optional ID of the specific observation to score. Trace ID must be provided too. 
score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score @@ -1907,7 +1907,7 @@ def score_current_span( name: str, value: str, score_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -1931,9 +1931,9 @@ def score_current_span( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score @@ -1971,7 +1971,7 @@ def score_current_span( name=name, value=cast(str, value), score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, metadata=metadata, @@ -1997,7 +1997,7 @@ def score_current_trace( name: str, value: str, score_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -2022,9 
+2022,9 @@ def score_current_trace( Args: name: Name of the score (e.g., "user_satisfaction", "overall_quality") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score @@ -2060,7 +2060,7 @@ def score_current_trace( name=name, value=cast(str, value), score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, metadata=metadata, diff --git a/langfuse/_client/span.py b/langfuse/_client/span.py index 2590262ce..bd0c638a7 100644 --- a/langfuse/_client/span.py +++ b/langfuse/_client/span.py @@ -308,7 +308,7 @@ def score( value: str, score_id: Optional[str] = None, data_type: Optional[ - Literal[ScoreDataType.CATEGORICAL] + Literal[ScoreDataType.CATEGORICAL, ScoreDataType.TEXT] ] = ScoreDataType.CATEGORICAL, comment: Optional[str] = None, config_id: Optional[str] = None, @@ -335,9 +335,9 @@ def score( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL) + value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse timestamp: Optional timestamp for 
the score (defaults to current UTC time) @@ -364,7 +364,7 @@ def score( trace_id=self.trace_id, observation_id=self.id, score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, timestamp=timestamp, @@ -395,7 +395,7 @@ def score_trace( value: str, score_id: Optional[str] = None, data_type: Optional[ - Literal[ScoreDataType.CATEGORICAL] + Literal[ScoreDataType.CATEGORICAL, ScoreDataType.TEXT] ] = ScoreDataType.CATEGORICAL, comment: Optional[str] = None, config_id: Optional[str] = None, @@ -423,9 +423,9 @@ def score_trace( Args: name: Name of the score (e.g., "user_satisfaction", "overall_quality") - value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL) + value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse timestamp: Optional timestamp for the score (defaults to current UTC time) @@ -451,7 +451,7 @@ def score_trace( value=cast(str, value), trace_id=self.trace_id, score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, timestamp=timestamp, diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py index 443f5cdd2..aa103cf12 100644 --- a/langfuse/api/__init__.py +++ b/langfuse/api/__init__.py @@ -112,12 +112,16 @@ ScoreV1_Boolean, ScoreV1_Categorical, ScoreV1_Numeric, + ScoreV1_Text, Score_Boolean, Score_Categorical, Score_Correction, Score_Numeric, + Score_Text, Session, SessionWithTraces, + TextScore, + TextScoreV1, Trace, TraceWithDetails, TraceWithFullDetails, @@ -281,10 
+285,12 @@ GetScoresResponseDataCategorical, GetScoresResponseDataCorrection, GetScoresResponseDataNumeric, + GetScoresResponseDataText, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, GetScoresResponseData_Correction, GetScoresResponseData_Numeric, + GetScoresResponseData_Text, GetScoresResponseTraceData, ) from .sessions import PaginatedSessions @@ -377,10 +383,12 @@ "GetScoresResponseDataCategorical": ".scores", "GetScoresResponseDataCorrection": ".scores", "GetScoresResponseDataNumeric": ".scores", + "GetScoresResponseDataText": ".scores", "GetScoresResponseData_Boolean": ".scores", "GetScoresResponseData_Categorical": ".scores", "GetScoresResponseData_Correction": ".scores", "GetScoresResponseData_Numeric": ".scores", + "GetScoresResponseData_Text": ".scores", "GetScoresResponseTraceData": ".scores", "HealthResponse": ".health", "IngestionError": ".ingestion", @@ -489,10 +497,12 @@ "ScoreV1_Boolean": ".commons", "ScoreV1_Categorical": ".commons", "ScoreV1_Numeric": ".commons", + "ScoreV1_Text": ".commons", "Score_Boolean": ".commons", "Score_Categorical": ".commons", "Score_Correction": ".commons", "Score_Numeric": ".commons", + "Score_Text": ".commons", "SdkLogBody": ".ingestion", "SdkLogEvent": ".ingestion", "ServiceProviderConfig": ".scim", @@ -501,6 +511,8 @@ "SessionWithTraces": ".commons", "Sort": ".trace", "TextPrompt": ".prompts", + "TextScore": ".commons", + "TextScoreV1": ".commons", "Trace": ".commons", "TraceBody": ".ingestion", "TraceEvent": ".ingestion", @@ -664,10 +676,12 @@ def __dir__(): "GetScoresResponseDataCategorical", "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", + "GetScoresResponseDataText", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", + "GetScoresResponseData_Text", "GetScoresResponseTraceData", "HealthResponse", "IngestionError", @@ -776,10 +790,12 @@ def __dir__(): "ScoreV1_Boolean", 
"ScoreV1_Categorical", "ScoreV1_Numeric", + "ScoreV1_Text", "Score_Boolean", "Score_Categorical", "Score_Correction", "Score_Numeric", + "Score_Text", "SdkLogBody", "SdkLogEvent", "ServiceProviderConfig", @@ -788,6 +804,8 @@ def __dir__(): "SessionWithTraces", "Sort", "TextPrompt", + "TextScore", + "TextScoreV1", "Trace", "TraceBody", "TraceEvent", diff --git a/langfuse/api/commons/__init__.py b/langfuse/api/commons/__init__.py index af79e3e25..81cb57f96 100644 --- a/langfuse/api/commons/__init__.py +++ b/langfuse/api/commons/__init__.py @@ -47,12 +47,16 @@ ScoreV1_Boolean, ScoreV1_Categorical, ScoreV1_Numeric, + ScoreV1_Text, Score_Boolean, Score_Categorical, Score_Correction, Score_Numeric, + Score_Text, Session, SessionWithTraces, + TextScore, + TextScoreV1, Trace, TraceWithDetails, TraceWithFullDetails, @@ -110,12 +114,16 @@ "ScoreV1_Boolean": ".types", "ScoreV1_Categorical": ".types", "ScoreV1_Numeric": ".types", + "ScoreV1_Text": ".types", "Score_Boolean": ".types", "Score_Categorical": ".types", "Score_Correction": ".types", "Score_Numeric": ".types", + "Score_Text": ".types", "Session": ".types", "SessionWithTraces": ".types", + "TextScore": ".types", + "TextScoreV1": ".types", "Trace": ".types", "TraceWithDetails": ".types", "TraceWithFullDetails": ".types", @@ -196,12 +204,16 @@ def __dir__(): "ScoreV1_Boolean", "ScoreV1_Categorical", "ScoreV1_Numeric", + "ScoreV1_Text", "Score_Boolean", "Score_Categorical", "Score_Correction", "Score_Numeric", + "Score_Text", "Session", "SessionWithTraces", + "TextScore", + "TextScoreV1", "Trace", "TraceWithDetails", "TraceWithFullDetails", diff --git a/langfuse/api/commons/types/__init__.py b/langfuse/api/commons/types/__init__.py index b7834caf3..5ce0a58cd 100644 --- a/langfuse/api/commons/types/__init__.py +++ b/langfuse/api/commons/types/__init__.py @@ -43,14 +43,23 @@ Score_Categorical, Score_Correction, Score_Numeric, + Score_Text, ) from .score_config import ScoreConfig from .score_config_data_type import 
ScoreConfigDataType from .score_data_type import ScoreDataType from .score_source import ScoreSource - from .score_v1 import ScoreV1, ScoreV1_Boolean, ScoreV1_Categorical, ScoreV1_Numeric + from .score_v1 import ( + ScoreV1, + ScoreV1_Boolean, + ScoreV1_Categorical, + ScoreV1_Numeric, + ScoreV1_Text, + ) from .session import Session from .session_with_traces import SessionWithTraces + from .text_score import TextScore + from .text_score_v1 import TextScoreV1 from .trace import Trace from .trace_with_details import TraceWithDetails from .trace_with_full_details import TraceWithFullDetails @@ -96,12 +105,16 @@ "ScoreV1_Boolean": ".score_v1", "ScoreV1_Categorical": ".score_v1", "ScoreV1_Numeric": ".score_v1", + "ScoreV1_Text": ".score_v1", "Score_Boolean": ".score", "Score_Categorical": ".score", "Score_Correction": ".score", "Score_Numeric": ".score", + "Score_Text": ".score", "Session": ".session", "SessionWithTraces": ".session_with_traces", + "TextScore": ".text_score", + "TextScoreV1": ".text_score_v1", "Trace": ".trace", "TraceWithDetails": ".trace_with_details", "TraceWithFullDetails": ".trace_with_full_details", @@ -177,12 +190,16 @@ def __dir__(): "ScoreV1_Boolean", "ScoreV1_Categorical", "ScoreV1_Numeric", + "ScoreV1_Text", "Score_Boolean", "Score_Categorical", "Score_Correction", "Score_Numeric", + "Score_Text", "Session", "SessionWithTraces", + "TextScore", + "TextScoreV1", "Trace", "TraceWithDetails", "TraceWithFullDetails", diff --git a/langfuse/api/commons/types/score.py b/langfuse/api/commons/types/score.py index bfcb94f3d..33c901c5f 100644 --- a/langfuse/api/commons/types/score.py +++ b/langfuse/api/commons/types/score.py @@ -195,7 +195,54 @@ class Score_Correction(UniversalBaseModel): ) +class Score_Text(UniversalBaseModel): + data_type: typing_extensions.Annotated[ + typing.Literal["TEXT"], FieldMetadata(alias="dataType") + ] = "TEXT" + string_value: typing_extensions.Annotated[str, FieldMetadata(alias="stringValue")] + id: str + trace_id: 
typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="traceId") + ] = None + session_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="sessionId") + ] = None + observation_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="observationId") + ] = None + dataset_run_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="datasetRunId") + ] = None + name: str + source: ScoreSource + timestamp: dt.datetime + created_at: typing_extensions.Annotated[ + dt.datetime, FieldMetadata(alias="createdAt") + ] + updated_at: typing_extensions.Annotated[ + dt.datetime, FieldMetadata(alias="updatedAt") + ] + author_user_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="authorUserId") + ] = None + comment: typing.Optional[str] = None + metadata: typing.Any + config_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="configId") + ] = None + queue_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="queueId") + ] = None + environment: str + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + Score = typing_extensions.Annotated[ - typing.Union[Score_Numeric, Score_Categorical, Score_Boolean, Score_Correction], + typing.Union[ + Score_Numeric, Score_Categorical, Score_Boolean, Score_Correction, Score_Text + ], pydantic.Field(discriminator="data_type"), ] diff --git a/langfuse/api/commons/types/score_config_data_type.py b/langfuse/api/commons/types/score_config_data_type.py index 4f4660305..683b46a0f 100644 --- a/langfuse/api/commons/types/score_config_data_type.py +++ b/langfuse/api/commons/types/score_config_data_type.py @@ -11,12 +11,14 @@ class ScoreConfigDataType(enum.StrEnum): NUMERIC = "NUMERIC" BOOLEAN = "BOOLEAN" CATEGORICAL = "CATEGORICAL" + TEXT = "TEXT" def visit( self, numeric: typing.Callable[[], T_Result], boolean: typing.Callable[[], 
T_Result], categorical: typing.Callable[[], T_Result], + text: typing.Callable[[], T_Result], ) -> T_Result: if self is ScoreConfigDataType.NUMERIC: return numeric() @@ -24,3 +26,5 @@ def visit( return boolean() if self is ScoreConfigDataType.CATEGORICAL: return categorical() + if self is ScoreConfigDataType.TEXT: + return text() diff --git a/langfuse/api/commons/types/score_data_type.py b/langfuse/api/commons/types/score_data_type.py index c29a77f07..18301b51f 100644 --- a/langfuse/api/commons/types/score_data_type.py +++ b/langfuse/api/commons/types/score_data_type.py @@ -12,6 +12,7 @@ class ScoreDataType(enum.StrEnum): BOOLEAN = "BOOLEAN" CATEGORICAL = "CATEGORICAL" CORRECTION = "CORRECTION" + TEXT = "TEXT" def visit( self, @@ -19,6 +20,7 @@ def visit( boolean: typing.Callable[[], T_Result], categorical: typing.Callable[[], T_Result], correction: typing.Callable[[], T_Result], + text: typing.Callable[[], T_Result], ) -> T_Result: if self is ScoreDataType.NUMERIC: return numeric() @@ -28,3 +30,5 @@ def visit( return categorical() if self is ScoreDataType.CORRECTION: return correction() + if self is ScoreDataType.TEXT: + return text() diff --git a/langfuse/api/commons/types/score_v1.py b/langfuse/api/commons/types/score_v1.py index 1a409ce2f..e17e61b89 100644 --- a/langfuse/api/commons/types/score_v1.py +++ b/langfuse/api/commons/types/score_v1.py @@ -125,7 +125,44 @@ class ScoreV1_Boolean(UniversalBaseModel): ) +class ScoreV1_Text(UniversalBaseModel): + data_type: typing_extensions.Annotated[ + typing.Literal["TEXT"], FieldMetadata(alias="dataType") + ] = "TEXT" + string_value: typing_extensions.Annotated[str, FieldMetadata(alias="stringValue")] + id: str + trace_id: typing_extensions.Annotated[str, FieldMetadata(alias="traceId")] + name: str + source: ScoreSource + observation_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="observationId") + ] = None + timestamp: dt.datetime + created_at: typing_extensions.Annotated[ + dt.datetime, 
FieldMetadata(alias="createdAt") + ] + updated_at: typing_extensions.Annotated[ + dt.datetime, FieldMetadata(alias="updatedAt") + ] + author_user_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="authorUserId") + ] = None + comment: typing.Optional[str] = None + metadata: typing.Any + config_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="configId") + ] = None + queue_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="queueId") + ] = None + environment: str + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + ScoreV1 = typing_extensions.Annotated[ - typing.Union[ScoreV1_Numeric, ScoreV1_Categorical, ScoreV1_Boolean], + typing.Union[ScoreV1_Numeric, ScoreV1_Categorical, ScoreV1_Boolean, ScoreV1_Text], pydantic.Field(discriminator="data_type"), ] diff --git a/langfuse/api/commons/types/text_score.py b/langfuse/api/commons/types/text_score.py new file mode 100644 index 000000000..438e30f47 --- /dev/null +++ b/langfuse/api/commons/types/text_score.py @@ -0,0 +1,21 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ...core.serialization import FieldMetadata +from .base_score import BaseScore + + +class TextScore(BaseScore): + string_value: typing_extensions.Annotated[ + str, FieldMetadata(alias="stringValue") + ] = pydantic.Field() + """ + The text content of the score (1-500 characters) + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/commons/types/text_score_v1.py b/langfuse/api/commons/types/text_score_v1.py new file mode 100644 index 000000000..937ca281e --- /dev/null +++ b/langfuse/api/commons/types/text_score_v1.py @@ -0,0 +1,21 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +import typing_extensions +from ...core.serialization import FieldMetadata +from .base_score_v1 import BaseScoreV1 + + +class TextScoreV1(BaseScoreV1): + string_value: typing_extensions.Annotated[ + str, FieldMetadata(alias="stringValue") + ] = pydantic.Field() + """ + The text content of the score (1-500 characters) + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/ingestion/types/score_body.py b/langfuse/api/ingestion/types/score_body.py index f559187b6..d9a32b6a7 100644 --- a/langfuse/api/ingestion/types/score_body.py +++ b/langfuse/api/ingestion/types/score_body.py @@ -51,7 +51,7 @@ class ScoreBody(UniversalBaseModel): value: CreateScoreValue = pydantic.Field() """ - The value of the score. Must be passed as string for categorical scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false) + The value of the score. Must be passed as string for categorical and text scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false). Text score values must be between 1 and 500 characters. """ comment: typing.Optional[str] = None diff --git a/langfuse/api/legacy/score_v1/client.py b/langfuse/api/legacy/score_v1/client.py index 7c7a214e3..03ca8b836 100644 --- a/langfuse/api/legacy/score_v1/client.py +++ b/langfuse/api/legacy/score_v1/client.py @@ -54,7 +54,7 @@ def create( name : str value : CreateScoreValue - The value of the score. Must be passed as string for categorical scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false) + The value of the score. Must be passed as string for categorical and text scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false). Text score values must be between 1 and 500 characters. 
id : typing.Optional[str] @@ -203,7 +203,7 @@ async def create( name : str value : CreateScoreValue - The value of the score. Must be passed as string for categorical scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false) + The value of the score. Must be passed as string for categorical and text scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false). Text score values must be between 1 and 500 characters. id : typing.Optional[str] diff --git a/langfuse/api/legacy/score_v1/raw_client.py b/langfuse/api/legacy/score_v1/raw_client.py index 9bcbe082d..834560ec9 100644 --- a/langfuse/api/legacy/score_v1/raw_client.py +++ b/langfuse/api/legacy/score_v1/raw_client.py @@ -53,7 +53,7 @@ def create( name : str value : CreateScoreValue - The value of the score. Must be passed as string for categorical scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false) + The value of the score. Must be passed as string for categorical and text scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false). Text score values must be between 1 and 500 characters. id : typing.Optional[str] @@ -314,7 +314,7 @@ async def create( name : str value : CreateScoreValue - The value of the score. Must be passed as string for categorical scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false) + The value of the score. Must be passed as string for categorical and text scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false). Text score values must be between 1 and 500 characters. 
id : typing.Optional[str] diff --git a/langfuse/api/legacy/score_v1/types/create_score_request.py b/langfuse/api/legacy/score_v1/types/create_score_request.py index d54333ac3..a0397bdfc 100644 --- a/langfuse/api/legacy/score_v1/types/create_score_request.py +++ b/langfuse/api/legacy/score_v1/types/create_score_request.py @@ -39,7 +39,7 @@ class CreateScoreRequest(UniversalBaseModel): name: str value: CreateScoreValue = pydantic.Field() """ - The value of the score. Must be passed as string for categorical scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false) + The value of the score. Must be passed as string for categorical and text scores, and numeric for boolean and numeric scores. Boolean score values must equal either 1 or 0 (true or false). Text score values must be between 1 and 500 characters. """ comment: typing.Optional[str] = None diff --git a/langfuse/api/scores/__init__.py b/langfuse/api/scores/__init__.py index d320aecfc..d57fb5371 100644 --- a/langfuse/api/scores/__init__.py +++ b/langfuse/api/scores/__init__.py @@ -13,10 +13,12 @@ GetScoresResponseDataCategorical, GetScoresResponseDataCorrection, GetScoresResponseDataNumeric, + GetScoresResponseDataText, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, GetScoresResponseData_Correction, GetScoresResponseData_Numeric, + GetScoresResponseData_Text, GetScoresResponseTraceData, ) _dynamic_imports: typing.Dict[str, str] = { @@ -26,10 +28,12 @@ "GetScoresResponseDataCategorical": ".types", "GetScoresResponseDataCorrection": ".types", "GetScoresResponseDataNumeric": ".types", + "GetScoresResponseDataText": ".types", "GetScoresResponseData_Boolean": ".types", "GetScoresResponseData_Categorical": ".types", "GetScoresResponseData_Correction": ".types", "GetScoresResponseData_Numeric": ".types", + "GetScoresResponseData_Text": ".types", "GetScoresResponseTraceData": ".types", } @@ -68,9 +72,11 @@ def __dir__(): 
"GetScoresResponseDataCategorical", "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", + "GetScoresResponseDataText", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", + "GetScoresResponseData_Text", "GetScoresResponseTraceData", ] diff --git a/langfuse/api/scores/types/__init__.py b/langfuse/api/scores/types/__init__.py index 3ee0246d4..5b82ed448 100644 --- a/langfuse/api/scores/types/__init__.py +++ b/langfuse/api/scores/types/__init__.py @@ -13,11 +13,13 @@ GetScoresResponseData_Categorical, GetScoresResponseData_Correction, GetScoresResponseData_Numeric, + GetScoresResponseData_Text, ) from .get_scores_response_data_boolean import GetScoresResponseDataBoolean from .get_scores_response_data_categorical import GetScoresResponseDataCategorical from .get_scores_response_data_correction import GetScoresResponseDataCorrection from .get_scores_response_data_numeric import GetScoresResponseDataNumeric + from .get_scores_response_data_text import GetScoresResponseDataText from .get_scores_response_trace_data import GetScoresResponseTraceData _dynamic_imports: typing.Dict[str, str] = { "GetScoresResponse": ".get_scores_response", @@ -26,10 +28,12 @@ "GetScoresResponseDataCategorical": ".get_scores_response_data_categorical", "GetScoresResponseDataCorrection": ".get_scores_response_data_correction", "GetScoresResponseDataNumeric": ".get_scores_response_data_numeric", + "GetScoresResponseDataText": ".get_scores_response_data_text", "GetScoresResponseData_Boolean": ".get_scores_response_data", "GetScoresResponseData_Categorical": ".get_scores_response_data", "GetScoresResponseData_Correction": ".get_scores_response_data", "GetScoresResponseData_Numeric": ".get_scores_response_data", + "GetScoresResponseData_Text": ".get_scores_response_data", "GetScoresResponseTraceData": ".get_scores_response_trace_data", } @@ -68,9 +72,11 @@ def __dir__(): 
"GetScoresResponseDataCategorical", "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", + "GetScoresResponseDataText", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", + "GetScoresResponseData_Text", "GetScoresResponseTraceData", ] diff --git a/langfuse/api/scores/types/get_scores_response_data.py b/langfuse/api/scores/types/get_scores_response_data.py index d1cdda417..f85cd471c 100644 --- a/langfuse/api/scores/types/get_scores_response_data.py +++ b/langfuse/api/scores/types/get_scores_response_data.py @@ -200,12 +200,59 @@ class GetScoresResponseData_Correction(UniversalBaseModel): ) +class GetScoresResponseData_Text(UniversalBaseModel): + data_type: typing_extensions.Annotated[ + typing.Literal["TEXT"], FieldMetadata(alias="dataType") + ] = "TEXT" + trace: typing.Optional[GetScoresResponseTraceData] = None + string_value: typing_extensions.Annotated[str, FieldMetadata(alias="stringValue")] + id: str + trace_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="traceId") + ] = None + session_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="sessionId") + ] = None + observation_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="observationId") + ] = None + dataset_run_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="datasetRunId") + ] = None + name: str + source: ScoreSource + timestamp: dt.datetime + created_at: typing_extensions.Annotated[ + dt.datetime, FieldMetadata(alias="createdAt") + ] + updated_at: typing_extensions.Annotated[ + dt.datetime, FieldMetadata(alias="updatedAt") + ] + author_user_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="authorUserId") + ] = None + comment: typing.Optional[str] = None + metadata: typing.Any + config_id: typing_extensions.Annotated[ + typing.Optional[str], 
FieldMetadata(alias="configId") + ] = None + queue_id: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="queueId") + ] = None + environment: str + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + GetScoresResponseData = typing_extensions.Annotated[ typing.Union[ GetScoresResponseData_Numeric, GetScoresResponseData_Categorical, GetScoresResponseData_Boolean, GetScoresResponseData_Correction, + GetScoresResponseData_Text, ], pydantic.Field(discriminator="data_type"), ] diff --git a/langfuse/api/scores/types/get_scores_response_data_text.py b/langfuse/api/scores/types/get_scores_response_data_text.py new file mode 100644 index 000000000..4949afe74 --- /dev/null +++ b/langfuse/api/scores/types/get_scores_response_data_text.py @@ -0,0 +1,15 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ...commons.types.text_score import TextScore +from .get_scores_response_trace_data import GetScoresResponseTraceData + + +class GetScoresResponseDataText(TextScore): + trace: typing.Optional[GetScoresResponseTraceData] = None + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/experiment.py b/langfuse/experiment.py index 6e4b32e10..67b50a900 100644 --- a/langfuse/experiment.py +++ b/langfuse/experiment.py @@ -17,8 +17,9 @@ Union, ) -from langfuse.api import DatasetItem, ScoreDataType +from langfuse.api import DatasetItem from langfuse.logger import langfuse_logger as logger +from langfuse.types import ExperimentScoreType class LocalExperimentItem(TypedDict, total=False): @@ -184,7 +185,7 @@ def __init__( value: Union[int, float, str, bool], comment: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, - data_type: Optional[ScoreDataType] = None, + data_type: Optional[ExperimentScoreType] = None, config_id: Optional[str] = None, ): """Initialize an 
Evaluation with the provided data. diff --git a/langfuse/types.py b/langfuse/types.py index 067088e40..c3029e713 100644 --- a/langfuse/types.py +++ b/langfuse/types.py @@ -35,7 +35,10 @@ def my_evaluator(*, output: str, **kwargs) -> Evaluation: SpanLevel = Literal["DEBUG", "DEFAULT", "WARNING", "ERROR"] -ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"] +ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN", "TEXT"] + +# Text scores are not supported for evals and experiments +ExperimentScoreType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"] class MaskFunction(Protocol): @@ -73,6 +76,7 @@ class TraceContext(TypedDict): __all__ = [ "SpanLevel", "ScoreDataType", + "ExperimentScoreType", "MaskFunction", "ParsedMediaReference", "TraceContext", diff --git a/pyproject.toml b/pyproject.toml index 7c2d2e497..267f52bcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dev = [ "langgraph>=1,<2", "autoevals>=0.0.130,<0.1", "opentelemetry-instrumentation-threading>=0.59b0,<1", + "tenacity>=9.1.4", ] docs = [ "pdoc>=15.0.4,<16", diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py index 91064de23..8edc27866 100644 --- a/tests/test_core_sdk.py +++ b/tests/test_core_sdk.py @@ -5,6 +5,7 @@ from time import sleep import pytest +from tenacity import Retrying, stop_after_delay, wait_fixed from langfuse import Langfuse, propagate_attributes from langfuse._client.resource_manager import LangfuseResourceManager @@ -321,6 +322,66 @@ def test_create_categorical_score(): assert created_score["stringValue"] == "high score" +def test_create_text_score(): + langfuse = Langfuse() + api_wrapper = LangfuseAPI() + + # Create a span and set trace properties + with langfuse.start_as_current_observation(name="test-span") as span: + with propagate_attributes( + trace_name="this-is-so-great-new", + user_id="test", + metadata={"test": "test"}, + ): + # Get trace ID for later use + trace_id = span.trace_id + + # Ensure data is sent + langfuse.flush() + + # 
Create a text score + score_id = create_uuid() + langfuse.create_score( + score_id=score_id, + trace_id=trace_id, + name="this-is-a-score", + value="This is a detailed text evaluation of the output quality.", + data_type="TEXT", + ) + + # Create a generation in the same trace + generation = langfuse.start_observation( + as_type="generation", + name="yet another child", + metadata="test", + trace_context={"trace_id": trace_id}, + ) + generation.end() + + # Ensure data is sent + langfuse.flush() + + # Retrieve and verify with retry + for attempt in Retrying( + stop=stop_after_delay(10), wait=wait_fixed(0.1), reraise=True + ): + with attempt: + trace = api_wrapper.get_trace(trace_id) + + # Find the score we created by name + created_score = next( + (s for s in trace["scores"] if s["name"] == "this-is-a-score"), None + ) + assert created_score is not None, "Score not found in trace" + assert created_score["id"] == score_id + assert created_score["dataType"] == "TEXT" + + assert ( + created_score["stringValue"] + == "This is a detailed text evaluation of the output quality." + ) + + def test_create_score_with_custom_timestamp(): langfuse = Langfuse() api_wrapper = LangfuseAPI() diff --git a/uv.lock b/uv.lock index 611044a4b..e5740bff8 100644 --- a/uv.lock +++ b/uv.lock @@ -3,7 +3,7 @@ revision = 3 requires-python = ">=3.10, <4.0" [options] -exclude-newer = "2026-03-25T20:04:08.521428164Z" +exclude-newer = "2026-03-31T15:26:41.189939Z" exclude-newer-span = "P7D" [[package]] @@ -583,6 +583,7 @@ dev = [ { name = "pytest-timeout" }, { name = "pytest-xdist" }, { name = "ruff" }, + { name = "tenacity" }, ] docs = [ { name = "pdoc" }, @@ -616,6 +617,7 @@ dev = [ { name = "pytest-timeout", specifier = ">=2.1.0,<3" }, { name = "pytest-xdist", specifier = ">=3.3.1,<4" }, { name = "ruff", specifier = ">=0.15.2,<0.16" }, + { name = "tenacity", specifier = ">=9.1.4" }, ] docs = [{ name = "pdoc", specifier = ">=15.0.4,<16" }]