diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py
index b457dc6b..688361c8 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py
@@ -204,7 +204,6 @@ def _start_embedding(instance, texts):
 
     # Create embedding invocation
     embedding = UtilEmbeddingInvocation(
-        operation_name="embedding",
         request_model=request_model,
         input_texts=texts if isinstance(texts, list) else [texts],
         provider=provider,
diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/cassettes/test_langchain_embedding_call.yaml b/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/cassettes/test_langchain_embedding_call.yaml
new file mode 100644
index 00000000..0c768091
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/cassettes/test_langchain_embedding_call.yaml
@@ -0,0 +1,78 @@
+interactions:
+- request:
+    body: |-
+      {
+        "input": [
+          "What is the capital of France?"
+        ],
+        "model": "text-embedding-ada-002",
+        "encoding_format": "float"
+      }
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      authorization:
+      - Bearer test_openai_api_key
+      connection:
+      - keep-alive
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.1
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/embeddings
+  response:
+    body:
+      string: |-
+        {
+          "object": "list",
+          "data": [
+            {
+              "object": "embedding",
+              "embedding": [0.0023064255, -0.009327292, 0.015797347, -0.0077586975, -0.013595423],
+              "index": 0
+            }
+          ],
+          "model": "text-embedding-ada-002-v2",
+          "usage": {
+            "prompt_tokens": 7,
+            "total_tokens": 7
+          }
+        }
+    headers:
+      content-type:
+      - application/json
+      date:
+      - Sun, 21 Sep 2025 04:09:41 GMT
+      openai-organization:
+      - test_openai_org_id
+      x-request-id:
+      - emb-50308e7e-2aac-4167-a8fb-03f9f5ed8169
+      content-length:
+      - '256'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/cassettes/test_langchain_embedding_call_error.yaml b/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/cassettes/test_langchain_embedding_call_error.yaml
new file mode 100644
index 00000000..50cfcee5
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/cassettes/test_langchain_embedding_call_error.yaml
@@ -0,0 +1,71 @@
+interactions:
+- request:
+    body: |-
+      {
+        "input": [
+          "What is the capital of France?"
+        ],
+        "model": "text-embedding-ada-002",
+        "encoding_format": "float"
+      }
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      authorization:
+      - Bearer test_openai_api_key
+      connection:
+      - keep-alive
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.1
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/embeddings
+  response:
+    body:
+      string: |-
+        {
+          "error": {
+            "message": "Incorrect API key provided: test-api-****. You can find your API key at https://platform.openai.com/account/api-keys.",
+            "type": "invalid_request_error",
+            "param": null,
+            "code": "invalid_api_key"
+          }
+        }
+    headers:
+      content-type:
+      - application/json
+      date:
+      - Sun, 21 Sep 2025 04:09:41 GMT
+      openai-organization:
+      - test_openai_org_id
+      x-request-id:
+      - emb-err-50308e7e-2aac-4167-a8fb-03f9f5ed8169
+      content-length:
+      - '256'
+    status:
+      code: 401
+      message: Unauthorized
+version: 1
diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/test_langchain_embedding.py b/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/test_langchain_embedding.py
new file mode 100644
index 00000000..9560a40c
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/tests/test_langchain_embedding.py
@@ -0,0 +1,142 @@
+"""Minimal LangChain embedding instrumentation test.
+
+Follows the same VCR cassette integration pattern as test_langchain_llm.py
+to validate that embedding instrumentation emits correct telemetry:
+
+1. An embedding invocation succeeds using the recorded VCR cassette.
+2. A span is emitted with GenAI semantic convention attributes for an embeddings op.
+3. The default operation_name is 'embeddings' (from EmbeddingInvocation types.py default).
+4. Core request model attribute exists and is plausible.
+5. Metrics (duration at minimum) are produced and contain at least one data point.
+"""
+
+from __future__ import annotations
+
+# mypy: ignore-errors
+# pyright: reportGeneralTypeIssues=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownParameterType=false, reportUnknownArgumentType=false, reportAttributeAccessIssue=false, reportCallIssue=false
+
+from typing import Any, List
+import pytest
+from pytest import MonkeyPatch
+from pydantic import SecretStr
+
+from langchain_openai import OpenAIEmbeddings
+
+from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
+from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
+from opentelemetry.sdk.trace import ReadableSpan  # test-only type reference
+from opentelemetry.trace.status import StatusCode
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+from opentelemetry.sdk.metrics.export import InMemoryMetricReader
+
+
+EMBEDDINGS = gen_ai_attributes.GenAiOperationNameValues.EMBEDDINGS.value
+
+
+def _find_embedding_span(spans: List[ReadableSpan]) -> ReadableSpan | None:
+    """Return the first span whose operation name is 'embeddings', if any."""
+    for span in spans:
+        # Spans without attributes cannot match; no try/except is needed.
+        attrs = span.attributes or {}
+        if attrs.get(gen_ai_attributes.GEN_AI_OPERATION_NAME) == EMBEDDINGS:
+            return span
+    return None
+
+
+@pytest.mark.vcr()
+def test_langchain_embedding_call(
+    span_exporter: InMemorySpanExporter,
+    metric_reader: InMemoryMetricReader,
+    instrument_with_content: Any,
+    monkeypatch: MonkeyPatch,
+):
+    # Arrange
+    monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
+    model = "text-embedding-ada-002"
+    embeddings = OpenAIEmbeddings(
+        model=model,
+        api_key=SecretStr("test-api-key"),
+        check_embedding_ctx_length=False,  # avoid tiktoken download in test
+    )
+
+    # Act
+    result = embeddings.embed_query("What is the capital of France?")
+
+    # Basic functional assertion – result must be a list of floats
+    assert isinstance(result, list), "Expected a list of floats"
+    assert len(result) > 0, "Expected non-empty embedding vector"
+    assert all(isinstance(v, float) for v in result), "All values must be floats"
+
+    # Spans
+    spans: List[ReadableSpan] = span_exporter.get_finished_spans()  # type: ignore[assignment]
+    assert spans, "Expected at least one span"
+    embedding_span = _find_embedding_span(spans)
+    assert embedding_span is not None, "No embeddings operation span found"
+
+    # Span attribute sanity
+    attrs = getattr(embedding_span, "attributes", {})
+    assert attrs.get(gen_ai_attributes.GEN_AI_OPERATION_NAME) == EMBEDDINGS
+    assert attrs.get(gen_ai_attributes.GEN_AI_REQUEST_MODEL) == model
+    # If token usage captured ensure it is a non-negative integer
+    tok_val = attrs.get(gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS)
+    if tok_val is not None:
+        assert isinstance(tok_val, int) and tok_val >= 0
+
+    # Span name should follow "{operation_name} {request_model}" convention
+    assert embedding_span.name == f"embeddings {model}"
+
+    # Metrics – ensure at least duration histogram present with >=1 point
+    metrics_data = metric_reader.get_metrics_data()
+    found_duration = False
+    if metrics_data:
+        for rm in getattr(metrics_data, "resource_metrics", []) or []:
+            for scope in getattr(rm, "scope_metrics", []) or []:
+                for metric in getattr(scope, "metrics", []) or []:
+                    if metric.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION:
+                        dps = getattr(metric.data, "data_points", [])
+                        if dps:
+                            assert dps[0].sum >= 0
+                            found_duration = True
+    assert found_duration, "Duration metric missing"
+
+
+@pytest.mark.vcr()
+def test_langchain_embedding_call_error(
+    span_exporter: InMemorySpanExporter,
+    instrument_with_content: Any,
+    monkeypatch: MonkeyPatch,
+):
+    """When the embedding API returns an error the wrapper must:
+    1. Still emit a span with operation_name == 'embeddings'.
+    2. Mark the span status as ERROR.
+    3. Re-raise the original exception so the caller sees the failure.
+    """
+    # Arrange
+    monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
+    model = "text-embedding-ada-002"
+    embeddings = OpenAIEmbeddings(
+        model=model,
+        api_key=SecretStr("test-api-key"),
+        check_embedding_ctx_length=False,  # avoid tiktoken download in test
+        max_retries=0,  # fail immediately, don't retry on 401
+    )
+
+    # Act – the call should raise because the cassette returns a 401
+    with pytest.raises(Exception):
+        embeddings.embed_query("What is the capital of France?")
+
+    # Spans – an embedding span must still be emitted
+    spans: List[ReadableSpan] = span_exporter.get_finished_spans()  # type: ignore[assignment]
+    assert spans, "Expected at least one span even on error"
+    embedding_span = _find_embedding_span(spans)
+    assert embedding_span is not None, (
+        "No embeddings operation span found on error path"
+    )
+
+    # Span attribute sanity
+    attrs = getattr(embedding_span, "attributes", {})
+    assert attrs.get(gen_ai_attributes.GEN_AI_OPERATION_NAME) == EMBEDDINGS
+    assert attrs.get(gen_ai_attributes.GEN_AI_REQUEST_MODEL) == model
+
+    # Span must be marked as error
+    assert embedding_span.status.status_code == StatusCode.ERROR
diff --git a/util/opentelemetry-util-genai/tests/test_embedding_invocation.py b/util/opentelemetry-util-genai/tests/test_embedding_invocation.py
index eabc3085..fa09dea1 100644
--- a/util/opentelemetry-util-genai/tests/test_embedding_invocation.py
+++ b/util/opentelemetry-util-genai/tests/test_embedding_invocation.py
@@ -1,5 +1,17 @@
+"""Tests for EmbeddingInvocation lifecycle, defaults, and telemetry."""
+
+from contextlib import contextmanager
+
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAI,
+)
 from opentelemetry.util.genai.handler import get_telemetry_handler
-from opentelemetry.util.genai.types import EmbeddingInvocation
+from opentelemetry.util.genai.types import EmbeddingInvocation, Error
 
 
 def test_embedding_invocation_creates_span():
@@ -16,3 +28,142 @@ def test_embedding_invocation_creates_span():
     # span should have ended (recording possibly false depending on SDK impl)
     # we at least assert the object reference still exists
     assert emb.span is not None
+
+
+@contextmanager
+def _captured_spans():
+    """Route the handler's span emitter to an in-memory exporter.
+
+    Yields ``(handler, exporter)``. The emitter's original tracer is restored
+    on exit so the patched tracer cannot leak into other tests in case
+    ``get_telemetry_handler()`` hands out a shared handler.
+    """
+    exporter = InMemorySpanExporter()
+    tracer_provider = TracerProvider()
+    tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
+
+    handler = get_telemetry_handler()
+    span_emitters = list(handler._emitter.emitters_for("span"))
+    # Fail loudly instead of silently skipping the patch: without a span
+    # emitter nothing reaches the exporter and the real cause stays hidden.
+    assert span_emitters, "Telemetry handler has no span emitter"
+    span_emitter = span_emitters[0]
+    original_tracer = span_emitter._tracer
+    span_emitter._tracer = tracer_provider.get_tracer(__name__)
+    try:
+        yield handler, exporter
+    finally:
+        span_emitter._tracer = original_tracer
+
+
+def test_embedding_invocation_default_operation_name():
+    """EmbeddingInvocation should default operation_name to 'embeddings'."""
+    emb = EmbeddingInvocation(
+        request_model="text-embedding-ada-002",
+        input_texts=["hello"],
+    )
+    assert (
+        emb.operation_name == GenAI.GenAiOperationNameValues.EMBEDDINGS.value
+    )
+    assert emb.operation_name == "embeddings"
+
+
+def test_embedding_invocation_semantic_convention_attributes():
+    """semantic_convention_attributes() should include the default operation_name."""
+    emb = EmbeddingInvocation(
+        request_model="text-embedding-3-small",
+        input_texts=["test input"],
+        provider="openai",
+    )
+    semconv_attrs = emb.semantic_convention_attributes()
+
+    assert GenAI.GEN_AI_OPERATION_NAME in semconv_attrs
+    assert semconv_attrs[GenAI.GEN_AI_OPERATION_NAME] == "embeddings"
+    assert GenAI.GEN_AI_REQUEST_MODEL in semconv_attrs
+    assert (
+        semconv_attrs[GenAI.GEN_AI_REQUEST_MODEL] == "text-embedding-3-small"
+    )
+
+
+def test_embedding_invocation_span_attributes():
+    """Spans should carry the correct operation_name attribute from the default."""
+    emb = EmbeddingInvocation(
+        request_model="text-embedding-ada-002",
+        input_texts=["hello world"],
+        provider="openai",
+    )
+
+    with _captured_spans() as (handler, span_exporter):
+        handler.start_embedding(emb)
+        handler.stop_embedding(emb)
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+
+    attrs = spans[0].attributes
+
+    # operation_name should be "embeddings" (the default from types.py)
+    assert attrs[GenAI.GEN_AI_OPERATION_NAME] == "embeddings"
+    assert attrs[GenAI.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002"
+
+
+def test_embedding_invocation_span_name():
+    """Span name should be '{operation_name} {request_model}'."""
+    emb = EmbeddingInvocation(
+        request_model="text-embedding-3-large",
+        input_texts=["test"],
+        provider="openai",
+    )
+
+    with _captured_spans() as (handler, span_exporter):
+        handler.start_embedding(emb)
+        handler.stop_embedding(emb)
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    assert spans[0].name == "embeddings text-embedding-3-large"
+
+
+def test_embedding_invocation_with_error():
+    """Error path should still produce a span with correct operation_name."""
+    emb = EmbeddingInvocation(
+        request_model="text-embedding-ada-002",
+        input_texts=["test"],
+        provider="openai",
+    )
+
+    with _captured_spans() as (handler, span_exporter):
+        handler.start_embedding(emb)
+        handler.fail_embedding(
+            emb, Error(message="API error", type=RuntimeError)
+        )
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+
+    attrs = spans[0].attributes
+    assert attrs[GenAI.GEN_AI_OPERATION_NAME] == "embeddings"
+
+
+def test_embedding_invocation_custom_operation_name_override():
+    """If a caller explicitly sets operation_name, the override should be honoured."""
+    emb = EmbeddingInvocation(
+        operation_name="custom_embedding",
+        request_model="my-model",
+        input_texts=["x"],
+    )
+    assert emb.operation_name == "custom_embedding"
+
+
+def test_embedding_invocation_without_explicit_operation_name_matches_langchain_usage():
+    """Verify the pattern used by langchain instrumentation (no operation_name kwarg)
+    produces the correct default."""
+    # This mirrors the construction in langchain __init__.py after the fix:
+    # UtilEmbeddingInvocation(request_model=..., input_texts=..., provider=..., attributes=...)
+    emb = EmbeddingInvocation(
+        request_model="text-embedding-ada-002",
+        input_texts=["hello world"],
+        provider="openai",
+        attributes={"framework": "langchain"},
+    )
+    assert emb.operation_name == "embeddings"