From 0fc6ab596cdc79e16c18797a8293d7687d6199cd Mon Sep 17 00:00:00 2001 From: Sergey Sergeev Date: Thu, 5 Feb 2026 15:51:20 -0800 Subject: [PATCH 1/4] fix dev setup documentation --- README.md | 60 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 486ac2ff..bcb01faa 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,7 @@ Evaluation worker -> evaluate -> handler.evaluation_results(list) -> CompositeEm - Backpressure strategies for high-volume content events. ## 14. Development setup + Get the packages installed: Setup a virtual env (Note: will erase your .venv in the current folder) @@ -272,8 +273,6 @@ pip install -e util/opentelemetry-util-genai --no-deps pip install -e util/opentelemetry-util-genai-evals --no-deps pip install -e util/opentelemetry-util-genai-evals-deepeval --no-deps pip install -e util/opentelemetry-util-genai-emitters-splunk --no-deps -pip install -e util/opentelemetry-util-genai-traceloop-translator --no-deps -pip install -e instrumentation-genai/opentelemetry-instrumentation-langchain --no-deps pip install -r dev-genai-requirements.txt pip install -r instrumentation-genai/opentelemetry-instrumentation-langchain/examples/manual/requirements.txt @@ -281,11 +280,56 @@ export OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric_event,splunk export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT_MODE=SPAN_AND_EVENT -export OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="Deepeval(LLMInvocation(bias,toxicity))" +# configure which GenAI types to evaluate and which evaluations, for example +# export OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(bias,toxicity),AgentInvocation(hallucination))" +# use minimal set up for development to avoid extra inference cost or use a local LLM +export 
OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="Deepeval(LLMInvocation(bias,toxicity))" export OTEL_INSTRUMENTATION_GENAI_EVALS_RESULTS_AGGREGATION=true +# Deepeval optimization +export DEEPEVAL_FILE_SYSTEM=READ_ONLY +export DEEPEVAL_TELEMETRY_OPT_OUT=YES +``` + +To debug util-genai + +```bash +export OTEL_INSTRUMENTATION_GENAI_DEBUG=false ``` -Sudo-code to create LLMInvocation for your in-code llm code +### to install an instrumentation library + +```bash +pip install -e instrumentation-genai/opentelemetry-instrumentation-langchain --no-deps +``` + +Examples for each instrumentation library or package can be found in `/examples`, i.e. + +```bash +util/opentelemetry-util-genai/examples/ +``` + +### Installing a Translator library + +To use exiting 3rd partu instrumentations and convert it to Splunk Distro semantic conventions/run instrumentation-side evaluations you can install a translator library. + +For example for existing traseloop instrumentations +```bash +pip install -e util/opentelemetry-util-genai-traceloop-translator --no-deps +``` + +## Installing aidefense instrumentation + +```bash +pip install -e instrumentation-genai/opentelemetry-instrumentation-aidefense + +export AI_DEFENSE_API_KEY="your-ai-defense-key" + +python instrumentation-genai/opentelemetry-instrumentation-aidefense/examples/multi_agent_travel_planner/main.py +``` + +## In-code instrumentation example + +Pseudo-code to create an LLMInvocation in your application code: ```python from opentelemetry.util.genai.handler import get_telemetry_handler @@ -302,14 +346,10 @@ inv.output_messages = [OutputMessage(role="assistant", parts=[Text("Hi!")], fini handler.stop_llm(inv) ``` -Additionally, for `aidefense` +Additionally, you can run a simple example reporting an LLM Invocation ```bash -pip install -e instrumentation-genai/opentelemetry-instrumentation-aidefense - -export AI_DEFENSE_API_KEY="your-ai-defense-key" - -python
instrumentation-genai/opentelemetry-instrumentation-aidefense/examples/multi_agent_travel_planner/main.py +python util/opentelemetry-util-genai/examples/invocation_example.py llm --exporter otlp --session-id my-session-123 ``` ## 15. Linting and Formatting From 824b659c2facc525df50684bd7fac45735bc9920 Mon Sep 17 00:00:00 2001 From: Sergey Sergeev Date: Thu, 5 Feb 2026 21:20:12 -0800 Subject: [PATCH 2/4] fix dev setup documentation --- README.md | 38 +++++++++++++++---- .../CHANGELOG.md | 5 +++ .../util/genai/evals/bootstrap.py | 3 ++ .../opentelemetry/util/genai/evals/version.py | 2 +- util/opentelemetry-util-genai/CHANGELOG.md | 5 +++ .../src/opentelemetry/util/genai/debug.py | 12 ++++++ .../src/opentelemetry/util/genai/version.py | 2 +- 7 files changed, 58 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index bcb01faa..074bd7dc 100644 --- a/README.md +++ b/README.md @@ -280,20 +280,44 @@ export OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric_event,splunk export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT_MODE=SPAN_AND_EVENT -# configure which GenAI types to evaluate and which evaluations, for example -# export OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(bias,toxicity),AgentInvocation(hallucination))" -# use minimal set up for development to avoid extra inference cost or use a local LLM +# configure which GenAI types to evaluate and which evaluations export OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="Deepeval(LLMInvocation(bias,toxicity))" -export OTEL_INSTRUMENTATION_GENAI_EVALS_RESULTS_AGGREGATION=true # Deepeval optimization export DEEPEVAL_FILE_SYSTEM=READ_ONLY export DEEPEVAL_TELEMETRY_OPT_OUT=YES +# set environment and service names for ease of filtering +export OTEL_SERVICE_NAME=genai-eval-test +export OTEL_RESOURCE_ATTRIBUTES='deployment.environment=genai-dev' ``` -To debug 
util-genai +For telemetry to properly work with Splunk Platform instrumentation, set the env var to enable Splunk format for aggregated evaluation results. + +```bash +export OTEL_INSTRUMENTATION_GENAI_EMITTERS_EVALUATION="replace-category:SplunkEvaluationResults" +export OTEL_INSTRUMENTATION_GENAI_EVALS_RESULTS_AGGREGATION=true +``` + +### Deepeval evaluator integration configuration + +Instrumentation-side evaluations can be configured using `OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS` environment variable + +```bash +# uses defaults - evaluates LLMInvocation and AgentInvocation with 5 metrics: +# (bias,toxicity,answer_relevancy,hallucination,sentiment) +OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval" + +# Specific metrics for LLMInvocation +OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(bias,toxicity))" + +# Multiple types with metrics +OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(bias,toxicity),AgentInvocation(hallucination))" + +# With metric options +OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(hallucination(threshold=0.8)))" +``` ```bash -export OTEL_INSTRUMENTATION_GENAI_DEBUG=false +export OTEL_INSTRUMENTATION_GENAI_DEBUG=true ``` ### to install an instrumentation library @@ -349,7 +373,7 @@ handler.stop_llm(inv) Additionally, you can run a simple example reporting an LLM Invocation ```bash -python util/opentelemetry-util-genai/examples/invocation_example.py llm --exporter otlp --session-id my-session-123 +python util/opentelemetry-util-genai/examples/invocation_example.py llm --exporter otlp ``` ## 15. Linting and Formatting diff --git a/util/opentelemetry-util-genai-evals/CHANGELOG.md b/util/opentelemetry-util-genai-evals/CHANGELOG.md index e798275e..f51a8f05 100644 --- a/util/opentelemetry-util-genai-evals/CHANGELOG.md +++ b/util/opentelemetry-util-genai-evals/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this repository are documented in this file. 
+## Version 0.1.8 - 2026-02-06 + +### Fixed +- **Logging visibility** - INFO level log messages from the evals bootstrap, proxy, manager, and worker modules are now always visible (e.g., "Using separate process evaluation mode"). DEBUG level messages require `OTEL_INSTRUMENTATION_GENAI_DEBUG=true`. + ## Version 0.1.7 - 2026-01-28 ### Added diff --git a/util/opentelemetry-util-genai-evals/src/opentelemetry/util/genai/evals/bootstrap.py b/util/opentelemetry-util-genai-evals/src/opentelemetry/util/genai/evals/bootstrap.py index 2dc2c5bd..5f743d90 100644 --- a/util/opentelemetry-util-genai-evals/src/opentelemetry/util/genai/evals/bootstrap.py +++ b/util/opentelemetry-util-genai-evals/src/opentelemetry/util/genai/evals/bootstrap.py @@ -5,6 +5,9 @@ import logging from typing import Any +# Import debug module to ensure parent logger is configured when debug is enabled +# This must happen before any logging calls in this module +from opentelemetry.util.genai import debug as _debug # noqa: F401 from opentelemetry.util.genai.callbacks import CompletionCallback from .manager import Manager diff --git a/util/opentelemetry-util-genai-evals/src/opentelemetry/util/genai/evals/version.py b/util/opentelemetry-util-genai-evals/src/opentelemetry/util/genai/evals/version.py index 4e891f14..08721248 100644 --- a/util/opentelemetry-util-genai-evals/src/opentelemetry/util/genai/evals/version.py +++ b/util/opentelemetry-util-genai-evals/src/opentelemetry/util/genai/evals/version.py @@ -1,3 +1,3 @@ """Version metadata for opentelemetry-util-genai-evals.""" -__version__ = "0.1.7" +__version__ = "0.1.8" diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index a15f0318..4ddc77a7 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this repository are documented in this file. 
+## Version 0.1.10 - 2026-02-06 + +### Fixed +- **Logging visibility for evals module** - INFO level messages from `opentelemetry.util.genai.evals.*` modules are now always visible (e.g., "Using separate process evaluation mode"). DEBUG level messages require `OTEL_INSTRUMENTATION_GENAI_DEBUG=true`. + ## Version 0.1.9 - 2026-01-29 - Release 0.1.9 diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/debug.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/debug.py index 37e7008d..880b41f3 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/debug.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/debug.py @@ -55,6 +55,18 @@ def _read_enabled_flag() -> bool: handler.setFormatter(fmt) _LOGGER.addHandler(handler) _LOGGER.setLevel(logging.DEBUG) + _LOGGER.propagate = False # Prevent duplicate logs via parent logger + +# Configure the parent logger for all opentelemetry.util.genai.* modules +# including evals subpackages. INFO level messages (like "Using separate process +# evaluation mode") are always visible; DEBUG level requires the debug flag. +_PARENT_LOGGER = logging.getLogger("opentelemetry.util.genai") +if not _PARENT_LOGGER.handlers: + parent_handler = logging.StreamHandler() + parent_fmt = logging.Formatter("[%(name)s] %(levelname)s: %(message)s") + parent_handler.setFormatter(parent_fmt) + _PARENT_LOGGER.addHandler(parent_handler) + _PARENT_LOGGER.setLevel(logging.DEBUG if _ENABLED else logging.INFO) def is_enabled() -> bool: diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/version.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/version.py index cb785abb..ad64ed74 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/version.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "0.1.9" +__version__ = "0.1.10" From e3a5e7c3fca87d8f8b4e250ac33fb4fe547a56b6 Mon Sep 17 00:00:00 2001 From: Sergey Sergeev Date: Thu, 5 Feb 2026 22:06:03 -0800 Subject: [PATCH 3/4] fix logger name in the test --- util/opentelemetry-util-genai/tests/test_upload_hook.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/util/opentelemetry-util-genai/tests/test_upload_hook.py b/util/opentelemetry-util-genai/tests/test_upload_hook.py index 93731bce..8351ed6e 100644 --- a/util/opentelemetry-util-genai/tests/test_upload_hook.py +++ b/util/opentelemetry-util-genai/tests/test_upload_hook.py @@ -70,7 +70,9 @@ def test_load_upload_hook_invalid(self, mock_entry_points: Mock): FakeEntryPoint("my-hook", lambda: InvalidUploadHook) ] - with self.assertLogs(level=logging.DEBUG) as logs: + with self.assertLogs( + "opentelemetry.util.genai.upload_hook", level=logging.DEBUG + ) as logs: self.assertIsInstance(load_upload_hook(), _NoOpUploadHook) self.assertEqual(len(logs.output), 1) self.assertIn("is not a valid UploadHook. Using noop", logs.output[0]) From adb7f6c863a2fb1251c4838caa017d0075c39ac1 Mon Sep 17 00:00:00 2001 From: Sergey Sergeev Date: Thu, 5 Feb 2026 22:08:57 -0800 Subject: [PATCH 4/4] fix typo --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 074bd7dc..3d5b4db2 100644 --- a/README.md +++ b/README.md @@ -293,8 +293,8 @@ export OTEL_RESOURCE_ATTRIBUTES='deployment.environment=genai-dev' For telemetry to properly work with Splunk Platform instrumentation, set the env var to enable Splunk format for aggregated evaluation results. 
```bash -export OTEL_INSTRUMENTATION_GENAI_EMITTERS_EVALUATION="replace-category:SplunkEvaluationResults" export OTEL_INSTRUMENTATION_GENAI_EVALS_RESULTS_AGGREGATION=true +export OTEL_INSTRUMENTATION_GENAI_EMITTERS_EVALUATION="replace-category:SplunkEvaluationResults" ``` ### Deepeval evaluator integration configuration @@ -334,7 +334,7 @@ util/opentelemetry-util-genai/examples/ ### Installing a Translator library -To use exiting 3rd partu instrumentations and convert it to Splunk Distro semantic conventions/run instrumentation-side evaluations you can install a translator library. +To use existing 3rd party instrumentations and convert them to Splunk Distro semantic conventions/run instrumentation-side evaluations you can install a translator library. For example for existing traseloop instrumentations ```bash