84 changes: 74 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -259,6 +259,7 @@ Evaluation worker -> evaluate -> handler.evaluation_results(list) -> CompositeEm
- Backpressure strategies for high-volume content events.

## 14. Development setup

Install the packages:

Set up a virtual environment (note: this will erase any existing `.venv` in the current folder):
@@ -272,20 +273,87 @@ pip install -e util/opentelemetry-util-genai --no-deps
pip install -e util/opentelemetry-util-genai-evals --no-deps
pip install -e util/opentelemetry-util-genai-evals-deepeval --no-deps
pip install -e util/opentelemetry-util-genai-emitters-splunk --no-deps
pip install -e util/opentelemetry-util-genai-traceloop-translator --no-deps
pip install -e instrumentation-genai/opentelemetry-instrumentation-langchain --no-deps
pip install -r dev-genai-requirements.txt
pip install -r instrumentation-genai/opentelemetry-instrumentation-langchain/examples/manual/requirements.txt

export OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental
export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric_event,splunk
export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true
export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT_MODE=SPAN_AND_EVENT
# configure which GenAI types to evaluate and which evaluations
export OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(bias,toxicity))"
# Deepeval optimization
export DEEPEVAL_FILE_SYSTEM=READ_ONLY
export DEEPEVAL_TELEMETRY_OPT_OUT=YES
# set environment and service names for ease of filtering
export OTEL_SERVICE_NAME=genai-eval-test
export OTEL_RESOURCE_ATTRIBUTES='deployment.environment=genai-dev'
```

For telemetry to work properly with Splunk Platform instrumentation, set the following environment variables to enable the Splunk format for aggregated evaluation results.

```bash
export OTEL_INSTRUMENTATION_GENAI_EVALS_RESULTS_AGGREGATION=true
export OTEL_INSTRUMENTATION_GENAI_EMITTERS_EVALUATION="replace-category:SplunkEvaluationResults"
```

### Deepeval evaluator integration configuration

Instrumentation-side evaluations can be configured with the `OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS` environment variable:

```bash
# uses defaults - evaluates LLMInvocation and AgentInvocation with 5 metrics:
# (bias,toxicity,answer_relevancy,hallucination,sentiment)
OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval"

# Specific metrics for LLMInvocation
OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(bias,toxicity))"

# Multiple types with metrics
OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(bias,toxicity),AgentInvocation(hallucination))"

# With metric options
OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS="deepeval(LLMInvocation(hallucination(threshold=0.8)))"
```
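As an illustration of the spec grammar only — this is a hypothetical sketch, not the library's actual parser, and `parse_evaluator_spec` is an invented name — a string like `deepeval(LLMInvocation(bias,toxicity))` can be broken down as:

```python
import re

def parse_evaluator_spec(spec: str) -> dict:
    """Illustrative-only breakdown of 'evaluator(Type(metric,...),...)' specs."""
    m = re.fullmatch(r"(\w+)(?:\((.*)\))?", spec.strip())
    if not m:
        raise ValueError(f"unrecognized evaluator spec: {spec!r}")
    evaluator, body = m.group(1).lower(), m.group(2)
    types: dict = {}
    if body:
        # Each top-level "Type( ... )" group; the inner part may itself
        # contain one level of parentheses for metric options.
        for tm in re.finditer(r"(\w+)\(([^()]*(?:\([^()]*\)[^()]*)*)\)", body):
            gen_type, metrics = tm.group(1), tm.group(2)
            types[gen_type] = [s.strip() for s in _split_top_level(metrics)]
    return {evaluator: types}

def _split_top_level(s: str) -> list:
    # Split on commas that are not nested inside parentheses.
    parts, depth, cur = [], 0, ""
    for ch in s:
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        if ch == "," and depth == 0:
            parts.append(cur)
            cur = ""
        else:
            cur += ch
    if cur:
        parts.append(cur)
    return parts
```

Under these assumptions, `parse_evaluator_spec("deepeval(LLMInvocation(hallucination(threshold=0.8)))")` yields `{"deepeval": {"LLMInvocation": ["hallucination(threshold=0.8)"]}}`.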
Comment on lines +283 to +317

⚠️ Potential issue | 🟡 Minor

Align Deepeval token casing to avoid config ambiguity.

Line 284 uses Deepeval(...) while Lines 307–316 use deepeval(...). If the evaluator grammar is case‑sensitive, this is a foot‑gun for users. Please standardize the token casing across examples (and ideally match the parser’s expected value).



To enable debug logging:

```bash
export OTEL_INSTRUMENTATION_GENAI_DEBUG=true
```

### Installing an instrumentation library

```bash
pip install -e instrumentation-genai/opentelemetry-instrumentation-langchain --no-deps
```

Examples for each instrumentation library or package can be found in that package's `examples` folder, e.g.

```bash
util/opentelemetry-util-genai/examples/
```

### Installing a Translator library

To use existing third-party instrumentations and convert their telemetry to Splunk Distro semantic conventions (and run instrumentation-side evaluations), you can install a translator library.

For example, for existing Traceloop instrumentations:

```bash
pip install -e util/opentelemetry-util-genai-traceloop-translator --no-deps
```
Comment on lines +335 to +342

⚠️ Potential issue | 🟡 Minor

Fix wording + spelling in translator section.

Line 337 “exiting 3rd party” → “existing third‑party” and Line 339 “traseloop” → “traceloop”.

💡 Suggested doc fix
-To use exiting 3rd party instrumentations and convert it to Splunk Distro semantic conventions/run instrumentation-side evaluations you can install a translator library.
+To use existing third-party instrumentations and convert them to Splunk Distro semantic conventions/run instrumentation-side evaluations you can install a translator library.

-For example for existing traseloop instrumentations
+For example for existing traceloop instrumentations



### Installing aidefense instrumentation

```bash
pip install -e instrumentation-genai/opentelemetry-instrumentation-aidefense

export AI_DEFENSE_API_KEY="your-ai-defense-key"

python instrumentation-genai/opentelemetry-instrumentation-aidefense/examples/multi_agent_travel_planner/main.py
```

## In-code instrumentation example

Pseudocode for creating an `LLMInvocation` in your application code:

```python
from opentelemetry.util.genai.handler import get_telemetry_handler
@@ -302,14 +370,10 @@ inv.output_messages = [OutputMessage(role="assistant", parts=[Text("Hi!")], fini
handler.stop_llm(inv)
```

Additionally, you can run a simple example that reports an LLM invocation:

```bash
python util/opentelemetry-util-genai/examples/invocation_example.py llm --exporter otlp
```

## 15. Linting and Formatting
5 changes: 5 additions & 0 deletions util/opentelemetry-util-genai-evals/CHANGELOG.md
@@ -2,6 +2,11 @@

All notable changes to this repository are documented in this file.

## Version 0.1.8 - 2026-02-06

### Fixed
- **Logging visibility** - INFO level log messages from the evals bootstrap, proxy, manager, and worker modules are now always visible (e.g., "Using separate process evaluation mode"). DEBUG level messages require `OTEL_INSTRUMENTATION_GENAI_DEBUG=true`.

## Version 0.1.7 - 2026-01-28

### Added
@@ -5,6 +5,9 @@
import logging
from typing import Any

# Import debug module to ensure parent logger is configured when debug is enabled
# This must happen before any logging calls in this module
from opentelemetry.util.genai import debug as _debug # noqa: F401
from opentelemetry.util.genai.callbacks import CompletionCallback

from .manager import Manager
@@ -1,3 +1,3 @@
"""Version metadata for opentelemetry-util-genai-evals."""

__version__ = "0.1.7"
__version__ = "0.1.8"
5 changes: 5 additions & 0 deletions util/opentelemetry-util-genai/CHANGELOG.md
@@ -2,6 +2,11 @@

All notable changes to this repository are documented in this file.

## Version 0.1.10 - 2026-02-06

### Fixed
- **Logging visibility for evals module** - INFO level messages from `opentelemetry.util.genai.evals.*` modules are now always visible (e.g., "Using separate process evaluation mode"). DEBUG level messages require `OTEL_INSTRUMENTATION_GENAI_DEBUG=true`.

## Version 0.1.9 - 2026-01-29

- Release 0.1.9
@@ -55,6 +55,18 @@ def _read_enabled_flag() -> bool:
handler.setFormatter(fmt)
_LOGGER.addHandler(handler)
_LOGGER.setLevel(logging.DEBUG)
_LOGGER.propagate = False # Prevent duplicate logs via parent logger

# Configure the parent logger for all opentelemetry.util.genai.* modules
# including evals subpackages. INFO level messages (like "Using separate process
# evaluation mode") are always visible; DEBUG level requires the debug flag.
_PARENT_LOGGER = logging.getLogger("opentelemetry.util.genai")
if not _PARENT_LOGGER.handlers:
parent_handler = logging.StreamHandler()
parent_fmt = logging.Formatter("[%(name)s] %(levelname)s: %(message)s")
parent_handler.setFormatter(parent_fmt)
_PARENT_LOGGER.addHandler(parent_handler)
_PARENT_LOGGER.setLevel(logging.DEBUG if _ENABLED else logging.INFO)
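The change above leans on Python's standard logger hierarchy: records logged to any `opentelemetry.util.genai.*` child logger propagate up to the handler attached to the `opentelemetry.util.genai` parent, and a child that attaches its own handler sets `propagate = False` to avoid duplicate output. A minimal self-contained sketch of that behavior (logger names are illustrative):

```python
import logging

# Parent logger with one handler; INFO and above are always visible,
# mirroring the configuration in the debug module above.
parent = logging.getLogger("opentelemetry.util.genai")
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("[%(name)s] %(levelname)s: %(message)s"))
parent.addHandler(handler)
parent.setLevel(logging.INFO)

# A child logger with no level of its own inherits the parent's effective
# level, and its records bubble up to the parent's handler.
child = logging.getLogger("opentelemetry.util.genai.evals.manager")
child.info("Using separate process evaluation mode")  # emitted via parent's handler
child.debug("verbose internals")                      # filtered out at INFO

# A child that owns a handler opts out of propagation so each record
# is emitted exactly once rather than by both handlers.
debug_child = logging.getLogger("opentelemetry.util.genai.debug")
debug_child.addHandler(logging.StreamHandler())
debug_child.propagate = False
```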


def is_enabled() -> bool:
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.1.9"
__version__ = "0.1.10"
4 changes: 3 additions & 1 deletion util/opentelemetry-util-genai/tests/test_upload_hook.py
@@ -70,7 +70,9 @@ def test_load_upload_hook_invalid(self, mock_entry_points: Mock):
FakeEntryPoint("my-hook", lambda: InvalidUploadHook)
]

with self.assertLogs(
"opentelemetry.util.genai.upload_hook", level=logging.DEBUG
) as logs:
self.assertIsInstance(load_upload_hook(), _NoOpUploadHook)
self.assertEqual(len(logs.output), 1)
self.assertIn("is not a valid UploadHook. Using noop", logs.output[0])