From 56e507b19848d2742c537e172dc93c8088cf8da4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:27:34 +0000
Subject: [PATCH 01/19] Initial plan


From 4843de35c50aa7e37dfeaad6d91b931353a2810c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 17 Mar 2026 12:02:04 +0000
Subject: [PATCH 02/19] Implement Experiment reconciler with ConfigMap,
 TestWorkflow, TestTrigger, status reporting, and tests

Co-authored-by: fmallmann <30110193+fmallmann@users.noreply.github.com>
---
 operator/.golangci.yml                        |  39 +-
 operator/config/rbac/role.yaml                |  36 ++
 operator/go.mod                               |   2 +-
 .../controller/experiment_controller.go       | 495 ++++++++++++++-
 .../controller/experiment_controller_test.go  | 588 +++++++++++++++++-
 operator/internal/controller/suite_test.go    |   5 +-
 .../crds/tests.testkube.io_testtriggers.yaml  |  21 +
 ...stworkflows.testkube.io_testworkflows.yaml |  21 +
 8 files changed, 1140 insertions(+), 67 deletions(-)
 create mode 100644 operator/internal/controller/testdata/crds/tests.testkube.io_testtriggers.yaml
 create mode 100644 operator/internal/controller/testdata/crds/testworkflows.testkube.io_testworkflows.yaml

diff --git a/operator/.golangci.yml b/operator/.golangci.yml
index aac8a13..e24a15b 100644
--- a/operator/.golangci.yml
+++ b/operator/.golangci.yml
@@ -1,33 +1,20 @@
+version: "2"
+
 run:
   timeout: 5m
   allow-parallel-runners: true
+  # go version is set to 1.25 for compatibility with golangci-lint v2.10.1
+  # which was built with go1.25; update when a newer linter release is available.
+  go: "1.25"
 
-issues:
-  # don't skip warning about doc comments
-  # don't exclude the default set of lint
-  exclude-use-default: false
-  # restore some of the defaults
-  # (fill in the rest as needed)
-  exclude-rules:
-    - path: "api/*"
-      linters:
-        - lll
-    - path: "internal/*"
-      linters:
-        - dupl
-        - lll
 linters:
   disable-all: true
   enable:
     - dupl
     - errcheck
-    - exportloopref
     - ginkgolinter
     - goconst
     - gocyclo
-    - gofmt
-    - goimports
-    - gosimple
     - govet
     - ineffassign
     - lll
@@ -36,10 +23,24 @@ linters:
     - prealloc
     - revive
     - staticcheck
-    - typecheck
     - unconvert
     - unparam
     - unused
+  exclusions:
+    rules:
+      - path: "^api/"
+        linters:
+          - lll
+      - path: "^internal/"
+        linters:
+          - dupl
+          - lll
+      - path: "(^internal/|^test/|^cmd/)"
+        linters:
+          - revive
+      - path: "^test/"
+        linters:
+          - staticcheck
 
 linters-settings:
   revive:
diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml
index 49b99de..a6be05a 100644
--- a/operator/config/rbac/role.yaml
+++ b/operator/config/rbac/role.yaml
@@ -4,6 +4,18 @@ kind: ClusterRole
 metadata:
   name: manager-role
 rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - testbench.agentic-layer.ai
   resources:
@@ -30,3 +42,27 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - tests.testkube.io
+  resources:
+  - testtriggers
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - testworkflows.testkube.io
+  resources:
+  - testworkflows
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
diff --git a/operator/go.mod b/operator/go.mod
index 0b2a062..27d9d75 100644
--- a/operator/go.mod
+++ b/operator/go.mod
@@ -1,6 +1,6 @@
 module github.com/agentic-layer/testbench/operator
 
-go 1.26.0
+go 1.25.0
 
 require (
 	github.com/onsi/ginkgo/v2 v2.28.1
diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go
index d187d4d..7565db7 100644
--- a/operator/internal/controller/experiment_controller.go
+++ b/operator/internal/controller/experiment_controller.go
@@ -18,16 +18,84 @@ package controller
 
 import (
 	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
 
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	apimeta "k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 
 	testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1"
 )
 
-// ExperimentReconciler reconciles a Experiment object
+const (
+	conditionReady         = "Ready"                       // condition type: overall reconcile outcome
+	conditionWorkflowReady = "WorkflowReady"               // condition type: state of the generated TestWorkflow
+	otelConfigMapName      = "otel-config"                 // ConfigMap referenced by the workflow container env
+	otelEndpointKey        = "OTEL_EXPORTER_OTLP_ENDPOINT" // used as both env var name and ConfigMap key
+	defaultAgentPort       = "8000"                        // port placed in the agent URL
+)
+
+var (
+	testWorkflowGVK = schema.GroupVersionKind{ // Testkube TestWorkflow, accessed as an unstructured object
+		Group:   "testworkflows.testkube.io",
+		Version: "v1",
+		Kind:    "TestWorkflow",
+	}
+	testTriggerGVK = schema.GroupVersionKind{ // Testkube TestTrigger, accessed as an unstructured object
+		Group:   "tests.testkube.io",
+		Version: "v1",
+		Kind:    "TestTrigger",
+	}
+)
+
+// experimentJSON is the top-level document written to experiment.json and consumed by testbench scripts.
+type experimentJSON struct {
+	LLMAsAJudgeModel string         `json:"llm_as_a_judge_model,omitempty"`
+	DefaultThreshold float64        `json:"default_threshold"`
+	Scenarios        []scenarioJSON `json:"scenarios"`
+}
+
+type scenarioJSON struct { // one entry of experimentJSON.Scenarios
+	Name  string     `json:"name"`
+	Steps []stepJSON `json:"steps"`
+}
+
+type stepJSON struct { // one entry of scenarioJSON.Steps; every field except Input is optional in the output
+	Input        string          `json:"input"`
+	Reference    *referenceJSON  `json:"reference,omitempty"`
+	CustomValues json.RawMessage `json:"custom_values,omitempty"`
+	Metrics      []metricJSON    `json:"metrics,omitempty"`
+}
+
+type referenceJSON struct { // optional reference data attached to a step (stepJSON.Reference)
+	Response  string         `json:"response,omitempty"`
+	ToolCalls []toolCallJSON `json:"tool_calls,omitempty"`
+	Topics    []string       `json:"topics,omitempty"`
+}
+
+type toolCallJSON struct { // one entry of referenceJSON.ToolCalls; Args is embedded as raw JSON
+	Name string          `json:"name"`
+	Args json.RawMessage `json:"args,omitempty"`
+}
+
+type metricJSON struct { // one entry of stepJSON.Metrics
+	MetricName string          `json:"metric_name"`
+	Threshold  float64         `json:"threshold,omitempty"` // a zero threshold is omitted from the output
+	Parameters json.RawMessage `json:"parameters,omitempty"`
+}
+
+// ExperimentReconciler reconciles an Experiment object.
 type ExperimentReconciler struct {
 	client.Client
 	Scheme *runtime.Scheme
@@ -36,27 +104,428 @@ type ExperimentReconciler struct {
 // +kubebuilder:rbac:groups=testbench.agentic-layer.ai,resources=experiments,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=testbench.agentic-layer.ai,resources=experiments/status,verbs=get;update;patch
 // +kubebuilder:rbac:groups=testbench.agentic-layer.ai,resources=experiments/finalizers,verbs=update
+// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=testworkflows.testkube.io,resources=testworkflows,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=tests.testkube.io,resources=testtriggers,verbs=get;list;watch;create;update;patch;delete
 
-// Reconcile is part of the main kubernetes reconciliation loop which aims to
-// move the current state of the cluster closer to the desired state.
-// TODO(user): Modify the Reconcile function to compare the state specified by
-// the Experiment object against the actual cluster state, and then
-// perform operations to make the cluster state reflect the state specified by
-// the user.
-//
-// For more details, check Reconcile and its Result here:
-// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile
+// Reconcile creates or updates the resources derived from the Experiment and records the outcome in its status.
 func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	_ = log.FromContext(ctx)
+	experiment := &testbenchv1alpha1.Experiment{}
+	if err := r.Get(ctx, req.NamespacedName, experiment); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	var generatedResources []testbenchv1alpha1.GeneratedResource
+	reconcileErr := r.reconcileResources(ctx, experiment, &generatedResources)
+	// Errors are returned, not logged here; a failed status update must not hide reconcileErr.
+	if statusErr := r.updateStatus(ctx, experiment, generatedResources, reconcileErr); statusErr != nil {
+		if reconcileErr != nil {
+			return ctrl.Result{}, fmt.Errorf("updating status: %w; reconcile error: %w", statusErr, reconcileErr)
+		}
+		return ctrl.Result{}, fmt.Errorf("updating status: %w", statusErr)
+	}
 
-	// TODO(user): your logic here
+	return ctrl.Result{}, reconcileErr
+}
 
-	return ctrl.Result{}, nil
+// reconcileResources reconciles the ConfigMap, TestWorkflow and TestTrigger in order, stopping at the first error.
+func (r *ExperimentReconciler) reconcileResources(
+	ctx context.Context, experiment *testbenchv1alpha1.Experiment,
+	generatedResources *[]testbenchv1alpha1.GeneratedResource,
+) error {
+	if cmErr := r.reconcileConfigMap(ctx, experiment, generatedResources); cmErr != nil {
+		return fmt.Errorf("reconciling ConfigMap: %w", cmErr)
+	}
+	if wfErr := r.reconcileTestWorkflow(ctx, experiment, generatedResources); wfErr != nil {
+		return fmt.Errorf("reconciling TestWorkflow: %w", wfErr)
+	}
+	if trigErr := r.reconcileTestTrigger(ctx, experiment, generatedResources); trigErr != nil {
+		return fmt.Errorf("reconciling TestTrigger: %w", trigErr)
+	}
+	return nil
+}
+
+// reconcileConfigMap ensures a ConfigMap named after the Experiment exists, is controlled by it, and
+// carries the rendered experiment.json. On success the ConfigMap is appended to generatedResources.
+func (r *ExperimentReconciler) reconcileConfigMap(
+	ctx context.Context,
+	experiment *testbenchv1alpha1.Experiment,
+	generatedResources *[]testbenchv1alpha1.GeneratedResource,
+) error {
+	configMap := &corev1.ConfigMap{}
+	configMap.Name = experiment.Name
+	configMap.Namespace = experiment.Namespace
+
+	// mutate is handed to CreateOrUpdate, which invokes it on the object before writing it.
+	mutate := func() error {
+		if refErr := controllerutil.SetControllerReference(experiment, configMap, r.Scheme); refErr != nil {
+			return refErr
+		}
+		rendered, renderErr := r.buildExperimentJSON(experiment)
+		if renderErr != nil {
+			return renderErr
+		}
+		configMap.Data = map[string]string{
+			"experiment.json": rendered,
+		}
+		return nil
+	}
+	if _, err := controllerutil.CreateOrUpdate(ctx, r.Client, configMap, mutate); err != nil {
+		return err
+	}
+
+	entry := testbenchv1alpha1.GeneratedResource{
+		Kind:      "ConfigMap",
+		Name:      configMap.Name,
+		Namespace: configMap.Namespace,
+	}
+	*generatedResources = append(*generatedResources, entry)
+	return nil
+}
+
+// buildExperimentJSON renders the Experiment spec as the indented experiment.json document expected by the
+// testbench scripts. Scenarios is always emitted as a JSON array, empty when the spec defines no scenarios.
+func (r *ExperimentReconciler) buildExperimentJSON(experiment *testbenchv1alpha1.Experiment) (string, error) {
+	spec := &experiment.Spec
+	scenarios := make([]scenarioJSON, len(spec.Scenarios))
+	for i := range spec.Scenarios {
+		src := &spec.Scenarios[i]
+		steps := make([]stepJSON, len(src.Steps))
+		for j := range src.Steps {
+			steps[j] = r.convertStep(src.Steps[j])
+		}
+		scenarios[i] = scenarioJSON{Name: src.Name, Steps: steps}
+	}
+	doc := experimentJSON{
+		LLMAsAJudgeModel: spec.LLMAsAJudgeModel,
+		DefaultThreshold: spec.DefaultThreshold,
+		Scenarios:        scenarios,
+	}
+	encoded, err := json.MarshalIndent(doc, "", "  ")
+	if err != nil {
+		return "", err
+	}
+	return string(encoded), nil
+}
+
+// convertStep maps one Step of the spec onto its experiment.json form; raw JSON payloads are passed through unparsed.
+func (r *ExperimentReconciler) convertStep(step testbenchv1alpha1.Step) stepJSON {
+	out := stepJSON{
+		Input:        step.Input,
+		CustomValues: step.CustomValues.Raw,
+	}
+	if src := step.Reference; src != nil {
+		out.Reference = &referenceJSON{
+			Response: src.Response,
+			Topics:   src.Topics,
+		}
+		for i := range src.ToolCalls {
+			call := toolCallJSON{
+				Name: src.ToolCalls[i].Name,
+				Args: src.ToolCalls[i].Args.Raw,
+			}
+			out.Reference.ToolCalls = append(out.Reference.ToolCalls, call)
+		}
+	}
+	for i := range step.Metrics {
+		metric := &step.Metrics[i]
+		converted := metricJSON{
+			MetricName: metric.MetricName,
+			Threshold:  metric.Threshold,
+			Parameters: metric.Parameters.Raw,
+		}
+		out.Metrics = append(out.Metrics, converted)
+	}
+	return out
+}
+
+// reconcileTestWorkflow creates or updates the Testkube TestWorkflow; it is a no-op when the CRD is not installed.
+func (r *ExperimentReconciler) reconcileTestWorkflow(
+	ctx context.Context,
+	experiment *testbenchv1alpha1.Experiment,
+	generatedResources *[]testbenchv1alpha1.GeneratedResource,
+) error {
+	desired := r.buildTestWorkflow(experiment)
+	if err := controllerutil.SetControllerReference(experiment, desired, r.Scheme); err != nil {
+		return err
+	}
+	key := types.NamespacedName{Name: desired.GetName(), Namespace: desired.GetNamespace()}
+	current := &unstructured.Unstructured{}
+	current.SetGroupVersionKind(testWorkflowGVK)
+	switch getErr := r.Get(ctx, key, current); {
+	case errors.IsNotFound(getErr):
+		if err := r.Create(ctx, desired); err != nil {
+			return err
+		}
+	case getErr != nil && isCRDNotInstalled(getErr):
+		log.FromContext(ctx).Info("Testkube TestWorkflow CRD not installed; skipping TestWorkflow reconciliation")
+		return nil
+	case getErr != nil:
+		return getErr
+	default:
+		// Replace only spec and owner references on the fetched object before updating it.
+		current.Object["spec"] = desired.Object["spec"]
+		current.SetOwnerReferences(desired.GetOwnerReferences())
+		if err := r.Update(ctx, current); err != nil {
+			return err
+		}
+	}
+
+	*generatedResources = append(*generatedResources, testbenchv1alpha1.GeneratedResource{
+		Kind:      "TestWorkflow",
+		Name:      desired.GetName(),
+		Namespace: desired.GetNamespace(),
+	})
+	return nil
+}
+
+// buildTestWorkflow returns the desired TestWorkflow as an unstructured object named after the Experiment.
+func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment) *unstructured.Unstructured {
+	agentURL := r.resolveAgentURL(experiment)
+
+	// Templates chained via spec.use; setup-template comes first and only when a dataset is configured.
+	var templates []any
+	if experiment.Spec.Dataset != nil {
+		setup := map[string]any{
+			"name": "setup-template",
+			"config": map[string]any{
+				"datasetUrl": r.resolveDatasetURL(experiment),
+			},
+		}
+		templates = append(templates, setup)
+	}
+	templates = append(templates,
+		map[string]any{
+			"name": "run-template",
+			"config": map[string]any{
+				"agentUrl": agentURL,
+			},
+		},
+		map[string]any{"name": "evaluate-template"},
+		map[string]any{"name": "publish-template"},
+		map[string]any{"name": "visualize-template"},
+	)
+
+	// Container env var whose value is taken from the otelConfigMapName ConfigMap.
+	otelEnv := map[string]any{
+		"name": otelEndpointKey,
+		"valueFrom": map[string]any{
+			"configMapKeyRef": map[string]any{
+				"name": otelConfigMapName,
+				"key":  otelEndpointKey,
+			},
+		},
+	}
+	spec := map[string]any{
+		"container": map[string]any{
+			"env": []any{otelEnv},
+		},
+		"use": templates,
+	}
+
+	// Scenarios mode (no dataset): experiment.json is supplied from the Experiment's ConfigMap.
+	if experiment.Spec.Dataset == nil {
+		experimentFile := map[string]any{
+			"path": "/data/datasets/experiment.json",
+			"contentFrom": map[string]any{
+				"configMapKeyRef": map[string]any{
+					"name": experiment.Name,
+					"key":  "experiment.json",
+				},
+			},
+		}
+		spec["content"] = map[string]any{
+			"files": []any{experimentFile},
+		}
+	}
+
+	metadata := map[string]any{
+		"name":      experiment.Name,
+		"namespace": experiment.Namespace,
+	}
+	return &unstructured.Unstructured{
+		Object: map[string]any{
+			"apiVersion": testWorkflowGVK.GroupVersion().String(),
+			"kind":       testWorkflowGVK.Kind,
+			"metadata":   metadata,
+			"spec":       spec,
+		},
+	}
+}
+
+// reconcileTestTrigger creates or updates the Testkube TestTrigger when the trigger is enabled and deletes
+// it otherwise. A missing TestTrigger CRD is tolerated in both cases.
+func (r *ExperimentReconciler) reconcileTestTrigger(
+	ctx context.Context,
+	experiment *testbenchv1alpha1.Experiment,
+	generatedResources *[]testbenchv1alpha1.GeneratedResource,
+) error {
+	triggerName := experiment.Name + "-trigger"
+
+	if cfg := experiment.Spec.Trigger; cfg == nil || !cfg.Enabled {
+		// Trigger disabled: remove a previously generated TestTrigger, if any.
+		stale := &unstructured.Unstructured{}
+		stale.SetGroupVersionKind(testTriggerGVK)
+		stale.SetName(triggerName)
+		stale.SetNamespace(experiment.Namespace)
+		delErr := r.Delete(ctx, stale)
+		if delErr == nil || errors.IsNotFound(delErr) || isCRDNotInstalled(delErr) {
+			return nil
+		}
+		return delErr
+	}
+
+	desired := r.buildTestTrigger(experiment)
+	if err := controllerutil.SetControllerReference(experiment, desired, r.Scheme); err != nil {
+		return err
+	}
+
+	current := &unstructured.Unstructured{}
+	current.SetGroupVersionKind(testTriggerGVK)
+	key := types.NamespacedName{Name: triggerName, Namespace: experiment.Namespace}
+	switch getErr := r.Get(ctx, key, current); {
+	case errors.IsNotFound(getErr):
+		if err := r.Create(ctx, desired); err != nil {
+			return err
+		}
+	case getErr != nil && isCRDNotInstalled(getErr):
+		log.FromContext(ctx).Info("Testkube TestTrigger CRD not installed; skipping TestTrigger reconciliation")
+		return nil
+	case getErr != nil:
+		return getErr
+	default:
+		// Replace only spec and owner references on the fetched object before updating it.
+		current.Object["spec"] = desired.Object["spec"]
+		current.SetOwnerReferences(desired.GetOwnerReferences())
+		if err := r.Update(ctx, current); err != nil {
+			return err
+		}
+	}
+
+	*generatedResources = append(*generatedResources, testbenchv1alpha1.GeneratedResource{
+		Kind:      "TestTrigger",
+		Name:      triggerName,
+		Namespace: experiment.Namespace,
+	})
+	return nil
+}
+
+// buildTestTrigger returns the desired TestTrigger, named "<experiment>-trigger", as an unstructured object.
+func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Experiment) *unstructured.Unstructured {
+	agentNamespace := experiment.Spec.AgentRef.Namespace
+	if agentNamespace == "" {
+		agentNamespace = experiment.Namespace
+	}
+
+	// Default to "allow"; an explicitly configured policy is lower-cased before it is written.
+	policy := "allow"
+	if cfg := experiment.Spec.Trigger; cfg != nil && cfg.ConcurrencyPolicy != "" {
+		policy = strings.ToLower(cfg.ConcurrencyPolicy)
+	}
+	spec := map[string]any{
+		"resource": "deployment",
+		"resourceSelector": map[string]any{
+			"name":      experiment.Spec.AgentRef.Name,
+			"namespace": agentNamespace,
+		},
+		"event":             "modified",
+		"action":            "run",
+		"execution":         "testworkflow",
+		"concurrencyPolicy": policy,
+		"testSelector": map[string]any{
+			"name":      experiment.Name,
+			"namespace": experiment.Namespace,
+		},
+		"disabled": false,
+	}
+	metadata := map[string]any{
+		"name":      experiment.Name + "-trigger",
+		"namespace": experiment.Namespace,
+	}
+	return &unstructured.Unstructured{Object: map[string]any{
+		"apiVersion": testTriggerGVK.GroupVersion().String(),
+		"kind":       testTriggerGVK.Kind,
+		"metadata":   metadata,
+		"spec":       spec,
+	}}
+}
+
+// updateStatus records generatedResources plus the Ready and WorkflowReady conditions and writes the status.
+func (r *ExperimentReconciler) updateStatus(
+	ctx context.Context,
+	experiment *testbenchv1alpha1.Experiment,
+	generatedResources []testbenchv1alpha1.GeneratedResource,
+	reconcileErr error,
+) error {
+	experiment.Status.GeneratedResources = generatedResources
+
+	ready := metav1.Condition{
+		Type:               conditionReady,
+		Status:             metav1.ConditionTrue,
+		ObservedGeneration: experiment.Generation,
+		Reason:             "ReconcileSucceeded",
+		Message:            "All resources reconciled successfully",
+	}
+	if reconcileErr != nil {
+		ready.Status, ready.Reason, ready.Message = metav1.ConditionFalse, "ReconcileFailed", reconcileErr.Error()
+	}
+	apimeta.SetStatusCondition(&experiment.Status.Conditions, ready)
+
+	// WorkflowReady follows the TestWorkflow itself: it is True only when a TestWorkflow is listed in
+	// generatedResources, so a skipped or failed workflow is not reported as ready.
+	wf := metav1.Condition{
+		Type:               conditionWorkflowReady,
+		Status:             metav1.ConditionFalse,
+		ObservedGeneration: experiment.Generation,
+		Reason:             "WorkflowNotReady",
+		Message:            "TestWorkflow was not reconciled",
+	}
+	if reconcileErr != nil {
+		wf.Message = reconcileErr.Error()
+	}
+	for _, gr := range generatedResources {
+		if gr.Kind == testWorkflowGVK.Kind {
+			wf.Status, wf.Reason, wf.Message = metav1.ConditionTrue, "WorkflowCreated", "TestWorkflow created successfully"
+			break
+		}
+	}
+	apimeta.SetStatusCondition(&experiment.Status.Conditions, wf)
+
+	return r.Status().Update(ctx, experiment)
+}
+
+// resolveAgentURL returns http://<agent>.<namespace>:<defaultAgentPort>; the namespace defaults to the Experiment's.
+func (r *ExperimentReconciler) resolveAgentURL(experiment *testbenchv1alpha1.Experiment) string {
+	agent := experiment.Spec.AgentRef
+	if agent.Namespace == "" {
+		agent.Namespace = experiment.Namespace
+	}
+	return fmt.Sprintf("http://%s.%s:%s", agent.Name, agent.Namespace, defaultAgentPort)
+}
+
+// resolveDatasetURL returns the dataset URL if set, else an s3://bucket/key URL, else the empty string.
+func (r *ExperimentReconciler) resolveDatasetURL(experiment *testbenchv1alpha1.Experiment) string {
+	ds := experiment.Spec.Dataset
+	switch {
+	case ds == nil:
+		return ""
+	case ds.URL != "":
+		return ds.URL
+	case ds.S3 != nil:
+		return fmt.Sprintf("s3://%s/%s", ds.S3.Bucket, ds.S3.Key)
+	}
+	return ""
 }
 
 // SetupWithManager sets up the controller with the Manager.
 func (r *ExperimentReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
 		For(&testbenchv1alpha1.Experiment{}).
+		Owns(&corev1.ConfigMap{}). // events on owned ConfigMaps reconcile the owning Experiment
 		Complete(r)
 }
+
+// isCRDNotInstalled reports whether err is a no-match error (apimeta.IsNoMatchError), which callers treat as a missing CRD.
+func isCRDNotInstalled(err error) bool {
+	return apimeta.IsNoMatchError(err)
+}
diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go
index d36b947..90d0ed0 100644
--- a/operator/internal/controller/experiment_controller_test.go
+++ b/operator/internal/controller/experiment_controller_test.go
@@ -18,67 +18,589 @@ package controller
 
 import (
 	"context"
+	"encoding/json"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-
 	testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1"
 )
 
 var _ = Describe("Experiment Controller", func() {
-	Context("When reconciling a resource", func() {
-		const resourceName = "test-resource"
+	const namespace = "default"
+	ctx := context.Background()
 
-		ctx := context.Background()
+	newReconciler := func() *ExperimentReconciler {
+		return &ExperimentReconciler{
+			Client: k8sClient,
+			Scheme: k8sClient.Scheme(),
+		}
+	}
 
-		typeNamespacedName := types.NamespacedName{
-			Name:      resourceName,
-			Namespace: "default", // TODO(user):Modify as needed
+	reconcileExperiment := func(name string) error {
+		_, err := newReconciler().Reconcile(ctx, reconcile.Request{
+			NamespacedName: types.NamespacedName{Name: name, Namespace: namespace},
+		})
+		return err
+	}
+
+	cleanupExperiment := func(name string) {
+		exp := &testbenchv1alpha1.Experiment{}
+		if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, exp); err == nil {
+			_ = k8sClient.Delete(ctx, exp)
+		}
+		cm := &corev1.ConfigMap{}
+		if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, cm); err == nil {
+			_ = k8sClient.Delete(ctx, cm)
+		}
+		wf := &unstructured.Unstructured{}
+		wf.SetGroupVersionKind(testWorkflowGVK)
+		if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, wf); err == nil {
+			_ = k8sClient.Delete(ctx, wf)
+		}
+		trig := &unstructured.Unstructured{}
+		trig.SetGroupVersionKind(testTriggerGVK)
+		if err := k8sClient.Get(ctx, types.NamespacedName{Name: name + "-trigger", Namespace: namespace}, trig); err == nil {
+			_ = k8sClient.Delete(ctx, trig)
 		}
-		experiment := &testbenchv1alpha1.Experiment{}
+	}
+
+	Context("Scenarios mode reconciliation", func() {
+		const expName = "exp-scenarios"
 
 		BeforeEach(func() {
-			By("creating the custom resource for the Kind Experiment")
-			err := k8sClient.Get(ctx, typeNamespacedName, experiment)
-			if err != nil && errors.IsNotFound(err) {
-				resource := &testbenchv1alpha1.Experiment{
-					ObjectMeta: metav1.ObjectMeta{
-						Name:      resourceName,
-						Namespace: "default",
+			By("creating the Experiment with inline scenarios")
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef:         testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"},
+					LLMAsAJudgeModel: "gemini-2.5-flash-lite",
+					DefaultThreshold: 0.9,
+					Scenarios: []testbenchv1alpha1.Scenario{
+						{
+							Name: "test scenario",
+							Steps: []testbenchv1alpha1.Step{
+								{
+									Input: "What is the weather?",
+									Reference: &testbenchv1alpha1.Reference{
+										Response: "It is sunny",
+										Topics:   []string{"weather"},
+										ToolCalls: []testbenchv1alpha1.ToolCall{
+											{
+												Name: "get_weather",
+												Args: runtime.RawExtension{Raw: []byte(`{"city":"NY"}`)},
+											},
+										},
+									},
+									Metrics: []testbenchv1alpha1.Metric{
+										{MetricName: "AgentGoalAccuracy"},
+									},
+								},
+							},
+						},
 					},
-					// TODO(user): Specify other spec details if needed.
+				},
+			}
+			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
+		})
+
+		AfterEach(func() {
+			cleanupExperiment(expName)
+		})
+
+		It("should create a ConfigMap with experiment.json", func() {
+			By("reconciling the Experiment")
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			By("checking the ConfigMap exists")
+			cm := &corev1.ConfigMap{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed())
+			Expect(cm.Data).To(HaveKey("experiment.json"))
+
+			By("verifying the experiment.json content")
+			var expJSON experimentJSON
+			Expect(json.Unmarshal([]byte(cm.Data["experiment.json"]), &expJSON)).To(Succeed())
+			Expect(expJSON.LLMAsAJudgeModel).To(Equal("gemini-2.5-flash-lite"))
+			Expect(expJSON.DefaultThreshold).To(Equal(0.9))
+			Expect(expJSON.Scenarios).To(HaveLen(1))
+			Expect(expJSON.Scenarios[0].Name).To(Equal("test scenario"))
+			Expect(expJSON.Scenarios[0].Steps).To(HaveLen(1))
+			Expect(expJSON.Scenarios[0].Steps[0].Input).To(Equal("What is the weather?"))
+			Expect(expJSON.Scenarios[0].Steps[0].Reference).NotTo(BeNil())
+			Expect(expJSON.Scenarios[0].Steps[0].Reference.Response).To(Equal("It is sunny"))
+			Expect(expJSON.Scenarios[0].Steps[0].Reference.Topics).To(ConsistOf("weather"))
+			Expect(expJSON.Scenarios[0].Steps[0].Reference.ToolCalls).To(HaveLen(1))
+			Expect(expJSON.Scenarios[0].Steps[0].Reference.ToolCalls[0].Name).To(Equal("get_weather"))
+			Expect(expJSON.Scenarios[0].Steps[0].Metrics).To(HaveLen(1))
+			Expect(expJSON.Scenarios[0].Steps[0].Metrics[0].MetricName).To(Equal("AgentGoalAccuracy"))
+		})
+
+		It("should set ConfigMap owner reference to the Experiment", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			cm := &corev1.ConfigMap{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed())
+			Expect(cm.OwnerReferences).To(HaveLen(1))
+			Expect(cm.OwnerReferences[0].Kind).To(Equal("Experiment"))
+			Expect(cm.OwnerReferences[0].Name).To(Equal(expName))
+			Expect(cm.OwnerReferences[0].Controller).NotTo(BeNil())
+			Expect(*cm.OwnerReferences[0].Controller).To(BeTrue())
+		})
+
+		It("should create a TestWorkflow without setup-template", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			wf := &unstructured.Unstructured{}
+			wf.SetGroupVersionKind(testWorkflowGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+
+			spec := wf.Object["spec"].(map[string]interface{})
+
+			By("checking content.files mounts the ConfigMap")
+			content, ok := spec["content"].(map[string]interface{})
+			Expect(ok).To(BeTrue(), "spec.content should be present in scenarios mode")
+			files := content["files"].([]interface{})
+			Expect(files).To(HaveLen(1))
+			file := files[0].(map[string]interface{})
+			Expect(file["path"]).To(Equal("/data/datasets/experiment.json"))
+			contentFrom := file["contentFrom"].(map[string]interface{})
+			cmRef := contentFrom["configMapKeyRef"].(map[string]interface{})
+			Expect(cmRef["name"]).To(Equal(expName))
+			Expect(cmRef["key"]).To(Equal("experiment.json"))
+
+			By("checking use templates do NOT include setup-template")
+			use := spec["use"].([]interface{})
+			templateNames := make([]string, 0, len(use))
+			for _, u := range use {
+				templateNames = append(templateNames, u.(map[string]interface{})["name"].(string))
+			}
+			Expect(templateNames).NotTo(ContainElement("setup-template"))
+			Expect(templateNames).To(ContainElements("run-template", "evaluate-template", "publish-template", "visualize-template"))
+
+			By("checking the run-template has the correct agentUrl")
+			for _, u := range use {
+				um := u.(map[string]interface{})
+				if um["name"] == "run-template" {
+					cfg := um["config"].(map[string]interface{})
+					Expect(cfg["agentUrl"]).To(Equal("http://my-agent.agents:8000"))
 				}
-				Expect(k8sClient.Create(ctx, resource)).To(Succeed())
 			}
 		})
 
+		It("should set TestWorkflow owner reference", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			wf := &unstructured.Unstructured{}
+			wf.SetGroupVersionKind(testWorkflowGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+			Expect(wf.GetOwnerReferences()).To(HaveLen(1))
+			Expect(wf.GetOwnerReferences()[0].Kind).To(Equal("Experiment"))
+			Expect(wf.GetOwnerReferences()[0].Name).To(Equal(expName))
+		})
+
+		It("should not create a TestTrigger when trigger is nil", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			trig := &unstructured.Unstructured{}
+			trig.SetGroupVersionKind(testTriggerGVK)
+			err := k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-trigger", Namespace: namespace}, trig)
+			Expect(errors.IsNotFound(err)).To(BeTrue())
+		})
+
+		It("should set Ready=True status condition after successful reconciliation", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			exp := &testbenchv1alpha1.Experiment{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed())
+
+			var readyCond *metav1.Condition
+			for i := range exp.Status.Conditions {
+				if exp.Status.Conditions[i].Type == conditionReady {
+					readyCond = &exp.Status.Conditions[i]
+					break
+				}
+			}
+			Expect(readyCond).NotTo(BeNil())
+			Expect(readyCond.Status).To(Equal(metav1.ConditionTrue))
+			Expect(readyCond.Reason).To(Equal("ReconcileSucceeded"))
+			Expect(readyCond.ObservedGeneration).To(Equal(exp.Generation))
+		})
+
+		It("should populate generatedResources in status", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			exp := &testbenchv1alpha1.Experiment{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed())
+
+			kinds := make([]string, 0, len(exp.Status.GeneratedResources))
+			for _, gr := range exp.Status.GeneratedResources {
+				kinds = append(kinds, gr.Kind)
+			}
+			Expect(kinds).To(ContainElements("ConfigMap", "TestWorkflow"))
+		})
+
+		It("should be idempotent on re-reconciliation", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			cmList := &corev1.ConfigMapList{}
+			Expect(k8sClient.List(ctx, cmList,
+				client.InNamespace(namespace), client.MatchingLabels{})).To(Succeed())
+			count := 0
+			for _, cm := range cmList.Items {
+				if cm.Name == expName {
+					count++
+				}
+			}
+			Expect(count).To(Equal(1))
+		})
+	})
+
+	Context("Dataset mode reconciliation", func() {
+		const expName = "exp-dataset"
+
+		BeforeEach(func() {
+			By("creating the Experiment with a dataset URL")
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"},
+					Dataset: &testbenchv1alpha1.DatasetSource{
+						URL: "http://data-server/dataset.csv",
+					},
+				},
+			}
+			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
+		})
+
 		AfterEach(func() {
-			// TODO(user): Cleanup logic after each test, like removing the resource instance.
-			resource := &testbenchv1alpha1.Experiment{}
-			err := k8sClient.Get(ctx, typeNamespacedName, resource)
+			cleanupExperiment(expName)
+		})
+
+		It("should create a ConfigMap with empty scenarios as placeholder", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			cm := &corev1.ConfigMap{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed())
+			Expect(cm.Data).To(HaveKey("experiment.json"))
+
+			var expJSON experimentJSON
+			Expect(json.Unmarshal([]byte(cm.Data["experiment.json"]), &expJSON)).To(Succeed())
+			Expect(expJSON.Scenarios).To(BeEmpty())
+		})
+
+		It("should create a TestWorkflow with setup-template and correct datasetUrl", func() {
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			wf := &unstructured.Unstructured{}
+			wf.SetGroupVersionKind(testWorkflowGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+
+			spec := wf.Object["spec"].(map[string]interface{})
+
+			By("checking no content.files in dataset mode")
+			_, hasContent := spec["content"]
+			Expect(hasContent).To(BeFalse(), "spec.content should be absent in dataset mode")
+
+			By("checking setup-template is first in use list")
+			use := spec["use"].([]interface{})
+			first := use[0].(map[string]interface{})
+			Expect(first["name"]).To(Equal("setup-template"))
+			cfg := first["config"].(map[string]interface{})
+			Expect(cfg["datasetUrl"]).To(Equal("http://data-server/dataset.csv"))
+		})
+
+		It("should resolve S3 dataset URL correctly", func() {
+			exp := &testbenchv1alpha1.Experiment{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed())
+			exp.Spec.Dataset = &testbenchv1alpha1.DatasetSource{
+				S3: &testbenchv1alpha1.S3Source{Bucket: "my-bucket", Key: "data/dataset.csv"},
+			}
+			Expect(k8sClient.Update(ctx, exp)).To(Succeed())
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			wf := &unstructured.Unstructured{}
+			wf.SetGroupVersionKind(testWorkflowGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+			spec := wf.Object["spec"].(map[string]interface{})
+			use := spec["use"].([]interface{})
+			first := use[0].(map[string]interface{})
+			Expect(first["name"]).To(Equal("setup-template"))
+			Expect(first["config"].(map[string]interface{})["datasetUrl"]).
+				To(Equal("s3://my-bucket/data/dataset.csv"))
+		})
+	})
+
+	Context("Trigger management", func() {
+		const expName = "exp-trigger"
+
+		createExperiment := func(triggerEnabled bool, policy string) {
+			trigger := &testbenchv1alpha1.TriggerSpec{
+				Enabled:           triggerEnabled,
+				ConcurrencyPolicy: policy,
+			}
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"},
+					Scenarios: []testbenchv1alpha1.Scenario{
+						{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}},
+					},
+					Trigger: trigger,
+				},
+			}
+			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
+		}
+
+		AfterEach(func() {
+			cleanupExperiment(expName)
+		})
+
+		It("should create a TestTrigger when trigger.enabled=true", func() {
+			createExperiment(true, "Forbid")
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			trig := &unstructured.Unstructured{}
+			trig.SetGroupVersionKind(testTriggerGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{
+				Name:      expName + "-trigger",
+				Namespace: namespace,
+			}, trig)).To(Succeed())
+
+			spec := trig.Object["spec"].(map[string]interface{})
+			Expect(spec["resource"]).To(Equal("deployment"))
+			Expect(spec["concurrencyPolicy"]).To(Equal("forbid"))
+			Expect(spec["action"]).To(Equal("run"))
+			Expect(spec["execution"]).To(Equal("testworkflow"))
+			Expect(spec["disabled"]).To(BeFalse())
+
+			resSelector := spec["resourceSelector"].(map[string]interface{})
+			Expect(resSelector["name"]).To(Equal("my-agent"))
+			Expect(resSelector["namespace"]).To(Equal("agents"))
+
+			testSelector := spec["testSelector"].(map[string]interface{})
+			Expect(testSelector["name"]).To(Equal(expName))
+			Expect(testSelector["namespace"]).To(Equal(namespace))
+		})
+
+		It("should set TestTrigger owner reference", func() {
+			createExperiment(true, "Allow")
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			trig := &unstructured.Unstructured{}
+			trig.SetGroupVersionKind(testTriggerGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{
+				Name:      expName + "-trigger",
+				Namespace: namespace,
+			}, trig)).To(Succeed())
+			Expect(trig.GetOwnerReferences()).To(HaveLen(1))
+			Expect(trig.GetOwnerReferences()[0].Kind).To(Equal("Experiment"))
+		})
+
+		It("should not create a TestTrigger when trigger.enabled=false", func() {
+			createExperiment(false, "")
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			trig := &unstructured.Unstructured{}
+			trig.SetGroupVersionKind(testTriggerGVK)
+			err := k8sClient.Get(ctx, types.NamespacedName{
+				Name:      expName + "-trigger",
+				Namespace: namespace,
+			}, trig)
+			Expect(errors.IsNotFound(err)).To(BeTrue())
+		})
+
+		It("should delete the TestTrigger when trigger is disabled after being enabled", func() {
+			By("creating an experiment with trigger enabled")
+			createExperiment(true, "Allow")
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			trig := &unstructured.Unstructured{}
+			trig.SetGroupVersionKind(testTriggerGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{
+				Name:      expName + "-trigger",
+				Namespace: namespace,
+			}, trig)).To(Succeed())
+
+			By("disabling the trigger")
+			exp := &testbenchv1alpha1.Experiment{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed())
+			exp.Spec.Trigger.Enabled = false
+			Expect(k8sClient.Update(ctx, exp)).To(Succeed())
+
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			err := k8sClient.Get(ctx, types.NamespacedName{
+				Name:      expName + "-trigger",
+				Namespace: namespace,
+			}, trig)
+			Expect(errors.IsNotFound(err)).To(BeTrue())
+		})
+
+		It("should include TestTrigger in generatedResources when enabled", func() {
+			createExperiment(true, "Allow")
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			exp := &testbenchv1alpha1.Experiment{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed())
+
+			kinds := make([]string, 0, len(exp.Status.GeneratedResources))
+			for _, gr := range exp.Status.GeneratedResources {
+				kinds = append(kinds, gr.Kind)
+			}
+			Expect(kinds).To(ContainElements("ConfigMap", "TestWorkflow", "TestTrigger"))
+		})
+	})
+
+	Context("Status management", func() {
+		const expName = "exp-status"
+
+		AfterEach(func() {
+			cleanupExperiment(expName)
+		})
+
+		It("should set WorkflowReady condition to True on success", func() {
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent"},
+					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+				},
+			}
+			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed())
+			var wfCond *metav1.Condition
+			for i := range exp.Status.Conditions {
+				if exp.Status.Conditions[i].Type == conditionWorkflowReady {
+					wfCond = &exp.Status.Conditions[i]
+					break
+				}
+			}
+			Expect(wfCond).NotTo(BeNil())
+			Expect(wfCond.Status).To(Equal(metav1.ConditionTrue))
+		})
+
+		It("should handle missing Experiment gracefully (not found)", func() {
+			err := reconcileExperiment("nonexistent")
 			Expect(err).NotTo(HaveOccurred())
+		})
+	})
 
-			By("Cleanup the specific resource instance Experiment")
-			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
+	Context("Agent URL resolution", func() {
+		It("should use agentRef.Namespace for the agent URL", func() {
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "weather-agent", Namespace: "sample-agents"},
+				},
+			}
+			Expect(r.resolveAgentURL(exp)).To(Equal("http://weather-agent.sample-agents:8000"))
 		})
-		It("should successfully reconcile the resource", func() {
-			By("Reconciling the created resource")
-			controllerReconciler := &ExperimentReconciler{
-				Client: k8sClient,
-				Scheme: k8sClient.Scheme(),
+
+		It("should fall back to experiment namespace when agentRef.Namespace is empty", func() {
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Namespace: "my-ns"},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent"},
+				},
 			}
+			Expect(r.resolveAgentURL(exp)).To(Equal("http://my-agent.my-ns:8000"))
+		})
+	})
 
-			_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
-				NamespacedName: typeNamespacedName,
-			})
+	Context("buildExperimentJSON", func() {
+		It("should serialize customValues and metric parameters as raw JSON", func() {
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					DefaultThreshold: 0.8,
+					Scenarios: []testbenchv1alpha1.Scenario{
+						{
+							Name: "s",
+							Steps: []testbenchv1alpha1.Step{
+								{
+									Input:        "q",
+									CustomValues: runtime.RawExtension{Raw: []byte(`{"key":"value"}`)},
+									Metrics: []testbenchv1alpha1.Metric{
+										{
+											MetricName: "M",
+											Threshold:  0.7,
+											Parameters: runtime.RawExtension{Raw: []byte(`{"mode":"precision"}`)},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}
+			data, err := r.buildExperimentJSON(exp)
 			Expect(err).NotTo(HaveOccurred())
-			// TODO(user): Add more specific assertions depending on your controller's reconciliation logic.
-			// Example: If you expect a certain status condition after reconciliation, verify it here.
+
+			var result experimentJSON
+			Expect(json.Unmarshal([]byte(data), &result)).To(Succeed())
+			Expect(result.DefaultThreshold).To(Equal(0.8))
+			Expect(result.Scenarios[0].Steps[0].CustomValues).To(MatchJSON(`{"key":"value"}`))
+			Expect(result.Scenarios[0].Steps[0].Metrics[0].Parameters).To(MatchJSON(`{"mode":"precision"}`))
+		})
+
+		It("should produce empty scenarios list for dataset mode", func() {
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					DefaultThreshold: 0.9,
+					Dataset:          &testbenchv1alpha1.DatasetSource{URL: "http://example.com/data.csv"},
+				},
+			}
+			data, err := r.buildExperimentJSON(exp)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(data).To(ContainSubstring(`"scenarios": []`))
+		})
+	})
+
+	Context("OTel env var injection", func() {
+		const expName = "exp-otel"
+
+		AfterEach(func() {
+			cleanupExperiment(expName)
+		})
+
+		It("should inject OTEL_EXPORTER_OTLP_ENDPOINT from otel-config ConfigMap", func() {
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent"},
+					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+				},
+			}
+			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			wf := &unstructured.Unstructured{}
+			wf.SetGroupVersionKind(testWorkflowGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+
+			spec := wf.Object["spec"].(map[string]interface{})
+			container := spec["container"].(map[string]interface{})
+			envList := container["env"].([]interface{})
+			Expect(envList).To(HaveLen(1))
+			envVar := envList[0].(map[string]interface{})
+			Expect(envVar["name"]).To(Equal(otelEndpointKey))
+			valueFrom := envVar["valueFrom"].(map[string]interface{})
+			cmRef := valueFrom["configMapKeyRef"].(map[string]interface{})
+			Expect(cmRef["name"]).To(Equal(otelConfigMapName))
+			Expect(cmRef["key"]).To(Equal(otelEndpointKey))
 		})
 	})
 })
diff --git a/operator/internal/controller/suite_test.go b/operator/internal/controller/suite_test.go
index d89999c..6250695 100644
--- a/operator/internal/controller/suite_test.go
+++ b/operator/internal/controller/suite_test.go
@@ -54,7 +54,10 @@ var _ = BeforeSuite(func() {
 
 	By("bootstrapping test environment")
 	testEnv = &envtest.Environment{
-		CRDDirectoryPaths:     []string{filepath.Join("..", "..", "config", "crd", "bases")},
+		CRDDirectoryPaths: []string{
+			filepath.Join("..", "..", "config", "crd", "bases"),
+			filepath.Join("testdata", "crds"),
+		},
 		ErrorIfCRDPathMissing: true,
 
 		// The BinaryAssetsDirectory is only required if you want to run the tests directly
diff --git a/operator/internal/controller/testdata/crds/tests.testkube.io_testtriggers.yaml b/operator/internal/controller/testdata/crds/tests.testkube.io_testtriggers.yaml
new file mode 100644
index 0000000..ff40b84
--- /dev/null
+++ b/operator/internal/controller/testdata/crds/tests.testkube.io_testtriggers.yaml
@@ -0,0 +1,21 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  name: testtriggers.tests.testkube.io
+spec:
+  group: tests.testkube.io
+  names:
+    kind: TestTrigger
+    listKind: TestTriggerList
+    plural: testtriggers
+    singular: testtrigger
+  scope: Namespaced
+  versions:
+    - name: v1
+      served: true
+      storage: true
+      schema:
+        openAPIV3Schema:
+          type: object
+          x-kubernetes-preserve-unknown-fields: true
diff --git a/operator/internal/controller/testdata/crds/testworkflows.testkube.io_testworkflows.yaml b/operator/internal/controller/testdata/crds/testworkflows.testkube.io_testworkflows.yaml
new file mode 100644
index 0000000..1d54d9b
--- /dev/null
+++ b/operator/internal/controller/testdata/crds/testworkflows.testkube.io_testworkflows.yaml
@@ -0,0 +1,21 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  name: testworkflows.testworkflows.testkube.io
+spec:
+  group: testworkflows.testkube.io
+  names:
+    kind: TestWorkflow
+    listKind: TestWorkflowList
+    plural: testworkflows
+    singular: testworkflow
+  scope: Namespaced
+  versions:
+    - name: v1
+      served: true
+      storage: true
+      schema:
+        openAPIV3Schema:
+          type: object
+          x-kubernetes-preserve-unknown-fields: true

From 23e67016009da1354dd78fdbd8a881a0834752c2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 17 Mar 2026 12:04:58 +0000
Subject: [PATCH 03/19] Final verification and documentation complete

Co-authored-by: fmallmann <30110193+fmallmann@users.noreply.github.com>
---
 operator/go.mod | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/operator/go.mod b/operator/go.mod
index 27d9d75..c4da270 100644
--- a/operator/go.mod
+++ b/operator/go.mod
@@ -5,6 +5,7 @@ go 1.25.0
 require (
 	github.com/onsi/ginkgo/v2 v2.28.1
 	github.com/onsi/gomega v1.39.1
+	k8s.io/api v0.35.2
 	k8s.io/apimachinery v0.35.2
 	k8s.io/client-go v0.35.2
 	sigs.k8s.io/controller-runtime v0.23.3
@@ -85,7 +86,6 @@ require (
 	gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	k8s.io/api v0.35.2 // indirect
 	k8s.io/apiextensions-apiserver v0.35.0 // indirect
 	k8s.io/apiserver v0.35.0 // indirect
 	k8s.io/component-base v0.35.0 // indirect

From 8c1c97d0c2756244c4f762016e1677dfb38933cb Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Tue, 24 Mar 2026 15:30:03 +0100
Subject: [PATCH 04/19] feat: update Experiment reconciler to enhance resource
 reconciliation and status reporting

---
 operator/Dockerfile                           |  2 +-
 operator/config/manager/kustomization.yaml    |  6 ++
 .../testbench_v1alpha1_experiment.yaml        |  2 -
 .../controller/experiment_controller.go       | 69 +++++++++++++------
 .../controller/experiment_controller_test.go  | 26 +++----
 5 files changed, 67 insertions(+), 38 deletions(-)

diff --git a/operator/Dockerfile b/operator/Dockerfile
index a48973e..5a82af7 100644
--- a/operator/Dockerfile
+++ b/operator/Dockerfile
@@ -1,5 +1,5 @@
 # Build the manager binary
-FROM golang:1.22 AS builder
+FROM golang:1.25 AS builder
 ARG TARGETOS
 ARG TARGETARCH
 
diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml
index 5c5f0b8..ad13e96 100644
--- a/operator/config/manager/kustomization.yaml
+++ b/operator/config/manager/kustomization.yaml
@@ -1,2 +1,8 @@
 resources:
 - manager.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+images:
+- name: controller
+  newName: controller
+  newTag: latest
diff --git a/operator/config/samples/testbench_v1alpha1_experiment.yaml b/operator/config/samples/testbench_v1alpha1_experiment.yaml
index 28ab44e..0bb8ea1 100644
--- a/operator/config/samples/testbench_v1alpha1_experiment.yaml
+++ b/operator/config/samples/testbench_v1alpha1_experiment.yaml
@@ -17,5 +17,3 @@ spec:
       key: dataset.csv
   trigger:
     enabled: true
-    event: on_push
-    concurrencyPolicy: Forbid
diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go
index 7565db7..eb240d8 100644
--- a/operator/internal/controller/experiment_controller.go
+++ b/operator/internal/controller/experiment_controller.go
@@ -118,9 +118,9 @@ func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	var generatedResources []testbenchv1alpha1.GeneratedResource
-	reconcileErr := r.reconcileResources(ctx, experiment, &generatedResources)
+	result, reconcileErr := r.reconcileResources(ctx, experiment, &generatedResources)
 
-	if statusErr := r.updateStatus(ctx, experiment, generatedResources, reconcileErr); statusErr != nil {
+	if statusErr := r.updateStatus(ctx, experiment, generatedResources, result, reconcileErr); statusErr != nil {
 		logger.Error(statusErr, "failed to update status")
 		return ctrl.Result{}, statusErr
 	}
@@ -128,21 +128,31 @@ func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	return ctrl.Result{}, reconcileErr
 }
 
+// reconcileResult records the TestWorkflow outcome of one reconcile pass (error or skip) so status conditions can be set accurately.
+type reconcileResult struct {
+	workflowSkipped bool  // first return value of reconcileTestWorkflow (true when it skipped the workflow)
+	workflowErr     error // error returned by reconcileTestWorkflow, nil otherwise
+}
+
 func (r *ExperimentReconciler) reconcileResources(
 	ctx context.Context,
 	experiment *testbenchv1alpha1.Experiment,
 	generatedResources *[]testbenchv1alpha1.GeneratedResource,
-) error {
+) (reconcileResult, error) {
+	var result reconcileResult
 	if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil {
-		return fmt.Errorf("reconciling ConfigMap: %w", err)
+		return result, fmt.Errorf("reconciling ConfigMap: %w", err)
 	}
-	if err := r.reconcileTestWorkflow(ctx, experiment, generatedResources); err != nil {
-		return fmt.Errorf("reconciling TestWorkflow: %w", err)
+	wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, generatedResources)
+	if err != nil {
+		result.workflowErr = err
+		return result, fmt.Errorf("reconciling TestWorkflow: %w", err)
 	}
+	result.workflowSkipped = wfSkipped
 	if err := r.reconcileTestTrigger(ctx, experiment, generatedResources); err != nil {
-		return fmt.Errorf("reconciling TestTrigger: %w", err)
+		return result, fmt.Errorf("reconciling TestTrigger: %w", err)
 	}
-	return nil
+	return result, nil
 }
 
 // reconcileConfigMap creates or updates the ConfigMap holding experiment.json.
@@ -151,9 +161,10 @@ func (r *ExperimentReconciler) reconcileConfigMap(
 	experiment *testbenchv1alpha1.Experiment,
 	generatedResources *[]testbenchv1alpha1.GeneratedResource,
 ) error {
+	cmName := experiment.Name + "-experiment"
 	cm := &corev1.ConfigMap{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      experiment.Name,
+			Name:      cmName,
 			Namespace: experiment.Namespace,
 		},
 	}
@@ -240,14 +251,15 @@ func (r *ExperimentReconciler) convertStep(step testbenchv1alpha1.Step) stepJSON
 }
 
 // reconcileTestWorkflow creates or updates the Testkube TestWorkflow for the Experiment.
+// It returns (skipped, error) where skipped is true when the CRD is not installed.
 func (r *ExperimentReconciler) reconcileTestWorkflow(
 	ctx context.Context,
 	experiment *testbenchv1alpha1.Experiment,
 	generatedResources *[]testbenchv1alpha1.GeneratedResource,
-) error {
+) (bool, error) {
 	workflow := r.buildTestWorkflow(experiment)
 	if err := controllerutil.SetControllerReference(experiment, workflow, r.Scheme); err != nil {
-		return err
+		return false, err
 	}
 
 	existing := &unstructured.Unstructured{}
@@ -255,19 +267,19 @@ func (r *ExperimentReconciler) reconcileTestWorkflow(
 	err := r.Get(ctx, types.NamespacedName{Name: workflow.GetName(), Namespace: workflow.GetNamespace()}, existing)
 	if errors.IsNotFound(err) {
 		if createErr := r.Create(ctx, workflow); createErr != nil {
-			return createErr
+			return false, createErr
 		}
 	} else if err != nil {
 		if isCRDNotInstalled(err) {
 			log.FromContext(ctx).Info("Testkube TestWorkflow CRD not installed; skipping TestWorkflow reconciliation")
-			return nil
+			return true, nil
 		}
-		return err
+		return false, err
 	} else {
 		existing.Object["spec"] = workflow.Object["spec"]
 		existing.SetOwnerReferences(workflow.GetOwnerReferences())
 		if updateErr := r.Update(ctx, existing); updateErr != nil {
-			return updateErr
+			return false, updateErr
 		}
 	}
 
@@ -276,7 +288,7 @@ func (r *ExperimentReconciler) reconcileTestWorkflow(
 		Name:      workflow.GetName(),
 		Namespace: workflow.GetNamespace(),
 	})
-	return nil
+	return false, nil
 }
 
 // buildTestWorkflow constructs the desired TestWorkflow unstructured object.
@@ -330,7 +342,7 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E
 					"path": "/data/datasets/experiment.json",
 					"contentFrom": map[string]interface{}{
 						"configMapKeyRef": map[string]interface{}{
-							"name": experiment.Name,
+							"name": experiment.Name + "-experiment",
 							"key":  "experiment.json",
 						},
 					},
@@ -344,7 +356,7 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E
 			"apiVersion": testWorkflowGVK.GroupVersion().String(),
 			"kind":       testWorkflowGVK.Kind,
 			"metadata": map[string]interface{}{
-				"name":      experiment.Name,
+				"name":      experiment.Name + "-workflow",
 				"namespace": experiment.Namespace,
 			},
 			"spec": spec,
@@ -436,12 +448,12 @@ func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Ex
 					"name":      experiment.Spec.AgentRef.Name,
 					"namespace": agentNs,
 				},
-				"event":             "modified",
+				"event":             r.resolveTriggerEvent(experiment),
 				"action":            "run",
 				"execution":         "testworkflow",
 				"concurrencyPolicy": concurrencyPolicy,
 				"testSelector": map[string]interface{}{
-					"name":      experiment.Name,
+					"name":      experiment.Name + "-workflow",
 					"namespace": experiment.Namespace,
 				},
 				"disabled": false,
@@ -455,6 +467,7 @@ func (r *ExperimentReconciler) updateStatus(
 	ctx context.Context,
 	experiment *testbenchv1alpha1.Experiment,
 	generatedResources []testbenchv1alpha1.GeneratedResource,
+	result reconcileResult,
 	reconcileErr error,
 ) error {
 	experiment.Status.GeneratedResources = generatedResources
@@ -478,10 +491,14 @@ func (r *ExperimentReconciler) updateStatus(
 	wfStatus := metav1.ConditionTrue
 	wfReason := "WorkflowCreated"
 	wfMsg := "TestWorkflow created successfully"
-	if reconcileErr != nil {
+	if result.workflowErr != nil {
 		wfStatus = metav1.ConditionFalse
 		wfReason = "WorkflowNotReady"
-		wfMsg = reconcileErr.Error()
+		wfMsg = result.workflowErr.Error()
+	} else if result.workflowSkipped {
+		wfStatus = metav1.ConditionFalse
+		wfReason = "CRDNotInstalled"
+		wfMsg = "TestWorkflow CRD not installed; workflow was not created"
 	}
 	apimeta.SetStatusCondition(&experiment.Status.Conditions, metav1.Condition{
 		Type:               conditionWorkflowReady,
@@ -494,6 +511,14 @@ func (r *ExperimentReconciler) updateStatus(
 	return r.Status().Update(ctx, experiment)
 }
 
+// resolveTriggerEvent returns Spec.Trigger.Event lower-cased, or "modified" when the trigger is nil or its event is empty.
+func (r *ExperimentReconciler) resolveTriggerEvent(experiment *testbenchv1alpha1.Experiment) string {
+	if experiment.Spec.Trigger != nil && experiment.Spec.Trigger.Event != "" {
+		return strings.ToLower(experiment.Spec.Trigger.Event)
+	}
+	return "modified"
+}
+
 // resolveAgentURL builds the in-cluster DNS URL for the agent service.
 func (r *ExperimentReconciler) resolveAgentURL(experiment *testbenchv1alpha1.Experiment) string {
 	ns := experiment.Spec.AgentRef.Namespace
diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go
index 90d0ed0..2a18a63 100644
--- a/operator/internal/controller/experiment_controller_test.go
+++ b/operator/internal/controller/experiment_controller_test.go
@@ -58,12 +58,12 @@ var _ = Describe("Experiment Controller", func() {
 			_ = k8sClient.Delete(ctx, exp)
 		}
 		cm := &corev1.ConfigMap{}
-		if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, cm); err == nil {
+		if err := k8sClient.Get(ctx, types.NamespacedName{Name: name + "-experiment", Namespace: namespace}, cm); err == nil {
 			_ = k8sClient.Delete(ctx, cm)
 		}
 		wf := &unstructured.Unstructured{}
 		wf.SetGroupVersionKind(testWorkflowGVK)
-		if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, wf); err == nil {
+		if err := k8sClient.Get(ctx, types.NamespacedName{Name: name + "-workflow", Namespace: namespace}, wf); err == nil {
 			_ = k8sClient.Delete(ctx, wf)
 		}
 		trig := &unstructured.Unstructured{}
@@ -122,7 +122,7 @@ var _ = Describe("Experiment Controller", func() {
 
 			By("checking the ConfigMap exists")
 			cm := &corev1.ConfigMap{}
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed())
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)).To(Succeed())
 			Expect(cm.Data).To(HaveKey("experiment.json"))
 
 			By("verifying the experiment.json content")
@@ -147,7 +147,7 @@ var _ = Describe("Experiment Controller", func() {
 			Expect(reconcileExperiment(expName)).To(Succeed())
 
 			cm := &corev1.ConfigMap{}
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed())
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)).To(Succeed())
 			Expect(cm.OwnerReferences).To(HaveLen(1))
 			Expect(cm.OwnerReferences[0].Kind).To(Equal("Experiment"))
 			Expect(cm.OwnerReferences[0].Name).To(Equal(expName))
@@ -160,7 +160,7 @@ var _ = Describe("Experiment Controller", func() {
 
 			wf := &unstructured.Unstructured{}
 			wf.SetGroupVersionKind(testWorkflowGVK)
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed())
 
 			spec := wf.Object["spec"].(map[string]interface{})
 
@@ -173,7 +173,7 @@ var _ = Describe("Experiment Controller", func() {
 			Expect(file["path"]).To(Equal("/data/datasets/experiment.json"))
 			contentFrom := file["contentFrom"].(map[string]interface{})
 			cmRef := contentFrom["configMapKeyRef"].(map[string]interface{})
-			Expect(cmRef["name"]).To(Equal(expName))
+			Expect(cmRef["name"]).To(Equal(expName + "-experiment"))
 			Expect(cmRef["key"]).To(Equal("experiment.json"))
 
 			By("checking use templates do NOT include setup-template")
@@ -200,7 +200,7 @@ var _ = Describe("Experiment Controller", func() {
 
 			wf := &unstructured.Unstructured{}
 			wf.SetGroupVersionKind(testWorkflowGVK)
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed())
 			Expect(wf.GetOwnerReferences()).To(HaveLen(1))
 			Expect(wf.GetOwnerReferences()[0].Kind).To(Equal("Experiment"))
 			Expect(wf.GetOwnerReferences()[0].Name).To(Equal(expName))
@@ -256,7 +256,7 @@ var _ = Describe("Experiment Controller", func() {
 				client.InNamespace(namespace), client.MatchingLabels{})).To(Succeed())
 			count := 0
 			for _, cm := range cmList.Items {
-				if cm.Name == expName {
+				if cm.Name == expName+"-experiment" {
 					count++
 				}
 			}
@@ -289,7 +289,7 @@ var _ = Describe("Experiment Controller", func() {
 			Expect(reconcileExperiment(expName)).To(Succeed())
 
 			cm := &corev1.ConfigMap{}
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed())
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)).To(Succeed())
 			Expect(cm.Data).To(HaveKey("experiment.json"))
 
 			var expJSON experimentJSON
@@ -302,7 +302,7 @@ var _ = Describe("Experiment Controller", func() {
 
 			wf := &unstructured.Unstructured{}
 			wf.SetGroupVersionKind(testWorkflowGVK)
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed())
 
 			spec := wf.Object["spec"].(map[string]interface{})
 
@@ -329,7 +329,7 @@ var _ = Describe("Experiment Controller", func() {
 
 			wf := &unstructured.Unstructured{}
 			wf.SetGroupVersionKind(testWorkflowGVK)
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed())
 			spec := wf.Object["spec"].(map[string]interface{})
 			use := spec["use"].([]interface{})
 			first := use[0].(map[string]interface{})
@@ -387,7 +387,7 @@ var _ = Describe("Experiment Controller", func() {
 			Expect(resSelector["namespace"]).To(Equal("agents"))
 
 			testSelector := spec["testSelector"].(map[string]interface{})
-			Expect(testSelector["name"]).To(Equal(expName))
+			Expect(testSelector["name"]).To(Equal(expName + "-workflow"))
 			Expect(testSelector["namespace"]).To(Equal(namespace))
 		})
 
@@ -589,7 +589,7 @@ var _ = Describe("Experiment Controller", func() {
 
 			wf := &unstructured.Unstructured{}
 			wf.SetGroupVersionKind(testWorkflowGVK)
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed())
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed())
 
 			spec := wf.Object["spec"].(map[string]interface{})
 			container := spec["container"].(map[string]interface{})

From 793ac47199c4df5715dd7e3deeb941652c56b047 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 14:52:33 +0100
Subject: [PATCH 05/19] docs: add design spec for AI Gateway resolution in
 Experiment reconciler

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../2026-03-25-aigateway-resolution-design.md | 177 ++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100644 docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md

diff --git a/docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md b/docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md
new file mode 100644
index 0000000..17109e1
--- /dev/null
+++ b/docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md
@@ -0,0 +1,177 @@
+# AI Gateway Resolution for Experiment Reconciler
+
+**Date:** 2026-03-25
+**Status:** Approved
+
+## Problem
+
+The Experiment reconciler creates a TestWorkflow with an evaluate-template that requires `OPENAI_BASE_URL` to access LLM models for evaluation metrics. Currently, the reconciler passes no config to the evaluate-template, so the base URL is unset. We need to resolve the AI Gateway service URL — following the same pattern as the agent-runtime-operator — and pass it as `openApiBasePath` to the evaluate-template config.
+
+## Approach
+
+**Resolve at reconcile time.** During `reconcileResources`, look up the AiGateway using the same 2-tier strategy as the agent-runtime-operator's `resolveAiGateway` method, build the in-cluster service URL, and bake it into the TestWorkflow's evaluate-template config entry.
+
+### Why this approach
+
+- Consistent with how `resolveAgentURL` already works (URL baked at reconcile time)
+- Simple — no runtime discovery needed by testbench scripts
+- AiGateway rarely changes; a watch can be added later if needed
+
+### Alternatives considered
+
+1. **Resolve + watch AiGateway** — re-enqueue Experiments on gateway changes. More complex, deferred for now.
+2. **Pass gateway name/namespace to script** — breaks the "reconciler resolves everything" pattern, adds Kubernetes awareness to Python side.
+
+## Design
+
+### 0. Evaluate-Template Default Value (Prerequisite)
+
+The existing `evaluate-template.yaml` declares `openApiBasePath` as a config parameter with no default value. When the reconciler omits this config (no AiGateway found), Testkube may reject the workflow or render an empty/error value. Add a default:
+
+```yaml
+config:
+  openApiBasePath:
+    type: string
+    description: "Base path for OpenAI API"
+    default: ""
+```
+
+This ensures the template is valid even when no `openApiBasePath` is provided.
+
+### 1. CRD Changes (`experiment_types.go`)
+
+Add an optional `AiGatewayRef` field to `ExperimentSpec`:
+
+```go
+import corev1 "k8s.io/api/core/v1"
+
+// AiGatewayRef references an AiGateway resource for LLM access during evaluation.
+// Only Name and Namespace fields are used.
+// +optional
+AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"`
+```
+
+Uses `corev1.ObjectReference` — same type the agent-runtime-operator uses for its `AiGatewayRef` on the Agent spec. Only `Name` and `Namespace` fields are used; other fields are ignored. This trades a heavier CRD schema for consistency with the agent-runtime-operator.
+
+### 2. Go Module Dependency
+
+Import the agent-runtime-operator module for typed `AiGateway` and `AiGatewayList` types:
+
+```
+go get github.com/agentic-layer/agent-runtime-operator
+```
+
+Register the types in the scheme:
+
+```go
+import runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
+
+runtimev1alpha1.AddToScheme(scheme)
+```
+
+### 3. AI Gateway Resolution (`experiment_controller.go`)
+
+Three new methods mirroring the agent-runtime-operator:
+
+**`resolveAiGateway(ctx, experiment) (*runtimev1alpha1.AiGateway, error)`**
+- If `experiment.Spec.AiGatewayRef` is set, call `resolveExplicitAiGateway`
+- Otherwise, call `resolveDefaultAiGateway`
+
+**`resolveExplicitAiGateway(ctx, ref, experimentNamespace) (*runtimev1alpha1.AiGateway, error)`**
+- Use ref's namespace, fall back to experiment's namespace
+- `r.Get()` the AiGateway by name/namespace
+- If CRD not installed (`meta.IsNoMatchError`), return clear error
+- If any other error, return wrapped error
+
+**`resolveDefaultAiGateway(ctx) (*runtimev1alpha1.AiGateway, error)`**
+- `r.List()` AiGateways in `ai-gateway` namespace
+- If CRD not installed, return `nil, nil`
+- If no items found, return `nil, nil`
+- If multiple found, log and pick the first one
+
+**Constants:**
+
+```go
+const defaultAiGatewayNamespace = "ai-gateway"
+```
+
+**URL builder:**
+
+```go
+func buildAiGatewayServiceUrl(aiGateway runtimev1alpha1.AiGateway) string {
+    return fmt.Sprintf("http://%s.%s.svc.cluster.local.:%d", aiGateway.Name, aiGateway.Namespace, aiGateway.Spec.Port)
+}
+```
+
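+Note: The URL format (including the trailing dot before the port) matches the agent-runtime-operator exactly. The trailing dot marks the hostname as a fully qualified domain name (rooted at the DNS root), so the resolver does not apply search-domain expansion; it is kept here for parity with the reference implementation.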
+
+### 4. Wiring into `buildTestWorkflow`
+
+**In `reconcileResources`**, before `reconcileTestWorkflow`:
+
+```go
+aiGateway, err := r.resolveAiGateway(ctx, experiment)
+if err != nil {
+    return result, fmt.Errorf("resolving AiGateway: %w", err)
+}
+```
+
+Pass the resolved gateway through `reconcileTestWorkflow` (which gains an `aiGateway` parameter) into `buildTestWorkflow` (new signature):
+
+```go
+func (r *ExperimentReconciler) buildTestWorkflow(
+    experiment *testbenchv1alpha1.Experiment,
+    aiGateway *runtimev1alpha1.AiGateway,
+) *unstructured.Unstructured
+```
+
+In the evaluate-template entry, conditionally set `openApiBasePath`:
+
+```go
+evaluateTemplate := map[string]interface{}{"name": "evaluate-template"}
+if aiGateway != nil {
+    evaluateTemplate["config"] = map[string]interface{}{
+        "openApiBasePath": buildAiGatewayServiceUrl(*aiGateway),
+    }
+}
+```
+
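+When no `aiGatewayRef` is set and default discovery finds no gateway, the evaluate-template falls back to its default empty-string config. No error, no degraded status. This is valid because `OPENAI_BASE_URL` is only needed when metrics require LLM-as-a-judge. An explicit `aiGatewayRef` that cannot be resolved is still reported as an error (see section 3).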
+
+### 5. RBAC
+
+Add kubebuilder RBAC marker:
+
+```go
+// +kubebuilder:rbac:groups=runtime.agentic-layer.ai,resources=aigateways,verbs=get;list;watch
+```
+
+### 6. Tests
+
+**Updated tests:**
+- Existing `buildTestWorkflow` calls pass `nil` as the aiGateway parameter
+
+**New tests:**
+- **Explicit AiGatewayRef resolved** — fake AiGateway, assert evaluate-template has `config.openApiBasePath` set to expected service URL
+- **No AiGateway found** — pass `nil`, assert evaluate-template has no `config` key
+- **resolveAiGateway with explicit ref** — seed fake client, verify correct gateway returned
+- **resolveDefaultAiGateway** — seed fake client with gateway in `ai-gateway` namespace, verify discovery
+- **resolveAiGateway with no gateway** — empty cluster, verify `nil, nil`
+
+## Known Limitations
+
+- **No AiGateway watch:** Changes to an AiGateway resource (e.g., port update) do not trigger re-reconciliation of Experiments. The TestWorkflow will have a stale URL until the Experiment is manually updated or re-reconciled. A watch handler (similar to `findAgentsReferencingAiGateway` in the agent-runtime-operator) can be added in a follow-up.
+- **No status reporting for resolution outcome:** The reconciler does not surface whether an AiGateway was resolved in the Experiment status. Users must inspect the generated TestWorkflow to verify. A dedicated `AiGatewayResolved` condition can be added in a follow-up.
+
+## Files Changed
+
+| File | Change |
+|------|--------|
+| `chart/templates/evaluate-template.yaml` | Add `default: ""` to `openApiBasePath` config |
+| `operator/api/v1alpha1/experiment_types.go` | Add `corev1` import and `AiGatewayRef` field |
+| `operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml` | Regenerated CRD |
+| `operator/internal/controller/experiment_controller.go` | Add resolution logic, wire into workflow builder |
+| `operator/internal/controller/experiment_controller_test.go` | Update existing + add new tests |
+| `operator/cmd/main.go` (or equivalent) | Register AiGateway scheme |
+| `operator/go.mod` / `operator/go.sum` | Add agent-runtime-operator dependency |
+| `operator/config/rbac/role.yaml` | Regenerated RBAC |
\ No newline at end of file

From c2d014199e68989bbf5be4fcbcbc9717c985fe73 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 15:17:32 +0100
Subject: [PATCH 06/19] docs: add implementation plan for AI Gateway resolution

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../plans/2026-03-25-aigateway-resolution.md  | 747 ++++++++++++++++++
 1 file changed, 747 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-03-25-aigateway-resolution.md

diff --git a/docs/superpowers/plans/2026-03-25-aigateway-resolution.md b/docs/superpowers/plans/2026-03-25-aigateway-resolution.md
new file mode 100644
index 0000000..6904d8c
--- /dev/null
+++ b/docs/superpowers/plans/2026-03-25-aigateway-resolution.md
@@ -0,0 +1,747 @@
+# AI Gateway Resolution Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Resolve the AiGateway resource in the Experiment reconciler and pass its service URL as `openApiBasePath` to the evaluate-template config in the generated TestWorkflow.
+
+**Architecture:** Mirror the agent-runtime-operator's 2-tier AiGateway resolution (explicit ref → default discovery in `ai-gateway` namespace). Import typed AiGateway types from the agent-runtime-operator module. Resolve at reconcile time and bake the URL into the TestWorkflow.
+
+**Tech Stack:** Go, Kubernetes controller-runtime, kubebuilder, Ginkgo/Gomega tests, envtest
+
+**Spec:** `docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md`
+
+---
+
+## File Structure
+
+| File | Action | Responsibility |
+|------|--------|----------------|
+| `chart/templates/evaluate-template.yaml` | Modify | Add `default: ""` to `openApiBasePath` config |
+| `operator/go.mod` | Modify | Add agent-runtime-operator dependency |
+| `operator/api/v1alpha1/experiment_types.go` | Modify | Add `AiGatewayRef` field to `ExperimentSpec` |
+| `operator/cmd/main.go` | Modify | Register AiGateway types in scheme |
+| `operator/internal/controller/suite_test.go` | Modify | Register AiGateway types in test scheme |
+| `operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml` | Create | Copy AiGateway CRD for envtest |
+| `operator/internal/controller/experiment_controller.go` | Modify | Add resolution methods, wire into buildTestWorkflow |
+| `operator/internal/controller/experiment_controller_test.go` | Modify | Update existing tests, add new resolution tests |
+
+---
+
+### Task 1: Add default value to evaluate-template config
+
+**Files:**
+- Modify: `chart/templates/evaluate-template.yaml:11-14`
+
+- [ ] **Step 1: Add `default: ""` to the openApiBasePath config parameter**
+
+In `chart/templates/evaluate-template.yaml`, change:
+
+```yaml
+  config:
+    openApiBasePath:
+      type: string
+      description: "Base path for OpenAI API"
+```
+
+to:
+
+```yaml
+  config:
+    openApiBasePath:
+      type: string
+      description: "Base path for OpenAI API"
+      default: ""
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+cd operator && git add ../chart/templates/evaluate-template.yaml
+git commit -m "fix: add default value to evaluate-template openApiBasePath config"
+```
+
+---
+
+### Task 2: Add agent-runtime-operator Go module dependency
+
+**Files:**
+- Modify: `operator/go.mod`
+
+- [ ] **Step 1: Add the dependency**
+
+Run from the `operator/` directory:
+
+```bash
+cd operator && go get github.com/agentic-layer/agent-runtime-operator@latest
+```
+
+Expected: `go.mod` and `go.sum` updated with the new dependency.
+
+- [ ] **Step 2: Verify it compiles**
+
+```bash
+cd operator && go build ./...
+```
+
+Expected: No errors.
+
+- [ ] **Step 3: Commit**
+
+```bash
+cd operator && git add go.mod go.sum
+git commit -m "build: add agent-runtime-operator module dependency"
+```
+
+---
+
+### Task 3: Add AiGatewayRef field to ExperimentSpec CRD
+
+**Files:**
+- Modify: `operator/api/v1alpha1/experiment_types.go:19-22` (imports) and `:158-189` (ExperimentSpec)
+
+- [ ] **Step 1: Write the failing test**
+
+In `operator/internal/controller/experiment_controller_test.go`, add a test inside a new `Context("AiGateway resolution")` block at the end of the `Describe`:
+
+```go
+Context("AiGateway resolution", func() {
+    It("should accept an Experiment with aiGatewayRef", func() {
+        exp := &testbenchv1alpha1.Experiment{
+            ObjectMeta: metav1.ObjectMeta{
+                Name:      "exp-gw-ref",
+                Namespace: namespace,
+            },
+            Spec: testbenchv1alpha1.ExperimentSpec{
+                AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"},
+                AiGatewayRef: &corev1.ObjectReference{
+                    Name:      "my-gateway",
+                    Namespace: "ai-gateway",
+                },
+                Scenarios: []testbenchv1alpha1.Scenario{
+                    {Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}},
+                },
+            },
+        }
+        Expect(k8sClient.Create(ctx, exp)).To(Succeed())
+        defer func() {
+            _ = k8sClient.Delete(ctx, exp)
+        }()
+
+        fetched := &testbenchv1alpha1.Experiment{}
+        Expect(k8sClient.Get(ctx, types.NamespacedName{
+            Name: "exp-gw-ref", Namespace: namespace,
+        }, fetched)).To(Succeed())
+        Expect(fetched.Spec.AiGatewayRef).NotTo(BeNil())
+        Expect(fetched.Spec.AiGatewayRef.Name).To(Equal("my-gateway"))
+        Expect(fetched.Spec.AiGatewayRef.Namespace).To(Equal("ai-gateway"))
+    })
+})
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+```bash
+cd operator && make test
+```
+
+Expected: FAIL — `AiGatewayRef` field does not exist on `ExperimentSpec`.
+
+- [ ] **Step 3: Add the AiGatewayRef field to ExperimentSpec**
+
+In `operator/api/v1alpha1/experiment_types.go`:
+
+Add import:
+```go
+corev1 "k8s.io/api/core/v1"
+```
+
+Add field to `ExperimentSpec` (after `AgentRef`):
+```go
+// AiGatewayRef references an AiGateway resource for LLM access during evaluation.
+// Only Name and Namespace fields are used.
+// +optional
+AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"`
+```
+
+- [ ] **Step 4: Regenerate CRD manifests**
+
+```bash
+cd operator && make manifests
+```
+
+- [ ] **Step 5: Run test to verify it passes**
+
+```bash
+cd operator && make test
+```
+
+Expected: PASS.
+
+- [ ] **Step 6: Commit**
+
+```bash
+cd operator && git add api/ config/crd/ internal/controller/experiment_controller_test.go
+git commit -m "feat: add AiGatewayRef field to ExperimentSpec CRD"
+```
+
+---
+
+### Task 4: Register AiGateway types in scheme and set up test CRD
+
+**Files:**
+- Modify: `operator/cmd/main.go:38-41` (imports) and `:48-53` (init)
+- Modify: `operator/internal/controller/suite_test.go:35-37` (imports) and `:78-79` (scheme)
+- Create: `operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml`
+
+- [ ] **Step 1: Copy the AiGateway CRD to testdata** (run from the testbench repository root; assumes `agent-runtime-operator` is checked out as a sibling directory)
+
+```bash
+cp ../agent-runtime-operator/config/crd/bases/runtime.agentic-layer.ai_aigateways.yaml \
+   operator/internal/controller/testdata/crds/
+```
+
+- [ ] **Step 2: Register AiGateway types in main.go**
+
+In `operator/cmd/main.go`:
+
+Add import:
+```go
+runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
+```
+
+Add to `init()` function, after the existing `testbenchv1alpha1.AddToScheme(scheme)` line:
+```go
+utilruntime.Must(runtimev1alpha1.AddToScheme(scheme))
+```
+
+- [ ] **Step 3: Register AiGateway types in suite_test.go**
+
+In `operator/internal/controller/suite_test.go`:
+
+Add import:
+```go
+runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
+```
+
+Add after `testbenchv1alpha1.AddToScheme(scheme.Scheme)`:
+```go
+err = runtimev1alpha1.AddToScheme(scheme.Scheme)
+Expect(err).NotTo(HaveOccurred())
+```
+
+- [ ] **Step 4: Verify tests still pass**
+
+```bash
+cd operator && make test
+```
+
+Expected: PASS (all existing tests still green).
+
+- [ ] **Step 5: Commit**
+
+```bash
+cd operator && git add cmd/main.go internal/controller/suite_test.go internal/controller/testdata/crds/
+git commit -m "build: register AiGateway types in scheme and add CRD to testdata"
+```
+
+---
+
+### Task 5: Implement AI Gateway resolution methods
+
+**Files:**
+- Modify: `operator/internal/controller/experiment_controller.go:19-39` (imports), `:41-47` (constants), and new methods after `resolveDatasetURL`
+- Modify: `operator/internal/controller/experiment_controller_test.go` (add resolution tests)
+
+- [ ] **Step 1: Write failing tests for resolveAiGateway**
+
+Add to the `"AiGateway resolution"` context in the test file:
+
+```go
+It("should resolve an explicit AiGateway by ref", func() {
+    By("creating an AiGateway resource")
+    gw := &runtimev1alpha1.AiGateway{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      "test-gateway",
+            Namespace: namespace,
+        },
+        Spec: runtimev1alpha1.AiGatewaySpec{
+            Port:     4000,
+            AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}},
+        },
+    }
+    Expect(k8sClient.Create(ctx, gw)).To(Succeed())
+    defer func() { _ = k8sClient.Delete(ctx, gw) }()
+
+    r := newReconciler()
+    exp := &testbenchv1alpha1.Experiment{
+        ObjectMeta: metav1.ObjectMeta{Namespace: namespace},
+        Spec: testbenchv1alpha1.ExperimentSpec{
+            AiGatewayRef: &corev1.ObjectReference{
+                Name:      "test-gateway",
+                Namespace: namespace,
+            },
+        },
+    }
+    resolved, err := r.resolveAiGateway(ctx, exp)
+    Expect(err).NotTo(HaveOccurred())
+    Expect(resolved).NotTo(BeNil())
+    Expect(resolved.Name).To(Equal("test-gateway"))
+    Expect(resolved.Spec.Port).To(Equal(int32(4000)))
+})
+
+It("should resolve default AiGateway from ai-gateway namespace", func() {
+    By("creating the ai-gateway namespace")
+    ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "ai-gateway"}}
+    _ = k8sClient.Create(ctx, ns)
+
+    By("creating an AiGateway in ai-gateway namespace")
+    gw := &runtimev1alpha1.AiGateway{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      "default-gw",
+            Namespace: "ai-gateway",
+        },
+        Spec: runtimev1alpha1.AiGatewaySpec{
+            Port:     80,
+            AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}},
+        },
+    }
+    Expect(k8sClient.Create(ctx, gw)).To(Succeed())
+    defer func() { _ = k8sClient.Delete(ctx, gw) }()
+
+    r := newReconciler()
+    exp := &testbenchv1alpha1.Experiment{
+        ObjectMeta: metav1.ObjectMeta{Namespace: namespace},
+        Spec:       testbenchv1alpha1.ExperimentSpec{},
+    }
+    resolved, err := r.resolveAiGateway(ctx, exp)
+    Expect(err).NotTo(HaveOccurred())
+    Expect(resolved).NotTo(BeNil())
+    Expect(resolved.Name).To(Equal("default-gw"))
+})
+
+It("should return nil when no AiGateway exists", func() {
+    r := newReconciler()
+    exp := &testbenchv1alpha1.Experiment{
+        ObjectMeta: metav1.ObjectMeta{Namespace: namespace},
+        Spec:       testbenchv1alpha1.ExperimentSpec{},
+    }
+    resolved, err := r.resolveAiGateway(ctx, exp)
+    Expect(err).NotTo(HaveOccurred())
+    Expect(resolved).To(BeNil())
+})
+
+It("should return error when explicit ref points to non-existent gateway", func() {
+    r := newReconciler()
+    exp := &testbenchv1alpha1.Experiment{
+        ObjectMeta: metav1.ObjectMeta{Namespace: namespace},
+        Spec: testbenchv1alpha1.ExperimentSpec{
+            AiGatewayRef: &corev1.ObjectReference{
+                Name:      "nonexistent",
+                Namespace: namespace,
+            },
+        },
+    }
+    _, err := r.resolveAiGateway(ctx, exp)
+    Expect(err).To(HaveOccurred())
+    Expect(err.Error()).To(ContainSubstring("failed to resolve AiGateway"))
+})
+```
+
+Add the `runtimev1alpha1` import to the test file:
+```go
+runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+```bash
+cd operator && make test
+```
+
+Expected: FAIL — `resolveAiGateway` method does not exist.
+
+- [ ] **Step 3: Implement the resolution methods**
+
+In `operator/internal/controller/experiment_controller.go`:
+
+Add import (note: `apimeta "k8s.io/apimachinery/pkg/api/meta"` is already imported — do not add it again):
+```go
+runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
+```
+
+Add constant:
+```go
+defaultAiGatewayNamespace = "ai-gateway"
+```
+
+Add RBAC marker (near the existing markers before `Reconcile`):
+```go
+// +kubebuilder:rbac:groups=runtime.agentic-layer.ai,resources=aigateways,verbs=get;list;watch
+```
+
+Add the three methods after `resolveDatasetURL`:
+
+```go
+// resolveAiGateway resolves the AiGateway resource for an experiment.
+// If AiGatewayRef is specified, looks up that specific gateway.
+// Otherwise, searches for any AiGateway in the "ai-gateway" namespace.
+// Returns nil (no error) if no AiGateway is found.
+func (r *ExperimentReconciler) resolveAiGateway(ctx context.Context, experiment *testbenchv1alpha1.Experiment) (*runtimev1alpha1.AiGateway, error) {
+	if experiment.Spec.AiGatewayRef != nil {
+		return r.resolveExplicitAiGateway(ctx, experiment.Spec.AiGatewayRef, experiment.Namespace)
+	}
+	return r.resolveDefaultAiGateway(ctx)
+}
+
+// resolveExplicitAiGateway resolves a specific AiGateway referenced by the experiment.
+func (r *ExperimentReconciler) resolveExplicitAiGateway(ctx context.Context, ref *corev1.ObjectReference, experimentNamespace string) (*runtimev1alpha1.AiGateway, error) {
+	namespace := ref.Namespace
+	if namespace == "" {
+		namespace = experimentNamespace
+	}
+
+	var aiGateway runtimev1alpha1.AiGateway
+	err := r.Get(ctx, types.NamespacedName{
+		Name:      ref.Name,
+		Namespace: namespace,
+	}, &aiGateway)
+
+	if err != nil {
+		if apimeta.IsNoMatchError(err) {
+			return nil, fmt.Errorf("AiGateway CRD is not installed in the cluster")
+		}
+		return nil, fmt.Errorf("failed to resolve AiGateway %s/%s: %w", namespace, ref.Name, err)
+	}
+
+	return &aiGateway, nil
+}
+
+// resolveDefaultAiGateway searches for any AiGateway in the default ai-gateway namespace.
+func (r *ExperimentReconciler) resolveDefaultAiGateway(ctx context.Context) (*runtimev1alpha1.AiGateway, error) {
+	logger := log.FromContext(ctx)
+
+	var aiGatewayList runtimev1alpha1.AiGatewayList
+	err := r.List(ctx, &aiGatewayList, client.InNamespace(defaultAiGatewayNamespace))
+	if err != nil {
+		if apimeta.IsNoMatchError(err) {
+			logger.Info("AiGateway CRD is not installed, skipping default gateway resolution")
+			return nil, nil
+		}
+		return nil, fmt.Errorf("failed to list AiGateways in namespace %s: %w", defaultAiGatewayNamespace, err)
+	}
+
+	if len(aiGatewayList.Items) == 0 {
+		return nil, nil
+	}
+
+	if len(aiGatewayList.Items) > 1 {
+		logger.Info("Multiple AiGateways found, selecting first one",
+			"selected", aiGatewayList.Items[0].Name,
+			"count", len(aiGatewayList.Items))
+	}
+
+	aiGateway := aiGatewayList.Items[0]
+	return &aiGateway, nil
+}
+
+func buildAiGatewayServiceUrl(aiGateway runtimev1alpha1.AiGateway) string {
+	return fmt.Sprintf("http://%s.%s.svc.cluster.local.:%d", aiGateway.Name, aiGateway.Namespace, aiGateway.Spec.Port)
+}
+```
+
+Note: The code uses `apimeta.IsNoMatchError` which references the existing `apimeta` import alias already in the file.
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+```bash
+cd operator && make test
+```
+
+Expected: PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+cd operator && git add internal/controller/experiment_controller.go internal/controller/experiment_controller_test.go config/rbac/
+git commit -m "feat: implement AiGateway resolution methods"
+```
+
+---
+
+### Task 6: Wire AiGateway resolution into buildTestWorkflow
+
+**Files:**
+- Modify: `operator/internal/controller/experiment_controller.go:137-156` (reconcileResources), `:257-262` (reconcileTestWorkflow call), `:299-368` (buildTestWorkflow)
+- Modify: `operator/internal/controller/experiment_controller_test.go`
+
+- [ ] **Step 1: Write failing tests for evaluate-template config**
+
+Add to the `"AiGateway resolution"` context in the test file:
+
+```go
+It("should set openApiBasePath on evaluate-template when AiGateway is resolved", func() {
+    r := newReconciler()
+    exp := &testbenchv1alpha1.Experiment{
+        ObjectMeta: metav1.ObjectMeta{Name: "exp-gw-url", Namespace: namespace},
+        Spec: testbenchv1alpha1.ExperimentSpec{
+            AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"},
+            Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+        },
+    }
+    gw := &runtimev1alpha1.AiGateway{
+        ObjectMeta: metav1.ObjectMeta{Name: "my-gw", Namespace: "ai-gateway"},
+        Spec:       runtimev1alpha1.AiGatewaySpec{Port: 4000, AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}}},
+    }
+
+    wf := r.buildTestWorkflow(exp, gw)
+    spec := wf.Object["spec"].(map[string]interface{})
+    use := spec["use"].([]interface{})
+
+    var evalTemplate map[string]interface{}
+    for _, u := range use {
+        um := u.(map[string]interface{})
+        if um["name"] == "evaluate-template" {
+            evalTemplate = um
+            break
+        }
+    }
+    Expect(evalTemplate).NotTo(BeNil())
+    cfg := evalTemplate["config"].(map[string]interface{})
+    Expect(cfg["openApiBasePath"]).To(Equal("http://my-gw.ai-gateway.svc.cluster.local.:4000"))
+})
+
+It("should not set config on evaluate-template when no AiGateway", func() {
+    r := newReconciler()
+    exp := &testbenchv1alpha1.Experiment{
+        ObjectMeta: metav1.ObjectMeta{Name: "exp-no-gw", Namespace: namespace},
+        Spec: testbenchv1alpha1.ExperimentSpec{
+            AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"},
+            Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+        },
+    }
+
+    wf := r.buildTestWorkflow(exp, nil)
+    spec := wf.Object["spec"].(map[string]interface{})
+    use := spec["use"].([]interface{})
+
+    var evalTemplate map[string]interface{}
+    for _, u := range use {
+        um := u.(map[string]interface{})
+        if um["name"] == "evaluate-template" {
+            evalTemplate = um
+            break
+        }
+    }
+    Expect(evalTemplate).NotTo(BeNil())
+    _, hasConfig := evalTemplate["config"]
+    Expect(hasConfig).To(BeFalse())
+})
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+```bash
+cd operator && make test
+```
+
+Expected: FAIL — `buildTestWorkflow` does not accept an `aiGateway` parameter.
+
+- [ ] **Step 3: Wire the resolution into reconcileResources and buildTestWorkflow**
+
+In `operator/internal/controller/experiment_controller.go`:
+
+**A. Update `reconcileResources`** — add AiGateway resolution before `reconcileTestWorkflow`:
+
+Change:
+```go
+func (r *ExperimentReconciler) reconcileResources(
+	ctx context.Context,
+	experiment *testbenchv1alpha1.Experiment,
+	generatedResources *[]testbenchv1alpha1.GeneratedResource,
+) (reconcileResult, error) {
+	var result reconcileResult
+	if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil {
+		return result, fmt.Errorf("reconciling ConfigMap: %w", err)
+	}
+	wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, generatedResources)
+```
+
+to:
+```go
+func (r *ExperimentReconciler) reconcileResources(
+	ctx context.Context,
+	experiment *testbenchv1alpha1.Experiment,
+	generatedResources *[]testbenchv1alpha1.GeneratedResource,
+) (reconcileResult, error) {
+	var result reconcileResult
+	if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil {
+		return result, fmt.Errorf("reconciling ConfigMap: %w", err)
+	}
+	aiGateway, err := r.resolveAiGateway(ctx, experiment)
+	if err != nil {
+		return result, fmt.Errorf("resolving AiGateway: %w", err)
+	}
+	wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, aiGateway, generatedResources)
+```
+
+**B. Update `reconcileTestWorkflow` signature** — add `aiGateway` parameter:
+
+Change:
+```go
+func (r *ExperimentReconciler) reconcileTestWorkflow(
+	ctx context.Context,
+	experiment *testbenchv1alpha1.Experiment,
+	generatedResources *[]testbenchv1alpha1.GeneratedResource,
+) (bool, error) {
+	workflow := r.buildTestWorkflow(experiment)
+```
+
+to:
+```go
+func (r *ExperimentReconciler) reconcileTestWorkflow(
+	ctx context.Context,
+	experiment *testbenchv1alpha1.Experiment,
+	aiGateway *runtimev1alpha1.AiGateway,
+	generatedResources *[]testbenchv1alpha1.GeneratedResource,
+) (bool, error) {
+	workflow := r.buildTestWorkflow(experiment, aiGateway)
+```
+
+**C. Update `buildTestWorkflow` signature and evaluate-template config**:
+
+Change signature:
+```go
+func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment) *unstructured.Unstructured {
+```
+
+to:
+```go
+func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment, aiGateway *runtimev1alpha1.AiGateway) *unstructured.Unstructured {
+```
+
+Replace the evaluate-template entry. Change:
+```go
+	useTemplates = append(useTemplates,
+		map[string]interface{}{
+			"name": "run-template",
+			"config": map[string]interface{}{
+				"agentUrl": agentURL,
+			},
+		},
+		map[string]interface{}{"name": "evaluate-template"},
+		map[string]interface{}{"name": "publish-template"},
+		map[string]interface{}{"name": "visualize-template"},
+	)
+```
+
+to:
+```go
+	evaluateTemplate := map[string]interface{}{"name": "evaluate-template"}
+	if aiGateway != nil {
+		evaluateTemplate["config"] = map[string]interface{}{
+			"openApiBasePath": buildAiGatewayServiceUrl(*aiGateway),
+		}
+	}
+
+	useTemplates = append(useTemplates,
+		map[string]interface{}{
+			"name": "run-template",
+			"config": map[string]interface{}{
+				"agentUrl": agentURL,
+			},
+		},
+		evaluateTemplate,
+		map[string]interface{}{"name": "publish-template"},
+		map[string]interface{}{"name": "visualize-template"},
+	)
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+```bash
+cd operator && make test
+```
+
+Expected: ALL tests PASS (both new and existing).
+
+- [ ] **Step 5: Commit**
+
+```bash
+cd operator && git add internal/controller/experiment_controller.go internal/controller/experiment_controller_test.go
+git commit -m "feat: wire AiGateway resolution into TestWorkflow evaluate-template config"
+```
+
+---
+
+### Task 7: Regenerate RBAC and run full verification
+
+**Files:**
+- Modify: `operator/config/rbac/role.yaml` (regenerated)
+
+- [ ] **Step 1: Regenerate RBAC manifests**
+
+```bash
+cd operator && make manifests
+```
+
+- [ ] **Step 2: Verify the RBAC role includes AiGateway permissions**
+
+Check that `operator/config/rbac/role.yaml` contains:
+
+```yaml
+- apiGroups:
+  - runtime.agentic-layer.ai
+  resources:
+  - aigateways
+  verbs:
+  - get
+  - list
+  - watch
+```
+
+- [ ] **Step 3: Run the full test suite**
+
+```bash
+cd operator && make test
+```
+
+Expected: ALL tests PASS.
+
+- [ ] **Step 4: Verify build**
+
+```bash
+cd operator && go build ./...
+```
+
+Expected: No errors.
+
+- [ ] **Step 5: Commit**
+
+```bash
+cd operator && git add config/rbac/
+git commit -m "build: regenerate RBAC with AiGateway permissions"
+```
+
+---
+
+### Task 8: Update sample manifest
+
+**Files:**
+- Modify: `operator/config/samples/testbench_v1alpha1_experiment.yaml`
+
+- [ ] **Step 1: Add aiGatewayRef example to the sample**
+
+Add the `aiGatewayRef` field to the sample experiment (after `agentRef`):
+
+```yaml
+  aiGatewayRef:
+    name: ai-gateway
+    namespace: ai-gateway
+```
+
+- [ ] **Step 2: Commit**
+
+```bash
+cd operator && git add config/samples/
+git commit -m "docs: add aiGatewayRef to sample Experiment manifest"
+```
\ No newline at end of file

From 12e251c0df9af97e8904c2e1afe5dbe0854e6987 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 15:19:32 +0100
Subject: [PATCH 07/19] fix: add default value to evaluate-template
 openApiBasePath config

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 chart/templates/evaluate-template.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/chart/templates/evaluate-template.yaml b/chart/templates/evaluate-template.yaml
index ff4cb7c..07aa1d5 100644
--- a/chart/templates/evaluate-template.yaml
+++ b/chart/templates/evaluate-template.yaml
@@ -12,6 +12,7 @@ spec:
     openApiBasePath:
       type: string
       description: "Base path for OpenAI API"
+      default: ""
 
   # Steps to execute
   steps:

From c8c4871a0986af4a9ac497ca3953a131fcebfa0b Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 15:21:16 +0100
Subject: [PATCH 08/19] build: add agent-runtime-operator module dependency
 and bump Go to 1.26.0

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 operator/go.mod | 5 +++--
 operator/go.sum | 4 ++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/operator/go.mod b/operator/go.mod
index c4da270..18151e2 100644
--- a/operator/go.mod
+++ b/operator/go.mod
@@ -1,6 +1,6 @@
 module github.com/agentic-layer/testbench/operator
 
-go 1.25.0
+go 1.26.0
 
 require (
 	github.com/onsi/ginkgo/v2 v2.28.1
@@ -14,6 +14,7 @@ require (
 require (
 	cel.dev/expr v0.24.0 // indirect
 	github.com/Masterminds/semver/v3 v3.4.0 // indirect
+	github.com/agentic-layer/agent-runtime-operator v0.25.0 // indirect
 	github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/blang/semver/v4 v4.0.0 // indirect
@@ -89,7 +90,7 @@ require (
 	k8s.io/apiextensions-apiserver v0.35.0 // indirect
 	k8s.io/apiserver v0.35.0 // indirect
 	k8s.io/component-base v0.35.0 // indirect
-	k8s.io/klog/v2 v2.130.1 // indirect
+	k8s.io/klog/v2 v2.140.0 // indirect
 	k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
 	k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect
 	sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
diff --git a/operator/go.sum b/operator/go.sum
index 31de72b..3eb481a 100644
--- a/operator/go.sum
+++ b/operator/go.sum
@@ -2,6 +2,8 @@ cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY=
 cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw=
 github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
 github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
+github.com/agentic-layer/agent-runtime-operator v0.25.0 h1:akCgx22idyDxCCNxrkyxa09ec2TKIAVMAc0uKTPh4uw=
+github.com/agentic-layer/agent-runtime-operator v0.25.0/go.mod h1:ViDJhISWCCZYAzP1f1TlThGyCI79h4fovPEzeJqK+o0=
 github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
 github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -237,6 +239,8 @@ k8s.io/component-base v0.35.0 h1:+yBrOhzri2S1BVqyVSvcM3PtPyx5GUxCK2tinZz1G94=
 k8s.io/component-base v0.35.0/go.mod h1:85SCX4UCa6SCFt6p3IKAPej7jSnF3L8EbfSyMZayJR0=
 k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
 k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc=
+k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0=
 k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE=
 k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ=
 k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck=

From 6e1a4e61d5fc000495cebfac15c5b65f6d4c8254 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 15:30:13 +0100
Subject: [PATCH 09/19] feat: add AiGatewayRef and OTLPEndpoint fields to
 ExperimentSpec CRD

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 operator/api/v1alpha1/experiment_types.go     | 10 +++
 .../api/v1alpha1/zz_generated.deepcopy.go     | 10 ++-
 ...estbench.agentic-layer.ai_experiments.yaml | 48 ++++++++++++++
 .../controller/experiment_controller_test.go  | 65 +++++++++++++++++--
 4 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/operator/api/v1alpha1/experiment_types.go b/operator/api/v1alpha1/experiment_types.go
index 3400280..3ce9322 100644
--- a/operator/api/v1alpha1/experiment_types.go
+++ b/operator/api/v1alpha1/experiment_types.go
@@ -17,6 +17,7 @@ limitations under the License.
 package v1alpha1
 
 import (
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 )
@@ -160,6 +161,11 @@ type ExperimentSpec struct {
 	// +kubebuilder:validation:Required
 	AgentRef AgentRef `json:"agentRef"`
 
+	// AiGatewayRef references an AiGateway resource for LLM access during evaluation.
+	// Only Name and Namespace fields are used.
+	// +optional
+	AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"`
+
 	// Source of the test dataset (mutually exclusive with scenarios)
 	// +optional
 	Dataset *DatasetSource `json:"dataset,omitempty"`
@@ -179,6 +185,10 @@ type ExperimentSpec struct {
 	// +optional
 	Scenarios []Scenario `json:"scenarios,omitempty"`
 
+	// OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318")
+	// +optional
+	OTLPEndpoint string `json:"otlpEndpoint,omitempty"`
+
 	// Trigger configuration
 	// +optional
 	Trigger *TriggerSpec `json:"trigger,omitempty"`
diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go
index 6b0e358..fdf55f8 100644
--- a/operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/operator/api/v1alpha1/zz_generated.deepcopy.go
@@ -21,7 +21,8 @@ limitations under the License.
 package v1alpha1
 
 import (
-	"k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 )
 
@@ -123,6 +124,11 @@ func (in *ExperimentList) DeepCopyObject() runtime.Object {
 func (in *ExperimentSpec) DeepCopyInto(out *ExperimentSpec) {
 	*out = *in
 	out.AgentRef = in.AgentRef
+	if in.AiGatewayRef != nil {
+		in, out := &in.AiGatewayRef, &out.AiGatewayRef
+		*out = new(v1.ObjectReference)
+		**out = **in
+	}
 	if in.Dataset != nil {
 		in, out := &in.Dataset, &out.Dataset
 		*out = new(DatasetSource)
@@ -157,7 +163,7 @@ func (in *ExperimentStatus) DeepCopyInto(out *ExperimentStatus) {
 	*out = *in
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
+		*out = make([]metav1.Condition, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
diff --git a/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml b/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml
index 15cc3e2..87834d3 100644
--- a/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml
+++ b/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml
@@ -63,6 +63,51 @@ spec:
                 required:
                 - name
                 type: object
+              aiGatewayRef:
+                description: |-
+                  AiGatewayRef references an AiGateway resource for LLM access during evaluation.
+                  Only Name and Namespace fields are used.
+                properties:
+                  apiVersion:
+                    description: API version of the referent.
+                    type: string
+                  fieldPath:
+                    description: |-
+                      If referring to a piece of an object instead of an entire object, this string
+                      should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+                      For example, if the object reference is to a container within a pod, this would take on a value like:
+                      "spec.containers{name}" (where "name" refers to the name of the container that triggered
+                      the event) or if no container name is specified "spec.containers[2]" (container with
+                      index 2 in this pod). This syntax is chosen only to have some well-defined way of
+                      referencing a part of an object.
+                    type: string
+                  kind:
+                    description: |-
+                      Kind of the referent.
+                      More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+                    type: string
+                  name:
+                    description: |-
+                      Name of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                    type: string
+                  namespace:
+                    description: |-
+                      Namespace of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+                    type: string
+                  resourceVersion:
+                    description: |-
+                      Specific resourceVersion to which this reference is made, if any.
+                      More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+                    type: string
+                  uid:
+                    description: |-
+                      UID of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+                    type: string
+                type: object
+                x-kubernetes-map-type: atomic
               dataset:
                 description: Source of the test dataset (mutually exclusive with scenarios)
                 properties:
@@ -93,6 +138,9 @@ spec:
                 description: LLM model used for evaluation (e.g., "gemini-2.5-flash-lite",
                   "gpt-4o")
                 type: string
+              otlpEndpoint:
+                description: OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318")
+                type: string
               scenarios:
                 description: Inline test scenarios (mutually exclusive with dataset)
                 items:
diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go
index 2a18a63..ebf9284 100644
--- a/operator/internal/controller/experiment_controller_test.go
+++ b/operator/internal/controller/experiment_controller_test.go
@@ -569,6 +569,39 @@ var _ = Describe("Experiment Controller", func() {
 		})
 	})
 
+	Context("AiGateway resolution", func() {
+		It("should accept an Experiment with aiGatewayRef", func() {
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "exp-gw-ref",
+					Namespace: namespace,
+				},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"},
+					AiGatewayRef: &corev1.ObjectReference{
+						Name:      "my-gateway",
+						Namespace: "ai-gateway",
+					},
+					Scenarios: []testbenchv1alpha1.Scenario{
+						{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}},
+					},
+				},
+			}
+			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
+			defer func() {
+				_ = k8sClient.Delete(ctx, exp)
+			}()
+
+			fetched := &testbenchv1alpha1.Experiment{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{
+				Name: "exp-gw-ref", Namespace: namespace,
+			}, fetched)).To(Succeed())
+			Expect(fetched.Spec.AiGatewayRef).NotTo(BeNil())
+			Expect(fetched.Spec.AiGatewayRef.Name).To(Equal("my-gateway"))
+			Expect(fetched.Spec.AiGatewayRef.Namespace).To(Equal("ai-gateway"))
+		})
+	})
+
 	Context("OTel env var injection", func() {
 		const expName = "exp-otel"
 
@@ -576,12 +609,13 @@ var _ = Describe("Experiment Controller", func() {
 			cleanupExperiment(expName)
 		})
 
-		It("should inject OTEL_EXPORTER_OTLP_ENDPOINT from otel-config ConfigMap", func() {
+		It("should inject OTEL_EXPORTER_OTLP_ENDPOINT as direct value from spec.otlpEndpoint", func() {
 			exp := &testbenchv1alpha1.Experiment{
 				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
 				Spec: testbenchv1alpha1.ExperimentSpec{
-					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent"},
-					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+					AgentRef:     testbenchv1alpha1.AgentRef{Name: "agent"},
+					OTLPEndpoint: "http://lgtm.monitoring.svc.cluster.local:4318",
+					Scenarios:    []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
 				},
 			}
 			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
@@ -597,10 +631,27 @@ var _ = Describe("Experiment Controller", func() {
 			Expect(envList).To(HaveLen(1))
 			envVar := envList[0].(map[string]interface{})
 			Expect(envVar["name"]).To(Equal(otelEndpointKey))
-			valueFrom := envVar["valueFrom"].(map[string]interface{})
-			cmRef := valueFrom["configMapKeyRef"].(map[string]interface{})
-			Expect(cmRef["name"]).To(Equal(otelConfigMapName))
-			Expect(cmRef["key"]).To(Equal(otelEndpointKey))
+			Expect(envVar["value"]).To(Equal("http://lgtm.monitoring.svc.cluster.local:4318"))
+		})
+
+		It("should omit container env when otlpEndpoint is not set", func() {
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent"},
+					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+				},
+			}
+			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
+			Expect(reconcileExperiment(expName)).To(Succeed())
+
+			wf := &unstructured.Unstructured{}
+			wf.SetGroupVersionKind(testWorkflowGVK)
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed())
+
+			spec := wf.Object["spec"].(map[string]interface{})
+			_, hasContainer := spec["container"]
+			Expect(hasContainer).To(BeFalse(), "spec.container should be absent when otlpEndpoint is not set")
 		})
 	})
 })

From b1d046f3778ea789f1d7050793e424813fda5986 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 15:30:40 +0100
Subject: [PATCH 10/19] build: register AiGateway types in scheme and add CRD
 to testdata

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 operator/cmd/main.go                          |   2 +
 operator/internal/controller/suite_test.go    |   4 +
 .../runtime.agentic-layer.ai_aigateways.yaml  | 435 ++++++++++++++++++
 3 files changed, 441 insertions(+)
 create mode 100644 operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml

diff --git a/operator/cmd/main.go b/operator/cmd/main.go
index e99865c..4d967c2 100644
--- a/operator/cmd/main.go
+++ b/operator/cmd/main.go
@@ -35,6 +35,7 @@ import (
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
 	"sigs.k8s.io/controller-runtime/pkg/webhook"
 
+	runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
 	testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1"
 	"github.com/agentic-layer/testbench/operator/internal/controller"
 	// +kubebuilder:scaffold:imports
@@ -49,6 +50,7 @@ func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
 
 	utilruntime.Must(testbenchv1alpha1.AddToScheme(scheme))
+	utilruntime.Must(runtimev1alpha1.AddToScheme(scheme))
 	// +kubebuilder:scaffold:scheme
 }
 
diff --git a/operator/internal/controller/suite_test.go b/operator/internal/controller/suite_test.go
index 6250695..3d42305 100644
--- a/operator/internal/controller/suite_test.go
+++ b/operator/internal/controller/suite_test.go
@@ -32,6 +32,7 @@ import (
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
+	runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
 	testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1"
 	// +kubebuilder:scaffold:imports
 )
@@ -78,6 +79,9 @@ var _ = BeforeSuite(func() {
 	err = testbenchv1alpha1.AddToScheme(scheme.Scheme)
 	Expect(err).NotTo(HaveOccurred())
 
+	err = runtimev1alpha1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
 	// +kubebuilder:scaffold:scheme
 
 	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
diff --git a/operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml b/operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml
new file mode 100644
index 0000000..df0dc67
--- /dev/null
+++ b/operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml
@@ -0,0 +1,435 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.18.0
+  name: aigateways.runtime.agentic-layer.ai
+spec:
+  group: runtime.agentic-layer.ai
+  names:
+    kind: AiGateway
+    listKind: AiGatewayList
+    plural: aigateways
+    singular: aigateway
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: AiGateway is the Schema for the AI gateways API.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: AiGatewaySpec defines the desired state of AiGateway.
+            properties:
+              aiGatewayClassName:
+                description: |-
+                  AiGatewayClassName specifies which AiGatewayClass to use for this AI gateway instance.
+                  This is only needed if multiple AI gateway classes are defined in the cluster.
+                type: string
+              aiModels:
+                description: List of AI models to be made available through the gateway.
+                items:
+                  description: AiModel is an AI model configuration.
+                  properties:
+                    name:
+                      description: Name is the identifier for the AI model (e.g.,
+                        "gpt-4", "claude-3-opus")
+                      minLength: 1
+                      type: string
+                    provider:
+                      description: Provider specifies the AI provider (e.g., "openai",
+                        "anthropic", "azure")
+                      minLength: 1
+                      type: string
+                  required:
+                  - name
+                  - provider
+                  type: object
+                minItems: 1
+                type: array
+              commonMetadata:
+                description: |-
+                  CommonMetadata defines labels and annotations to be applied to the Deployment and Service
+                  resources created for this gateway, as well as the pod template.
+                properties:
+                  annotations:
+                    additionalProperties:
+                      type: string
+                    description: Annotations is a map of key/value pairs to be applied
+                      to the resource.
+                    type: object
+                  labels:
+                    additionalProperties:
+                      type: string
+                    description: Labels is a map of key/value pairs to be applied
+                      to the resource.
+                    type: object
+                type: object
+              env:
+                description: |-
+                  Environment variables to pass to the AI gateway container.
+                  These can include configuration values, credentials, or feature flags.
+                items:
+                  description: EnvVar represents an environment variable present in
+                    a Container.
+                  properties:
+                    name:
+                      description: |-
+                        Name of the environment variable.
+                        May consist of any printable ASCII characters except '='.
+                      type: string
+                    value:
+                      description: |-
+                        Variable references $(VAR_NAME) are expanded
+                        using the previously defined environment variables in the container and
+                        any service environment variables. If a variable cannot be resolved,
+                        the reference in the input string will be unchanged. Double $$ are reduced
+                        to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
+                        "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
+                        Escaped references will never be expanded, regardless of whether the variable
+                        exists or not.
+                        Defaults to "".
+                      type: string
+                    valueFrom:
+                      description: Source for the environment variable's value. Cannot
+                        be used if value is not empty.
+                      properties:
+                        configMapKeyRef:
+                          description: Selects a key of a ConfigMap.
+                          properties:
+                            key:
+                              description: The key to select.
+                              type: string
+                            name:
+                              default: ""
+                              description: |-
+                                Name of the referent.
+                                This field is effectively required, but due to backwards compatibility is
+                                allowed to be empty. Instances of this type with an empty value here are
+                                almost certainly wrong.
+                                More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                              type: string
+                            optional:
+                              description: Specify whether the ConfigMap or its key
+                                must be defined
+                              type: boolean
+                          required:
+                          - key
+                          type: object
+                          x-kubernetes-map-type: atomic
+                        fieldRef:
+                          description: |-
+                            Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
+                            spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
+                          properties:
+                            apiVersion:
+                              description: Version of the schema the FieldPath is
+                                written in terms of, defaults to "v1".
+                              type: string
+                            fieldPath:
+                              description: Path of the field to select in the specified
+                                API version.
+                              type: string
+                          required:
+                          - fieldPath
+                          type: object
+                          x-kubernetes-map-type: atomic
+                        fileKeyRef:
+                          description: |-
+                            FileKeyRef selects a key of the env file.
+                            Requires the EnvFiles feature gate to be enabled.
+                          properties:
+                            key:
+                              description: |-
+                                The key within the env file. An invalid key will prevent the pod from starting.
+                                The keys defined within a source may consist of any printable ASCII characters except '='.
+                                During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
+                              type: string
+                            optional:
+                              default: false
+                              description: |-
+                                Specify whether the file or its key must be defined. If the file or key
+                                does not exist, then the env var is not published.
+                                If optional is set to true and the specified key does not exist,
+                                the environment variable will not be set in the Pod's containers.
+
+                                If optional is set to false and the specified key does not exist,
+                                an error will be returned during Pod creation.
+                              type: boolean
+                            path:
+                              description: |-
+                                The path within the volume from which to select the file.
+                                Must be relative and may not contain the '..' path or start with '..'.
+                              type: string
+                            volumeName:
+                              description: The name of the volume mount containing
+                                the env file.
+                              type: string
+                          required:
+                          - key
+                          - path
+                          - volumeName
+                          type: object
+                          x-kubernetes-map-type: atomic
+                        resourceFieldRef:
+                          description: |-
+                            Selects a resource of the container: only resources limits and requests
+                            (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
+                          properties:
+                            containerName:
+                              description: 'Container name: required for volumes,
+                                optional for env vars'
+                              type: string
+                            divisor:
+                              anyOf:
+                              - type: integer
+                              - type: string
+                              description: Specifies the output format of the exposed
+                                resources, defaults to "1"
+                              pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                              x-kubernetes-int-or-string: true
+                            resource:
+                              description: 'Required: resource to select'
+                              type: string
+                          required:
+                          - resource
+                          type: object
+                          x-kubernetes-map-type: atomic
+                        secretKeyRef:
+                          description: Selects a key of a secret in the pod's namespace
+                          properties:
+                            key:
+                              description: The key of the secret to select from.  Must
+                                be a valid secret key.
+                              type: string
+                            name:
+                              default: ""
+                              description: |-
+                                Name of the referent.
+                                This field is effectively required, but due to backwards compatibility is
+                                allowed to be empty. Instances of this type with an empty value here are
+                                almost certainly wrong.
+                                More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                              type: string
+                            optional:
+                              description: Specify whether the Secret or its key must
+                                be defined
+                              type: boolean
+                          required:
+                          - key
+                          type: object
+                          x-kubernetes-map-type: atomic
+                      type: object
+                  required:
+                  - name
+                  type: object
+                type: array
+              envFrom:
+                description: |-
+                  List of sources to populate environment variables in the AI gateway container.
+                  This allows loading variables from ConfigMaps and Secrets.
+                items:
+                  description: EnvFromSource represents the source of a set of ConfigMaps
+                    or Secrets
+                  properties:
+                    configMapRef:
+                      description: The ConfigMap to select from
+                      properties:
+                        name:
+                          default: ""
+                          description: |-
+                            Name of the referent.
+                            This field is effectively required, but due to backwards compatibility is
+                            allowed to be empty. Instances of this type with an empty value here are
+                            almost certainly wrong.
+                            More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                          type: string
+                        optional:
+                          description: Specify whether the ConfigMap must be defined
+                          type: boolean
+                      type: object
+                      x-kubernetes-map-type: atomic
+                    prefix:
+                      description: |-
+                        Optional text to prepend to the name of each environment variable.
+                        May consist of any printable ASCII characters except '='.
+                      type: string
+                    secretRef:
+                      description: The Secret to select from
+                      properties:
+                        name:
+                          default: ""
+                          description: |-
+                            Name of the referent.
+                            This field is effectively required, but due to backwards compatibility is
+                            allowed to be empty. Instances of this type with an empty value here are
+                            almost certainly wrong.
+                            More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                          type: string
+                        optional:
+                          description: Specify whether the Secret must be defined
+                          type: boolean
+                      type: object
+                      x-kubernetes-map-type: atomic
+                  type: object
+                type: array
+              guardrails:
+                description: |-
+                  Guardrails lists the Guard resources to be applied to requests through this AI gateway.
+                  Guards are applied in the order they are listed.
+                items:
+                  description: ObjectReference contains enough information to let
+                    you inspect or modify the referred object.
+                  properties:
+                    apiVersion:
+                      description: API version of the referent.
+                      type: string
+                    fieldPath:
+                      description: |-
+                        If referring to a piece of an object instead of an entire object, this string
+                        should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+                        For example, if the object reference is to a container within a pod, this would take on a value like:
+                        "spec.containers{name}" (where "name" refers to the name of the container that triggered
+                        the event) or if no container name is specified "spec.containers[2]" (container with
+                        index 2 in this pod). This syntax is chosen only to have some well-defined way of
+                        referencing a part of an object.
+                      type: string
+                    kind:
+                      description: |-
+                        Kind of the referent.
+                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+                      type: string
+                    name:
+                      description: |-
+                        Name of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                      type: string
+                    namespace:
+                      description: |-
+                        Namespace of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+                      type: string
+                    resourceVersion:
+                      description: |-
+                        Specific resourceVersion to which this reference is made, if any.
+                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+                      type: string
+                    uid:
+                      description: |-
+                        UID of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+                      type: string
+                  type: object
+                  x-kubernetes-map-type: atomic
+                type: array
+              podMetadata:
+                description: |-
+                  PodMetadata defines labels and annotations to be applied only to the pod template
+                  of the Deployment created for this gateway.
+                properties:
+                  annotations:
+                    additionalProperties:
+                      type: string
+                    description: Annotations is a map of key/value pairs to be applied
+                      to the resource.
+                    type: object
+                  labels:
+                    additionalProperties:
+                      type: string
+                    description: Labels is a map of key/value pairs to be applied
+                      to the resource.
+                    type: object
+                type: object
+              port:
+                default: 80
+                description: Port on which the AI gateway will be exposed.
+                format: int32
+                maximum: 65535
+                minimum: 1
+                type: integer
+            required:
+            - aiModels
+            type: object
+          status:
+            description: AiGatewayStatus defines the observed state of AiGateway.
+            properties:
+              conditions:
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}

From 17481001190cb0176396d12c864390ed835bf4d5 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 15:54:35 +0100
Subject: [PATCH 11/19] feat: implement AiGateway resolution and wire into
 TestWorkflow

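Adds resolveAiGateway, resolveExplicitAiGateway, resolveDefaultAiGateway
methods mirroring agent-runtime-operator pattern. Passes resolved gateway
URL as openApiBasePath config to evaluate-template.

Also creates the generated ConfigMap, TestWorkflow and TestTrigger in the
testkube namespace (setting the owner reference only when the Experiment
itself lives there) and sets OTEL_EXPORTER_OTLP_ENDPOINT from
spec.otlpEndpoint, when non-empty, instead of reading it from the
otel-config ConfigMap.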

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../controller/experiment_controller.go       | 144 +++++++++++++----
 .../controller/experiment_controller_test.go  | 148 ++++++++++++++++++
 2 files changed, 261 insertions(+), 31 deletions(-)

diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go
index eb240d8..105a38d 100644
--- a/operator/internal/controller/experiment_controller.go
+++ b/operator/internal/controller/experiment_controller.go
@@ -35,15 +35,17 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 
+	runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
 	testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1"
 )
 
 const (
-	conditionReady         = "Ready"
-	conditionWorkflowReady = "WorkflowReady"
-	otelConfigMapName      = "otel-config"
-	otelEndpointKey        = "OTEL_EXPORTER_OTLP_ENDPOINT"
-	defaultAgentPort       = "8000"
+	conditionReady            = "Ready"
+	conditionWorkflowReady    = "WorkflowReady"
+	otelEndpointKey           = "OTEL_EXPORTER_OTLP_ENDPOINT"
+	defaultAgentPort          = "8000"
+	testkubeNamespace         = "testkube"
+	defaultAiGatewayNamespace = "ai-gateway"
 )
 
 var (
@@ -107,6 +109,7 @@ type ExperimentReconciler struct {
 // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=testworkflows.testkube.io,resources=testworkflows,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=tests.testkube.io,resources=testtriggers,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=runtime.agentic-layer.ai,resources=aigateways,verbs=get;list;watch
 
 // Reconcile moves the cluster state closer to the desired state specified by the Experiment.
 func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
@@ -143,7 +146,11 @@ func (r *ExperimentReconciler) reconcileResources(
 	if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil {
 		return result, fmt.Errorf("reconciling ConfigMap: %w", err)
 	}
-	wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, generatedResources)
+	aiGateway, err := r.resolveAiGateway(ctx, experiment)
+	if err != nil {
+		return result, fmt.Errorf("resolving AiGateway: %w", err)
+	}
+	wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, aiGateway, generatedResources)
 	if err != nil {
 		result.workflowErr = err
 		return result, fmt.Errorf("reconciling TestWorkflow: %w", err)
@@ -165,13 +172,15 @@ func (r *ExperimentReconciler) reconcileConfigMap(
 	cm := &corev1.ConfigMap{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      cmName,
-			Namespace: experiment.Namespace,
+			Namespace: testkubeNamespace,
 		},
 	}
 
 	_, err := controllerutil.CreateOrUpdate(ctx, r.Client, cm, func() error {
-		if err := controllerutil.SetControllerReference(experiment, cm, r.Scheme); err != nil {
-			return err
+		if experiment.Namespace == testkubeNamespace {
+			if err := controllerutil.SetControllerReference(experiment, cm, r.Scheme); err != nil {
+				return err
+			}
 		}
 		data, buildErr := r.buildExperimentJSON(experiment)
 		if buildErr != nil {
@@ -255,11 +264,14 @@ func (r *ExperimentReconciler) convertStep(step testbenchv1alpha1.Step) stepJSON
 func (r *ExperimentReconciler) reconcileTestWorkflow(
 	ctx context.Context,
 	experiment *testbenchv1alpha1.Experiment,
+	aiGateway *runtimev1alpha1.AiGateway,
 	generatedResources *[]testbenchv1alpha1.GeneratedResource,
 ) (bool, error) {
-	workflow := r.buildTestWorkflow(experiment)
-	if err := controllerutil.SetControllerReference(experiment, workflow, r.Scheme); err != nil {
-		return false, err
+	workflow := r.buildTestWorkflow(experiment, aiGateway)
+	if experiment.Namespace == testkubeNamespace {
+		if err := controllerutil.SetControllerReference(experiment, workflow, r.Scheme); err != nil {
+			return false, err
+		}
 	}
 
 	existing := &unstructured.Unstructured{}
@@ -292,7 +304,7 @@ func (r *ExperimentReconciler) reconcileTestWorkflow(
 }
 
 // buildTestWorkflow constructs the desired TestWorkflow unstructured object.
-func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment) *unstructured.Unstructured {
+func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment, aiGateway *runtimev1alpha1.AiGateway) *unstructured.Unstructured {
 	agentURL := r.resolveAgentURL(experiment)
 
 	// Build the list of phase templates to chain.
@@ -305,6 +317,13 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E
 			},
 		})
 	}
+	evaluateTemplate := map[string]interface{}{"name": "evaluate-template"}
+	if aiGateway != nil {
+		evaluateTemplate["config"] = map[string]interface{}{
+			"openApiBasePath": buildAiGatewayServiceUrl(*aiGateway),
+		}
+	}
+
 	useTemplates = append(useTemplates,
 		map[string]interface{}{
 			"name": "run-template",
@@ -312,26 +331,24 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E
 				"agentUrl": agentURL,
 			},
 		},
-		map[string]interface{}{"name": "evaluate-template"},
+		evaluateTemplate,
 		map[string]interface{}{"name": "publish-template"},
 		map[string]interface{}{"name": "visualize-template"},
 	)
 
 	spec := map[string]interface{}{
-		"container": map[string]interface{}{
+		"use": useTemplates,
+	}
+
+	if experiment.Spec.OTLPEndpoint != "" {
+		spec["container"] = map[string]interface{}{
 			"env": []interface{}{
 				map[string]interface{}{
-					"name": otelEndpointKey,
-					"valueFrom": map[string]interface{}{
-						"configMapKeyRef": map[string]interface{}{
-							"name": otelConfigMapName,
-							"key":  otelEndpointKey,
-						},
-					},
+					"name":  otelEndpointKey,
+					"value": experiment.Spec.OTLPEndpoint,
 				},
 			},
-		},
-		"use": useTemplates,
+		}
 	}
 
 	// For scenarios mode, mount the pre-populated ConfigMap as the experiment file.
@@ -357,7 +374,7 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E
 			"kind":       testWorkflowGVK.Kind,
 			"metadata": map[string]interface{}{
 				"name":      experiment.Name + "-workflow",
-				"namespace": experiment.Namespace,
+				"namespace": testkubeNamespace,
 			},
 			"spec": spec,
 		},
@@ -378,7 +395,7 @@ func (r *ExperimentReconciler) reconcileTestTrigger(
 		existing := &unstructured.Unstructured{}
 		existing.SetGroupVersionKind(testTriggerGVK)
 		existing.SetName(triggerName)
-		existing.SetNamespace(experiment.Namespace)
+		existing.SetNamespace(testkubeNamespace)
 		if delErr := r.Delete(ctx, existing); delErr != nil && !errors.IsNotFound(delErr) {
 			if isCRDNotInstalled(delErr) {
 				return nil
@@ -389,13 +406,15 @@ func (r *ExperimentReconciler) reconcileTestTrigger(
 	}
 
 	trigger := r.buildTestTrigger(experiment)
-	if err := controllerutil.SetControllerReference(experiment, trigger, r.Scheme); err != nil {
-		return err
+	if experiment.Namespace == testkubeNamespace {
+		if err := controllerutil.SetControllerReference(experiment, trigger, r.Scheme); err != nil {
+			return err
+		}
 	}
 
 	existing := &unstructured.Unstructured{}
 	existing.SetGroupVersionKind(testTriggerGVK)
-	err := r.Get(ctx, types.NamespacedName{Name: triggerName, Namespace: experiment.Namespace}, existing)
+	err := r.Get(ctx, types.NamespacedName{Name: triggerName, Namespace: testkubeNamespace}, existing)
 	if errors.IsNotFound(err) {
 		if createErr := r.Create(ctx, trigger); createErr != nil {
 			return createErr
@@ -440,7 +459,7 @@ func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Ex
 			"kind":       testTriggerGVK.Kind,
 			"metadata": map[string]interface{}{
 				"name":      experiment.Name + "-trigger",
-				"namespace": experiment.Namespace,
+				"namespace": testkubeNamespace,
 			},
 			"spec": map[string]interface{}{
 				"resource": "deployment",
@@ -454,7 +473,7 @@ func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Ex
 				"concurrencyPolicy": concurrencyPolicy,
 				"testSelector": map[string]interface{}{
 					"name":      experiment.Name + "-workflow",
-					"namespace": experiment.Namespace,
+					"namespace": testkubeNamespace,
 				},
 				"disabled": false,
 			},
@@ -542,6 +561,69 @@ func (r *ExperimentReconciler) resolveDatasetURL(experiment *testbenchv1alpha1.E
 	return ""
 }
 
+// resolveAiGateway returns the AiGateway named by experiment.Spec.AiGatewayRef when that is set; otherwise it delegates to resolveDefaultAiGateway.
+func (r *ExperimentReconciler) resolveAiGateway(ctx context.Context, experiment *testbenchv1alpha1.Experiment) (*runtimev1alpha1.AiGateway, error) {
+	if experiment.Spec.AiGatewayRef != nil { // an explicit reference takes precedence over the default lookup
+		return r.resolveExplicitAiGateway(ctx, experiment.Spec.AiGatewayRef, experiment.Namespace)
+	}
+	return r.resolveDefaultAiGateway(ctx)
+}
+
+// resolveExplicitAiGateway fetches the AiGateway named by ref, using experimentNamespace when ref.Namespace is empty; every lookup failure is returned as an error.
+func (r *ExperimentReconciler) resolveExplicitAiGateway(ctx context.Context, ref *corev1.ObjectReference, experimentNamespace string) (*runtimev1alpha1.AiGateway, error) {
+	namespace := ref.Namespace
+	if namespace == "" { // an empty ref namespace falls back to the namespace passed in by the caller
+		namespace = experimentNamespace
+	}
+
+	var aiGateway runtimev1alpha1.AiGateway
+	err := r.Get(ctx, types.NamespacedName{
+		Name:      ref.Name,
+		Namespace: namespace,
+	}, &aiGateway)
+
+	if err != nil {
+		if apimeta.IsNoMatchError(err) { // NOTE(review): the underlying no-match error is dropped below rather than wrapped with %w
+			return nil, fmt.Errorf("AiGateway CRD is not installed in the cluster")
+		}
+		return nil, fmt.Errorf("failed to resolve AiGateway %s/%s: %w", namespace, ref.Name, err)
+	}
+
+	return &aiGateway, nil
+}
+
+// resolveDefaultAiGateway returns the first AiGateway listed in defaultAiGatewayNamespace, or (nil, nil) when none exists or the list call reports a no-match error.
+func (r *ExperimentReconciler) resolveDefaultAiGateway(ctx context.Context) (*runtimev1alpha1.AiGateway, error) {
+	logger := log.FromContext(ctx)
+
+	var aiGatewayList runtimev1alpha1.AiGatewayList
+	err := r.List(ctx, &aiGatewayList, client.InNamespace(defaultAiGatewayNamespace))
+	if err != nil {
+		if apimeta.IsNoMatchError(err) {
+			logger.Info("AiGateway CRD is not installed, skipping default gateway resolution")
+			return nil, nil // unlike the explicit lookup, a no-match here is logged and not reported as an error
+		}
+		return nil, fmt.Errorf("failed to list AiGateways in namespace %s: %w", defaultAiGatewayNamespace, err)
+	}
+
+	if len(aiGatewayList.Items) == 0 {
+		return nil, nil // no gateway in the namespace: callers receive a nil gateway and a nil error
+	}
+
+	if len(aiGatewayList.Items) > 1 { // NOTE(review): "first" depends on the order List returns — confirm it is stable across reconciles
+		logger.Info("Multiple AiGateways found, selecting first one",
+			"selected", aiGatewayList.Items[0].Name,
+			"count", len(aiGatewayList.Items))
+	}
+
+	aiGateway := aiGatewayList.Items[0] // copy the element so the returned pointer does not point into the list's slice
+	return &aiGateway, nil
+}
+
+func buildAiGatewayServiceUrl(aiGateway runtimev1alpha1.AiGateway) string { // formats an http URL from the gateway's Name, Namespace and Spec.Port
+	return fmt.Sprintf("http://%s.%s.svc.cluster.local.:%d", aiGateway.Name, aiGateway.Namespace, aiGateway.Spec.Port)
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (r *ExperimentReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go
index ebf9284..45149fc 100644
--- a/operator/internal/controller/experiment_controller_test.go
+++ b/operator/internal/controller/experiment_controller_test.go
@@ -31,6 +31,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
+	runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1"
 	testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1"
 )
 
@@ -600,6 +601,153 @@ var _ = Describe("Experiment Controller", func() {
 			Expect(fetched.Spec.AiGatewayRef.Name).To(Equal("my-gateway"))
 			Expect(fetched.Spec.AiGatewayRef.Namespace).To(Equal("ai-gateway"))
 		})
+
+		It("should resolve an explicit AiGateway by ref", func() {
+			By("creating an AiGateway resource")
+			gw := &runtimev1alpha1.AiGateway{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "test-gateway",
+					Namespace: namespace,
+				},
+				Spec: runtimev1alpha1.AiGatewaySpec{
+					Port:     4000,
+					AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}},
+				},
+			}
+			Expect(k8sClient.Create(ctx, gw)).To(Succeed())
+			defer func() { _ = k8sClient.Delete(ctx, gw) }()
+
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AiGatewayRef: &corev1.ObjectReference{
+						Name:      "test-gateway",
+						Namespace: namespace,
+					},
+				},
+			}
+			resolved, err := r.resolveAiGateway(ctx, exp)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(resolved).NotTo(BeNil())
+			Expect(resolved.Name).To(Equal("test-gateway"))
+			Expect(resolved.Spec.Port).To(Equal(int32(4000)))
+		})
+
+		It("should resolve default AiGateway from ai-gateway namespace", func() {
+			By("creating the ai-gateway namespace")
+			ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "ai-gateway"}}
+			_ = k8sClient.Create(ctx, ns)
+
+			By("creating an AiGateway in ai-gateway namespace")
+			gw := &runtimev1alpha1.AiGateway{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "default-gw",
+					Namespace: "ai-gateway",
+				},
+				Spec: runtimev1alpha1.AiGatewaySpec{
+					Port:     80,
+					AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}},
+				},
+			}
+			Expect(k8sClient.Create(ctx, gw)).To(Succeed())
+			defer func() { _ = k8sClient.Delete(ctx, gw) }()
+
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Namespace: namespace},
+				Spec:       testbenchv1alpha1.ExperimentSpec{},
+			}
+			resolved, err := r.resolveAiGateway(ctx, exp)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(resolved).NotTo(BeNil())
+			Expect(resolved.Name).To(Equal("default-gw"))
+		})
+
+		It("should return nil when no AiGateway exists", func() {
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Namespace: namespace},
+				Spec:       testbenchv1alpha1.ExperimentSpec{},
+			}
+			resolved, err := r.resolveAiGateway(ctx, exp)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(resolved).To(BeNil())
+		})
+
+		It("should return error when explicit ref points to non-existent gateway", func() {
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AiGatewayRef: &corev1.ObjectReference{
+						Name:      "nonexistent",
+						Namespace: namespace,
+					},
+				},
+			}
+			_, err := r.resolveAiGateway(ctx, exp)
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("failed to resolve AiGateway"))
+		})
+
+		It("should set openApiBasePath on evaluate-template when AiGateway is resolved", func() {
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Name: "exp-gw-url", Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"},
+					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+				},
+			}
+			gw := &runtimev1alpha1.AiGateway{
+				ObjectMeta: metav1.ObjectMeta{Name: "my-gw", Namespace: "ai-gateway"},
+				Spec:       runtimev1alpha1.AiGatewaySpec{Port: 4000, AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}}},
+			}
+
+			wf := r.buildTestWorkflow(exp, gw)
+			spec := wf.Object["spec"].(map[string]interface{})
+			use := spec["use"].([]interface{})
+
+			var evalTemplate map[string]interface{}
+			for _, u := range use {
+				um := u.(map[string]interface{})
+				if um["name"] == "evaluate-template" {
+					evalTemplate = um
+					break
+				}
+			}
+			Expect(evalTemplate).NotTo(BeNil())
+			cfg := evalTemplate["config"].(map[string]interface{})
+			Expect(cfg["openApiBasePath"]).To(Equal("http://my-gw.ai-gateway.svc.cluster.local.:4000"))
+		})
+
+		It("should not set config on evaluate-template when no AiGateway", func() {
+			r := newReconciler()
+			exp := &testbenchv1alpha1.Experiment{
+				ObjectMeta: metav1.ObjectMeta{Name: "exp-no-gw", Namespace: namespace},
+				Spec: testbenchv1alpha1.ExperimentSpec{
+					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"},
+					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+				},
+			}
+
+			wf := r.buildTestWorkflow(exp, nil)
+			spec := wf.Object["spec"].(map[string]interface{})
+			use := spec["use"].([]interface{})
+
+			var evalTemplate map[string]interface{}
+			for _, u := range use {
+				um := u.(map[string]interface{})
+				if um["name"] == "evaluate-template" {
+					evalTemplate = um
+					break
+				}
+			}
+			Expect(evalTemplate).NotTo(BeNil())
+			_, hasConfig := evalTemplate["config"]
+			Expect(hasConfig).To(BeFalse())
+		})
 	})
 
 	Context("OTel env var injection", func() {

From ae15b9dd96b4cf443c148bddabe6e299626b247b Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 16:01:53 +0100
Subject: [PATCH 12/19] build: regenerate RBAC with AiGateway permissions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 operator/config/rbac/role.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml
index a6be05a..4ec3487 100644
--- a/operator/config/rbac/role.yaml
+++ b/operator/config/rbac/role.yaml
@@ -16,6 +16,14 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - runtime.agentic-layer.ai
+  resources:
+  - aigateways
+  verbs:
+  - get
+  - list
+  - watch
 - apiGroups:
   - testbench.agentic-layer.ai
   resources:

From ad0783e22f1ff1891a9ebcedfd7d3498fc51f473 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 16:03:01 +0100
Subject: [PATCH 13/19] docs: add aiGatewayRef to sample Experiment manifest

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../testbench_v1alpha1_experiment.yaml        | 74 +++++++++++++++++--
 1 file changed, 69 insertions(+), 5 deletions(-)

diff --git a/operator/config/samples/testbench_v1alpha1_experiment.yaml b/operator/config/samples/testbench_v1alpha1_experiment.yaml
index 0bb8ea1..be2e900 100644
--- a/operator/config/samples/testbench_v1alpha1_experiment.yaml
+++ b/operator/config/samples/testbench_v1alpha1_experiment.yaml
@@ -9,11 +9,75 @@ spec:
   agentRef:
     name: weather-agent
     namespace: sample-agents
+  aiGatewayRef:
+    name: ai-gateway
+    namespace: ai-gateway
+  otlpEndpoint: http://lgtm.monitoring.svc.cluster.local:4318
   llmAsAJudgeModel: gemini-2.5-flash-lite
   defaultThreshold: 0.9
-  dataset:
-    s3:
-      bucket: testbench
-      key: dataset.csv
+  scenarios:
+    - name: "Weather in New York"
+      steps:
+        - input: "What is the weather like in New York right now?"
+          reference:
+            toolCalls:
+              - name: get_weather
+                args:
+                  city: "New York"
+            topics:
+              - weather
+          metrics:
+            - metricName: AgentGoalAccuracyWithoutReference
+            - metricName: ToolCallAccuracy
+            - metricName: TopicAdherence
+              parameters:
+                mode: precision
+    - name: "Weather in Bangkok (unavailable)"
+      steps:
+        - input: "What is the weather like in Bangkok right now?"
+          reference:
+            toolCalls:
+              - name: get_weather
+                args:
+                  city: "New York"
+            topics:
+              - time
+          metrics:
+            - metricName: AgentGoalAccuracyWithoutReference
+            - metricName: ToolCallAccuracy
+            - metricName: TopicAdherence
+              parameters:
+                mode: precision
+    - name: "Weather in New York (alt reference)"
+      steps:
+        - input: "What is the weather like in New York right now?"
+          reference:
+            toolCalls:
+              - name: get_current_time
+                args:
+                  city: "New York"
+          metrics:
+            - metricName: AgentGoalAccuracyWithoutReference
+            - metricName: ToolCallAccuracy
+    - name: "Weather then time in New York (multi-step)"
+      steps:
+        - input: "What is the weather like in New York right now?"
+          reference:
+            toolCalls:
+              - name: get_weather
+                args:
+                  city: "New York"
+          metrics:
+            - metricName: AgentGoalAccuracyWithoutReference
+            - metricName: ToolCallAccuracy
+        - input: "What time is it in New York?"
+          reference:
+            toolCalls:
+              - name: get_current_time
+                args:
+                  city: "New York"
+          metrics:
+            - metricName: AgentGoalAccuracyWithoutReference
+            - metricName: ToolCallAccuracy
   trigger:
-    enabled: true
+    enabled: true
\ No newline at end of file

From 8e81511b16f67ba14476a0fde1a0e7f09f8aa3b8 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Wed, 25 Mar 2026 16:07:07 +0100
Subject: [PATCH 14/19] build: go mod tidy to mark agent-runtime-operator as
 direct dependency

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 operator/go.mod | 2 +-
 operator/go.sum | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/operator/go.mod b/operator/go.mod
index 18151e2..3806207 100644
--- a/operator/go.mod
+++ b/operator/go.mod
@@ -3,6 +3,7 @@ module github.com/agentic-layer/testbench/operator
 go 1.26.0
 
 require (
+	github.com/agentic-layer/agent-runtime-operator v0.25.0
 	github.com/onsi/ginkgo/v2 v2.28.1
 	github.com/onsi/gomega v1.39.1
 	k8s.io/api v0.35.2
@@ -14,7 +15,6 @@ require (
 require (
 	cel.dev/expr v0.24.0 // indirect
 	github.com/Masterminds/semver/v3 v3.4.0 // indirect
-	github.com/agentic-layer/agent-runtime-operator v0.25.0 // indirect
 	github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/blang/semver/v4 v4.0.0 // indirect
diff --git a/operator/go.sum b/operator/go.sum
index 3eb481a..589f07d 100644
--- a/operator/go.sum
+++ b/operator/go.sum
@@ -237,8 +237,6 @@ k8s.io/client-go v0.35.2 h1:YUfPefdGJA4aljDdayAXkc98DnPkIetMl4PrKX97W9o=
 k8s.io/client-go v0.35.2/go.mod h1:4QqEwh4oQpeK8AaefZ0jwTFJw/9kIjdQi0jpKeYvz7g=
 k8s.io/component-base v0.35.0 h1:+yBrOhzri2S1BVqyVSvcM3PtPyx5GUxCK2tinZz1G94=
 k8s.io/component-base v0.35.0/go.mod h1:85SCX4UCa6SCFt6p3IKAPej7jSnF3L8EbfSyMZayJR0=
-k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
-k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
 k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc=
 k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0=
 k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE=

From 2471fd30eb2f9066c0f6817b806d0ac8d2f51831 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Thu, 26 Mar 2026 13:56:27 +0100
Subject: [PATCH 15/19] fix: update resource selector to use matchLabels for
 deployment in experiment controller

---
 Tiltfile                                               |  4 ++--
 deploy/local/testkube/values.yaml                      |  3 ---
 operator/Dockerfile                                    |  2 +-
 operator/config/manager/manager.yaml                   |  1 +
 .../config/samples/testbench_v1alpha1_experiment.yaml  |  3 ---
 operator/internal/controller/experiment_controller.go  | 10 ++++++----
 6 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/Tiltfile b/Tiltfile
index a76d123..110c255 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -7,7 +7,7 @@ update_settings(max_parallel_updates=10, k8s_upsert_timeout_secs=600)
 load('ext://dotenv', 'dotenv')
 dotenv()
 
-v1alpha1.extension_repo(name='agentic-layer', url='https://github.com/agentic-layer/tilt-extensions', ref='v0.6.0')
+v1alpha1.extension_repo(name='agentic-layer', url='https://github.com/agentic-layer/tilt-extensions', ref='v0.13.0')
 
 v1alpha1.extension(name='cert-manager', repo_name='agentic-layer', repo_path='cert-manager')
 load('ext://cert-manager', 'cert_manager_install')
@@ -56,7 +56,7 @@ k8s_yaml(helm(
 # Apply local development manifests
 k8s_yaml(kustomize('deploy/local'))
 
-k8s_resource('ai-gateway-litellm', port_forwards=['11001:4000'])
+k8s_resource('ai-gateway', port_forwards=['11001:4000'])
 k8s_resource('weather-agent', port_forwards='11010:8000', labels=['agents'], resource_deps=['agent-runtime'])
 k8s_resource('lgtm', port_forwards=['11000:3000', '4318:4318'])
 
diff --git a/deploy/local/testkube/values.yaml b/deploy/local/testkube/values.yaml
index a5d8bfe..5eacb84 100644
--- a/deploy/local/testkube/values.yaml
+++ b/deploy/local/testkube/values.yaml
@@ -1,6 +1,3 @@
 global:
   testWorkflows:
     createOfficialTemplates: false
-testkube-operator:
-  ## deploy Operator chart
-  enabled: enable
diff --git a/operator/Dockerfile b/operator/Dockerfile
index 5a82af7..16ed4e9 100644
--- a/operator/Dockerfile
+++ b/operator/Dockerfile
@@ -1,5 +1,5 @@
 # Build the manager binary
-FROM golang:1.25 AS builder
+FROM golang:1.26 AS builder
 ARG TARGETOS
 ARG TARGETARCH
 
diff --git a/operator/config/manager/manager.yaml b/operator/config/manager/manager.yaml
index 7a734df..c373eb8 100644
--- a/operator/config/manager/manager.yaml
+++ b/operator/config/manager/manager.yaml
@@ -64,6 +64,7 @@ spec:
           - --leader-elect
           - --health-probe-bind-address=:8081
         image: controller:latest
+        imagePullPolicy: IfNotPresent
         name: manager
         securityContext:
           allowPrivilegeEscalation: false
diff --git a/operator/config/samples/testbench_v1alpha1_experiment.yaml b/operator/config/samples/testbench_v1alpha1_experiment.yaml
index be2e900..ef0239c 100644
--- a/operator/config/samples/testbench_v1alpha1_experiment.yaml
+++ b/operator/config/samples/testbench_v1alpha1_experiment.yaml
@@ -9,9 +9,6 @@ spec:
   agentRef:
     name: weather-agent
     namespace: sample-agents
-  aiGatewayRef:
-    name: ai-gateway
-    namespace: ai-gateway
   otlpEndpoint: http://lgtm.monitoring.svc.cluster.local:4318
   llmAsAJudgeModel: gemini-2.5-flash-lite
   defaultThreshold: 0.9
diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go
index 105a38d..a5091db 100644
--- a/operator/internal/controller/experiment_controller.go
+++ b/operator/internal/controller/experiment_controller.go
@@ -462,10 +462,12 @@ func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Ex
 				"namespace": testkubeNamespace,
 			},
 			"spec": map[string]interface{}{
-				"resource": "deployment",
-				"resourceSelector": map[string]interface{}{
-					"name":      experiment.Spec.AgentRef.Name,
-					"namespace": agentNs,
+				"selector": map[string]interface{}{
+					"matchLabels": map[string]interface{}{
+						"testkube.io/resource-kind":      "Deployment",
+						"testkube.io/resource-name":      experiment.Spec.AgentRef.Name,
+						"testkube.io/resource-namespace": agentNs,
+					},
 				},
 				"event":             r.resolveTriggerEvent(experiment),
 				"action":            "run",

From dbbc7083d298c1e872c5f58b7167cd306d272170 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Thu, 26 Mar 2026 15:11:15 +0100
Subject: [PATCH 16/19] refactor: restructure DatasetSource with InlineDataset
 type

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 operator/api/v1alpha1/experiment_types.go     |  48 +++--
 .../api/v1alpha1/zz_generated.deepcopy.go     |  45 ++--
 ...estbench.agentic-layer.ai_experiments.yaml | 199 +++++++++---------
 3 files changed, 165 insertions(+), 127 deletions(-)

diff --git a/operator/api/v1alpha1/experiment_types.go b/operator/api/v1alpha1/experiment_types.go
index 3ce9322..bd78760 100644
--- a/operator/api/v1alpha1/experiment_types.go
+++ b/operator/api/v1alpha1/experiment_types.go
@@ -47,7 +47,27 @@ type S3Source struct {
 	Key string `json:"key"`
 }
 
-// DatasetSource defines where to load the test dataset from
+// InlineDataset defines an inline experiment dataset with scenarios, model, and threshold.
+type InlineDataset struct {
+	// LLM model used for evaluation (e.g., "gemini-2.5-flash-lite")
+	// +optional
+	LLMAsAJudgeModel string `json:"llmAsAJudgeModel,omitempty"`
+
+	// Default threshold for all metrics (0.0-1.0)
+	// +optional
+	// +kubebuilder:validation:Minimum=0.0
+	// +kubebuilder:validation:Maximum=1.0
+	DefaultThreshold *float64 `json:"defaultThreshold,omitempty"`
+
+	// Test scenarios
+	// +kubebuilder:validation:Required
+	// +kubebuilder:validation:MinItems=1
+	Scenarios []Scenario `json:"scenarios"`
+}
+
+// DatasetSource defines where to load the test dataset from.
+// Exactly one of s3, url, or inline must be set.
+// +kubebuilder:validation:XValidation:rule="(has(self.s3) ? 1 : 0) + ((has(self.url) && self.url != '') ? 1 : 0) + (has(self.inline) ? 1 : 0) == 1",message="exactly one of s3, url, or inline must be set"
 type DatasetSource struct {
 	// S3 source configuration
 	// +optional
@@ -56,6 +76,10 @@ type DatasetSource struct {
 	// URL source (HTTP/HTTPS)
 	// +optional
 	URL string `json:"url,omitempty"`
+
+	// Inline dataset with scenarios
+	// +optional
+	Inline *InlineDataset `json:"inline,omitempty"`
 }
 
 // ToolCall represents an expected tool invocation
@@ -155,7 +179,6 @@ type TriggerSpec struct {
 }
 
 // ExperimentSpec defines the desired state of Experiment
-// +kubebuilder:validation:XValidation:rule="!(has(self.dataset) && has(self.scenarios))",message="dataset and scenarios are mutually exclusive"
 type ExperimentSpec struct {
 	// Reference to the Agent to evaluate
 	// +kubebuilder:validation:Required
@@ -166,24 +189,9 @@ type ExperimentSpec struct {
 	// +optional
 	AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"`
 
-	// Source of the test dataset (mutually exclusive with scenarios)
-	// +optional
-	Dataset *DatasetSource `json:"dataset,omitempty"`
-
-	// LLM model used for evaluation (e.g., "gemini-2.5-flash-lite", "gpt-4o")
-	// +optional
-	LLMAsAJudgeModel string `json:"llmAsAJudgeModel,omitempty"`
-
-	// Default threshold for all metrics (0.0-1.0)
-	// +optional
-	// +kubebuilder:validation:Minimum=0.0
-	// +kubebuilder:validation:Maximum=1.0
-	// +kubebuilder:default=0.9
-	DefaultThreshold float64 `json:"defaultThreshold,omitempty"`
-
-	// Inline test scenarios (mutually exclusive with dataset)
-	// +optional
-	Scenarios []Scenario `json:"scenarios,omitempty"`
+	// Source of the test dataset
+	// +kubebuilder:validation:Required
+	Dataset DatasetSource `json:"dataset"`
 
 	// OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318")
 	// +optional
diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go
index fdf55f8..59bc1c6 100644
--- a/operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/operator/api/v1alpha1/zz_generated.deepcopy.go
@@ -49,6 +49,11 @@ func (in *DatasetSource) DeepCopyInto(out *DatasetSource) {
 		*out = new(S3Source)
 		**out = **in
 	}
+	if in.Inline != nil {
+		in, out := &in.Inline, &out.Inline
+		*out = new(InlineDataset)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DatasetSource.
@@ -129,18 +134,7 @@ func (in *ExperimentSpec) DeepCopyInto(out *ExperimentSpec) {
 		*out = new(v1.ObjectReference)
 		**out = **in
 	}
-	if in.Dataset != nil {
-		in, out := &in.Dataset, &out.Dataset
-		*out = new(DatasetSource)
-		(*in).DeepCopyInto(*out)
-	}
-	if in.Scenarios != nil {
-		in, out := &in.Scenarios, &out.Scenarios
-		*out = make([]Scenario, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
+	in.Dataset.DeepCopyInto(&out.Dataset)
 	if in.Trigger != nil {
 		in, out := &in.Trigger, &out.Trigger
 		*out = new(TriggerSpec)
@@ -205,6 +199,33 @@ func (in *GeneratedResource) DeepCopy() *GeneratedResource {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *InlineDataset) DeepCopyInto(out *InlineDataset) {
+	*out = *in
+	if in.DefaultThreshold != nil {
+		in, out := &in.DefaultThreshold, &out.DefaultThreshold
+		*out = new(float64)
+		**out = **in
+	}
+	if in.Scenarios != nil {
+		in, out := &in.Scenarios, &out.Scenarios
+		*out = make([]Scenario, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InlineDataset.
+func (in *InlineDataset) DeepCopy() *InlineDataset {
+	if in == nil {
+		return nil
+	}
+	out := new(InlineDataset)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *LastExecution) DeepCopyInto(out *LastExecution) {
 	*out = *in
diff --git a/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml b/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml
index 87834d3..e8f8964 100644
--- a/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml
+++ b/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml
@@ -109,8 +109,106 @@ spec:
                 type: object
                 x-kubernetes-map-type: atomic
               dataset:
-                description: Source of the test dataset (mutually exclusive with scenarios)
+                description: Source of the test dataset
                 properties:
+                  inline:
+                    description: Inline dataset with scenarios
+                    properties:
+                      defaultThreshold:
+                        description: Default threshold for all metrics (0.0-1.0)
+                        maximum: 1
+                        minimum: 0
+                        type: number
+                      llmAsAJudgeModel:
+                        description: LLM model used for evaluation (e.g., "gemini-2.5-flash-lite")
+                        type: string
+                      scenarios:
+                        description: Test scenarios
+                        items:
+                          description: Scenario represents a test scenario containing
+                            multiple steps
+                          properties:
+                            name:
+                              description: Name of the scenario
+                              type: string
+                            steps:
+                              description: Steps in this scenario
+                              items:
+                                description: Step represents a single test step within
+                                  a scenario
+                                properties:
+                                  customValues:
+                                    description: Custom key-value pairs (e.g., retrieved_contexts)
+                                    x-kubernetes-preserve-unknown-fields: true
+                                  input:
+                                    description: User input to the agent
+                                    type: string
+                                  metrics:
+                                    description: Metrics to evaluate for this step
+                                    items:
+                                      description: Metric defines a single metric
+                                        evaluation configuration
+                                      properties:
+                                        metricName:
+                                          description: Name of the metric (e.g., "ragas_faithfulness",
+                                            "tool_check")
+                                          type: string
+                                        parameters:
+                                          description: Additional parameters for the
+                                            metric
+                                          x-kubernetes-preserve-unknown-fields: true
+                                        threshold:
+                                          description: Threshold for pass/fail (0.0-1.0)
+                                          maximum: 1
+                                          minimum: 0
+                                          type: number
+                                      required:
+                                      - metricName
+                                      type: object
+                                    type: array
+                                  reference:
+                                    description: Expected reference data for evaluation
+                                    properties:
+                                      response:
+                                        description: Expected response text
+                                        type: string
+                                      toolCalls:
+                                        description: Expected tool calls
+                                        items:
+                                          description: ToolCall represents an expected
+                                            tool invocation
+                                          properties:
+                                            args:
+                                              description: Arguments passed to the
+                                                tool (JSON object)
+                                              x-kubernetes-preserve-unknown-fields: true
+                                            name:
+                                              description: Name of the tool
+                                              type: string
+                                          required:
+                                          - name
+                                          type: object
+                                        type: array
+                                      topics:
+                                        description: Expected topics to be covered
+                                        items:
+                                          type: string
+                                        type: array
+                                    type: object
+                                required:
+                                - input
+                                type: object
+                              minItems: 1
+                              type: array
+                          required:
+                          - name
+                          - steps
+                          type: object
+                        minItems: 1
+                        type: array
+                    required:
+                    - scenarios
+                    type: object
                   s3:
                     description: S3 source configuration
                     properties:
@@ -128,100 +226,13 @@ spec:
                     description: URL source (HTTP/HTTPS)
                     type: string
                 type: object
-              defaultThreshold:
-                default: 0.9
-                description: Default threshold for all metrics (0.0-1.0)
-                maximum: 1
-                minimum: 0
-                type: number
-              llmAsAJudgeModel:
-                description: LLM model used for evaluation (e.g., "gemini-2.5-flash-lite",
-                  "gpt-4o")
-                type: string
+                x-kubernetes-validations:
+                - message: exactly one of s3, url, or inline must be set
+                  rule: '(has(self.s3) ? 1 : 0) + ((has(self.url) && self.url != '''')
+                    ? 1 : 0) + (has(self.inline) ? 1 : 0) == 1'
               otlpEndpoint:
                 description: OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318")
                 type: string
-              scenarios:
-                description: Inline test scenarios (mutually exclusive with dataset)
-                items:
-                  description: Scenario represents a test scenario containing multiple
-                    steps
-                  properties:
-                    name:
-                      description: Name of the scenario
-                      type: string
-                    steps:
-                      description: Steps in this scenario
-                      items:
-                        description: Step represents a single test step within a scenario
-                        properties:
-                          customValues:
-                            description: Custom key-value pairs (e.g., retrieved_contexts)
-                            x-kubernetes-preserve-unknown-fields: true
-                          input:
-                            description: User input to the agent
-                            type: string
-                          metrics:
-                            description: Metrics to evaluate for this step
-                            items:
-                              description: Metric defines a single metric evaluation
-                                configuration
-                              properties:
-                                metricName:
-                                  description: Name of the metric (e.g., "ragas_faithfulness",
-                                    "tool_check")
-                                  type: string
-                                parameters:
-                                  description: Additional parameters for the metric
-                                  x-kubernetes-preserve-unknown-fields: true
-                                threshold:
-                                  description: Threshold for pass/fail (0.0-1.0)
-                                  maximum: 1
-                                  minimum: 0
-                                  type: number
-                              required:
-                              - metricName
-                              type: object
-                            type: array
-                          reference:
-                            description: Expected reference data for evaluation
-                            properties:
-                              response:
-                                description: Expected response text
-                                type: string
-                              toolCalls:
-                                description: Expected tool calls
-                                items:
-                                  description: ToolCall represents an expected tool
-                                    invocation
-                                  properties:
-                                    args:
-                                      description: Arguments passed to the tool (JSON
-                                        object)
-                                      x-kubernetes-preserve-unknown-fields: true
-                                    name:
-                                      description: Name of the tool
-                                      type: string
-                                  required:
-                                  - name
-                                  type: object
-                                type: array
-                              topics:
-                                description: Expected topics to be covered
-                                items:
-                                  type: string
-                                type: array
-                            type: object
-                        required:
-                        - input
-                        type: object
-                      minItems: 1
-                      type: array
-                  required:
-                  - name
-                  - steps
-                  type: object
-                type: array
               trigger:
                 description: Trigger configuration
                 properties:
@@ -242,10 +253,8 @@ spec:
                 type: object
             required:
             - agentRef
+            - dataset
             type: object
-            x-kubernetes-validations:
-            - message: dataset and scenarios are mutually exclusive
-              rule: '!(has(self.dataset) && has(self.scenarios))'
           status:
             description: ExperimentStatus defines the observed state of Experiment
             properties:

From c5c7d61836769c184e8bc4ce1c3da41c26a9f914 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Thu, 26 Mar 2026 15:12:54 +0100
Subject: [PATCH 17/19] refactor: update experimentJSON and controller to use
 Dataset.Inline

- Make experimentJSON.DefaultThreshold an optional pointer (*float64, omitempty) so an unset threshold is omitted from experiment.json
- Source buildExperimentJSON from InlineDataset instead of Spec.Scenarios
- Add ConfigMap cleanup when switching from inline to S3/URL mode
- Branch buildTestWorkflow on Dataset.Inline == nil for setup-template inclusion
- Simplify resolveDatasetURL to remove nil guard (Dataset is now value type)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../controller/experiment_controller.go       | 42 ++++++++++++-------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go
index a5091db..c7f9c30 100644
--- a/operator/internal/controller/experiment_controller.go
+++ b/operator/internal/controller/experiment_controller.go
@@ -64,7 +64,7 @@ var (
 // experimentJSON is the JSON representation of experiment.json consumed by testbench scripts.
 type experimentJSON struct {
 	LLMAsAJudgeModel string         `json:"llm_as_a_judge_model,omitempty"`
-	DefaultThreshold float64        `json:"default_threshold"`
+	DefaultThreshold *float64       `json:"default_threshold,omitempty"`
 	Scenarios        []scenarioJSON `json:"scenarios"`
 }
 
@@ -162,13 +162,29 @@ func (r *ExperimentReconciler) reconcileResources(
 	return result, nil
 }
 
-// reconcileConfigMap creates or updates the ConfigMap holding experiment.json.
+// reconcileConfigMap creates or updates the ConfigMap holding experiment.json for inline mode,
+// or deletes a stale ConfigMap when switching to S3/URL mode.
 func (r *ExperimentReconciler) reconcileConfigMap(
 	ctx context.Context,
 	experiment *testbenchv1alpha1.Experiment,
 	generatedResources *[]testbenchv1alpha1.GeneratedResource,
 ) error {
 	cmName := experiment.Name + "-experiment"
+
+	if experiment.Spec.Dataset.Inline == nil {
+		// Delete stale ConfigMap if it exists (mode switched from inline to S3/URL).
+		cm := &corev1.ConfigMap{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      cmName,
+				Namespace: testkubeNamespace,
+			},
+		}
+		if err := r.Delete(ctx, cm); err != nil && !errors.IsNotFound(err) {
+			return err
+		}
+		return nil
+	}
+
 	cm := &corev1.ConfigMap{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      cmName,
@@ -203,15 +219,16 @@ func (r *ExperimentReconciler) reconcileConfigMap(
 	return nil
 }
 
-// buildExperimentJSON serializes the Experiment spec scenarios into the experiment.json format
-// expected by the testbench scripts. For dataset mode, it returns an empty scenarios list.
+// buildExperimentJSON serializes the InlineDataset into the experiment.json format
+// expected by the testbench scripts.
 func (r *ExperimentReconciler) buildExperimentJSON(experiment *testbenchv1alpha1.Experiment) (string, error) {
+	inline := experiment.Spec.Dataset.Inline
 	exp := experimentJSON{
-		LLMAsAJudgeModel: experiment.Spec.LLMAsAJudgeModel,
-		DefaultThreshold: experiment.Spec.DefaultThreshold,
-		Scenarios:        make([]scenarioJSON, 0, len(experiment.Spec.Scenarios)),
+		LLMAsAJudgeModel: inline.LLMAsAJudgeModel,
+		DefaultThreshold: inline.DefaultThreshold,
+		Scenarios:        make([]scenarioJSON, 0, len(inline.Scenarios)),
 	}
-	for _, scenario := range experiment.Spec.Scenarios {
+	for _, scenario := range inline.Scenarios {
 		sj := scenarioJSON{
 			Name:  scenario.Name,
 			Steps: make([]stepJSON, 0, len(scenario.Steps)),
@@ -309,7 +326,7 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E
 
 	// Build the list of phase templates to chain.
 	var useTemplates []interface{}
-	if experiment.Spec.Dataset != nil {
+	if experiment.Spec.Dataset.Inline == nil {
 		useTemplates = append(useTemplates, map[string]interface{}{
 			"name": "setup-template",
 			"config": map[string]interface{}{
@@ -351,8 +368,8 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E
 		}
 	}
 
-	// For scenarios mode, mount the pre-populated ConfigMap as the experiment file.
-	if experiment.Spec.Dataset == nil {
+	// For inline mode, mount the pre-populated ConfigMap as the experiment file.
+	if experiment.Spec.Dataset.Inline != nil {
 		spec["content"] = map[string]interface{}{
 			"files": []interface{}{
 				map[string]interface{}{
@@ -551,9 +568,6 @@ func (r *ExperimentReconciler) resolveAgentURL(experiment *testbenchv1alpha1.Exp
 
 // resolveDatasetURL extracts the dataset URL from the DatasetSource.
 func (r *ExperimentReconciler) resolveDatasetURL(experiment *testbenchv1alpha1.Experiment) string {
-	if experiment.Spec.Dataset == nil {
-		return ""
-	}
 	if experiment.Spec.Dataset.URL != "" {
 		return experiment.Spec.Dataset.URL
 	}

From 5cd2d345e965d3a1aef30953186e8de79843f222 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Thu, 26 Mar 2026 15:14:40 +0100
Subject: [PATCH 18/19] docs: update sample Experiment YAML for new dataset
 structure

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../testbench_v1alpha1_experiment.yaml        | 139 +++++++++---------
 1 file changed, 72 insertions(+), 67 deletions(-)

diff --git a/operator/config/samples/testbench_v1alpha1_experiment.yaml b/operator/config/samples/testbench_v1alpha1_experiment.yaml
index ef0239c..c90d659 100644
--- a/operator/config/samples/testbench_v1alpha1_experiment.yaml
+++ b/operator/config/samples/testbench_v1alpha1_experiment.yaml
@@ -9,72 +9,77 @@ spec:
   agentRef:
     name: weather-agent
     namespace: sample-agents
+  aiGatewayRef:
+    name: ai-gateway
+    namespace: ai-gateway
   otlpEndpoint: http://lgtm.monitoring.svc.cluster.local:4318
-  llmAsAJudgeModel: gemini-2.5-flash-lite
-  defaultThreshold: 0.9
-  scenarios:
-    - name: "Weather in New York"
-      steps:
-        - input: "What is the weather like in New York right now?"
-          reference:
-            toolCalls:
-              - name: get_weather
-                args:
-                  city: "New York"
-            topics:
-              - weather
-          metrics:
-            - metricName: AgentGoalAccuracyWithoutReference
-            - metricName: ToolCallAccuracy
-            - metricName: TopicAdherence
-              parameters:
-                mode: precision
-    - name: "Weather in Bangkok (unavailable)"
-      steps:
-        - input: "What is the weather like in Bangkok right now?"
-          reference:
-            toolCalls:
-              - name: get_weather
-                args:
-                  city: "New York"
-            topics:
-              - time
-          metrics:
-            - metricName: AgentGoalAccuracyWithoutReference
-            - metricName: ToolCallAccuracy
-            - metricName: TopicAdherence
-              parameters:
-                mode: precision
-    - name: "Weather in New York (alt reference)"
-      steps:
-        - input: "What is the weather like in New York right now?"
-          reference:
-            toolCalls:
-              - name: get_current_time
-                args:
-                  city: "New York"
-          metrics:
-            - metricName: AgentGoalAccuracyWithoutReference
-            - metricName: ToolCallAccuracy
-    - name: "Weather then time in New York (multi-step)"
-      steps:
-        - input: "What is the weather like in New York right now?"
-          reference:
-            toolCalls:
-              - name: get_weather
-                args:
-                  city: "New York"
-          metrics:
-            - metricName: AgentGoalAccuracyWithoutReference
-            - metricName: ToolCallAccuracy
-        - input: "What time is it in New York?"
-          reference:
-            toolCalls:
-              - name: get_current_time
-                args:
-                  city: "New York"
-          metrics:
-            - metricName: AgentGoalAccuracyWithoutReference
-            - metricName: ToolCallAccuracy
+  dataset:
+    inline:
+      llmAsAJudgeModel: gemini-2.5-flash-lite
+      defaultThreshold: 0.9
+      scenarios:
+        - name: "Weather in New York"
+          steps:
+            - input: "What is the weather like in New York right now?"
+              reference:
+                toolCalls:
+                  - name: get_weather
+                    args:
+                      city: "New York"
+                topics:
+                  - weather
+              metrics:
+                - metricName: AgentGoalAccuracyWithoutReference
+                - metricName: ToolCallAccuracy
+                - metricName: TopicAdherence
+                  parameters:
+                    mode: precision
+        - name: "Weather in Bangkok (unavailable)"
+          steps:
+            - input: "What is the weather like in Bangkok right now?"
+              reference:
+                toolCalls:
+                  - name: get_weather
+                    args:
+                      city: "New York"
+                topics:
+                  - time
+              metrics:
+                - metricName: AgentGoalAccuracyWithoutReference
+                - metricName: ToolCallAccuracy
+                - metricName: TopicAdherence
+                  parameters:
+                    mode: precision
+        - name: "Weather in New York (alt reference)"
+          steps:
+            - input: "What is the weather like in New York right now?"
+              reference:
+                toolCalls:
+                  - name: get_current_time
+                    args:
+                      city: "New York"
+              metrics:
+                - metricName: AgentGoalAccuracyWithoutReference
+                - metricName: ToolCallAccuracy
+        - name: "Weather then time in New York (multi-step)"
+          steps:
+            - input: "What is the weather like in New York right now?"
+              reference:
+                toolCalls:
+                  - name: get_weather
+                    args:
+                      city: "New York"
+              metrics:
+                - metricName: AgentGoalAccuracyWithoutReference
+                - metricName: ToolCallAccuracy
+            - input: "What time is it in New York?"
+              reference:
+                toolCalls:
+                  - name: get_current_time
+                    args:
+                      city: "New York"
+              metrics:
+                - metricName: AgentGoalAccuracyWithoutReference
+                - metricName: ToolCallAccuracy
   trigger:
-    enabled: true
\ No newline at end of file
+    enabled: true

From 04c2b5961c212a7a349f2a3f51244a1d56601e05 Mon Sep 17 00:00:00 2001
From: Florian Mallmann <florian.mallmann@qaware.de>
Date: Thu, 26 Mar 2026 15:19:16 +0100
Subject: [PATCH 19/19] test: update all fixtures for DatasetSource
 restructuring

Replace Scenarios at ExperimentSpec level with Dataset.Inline wrapper,
change Dataset pointer to value type, delete obsolete dataset-mode
ConfigMap test, and remove the now-invalid buildExperimentJSON URL-mode test.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../controller/experiment_controller_test.go  | 118 ++++++++----------
 1 file changed, 50 insertions(+), 68 deletions(-)

diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go
index 45149fc..2e7447b 100644
--- a/operator/internal/controller/experiment_controller_test.go
+++ b/operator/internal/controller/experiment_controller_test.go
@@ -82,28 +82,30 @@ var _ = Describe("Experiment Controller", func() {
 			exp := &testbenchv1alpha1.Experiment{
 				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
 				Spec: testbenchv1alpha1.ExperimentSpec{
-					AgentRef:         testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"},
-					LLMAsAJudgeModel: "gemini-2.5-flash-lite",
-					DefaultThreshold: 0.9,
-					Scenarios: []testbenchv1alpha1.Scenario{
-						{
-							Name: "test scenario",
-							Steps: []testbenchv1alpha1.Step{
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"},
+					Dataset: testbenchv1alpha1.DatasetSource{
+						Inline: &testbenchv1alpha1.InlineDataset{
+							Scenarios: []testbenchv1alpha1.Scenario{
 								{
-									Input: "What is the weather?",
-									Reference: &testbenchv1alpha1.Reference{
-										Response: "It is sunny",
-										Topics:   []string{"weather"},
-										ToolCalls: []testbenchv1alpha1.ToolCall{
-											{
-												Name: "get_weather",
-												Args: runtime.RawExtension{Raw: []byte(`{"city":"NY"}`)},
+									Name: "test scenario",
+									Steps: []testbenchv1alpha1.Step{
+										{
+											Input: "What is the weather?",
+											Reference: &testbenchv1alpha1.Reference{
+												Response: "It is sunny",
+												Topics:   []string{"weather"},
+												ToolCalls: []testbenchv1alpha1.ToolCall{
+													{
+														Name: "get_weather",
+														Args: runtime.RawExtension{Raw: []byte(`{"city":"NY"}`)},
+													},
+												},
+											},
+											Metrics: []testbenchv1alpha1.Metric{
+												{MetricName: "AgentGoalAccuracy"},
 											},
 										},
 									},
-									Metrics: []testbenchv1alpha1.Metric{
-										{MetricName: "AgentGoalAccuracy"},
-									},
 								},
 							},
 						},
@@ -129,8 +131,6 @@ var _ = Describe("Experiment Controller", func() {
 			By("verifying the experiment.json content")
 			var expJSON experimentJSON
 			Expect(json.Unmarshal([]byte(cm.Data["experiment.json"]), &expJSON)).To(Succeed())
-			Expect(expJSON.LLMAsAJudgeModel).To(Equal("gemini-2.5-flash-lite"))
-			Expect(expJSON.DefaultThreshold).To(Equal(0.9))
 			Expect(expJSON.Scenarios).To(HaveLen(1))
 			Expect(expJSON.Scenarios[0].Name).To(Equal("test scenario"))
 			Expect(expJSON.Scenarios[0].Steps).To(HaveLen(1))
@@ -274,7 +274,7 @@ var _ = Describe("Experiment Controller", func() {
 				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
 				Spec: testbenchv1alpha1.ExperimentSpec{
 					AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"},
-					Dataset: &testbenchv1alpha1.DatasetSource{
+					Dataset: testbenchv1alpha1.DatasetSource{
 						URL: "http://data-server/dataset.csv",
 					},
 				},
@@ -286,16 +286,12 @@ var _ = Describe("Experiment Controller", func() {
 			cleanupExperiment(expName)
 		})
 
-		It("should create a ConfigMap with empty scenarios as placeholder", func() {
+		It("should not create a ConfigMap in URL mode", func() {
 			Expect(reconcileExperiment(expName)).To(Succeed())
 
 			cm := &corev1.ConfigMap{}
-			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)).To(Succeed())
-			Expect(cm.Data).To(HaveKey("experiment.json"))
-
-			var expJSON experimentJSON
-			Expect(json.Unmarshal([]byte(cm.Data["experiment.json"]), &expJSON)).To(Succeed())
-			Expect(expJSON.Scenarios).To(BeEmpty())
+			err := k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)
+			Expect(errors.IsNotFound(err)).To(BeTrue())
 		})
 
 		It("should create a TestWorkflow with setup-template and correct datasetUrl", func() {
@@ -322,7 +318,7 @@ var _ = Describe("Experiment Controller", func() {
 		It("should resolve S3 dataset URL correctly", func() {
 			exp := &testbenchv1alpha1.Experiment{}
 			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed())
-			exp.Spec.Dataset = &testbenchv1alpha1.DatasetSource{
+			exp.Spec.Dataset = testbenchv1alpha1.DatasetSource{
 				S3: &testbenchv1alpha1.S3Source{Bucket: "my-bucket", Key: "data/dataset.csv"},
 			}
 			Expect(k8sClient.Update(ctx, exp)).To(Succeed())
@@ -352,9 +348,7 @@ var _ = Describe("Experiment Controller", func() {
 				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
 				Spec: testbenchv1alpha1.ExperimentSpec{
 					AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"},
-					Scenarios: []testbenchv1alpha1.Scenario{
-						{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}},
-					},
+					Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}},
 					Trigger: trigger,
 				},
 			}
@@ -472,8 +466,8 @@ var _ = Describe("Experiment Controller", func() {
 			exp := &testbenchv1alpha1.Experiment{
 				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
 				Spec: testbenchv1alpha1.ExperimentSpec{
-					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent"},
-					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"},
+					Dataset:  testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}},
 				},
 			}
 			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
@@ -525,19 +519,22 @@ var _ = Describe("Experiment Controller", func() {
 			r := newReconciler()
 			exp := &testbenchv1alpha1.Experiment{
 				Spec: testbenchv1alpha1.ExperimentSpec{
-					DefaultThreshold: 0.8,
-					Scenarios: []testbenchv1alpha1.Scenario{
-						{
-							Name: "s",
-							Steps: []testbenchv1alpha1.Step{
+					Dataset: testbenchv1alpha1.DatasetSource{
+						Inline: &testbenchv1alpha1.InlineDataset{
+							Scenarios: []testbenchv1alpha1.Scenario{
 								{
-									Input:        "q",
-									CustomValues: runtime.RawExtension{Raw: []byte(`{"key":"value"}`)},
-									Metrics: []testbenchv1alpha1.Metric{
+									Name: "s",
+									Steps: []testbenchv1alpha1.Step{
 										{
-											MetricName: "M",
-											Threshold:  0.7,
-											Parameters: runtime.RawExtension{Raw: []byte(`{"mode":"precision"}`)},
+											Input:        "q",
+											CustomValues: runtime.RawExtension{Raw: []byte(`{"key":"value"}`)},
+											Metrics: []testbenchv1alpha1.Metric{
+												{
+													MetricName: "M",
+													Threshold:  0.7,
+													Parameters: runtime.RawExtension{Raw: []byte(`{"mode":"precision"}`)},
+												},
+											},
 										},
 									},
 								},
@@ -551,23 +548,10 @@ var _ = Describe("Experiment Controller", func() {
 
 			var result experimentJSON
 			Expect(json.Unmarshal([]byte(data), &result)).To(Succeed())
-			Expect(result.DefaultThreshold).To(Equal(0.8))
 			Expect(result.Scenarios[0].Steps[0].CustomValues).To(MatchJSON(`{"key":"value"}`))
 			Expect(result.Scenarios[0].Steps[0].Metrics[0].Parameters).To(MatchJSON(`{"mode":"precision"}`))
 		})
 
-		It("should produce empty scenarios list for dataset mode", func() {
-			r := newReconciler()
-			exp := &testbenchv1alpha1.Experiment{
-				Spec: testbenchv1alpha1.ExperimentSpec{
-					DefaultThreshold: 0.9,
-					Dataset:          &testbenchv1alpha1.DatasetSource{URL: "http://example.com/data.csv"},
-				},
-			}
-			data, err := r.buildExperimentJSON(exp)
-			Expect(err).NotTo(HaveOccurred())
-			Expect(data).To(ContainSubstring(`"scenarios": []`))
-		})
 	})
 
 	Context("AiGateway resolution", func() {
@@ -583,9 +567,7 @@ var _ = Describe("Experiment Controller", func() {
 						Name:      "my-gateway",
 						Namespace: "ai-gateway",
 					},
-					Scenarios: []testbenchv1alpha1.Scenario{
-						{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}},
-					},
+					Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}},
 				},
 			}
 			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
@@ -696,8 +678,8 @@ var _ = Describe("Experiment Controller", func() {
 			exp := &testbenchv1alpha1.Experiment{
 				ObjectMeta: metav1.ObjectMeta{Name: "exp-gw-url", Namespace: namespace},
 				Spec: testbenchv1alpha1.ExperimentSpec{
-					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"},
-					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"},
+					Dataset:  testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}},
 				},
 			}
 			gw := &runtimev1alpha1.AiGateway{
@@ -727,8 +709,8 @@ var _ = Describe("Experiment Controller", func() {
 			exp := &testbenchv1alpha1.Experiment{
 				ObjectMeta: metav1.ObjectMeta{Name: "exp-no-gw", Namespace: namespace},
 				Spec: testbenchv1alpha1.ExperimentSpec{
-					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"},
-					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"},
+					Dataset:  testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}},
 				},
 			}
 
@@ -763,7 +745,7 @@ var _ = Describe("Experiment Controller", func() {
 				Spec: testbenchv1alpha1.ExperimentSpec{
 					AgentRef:     testbenchv1alpha1.AgentRef{Name: "agent"},
 					OTLPEndpoint: "http://lgtm.monitoring.svc.cluster.local:4318",
-					Scenarios:    []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+					Dataset:      testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}},
 				},
 			}
 			Expect(k8sClient.Create(ctx, exp)).To(Succeed())
@@ -786,8 +768,8 @@ var _ = Describe("Experiment Controller", func() {
 			exp := &testbenchv1alpha1.Experiment{
 				ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace},
 				Spec: testbenchv1alpha1.ExperimentSpec{
-					AgentRef:  testbenchv1alpha1.AgentRef{Name: "agent"},
-					Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}},
+					AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"},
+					Dataset:  testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}},
 				},
 			}
 			Expect(k8sClient.Create(ctx, exp)).To(Succeed())