From 56e507b19848d2742c537e172dc93c8088cf8da4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Mar 2026 11:27:34 +0000 Subject: [PATCH 01/19] Initial plan From 4843de35c50aa7e37dfeaad6d91b931353a2810c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Mar 2026 12:02:04 +0000 Subject: [PATCH 02/19] Implement Experiment reconciler with ConfigMap, TestWorkflow, TestTrigger, status reporting, and tests Co-authored-by: fmallmann <30110193+fmallmann@users.noreply.github.com> --- operator/.golangci.yml | 39 +- operator/config/rbac/role.yaml | 36 ++ operator/go.mod | 2 +- .../controller/experiment_controller.go | 495 ++++++++++++++- .../controller/experiment_controller_test.go | 588 +++++++++++++++++- operator/internal/controller/suite_test.go | 5 +- .../crds/tests.testkube.io_testtriggers.yaml | 21 + ...stworkflows.testkube.io_testworkflows.yaml | 21 + 8 files changed, 1140 insertions(+), 67 deletions(-) create mode 100644 operator/internal/controller/testdata/crds/tests.testkube.io_testtriggers.yaml create mode 100644 operator/internal/controller/testdata/crds/testworkflows.testkube.io_testworkflows.yaml diff --git a/operator/.golangci.yml b/operator/.golangci.yml index aac8a13..e24a15b 100644 --- a/operator/.golangci.yml +++ b/operator/.golangci.yml @@ -1,33 +1,20 @@ +version: "2" + run: timeout: 5m allow-parallel-runners: true + # go version is set to 1.25 for compatibility with golangci-lint v2.10.1 + # which was built with go1.25; update when a newer linter release is available. 
+ go: "1.25" -issues: - # don't skip warning about doc comments - # don't exclude the default set of lint - exclude-use-default: false - # restore some of the defaults - # (fill in the rest as needed) - exclude-rules: - - path: "api/*" - linters: - - lll - - path: "internal/*" - linters: - - dupl - - lll linters: disable-all: true enable: - dupl - errcheck - - exportloopref - ginkgolinter - goconst - gocyclo - - gofmt - - goimports - - gosimple - govet - ineffassign - lll @@ -36,10 +23,24 @@ linters: - prealloc - revive - staticcheck - - typecheck - unconvert - unparam - unused + exclusions: + rules: + - path: "^api/" + linters: + - lll + - path: "^internal/" + linters: + - dupl + - lll + - path: "(^internal/|^test/|^cmd/)" + linters: + - revive + - path: "^test/" + linters: + - staticcheck linters-settings: revive: diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index 49b99de..a6be05a 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -4,6 +4,18 @@ kind: ClusterRole metadata: name: manager-role rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - testbench.agentic-layer.ai resources: @@ -30,3 +42,27 @@ rules: - get - patch - update +- apiGroups: + - tests.testkube.io + resources: + - testtriggers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - testworkflows.testkube.io + resources: + - testworkflows + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/operator/go.mod b/operator/go.mod index 0b2a062..27d9d75 100644 --- a/operator/go.mod +++ b/operator/go.mod @@ -1,6 +1,6 @@ module github.com/agentic-layer/testbench/operator -go 1.26.0 +go 1.25.0 require ( github.com/onsi/ginkgo/v2 v2.28.1 diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go index 
d187d4d..7565db7 100644 --- a/operator/internal/controller/experiment_controller.go +++ b/operator/internal/controller/experiment_controller.go @@ -18,16 +18,84 @@ package controller import ( "context" + "encoding/json" + "fmt" + "strings" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1" ) -// ExperimentReconciler reconciles a Experiment object +const ( + conditionReady = "Ready" + conditionWorkflowReady = "WorkflowReady" + otelConfigMapName = "otel-config" + otelEndpointKey = "OTEL_EXPORTER_OTLP_ENDPOINT" + defaultAgentPort = "8000" +) + +var ( + testWorkflowGVK = schema.GroupVersionKind{ + Group: "testworkflows.testkube.io", + Version: "v1", + Kind: "TestWorkflow", + } + testTriggerGVK = schema.GroupVersionKind{ + Group: "tests.testkube.io", + Version: "v1", + Kind: "TestTrigger", + } +) + +// experimentJSON is the JSON representation of experiment.json consumed by testbench scripts. 
+type experimentJSON struct { + LLMAsAJudgeModel string `json:"llm_as_a_judge_model,omitempty"` + DefaultThreshold float64 `json:"default_threshold"` + Scenarios []scenarioJSON `json:"scenarios"` +} + +type scenarioJSON struct { + Name string `json:"name"` + Steps []stepJSON `json:"steps"` +} + +type stepJSON struct { + Input string `json:"input"` + Reference *referenceJSON `json:"reference,omitempty"` + CustomValues json.RawMessage `json:"custom_values,omitempty"` + Metrics []metricJSON `json:"metrics,omitempty"` +} + +type referenceJSON struct { + Response string `json:"response,omitempty"` + ToolCalls []toolCallJSON `json:"tool_calls,omitempty"` + Topics []string `json:"topics,omitempty"` +} + +type toolCallJSON struct { + Name string `json:"name"` + Args json.RawMessage `json:"args,omitempty"` +} + +type metricJSON struct { + MetricName string `json:"metric_name"` + Threshold float64 `json:"threshold,omitempty"` + Parameters json.RawMessage `json:"parameters,omitempty"` +} + +// ExperimentReconciler reconciles an Experiment object. 
type ExperimentReconciler struct { client.Client Scheme *runtime.Scheme @@ -36,27 +104,428 @@ type ExperimentReconciler struct { // +kubebuilder:rbac:groups=testbench.agentic-layer.ai,resources=experiments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=testbench.agentic-layer.ai,resources=experiments/status,verbs=get;update;patch // +kubebuilder:rbac:groups=testbench.agentic-layer.ai,resources=experiments/finalizers,verbs=update +// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=testworkflows.testkube.io,resources=testworkflows,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=tests.testkube.io,resources=testtriggers,verbs=get;list;watch;create;update;patch;delete -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the Experiment object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile +// Reconcile moves the cluster state closer to the desired state specified by the Experiment. 
func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) + logger := log.FromContext(ctx) + + experiment := &testbenchv1alpha1.Experiment{} + if err := r.Get(ctx, req.NamespacedName, experiment); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + var generatedResources []testbenchv1alpha1.GeneratedResource + reconcileErr := r.reconcileResources(ctx, experiment, &generatedResources) + + if statusErr := r.updateStatus(ctx, experiment, generatedResources, reconcileErr); statusErr != nil { + logger.Error(statusErr, "failed to update status") + return ctrl.Result{}, statusErr + } - // TODO(user): your logic here + return ctrl.Result{}, reconcileErr +} - return ctrl.Result{}, nil +func (r *ExperimentReconciler) reconcileResources( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + generatedResources *[]testbenchv1alpha1.GeneratedResource, +) error { + if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil { + return fmt.Errorf("reconciling ConfigMap: %w", err) + } + if err := r.reconcileTestWorkflow(ctx, experiment, generatedResources); err != nil { + return fmt.Errorf("reconciling TestWorkflow: %w", err) + } + if err := r.reconcileTestTrigger(ctx, experiment, generatedResources); err != nil { + return fmt.Errorf("reconciling TestTrigger: %w", err) + } + return nil +} + +// reconcileConfigMap creates or updates the ConfigMap holding experiment.json. 
+func (r *ExperimentReconciler) reconcileConfigMap( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + generatedResources *[]testbenchv1alpha1.GeneratedResource, +) error { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: experiment.Name, + Namespace: experiment.Namespace, + }, + } + + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, cm, func() error { + if err := controllerutil.SetControllerReference(experiment, cm, r.Scheme); err != nil { + return err + } + data, buildErr := r.buildExperimentJSON(experiment) + if buildErr != nil { + return buildErr + } + cm.Data = map[string]string{ + "experiment.json": data, + } + return nil + }) + if err != nil { + return err + } + + *generatedResources = append(*generatedResources, testbenchv1alpha1.GeneratedResource{ + Kind: "ConfigMap", + Name: cm.Name, + Namespace: cm.Namespace, + }) + return nil +} + +// buildExperimentJSON serializes the Experiment spec scenarios into the experiment.json format +// expected by the testbench scripts. For dataset mode, it returns an empty scenarios list. 
+func (r *ExperimentReconciler) buildExperimentJSON(experiment *testbenchv1alpha1.Experiment) (string, error) { + exp := experimentJSON{ + LLMAsAJudgeModel: experiment.Spec.LLMAsAJudgeModel, + DefaultThreshold: experiment.Spec.DefaultThreshold, + Scenarios: make([]scenarioJSON, 0, len(experiment.Spec.Scenarios)), + } + for _, scenario := range experiment.Spec.Scenarios { + sj := scenarioJSON{ + Name: scenario.Name, + Steps: make([]stepJSON, 0, len(scenario.Steps)), + } + for _, step := range scenario.Steps { + sj.Steps = append(sj.Steps, r.convertStep(step)) + } + exp.Scenarios = append(exp.Scenarios, sj) + } + data, err := json.MarshalIndent(exp, "", " ") + if err != nil { + return "", err + } + return string(data), nil +} + +func (r *ExperimentReconciler) convertStep(step testbenchv1alpha1.Step) stepJSON { + sj := stepJSON{Input: step.Input} + if step.Reference != nil { + ref := &referenceJSON{ + Response: step.Reference.Response, + Topics: step.Reference.Topics, + } + for _, tc := range step.Reference.ToolCalls { + ref.ToolCalls = append(ref.ToolCalls, toolCallJSON{ + Name: tc.Name, + Args: tc.Args.Raw, + }) + } + sj.Reference = ref + } + if step.CustomValues.Raw != nil { + sj.CustomValues = step.CustomValues.Raw + } + for _, m := range step.Metrics { + mj := metricJSON{ + MetricName: m.MetricName, + Threshold: m.Threshold, + } + if m.Parameters.Raw != nil { + mj.Parameters = m.Parameters.Raw + } + sj.Metrics = append(sj.Metrics, mj) + } + return sj +} + +// reconcileTestWorkflow creates or updates the Testkube TestWorkflow for the Experiment. 
+func (r *ExperimentReconciler) reconcileTestWorkflow( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + generatedResources *[]testbenchv1alpha1.GeneratedResource, +) error { + workflow := r.buildTestWorkflow(experiment) + if err := controllerutil.SetControllerReference(experiment, workflow, r.Scheme); err != nil { + return err + } + + existing := &unstructured.Unstructured{} + existing.SetGroupVersionKind(testWorkflowGVK) + err := r.Get(ctx, types.NamespacedName{Name: workflow.GetName(), Namespace: workflow.GetNamespace()}, existing) + if errors.IsNotFound(err) { + if createErr := r.Create(ctx, workflow); createErr != nil { + return createErr + } + } else if err != nil { + if isCRDNotInstalled(err) { + log.FromContext(ctx).Info("Testkube TestWorkflow CRD not installed; skipping TestWorkflow reconciliation") + return nil + } + return err + } else { + existing.Object["spec"] = workflow.Object["spec"] + existing.SetOwnerReferences(workflow.GetOwnerReferences()) + if updateErr := r.Update(ctx, existing); updateErr != nil { + return updateErr + } + } + + *generatedResources = append(*generatedResources, testbenchv1alpha1.GeneratedResource{ + Kind: "TestWorkflow", + Name: workflow.GetName(), + Namespace: workflow.GetNamespace(), + }) + return nil +} + +// buildTestWorkflow constructs the desired TestWorkflow unstructured object. +func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment) *unstructured.Unstructured { + agentURL := r.resolveAgentURL(experiment) + + // Build the list of phase templates to chain. 
+ var useTemplates []interface{} + if experiment.Spec.Dataset != nil { + useTemplates = append(useTemplates, map[string]interface{}{ + "name": "setup-template", + "config": map[string]interface{}{ + "datasetUrl": r.resolveDatasetURL(experiment), + }, + }) + } + useTemplates = append(useTemplates, + map[string]interface{}{ + "name": "run-template", + "config": map[string]interface{}{ + "agentUrl": agentURL, + }, + }, + map[string]interface{}{"name": "evaluate-template"}, + map[string]interface{}{"name": "publish-template"}, + map[string]interface{}{"name": "visualize-template"}, + ) + + spec := map[string]interface{}{ + "container": map[string]interface{}{ + "env": []interface{}{ + map[string]interface{}{ + "name": otelEndpointKey, + "valueFrom": map[string]interface{}{ + "configMapKeyRef": map[string]interface{}{ + "name": otelConfigMapName, + "key": otelEndpointKey, + }, + }, + }, + }, + }, + "use": useTemplates, + } + + // For scenarios mode, mount the pre-populated ConfigMap as the experiment file. + if experiment.Spec.Dataset == nil { + spec["content"] = map[string]interface{}{ + "files": []interface{}{ + map[string]interface{}{ + "path": "/data/datasets/experiment.json", + "contentFrom": map[string]interface{}{ + "configMapKeyRef": map[string]interface{}{ + "name": experiment.Name, + "key": "experiment.json", + }, + }, + }, + }, + } + } + + workflow := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": testWorkflowGVK.GroupVersion().String(), + "kind": testWorkflowGVK.Kind, + "metadata": map[string]interface{}{ + "name": experiment.Name, + "namespace": experiment.Namespace, + }, + "spec": spec, + }, + } + return workflow +} + +// reconcileTestTrigger creates, updates, or deletes the Testkube TestTrigger. 
+func (r *ExperimentReconciler) reconcileTestTrigger( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + generatedResources *[]testbenchv1alpha1.GeneratedResource, +) error { + triggerName := experiment.Name + "-trigger" + + if experiment.Spec.Trigger == nil || !experiment.Spec.Trigger.Enabled { + // Delete trigger if it exists. + existing := &unstructured.Unstructured{} + existing.SetGroupVersionKind(testTriggerGVK) + existing.SetName(triggerName) + existing.SetNamespace(experiment.Namespace) + if delErr := r.Delete(ctx, existing); delErr != nil && !errors.IsNotFound(delErr) { + if isCRDNotInstalled(delErr) { + return nil + } + return delErr + } + return nil + } + + trigger := r.buildTestTrigger(experiment) + if err := controllerutil.SetControllerReference(experiment, trigger, r.Scheme); err != nil { + return err + } + + existing := &unstructured.Unstructured{} + existing.SetGroupVersionKind(testTriggerGVK) + err := r.Get(ctx, types.NamespacedName{Name: triggerName, Namespace: experiment.Namespace}, existing) + if errors.IsNotFound(err) { + if createErr := r.Create(ctx, trigger); createErr != nil { + return createErr + } + } else if err != nil { + if isCRDNotInstalled(err) { + log.FromContext(ctx).Info("Testkube TestTrigger CRD not installed; skipping TestTrigger reconciliation") + return nil + } + return err + } else { + existing.Object["spec"] = trigger.Object["spec"] + existing.SetOwnerReferences(trigger.GetOwnerReferences()) + if updateErr := r.Update(ctx, existing); updateErr != nil { + return updateErr + } + } + + *generatedResources = append(*generatedResources, testbenchv1alpha1.GeneratedResource{ + Kind: "TestTrigger", + Name: triggerName, + Namespace: experiment.Namespace, + }) + return nil +} + +// buildTestTrigger constructs the desired TestTrigger unstructured object. 
+func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Experiment) *unstructured.Unstructured { + agentNs := experiment.Spec.AgentRef.Namespace + if agentNs == "" { + agentNs = experiment.Namespace + } + + concurrencyPolicy := "allow" + if experiment.Spec.Trigger != nil && experiment.Spec.Trigger.ConcurrencyPolicy != "" { + concurrencyPolicy = strings.ToLower(experiment.Spec.Trigger.ConcurrencyPolicy) + } + + return &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": testTriggerGVK.GroupVersion().String(), + "kind": testTriggerGVK.Kind, + "metadata": map[string]interface{}{ + "name": experiment.Name + "-trigger", + "namespace": experiment.Namespace, + }, + "spec": map[string]interface{}{ + "resource": "deployment", + "resourceSelector": map[string]interface{}{ + "name": experiment.Spec.AgentRef.Name, + "namespace": agentNs, + }, + "event": "modified", + "action": "run", + "execution": "testworkflow", + "concurrencyPolicy": concurrencyPolicy, + "testSelector": map[string]interface{}{ + "name": experiment.Name, + "namespace": experiment.Namespace, + }, + "disabled": false, + }, + }, + } +} + +// updateStatus updates Ready and WorkflowReady conditions and the generatedResources list. 
+func (r *ExperimentReconciler) updateStatus( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + generatedResources []testbenchv1alpha1.GeneratedResource, + reconcileErr error, +) error { + experiment.Status.GeneratedResources = generatedResources + + readyStatus := metav1.ConditionTrue + readyReason := "ReconcileSucceeded" + readyMsg := "All resources reconciled successfully" + if reconcileErr != nil { + readyStatus = metav1.ConditionFalse + readyReason = "ReconcileFailed" + readyMsg = reconcileErr.Error() + } + apimeta.SetStatusCondition(&experiment.Status.Conditions, metav1.Condition{ + Type: conditionReady, + Status: readyStatus, + ObservedGeneration: experiment.Generation, + Reason: readyReason, + Message: readyMsg, + }) + + wfStatus := metav1.ConditionTrue + wfReason := "WorkflowCreated" + wfMsg := "TestWorkflow created successfully" + if reconcileErr != nil { + wfStatus = metav1.ConditionFalse + wfReason = "WorkflowNotReady" + wfMsg = reconcileErr.Error() + } + apimeta.SetStatusCondition(&experiment.Status.Conditions, metav1.Condition{ + Type: conditionWorkflowReady, + Status: wfStatus, + ObservedGeneration: experiment.Generation, + Reason: wfReason, + Message: wfMsg, + }) + + return r.Status().Update(ctx, experiment) +} + +// resolveAgentURL builds the in-cluster DNS URL for the agent service. +func (r *ExperimentReconciler) resolveAgentURL(experiment *testbenchv1alpha1.Experiment) string { + ns := experiment.Spec.AgentRef.Namespace + if ns == "" { + ns = experiment.Namespace + } + return fmt.Sprintf("http://%s.%s:%s", experiment.Spec.AgentRef.Name, ns, defaultAgentPort) +} + +// resolveDatasetURL extracts the dataset URL from the DatasetSource. 
+func (r *ExperimentReconciler) resolveDatasetURL(experiment *testbenchv1alpha1.Experiment) string { + if experiment.Spec.Dataset == nil { + return "" + } + if experiment.Spec.Dataset.URL != "" { + return experiment.Spec.Dataset.URL + } + if experiment.Spec.Dataset.S3 != nil { + return fmt.Sprintf("s3://%s/%s", experiment.Spec.Dataset.S3.Bucket, experiment.Spec.Dataset.S3.Key) + } + return "" } // SetupWithManager sets up the controller with the Manager. func (r *ExperimentReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testbenchv1alpha1.Experiment{}). + Owns(&corev1.ConfigMap{}). Complete(r) } + +// isCRDNotInstalled returns true when the error indicates the target CRD is not registered. +func isCRDNotInstalled(err error) bool { + return apimeta.IsNoMatchError(err) +} diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go index d36b947..90d0ed0 100644 --- a/operator/internal/controller/experiment_controller_test.go +++ b/operator/internal/controller/experiment_controller_test.go @@ -18,67 +18,589 @@ package controller import ( "context" + "encoding/json" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1" ) var _ = Describe("Experiment Controller", func() { - Context("When reconciling a resource", func() { - const resourceName = "test-resource" + const namespace = "default" + ctx := context.Background() - ctx := context.Background() + newReconciler := func() *ExperimentReconciler { + return &ExperimentReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + } - typeNamespacedName := types.NamespacedName{ - Name: resourceName, - Namespace: "default", // TODO(user):Modify as needed + reconcileExperiment := func(name string) error { + _, err := newReconciler().Reconcile(ctx, reconcile.Request{ + NamespacedName: types.NamespacedName{Name: name, Namespace: namespace}, + }) + return err + } + + cleanupExperiment := func(name string) { + exp := &testbenchv1alpha1.Experiment{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, exp); err == nil { + _ = k8sClient.Delete(ctx, exp) + } + cm := &corev1.ConfigMap{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, cm); err == nil { + _ = k8sClient.Delete(ctx, cm) + } + wf := &unstructured.Unstructured{} + wf.SetGroupVersionKind(testWorkflowGVK) + if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, wf); err == nil { + _ = k8sClient.Delete(ctx, wf) + } + trig := &unstructured.Unstructured{} + trig.SetGroupVersionKind(testTriggerGVK) + if err := k8sClient.Get(ctx, types.NamespacedName{Name: name + "-trigger", Namespace: namespace}, trig); err 
== nil { + _ = k8sClient.Delete(ctx, trig) } - experiment := &testbenchv1alpha1.Experiment{} + } + + Context("Scenarios mode reconciliation", func() { + const expName = "exp-scenarios" BeforeEach(func() { - By("creating the custom resource for the Kind Experiment") - err := k8sClient.Get(ctx, typeNamespacedName, experiment) - if err != nil && errors.IsNotFound(err) { - resource := &testbenchv1alpha1.Experiment{ - ObjectMeta: metav1.ObjectMeta{ - Name: resourceName, - Namespace: "default", + By("creating the Experiment with inline scenarios") + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"}, + LLMAsAJudgeModel: "gemini-2.5-flash-lite", + DefaultThreshold: 0.9, + Scenarios: []testbenchv1alpha1.Scenario{ + { + Name: "test scenario", + Steps: []testbenchv1alpha1.Step{ + { + Input: "What is the weather?", + Reference: &testbenchv1alpha1.Reference{ + Response: "It is sunny", + Topics: []string{"weather"}, + ToolCalls: []testbenchv1alpha1.ToolCall{ + { + Name: "get_weather", + Args: runtime.RawExtension{Raw: []byte(`{"city":"NY"}`)}, + }, + }, + }, + Metrics: []testbenchv1alpha1.Metric{ + {MetricName: "AgentGoalAccuracy"}, + }, + }, + }, + }, }, - // TODO(user): Specify other spec details if needed. 
+ }, + } + Expect(k8sClient.Create(ctx, exp)).To(Succeed()) + }) + + AfterEach(func() { + cleanupExperiment(expName) + }) + + It("should create a ConfigMap with experiment.json", func() { + By("reconciling the Experiment") + Expect(reconcileExperiment(expName)).To(Succeed()) + + By("checking the ConfigMap exists") + cm := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed()) + Expect(cm.Data).To(HaveKey("experiment.json")) + + By("verifying the experiment.json content") + var expJSON experimentJSON + Expect(json.Unmarshal([]byte(cm.Data["experiment.json"]), &expJSON)).To(Succeed()) + Expect(expJSON.LLMAsAJudgeModel).To(Equal("gemini-2.5-flash-lite")) + Expect(expJSON.DefaultThreshold).To(Equal(0.9)) + Expect(expJSON.Scenarios).To(HaveLen(1)) + Expect(expJSON.Scenarios[0].Name).To(Equal("test scenario")) + Expect(expJSON.Scenarios[0].Steps).To(HaveLen(1)) + Expect(expJSON.Scenarios[0].Steps[0].Input).To(Equal("What is the weather?")) + Expect(expJSON.Scenarios[0].Steps[0].Reference).NotTo(BeNil()) + Expect(expJSON.Scenarios[0].Steps[0].Reference.Response).To(Equal("It is sunny")) + Expect(expJSON.Scenarios[0].Steps[0].Reference.Topics).To(ConsistOf("weather")) + Expect(expJSON.Scenarios[0].Steps[0].Reference.ToolCalls).To(HaveLen(1)) + Expect(expJSON.Scenarios[0].Steps[0].Reference.ToolCalls[0].Name).To(Equal("get_weather")) + Expect(expJSON.Scenarios[0].Steps[0].Metrics).To(HaveLen(1)) + Expect(expJSON.Scenarios[0].Steps[0].Metrics[0].MetricName).To(Equal("AgentGoalAccuracy")) + }) + + It("should set ConfigMap owner reference to the Experiment", func() { + Expect(reconcileExperiment(expName)).To(Succeed()) + + cm := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed()) + Expect(cm.OwnerReferences).To(HaveLen(1)) + Expect(cm.OwnerReferences[0].Kind).To(Equal("Experiment")) + 
Expect(cm.OwnerReferences[0].Name).To(Equal(expName)) + Expect(cm.OwnerReferences[0].Controller).NotTo(BeNil()) + Expect(*cm.OwnerReferences[0].Controller).To(BeTrue()) + }) + + It("should create a TestWorkflow without setup-template", func() { + Expect(reconcileExperiment(expName)).To(Succeed()) + + wf := &unstructured.Unstructured{} + wf.SetGroupVersionKind(testWorkflowGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + + spec := wf.Object["spec"].(map[string]interface{}) + + By("checking content.files mounts the ConfigMap") + content, ok := spec["content"].(map[string]interface{}) + Expect(ok).To(BeTrue(), "spec.content should be present in scenarios mode") + files := content["files"].([]interface{}) + Expect(files).To(HaveLen(1)) + file := files[0].(map[string]interface{}) + Expect(file["path"]).To(Equal("/data/datasets/experiment.json")) + contentFrom := file["contentFrom"].(map[string]interface{}) + cmRef := contentFrom["configMapKeyRef"].(map[string]interface{}) + Expect(cmRef["name"]).To(Equal(expName)) + Expect(cmRef["key"]).To(Equal("experiment.json")) + + By("checking use templates do NOT include setup-template") + use := spec["use"].([]interface{}) + templateNames := make([]string, 0, len(use)) + for _, u := range use { + templateNames = append(templateNames, u.(map[string]interface{})["name"].(string)) + } + Expect(templateNames).NotTo(ContainElement("setup-template")) + Expect(templateNames).To(ContainElements("run-template", "evaluate-template", "publish-template", "visualize-template")) + + By("checking the run-template has the correct agentUrl") + for _, u := range use { + um := u.(map[string]interface{}) + if um["name"] == "run-template" { + cfg := um["config"].(map[string]interface{}) + Expect(cfg["agentUrl"]).To(Equal("http://my-agent.agents:8000")) } - Expect(k8sClient.Create(ctx, resource)).To(Succeed()) } }) + It("should set TestWorkflow owner reference", func() { + 
Expect(reconcileExperiment(expName)).To(Succeed()) + + wf := &unstructured.Unstructured{} + wf.SetGroupVersionKind(testWorkflowGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + Expect(wf.GetOwnerReferences()).To(HaveLen(1)) + Expect(wf.GetOwnerReferences()[0].Kind).To(Equal("Experiment")) + Expect(wf.GetOwnerReferences()[0].Name).To(Equal(expName)) + }) + + It("should not create a TestTrigger when trigger is nil", func() { + Expect(reconcileExperiment(expName)).To(Succeed()) + + trig := &unstructured.Unstructured{} + trig.SetGroupVersionKind(testTriggerGVK) + err := k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-trigger", Namespace: namespace}, trig) + Expect(errors.IsNotFound(err)).To(BeTrue()) + }) + + It("should set Ready=True status condition after successful reconciliation", func() { + Expect(reconcileExperiment(expName)).To(Succeed()) + + exp := &testbenchv1alpha1.Experiment{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed()) + + var readyCond *metav1.Condition + for i := range exp.Status.Conditions { + if exp.Status.Conditions[i].Type == conditionReady { + readyCond = &exp.Status.Conditions[i] + break + } + } + Expect(readyCond).NotTo(BeNil()) + Expect(readyCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(readyCond.Reason).To(Equal("ReconcileSucceeded")) + Expect(readyCond.ObservedGeneration).To(Equal(exp.Generation)) + }) + + It("should populate generatedResources in status", func() { + Expect(reconcileExperiment(expName)).To(Succeed()) + + exp := &testbenchv1alpha1.Experiment{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed()) + + kinds := make([]string, 0, len(exp.Status.GeneratedResources)) + for _, gr := range exp.Status.GeneratedResources { + kinds = append(kinds, gr.Kind) + } + Expect(kinds).To(ContainElements("ConfigMap", "TestWorkflow")) + }) + + It("should be 
idempotent on re-reconciliation", func() { + Expect(reconcileExperiment(expName)).To(Succeed()) + Expect(reconcileExperiment(expName)).To(Succeed()) + + cmList := &corev1.ConfigMapList{} + Expect(k8sClient.List(ctx, cmList, + client.InNamespace(namespace), client.MatchingLabels{})).To(Succeed()) + count := 0 + for _, cm := range cmList.Items { + if cm.Name == expName { + count++ + } + } + Expect(count).To(Equal(1)) + }) + }) + + Context("Dataset mode reconciliation", func() { + const expName = "exp-dataset" + + BeforeEach(func() { + By("creating the Experiment with a dataset URL") + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"}, + Dataset: &testbenchv1alpha1.DatasetSource{ + URL: "http://data-server/dataset.csv", + }, + }, + } + Expect(k8sClient.Create(ctx, exp)).To(Succeed()) + }) + AfterEach(func() { - // TODO(user): Cleanup logic after each test, like removing the resource instance. 
- resource := &testbenchv1alpha1.Experiment{} - err := k8sClient.Get(ctx, typeNamespacedName, resource) + cleanupExperiment(expName) + }) + + It("should create a ConfigMap with empty scenarios as placeholder", func() { + Expect(reconcileExperiment(expName)).To(Succeed()) + + cm := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed()) + Expect(cm.Data).To(HaveKey("experiment.json")) + + var expJSON experimentJSON + Expect(json.Unmarshal([]byte(cm.Data["experiment.json"]), &expJSON)).To(Succeed()) + Expect(expJSON.Scenarios).To(BeEmpty()) + }) + + It("should create a TestWorkflow with setup-template and correct datasetUrl", func() { + Expect(reconcileExperiment(expName)).To(Succeed()) + + wf := &unstructured.Unstructured{} + wf.SetGroupVersionKind(testWorkflowGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + + spec := wf.Object["spec"].(map[string]interface{}) + + By("checking no content.files in dataset mode") + _, hasContent := spec["content"] + Expect(hasContent).To(BeFalse(), "spec.content should be absent in dataset mode") + + By("checking setup-template is first in use list") + use := spec["use"].([]interface{}) + first := use[0].(map[string]interface{}) + Expect(first["name"]).To(Equal("setup-template")) + cfg := first["config"].(map[string]interface{}) + Expect(cfg["datasetUrl"]).To(Equal("http://data-server/dataset.csv")) + }) + + It("should resolve S3 dataset URL correctly", func() { + exp := &testbenchv1alpha1.Experiment{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed()) + exp.Spec.Dataset = &testbenchv1alpha1.DatasetSource{ + S3: &testbenchv1alpha1.S3Source{Bucket: "my-bucket", Key: "data/dataset.csv"}, + } + Expect(k8sClient.Update(ctx, exp)).To(Succeed()) + Expect(reconcileExperiment(expName)).To(Succeed()) + + wf := &unstructured.Unstructured{} + 
wf.SetGroupVersionKind(testWorkflowGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + spec := wf.Object["spec"].(map[string]interface{}) + use := spec["use"].([]interface{}) + first := use[0].(map[string]interface{}) + Expect(first["name"]).To(Equal("setup-template")) + Expect(first["config"].(map[string]interface{})["datasetUrl"]). + To(Equal("s3://my-bucket/data/dataset.csv")) + }) + }) + + Context("Trigger management", func() { + const expName = "exp-trigger" + + createExperiment := func(triggerEnabled bool, policy string) { + trigger := &testbenchv1alpha1.TriggerSpec{ + Enabled: triggerEnabled, + ConcurrencyPolicy: policy, + } + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"}, + Scenarios: []testbenchv1alpha1.Scenario{ + {Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}, + }, + Trigger: trigger, + }, + } + Expect(k8sClient.Create(ctx, exp)).To(Succeed()) + } + + AfterEach(func() { + cleanupExperiment(expName) + }) + + It("should create a TestTrigger when trigger.enabled=true", func() { + createExperiment(true, "Forbid") + Expect(reconcileExperiment(expName)).To(Succeed()) + + trig := &unstructured.Unstructured{} + trig.SetGroupVersionKind(testTriggerGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: expName + "-trigger", + Namespace: namespace, + }, trig)).To(Succeed()) + + spec := trig.Object["spec"].(map[string]interface{}) + Expect(spec["resource"]).To(Equal("deployment")) + Expect(spec["concurrencyPolicy"]).To(Equal("forbid")) + Expect(spec["action"]).To(Equal("run")) + Expect(spec["execution"]).To(Equal("testworkflow")) + Expect(spec["disabled"]).To(BeFalse()) + + resSelector := spec["resourceSelector"].(map[string]interface{}) + Expect(resSelector["name"]).To(Equal("my-agent")) + 
Expect(resSelector["namespace"]).To(Equal("agents")) + + testSelector := spec["testSelector"].(map[string]interface{}) + Expect(testSelector["name"]).To(Equal(expName)) + Expect(testSelector["namespace"]).To(Equal(namespace)) + }) + + It("should set TestTrigger owner reference", func() { + createExperiment(true, "Allow") + Expect(reconcileExperiment(expName)).To(Succeed()) + + trig := &unstructured.Unstructured{} + trig.SetGroupVersionKind(testTriggerGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: expName + "-trigger", + Namespace: namespace, + }, trig)).To(Succeed()) + Expect(trig.GetOwnerReferences()).To(HaveLen(1)) + Expect(trig.GetOwnerReferences()[0].Kind).To(Equal("Experiment")) + }) + + It("should not create a TestTrigger when trigger.enabled=false", func() { + createExperiment(false, "") + Expect(reconcileExperiment(expName)).To(Succeed()) + + trig := &unstructured.Unstructured{} + trig.SetGroupVersionKind(testTriggerGVK) + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: expName + "-trigger", + Namespace: namespace, + }, trig) + Expect(errors.IsNotFound(err)).To(BeTrue()) + }) + + It("should delete the TestTrigger when trigger is disabled after being enabled", func() { + By("creating an experiment with trigger enabled") + createExperiment(true, "Allow") + Expect(reconcileExperiment(expName)).To(Succeed()) + + trig := &unstructured.Unstructured{} + trig.SetGroupVersionKind(testTriggerGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: expName + "-trigger", + Namespace: namespace, + }, trig)).To(Succeed()) + + By("disabling the trigger") + exp := &testbenchv1alpha1.Experiment{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed()) + exp.Spec.Trigger.Enabled = false + Expect(k8sClient.Update(ctx, exp)).To(Succeed()) + + Expect(reconcileExperiment(expName)).To(Succeed()) + + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: expName + "-trigger", + Namespace: namespace, 
+ }, trig) + Expect(errors.IsNotFound(err)).To(BeTrue()) + }) + + It("should include TestTrigger in generatedResources when enabled", func() { + createExperiment(true, "Allow") + Expect(reconcileExperiment(expName)).To(Succeed()) + + exp := &testbenchv1alpha1.Experiment{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed()) + + kinds := make([]string, 0, len(exp.Status.GeneratedResources)) + for _, gr := range exp.Status.GeneratedResources { + kinds = append(kinds, gr.Kind) + } + Expect(kinds).To(ContainElements("ConfigMap", "TestWorkflow", "TestTrigger")) + }) + }) + + Context("Status management", func() { + const expName = "exp-status" + + AfterEach(func() { + cleanupExperiment(expName) + }) + + It("should set WorkflowReady condition to True on success", func() { + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, + Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + }, + } + Expect(k8sClient.Create(ctx, exp)).To(Succeed()) + Expect(reconcileExperiment(expName)).To(Succeed()) + + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed()) + var wfCond *metav1.Condition + for i := range exp.Status.Conditions { + if exp.Status.Conditions[i].Type == conditionWorkflowReady { + wfCond = &exp.Status.Conditions[i] + break + } + } + Expect(wfCond).NotTo(BeNil()) + Expect(wfCond.Status).To(Equal(metav1.ConditionTrue)) + }) + + It("should handle missing Experiment gracefully (not found)", func() { + err := reconcileExperiment("nonexistent") Expect(err).NotTo(HaveOccurred()) + }) + }) - By("Cleanup the specific resource instance Experiment") - Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + Context("Agent URL resolution", func() { + It("should use agentRef.Namespace 
for the agent URL", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "weather-agent", Namespace: "sample-agents"}, + }, + } + Expect(r.resolveAgentURL(exp)).To(Equal("http://weather-agent.sample-agents:8000")) }) - It("should successfully reconcile the resource", func() { - By("Reconciling the created resource") - controllerReconciler := &ExperimentReconciler{ - Client: k8sClient, - Scheme: k8sClient.Scheme(), + + It("should fall back to experiment namespace when agentRef.Namespace is empty", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: "my-ns"}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent"}, + }, } + Expect(r.resolveAgentURL(exp)).To(Equal("http://my-agent.my-ns:8000")) + }) + }) - _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: typeNamespacedName, - }) + Context("buildExperimentJSON", func() { + It("should serialize customValues and metric parameters as raw JSON", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + Spec: testbenchv1alpha1.ExperimentSpec{ + DefaultThreshold: 0.8, + Scenarios: []testbenchv1alpha1.Scenario{ + { + Name: "s", + Steps: []testbenchv1alpha1.Step{ + { + Input: "q", + CustomValues: runtime.RawExtension{Raw: []byte(`{"key":"value"}`)}, + Metrics: []testbenchv1alpha1.Metric{ + { + MetricName: "M", + Threshold: 0.7, + Parameters: runtime.RawExtension{Raw: []byte(`{"mode":"precision"}`)}, + }, + }, + }, + }, + }, + }, + }, + } + data, err := r.buildExperimentJSON(exp) Expect(err).NotTo(HaveOccurred()) - // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. - // Example: If you expect a certain status condition after reconciliation, verify it here. 
+ + var result experimentJSON + Expect(json.Unmarshal([]byte(data), &result)).To(Succeed()) + Expect(result.DefaultThreshold).To(Equal(0.8)) + Expect(result.Scenarios[0].Steps[0].CustomValues).To(MatchJSON(`{"key":"value"}`)) + Expect(result.Scenarios[0].Steps[0].Metrics[0].Parameters).To(MatchJSON(`{"mode":"precision"}`)) + }) + + It("should produce empty scenarios list for dataset mode", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + Spec: testbenchv1alpha1.ExperimentSpec{ + DefaultThreshold: 0.9, + Dataset: &testbenchv1alpha1.DatasetSource{URL: "http://example.com/data.csv"}, + }, + } + data, err := r.buildExperimentJSON(exp) + Expect(err).NotTo(HaveOccurred()) + Expect(data).To(ContainSubstring(`"scenarios": []`)) + }) + }) + + Context("OTel env var injection", func() { + const expName = "exp-otel" + + AfterEach(func() { + cleanupExperiment(expName) + }) + + It("should inject OTEL_EXPORTER_OTLP_ENDPOINT from otel-config ConfigMap", func() { + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, + Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + }, + } + Expect(k8sClient.Create(ctx, exp)).To(Succeed()) + Expect(reconcileExperiment(expName)).To(Succeed()) + + wf := &unstructured.Unstructured{} + wf.SetGroupVersionKind(testWorkflowGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + + spec := wf.Object["spec"].(map[string]interface{}) + container := spec["container"].(map[string]interface{}) + envList := container["env"].([]interface{}) + Expect(envList).To(HaveLen(1)) + envVar := envList[0].(map[string]interface{}) + Expect(envVar["name"]).To(Equal(otelEndpointKey)) + valueFrom := envVar["valueFrom"].(map[string]interface{}) + cmRef := 
valueFrom["configMapKeyRef"].(map[string]interface{}) + Expect(cmRef["name"]).To(Equal(otelConfigMapName)) + Expect(cmRef["key"]).To(Equal(otelEndpointKey)) }) }) }) diff --git a/operator/internal/controller/suite_test.go b/operator/internal/controller/suite_test.go index d89999c..6250695 100644 --- a/operator/internal/controller/suite_test.go +++ b/operator/internal/controller/suite_test.go @@ -54,7 +54,10 @@ var _ = BeforeSuite(func() { By("bootstrapping test environment") testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + CRDDirectoryPaths: []string{ + filepath.Join("..", "..", "config", "crd", "bases"), + filepath.Join("testdata", "crds"), + }, ErrorIfCRDPathMissing: true, // The BinaryAssetsDirectory is only required if you want to run the tests directly diff --git a/operator/internal/controller/testdata/crds/tests.testkube.io_testtriggers.yaml b/operator/internal/controller/testdata/crds/tests.testkube.io_testtriggers.yaml new file mode 100644 index 0000000..ff40b84 --- /dev/null +++ b/operator/internal/controller/testdata/crds/tests.testkube.io_testtriggers.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: testtriggers.tests.testkube.io +spec: + group: tests.testkube.io + names: + kind: TestTrigger + listKind: TestTriggerList + plural: testtriggers + singular: testtrigger + scope: Namespaced + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + x-kubernetes-preserve-unknown-fields: true diff --git a/operator/internal/controller/testdata/crds/testworkflows.testkube.io_testworkflows.yaml b/operator/internal/controller/testdata/crds/testworkflows.testkube.io_testworkflows.yaml new file mode 100644 index 0000000..1d54d9b --- /dev/null +++ b/operator/internal/controller/testdata/crds/testworkflows.testkube.io_testworkflows.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: apiextensions.k8s.io/v1 
+kind: CustomResourceDefinition +metadata: + name: testworkflows.testworkflows.testkube.io +spec: + group: testworkflows.testkube.io + names: + kind: TestWorkflow + listKind: TestWorkflowList + plural: testworkflows + singular: testworkflow + scope: Namespaced + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + x-kubernetes-preserve-unknown-fields: true From 23e67016009da1354dd78fdbd8a881a0834752c2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Mar 2026 12:04:58 +0000 Subject: [PATCH 03/19] Final verification and documentation complete Co-authored-by: fmallmann <30110193+fmallmann@users.noreply.github.com> --- operator/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/go.mod b/operator/go.mod index 27d9d75..c4da270 100644 --- a/operator/go.mod +++ b/operator/go.mod @@ -5,6 +5,7 @@ go 1.25.0 require ( github.com/onsi/ginkgo/v2 v2.28.1 github.com/onsi/gomega v1.39.1 + k8s.io/api v0.35.2 k8s.io/apimachinery v0.35.2 k8s.io/client-go v0.35.2 sigs.k8s.io/controller-runtime v0.23.3 @@ -85,7 +86,6 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.35.2 // indirect k8s.io/apiextensions-apiserver v0.35.0 // indirect k8s.io/apiserver v0.35.0 // indirect k8s.io/component-base v0.35.0 // indirect From 8c1c97d0c2756244c4f762016e1677dfb38933cb Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Tue, 24 Mar 2026 15:30:03 +0100 Subject: [PATCH 04/19] feat: update Experiment reconciler to enhance resource reconciliation and status reporting --- operator/Dockerfile | 2 +- operator/config/manager/kustomization.yaml | 6 ++ .../testbench_v1alpha1_experiment.yaml | 2 - .../controller/experiment_controller.go | 69 +++++++++++++------ .../controller/experiment_controller_test.go | 26 +++---- 5 files changed, 67 insertions(+), 38 deletions(-) 
diff --git a/operator/Dockerfile b/operator/Dockerfile index a48973e..5a82af7 100644 --- a/operator/Dockerfile +++ b/operator/Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM golang:1.22 AS builder +FROM golang:1.25 AS builder ARG TARGETOS ARG TARGETARCH diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml index 5c5f0b8..ad13e96 100644 --- a/operator/config/manager/kustomization.yaml +++ b/operator/config/manager/kustomization.yaml @@ -1,2 +1,8 @@ resources: - manager.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: controller + newName: controller + newTag: latest diff --git a/operator/config/samples/testbench_v1alpha1_experiment.yaml b/operator/config/samples/testbench_v1alpha1_experiment.yaml index 28ab44e..0bb8ea1 100644 --- a/operator/config/samples/testbench_v1alpha1_experiment.yaml +++ b/operator/config/samples/testbench_v1alpha1_experiment.yaml @@ -17,5 +17,3 @@ spec: key: dataset.csv trigger: enabled: true - event: on_push - concurrencyPolicy: Forbid diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go index 7565db7..eb240d8 100644 --- a/operator/internal/controller/experiment_controller.go +++ b/operator/internal/controller/experiment_controller.go @@ -118,9 +118,9 @@ func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request) } var generatedResources []testbenchv1alpha1.GeneratedResource - reconcileErr := r.reconcileResources(ctx, experiment, &generatedResources) + result, reconcileErr := r.reconcileResources(ctx, experiment, &generatedResources) - if statusErr := r.updateStatus(ctx, experiment, generatedResources, reconcileErr); statusErr != nil { + if statusErr := r.updateStatus(ctx, experiment, generatedResources, result, reconcileErr); statusErr != nil { logger.Error(statusErr, "failed to update status") return ctrl.Result{}, statusErr } @@ -128,21 +128,31 @@ func 
(r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, reconcileErr } +// reconcileResult tracks per-resource errors so status conditions can be set accurately. +type reconcileResult struct { + workflowSkipped bool + workflowErr error +} + func (r *ExperimentReconciler) reconcileResources( ctx context.Context, experiment *testbenchv1alpha1.Experiment, generatedResources *[]testbenchv1alpha1.GeneratedResource, -) error { +) (reconcileResult, error) { + var result reconcileResult if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil { - return fmt.Errorf("reconciling ConfigMap: %w", err) + return result, fmt.Errorf("reconciling ConfigMap: %w", err) } - if err := r.reconcileTestWorkflow(ctx, experiment, generatedResources); err != nil { - return fmt.Errorf("reconciling TestWorkflow: %w", err) + wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, generatedResources) + if err != nil { + result.workflowErr = err + return result, fmt.Errorf("reconciling TestWorkflow: %w", err) } + result.workflowSkipped = wfSkipped if err := r.reconcileTestTrigger(ctx, experiment, generatedResources); err != nil { - return fmt.Errorf("reconciling TestTrigger: %w", err) + return result, fmt.Errorf("reconciling TestTrigger: %w", err) } - return nil + return result, nil } // reconcileConfigMap creates or updates the ConfigMap holding experiment.json. @@ -151,9 +161,10 @@ func (r *ExperimentReconciler) reconcileConfigMap( experiment *testbenchv1alpha1.Experiment, generatedResources *[]testbenchv1alpha1.GeneratedResource, ) error { + cmName := experiment.Name + "-experiment" cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ - Name: experiment.Name, + Name: cmName, Namespace: experiment.Namespace, }, } @@ -240,14 +251,15 @@ func (r *ExperimentReconciler) convertStep(step testbenchv1alpha1.Step) stepJSON } // reconcileTestWorkflow creates or updates the Testkube TestWorkflow for the Experiment. 
+// It returns (skipped, error) where skipped is true when the CRD is not installed. func (r *ExperimentReconciler) reconcileTestWorkflow( ctx context.Context, experiment *testbenchv1alpha1.Experiment, generatedResources *[]testbenchv1alpha1.GeneratedResource, -) error { +) (bool, error) { workflow := r.buildTestWorkflow(experiment) if err := controllerutil.SetControllerReference(experiment, workflow, r.Scheme); err != nil { - return err + return false, err } existing := &unstructured.Unstructured{} @@ -255,19 +267,19 @@ func (r *ExperimentReconciler) reconcileTestWorkflow( err := r.Get(ctx, types.NamespacedName{Name: workflow.GetName(), Namespace: workflow.GetNamespace()}, existing) if errors.IsNotFound(err) { if createErr := r.Create(ctx, workflow); createErr != nil { - return createErr + return false, createErr } } else if err != nil { if isCRDNotInstalled(err) { log.FromContext(ctx).Info("Testkube TestWorkflow CRD not installed; skipping TestWorkflow reconciliation") - return nil + return true, nil } - return err + return false, err } else { existing.Object["spec"] = workflow.Object["spec"] existing.SetOwnerReferences(workflow.GetOwnerReferences()) if updateErr := r.Update(ctx, existing); updateErr != nil { - return updateErr + return false, updateErr } } @@ -276,7 +288,7 @@ func (r *ExperimentReconciler) reconcileTestWorkflow( Name: workflow.GetName(), Namespace: workflow.GetNamespace(), }) - return nil + return false, nil } // buildTestWorkflow constructs the desired TestWorkflow unstructured object. 
@@ -330,7 +342,7 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E "path": "/data/datasets/experiment.json", "contentFrom": map[string]interface{}{ "configMapKeyRef": map[string]interface{}{ - "name": experiment.Name, + "name": experiment.Name + "-experiment", "key": "experiment.json", }, }, @@ -344,7 +356,7 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E "apiVersion": testWorkflowGVK.GroupVersion().String(), "kind": testWorkflowGVK.Kind, "metadata": map[string]interface{}{ - "name": experiment.Name, + "name": experiment.Name + "-workflow", "namespace": experiment.Namespace, }, "spec": spec, @@ -436,12 +448,12 @@ func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Ex "name": experiment.Spec.AgentRef.Name, "namespace": agentNs, }, - "event": "modified", + "event": r.resolveTriggerEvent(experiment), "action": "run", "execution": "testworkflow", "concurrencyPolicy": concurrencyPolicy, "testSelector": map[string]interface{}{ - "name": experiment.Name, + "name": experiment.Name + "-workflow", "namespace": experiment.Namespace, }, "disabled": false, @@ -455,6 +467,7 @@ func (r *ExperimentReconciler) updateStatus( ctx context.Context, experiment *testbenchv1alpha1.Experiment, generatedResources []testbenchv1alpha1.GeneratedResource, + result reconcileResult, reconcileErr error, ) error { experiment.Status.GeneratedResources = generatedResources @@ -478,10 +491,14 @@ func (r *ExperimentReconciler) updateStatus( wfStatus := metav1.ConditionTrue wfReason := "WorkflowCreated" wfMsg := "TestWorkflow created successfully" - if reconcileErr != nil { + if result.workflowErr != nil { wfStatus = metav1.ConditionFalse wfReason = "WorkflowNotReady" - wfMsg = reconcileErr.Error() + wfMsg = result.workflowErr.Error() + } else if result.workflowSkipped { + wfStatus = metav1.ConditionFalse + wfReason = "CRDNotInstalled" + wfMsg = "TestWorkflow CRD not installed; workflow was not created" } 
apimeta.SetStatusCondition(&experiment.Status.Conditions, metav1.Condition{ Type: conditionWorkflowReady, @@ -494,6 +511,14 @@ func (r *ExperimentReconciler) updateStatus( return r.Status().Update(ctx, experiment) } +// resolveTriggerEvent returns the trigger event, defaulting to "modified". +func (r *ExperimentReconciler) resolveTriggerEvent(experiment *testbenchv1alpha1.Experiment) string { + if experiment.Spec.Trigger != nil && experiment.Spec.Trigger.Event != "" { + return strings.ToLower(experiment.Spec.Trigger.Event) + } + return "modified" +} + // resolveAgentURL builds the in-cluster DNS URL for the agent service. func (r *ExperimentReconciler) resolveAgentURL(experiment *testbenchv1alpha1.Experiment) string { ns := experiment.Spec.AgentRef.Namespace diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go index 90d0ed0..2a18a63 100644 --- a/operator/internal/controller/experiment_controller_test.go +++ b/operator/internal/controller/experiment_controller_test.go @@ -58,12 +58,12 @@ var _ = Describe("Experiment Controller", func() { _ = k8sClient.Delete(ctx, exp) } cm := &corev1.ConfigMap{} - if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, cm); err == nil { + if err := k8sClient.Get(ctx, types.NamespacedName{Name: name + "-experiment", Namespace: namespace}, cm); err == nil { _ = k8sClient.Delete(ctx, cm) } wf := &unstructured.Unstructured{} wf.SetGroupVersionKind(testWorkflowGVK) - if err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, wf); err == nil { + if err := k8sClient.Get(ctx, types.NamespacedName{Name: name + "-workflow", Namespace: namespace}, wf); err == nil { _ = k8sClient.Delete(ctx, wf) } trig := &unstructured.Unstructured{} @@ -122,7 +122,7 @@ var _ = Describe("Experiment Controller", func() { By("checking the ConfigMap exists") cm := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, 
types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)).To(Succeed()) Expect(cm.Data).To(HaveKey("experiment.json")) By("verifying the experiment.json content") @@ -147,7 +147,7 @@ var _ = Describe("Experiment Controller", func() { Expect(reconcileExperiment(expName)).To(Succeed()) cm := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)).To(Succeed()) Expect(cm.OwnerReferences).To(HaveLen(1)) Expect(cm.OwnerReferences[0].Kind).To(Equal("Experiment")) Expect(cm.OwnerReferences[0].Name).To(Equal(expName)) @@ -160,7 +160,7 @@ var _ = Describe("Experiment Controller", func() { wf := &unstructured.Unstructured{} wf.SetGroupVersionKind(testWorkflowGVK) - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed()) spec := wf.Object["spec"].(map[string]interface{}) @@ -173,7 +173,7 @@ var _ = Describe("Experiment Controller", func() { Expect(file["path"]).To(Equal("/data/datasets/experiment.json")) contentFrom := file["contentFrom"].(map[string]interface{}) cmRef := contentFrom["configMapKeyRef"].(map[string]interface{}) - Expect(cmRef["name"]).To(Equal(expName)) + Expect(cmRef["name"]).To(Equal(expName + "-experiment")) Expect(cmRef["key"]).To(Equal("experiment.json")) By("checking use templates do NOT include setup-template") @@ -200,7 +200,7 @@ var _ = Describe("Experiment Controller", func() { wf := &unstructured.Unstructured{} wf.SetGroupVersionKind(testWorkflowGVK) - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + Expect(k8sClient.Get(ctx, 
types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed()) Expect(wf.GetOwnerReferences()).To(HaveLen(1)) Expect(wf.GetOwnerReferences()[0].Kind).To(Equal("Experiment")) Expect(wf.GetOwnerReferences()[0].Name).To(Equal(expName)) @@ -256,7 +256,7 @@ var _ = Describe("Experiment Controller", func() { client.InNamespace(namespace), client.MatchingLabels{})).To(Succeed()) count := 0 for _, cm := range cmList.Items { - if cm.Name == expName { + if cm.Name == expName+"-experiment" { count++ } } @@ -289,7 +289,7 @@ var _ = Describe("Experiment Controller", func() { Expect(reconcileExperiment(expName)).To(Succeed()) cm := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, cm)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)).To(Succeed()) Expect(cm.Data).To(HaveKey("experiment.json")) var expJSON experimentJSON @@ -302,7 +302,7 @@ var _ = Describe("Experiment Controller", func() { wf := &unstructured.Unstructured{} wf.SetGroupVersionKind(testWorkflowGVK) - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed()) spec := wf.Object["spec"].(map[string]interface{}) @@ -329,7 +329,7 @@ var _ = Describe("Experiment Controller", func() { wf := &unstructured.Unstructured{} wf.SetGroupVersionKind(testWorkflowGVK) - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed()) spec := wf.Object["spec"].(map[string]interface{}) use := spec["use"].([]interface{}) first := use[0].(map[string]interface{}) @@ -387,7 +387,7 @@ var _ = Describe("Experiment Controller", func() { 
Expect(resSelector["namespace"]).To(Equal("agents")) testSelector := spec["testSelector"].(map[string]interface{}) - Expect(testSelector["name"]).To(Equal(expName)) + Expect(testSelector["name"]).To(Equal(expName + "-workflow")) Expect(testSelector["namespace"]).To(Equal(namespace)) }) @@ -589,7 +589,7 @@ var _ = Describe("Experiment Controller", func() { wf := &unstructured.Unstructured{} wf.SetGroupVersionKind(testWorkflowGVK) - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, wf)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed()) spec := wf.Object["spec"].(map[string]interface{}) container := spec["container"].(map[string]interface{}) From 793ac47199c4df5715dd7e3deeb941652c56b047 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 14:52:33 +0100 Subject: [PATCH 05/19] docs: add design spec for AI Gateway resolution in Experiment reconciler Co-Authored-By: Claude Opus 4.6 (1M context) --- .../2026-03-25-aigateway-resolution-design.md | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md diff --git a/docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md b/docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md new file mode 100644 index 0000000..17109e1 --- /dev/null +++ b/docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md @@ -0,0 +1,177 @@ +# AI Gateway Resolution for Experiment Reconciler + +**Date:** 2026-03-25 +**Status:** Approved + +## Problem + +The Experiment reconciler creates a TestWorkflow with an evaluate-template that requires `OPENAI_BASE_URL` to access LLM models for evaluation metrics. Currently, the reconciler passes no config to the evaluate-template, so the base URL is unset. 
We need to resolve the AI Gateway service URL — following the same pattern as the agent-runtime-operator — and pass it as `openApiBasePath` to the evaluate-template config. + +## Approach + +**Resolve at reconcile time.** During `reconcileResources`, look up the AiGateway using the same 2-tier strategy as the agent-runtime-operator's `resolveAiGateway` method, build the in-cluster service URL, and bake it into the TestWorkflow's evaluate-template config entry. + +### Why this approach + +- Consistent with how `resolveAgentURL` already works (URL baked at reconcile time) +- Simple — no runtime discovery needed by testbench scripts +- AiGateway rarely changes; a watch can be added later if needed + +### Alternatives considered + +1. **Resolve + watch AiGateway** — re-enqueue Experiments on gateway changes. More complex, deferred for now. +2. **Pass gateway name/namespace to script** — breaks the "reconciler resolves everything" pattern, adds Kubernetes awareness to Python side. + +## Design + +### 0. Evaluate-Template Default Value (Prerequisite) + +The existing `evaluate-template.yaml` declares `openApiBasePath` as a config parameter with no default value. When the reconciler omits this config (no AiGateway found), Testkube may reject the workflow or render an empty/error value. Add a default: + +```yaml +config: + openApiBasePath: + type: string + description: "Base path for OpenAI API" + default: "" +``` + +This ensures the template is valid even when no `openApiBasePath` is provided. + +### 1. CRD Changes (`experiment_types.go`) + +Add an optional `AiGatewayRef` field to `ExperimentSpec`: + +```go +import corev1 "k8s.io/api/core/v1" + +// AiGatewayRef references an AiGateway resource for LLM access during evaluation. +// Only Name and Namespace fields are used. 
+// +optional +AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"` +``` + +Uses `corev1.ObjectReference` — same type the agent-runtime-operator uses for its `AiGatewayRef` on the Agent spec. Only `Name` and `Namespace` fields are used; other fields are ignored. This trades a heavier CRD schema for consistency with the agent-runtime-operator. + +### 2. Go Module Dependency + +Import the agent-runtime-operator module for typed `AiGateway` and `AiGatewayList` types: + +``` +go get github.com/agentic-layer/agent-runtime-operator +``` + +Register the types in the scheme: + +```go +import runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" + +runtimev1alpha1.AddToScheme(scheme) +``` + +### 3. AI Gateway Resolution (`experiment_controller.go`) + +Three new methods mirroring the agent-runtime-operator: + +**`resolveAiGateway(ctx, experiment) (*runtimev1alpha1.AiGateway, error)`** +- If `experiment.Spec.AiGatewayRef` is set, call `resolveExplicitAiGateway` +- Otherwise, call `resolveDefaultAiGateway` + +**`resolveExplicitAiGateway(ctx, ref, experimentNamespace) (*runtimev1alpha1.AiGateway, error)`** +- Use ref's namespace, fall back to experiment's namespace +- `r.Get()` the AiGateway by name/namespace +- If CRD not installed (`meta.IsNoMatchError`), return clear error +- If any other error, return wrapped error + +**`resolveDefaultAiGateway(ctx) (*runtimev1alpha1.AiGateway, error)`** +- `r.List()` AiGateways in `ai-gateway` namespace +- If CRD not installed, return `nil, nil` +- If no items found, return `nil, nil` +- If multiple found, log and pick the first one + +**Constants:** + +```go +const defaultAiGatewayNamespace = "ai-gateway" +``` + +**URL builder:** + +```go +func buildAiGatewayServiceUrl(aiGateway runtimev1alpha1.AiGateway) string { + return fmt.Sprintf("http://%s.%s.svc.cluster.local.:%d", aiGateway.Name, aiGateway.Namespace, aiGateway.Spec.Port) +} +``` + +Note: The URL format (including trailing dot before port) 
matches the agent-runtime-operator exactly. The trailing dot marks the hostname as a fully qualified domain name (anchored at the DNS root), so resolvers skip search-domain expansion; it is kept to match the reference implementation. + +### 4. Wiring into `buildTestWorkflow` + +**In `reconcileResources`**, before `reconcileTestWorkflow`: + +```go +aiGateway, err := r.resolveAiGateway(ctx, experiment) +if err != nil { + return result, fmt.Errorf("resolving AiGateway: %w", err) +} +``` + +Pass the resolved gateway to `buildTestWorkflow` (new signature): + +```go +func (r *ExperimentReconciler) buildTestWorkflow( + experiment *testbenchv1alpha1.Experiment, + aiGateway *runtimev1alpha1.AiGateway, +) *unstructured.Unstructured +``` + +In the evaluate-template entry, conditionally set `openApiBasePath`: + +```go +evaluateTemplate := map[string]interface{}{"name": "evaluate-template"} +if aiGateway != nil { + evaluateTemplate["config"] = map[string]interface{}{ + "openApiBasePath": buildAiGatewayServiceUrl(*aiGateway), + } +} +``` + +When no gateway is found, the evaluate-template falls back to its default empty string config. No error, no degraded status. This is valid because `OPENAI_BASE_URL` is only needed when metrics require LLM-as-a-judge. + +### 5. RBAC + +Add kubebuilder RBAC marker: + +```go +// +kubebuilder:rbac:groups=runtime.agentic-layer.ai,resources=aigateways,verbs=get;list;watch +``` + +### 6. 
Tests + +**Updated tests:** +- Existing `buildTestWorkflow` calls pass `nil` as the aiGateway parameter + +**New tests:** +- **Explicit AiGatewayRef resolved** — fake AiGateway, assert evaluate-template has `config.openApiBasePath` set to expected service URL +- **No AiGateway found** — pass `nil`, assert evaluate-template has no `config` key +- **resolveAiGateway with explicit ref** — seed fake client, verify correct gateway returned +- **resolveDefaultAiGateway** — seed fake client with gateway in `ai-gateway` namespace, verify discovery +- **resolveAiGateway with no gateway** — empty cluster, verify `nil, nil` + +## Known Limitations + +- **No AiGateway watch:** Changes to an AiGateway resource (e.g., port update) do not trigger re-reconciliation of Experiments. The TestWorkflow will have a stale URL until the Experiment is manually updated or re-reconciled. A watch handler (similar to `findAgentsReferencingAiGateway` in the agent-runtime-operator) can be added in a follow-up. +- **No status reporting for resolution outcome:** The reconciler does not surface whether an AiGateway was resolved in the Experiment status. Users must inspect the generated TestWorkflow to verify. A dedicated `AiGatewayResolved` condition can be added in a follow-up. 
+ +## Files Changed + +| File | Change | +|------|--------| +| `chart/templates/evaluate-template.yaml` | Add `default: ""` to `openApiBasePath` config | +| `operator/api/v1alpha1/experiment_types.go` | Add `corev1` import and `AiGatewayRef` field | +| `operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml` | Regenerated CRD | +| `operator/internal/controller/experiment_controller.go` | Add resolution logic, wire into workflow builder | +| `operator/internal/controller/experiment_controller_test.go` | Update existing + add new tests | +| `operator/cmd/main.go` (or equivalent) | Register AiGateway scheme | +| `operator/go.mod` / `operator/go.sum` | Add agent-runtime-operator dependency | +| `operator/config/rbac/role.yaml` | Regenerated RBAC | \ No newline at end of file From c2d014199e68989bbf5be4fcbcbc9717c985fe73 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 15:17:32 +0100 Subject: [PATCH 06/19] docs: add implementation plan for AI Gateway resolution Co-Authored-By: Claude Opus 4.6 (1M context) --- .../plans/2026-03-25-aigateway-resolution.md | 747 ++++++++++++++++++ 1 file changed, 747 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-25-aigateway-resolution.md diff --git a/docs/superpowers/plans/2026-03-25-aigateway-resolution.md b/docs/superpowers/plans/2026-03-25-aigateway-resolution.md new file mode 100644 index 0000000..6904d8c --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-aigateway-resolution.md @@ -0,0 +1,747 @@ +# AI Gateway Resolution Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Resolve the AiGateway resource in the Experiment reconciler and pass its service URL as `openApiBasePath` to the evaluate-template config in the generated TestWorkflow. 
+ +**Architecture:** Mirror the agent-runtime-operator's 2-tier AiGateway resolution (explicit ref → default discovery in `ai-gateway` namespace). Import typed AiGateway types from the agent-runtime-operator module. Resolve at reconcile time and bake the URL into the TestWorkflow. + +**Tech Stack:** Go, Kubernetes controller-runtime, kubebuilder, Ginkgo/Gomega tests, envtest + +**Spec:** `docs/superpowers/specs/2026-03-25-aigateway-resolution-design.md` + +--- + +## File Structure + +| File | Action | Responsibility | +|------|--------|----------------| +| `chart/templates/evaluate-template.yaml` | Modify | Add `default: ""` to `openApiBasePath` config | +| `operator/go.mod` | Modify | Add agent-runtime-operator dependency | +| `operator/api/v1alpha1/experiment_types.go` | Modify | Add `AiGatewayRef` field to `ExperimentSpec` | +| `operator/cmd/main.go` | Modify | Register AiGateway types in scheme | +| `operator/internal/controller/suite_test.go` | Modify | Register AiGateway types in test scheme | +| `operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml` | Create | Copy AiGateway CRD for envtest | +| `operator/internal/controller/experiment_controller.go` | Modify | Add resolution methods, wire into buildTestWorkflow | +| `operator/internal/controller/experiment_controller_test.go` | Modify | Update existing tests, add new resolution tests | + +--- + +### Task 1: Add default value to evaluate-template config + +**Files:** +- Modify: `chart/templates/evaluate-template.yaml:11-14` + +- [ ] **Step 1: Add `default: ""` to the openApiBasePath config parameter** + +In `chart/templates/evaluate-template.yaml`, change: + +```yaml + config: + openApiBasePath: + type: string + description: "Base path for OpenAI API" +``` + +to: + +```yaml + config: + openApiBasePath: + type: string + description: "Base path for OpenAI API" + default: "" +``` + +- [ ] **Step 2: Commit** + +```bash +cd operator && git add 
../chart/templates/evaluate-template.yaml +git commit -m "fix: add default value to evaluate-template openApiBasePath config" +``` + +--- + +### Task 2: Add agent-runtime-operator Go module dependency + +**Files:** +- Modify: `operator/go.mod` + +- [ ] **Step 1: Add the dependency** + +Run from the `operator/` directory: + +```bash +cd operator && go get github.com/agentic-layer/agent-runtime-operator@latest +``` + +Expected: `go.mod` and `go.sum` updated with the new dependency. + +- [ ] **Step 2: Verify it compiles** + +```bash +cd operator && go build ./... +``` + +Expected: No errors. + +- [ ] **Step 3: Commit** + +```bash +cd operator && git add go.mod go.sum +git commit -m "build: add agent-runtime-operator module dependency" +``` + +--- + +### Task 3: Add AiGatewayRef field to ExperimentSpec CRD + +**Files:** +- Modify: `operator/api/v1alpha1/experiment_types.go:19-22` (imports) and `:158-189` (ExperimentSpec) + +- [ ] **Step 1: Write the failing test** + +In `operator/internal/controller/experiment_controller_test.go`, add a test inside a new `Context("AiGateway resolution")` block at the end of the `Describe`: + +```go +Context("AiGateway resolution", func() { + It("should accept an Experiment with aiGatewayRef", func() { + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exp-gw-ref", + Namespace: namespace, + }, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, + AiGatewayRef: &corev1.ObjectReference{ + Name: "my-gateway", + Namespace: "ai-gateway", + }, + Scenarios: []testbenchv1alpha1.Scenario{ + {Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}, + }, + }, + } + Expect(k8sClient.Create(ctx, exp)).To(Succeed()) + defer func() { + _ = k8sClient.Delete(ctx, exp) + }() + + fetched := &testbenchv1alpha1.Experiment{} + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: "exp-gw-ref", Namespace: namespace, + }, fetched)).To(Succeed()) + 
Expect(fetched.Spec.AiGatewayRef).NotTo(BeNil()) + Expect(fetched.Spec.AiGatewayRef.Name).To(Equal("my-gateway")) + Expect(fetched.Spec.AiGatewayRef.Namespace).To(Equal("ai-gateway")) + }) +}) +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd operator && make test +``` + +Expected: FAIL — `AiGatewayRef` field does not exist on `ExperimentSpec`. + +- [ ] **Step 3: Add the AiGatewayRef field to ExperimentSpec** + +In `operator/api/v1alpha1/experiment_types.go`: + +Add import: +```go +corev1 "k8s.io/api/core/v1" +``` + +Add field to `ExperimentSpec` (after `AgentRef`): +```go +// AiGatewayRef references an AiGateway resource for LLM access during evaluation. +// Only Name and Namespace fields are used. +// +optional +AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"` +``` + +- [ ] **Step 4: Regenerate CRD manifests** + +```bash +cd operator && make manifests +``` + +- [ ] **Step 5: Run test to verify it passes** + +```bash +cd operator && make test +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +cd operator && git add api/ config/crd/ internal/controller/experiment_controller_test.go +git commit -m "feat: add AiGatewayRef field to ExperimentSpec CRD" +``` + +--- + +### Task 4: Register AiGateway types in scheme and set up test CRD + +**Files:** +- Modify: `operator/cmd/main.go:38-41` (imports) and `:48-53` (init) +- Modify: `operator/internal/controller/suite_test.go:35-37` (imports) and `:78-79` (scheme) +- Create: `operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml` + +- [ ] **Step 1: Copy the AiGateway CRD to testdata** + +```bash +# Run from the testbench repo root; assumes agent-runtime-operator is checked out as a sibling directory. +cp ../agent-runtime-operator/config/crd/bases/runtime.agentic-layer.ai_aigateways.yaml operator/internal/controller/testdata/crds/ +``` + +- [ ] **Step 2: Register AiGateway types in main.go** + +In `operator/cmd/main.go`: + +Add import: +```go +runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" +``` + +Add to `init()` function, after the existing `testbenchv1alpha1.AddToScheme(scheme)` line: +```go +utilruntime.Must(runtimev1alpha1.AddToScheme(scheme)) +``` + +- [ ] **Step 3: Register AiGateway types in suite_test.go** + +In `operator/internal/controller/suite_test.go`: + +Add import: +```go +runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" +``` + +Add after `testbenchv1alpha1.AddToScheme(scheme.Scheme)`: +```go +err = runtimev1alpha1.AddToScheme(scheme.Scheme) +Expect(err).NotTo(HaveOccurred()) +``` + +- [ ] **Step 4: Verify tests still pass** + +```bash +cd operator && make test +``` + +Expected: PASS (all existing tests still green). 
+ +- [ ] **Step 5: Commit** + +```bash +cd operator && git add cmd/main.go internal/controller/suite_test.go internal/controller/testdata/crds/ +git commit -m "build: register AiGateway types in scheme and add CRD to testdata" +``` + +--- + +### Task 5: Implement AI Gateway resolution methods + +**Files:** +- Modify: `operator/internal/controller/experiment_controller.go:19-39` (imports), `:41-47` (constants), and new methods after `resolveDatasetURL` +- Modify: `operator/internal/controller/experiment_controller_test.go` (add resolution tests) + +- [ ] **Step 1: Write failing tests for resolveAiGateway** + +Add to the `"AiGateway resolution"` context in the test file: + +```go +It("should resolve an explicit AiGateway by ref", func() { + By("creating an AiGateway resource") + gw := &runtimev1alpha1.AiGateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-gateway", + Namespace: namespace, + }, + Spec: runtimev1alpha1.AiGatewaySpec{ + Port: 4000, + AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}}, + }, + } + Expect(k8sClient.Create(ctx, gw)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, gw) }() + + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AiGatewayRef: &corev1.ObjectReference{ + Name: "test-gateway", + Namespace: namespace, + }, + }, + } + resolved, err := r.resolveAiGateway(ctx, exp) + Expect(err).NotTo(HaveOccurred()) + Expect(resolved).NotTo(BeNil()) + Expect(resolved.Name).To(Equal("test-gateway")) + Expect(resolved.Spec.Port).To(Equal(int32(4000))) +}) + +It("should resolve default AiGateway from ai-gateway namespace", func() { + By("creating the ai-gateway namespace") + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "ai-gateway"}} + _ = k8sClient.Create(ctx, ns) + + By("creating an AiGateway in ai-gateway namespace") + gw := &runtimev1alpha1.AiGateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
"default-gw", + Namespace: "ai-gateway", + }, + Spec: runtimev1alpha1.AiGatewaySpec{ + Port: 80, + AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}}, + }, + } + Expect(k8sClient.Create(ctx, gw)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, gw) }() + + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{}, + } + resolved, err := r.resolveAiGateway(ctx, exp) + Expect(err).NotTo(HaveOccurred()) + Expect(resolved).NotTo(BeNil()) + Expect(resolved.Name).To(Equal("default-gw")) +}) + +It("should return nil when no AiGateway exists", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{}, + } + resolved, err := r.resolveAiGateway(ctx, exp) + Expect(err).NotTo(HaveOccurred()) + Expect(resolved).To(BeNil()) +}) + +It("should return error when explicit ref points to non-existent gateway", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AiGatewayRef: &corev1.ObjectReference{ + Name: "nonexistent", + Namespace: namespace, + }, + }, + } + _, err := r.resolveAiGateway(ctx, exp) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to resolve AiGateway")) +}) +``` + +Add the `runtimev1alpha1` import to the test file: +```go +runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd operator && make test +``` + +Expected: FAIL — `resolveAiGateway` method does not exist. 
+ +- [ ] **Step 3: Implement the resolution methods** + +In `operator/internal/controller/experiment_controller.go`: + +Add import (note: `apimeta "k8s.io/apimachinery/pkg/api/meta"` is already imported — do not add it again): +```go +runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" +``` + +Add constant: +```go +defaultAiGatewayNamespace = "ai-gateway" +``` + +Add RBAC marker (near the existing markers before `Reconcile`): +```go +// +kubebuilder:rbac:groups=runtime.agentic-layer.ai,resources=aigateways,verbs=get;list;watch +``` + +Add the three methods after `resolveDatasetURL`: + +```go +// resolveAiGateway resolves the AiGateway resource for an experiment. +// If AiGatewayRef is specified, looks up that specific gateway. +// Otherwise, searches for any AiGateway in the "ai-gateway" namespace. +// Returns nil (no error) if no AiGateway is found. +func (r *ExperimentReconciler) resolveAiGateway(ctx context.Context, experiment *testbenchv1alpha1.Experiment) (*runtimev1alpha1.AiGateway, error) { + if experiment.Spec.AiGatewayRef != nil { + return r.resolveExplicitAiGateway(ctx, experiment.Spec.AiGatewayRef, experiment.Namespace) + } + return r.resolveDefaultAiGateway(ctx) +} + +// resolveExplicitAiGateway resolves a specific AiGateway referenced by the experiment. 
+func (r *ExperimentReconciler) resolveExplicitAiGateway(ctx context.Context, ref *corev1.ObjectReference, experimentNamespace string) (*runtimev1alpha1.AiGateway, error) { + namespace := ref.Namespace + if namespace == "" { + namespace = experimentNamespace + } + + var aiGateway runtimev1alpha1.AiGateway + err := r.Get(ctx, types.NamespacedName{ + Name: ref.Name, + Namespace: namespace, + }, &aiGateway) + + if err != nil { + if apimeta.IsNoMatchError(err) { + return nil, fmt.Errorf("AiGateway CRD is not installed in the cluster") + } + return nil, fmt.Errorf("failed to resolve AiGateway %s/%s: %w", namespace, ref.Name, err) + } + + return &aiGateway, nil +} + +// resolveDefaultAiGateway searches for any AiGateway in the default ai-gateway namespace. +func (r *ExperimentReconciler) resolveDefaultAiGateway(ctx context.Context) (*runtimev1alpha1.AiGateway, error) { + logger := log.FromContext(ctx) + + var aiGatewayList runtimev1alpha1.AiGatewayList + err := r.List(ctx, &aiGatewayList, client.InNamespace(defaultAiGatewayNamespace)) + if err != nil { + if apimeta.IsNoMatchError(err) { + logger.Info("AiGateway CRD is not installed, skipping default gateway resolution") + return nil, nil + } + return nil, fmt.Errorf("failed to list AiGateways in namespace %s: %w", defaultAiGatewayNamespace, err) + } + + if len(aiGatewayList.Items) == 0 { + return nil, nil + } + + if len(aiGatewayList.Items) > 1 { + logger.Info("Multiple AiGateways found, selecting first one", + "selected", aiGatewayList.Items[0].Name, + "count", len(aiGatewayList.Items)) + } + + aiGateway := aiGatewayList.Items[0] + return &aiGateway, nil +} + +func buildAiGatewayServiceUrl(aiGateway runtimev1alpha1.AiGateway) string { + return fmt.Sprintf("http://%s.%s.svc.cluster.local.:%d", aiGateway.Name, aiGateway.Namespace, aiGateway.Spec.Port) +} +``` + +Note: The code uses `apimeta.IsNoMatchError` which references the existing `apimeta` import alias already in the file. 
+ +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd operator && make test +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd operator && git add internal/controller/experiment_controller.go internal/controller/experiment_controller_test.go config/rbac/ +git commit -m "feat: implement AiGateway resolution methods" +``` + +--- + +### Task 6: Wire AiGateway resolution into buildTestWorkflow + +**Files:** +- Modify: `operator/internal/controller/experiment_controller.go:137-156` (reconcileResources), `:257-262` (reconcileTestWorkflow call), `:299-368` (buildTestWorkflow) +- Modify: `operator/internal/controller/experiment_controller_test.go` + +- [ ] **Step 1: Write failing tests for evaluate-template config** + +Add to the `"AiGateway resolution"` context in the test file: + +```go +It("should set openApiBasePath on evaluate-template when AiGateway is resolved", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: "exp-gw-url", Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"}, + Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + }, + } + gw := &runtimev1alpha1.AiGateway{ + ObjectMeta: metav1.ObjectMeta{Name: "my-gw", Namespace: "ai-gateway"}, + Spec: runtimev1alpha1.AiGatewaySpec{Port: 4000, AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}}}, + } + + wf := r.buildTestWorkflow(exp, gw) + spec := wf.Object["spec"].(map[string]interface{}) + use := spec["use"].([]interface{}) + + var evalTemplate map[string]interface{} + for _, u := range use { + um := u.(map[string]interface{}) + if um["name"] == "evaluate-template" { + evalTemplate = um + break + } + } + Expect(evalTemplate).NotTo(BeNil()) + cfg := evalTemplate["config"].(map[string]interface{}) + 
Expect(cfg["openApiBasePath"]).To(Equal("http://my-gw.ai-gateway.svc.cluster.local.:4000")) +}) + +It("should not set config on evaluate-template when no AiGateway", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: "exp-no-gw", Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"}, + Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + }, + } + + wf := r.buildTestWorkflow(exp, nil) + spec := wf.Object["spec"].(map[string]interface{}) + use := spec["use"].([]interface{}) + + var evalTemplate map[string]interface{} + for _, u := range use { + um := u.(map[string]interface{}) + if um["name"] == "evaluate-template" { + evalTemplate = um + break + } + } + Expect(evalTemplate).NotTo(BeNil()) + _, hasConfig := evalTemplate["config"] + Expect(hasConfig).To(BeFalse()) +}) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd operator && make test +``` + +Expected: FAIL — `buildTestWorkflow` does not accept an `aiGateway` parameter. + +- [ ] **Step 3: Wire the resolution into reconcileResources and buildTestWorkflow** + +In `operator/internal/controller/experiment_controller.go`: + +**A. 
Update `reconcileResources`** — add AiGateway resolution before `reconcileTestWorkflow`: + +Change: +```go +func (r *ExperimentReconciler) reconcileResources( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + generatedResources *[]testbenchv1alpha1.GeneratedResource, +) (reconcileResult, error) { + var result reconcileResult + if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil { + return result, fmt.Errorf("reconciling ConfigMap: %w", err) + } + wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, generatedResources) +``` + +to: +```go +func (r *ExperimentReconciler) reconcileResources( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + generatedResources *[]testbenchv1alpha1.GeneratedResource, +) (reconcileResult, error) { + var result reconcileResult + if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil { + return result, fmt.Errorf("reconciling ConfigMap: %w", err) + } + aiGateway, err := r.resolveAiGateway(ctx, experiment) + if err != nil { + return result, fmt.Errorf("resolving AiGateway: %w", err) + } + wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, aiGateway, generatedResources) +``` + +**B. Update `reconcileTestWorkflow` signature** — add `aiGateway` parameter: + +Change: +```go +func (r *ExperimentReconciler) reconcileTestWorkflow( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + generatedResources *[]testbenchv1alpha1.GeneratedResource, +) (bool, error) { + workflow := r.buildTestWorkflow(experiment) +``` + +to: +```go +func (r *ExperimentReconciler) reconcileTestWorkflow( + ctx context.Context, + experiment *testbenchv1alpha1.Experiment, + aiGateway *runtimev1alpha1.AiGateway, + generatedResources *[]testbenchv1alpha1.GeneratedResource, +) (bool, error) { + workflow := r.buildTestWorkflow(experiment, aiGateway) +``` + +**C. 
Update `buildTestWorkflow` signature and evaluate-template config**: + +Change signature: +```go +func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment) *unstructured.Unstructured { +``` + +to: +```go +func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment, aiGateway *runtimev1alpha1.AiGateway) *unstructured.Unstructured { +``` + +Replace the evaluate-template entry. Change: +```go + useTemplates = append(useTemplates, + map[string]interface{}{ + "name": "run-template", + "config": map[string]interface{}{ + "agentUrl": agentURL, + }, + }, + map[string]interface{}{"name": "evaluate-template"}, + map[string]interface{}{"name": "publish-template"}, + map[string]interface{}{"name": "visualize-template"}, + ) +``` + +to: +```go + evaluateTemplate := map[string]interface{}{"name": "evaluate-template"} + if aiGateway != nil { + evaluateTemplate["config"] = map[string]interface{}{ + "openApiBasePath": buildAiGatewayServiceUrl(*aiGateway), + } + } + + useTemplates = append(useTemplates, + map[string]interface{}{ + "name": "run-template", + "config": map[string]interface{}{ + "agentUrl": agentURL, + }, + }, + evaluateTemplate, + map[string]interface{}{"name": "publish-template"}, + map[string]interface{}{"name": "visualize-template"}, + ) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd operator && make test +``` + +Expected: ALL tests PASS (both new and existing). 
+ +- [ ] **Step 5: Commit** + +```bash +cd operator && git add internal/controller/experiment_controller.go internal/controller/experiment_controller_test.go +git commit -m "feat: wire AiGateway resolution into TestWorkflow evaluate-template config" +``` + +--- + +### Task 7: Regenerate RBAC and run full verification + +**Files:** +- Modify: `operator/config/rbac/role.yaml` (regenerated) + +- [ ] **Step 1: Regenerate RBAC manifests** + +```bash +cd operator && make manifests +``` + +- [ ] **Step 2: Verify the RBAC role includes AiGateway permissions** + +Check that `operator/config/rbac/role.yaml` contains: + +```yaml +- apiGroups: + - runtime.agentic-layer.ai + resources: + - aigateways + verbs: + - get + - list + - watch +``` + +- [ ] **Step 3: Run the full test suite** + +```bash +cd operator && make test +``` + +Expected: ALL tests PASS. + +- [ ] **Step 4: Verify build** + +```bash +cd operator && go build ./... +``` + +Expected: No errors. + +- [ ] **Step 5: Commit** + +```bash +cd operator && git add config/rbac/ +git commit -m "build: regenerate RBAC with AiGateway permissions" +``` + +--- + +### Task 8: Update sample manifest + +**Files:** +- Modify: `operator/config/samples/testbench_v1alpha1_experiment.yaml` + +- [ ] **Step 1: Add aiGatewayRef example to the sample** + +Add the `aiGatewayRef` field to the sample experiment (after `agentRef`): + +```yaml + aiGatewayRef: + name: ai-gateway + namespace: ai-gateway +``` + +- [ ] **Step 2: Commit** + +```bash +cd operator && git add config/samples/ +git commit -m "docs: add aiGatewayRef to sample Experiment manifest" +``` \ No newline at end of file From 12e251c0df9af97e8904c2e1afe5dbe0854e6987 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 15:19:32 +0100 Subject: [PATCH 07/19] fix: add default value to evaluate-template openApiBasePath config Co-Authored-By: Claude Opus 4.6 (1M context) --- chart/templates/evaluate-template.yaml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/chart/templates/evaluate-template.yaml b/chart/templates/evaluate-template.yaml index ff4cb7c..07aa1d5 100644 --- a/chart/templates/evaluate-template.yaml +++ b/chart/templates/evaluate-template.yaml @@ -12,6 +12,7 @@ spec: openApiBasePath: type: string description: "Base path for OpenAI API" + default: "" # Steps to execute steps: From c8c4871a0986af4a9ac497ca3953a131fcebfa0b Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 15:21:16 +0100 Subject: [PATCH 08/19] build: add agent-runtime-operator module dependency Co-Authored-By: Claude Opus 4.6 (1M context) --- operator/go.mod | 5 +++-- operator/go.sum | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/operator/go.mod b/operator/go.mod index c4da270..18151e2 100644 --- a/operator/go.mod +++ b/operator/go.mod @@ -1,6 +1,6 @@ module github.com/agentic-layer/testbench/operator -go 1.25.0 +go 1.26.0 require ( github.com/onsi/ginkgo/v2 v2.28.1 @@ -14,6 +14,7 @@ require ( require ( cel.dev/expr v0.24.0 // indirect github.com/Masterminds/semver/v3 v3.4.0 // indirect + github.com/agentic-layer/agent-runtime-operator v0.25.0 // indirect github.com/antlr4-go/antlr/v4 v4.13.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect @@ -89,7 +90,7 @@ require ( k8s.io/apiextensions-apiserver v0.35.0 // indirect k8s.io/apiserver v0.35.0 // indirect k8s.io/component-base v0.35.0 // indirect - k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/klog/v2 v2.140.0 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect diff --git a/operator/go.sum b/operator/go.sum index 31de72b..3eb481a 100644 --- a/operator/go.sum +++ b/operator/go.sum @@ -2,6 +2,8 @@ cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= 
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/agentic-layer/agent-runtime-operator v0.25.0 h1:akCgx22idyDxCCNxrkyxa09ec2TKIAVMAc0uKTPh4uw= +github.com/agentic-layer/agent-runtime-operator v0.25.0/go.mod h1:ViDJhISWCCZYAzP1f1TlThGyCI79h4fovPEzeJqK+o0= github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -237,6 +239,8 @@ k8s.io/component-base v0.35.0 h1:+yBrOhzri2S1BVqyVSvcM3PtPyx5GUxCK2tinZz1G94= k8s.io/component-base v0.35.0/go.mod h1:85SCX4UCa6SCFt6p3IKAPej7jSnF3L8EbfSyMZayJR0= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc= +k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0= k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= From 6e1a4e61d5fc000495cebfac15c5b65f6d4c8254 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 15:30:13 +0100 Subject: [PATCH 09/19] feat: add AiGatewayRef field to ExperimentSpec CRD Co-Authored-By: Claude Opus 4.6 (1M context) --- operator/api/v1alpha1/experiment_types.go | 10 +++ .../api/v1alpha1/zz_generated.deepcopy.go | 10 ++- ...estbench.agentic-layer.ai_experiments.yaml | 48 ++++++++++++++ .../controller/experiment_controller_test.go | 65 +++++++++++++++++-- 4 files changed, 124 insertions(+), 9 deletions(-) 
diff --git a/operator/api/v1alpha1/experiment_types.go b/operator/api/v1alpha1/experiment_types.go index 3400280..3ce9322 100644 --- a/operator/api/v1alpha1/experiment_types.go +++ b/operator/api/v1alpha1/experiment_types.go @@ -17,6 +17,7 @@ limitations under the License. package v1alpha1 import ( + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -160,6 +161,11 @@ type ExperimentSpec struct { // +kubebuilder:validation:Required AgentRef AgentRef `json:"agentRef"` + // AiGatewayRef references an AiGateway resource for LLM access during evaluation. + // Only Name and Namespace fields are used. + // +optional + AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"` + // Source of the test dataset (mutually exclusive with scenarios) // +optional Dataset *DatasetSource `json:"dataset,omitempty"` @@ -179,6 +185,10 @@ type ExperimentSpec struct { // +optional Scenarios []Scenario `json:"scenarios,omitempty"` + // OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318") + // +optional + OTLPEndpoint string `json:"otlpEndpoint,omitempty"` + // Trigger configuration // +optional Trigger *TriggerSpec `json:"trigger,omitempty"` diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go index 6b0e358..fdf55f8 100644 --- a/operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operator/api/v1alpha1/zz_generated.deepcopy.go @@ -21,7 +21,8 @@ limitations under the License. 
package v1alpha1 import ( - "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -123,6 +124,11 @@ func (in *ExperimentList) DeepCopyObject() runtime.Object { func (in *ExperimentSpec) DeepCopyInto(out *ExperimentSpec) { *out = *in out.AgentRef = in.AgentRef + if in.AiGatewayRef != nil { + in, out := &in.AiGatewayRef, &out.AiGatewayRef + *out = new(v1.ObjectReference) + **out = **in + } if in.Dataset != nil { in, out := &in.Dataset, &out.Dataset *out = new(DatasetSource) @@ -157,7 +163,7 @@ func (in *ExperimentStatus) DeepCopyInto(out *ExperimentStatus) { *out = *in if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions - *out = make([]v1.Condition, len(*in)) + *out = make([]metav1.Condition, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } diff --git a/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml b/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml index 15cc3e2..87834d3 100644 --- a/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml +++ b/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml @@ -63,6 +63,51 @@ spec: required: - name type: object + aiGatewayRef: + description: |- + AiGatewayRef references an AiGateway resource for LLM access during evaluation. + Only Name and Namespace fields are used. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 
+ For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic dataset: description: Source of the test dataset (mutually exclusive with scenarios) properties: @@ -93,6 +138,9 @@ spec: description: LLM model used for evaluation (e.g., "gemini-2.5-flash-lite", "gpt-4o") type: string + otlpEndpoint: + description: OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318") + type: string scenarios: description: Inline test scenarios (mutually exclusive with dataset) items: diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go index 2a18a63..ebf9284 100644 --- a/operator/internal/controller/experiment_controller_test.go +++ b/operator/internal/controller/experiment_controller_test.go @@ -569,6 +569,39 @@ var _ = Describe("Experiment Controller", func() { }) }) + Context("AiGateway resolution", func() { + It("should accept an Experiment with aiGatewayRef", func() { + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exp-gw-ref", + Namespace: namespace, + }, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, + AiGatewayRef: &corev1.ObjectReference{ + Name: "my-gateway", + Namespace: "ai-gateway", + }, + Scenarios: []testbenchv1alpha1.Scenario{ + {Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}, + }, + }, + } + Expect(k8sClient.Create(ctx, exp)).To(Succeed()) + defer func() { + _ = k8sClient.Delete(ctx, exp) + }() + + fetched := &testbenchv1alpha1.Experiment{} + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: "exp-gw-ref", Namespace: namespace, + }, fetched)).To(Succeed()) + Expect(fetched.Spec.AiGatewayRef).NotTo(BeNil()) + Expect(fetched.Spec.AiGatewayRef.Name).To(Equal("my-gateway")) + Expect(fetched.Spec.AiGatewayRef.Namespace).To(Equal("ai-gateway")) + }) + }) + 
Context("OTel env var injection", func() { const expName = "exp-otel" @@ -576,12 +609,13 @@ var _ = Describe("Experiment Controller", func() { cleanupExperiment(expName) }) - It("should inject OTEL_EXPORTER_OTLP_ENDPOINT from otel-config ConfigMap", func() { + It("should inject OTEL_EXPORTER_OTLP_ENDPOINT as direct value from spec.otlpEndpoint", func() { exp := &testbenchv1alpha1.Experiment{ ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, Spec: testbenchv1alpha1.ExperimentSpec{ - AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, - Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, + OTLPEndpoint: "http://lgtm.monitoring.svc.cluster.local:4318", + Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, }, } Expect(k8sClient.Create(ctx, exp)).To(Succeed()) @@ -597,10 +631,27 @@ var _ = Describe("Experiment Controller", func() { Expect(envList).To(HaveLen(1)) envVar := envList[0].(map[string]interface{}) Expect(envVar["name"]).To(Equal(otelEndpointKey)) - valueFrom := envVar["valueFrom"].(map[string]interface{}) - cmRef := valueFrom["configMapKeyRef"].(map[string]interface{}) - Expect(cmRef["name"]).To(Equal(otelConfigMapName)) - Expect(cmRef["key"]).To(Equal(otelEndpointKey)) + Expect(envVar["value"]).To(Equal("http://lgtm.monitoring.svc.cluster.local:4318")) + }) + + It("should omit container env when otlpEndpoint is not set", func() { + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, + Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + }, + } + Expect(k8sClient.Create(ctx, exp)).To(Succeed()) + Expect(reconcileExperiment(expName)).To(Succeed()) + + wf := &unstructured.Unstructured{} + 
wf.SetGroupVersionKind(testWorkflowGVK) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-workflow", Namespace: namespace}, wf)).To(Succeed()) + + spec := wf.Object["spec"].(map[string]interface{}) + _, hasContainer := spec["container"] + Expect(hasContainer).To(BeFalse(), "spec.container should be absent when otlpEndpoint is not set") }) }) }) From b1d046f3778ea789f1d7050793e424813fda5986 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 15:30:40 +0100 Subject: [PATCH 10/19] build: register AiGateway types in scheme and add CRD to testdata Co-Authored-By: Claude Opus 4.6 (1M context) --- operator/cmd/main.go | 2 + operator/internal/controller/suite_test.go | 4 + .../runtime.agentic-layer.ai_aigateways.yaml | 435 ++++++++++++++++++ 3 files changed, 441 insertions(+) create mode 100644 operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml diff --git a/operator/cmd/main.go b/operator/cmd/main.go index e99865c..4d967c2 100644 --- a/operator/cmd/main.go +++ b/operator/cmd/main.go @@ -35,6 +35,7 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" + runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1" "github.com/agentic-layer/testbench/operator/internal/controller" // +kubebuilder:scaffold:imports @@ -49,6 +50,7 @@ func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(testbenchv1alpha1.AddToScheme(scheme)) + utilruntime.Must(runtimev1alpha1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } diff --git a/operator/internal/controller/suite_test.go b/operator/internal/controller/suite_test.go index 6250695..3d42305 100644 --- a/operator/internal/controller/suite_test.go +++ b/operator/internal/controller/suite_test.go @@ -32,6 +32,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" 
"sigs.k8s.io/controller-runtime/pkg/log/zap" + runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1" // +kubebuilder:scaffold:imports ) @@ -78,6 +79,9 @@ var _ = BeforeSuite(func() { err = testbenchv1alpha1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) + err = runtimev1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + // +kubebuilder:scaffold:scheme k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) diff --git a/operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml b/operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml new file mode 100644 index 0000000..df0dc67 --- /dev/null +++ b/operator/internal/controller/testdata/crds/runtime.agentic-layer.ai_aigateways.yaml @@ -0,0 +1,435 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: aigateways.runtime.agentic-layer.ai +spec: + group: runtime.agentic-layer.ai + names: + kind: AiGateway + listKind: AiGatewayList + plural: aigateways + singular: aigateway + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: AiGateway is the Schema for the AI gateways API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: AiGatewaySpec defines the desired state of AiGateway. + properties: + aiGatewayClassName: + description: |- + AiGatewayClassName specifies which AiGatewayClass to use for this AI gateway instance. + This is only needed if multiple AI gateway classes are defined in the cluster. + type: string + aiModels: + description: List of AI models to be made available through the gateway. + items: + description: AiModel is an AI model configuration. + properties: + name: + description: Name is the identifier for the AI model (e.g., + "gpt-4", "claude-3-opus") + minLength: 1 + type: string + provider: + description: Provider specifies the AI provider (e.g., "openai", + "anthropic", "azure") + minLength: 1 + type: string + required: + - name + - provider + type: object + minItems: 1 + type: array + commonMetadata: + description: |- + CommonMetadata defines labels and annotations to be applied to the Deployment and Service + resources created for this gateway, as well as the pod template. + properties: + annotations: + additionalProperties: + type: string + description: Annotations is a map of key/value pairs to be applied + to the resource. + type: object + labels: + additionalProperties: + type: string + description: Labels is a map of key/value pairs to be applied + to the resource. + type: object + type: object + env: + description: |- + Environment variables to pass to the AI gateway container. + These can include configuration values, credentials, or feature flags. + items: + description: EnvVar represents an environment variable present in + a Container. + properties: + name: + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. 
+ type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. Cannot + be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath is + written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the specified + API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. 
+ Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
+ properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of the exposed + resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's namespace + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + envFrom: + description: |- + List of sources to populate environment variables in the AI gateway container. + This allows loading variables from ConfigMaps and Secrets. + items: + description: EnvFromSource represents the source of a set of ConfigMaps + or Secrets + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: |- + Optional text to prepend to the name of each environment variable. + May consist of any printable ASCII characters except '='. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + guardrails: + description: |- + Guardrails lists the Guard resources to be applied to requests through this AI gateway. + Guards are applied in the order they are listed. + items: + description: ObjectReference contains enough information to let + you inspect or modify the referred object. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. 
+ type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + type: array + podMetadata: + description: |- + PodMetadata defines labels and annotations to be applied only to the pod template + of the Deployment created for this gateway. + properties: + annotations: + additionalProperties: + type: string + description: Annotations is a map of key/value pairs to be applied + to the resource. + type: object + labels: + additionalProperties: + type: string + description: Labels is a map of key/value pairs to be applied + to the resource. + type: object + type: object + port: + default: 80 + description: Port on which the AI gateway will be exposed. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - aiModels + type: object + status: + description: AiGatewayStatus defines the observed state of AiGateway. + properties: + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. 
+ properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} From 17481001190cb0176396d12c864390ed835bf4d5 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 15:54:35 +0100 Subject: [PATCH 11/19] feat: implement AiGateway resolution and wire into TestWorkflow Adds resolveAiGateway, resolveExplicitAiGateway, resolveDefaultAiGateway methods mirroring agent-runtime-operator pattern. Passes resolved gateway URL as openApiBasePath config to evaluate-template. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../controller/experiment_controller.go | 144 +++++++++++++---- .../controller/experiment_controller_test.go | 148 ++++++++++++++++++ 2 files changed, 261 insertions(+), 31 deletions(-) diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go index eb240d8..105a38d 100644 --- a/operator/internal/controller/experiment_controller.go +++ b/operator/internal/controller/experiment_controller.go @@ -35,15 +35,17 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" + runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1" ) const ( - conditionReady = "Ready" - conditionWorkflowReady = "WorkflowReady" - otelConfigMapName = "otel-config" - otelEndpointKey = "OTEL_EXPORTER_OTLP_ENDPOINT" - defaultAgentPort = "8000" + conditionReady = "Ready" + conditionWorkflowReady = "WorkflowReady" + otelEndpointKey = "OTEL_EXPORTER_OTLP_ENDPOINT" + defaultAgentPort = "8000" + testkubeNamespace = "testkube" + defaultAiGatewayNamespace = "ai-gateway" ) var ( @@ 
-107,6 +109,7 @@ type ExperimentReconciler struct { // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=testworkflows.testkube.io,resources=testworkflows,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=tests.testkube.io,resources=testtriggers,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=runtime.agentic-layer.ai,resources=aigateways,verbs=get;list;watch // Reconcile moves the cluster state closer to the desired state specified by the Experiment. func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -143,7 +146,11 @@ func (r *ExperimentReconciler) reconcileResources( if err := r.reconcileConfigMap(ctx, experiment, generatedResources); err != nil { return result, fmt.Errorf("reconciling ConfigMap: %w", err) } - wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, generatedResources) + aiGateway, err := r.resolveAiGateway(ctx, experiment) + if err != nil { + return result, fmt.Errorf("resolving AiGateway: %w", err) + } + wfSkipped, err := r.reconcileTestWorkflow(ctx, experiment, aiGateway, generatedResources) if err != nil { result.workflowErr = err return result, fmt.Errorf("reconciling TestWorkflow: %w", err) @@ -165,13 +172,15 @@ func (r *ExperimentReconciler) reconcileConfigMap( cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: cmName, - Namespace: experiment.Namespace, + Namespace: testkubeNamespace, }, } _, err := controllerutil.CreateOrUpdate(ctx, r.Client, cm, func() error { - if err := controllerutil.SetControllerReference(experiment, cm, r.Scheme); err != nil { - return err + if experiment.Namespace == testkubeNamespace { + if err := controllerutil.SetControllerReference(experiment, cm, r.Scheme); err != nil { + return err + } } data, buildErr := r.buildExperimentJSON(experiment) if buildErr != nil { @@ -255,11 +264,14 @@ func (r *ExperimentReconciler) 
convertStep(step testbenchv1alpha1.Step) stepJSON func (r *ExperimentReconciler) reconcileTestWorkflow( ctx context.Context, experiment *testbenchv1alpha1.Experiment, + aiGateway *runtimev1alpha1.AiGateway, generatedResources *[]testbenchv1alpha1.GeneratedResource, ) (bool, error) { - workflow := r.buildTestWorkflow(experiment) - if err := controllerutil.SetControllerReference(experiment, workflow, r.Scheme); err != nil { - return false, err + workflow := r.buildTestWorkflow(experiment, aiGateway) + if experiment.Namespace == testkubeNamespace { + if err := controllerutil.SetControllerReference(experiment, workflow, r.Scheme); err != nil { + return false, err + } } existing := &unstructured.Unstructured{} @@ -292,7 +304,7 @@ func (r *ExperimentReconciler) reconcileTestWorkflow( } // buildTestWorkflow constructs the desired TestWorkflow unstructured object. -func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment) *unstructured.Unstructured { +func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.Experiment, aiGateway *runtimev1alpha1.AiGateway) *unstructured.Unstructured { agentURL := r.resolveAgentURL(experiment) // Build the list of phase templates to chain. 
@@ -305,6 +317,13 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E }, }) } + evaluateTemplate := map[string]interface{}{"name": "evaluate-template"} + if aiGateway != nil { + evaluateTemplate["config"] = map[string]interface{}{ + "openApiBasePath": buildAiGatewayServiceUrl(*aiGateway), + } + } + useTemplates = append(useTemplates, map[string]interface{}{ "name": "run-template", @@ -312,26 +331,24 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E "agentUrl": agentURL, }, }, - map[string]interface{}{"name": "evaluate-template"}, + evaluateTemplate, map[string]interface{}{"name": "publish-template"}, map[string]interface{}{"name": "visualize-template"}, ) spec := map[string]interface{}{ - "container": map[string]interface{}{ + "use": useTemplates, + } + + if experiment.Spec.OTLPEndpoint != "" { + spec["container"] = map[string]interface{}{ "env": []interface{}{ map[string]interface{}{ - "name": otelEndpointKey, - "valueFrom": map[string]interface{}{ - "configMapKeyRef": map[string]interface{}{ - "name": otelConfigMapName, - "key": otelEndpointKey, - }, - }, + "name": otelEndpointKey, + "value": experiment.Spec.OTLPEndpoint, }, }, - }, - "use": useTemplates, + } } // For scenarios mode, mount the pre-populated ConfigMap as the experiment file. 
@@ -357,7 +374,7 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E "kind": testWorkflowGVK.Kind, "metadata": map[string]interface{}{ "name": experiment.Name + "-workflow", - "namespace": experiment.Namespace, + "namespace": testkubeNamespace, }, "spec": spec, }, @@ -378,7 +395,7 @@ func (r *ExperimentReconciler) reconcileTestTrigger( existing := &unstructured.Unstructured{} existing.SetGroupVersionKind(testTriggerGVK) existing.SetName(triggerName) - existing.SetNamespace(experiment.Namespace) + existing.SetNamespace(testkubeNamespace) if delErr := r.Delete(ctx, existing); delErr != nil && !errors.IsNotFound(delErr) { if isCRDNotInstalled(delErr) { return nil @@ -389,13 +406,15 @@ func (r *ExperimentReconciler) reconcileTestTrigger( } trigger := r.buildTestTrigger(experiment) - if err := controllerutil.SetControllerReference(experiment, trigger, r.Scheme); err != nil { - return err + if experiment.Namespace == testkubeNamespace { + if err := controllerutil.SetControllerReference(experiment, trigger, r.Scheme); err != nil { + return err + } } existing := &unstructured.Unstructured{} existing.SetGroupVersionKind(testTriggerGVK) - err := r.Get(ctx, types.NamespacedName{Name: triggerName, Namespace: experiment.Namespace}, existing) + err := r.Get(ctx, types.NamespacedName{Name: triggerName, Namespace: testkubeNamespace}, existing) if errors.IsNotFound(err) { if createErr := r.Create(ctx, trigger); createErr != nil { return createErr @@ -440,7 +459,7 @@ func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Ex "kind": testTriggerGVK.Kind, "metadata": map[string]interface{}{ "name": experiment.Name + "-trigger", - "namespace": experiment.Namespace, + "namespace": testkubeNamespace, }, "spec": map[string]interface{}{ "resource": "deployment", @@ -454,7 +473,7 @@ func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Ex "concurrencyPolicy": concurrencyPolicy, "testSelector": 
map[string]interface{}{ "name": experiment.Name + "-workflow", - "namespace": experiment.Namespace, + "namespace": testkubeNamespace, }, "disabled": false, }, @@ -542,6 +561,69 @@ func (r *ExperimentReconciler) resolveDatasetURL(experiment *testbenchv1alpha1.E return "" } +// resolveAiGateway resolves the AiGateway resource for an experiment. +func (r *ExperimentReconciler) resolveAiGateway(ctx context.Context, experiment *testbenchv1alpha1.Experiment) (*runtimev1alpha1.AiGateway, error) { + if experiment.Spec.AiGatewayRef != nil { + return r.resolveExplicitAiGateway(ctx, experiment.Spec.AiGatewayRef, experiment.Namespace) + } + return r.resolveDefaultAiGateway(ctx) +} + +// resolveExplicitAiGateway resolves a specific AiGateway referenced by the experiment. +func (r *ExperimentReconciler) resolveExplicitAiGateway(ctx context.Context, ref *corev1.ObjectReference, experimentNamespace string) (*runtimev1alpha1.AiGateway, error) { + namespace := ref.Namespace + if namespace == "" { + namespace = experimentNamespace + } + + var aiGateway runtimev1alpha1.AiGateway + err := r.Get(ctx, types.NamespacedName{ + Name: ref.Name, + Namespace: namespace, + }, &aiGateway) + + if err != nil { + if apimeta.IsNoMatchError(err) { + return nil, fmt.Errorf("AiGateway CRD is not installed in the cluster") + } + return nil, fmt.Errorf("failed to resolve AiGateway %s/%s: %w", namespace, ref.Name, err) + } + + return &aiGateway, nil +} + +// resolveDefaultAiGateway searches for any AiGateway in the default ai-gateway namespace. 
+func (r *ExperimentReconciler) resolveDefaultAiGateway(ctx context.Context) (*runtimev1alpha1.AiGateway, error) { + logger := log.FromContext(ctx) + + var aiGatewayList runtimev1alpha1.AiGatewayList + err := r.List(ctx, &aiGatewayList, client.InNamespace(defaultAiGatewayNamespace)) + if err != nil { + if apimeta.IsNoMatchError(err) { + logger.Info("AiGateway CRD is not installed, skipping default gateway resolution") + return nil, nil + } + return nil, fmt.Errorf("failed to list AiGateways in namespace %s: %w", defaultAiGatewayNamespace, err) + } + + if len(aiGatewayList.Items) == 0 { + return nil, nil + } + + if len(aiGatewayList.Items) > 1 { + logger.Info("Multiple AiGateways found, selecting first one", + "selected", aiGatewayList.Items[0].Name, + "count", len(aiGatewayList.Items)) + } + + aiGateway := aiGatewayList.Items[0] + return &aiGateway, nil +} + +func buildAiGatewayServiceUrl(aiGateway runtimev1alpha1.AiGateway) string { + return fmt.Sprintf("http://%s.%s.svc.cluster.local.:%d", aiGateway.Name, aiGateway.Namespace, aiGateway.Spec.Port) +} + // SetupWithManager sets up the controller with the Manager. func (r *ExperimentReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). 
diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go index ebf9284..45149fc 100644 --- a/operator/internal/controller/experiment_controller_test.go +++ b/operator/internal/controller/experiment_controller_test.go @@ -31,6 +31,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + runtimev1alpha1 "github.com/agentic-layer/agent-runtime-operator/api/v1alpha1" testbenchv1alpha1 "github.com/agentic-layer/testbench/operator/api/v1alpha1" ) @@ -600,6 +601,153 @@ var _ = Describe("Experiment Controller", func() { Expect(fetched.Spec.AiGatewayRef.Name).To(Equal("my-gateway")) Expect(fetched.Spec.AiGatewayRef.Namespace).To(Equal("ai-gateway")) }) + + It("should resolve an explicit AiGateway by ref", func() { + By("creating an AiGateway resource") + gw := &runtimev1alpha1.AiGateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-gateway", + Namespace: namespace, + }, + Spec: runtimev1alpha1.AiGatewaySpec{ + Port: 4000, + AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}}, + }, + } + Expect(k8sClient.Create(ctx, gw)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, gw) }() + + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AiGatewayRef: &corev1.ObjectReference{ + Name: "test-gateway", + Namespace: namespace, + }, + }, + } + resolved, err := r.resolveAiGateway(ctx, exp) + Expect(err).NotTo(HaveOccurred()) + Expect(resolved).NotTo(BeNil()) + Expect(resolved.Name).To(Equal("test-gateway")) + Expect(resolved.Spec.Port).To(Equal(int32(4000))) + }) + + It("should resolve default AiGateway from ai-gateway namespace", func() { + By("creating the ai-gateway namespace") + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "ai-gateway"}} + _ = k8sClient.Create(ctx, ns) + + By("creating an AiGateway in 
ai-gateway namespace") + gw := &runtimev1alpha1.AiGateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default-gw", + Namespace: "ai-gateway", + }, + Spec: runtimev1alpha1.AiGatewaySpec{ + Port: 80, + AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}}, + }, + } + Expect(k8sClient.Create(ctx, gw)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, gw) }() + + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{}, + } + resolved, err := r.resolveAiGateway(ctx, exp) + Expect(err).NotTo(HaveOccurred()) + Expect(resolved).NotTo(BeNil()) + Expect(resolved.Name).To(Equal("default-gw")) + }) + + It("should return nil when no AiGateway exists", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{}, + } + resolved, err := r.resolveAiGateway(ctx, exp) + Expect(err).NotTo(HaveOccurred()) + Expect(resolved).To(BeNil()) + }) + + It("should return error when explicit ref points to non-existent gateway", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AiGatewayRef: &corev1.ObjectReference{ + Name: "nonexistent", + Namespace: namespace, + }, + }, + } + _, err := r.resolveAiGateway(ctx, exp) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to resolve AiGateway")) + }) + + It("should set openApiBasePath on evaluate-template when AiGateway is resolved", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: "exp-gw-url", Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"}, + Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: 
[]testbenchv1alpha1.Step{{Input: "q"}}}}, + }, + } + gw := &runtimev1alpha1.AiGateway{ + ObjectMeta: metav1.ObjectMeta{Name: "my-gw", Namespace: "ai-gateway"}, + Spec: runtimev1alpha1.AiGatewaySpec{Port: 4000, AiModels: []runtimev1alpha1.AiModel{{Name: "gpt-4", Provider: "openai"}}}, + } + + wf := r.buildTestWorkflow(exp, gw) + spec := wf.Object["spec"].(map[string]interface{}) + use := spec["use"].([]interface{}) + + var evalTemplate map[string]interface{} + for _, u := range use { + um := u.(map[string]interface{}) + if um["name"] == "evaluate-template" { + evalTemplate = um + break + } + } + Expect(evalTemplate).NotTo(BeNil()) + cfg := evalTemplate["config"].(map[string]interface{}) + Expect(cfg["openApiBasePath"]).To(Equal("http://my-gw.ai-gateway.svc.cluster.local.:4000")) + }) + + It("should not set config on evaluate-template when no AiGateway", func() { + r := newReconciler() + exp := &testbenchv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{Name: "exp-no-gw", Namespace: namespace}, + Spec: testbenchv1alpha1.ExperimentSpec{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"}, + Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + }, + } + + wf := r.buildTestWorkflow(exp, nil) + spec := wf.Object["spec"].(map[string]interface{}) + use := spec["use"].([]interface{}) + + var evalTemplate map[string]interface{} + for _, u := range use { + um := u.(map[string]interface{}) + if um["name"] == "evaluate-template" { + evalTemplate = um + break + } + } + Expect(evalTemplate).NotTo(BeNil()) + _, hasConfig := evalTemplate["config"] + Expect(hasConfig).To(BeFalse()) + }) }) Context("OTel env var injection", func() { From ae15b9dd96b4cf443c148bddabe6e299626b247b Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 16:01:53 +0100 Subject: [PATCH 12/19] build: regenerate RBAC with AiGateway permissions Co-Authored-By: Claude Opus 4.6 (1M context) --- 
operator/config/rbac/role.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index a6be05a..4ec3487 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -16,6 +16,14 @@ rules: - patch - update - watch +- apiGroups: + - runtime.agentic-layer.ai + resources: + - aigateways + verbs: + - get + - list + - watch - apiGroups: - testbench.agentic-layer.ai resources: From ad0783e22f1ff1891a9ebcedfd7d3498fc51f473 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 16:03:01 +0100 Subject: [PATCH 13/19] docs: add aiGatewayRef to sample Experiment manifest Co-Authored-By: Claude Opus 4.6 (1M context) --- .../testbench_v1alpha1_experiment.yaml | 74 +++++++++++++++++-- 1 file changed, 69 insertions(+), 5 deletions(-) diff --git a/operator/config/samples/testbench_v1alpha1_experiment.yaml b/operator/config/samples/testbench_v1alpha1_experiment.yaml index 0bb8ea1..be2e900 100644 --- a/operator/config/samples/testbench_v1alpha1_experiment.yaml +++ b/operator/config/samples/testbench_v1alpha1_experiment.yaml @@ -9,11 +9,75 @@ spec: agentRef: name: weather-agent namespace: sample-agents + aiGatewayRef: + name: ai-gateway + namespace: ai-gateway + otlpEndpoint: http://lgtm.monitoring.svc.cluster.local:4318 llmAsAJudgeModel: gemini-2.5-flash-lite defaultThreshold: 0.9 - dataset: - s3: - bucket: testbench - key: dataset.csv + scenarios: + - name: "Weather in New York" + steps: + - input: "What is the weather like in New York right now?" + reference: + toolCalls: + - name: get_weather + args: + city: "New York" + topics: + - weather + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy + - metricName: TopicAdherence + parameters: + mode: precision + - name: "Weather in Bangkok (unavailable)" + steps: + - input: "What is the weather like in Bangkok right now?" 
+ reference: + toolCalls: + - name: get_weather + args: + city: "New York" + topics: + - time + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy + - metricName: TopicAdherence + parameters: + mode: precision + - name: "Weather in New York (alt reference)" + steps: + - input: "What is the weather like in New York right now?" + reference: + toolCalls: + - name: get_current_time + args: + city: "New York" + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy + - name: "Weather then time in New York (multi-step)" + steps: + - input: "What is the weather like in New York right now?" + reference: + toolCalls: + - name: get_weather + args: + city: "New York" + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy + - input: "What time is it in New York?" + reference: + toolCalls: + - name: get_current_time + args: + city: "New York" + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy trigger: - enabled: true + enabled: true \ No newline at end of file From 8e81511b16f67ba14476a0fde1a0e7f09f8aa3b8 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Wed, 25 Mar 2026 16:07:07 +0100 Subject: [PATCH 14/19] build: go mod tidy to mark agent-runtime-operator as direct dependency Co-Authored-By: Claude Opus 4.6 (1M context) --- operator/go.mod | 2 +- operator/go.sum | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/operator/go.mod b/operator/go.mod index 18151e2..3806207 100644 --- a/operator/go.mod +++ b/operator/go.mod @@ -3,6 +3,7 @@ module github.com/agentic-layer/testbench/operator go 1.26.0 require ( + github.com/agentic-layer/agent-runtime-operator v0.25.0 github.com/onsi/ginkgo/v2 v2.28.1 github.com/onsi/gomega v1.39.1 k8s.io/api v0.35.2 @@ -14,7 +15,6 @@ require ( require ( cel.dev/expr v0.24.0 // indirect github.com/Masterminds/semver/v3 v3.4.0 // indirect - 
github.com/agentic-layer/agent-runtime-operator v0.25.0 // indirect github.com/antlr4-go/antlr/v4 v4.13.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect diff --git a/operator/go.sum b/operator/go.sum index 3eb481a..589f07d 100644 --- a/operator/go.sum +++ b/operator/go.sum @@ -237,8 +237,6 @@ k8s.io/client-go v0.35.2 h1:YUfPefdGJA4aljDdayAXkc98DnPkIetMl4PrKX97W9o= k8s.io/client-go v0.35.2/go.mod h1:4QqEwh4oQpeK8AaefZ0jwTFJw/9kIjdQi0jpKeYvz7g= k8s.io/component-base v0.35.0 h1:+yBrOhzri2S1BVqyVSvcM3PtPyx5GUxCK2tinZz1G94= k8s.io/component-base v0.35.0/go.mod h1:85SCX4UCa6SCFt6p3IKAPej7jSnF3L8EbfSyMZayJR0= -k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= -k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc= k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0= k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= From 2471fd30eb2f9066c0f6817b806d0ac8d2f51831 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Thu, 26 Mar 2026 13:56:27 +0100 Subject: [PATCH 15/19] fix: update resource selector to use matchLabels for deployment in experiment controller --- Tiltfile | 4 ++-- deploy/local/testkube/values.yaml | 3 --- operator/Dockerfile | 2 +- operator/config/manager/manager.yaml | 1 + .../config/samples/testbench_v1alpha1_experiment.yaml | 3 --- operator/internal/controller/experiment_controller.go | 10 ++++++---- 6 files changed, 10 insertions(+), 13 deletions(-) diff --git a/Tiltfile b/Tiltfile index a76d123..110c255 100644 --- a/Tiltfile +++ b/Tiltfile @@ -7,7 +7,7 @@ update_settings(max_parallel_updates=10, k8s_upsert_timeout_secs=600) load('ext://dotenv', 'dotenv') dotenv() -v1alpha1.extension_repo(name='agentic-layer', url='https://github.com/agentic-layer/tilt-extensions', ref='v0.6.0') 
+v1alpha1.extension_repo(name='agentic-layer', url='https://github.com/agentic-layer/tilt-extensions', ref='v0.13.0') v1alpha1.extension(name='cert-manager', repo_name='agentic-layer', repo_path='cert-manager') load('ext://cert-manager', 'cert_manager_install') @@ -56,7 +56,7 @@ k8s_yaml(helm( # Apply local development manifests k8s_yaml(kustomize('deploy/local')) -k8s_resource('ai-gateway-litellm', port_forwards=['11001:4000']) +k8s_resource('ai-gateway', port_forwards=['11001:4000']) k8s_resource('weather-agent', port_forwards='11010:8000', labels=['agents'], resource_deps=['agent-runtime']) k8s_resource('lgtm', port_forwards=['11000:3000', '4318:4318']) diff --git a/deploy/local/testkube/values.yaml b/deploy/local/testkube/values.yaml index a5d8bfe..5eacb84 100644 --- a/deploy/local/testkube/values.yaml +++ b/deploy/local/testkube/values.yaml @@ -1,6 +1,3 @@ global: testWorkflows: createOfficialTemplates: false -testkube-operator: - ## deploy Operator chart - enabled: enable diff --git a/operator/Dockerfile b/operator/Dockerfile index 5a82af7..16ed4e9 100644 --- a/operator/Dockerfile +++ b/operator/Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM golang:1.25 AS builder +FROM golang:1.26 AS builder ARG TARGETOS ARG TARGETARCH diff --git a/operator/config/manager/manager.yaml b/operator/config/manager/manager.yaml index 7a734df..c373eb8 100644 --- a/operator/config/manager/manager.yaml +++ b/operator/config/manager/manager.yaml @@ -64,6 +64,7 @@ spec: - --leader-elect - --health-probe-bind-address=:8081 image: controller:latest + imagePullPolicy: IfNotPresent name: manager securityContext: allowPrivilegeEscalation: false diff --git a/operator/config/samples/testbench_v1alpha1_experiment.yaml b/operator/config/samples/testbench_v1alpha1_experiment.yaml index be2e900..ef0239c 100644 --- a/operator/config/samples/testbench_v1alpha1_experiment.yaml +++ b/operator/config/samples/testbench_v1alpha1_experiment.yaml @@ -9,9 +9,6 @@ spec: agentRef: name: 
weather-agent namespace: sample-agents - aiGatewayRef: - name: ai-gateway - namespace: ai-gateway otlpEndpoint: http://lgtm.monitoring.svc.cluster.local:4318 llmAsAJudgeModel: gemini-2.5-flash-lite defaultThreshold: 0.9 diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go index 105a38d..a5091db 100644 --- a/operator/internal/controller/experiment_controller.go +++ b/operator/internal/controller/experiment_controller.go @@ -462,10 +462,12 @@ func (r *ExperimentReconciler) buildTestTrigger(experiment *testbenchv1alpha1.Ex "namespace": testkubeNamespace, }, "spec": map[string]interface{}{ - "resource": "deployment", - "resourceSelector": map[string]interface{}{ - "name": experiment.Spec.AgentRef.Name, - "namespace": agentNs, + "selector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + "testkube.io/resource-kind": "Deployment", + "testkube.io/resource-name": experiment.Spec.AgentRef.Name, + "testkube.io/resource-namespace": agentNs, + }, }, "event": r.resolveTriggerEvent(experiment), "action": "run", From dbbc7083d298c1e872c5f58b7167cd306d272170 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Thu, 26 Mar 2026 15:11:15 +0100 Subject: [PATCH 16/19] refactor: restructure DatasetSource with InlineDataset type Co-Authored-By: Claude Sonnet 4.6 --- operator/api/v1alpha1/experiment_types.go | 48 +++-- .../api/v1alpha1/zz_generated.deepcopy.go | 45 ++-- ...estbench.agentic-layer.ai_experiments.yaml | 199 +++++++++--------- 3 files changed, 165 insertions(+), 127 deletions(-) diff --git a/operator/api/v1alpha1/experiment_types.go b/operator/api/v1alpha1/experiment_types.go index 3ce9322..bd78760 100644 --- a/operator/api/v1alpha1/experiment_types.go +++ b/operator/api/v1alpha1/experiment_types.go @@ -47,7 +47,27 @@ type S3Source struct { Key string `json:"key"` } -// DatasetSource defines where to load the test dataset from +// InlineDataset defines an inline experiment dataset with 
scenarios, model, and threshold. +type InlineDataset struct { + // LLM model used for evaluation (e.g., "gemini-2.5-flash-lite") + // +optional + LLMAsAJudgeModel string `json:"llmAsAJudgeModel,omitempty"` + + // Default threshold for all metrics (0.0-1.0) + // +optional + // +kubebuilder:validation:Minimum=0.0 + // +kubebuilder:validation:Maximum=1.0 + DefaultThreshold *float64 `json:"defaultThreshold,omitempty"` + + // Test scenarios + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinItems=1 + Scenarios []Scenario `json:"scenarios"` +} + +// DatasetSource defines where to load the test dataset from. +// Exactly one of s3, url, or inline must be set. +// +kubebuilder:validation:XValidation:rule="(has(self.s3) ? 1 : 0) + ((has(self.url) && self.url != '') ? 1 : 0) + (has(self.inline) ? 1 : 0) == 1",message="exactly one of s3, url, or inline must be set" type DatasetSource struct { // S3 source configuration // +optional @@ -56,6 +76,10 @@ type DatasetSource struct { // URL source (HTTP/HTTPS) // +optional URL string `json:"url,omitempty"` + + // Inline dataset with scenarios + // +optional + Inline *InlineDataset `json:"inline,omitempty"` } // ToolCall represents an expected tool invocation @@ -155,7 +179,6 @@ type TriggerSpec struct { } // ExperimentSpec defines the desired state of Experiment -// +kubebuilder:validation:XValidation:rule="!(has(self.dataset) && has(self.scenarios))",message="dataset and scenarios are mutually exclusive" type ExperimentSpec struct { // Reference to the Agent to evaluate // +kubebuilder:validation:Required @@ -166,24 +189,9 @@ type ExperimentSpec struct { // +optional AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"` - // Source of the test dataset (mutually exclusive with scenarios) - // +optional - Dataset *DatasetSource `json:"dataset,omitempty"` - - // LLM model used for evaluation (e.g., "gemini-2.5-flash-lite", "gpt-4o") - // +optional - LLMAsAJudgeModel string 
`json:"llmAsAJudgeModel,omitempty"` - - // Default threshold for all metrics (0.0-1.0) - // +optional - // +kubebuilder:validation:Minimum=0.0 - // +kubebuilder:validation:Maximum=1.0 - // +kubebuilder:default=0.9 - DefaultThreshold float64 `json:"defaultThreshold,omitempty"` - - // Inline test scenarios (mutually exclusive with dataset) - // +optional - Scenarios []Scenario `json:"scenarios,omitempty"` + // Source of the test dataset + // +kubebuilder:validation:Required + Dataset DatasetSource `json:"dataset"` // OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318") // +optional diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go index fdf55f8..59bc1c6 100644 --- a/operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operator/api/v1alpha1/zz_generated.deepcopy.go @@ -49,6 +49,11 @@ func (in *DatasetSource) DeepCopyInto(out *DatasetSource) { *out = new(S3Source) **out = **in } + if in.Inline != nil { + in, out := &in.Inline, &out.Inline + *out = new(InlineDataset) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DatasetSource. @@ -129,18 +134,7 @@ func (in *ExperimentSpec) DeepCopyInto(out *ExperimentSpec) { *out = new(v1.ObjectReference) **out = **in } - if in.Dataset != nil { - in, out := &in.Dataset, &out.Dataset - *out = new(DatasetSource) - (*in).DeepCopyInto(*out) - } - if in.Scenarios != nil { - in, out := &in.Scenarios, &out.Scenarios - *out = make([]Scenario, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } + in.Dataset.DeepCopyInto(&out.Dataset) if in.Trigger != nil { in, out := &in.Trigger, &out.Trigger *out = new(TriggerSpec) @@ -205,6 +199,33 @@ func (in *GeneratedResource) DeepCopy() *GeneratedResource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InlineDataset) DeepCopyInto(out *InlineDataset) { + *out = *in + if in.DefaultThreshold != nil { + in, out := &in.DefaultThreshold, &out.DefaultThreshold + *out = new(float64) + **out = **in + } + if in.Scenarios != nil { + in, out := &in.Scenarios, &out.Scenarios + *out = make([]Scenario, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InlineDataset. +func (in *InlineDataset) DeepCopy() *InlineDataset { + if in == nil { + return nil + } + out := new(InlineDataset) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *LastExecution) DeepCopyInto(out *LastExecution) { *out = *in diff --git a/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml b/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml index 87834d3..e8f8964 100644 --- a/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml +++ b/operator/config/crd/bases/testbench.agentic-layer.ai_experiments.yaml @@ -109,8 +109,106 @@ spec: type: object x-kubernetes-map-type: atomic dataset: - description: Source of the test dataset (mutually exclusive with scenarios) + description: Source of the test dataset properties: + inline: + description: Inline dataset with scenarios + properties: + defaultThreshold: + description: Default threshold for all metrics (0.0-1.0) + maximum: 1 + minimum: 0 + type: number + llmAsAJudgeModel: + description: LLM model used for evaluation (e.g., "gemini-2.5-flash-lite") + type: string + scenarios: + description: Test scenarios + items: + description: Scenario represents a test scenario containing + multiple steps + properties: + name: + description: Name of the scenario + type: string + steps: + description: Steps in this scenario + items: + description: Step represents a single test step within + 
a scenario + properties: + customValues: + description: Custom key-value pairs (e.g., retrieved_contexts) + x-kubernetes-preserve-unknown-fields: true + input: + description: User input to the agent + type: string + metrics: + description: Metrics to evaluate for this step + items: + description: Metric defines a single metric + evaluation configuration + properties: + metricName: + description: Name of the metric (e.g., "ragas_faithfulness", + "tool_check") + type: string + parameters: + description: Additional parameters for the + metric + x-kubernetes-preserve-unknown-fields: true + threshold: + description: Threshold for pass/fail (0.0-1.0) + maximum: 1 + minimum: 0 + type: number + required: + - metricName + type: object + type: array + reference: + description: Expected reference data for evaluation + properties: + response: + description: Expected response text + type: string + toolCalls: + description: Expected tool calls + items: + description: ToolCall represents an expected + tool invocation + properties: + args: + description: Arguments passed to the + tool (JSON object) + x-kubernetes-preserve-unknown-fields: true + name: + description: Name of the tool + type: string + required: + - name + type: object + type: array + topics: + description: Expected topics to be covered + items: + type: string + type: array + type: object + required: + - input + type: object + minItems: 1 + type: array + required: + - name + - steps + type: object + minItems: 1 + type: array + required: + - scenarios + type: object s3: description: S3 source configuration properties: @@ -128,100 +226,13 @@ spec: description: URL source (HTTP/HTTPS) type: string type: object - defaultThreshold: - default: 0.9 - description: Default threshold for all metrics (0.0-1.0) - maximum: 1 - minimum: 0 - type: number - llmAsAJudgeModel: - description: LLM model used for evaluation (e.g., "gemini-2.5-flash-lite", - "gpt-4o") - type: string + x-kubernetes-validations: + - message: exactly one of 
s3, url, or inline must be set + rule: '(has(self.s3) ? 1 : 0) + ((has(self.url) && self.url != '''') + ? 1 : 0) + (has(self.inline) ? 1 : 0) == 1' otlpEndpoint: description: OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318") type: string - scenarios: - description: Inline test scenarios (mutually exclusive with dataset) - items: - description: Scenario represents a test scenario containing multiple - steps - properties: - name: - description: Name of the scenario - type: string - steps: - description: Steps in this scenario - items: - description: Step represents a single test step within a scenario - properties: - customValues: - description: Custom key-value pairs (e.g., retrieved_contexts) - x-kubernetes-preserve-unknown-fields: true - input: - description: User input to the agent - type: string - metrics: - description: Metrics to evaluate for this step - items: - description: Metric defines a single metric evaluation - configuration - properties: - metricName: - description: Name of the metric (e.g., "ragas_faithfulness", - "tool_check") - type: string - parameters: - description: Additional parameters for the metric - x-kubernetes-preserve-unknown-fields: true - threshold: - description: Threshold for pass/fail (0.0-1.0) - maximum: 1 - minimum: 0 - type: number - required: - - metricName - type: object - type: array - reference: - description: Expected reference data for evaluation - properties: - response: - description: Expected response text - type: string - toolCalls: - description: Expected tool calls - items: - description: ToolCall represents an expected tool - invocation - properties: - args: - description: Arguments passed to the tool (JSON - object) - x-kubernetes-preserve-unknown-fields: true - name: - description: Name of the tool - type: string - required: - - name - type: object - type: array - topics: - description: Expected topics to be covered - items: - type: string - type: array - type: object 
- required: - - input - type: object - minItems: 1 - type: array - required: - - name - - steps - type: object - type: array trigger: description: Trigger configuration properties: @@ -242,10 +253,8 @@ spec: type: object required: - agentRef + - dataset type: object - x-kubernetes-validations: - - message: dataset and scenarios are mutually exclusive - rule: '!(has(self.dataset) && has(self.scenarios))' status: description: ExperimentStatus defines the observed state of Experiment properties: From c5c7d61836769c184e8bc4ce1c3da41c26a9f914 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Thu, 26 Mar 2026 15:12:54 +0100 Subject: [PATCH 17/19] refactor: update experimentJSON and controller to use Dataset.Inline - Update experimentJSON struct to include LLMAsAJudgeModel and DefaultThreshold fields - Source buildExperimentJSON from InlineDataset instead of Spec.Scenarios - Add ConfigMap cleanup when switching from inline to S3/URL mode - Branch buildTestWorkflow on Dataset.Inline == nil for setup-template inclusion - Simplify resolveDatasetURL to remove nil guard (Dataset is now value type) Co-Authored-By: Claude Sonnet 4.6 --- .../controller/experiment_controller.go | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/operator/internal/controller/experiment_controller.go b/operator/internal/controller/experiment_controller.go index a5091db..c7f9c30 100644 --- a/operator/internal/controller/experiment_controller.go +++ b/operator/internal/controller/experiment_controller.go @@ -64,7 +64,7 @@ var ( // experimentJSON is the JSON representation of experiment.json consumed by testbench scripts. 
type experimentJSON struct { LLMAsAJudgeModel string `json:"llm_as_a_judge_model,omitempty"` - DefaultThreshold float64 `json:"default_threshold"` + DefaultThreshold *float64 `json:"default_threshold,omitempty"` Scenarios []scenarioJSON `json:"scenarios"` } @@ -162,13 +162,29 @@ func (r *ExperimentReconciler) reconcileResources( return result, nil } -// reconcileConfigMap creates or updates the ConfigMap holding experiment.json. +// reconcileConfigMap creates or updates the ConfigMap holding experiment.json for inline mode, +// or deletes a stale ConfigMap when switching to S3/URL mode. func (r *ExperimentReconciler) reconcileConfigMap( ctx context.Context, experiment *testbenchv1alpha1.Experiment, generatedResources *[]testbenchv1alpha1.GeneratedResource, ) error { cmName := experiment.Name + "-experiment" + + if experiment.Spec.Dataset.Inline == nil { + // Delete stale ConfigMap if it exists (mode switched from inline to S3/URL). + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmName, + Namespace: testkubeNamespace, + }, + } + if err := r.Delete(ctx, cm); err != nil && !errors.IsNotFound(err) { + return err + } + return nil + } + cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: cmName, @@ -203,15 +219,16 @@ func (r *ExperimentReconciler) reconcileConfigMap( return nil } -// buildExperimentJSON serializes the Experiment spec scenarios into the experiment.json format -// expected by the testbench scripts. For dataset mode, it returns an empty scenarios list. +// buildExperimentJSON serializes the InlineDataset into the experiment.json format +// expected by the testbench scripts. 
func (r *ExperimentReconciler) buildExperimentJSON(experiment *testbenchv1alpha1.Experiment) (string, error) { + inline := experiment.Spec.Dataset.Inline exp := experimentJSON{ - LLMAsAJudgeModel: experiment.Spec.LLMAsAJudgeModel, - DefaultThreshold: experiment.Spec.DefaultThreshold, - Scenarios: make([]scenarioJSON, 0, len(experiment.Spec.Scenarios)), + LLMAsAJudgeModel: inline.LLMAsAJudgeModel, + DefaultThreshold: inline.DefaultThreshold, + Scenarios: make([]scenarioJSON, 0, len(inline.Scenarios)), } - for _, scenario := range experiment.Spec.Scenarios { + for _, scenario := range inline.Scenarios { sj := scenarioJSON{ Name: scenario.Name, Steps: make([]stepJSON, 0, len(scenario.Steps)), @@ -309,7 +326,7 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E // Build the list of phase templates to chain. var useTemplates []interface{} - if experiment.Spec.Dataset != nil { + if experiment.Spec.Dataset.Inline == nil { useTemplates = append(useTemplates, map[string]interface{}{ "name": "setup-template", "config": map[string]interface{}{ @@ -351,8 +368,8 @@ func (r *ExperimentReconciler) buildTestWorkflow(experiment *testbenchv1alpha1.E } } - // For scenarios mode, mount the pre-populated ConfigMap as the experiment file. - if experiment.Spec.Dataset == nil { + // For inline mode, mount the pre-populated ConfigMap as the experiment file. + if experiment.Spec.Dataset.Inline != nil { spec["content"] = map[string]interface{}{ "files": []interface{}{ map[string]interface{}{ @@ -551,9 +568,6 @@ func (r *ExperimentReconciler) resolveAgentURL(experiment *testbenchv1alpha1.Exp // resolveDatasetURL extracts the dataset URL from the DatasetSource. 
func (r *ExperimentReconciler) resolveDatasetURL(experiment *testbenchv1alpha1.Experiment) string { - if experiment.Spec.Dataset == nil { - return "" - } if experiment.Spec.Dataset.URL != "" { return experiment.Spec.Dataset.URL } From 5cd2d345e965d3a1aef30953186e8de79843f222 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Thu, 26 Mar 2026 15:14:40 +0100 Subject: [PATCH 18/19] docs: update sample Experiment YAML for new dataset structure Co-Authored-By: Claude Sonnet 4.6 --- .../testbench_v1alpha1_experiment.yaml | 139 +++++++++--------- 1 file changed, 72 insertions(+), 67 deletions(-) diff --git a/operator/config/samples/testbench_v1alpha1_experiment.yaml b/operator/config/samples/testbench_v1alpha1_experiment.yaml index ef0239c..c90d659 100644 --- a/operator/config/samples/testbench_v1alpha1_experiment.yaml +++ b/operator/config/samples/testbench_v1alpha1_experiment.yaml @@ -9,72 +9,77 @@ spec: agentRef: name: weather-agent namespace: sample-agents + aiGatewayRef: + name: ai-gateway + namespace: ai-gateway otlpEndpoint: http://lgtm.monitoring.svc.cluster.local:4318 - llmAsAJudgeModel: gemini-2.5-flash-lite - defaultThreshold: 0.9 - scenarios: - - name: "Weather in New York" - steps: - - input: "What is the weather like in New York right now?" - reference: - toolCalls: - - name: get_weather - args: - city: "New York" - topics: - - weather - metrics: - - metricName: AgentGoalAccuracyWithoutReference - - metricName: ToolCallAccuracy - - metricName: TopicAdherence - parameters: - mode: precision - - name: "Weather in Bangkok (unavailable)" - steps: - - input: "What is the weather like in Bangkok right now?" 
- reference: - toolCalls: - - name: get_weather - args: - city: "New York" - topics: - - time - metrics: - - metricName: AgentGoalAccuracyWithoutReference - - metricName: ToolCallAccuracy - - metricName: TopicAdherence - parameters: - mode: precision - - name: "Weather in New York (alt reference)" - steps: - - input: "What is the weather like in New York right now?" - reference: - toolCalls: - - name: get_current_time - args: - city: "New York" - metrics: - - metricName: AgentGoalAccuracyWithoutReference - - metricName: ToolCallAccuracy - - name: "Weather then time in New York (multi-step)" - steps: - - input: "What is the weather like in New York right now?" - reference: - toolCalls: - - name: get_weather - args: - city: "New York" - metrics: - - metricName: AgentGoalAccuracyWithoutReference - - metricName: ToolCallAccuracy - - input: "What time is it in New York?" - reference: - toolCalls: - - name: get_current_time - args: - city: "New York" - metrics: - - metricName: AgentGoalAccuracyWithoutReference - - metricName: ToolCallAccuracy + dataset: + inline: + llmAsAJudgeModel: gemini-2.5-flash-lite + defaultThreshold: 0.9 + scenarios: + - name: "Weather in New York" + steps: + - input: "What is the weather like in New York right now?" + reference: + toolCalls: + - name: get_weather + args: + city: "New York" + topics: + - weather + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy + - metricName: TopicAdherence + parameters: + mode: precision + - name: "Weather in Bangkok (unavailable)" + steps: + - input: "What is the weather like in Bangkok right now?" + reference: + toolCalls: + - name: get_weather + args: + city: "New York" + topics: + - time + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy + - metricName: TopicAdherence + parameters: + mode: precision + - name: "Weather in New York (alt reference)" + steps: + - input: "What is the weather like in New York right now?" 
+ reference: + toolCalls: + - name: get_current_time + args: + city: "New York" + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy + - name: "Weather then time in New York (multi-step)" + steps: + - input: "What is the weather like in New York right now?" + reference: + toolCalls: + - name: get_weather + args: + city: "New York" + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy + - input: "What time is it in New York?" + reference: + toolCalls: + - name: get_current_time + args: + city: "New York" + metrics: + - metricName: AgentGoalAccuracyWithoutReference + - metricName: ToolCallAccuracy trigger: - enabled: true \ No newline at end of file + enabled: true From 04c2b5961c212a7a349f2a3f51244a1d56601e05 Mon Sep 17 00:00:00 2001 From: Florian Mallmann Date: Thu, 26 Mar 2026 15:19:16 +0100 Subject: [PATCH 19/19] test: update all fixtures for DatasetSource restructuring Replace Scenarios at ExperimentSpec level with Dataset.Inline wrapper, change Dataset pointer to value type, delete obsolete dataset-mode ConfigMap test, and remove the now-invalid buildExperimentJSON URL-mode test. 
Co-Authored-By: Claude Sonnet 4.6 --- .../controller/experiment_controller_test.go | 118 ++++++++---------- 1 file changed, 50 insertions(+), 68 deletions(-) diff --git a/operator/internal/controller/experiment_controller_test.go b/operator/internal/controller/experiment_controller_test.go index 45149fc..2e7447b 100644 --- a/operator/internal/controller/experiment_controller_test.go +++ b/operator/internal/controller/experiment_controller_test.go @@ -82,28 +82,30 @@ var _ = Describe("Experiment Controller", func() { exp := &testbenchv1alpha1.Experiment{ ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, Spec: testbenchv1alpha1.ExperimentSpec{ - AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"}, - LLMAsAJudgeModel: "gemini-2.5-flash-lite", - DefaultThreshold: 0.9, - Scenarios: []testbenchv1alpha1.Scenario{ - { - Name: "test scenario", - Steps: []testbenchv1alpha1.Step{ + AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"}, + Dataset: testbenchv1alpha1.DatasetSource{ + Inline: &testbenchv1alpha1.InlineDataset{ + Scenarios: []testbenchv1alpha1.Scenario{ { - Input: "What is the weather?", - Reference: &testbenchv1alpha1.Reference{ - Response: "It is sunny", - Topics: []string{"weather"}, - ToolCalls: []testbenchv1alpha1.ToolCall{ - { - Name: "get_weather", - Args: runtime.RawExtension{Raw: []byte(`{"city":"NY"}`)}, + Name: "test scenario", + Steps: []testbenchv1alpha1.Step{ + { + Input: "What is the weather?", + Reference: &testbenchv1alpha1.Reference{ + Response: "It is sunny", + Topics: []string{"weather"}, + ToolCalls: []testbenchv1alpha1.ToolCall{ + { + Name: "get_weather", + Args: runtime.RawExtension{Raw: []byte(`{"city":"NY"}`)}, + }, + }, + }, + Metrics: []testbenchv1alpha1.Metric{ + {MetricName: "AgentGoalAccuracy"}, }, }, }, - Metrics: []testbenchv1alpha1.Metric{ - {MetricName: "AgentGoalAccuracy"}, - }, }, }, }, @@ -129,8 +131,6 @@ var _ = Describe("Experiment Controller", func() { 
By("verifying the experiment.json content") var expJSON experimentJSON Expect(json.Unmarshal([]byte(cm.Data["experiment.json"]), &expJSON)).To(Succeed()) - Expect(expJSON.LLMAsAJudgeModel).To(Equal("gemini-2.5-flash-lite")) - Expect(expJSON.DefaultThreshold).To(Equal(0.9)) Expect(expJSON.Scenarios).To(HaveLen(1)) Expect(expJSON.Scenarios[0].Name).To(Equal("test scenario")) Expect(expJSON.Scenarios[0].Steps).To(HaveLen(1)) @@ -274,7 +274,7 @@ var _ = Describe("Experiment Controller", func() { ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, Spec: testbenchv1alpha1.ExperimentSpec{ AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"}, - Dataset: &testbenchv1alpha1.DatasetSource{ + Dataset: testbenchv1alpha1.DatasetSource{ URL: "http://data-server/dataset.csv", }, }, @@ -286,16 +286,12 @@ var _ = Describe("Experiment Controller", func() { cleanupExperiment(expName) }) - It("should create a ConfigMap with empty scenarios as placeholder", func() { + It("should not create a ConfigMap in URL mode", func() { Expect(reconcileExperiment(expName)).To(Succeed()) cm := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm)).To(Succeed()) - Expect(cm.Data).To(HaveKey("experiment.json")) - - var expJSON experimentJSON - Expect(json.Unmarshal([]byte(cm.Data["experiment.json"]), &expJSON)).To(Succeed()) - Expect(expJSON.Scenarios).To(BeEmpty()) + err := k8sClient.Get(ctx, types.NamespacedName{Name: expName + "-experiment", Namespace: namespace}, cm) + Expect(errors.IsNotFound(err)).To(BeTrue()) }) It("should create a TestWorkflow with setup-template and correct datasetUrl", func() { @@ -322,7 +318,7 @@ var _ = Describe("Experiment Controller", func() { It("should resolve S3 dataset URL correctly", func() { exp := &testbenchv1alpha1.Experiment{} Expect(k8sClient.Get(ctx, types.NamespacedName{Name: expName, Namespace: namespace}, exp)).To(Succeed()) - exp.Spec.Dataset = 
&testbenchv1alpha1.DatasetSource{ + exp.Spec.Dataset = testbenchv1alpha1.DatasetSource{ S3: &testbenchv1alpha1.S3Source{Bucket: "my-bucket", Key: "data/dataset.csv"}, } Expect(k8sClient.Update(ctx, exp)).To(Succeed()) @@ -352,9 +348,7 @@ var _ = Describe("Experiment Controller", func() { ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, Spec: testbenchv1alpha1.ExperimentSpec{ AgentRef: testbenchv1alpha1.AgentRef{Name: "my-agent", Namespace: "agents"}, - Scenarios: []testbenchv1alpha1.Scenario{ - {Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}, - }, + Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}}, Trigger: trigger, }, } @@ -472,8 +466,8 @@ var _ = Describe("Experiment Controller", func() { exp := &testbenchv1alpha1.Experiment{ ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, Spec: testbenchv1alpha1.ExperimentSpec{ - AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, - Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, + Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}}, }, } Expect(k8sClient.Create(ctx, exp)).To(Succeed()) @@ -525,19 +519,22 @@ var _ = Describe("Experiment Controller", func() { r := newReconciler() exp := &testbenchv1alpha1.Experiment{ Spec: testbenchv1alpha1.ExperimentSpec{ - DefaultThreshold: 0.8, - Scenarios: []testbenchv1alpha1.Scenario{ - { - Name: "s", - Steps: []testbenchv1alpha1.Step{ + Dataset: testbenchv1alpha1.DatasetSource{ + Inline: &testbenchv1alpha1.InlineDataset{ + Scenarios: []testbenchv1alpha1.Scenario{ { - Input: "q", - CustomValues: runtime.RawExtension{Raw: []byte(`{"key":"value"}`)}, - Metrics: 
[]testbenchv1alpha1.Metric{ + Name: "s", + Steps: []testbenchv1alpha1.Step{ { - MetricName: "M", - Threshold: 0.7, - Parameters: runtime.RawExtension{Raw: []byte(`{"mode":"precision"}`)}, + Input: "q", + CustomValues: runtime.RawExtension{Raw: []byte(`{"key":"value"}`)}, + Metrics: []testbenchv1alpha1.Metric{ + { + MetricName: "M", + Threshold: 0.7, + Parameters: runtime.RawExtension{Raw: []byte(`{"mode":"precision"}`)}, + }, + }, }, }, }, @@ -551,23 +548,10 @@ var _ = Describe("Experiment Controller", func() { var result experimentJSON Expect(json.Unmarshal([]byte(data), &result)).To(Succeed()) - Expect(result.DefaultThreshold).To(Equal(0.8)) Expect(result.Scenarios[0].Steps[0].CustomValues).To(MatchJSON(`{"key":"value"}`)) Expect(result.Scenarios[0].Steps[0].Metrics[0].Parameters).To(MatchJSON(`{"mode":"precision"}`)) }) - It("should produce empty scenarios list for dataset mode", func() { - r := newReconciler() - exp := &testbenchv1alpha1.Experiment{ - Spec: testbenchv1alpha1.ExperimentSpec{ - DefaultThreshold: 0.9, - Dataset: &testbenchv1alpha1.DatasetSource{URL: "http://example.com/data.csv"}, - }, - } - data, err := r.buildExperimentJSON(exp) - Expect(err).NotTo(HaveOccurred()) - Expect(data).To(ContainSubstring(`"scenarios": []`)) - }) }) Context("AiGateway resolution", func() { @@ -583,9 +567,7 @@ var _ = Describe("Experiment Controller", func() { Name: "my-gateway", Namespace: "ai-gateway", }, - Scenarios: []testbenchv1alpha1.Scenario{ - {Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}, - }, + Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}}, }, } Expect(k8sClient.Create(ctx, exp)).To(Succeed()) @@ -696,8 +678,8 @@ var _ = Describe("Experiment Controller", func() { exp := &testbenchv1alpha1.Experiment{ ObjectMeta: metav1.ObjectMeta{Name: "exp-gw-url", Namespace: namespace}, Spec: testbenchv1alpha1.ExperimentSpec{ 
- AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"}, - Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"}, + Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}}, }, } gw := &runtimev1alpha1.AiGateway{ @@ -727,8 +709,8 @@ var _ = Describe("Experiment Controller", func() { exp := &testbenchv1alpha1.Experiment{ ObjectMeta: metav1.ObjectMeta{Name: "exp-no-gw", Namespace: namespace}, Spec: testbenchv1alpha1.ExperimentSpec{ - AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"}, - Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent", Namespace: "agents"}, + Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}}, }, } @@ -763,7 +745,7 @@ var _ = Describe("Experiment Controller", func() { Spec: testbenchv1alpha1.ExperimentSpec{ AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, OTLPEndpoint: "http://lgtm.monitoring.svc.cluster.local:4318", - Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}}, }, } Expect(k8sClient.Create(ctx, exp)).To(Succeed()) @@ -786,8 +768,8 @@ var _ = Describe("Experiment Controller", func() { exp := &testbenchv1alpha1.Experiment{ ObjectMeta: metav1.ObjectMeta{Name: expName, Namespace: namespace}, Spec: testbenchv1alpha1.ExperimentSpec{ - AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, - 
Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}, + AgentRef: testbenchv1alpha1.AgentRef{Name: "agent"}, + Dataset: testbenchv1alpha1.DatasetSource{Inline: &testbenchv1alpha1.InlineDataset{Scenarios: []testbenchv1alpha1.Scenario{{Name: "s", Steps: []testbenchv1alpha1.Step{{Input: "q"}}}}}}, }, } Expect(k8sClient.Create(ctx, exp)).To(Succeed())