Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions api/v1alpha1/modelvalidation_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,15 @@
}

// ContinuousValidation defines the configuration for continuous model validation.
// When enabled, the validation container runs as a native sidecar with restartPolicy: Always,
// allowing periodic re-validation of the model.
// Note: Native sidecar support requires Kubernetes 1.28+ with the feature explicitly enabled,
// or Kubernetes 1.29+ where it is enabled by default.
// When enabled, the validation container periodically re-validates the model.
// On Kubernetes 1.28+ (with SidecarContainers feature gate) or 1.29+ (enabled by default),
// this uses a native sidecar (init container with restartPolicy: Always).
// On older clusters, the operator falls back to injecting a one-shot init container
// for initial validation plus a traditional sidecar container for periodic re-validation.
type ContinuousValidation struct {
// Enabled controls whether continuous validation is active.
// When true, the validation container runs as a native sidecar with restartPolicy: Always.
// When false (default), the validation container runs as a standard init container.
// When true, the validation container periodically re-validates the model.
// When false (default), the validation container runs as a standard init container (one-shot).
// +kubebuilder:default=false
Enabled bool `json:"enabled"`

Expand All @@ -132,7 +133,7 @@
// Minimum interval is 1m to prevent excessive CPU usage.
// +kubebuilder:default="5m"
// +kubebuilder:validation:Pattern=`^([0-9]+(\.[0-9]+)?(m|h))+$`
// +kubebuilder:validation:XValidation:rule="self == '' || duration(self) >= duration('1m')", message="interval must be at least 1m"

Check failure on line 136 in api/v1alpha1/modelvalidation_types.go

View workflow job for this annotation

GitHub Actions / Run Linting

The line is 133 characters long, which exceeds the maximum of 120 characters. (lll)
Interval string `json:"interval,omitempty"`
}

Expand All @@ -154,8 +155,8 @@
ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"`

// ContinuousValidation enables periodic re-validation of the model.
// When enabled, the init container becomes a native sidecar.
// Requires Kubernetes 1.28+ with SidecarContainers feature gate enabled, or 1.29+ (enabled by default).
// On Kubernetes 1.28+, uses a native sidecar. On older clusters, falls back to
// a traditional sidecar container alongside a one-shot init container.
// +kubebuilder:validation:Optional
ContinuousValidation *ContinuousValidation `json:"continuousValidation,omitempty"`

Expand Down
17 changes: 16 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func main() {
var secureMetrics bool
var enableHTTP2 bool
var disableWebhook bool
var forceLegacySidecar bool
var tlsOpts []func(*tls.Config)

// Status tracker configuration
Expand Down Expand Up @@ -108,6 +109,9 @@ func main() {
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
flag.BoolVar(&disableWebhook, "disable-webhook", false,
"Disable the webhook server for development environments without certificates.")
utils.BoolFlagOrEnv(&forceLegacySidecar, "force-legacy-sidecar",
"FORCE_LEGACY_SIDECAR", false,
"Force legacy sidecar mode for continuous validation, even on clusters that support native sidecars.")
utils.StringFlagOrEnv(&constants.ModelValidationAgentImage,
"validation-agent-image",
"VALIDATION_AGENT_IMAGE",
Expand Down Expand Up @@ -281,8 +285,19 @@ func main() {
}

if !disableWebhook {
nativeSidecarSupport, err := webhooks.NativeSidecarSupport(mgr.GetConfig())
if err != nil {
setupLog.Error(err, "failed to detect native sidecar support, assuming not supported")
nativeSidecarSupport = false
}
if forceLegacySidecar {
setupLog.Info("Forcing legacy sidecar mode (--force-legacy-sidecar is set)")
nativeSidecarSupport = false
}
setupLog.Info("Kubernetes sidecar support", "nativeSidecars", nativeSidecarSupport)

decoder := admission.NewDecoder(mgr.GetScheme())
interceptor := webhooks.NewPodInterceptor(mgr.GetClient(), decoder)
interceptor := webhooks.NewPodInterceptor(mgr.GetClient(), decoder, nativeSidecarSupport)
mgr.GetWebhookServer().Register("/mutate-v1-pod", &admission.Webhook{
Handler: interceptor,
})
Expand Down
32 changes: 20 additions & 12 deletions cmd/validation-agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,18 @@ var (
func main() {
var interval time.Duration
var healthPort int
var skipInitial bool

flag.DurationVar(&interval, "interval", 0, "Validation interval (e.g., 5m, 1h). If 0 or not set, runs once and exits.")
flag.IntVar(&healthPort, "health-port", 8080, "Health check server port")
flag.BoolVar(&skipInitial, "skip-initial", false,
"Skip initial validation (used with legacy sidecar mode where init container already validated)")
flag.Parse()

log.SetLogger(zap.New())
logger := log.Log.WithName("validation-agent")

logger.Info("Starting validation agent", "interval", interval, "healthPort", healthPort)
logger.Info("Starting validation agent", "interval", interval, "healthPort", healthPort, "skipInitial", skipInitial)

// Setup signal handling
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
Expand All @@ -52,18 +55,23 @@ func main() {
// Validation args are remaining flags (passed to model-transparency-cli)
validationArgs := flag.Args()

// Run initial validation
logger.Info("Running initial validation")
if err := runValidation(ctx, validationArgs, logger); err != nil {
logger.Error(err, "Initial validation failed")
if interval == 0 {
// One-shot mode: exit with error
os.Exit(1)
}
// Continuous mode: don't mark ready, but continue (will retry)
} else {
logger.Info("Initial validation successful")
// Run initial validation (unless skipped for legacy sidecar mode)
if skipInitial {
logger.Info("Skipping initial validation (init container already validated)")
markReady()
} else {
logger.Info("Running initial validation")
if err := runValidation(ctx, validationArgs, logger); err != nil {
logger.Error(err, "Initial validation failed")
if interval == 0 {
// One-shot mode: exit with error
os.Exit(1)
}
// Continuous mode: don't mark ready, but continue (will retry)
} else {
logger.Info("Initial validation successful")
markReady()
}
}

// Continuous validation loop
Expand Down
8 changes: 4 additions & 4 deletions config/crd/bases/ml.sigstore.dev_modelvalidations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,15 @@ spec:
continuousValidation:
description: |-
ContinuousValidation enables periodic re-validation of the model.
When enabled, the init container becomes a native sidecar.
Requires Kubernetes 1.28+ with SidecarContainers feature gate enabled, or 1.29+ (enabled by default).
On Kubernetes 1.28+, uses a native sidecar. On older clusters, falls back to
a traditional sidecar container alongside a one-shot init container.
properties:
enabled:
default: false
description: |-
Enabled controls whether continuous validation is active.
When true, the validation container runs as a native sidecar with restartPolicy: Always.
When false (default), the validation container runs as a standard init container.
When true, the validation container periodically re-validates the model.
When false (default), the validation container runs as a standard init container (one-shot).
type: boolean
interval:
default: 5m
Expand Down
4 changes: 4 additions & 0 deletions internal/constants/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
const (
	// ModelValidationInitContainerName is the name given to the init
	// container that is injected into a pod to validate the model.
	ModelValidationInitContainerName = "model-validation"

	// ModelValidationSidecarContainerName is the name given to the regular
	// (non-init) sidecar container that performs continuous validation on
	// clusters without native sidecar support (pre-1.28).
	ModelValidationSidecarContainerName = "model-validation-sidecar"
)

var (
Expand All @@ -26,6 +30,6 @@
// using the model-transparency-go library. Used for both one-shot and
// continuous validation modes.
// This can be overridden at build time via ldflags:
// go build -ldflags="-X <pkg>.ModelValidationAgentImage=myimage:tag"
// where <pkg> is github.com/sigstore/model-validation-operator/internal/constants

Check failure on line 33 in internal/constants/images.go

View workflow job for this annotation

GitHub Actions / Run Linting

The line is 131 characters long, which exceeds the maximum of 120 characters. (lll)
ModelValidationAgentImage = "ghcr.io/sigstore/model-validation-agent:v0.1.0"
)
11 changes: 11 additions & 0 deletions internal/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,14 @@ func StringFlagOrEnv(p *string, name string, envName string, defaultValue string
}
flag.StringVar(p, name, defaultValue, usage)
}

// BoolFlagOrEnv defines a bool flag whose default can be supplied by an
// environment variable.
//
// Precedence: flag > env var > default value.
//
// The env var is considered true if set to "true", "1", or "yes", and false if
// set to "false", "0", or "no". Matching ignores ASCII letter case, so "TRUE"
// and "Yes" are accepted. Any other value, including an unset or empty
// variable, leaves defaultValue in effect.
//
// Parameters:
//   - p: destination for the flag value; it holds the effective default as
//     soon as this function returns and is overwritten when flags are parsed.
//   - name: command-line flag name.
//   - envName: environment variable consulted for the default.
//   - defaultValue: value used when neither the flag nor a recognized env
//     value is provided.
//   - usage: help text for the flag.
func BoolFlagOrEnv(p *bool, name string, envName string, defaultValue bool, usage string) {
	// The env value only replaces the default; an explicit flag on the
	// command line still wins because flag parsing happens afterwards.
	if v, ok := parseBoolEnv(os.Getenv(envName)); ok {
		defaultValue = v
	}
	flag.BoolVar(p, name, defaultValue, usage)
}

// parseBoolEnv interprets s as a boolean, ignoring ASCII letter case.
// ok is false when s is not one of the recognized spellings, in which case
// value carries no meaning.
func parseBoolEnv(s string) (value, ok bool) {
	lowered := []byte(s)
	for i, c := range lowered {
		if 'A' <= c && c <= 'Z' {
			lowered[i] = c + ('a' - 'A')
		}
	}
	switch string(lowered) {
	case "true", "1", "yes":
		return true, true
	case "false", "0", "no":
		return false, true
	default:
		return false, false
	}
}
130 changes: 118 additions & 12 deletions internal/webhooks/pod_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,16 @@ import (
"k8s.io/utils/ptr"
)

// NewPodInterceptor creates a new pod mutating webhook to be registered
func NewPodInterceptor(c client.Client, decoder admission.Decoder) webhook.AdmissionHandler {
// NewPodInterceptor creates a new pod mutating webhook to be registered.
// nativeSidecarSupport indicates whether the cluster supports native sidecars
// (init containers with restartPolicy: Always, available in Kubernetes 1.28+).
// When false, continuous validation falls back to injecting a traditional sidecar
// container alongside a one-shot init container.
func NewPodInterceptor(c client.Client, decoder admission.Decoder, nativeSidecarSupport bool) webhook.AdmissionHandler {
return &podInterceptor{
client: c,
decoder: decoder,
client: c,
decoder: decoder,
nativeSidecarSupport: nativeSidecarSupport,
}
}

Expand All @@ -54,8 +59,9 @@ func NewPodInterceptor(c client.Client, decoder admission.Decoder) webhook.Admis

// podInterceptor extends pods with Model Validation Init-Container if annotation is specified.
type podInterceptor struct {
client client.Client
decoder admission.Decoder
client client.Client
decoder admission.Decoder
nativeSidecarSupport bool
}

// Handle extends pods with Model Validation Init-Container if annotation is specified.
Expand Down Expand Up @@ -103,6 +109,11 @@ func (p *podInterceptor) Handle(ctx context.Context, req admission.Request) admi
return admission.Allowed("validation exists, no action needed")
}
}
for _, c := range pod.Spec.Containers {
if c.Name == constants.ModelValidationSidecarContainerName {
return admission.Allowed("validation exists, no action needed")
}
}

mergedModel := mergeModelWithAnnotations(logger, mv.Spec.Model, pod.Annotations)

Expand All @@ -125,9 +136,23 @@ func (p *podInterceptor) Handle(ctx context.Context, req admission.Request) admi
vm = append(vm, c.VolumeMounts...)
}

container := buildValidationContainer(mv, args, vm, pp)
continuousEnabled := mv.Spec.ContinuousValidation != nil && mv.Spec.ContinuousValidation.Enabled
useLegacySidecar := continuousEnabled && !p.nativeSidecarSupport

if useLegacySidecar {
logger.Info("Using legacy sidecar for continuous validation (native sidecars not supported)")
}

container := buildValidationContainer(mv, args, vm, pp, p.nativeSidecarSupport)
pp.Spec.InitContainers = append(pp.Spec.InitContainers, container)

// On pre-1.28 clusters with continuous validation, inject a traditional sidecar
// container alongside the init container for periodic re-validation.
if useLegacySidecar {
sidecar := buildLegacySidecarContainer(mv, args, vm, pp)
pp.Spec.Containers = append(pp.Spec.Containers, sidecar)
}

marshaledPod, err := json.Marshal(pp)
if err != nil {
return admission.Errored(http.StatusInternalServerError, err)
Expand All @@ -136,10 +161,15 @@ func (p *podInterceptor) Handle(ctx context.Context, req admission.Request) admi
return admission.PatchResponseFromRaw(req.Object.Raw, marshaledPod)
}

// buildValidationContainer constructs the validation container with appropriate configuration
// for either one-shot or continuous validation based on ModelValidation spec
// buildValidationContainer constructs the validation init container.
// When nativeSidecarSupport is true and continuous validation is enabled,
// the init container is configured as a native sidecar (restartPolicy: Always).
// When nativeSidecarSupport is false and continuous validation is enabled,
// the init container runs one-shot validation only; a separate legacy sidecar
// container is added by the caller via buildLegacySidecarContainer.
func buildValidationContainer(
mv *v1alpha1.ModelValidation, args []string, vm []corev1.VolumeMount, pp *corev1.Pod,
nativeSidecarSupport bool,
) corev1.Container {
// Determine image pull policy
imagePullPolicy := corev1.PullAlways
Expand All @@ -156,8 +186,9 @@ func buildValidationContainer(
VolumeMounts: vm,
}

// Add continuous validation configuration if enabled
if mv.Spec.ContinuousValidation != nil && mv.Spec.ContinuousValidation.Enabled {
// Add continuous validation configuration if enabled AND native sidecars are supported
continuousEnabled := mv.Spec.ContinuousValidation != nil && mv.Spec.ContinuousValidation.Enabled
if continuousEnabled && nativeSidecarSupport {
interval := "5m"
if mv.Spec.ContinuousValidation.Interval != "" {
interval = mv.Spec.ContinuousValidation.Interval
Expand Down Expand Up @@ -192,8 +223,10 @@ func buildValidationContainer(
InitialDelaySeconds: 10,
PeriodSeconds: 30,
}
}

// Add annotation to track continuous validation (for informational/tracking purposes)
// Track continuous validation in annotations regardless of sidecar mode
if continuousEnabled {
if pp.Annotations == nil {
pp.Annotations = make(map[string]string)
}
Expand All @@ -219,6 +252,79 @@ func buildValidationContainer(
return container
}

// buildLegacySidecarContainer returns the regular (non-init) container used for
// continuous validation when the cluster cannot run native sidecars (pre-1.28).
//
// The container runs the validation agent with the configured interval
// (defaulting to "5m") and --skip-initial, because the one-shot init container
// built by buildValidationContainer has already performed the validation that
// gates pod startup. It reuses the given args and volume mounts, exposes
// readiness and liveness probes on port 8080, and takes its resources from the
// ModelValidation spec, falling back to built-in defaults. The pod argument is
// not used.
func buildLegacySidecarContainer(
	mv *v1alpha1.ModelValidation, args []string, vm []corev1.VolumeMount, _ *corev1.Pod,
) corev1.Container {
	// An explicit pull policy in the spec wins; otherwise always pull.
	pullPolicy := mv.Spec.ImagePullPolicy
	if pullPolicy == "" {
		pullPolicy = corev1.PullAlways
	}

	interval := "5m"
	if cv := mv.Spec.ContinuousValidation; cv != nil && cv.Interval != "" {
		interval = cv.Interval
	}

	// Agent flags come first; the validation arguments follow them unchanged.
	agentArgs := make([]string, 0, len(args)+2)
	agentArgs = append(agentArgs, "--interval="+interval, "--skip-initial")
	agentArgs = append(agentArgs, args...)

	// Both probes are HTTP GETs against the agent's health server on 8080.
	httpProbe := func(path string, initialDelay, period int32) *corev1.Probe {
		return &corev1.Probe{
			ProbeHandler: corev1.ProbeHandler{
				HTTPGet: &corev1.HTTPGetAction{
					Path: path,
					Port: intstr.FromInt(8080),
				},
			},
			InitialDelaySeconds: initialDelay,
			PeriodSeconds:       period,
		}
	}

	// Start from the default resource envelope and let the spec replace it.
	resources := corev1.ResourceRequirements{
		Requests: corev1.ResourceList{
			corev1.ResourceCPU:    resource.MustParse("100m"),
			corev1.ResourceMemory: resource.MustParse("128Mi"),
		},
		Limits: corev1.ResourceList{
			corev1.ResourceCPU:    resource.MustParse("1"),
			corev1.ResourceMemory: resource.MustParse("512Mi"),
		},
	}
	if mv.Spec.Resources != nil {
		resources = *mv.Spec.Resources
	}

	return corev1.Container{
		Name:            constants.ModelValidationSidecarContainerName,
		Image:           constants.ModelValidationAgentImage,
		ImagePullPolicy: pullPolicy,
		Command:         []string{"/usr/local/bin/validation-agent"},
		Args:            agentArgs,
		VolumeMounts:    vm,
		// The agent marks itself ready right away when --skip-initial is set.
		ReadinessProbe: httpProbe("/ready", 5, 10),
		LivenessProbe:  httpProbe("/healthz", 10, 30),
		Resources:      resources,
	}
}

func validationConfigToArgs(logger logr.Logger, cfg v1alpha1.ValidationConfig, model v1alpha1.Model) []string {
logger.Info("construct args")
res := []string{}
Expand Down
Loading
Loading