diff --git a/content/kubevirt/kubevirt-csi-driver/assets/Screenshot 2026-02-18 at 11.06.56.png b/content/kubevirt/kubevirt-csi-driver/assets/Screenshot 2026-02-18 at 11.06.56.png new file mode 100644 index 00000000..1456296e Binary files /dev/null and b/content/kubevirt/kubevirt-csi-driver/assets/Screenshot 2026-02-18 at 11.06.56.png differ diff --git a/content/kubevirt/kubevirt-csi-driver/controller-infra.yaml b/content/kubevirt/kubevirt-csi-driver/controller-infra.yaml new file mode 100644 index 00000000..925daf0f --- /dev/null +++ b/content/kubevirt/kubevirt-csi-driver/controller-infra.yaml @@ -0,0 +1,174 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: kubevirt-csi-controller +# namespace: kubevirt-csi-driver + labels: + app: kubevirt-csi-driver +spec: + replicas: 1 + selector: + matchLabels: + app: kubevirt-csi-driver + template: + metadata: + labels: + app: kubevirt-csi-driver + spec: + enableServiceLinks: false + serviceAccount: kubevirt-csi + priorityClassName: system-cluster-critical + nodeSelector: + node-role.kubernetes.io/control-plane: "" + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - key: node-role.kubernetes.io/master + operator: Exists + effect: "NoSchedule" + containers: + - name: csi-driver + imagePullPolicy: Always + image: quay.io/kubevirt/kubevirt-csi-driver:latest + args: + - "--endpoint=$(CSI_ENDPOINT)" + - "--infra-cluster-namespace=$(INFRACLUSTER_NAMESPACE)" + - "--infra-cluster-labels=$(INFRACLUSTER_LABELS)" + - "--tenant-cluster-kubeconfig=/var/run/secrets/tenantcluster/value" + - "--run-node-service=false" + - "--run-controller-service=true" + - "--v=5" + ports: + - name: healthz + containerPort: 10301 + protocol: TCP + env: + - name: CSI_ENDPOINT + value: unix:///var/lib/csi/sockets/pluginproxy/csi.sock + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: INFRACLUSTER_NAMESPACE + valueFrom: + configMapKeyRef: + name: driver-config + key: infraClusterNamespace + - name: 
INFRACLUSTER_LABELS + valueFrom: + configMapKeyRef: + name: driver-config + key: infraClusterLabels + - name: INFRA_STORAGE_CLASS_ENFORCEMENT + valueFrom: + configMapKeyRef: + name: driver-config + key: infraStorageClassEnforcement + optional: true + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: tenantcluster + mountPath: "/var/run/secrets/tenantcluster" + resources: + requests: + memory: 50Mi + cpu: 10m + - name: csi-provisioner + image: quay.io/openshift/origin-csi-external-provisioner:latest + args: + - "--csi-address=$(ADDRESS)" + - "--default-fstype=ext4" + - "--kubeconfig=/var/run/secrets/tenantcluster/value" + - "--v=5" + - "--timeout=3m" + - "--retry-interval-max=1m" + env: + - name: ADDRESS + value: /var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: tenantcluster + mountPath: "/var/run/secrets/tenantcluster" + - name: csi-attacher + image: quay.io/openshift/origin-csi-external-attacher:latest + args: + - "--csi-address=$(ADDRESS)" + - "--kubeconfig=/var/run/secrets/tenantcluster/value" + - "--v=5" + - "--timeout=3m" + - "--retry-interval-max=1m" + env: + - name: ADDRESS + value: /var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: tenantcluster + mountPath: "/var/run/secrets/tenantcluster" + resources: + requests: + memory: 50Mi + cpu: 10m + - name: csi-liveness-probe + image: quay.io/openshift/origin-csi-livenessprobe:latest + args: + - "--csi-address=/csi/csi.sock" + - "--probe-timeout=3s" + - "--health-port=10301" + volumeMounts: + - name: socket-dir + mountPath: /csi + - name: tenantcluster + mountPath: "/var/run/secrets/tenantcluster" + resources: + requests: + memory: 50Mi + cpu: 10m + - name: csi-snapshotter + args: + - "--v=5" + - "--csi-address=/csi/csi.sock" + - "--kubeconfig=/var/run/secrets/tenantcluster/value" + - "--timeout=3m" + 
image: k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.1 + imagePullPolicy: IfNotPresent + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /csi + name: socket-dir + - name: tenantcluster + mountPath: "/var/run/secrets/tenantcluster" + resources: + requests: + memory: 20Mi + cpu: 10m + - name: csi-resizer + image: registry.k8s.io/sig-storage/csi-resizer:v1.13.1 + args: + - "-csi-address=/csi/csi.sock" + - "-kubeconfig=/var/run/secrets/tenantcluster/value" + - "-v=5" + - "-timeout=3m" + - '-handle-volume-inuse-error=false' + volumeMounts: + - name: socket-dir + mountPath: /csi + - name: tenantcluster + mountPath: "/var/run/secrets/tenantcluster" + resources: + requests: + cpu: 10m + memory: 20Mi + securityContext: + capabilities: + drop: + - ALL + volumes: + - name: socket-dir + emptyDir: {} + - name: tenantcluster + secret: + secretName: kvcluster-kubeconfig diff --git a/content/kubevirt/kubevirt-csi-driver/infra-cluster-serviceaccount.yaml b/content/kubevirt/kubevirt-csi-driver/infra-cluster-serviceaccount.yaml new file mode 100644 index 00000000..4773f95d --- /dev/null +++ b/content/kubevirt/kubevirt-csi-driver/infra-cluster-serviceaccount.yaml @@ -0,0 +1,45 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kubevirt-csi +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kubevirt-csi +rules: +- apiGroups: ["cdi.kubevirt.io"] + resources: ["datavolumes"] + verbs: ["get", "create", "delete"] +- apiGroups: ["kubevirt.io"] + resources: ["virtualmachineinstances"] + verbs: ["list", "get"] +- apiGroups: ["kubevirt.io"] + resources: ["virtualmachines"] + verbs: ["list", "get", "watch"] +- apiGroups: ["subresources.kubevirt.io"] + resources: + - "virtualmachines/addvolume" + - "virtualmachines/removevolume" + verbs: ["update"] +- apiGroups: ["subresources.kubevirt.io"] + resources: ["virtualmachineinstances/addvolume", "virtualmachineinstances/removevolume"] + verbs: ["update"] +- 
apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: ["get", "create", "delete"] +- apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kubevirt-csi +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kubevirt-csi +subjects: +- kind: ServiceAccount + name: kubevirt-csi diff --git a/content/kubevirt/kubevirt-csi-driver/kubevirt-csi-driver-complete-tenant.yaml b/content/kubevirt/kubevirt-csi-driver/kubevirt-csi-driver-complete-tenant.yaml new file mode 100644 index 00000000..0cbbbf2c --- /dev/null +++ b/content/kubevirt/kubevirt-csi-driver/kubevirt-csi-driver-complete-tenant.yaml @@ -0,0 +1,328 @@ +apiVersion: storage.k8s.io/v1 +kind: CSIDriver +metadata: + name: csi.kubevirt.io +spec: + attachRequired: true + podInfoOnMount: true + fsGroupPolicy: ReadWriteOnceWithFSType +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kubevirt-csi-controller-sa + namespace: kubevirt-csi-driver +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kubevirt-csi-controller-cr +rules: + - apiGroups: [''] + resources: ['persistentvolumes'] + verbs: ['create', 'delete', 'get', 'list', 'watch', 'update', 'patch'] + - apiGroups: [''] + resources: ['secrets'] + verbs: ['get', 'list'] + - apiGroups: [''] + resources: ['persistentvolumeclaims'] + verbs: ['get', 'list', 'watch', 'update'] + - apiGroups: [""] + resources: ["persistentvolumeclaims/status"] + verbs: ["update", "patch"] + - apiGroups: [''] + resources: ['nodes'] + verbs: ['get', 'list', 'watch'] + - apiGroups: ['storage.k8s.io'] + resources: ['volumeattachments'] + verbs: ['get', 'list', 'watch', 'update', 'patch'] + - apiGroups: ['storage.k8s.io'] + resources: ['storageclasses'] + verbs: ['get', 'list', 'watch'] + - apiGroups: ['csi.storage.k8s.io'] + resources: ['csidrivers'] + verbs: ['get', 'list', 'watch', 'update', 'create'] + - 
apiGroups: [''] + resources: ['events'] + verbs: ['list', 'watch', 'create', 'update', 'patch'] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: ["create", "get", "list", "watch", "update", "delete"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots/status"] + verbs: ["update"] + - apiGroups: [ "storage.k8s.io" ] + resources: [ "volumeattachments/status" ] + verbs: [ "get", "list", "watch", "update", "patch" ] + - apiGroups: ["storage.k8s.io"] + resources: ["csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["security.openshift.io"] + resources: ["securitycontextconstraints"] + verbs: ["use"] + resourceNames: ["privileged"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kubevirt-csi-controller-binding +subjects: + - kind: ServiceAccount + name: kubevirt-csi-controller-sa + namespace: kubevirt-csi-driver +roleRef: + kind: ClusterRole + name: kubevirt-csi-controller-cr + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kubevirt-csi-node-sa + namespace: kubevirt-csi-driver +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kubevirt-csi-snapshot-sa + namespace: kubevirt-csi-driver +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kubevirt-csi-node-cr +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "update", "create", "delete"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["nodes"] + 
verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["csi.storage.k8s.io"] + resources: ["csinodeinfos"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments/status"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + - apiGroups: ["security.openshift.io"] + resources: ["securitycontextconstraints"] + verbs: ["use"] + resourceNames: ["privileged"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotclasses"] + verbs: ["list"] +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: external-snapshotter-runner +rules: + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: ["create", "get", "list", "watch", "update", "delete", "patch"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents/status"] + verbs: ["update", "patch"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kubevirt-csi-node-binding +subjects: + - kind: ServiceAccount + name: kubevirt-csi-node-sa + namespace: kubevirt-csi-driver +roleRef: + kind: ClusterRole + name: kubevirt-csi-node-cr + apiGroup: rbac.authorization.k8s.io +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-snapshotter-role +subjects: + - kind: ServiceAccount + name: kubevirt-csi-snapshot-sa + namespace: kubevirt-csi-driver +roleRef: + kind: ClusterRole + # 
change the name also here if the ClusterRole gets renamed + name: external-snapshotter-runner + apiGroup: rbac.authorization.k8s.io +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: kubevirt-csi-node + namespace: kubevirt-csi-driver +spec: + selector: + matchLabels: + app: kubevirt-csi-driver + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: kubevirt-csi-driver + spec: + serviceAccount: kubevirt-csi-node-sa + priorityClassName: system-node-critical + tolerations: + - operator: Exists + containers: + - name: csi-driver + securityContext: + privileged: true + allowPrivilegeEscalation: true + imagePullPolicy: Always + image: quay.io/kubevirt/kubevirt-csi-driver:latest + args: + - "--endpoint=unix:/csi/csi.sock" + - "--node-name=$(KUBE_NODE_NAME)" + - "--run-node-service=true" + - "--run-controller-service=false" + - "--v=5" + env: + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: kubelet-dir + mountPath: /var/lib/kubelet + mountPropagation: "Bidirectional" + - name: plugin-dir + mountPath: /csi + - name: device-dir + mountPath: /dev + - name: udev + mountPath: /run/udev + ports: + - name: healthz + containerPort: 10300 + protocol: TCP + livenessProbe: + httpGet: + path: /healthz + port: healthz + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 10 + failureThreshold: 5 + resources: + requests: + memory: 50Mi + cpu: 10m + - name: csi-node-driver-registrar + image: quay.io/openshift/origin-csi-node-driver-registrar:latest + args: + - "--csi-address=$(ADDRESS)" + - "--kubelet-registration-path=$(DRIVER_REG_SOCK_PATH)" + - "--v=5" + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "rm -rf /registration/csi.kubevirt.io-reg.sock /csi/csi.sock"] + env: + - name: ADDRESS + value: /csi/csi.sock + - name: DRIVER_REG_SOCK_PATH + value: /var/lib/kubelet/plugins/csi.kubevirt.io/csi.sock + volumeMounts: + - name: plugin-dir + mountPath: /csi + - name: 
registration-dir + mountPath: /registration + resources: + requests: + memory: 20Mi + cpu: 5m + - name: csi-liveness-probe + image: quay.io/openshift/origin-csi-livenessprobe:latest + args: + - "--csi-address=/csi/csi.sock" + - "--probe-timeout=3s" + - "--health-port=10300" + volumeMounts: + - name: plugin-dir + mountPath: /csi + resources: + requests: + memory: 20Mi + cpu: 5m + volumes: + - name: kubelet-dir + hostPath: + path: /var/lib/kubelet + type: Directory + - name: plugin-dir + hostPath: + path: /var/lib/kubelet/plugins/csi.kubevirt.io/ + type: DirectoryOrCreate + - name: registration-dir + hostPath: + path: /var/lib/kubelet/plugins_registry/ + type: Directory + - name: device-dir + hostPath: + path: /dev + type: Directory + - name: udev + hostPath: + path: /run/udev +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: driver-config + namespace: kubevirt-csi-driver +data: + infraClusterNamespace: rguske-ocp42 + infraClusterLabels: csi-driver/cluster=rguske-ocp42 +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: kubevirt + annotations: + storageclass.kubernetes.io/is-default-class: "true" +provisioner: csi.kubevirt.io +parameters: + infraStorageClassName: standard + bus: scsi +--- +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshotClass +metadata: + name: kubevirt-csi-snapclass +driver: csi.kubevirt.io +deletionPolicy: Delete diff --git a/content/kubevirt/kubevirt-csi-driver/kubevirt-csi.md b/content/kubevirt/kubevirt-csi-driver/kubevirt-csi.md new file mode 100644 index 00000000..29ffd3cf --- /dev/null +++ b/content/kubevirt/kubevirt-csi-driver/kubevirt-csi.md @@ -0,0 +1,557 @@ +--- +title: KubeVirt CSI Driver +linktitle: kubevirt-csi-driver +description: Page for the KubeVirt CSI Driver Installation +tags: ['cnv', 'kubevirt', 'storage', 'ocp-v', 'csi'] +--- + +# KubeVirt CSI Driver Installation + +Official Repository: + +- [KubeVirt CSI Driver](https://github.com/kubevirt/csi-driver) + +???+ Important + + This CSI 
driver is made for a tenant cluster deployed on top of kubevirt VMs, and enables it to get its persistent data
+    from the underlying, infrastructure cluster. To avoid confusion, this CSI driver is deployed on the tenant cluster, and does not require kubevirt installation at all.
+
+## Controller deployment on the Infra-Cluster
+
+- Create a `Secret` within the tenant-cluster project/namespace which contains the kube config of your tenant-cluster:
+
+```code
+export OCP42PATH='/Users/rguske/dev/openshift/openshift-on-openshift/rguske-ocp42/conf'
+```
+
+```code
+oc create secret generic kvcluster-kubeconfig --from-file=value=$OCP42PATH/rguske-ocp42-kubeconfig
+```
+
+- Label the virtualized nodes (vms) accordingly so that the CSI Driver can pick up the labels in order to operate:
+
+```code
+for vm in $(oc get vms -o jsonpath='{.items[*].metadata.name}'); do echo ${vm} ; oc label vm/${vm} csi-driver/cluster="rguske-ocp42" ; done
+```
+
+- Create a `ConfigMap` within the tenant-cluster project which the KubeVirt CSI Controller is using to identify the tenant-cluster name via the label as well as the tenant-cluster namespace:
+
+```yaml
+oc apply -f - < 89m
+pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9 2Gi RWO Delete Bound rguske-tests/1g-kubevirt-disk kubevirt 12m
+```
+
+```code
+oc get pvc
+NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS VOLUMEATTRIBUTESCLASS AGE
+1g-kubevirt-disk Bound pvc-c08d2bd3-c43c-4157-82ab-3fa81464bbd0 1Gi RWO kubevirt 43h
+```
+
+```code
+PVC_UID=eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9
+```
+
+```code
+PV=$(oc get pv -o jsonpath='{range .items[?(@.spec.claimRef.uid=="'"$PVC_UID"'")]}{.metadata.name}{"\n"}{end}')
+```
+
+```code
+oc get pv "$PV" -o jsonpath='{.spec.csi.volumeHandle}{"\n"}'
+pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9
+```
+
+```code
+NODE=rguske-ocp42-n3
+
+oc debug node/$NODE -- chroot /host bash -lc '
+  ls -l /dev/disk/by-id | sed -e "s#^#BY-ID: #";
+  echo;
+  lsblk -o NAME,KNAME,TYPE,SIZE,MODEL,SERIAL'
+Starting 
pod/rguske-ocp42-n3-debug-w9xks ... +To use host binaries, run `chroot /host`. Instead, if you need to access host namespaces, run `nsenter -a -t 1`. +ls: cannot access '/dev/disk/by-id': No such file or directory + +NAME KNAME TYPE SIZE MODEL SERIAL +loop0 loop0 loop 5.8M +vda vda disk 120G +├─vda1 vda1 part 1M +├─vda2 vda2 part 127M +├─vda3 vda3 part 384M +└─vda4 vda4 part 119.5G + +Removing debug pod ... +``` + +```code +oc debug node/$NODE -- chroot /host bash -lc ' + echo "== SCSI hosts =="; + ls -l /sys/class/scsi_host 2>/dev/null || echo "NO_SCSI_HOST"; + echo; + echo "== PCI storage controllers =="; + lspci -nn | egrep -i "scsi|storage|virtio" || true; + echo; + echo "== Kernel messages (storage) =="; + dmesg | egrep -i "scsi|virtio|block|sd[a-z]" | tail -n 50 || true; +' +Starting pod/rguske-ocp42-n3-debug-8qwmm ... +To use host binaries, run `chroot /host`. Instead, if you need to access host namespaces, run `nsenter -a -t 1`. +== SCSI hosts == +total 0 +lrwxrwxrwx. 1 root root 0 Feb 17 08:47 host0 -> ../../devices/pci0000:00/0000:00:03.2/0000:0b:00.0/virtio1/host0/scsi_host/host0 +lrwxrwxrwx. 1 root root 0 Feb 17 08:47 host1 -> ../../devices/pci0000:00/0000:00:1f.2/ata1/host1/scsi_host/host1 +lrwxrwxrwx. 1 root root 0 Feb 17 08:47 host2 -> ../../devices/pci0000:00/0000:00:1f.2/ata2/host2/scsi_host/host2 +lrwxrwxrwx. 1 root root 0 Feb 17 08:47 host3 -> ../../devices/pci0000:00/0000:00:1f.2/ata3/host3/scsi_host/host3 +lrwxrwxrwx. 1 root root 0 Feb 17 08:47 host4 -> ../../devices/pci0000:00/0000:00:1f.2/ata4/host4/scsi_host/host4 +lrwxrwxrwx. 1 root root 0 Feb 17 08:47 host5 -> ../../devices/pci0000:00/0000:00:1f.2/ata5/host5/scsi_host/host5 +lrwxrwxrwx. 1 root root 0 Feb 17 08:47 host6 -> ../../devices/pci0000:00/0000:00:1f.2/ata6/host6/scsi_host/host6 + +== PCI storage controllers == +01:00.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network device [1af4:1041] (rev 01) +0b:00.0 SCSI storage controller [0100]: Red Hat, Inc. 
Virtio 1.0 SCSI [1af4:1048] (rev 01) +0c:00.0 Communication controller [0780]: Red Hat, Inc. Virtio 1.0 console [1af4:1043] (rev 01) +0d:00.0 SCSI storage controller [0100]: Red Hat, Inc. Virtio 1.0 block device [1af4:1042] (rev 01) +0e:00.0 Unclassified device [00ff]: Red Hat, Inc. Virtio 1.0 memory balloon [1af4:1045] (rev 01) +0f:00.0 Unclassified device [00ff]: Red Hat, Inc. Virtio 1.0 RNG [1af4:1044] (rev 01) + +== Kernel messages (storage) == +[ 0.020071] ACPI: RSDP 0x00000000000F54A0 000014 (v00 BOCHS ) +[ 0.020078] ACPI: RSDT 0x000000007FFE2E4F 000038 (v01 BOCHS BXPC 00000001 BXPC 00000001) +[ 0.020092] ACPI: DSDT 0x000000007FFDF5C0 0033B7 (v01 BOCHS BXPC 00000001 BXPC 00000001) +[ 0.020118] ACPI: Reserving DSDT table memory at [mem 0x7ffdf5c0-0x7ffe2976] +[ 0.333351] x86/mm: Memory block size: 128MB +[ 0.379458] ACPI: Enabled 2 GPEs in block 00 to 3F +[ 0.706086] SCSI subsystem initialized +[ 1.134417] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 246) +[ 2.305506] systemd[1]: Listening on Open-iSCSI iscsid Socket. +[ 2.306895] systemd[1]: Listening on Open-iSCSI iscsiuio Socket. +[ 2.315245] systemd[1]: Check That Initrd Matches Kernel was skipped because of an unmet condition check (ConditionPathIsDirectory=!/usr/lib/modules/5.14.0-570.83.1.el9_6.x86_64). +[ 2.440742] Loading iSCSI transport class v2.0-870. +[ 2.454825] iscsi: registered transport (iser) +[ 2.644810] iscsi: registered transport (tcp) +[ 2.682132] iscsi: registered transport (qla4xxx) +[ 2.682698] QLogic iSCSI HBA Driver +[ 2.694396] libcxgbi:libcxgbi_init_module: Chelsio iSCSI driver library libcxgbi v0.9.1-ko (Apr. 2015) +[ 2.762615] Chelsio T4-T6 iSCSI Driver cxgb4i v0.9.5-ko (Apr. 
2015) +[ 2.763228] iscsi: registered transport (cxgb4i) +[ 2.785001] QLogic NetXtreme II iSCSI Driver bnx2i v2.7.10.1 (Jul 16, 2014) +[ 2.785574] iscsi: registered transport (bnx2i) +[ 2.799647] iscsi: registered transport (be2iscsi) +[ 2.800127] In beiscsi_module_init, tt=00000000e3e2ce31 +[ 3.463881] virtio_blk virtio3: 1/0/0 default/read/poll queues +[ 3.466980] virtio_blk virtio3: [vda] 251658240 512-byte logical blocks (129 GB/120 GiB) +[ 3.525550] scsi host0: Virtio SCSI HBA +[ 3.567813] virtio_net virtio0 enp1s0: renamed from eth0 +[ 3.573555] scsi host1: ahci +[ 3.573805] scsi host2: ahci +[ 3.574676] scsi host3: ahci +[ 3.574949] scsi host4: ahci +[ 3.575552] scsi host5: ahci +[ 3.575970] scsi host6: ahci +[ 4.985041] systemd[1]: iscsid.socket: Deactivated successfully. +[ 4.985652] systemd[1]: Closed Open-iSCSI iscsid Socket. +[ 4.998835] systemd[1]: iscsiuio.socket: Deactivated successfully. +[ 4.999420] systemd[1]: Closed Open-iSCSI iscsiuio Socket. +[ 10.150956] virtio_net virtio0 enp1s0: entered promiscuous mode +[ 10.487925] virtio_net virtio0 enp1s0: left promiscuous mode +[ 15.282762] virtio_net virtio0 enp1s0: entered promiscuous mode +[ 15.835818] virtio_net virtio0 enp1s0: left promiscuous mode +[ 15.840535] virtio_net virtio0 enp1s0: entered promiscuous mode + +Removing debug pod ... 
+``` + +### On the Infra-Cluster + +Check the controller logs `oc logs deploy/kubevirt-csi-controller -f` + +```code +I0218 10:14:27.668076 1 controller.go:241] creating new DataVolume rguske-ocp42/pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9 +I0218 10:14:27.686963 1 server.go:126] /csi.v1.Controller/CreateVolume returned with response: {"volume":{"capacity_bytes":2147483648,"volume_context":{"bus":"scsi","serial":"5bcccca9-2b42-4de8-8b62-a1e72ab38b58"},"volume_id":"pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9"}} +I0218 10:15:29.630238 1 server.go:121] /csi.v1.Controller/ControllerPublishVolume called with request: {"node_id":"rguske-ocp42/rguske-ocp42-n3","volume_capability":{"AccessType":{"Mount":{"fs_type":"ext4"}},"access_mode":{"mode":1}},"volume_context":{"bus":"scsi","serial":"5bcccca9-2b42-4de8-8b62-a1e72ab38b58","storage.kubernetes.io/csiProvisionerIdentity":"1771401314268-2085-csi.kubevirt.io"},"volume_id":"pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9"} +I0218 10:15:29.639135 1 controller.go:403] Attaching DataVolume pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9 to Node ID rguske-ocp42/rguske-ocp42-n3 +I0218 10:15:29.644847 1 controller.go:430] Start attaching DataVolume pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9 to VM rguske-ocp42-n3. Volume name: pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9. Serial: 5bcccca9-2b42-4de8-8b62-a1e72ab38b58. Bus: scsi +E0218 10:17:29.674335 1 controller.go:468] volume pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9 failed to be ready in time (2m) in VM rguske-ocp42-n3, client rate limiter Wait returned an error: context deadline exceeded +E0218 10:17:29.674361 1 server.go:124] /csi.v1.Controller/ControllerPublishVolume returned with error: client rate limiter Wait returned an error: context deadline exceeded +``` + +Checking the scsi controller which is used when hot-plugging a PVC: + +```yaml +oc -n rguske-ocp42 get vmi rguske-ocp42-n3 -o yaml | sed -n '1,140p' +apiVersion: kubevirt.io/v1 +kind: VirtualMachineInstance +metadata: + +[...] 
+
+spec:
+  architecture: amd64
+  domain:
+    cpu:
+      cores: 1
+      maxSockets: 24
+      model: IvyBridge-v2
+      sockets: 6
+      threads: 1
+    devices:
+      disks:
+      - bootOrder: 1
+        disk:
+          bus: virtio
+        name: rootdisk
+      - disk:
+          bus: scsi
+        name: pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9
+        serial: 5bcccca9-2b42-4de8-8b62-a1e72ab38b58
+      interfaces:
+      - bridge: {}
+        macAddress: 02:06:b6:02:4d:b6
+        model: virtio
+        name: coe-bridge
+        state: up
+      rng: {}
+
+[...]
+
+  volumes:
+  - dataVolume:
+      name: rguske-ocp42-n3-rootdisk-mig-62fcqw-mig-ctpb
+    name: rootdisk
+  - dataVolume:
+      hotpluggable: true
+      name: pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9
+    name: pvc-eb7ff8dc-ed38-473f-a5ea-4baa4686b0b9
+[...]
+```
+
+The hotplug disk is attached as `disk.bus: scsi` and the serial is set correctly (5bcccca9-…).
+
+But inside the guest you never see a second block device (only vda), so the CSI node can't possibly find /dev/disk/by-id/**.
+
+```code
+NODE=rguske-ocp42-n3
+
+oc debug node/$NODE -- chroot /host bash -lc '
+  ls -l /dev/disk/by-id | sed -e "s#^#BY-ID: #";
+  echo;
+  lsblk -o NAME,KNAME,TYPE,SIZE,MODEL,SERIAL'
+Starting pod/rguske-ocp42-n3-debug-w9xks ...
+To use host binaries, run `chroot /host`. Instead, if you need to access host namespaces, run `nsenter -a -t 1`.
+ls: cannot access '/dev/disk/by-id': No such file or directory
+
+NAME KNAME TYPE SIZE MODEL SERIAL
+loop0 loop0 loop 5.8M
+vda vda disk 120G
+├─vda1 vda1 part 1M
+├─vda2 vda2 part 127M
+├─vda3 vda3 part 384M
+└─vda4 vda4 part 119.5G
+
+Removing debug pod ...
+```
+
+## StorageClass fixed the Issue
+
+I changed the StorageClass from `odf-replica-two-block` to `ocs-storagecluster-ceph-rbd-virtualization`. The difference between both sc's was the `volumeBindingMode:`. The working one has `volumeBindingMode: Immediate`. 
+ +```yaml +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + annotations: + storageclass.kubernetes.io/is-default-class: 'true' +provisioner: csi.kubevirt.io +parameters: + bus: scsi + infraStorageClassName: ocs-storagecluster-ceph-rbd-virtualization +reclaimPolicy: Delete +volumeBindingMode: Immediate +``` + +Deployed the Pod with the PVC accordingly and: + +```code +I0218 11:07:36.353840 1 controller.go:163] Create Volume Request: name:"pvc-846e7c17-7655-41e1-9668-af5241c1aaad" capacity_range:{required_bytes:1073741824} volume_capabilities:{mount:{fs_type:"ext4"} access_mode:{mode:SINGLE_NODE_WRITER}} parameters:{key:"bus" value:"scsi"} parameters:{key:"infraStorageClassName" value:"ocs-storagecluster-ceph-rbd-virtualization"} +I0218 11:07:36.358648 1 controller.go:241] creating new DataVolume rguske-ocp42/pvc-846e7c17-7655-41e1-9668-af5241c1aaad +I0218 11:07:36.378859 1 server.go:126] /csi.v1.Controller/CreateVolume returned with response: {"volume":{"capacity_bytes":1073741824,"volume_context":{"bus":"scsi","serial":"0778c423-7c23-49ec-98a5-957183a31639"},"volume_id":"pvc-846e7c17-7655-41e1-9668-af5241c1aaad"}} +I0218 11:08:27.138455 1 server.go:121] /csi.v1.Controller/ControllerPublishVolume called with request: {"node_id":"rguske-ocp42/rguske-ocp42-n3","volume_capability":{"AccessType":{"Mount":{"fs_type":"ext4"}},"access_mode":{"mode":1}},"volume_context":{"bus":"scsi","serial":"0778c423-7c23-49ec-98a5-957183a31639","storage.kubernetes.io/csiProvisionerIdentity":"1771401314268-2085-csi.kubevirt.io"},"volume_id":"pvc-846e7c17-7655-41e1-9668-af5241c1aaad"} +I0218 11:08:27.149209 1 controller.go:403] Attaching DataVolume pvc-846e7c17-7655-41e1-9668-af5241c1aaad to Node ID rguske-ocp42/rguske-ocp42-n3 +I0218 11:08:27.154702 1 controller.go:430] Start attaching DataVolume pvc-846e7c17-7655-41e1-9668-af5241c1aaad to VM rguske-ocp42-n3. Volume name: pvc-846e7c17-7655-41e1-9668-af5241c1aaad. Serial: 0778c423-7c23-49ec-98a5-957183a31639. 
Bus: scsi +I0218 11:08:36.187861 1 controller.go:472] Successfully attached volume pvc-846e7c17-7655-41e1-9668-af5241c1aaad to VM rguske-ocp42-n3 +I0218 11:08:36.187881 1 server.go:126] /csi.v1.Controller/ControllerPublishVolume returned with response: {} +``` + +## StorageProfile Adjustments + +Make sure that the `AccessMode` is configured for the `StorageProfile` otherwise, you'll get an error message for the respective `DataVolume` that the `AccessMode` is missing/not specified. + +```yaml +oc get storageprofiles.cdi.kubevirt.io kubevirt-ceph-rbd-virt -oyaml + +apiVersion: cdi.kubevirt.io/v1beta1 +kind: StorageProfile +metadata: + name: kubevirt-ceph-rbd-virt +spec: + claimPropertySets: + - accessModes: + - ReadWriteMany + volumeMode: Block +``` + +## VirtLauncher Pod can't be scheduled + +```code +0/6 nodes are available: 3 node(s) didn't match Pod's node affinity/selector, 3 node(s) had untolerated taint {node-role.kubernetes.io/master: }. preemption: 0/6 nodes are available: 6 Preemption is not helpful for scheduling. 
+``` + +Check KubeVirt specific labels: + +```code +oc get nodes rguske-ocp42-n1 rguske-ocp42-n2 rguske-ocp42-n3 --show-labels | egrep -o 'kubevirt\.io/schedulable=[^, ]+' || true + +kubevirt.io/schedulable=true +kubevirt.io/schedulable=true +kubevirt.io/schedulable=true +``` + +```code +oc describe pvc rhel-9-ivory-whippet-47-volume + +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Warning UnrecognizedDataSourceKind 3m5s (x5 over 3m5s) volume-data-source-validator The datasource for this PVC does not match any registered VolumePopulator + Normal Provisioning 3m5s (x4 over 3m5s) csi.kubevirt.io_kubevirt-csi-controller-6d7fc974b4-xr4hq_b59c3cb9-66f2-47c1-8f30-2aa3a4e7c7d6 External provisioner is provisioning volume for claim "rguske-tests/rhel-9-ivory-whippet-47-volume" + Normal Provisioning 3m5s (x4 over 3m5s) external-provisioner Assuming an external populator will provision the volume + Normal VolumeSnapshotClassSelected 3m4s (x11 over 3m5s) clone-populator VolumeSnapshotClass selected according to StorageProfile kubevirt-csi-snapclass + Normal ExternalProvisioning 3s (x18 over 3m5s) persistentvolume-controller Waiting for a volume to be created either by the external provisioner 'csi.kubevirt.io' or manually by the system administrator. If volume creation is delayed, please verify that the provisioner is running and correctly registered. 
+``` + +### VolumeSnapshotContent Error + +???+ warning + + Failed to check and update snapshot content: failed to add VolumeSnapshotBeingCreated annotation on the content snapcontent-4860fa21-076c-49b1-9cbf-5b66407fbe72: "snapshot controller failed to update snapcontent-4860fa21-076c-49b1-9cbf-5b66407fbe72 on API server: VolumeSnapshotContent.snapshot.storage.k8s.io \"snapcontent-4860fa21-076c-49b1-9cbf-5b66407fbe72\" is invalid: spec: Invalid value: \"object\": sourceVolumeMode is required once set"' + +Check whether your CRDs enforce `sourceVolumeMode`: + +```code +oc get crd volumesnapshotcontents.snapshot.storage.k8s.io -o yaml | egrep -n 'sourceVolumeMode|required once set' + +145: - message: volumeHandle is required once set +147: - message: snapshotHandle is required once set +153: sourceVolumeMode: +162: - message: sourceVolumeMode is immutable +235: - message: sourceVolumeMode is required once set +236: rule: '!has(oldSelf.sourceVolumeMode) || has(self.sourceVolumeMode)' +``` + +In Kubernetes snapshots, there are two relevant components: + +- snapshot-controller (cluster-wide) - OpenShift provides this + +```code +oc -n openshift-cluster-storage-operator get deploy +NAME READY UP-TO-DATE AVAILABLE AGE +cluster-storage-operator 1/1 1 1 3d1h +csi-snapshot-controller 2/2 2 2 3d1h +csi-snapshot-controller-operator 1/1 1 1 3d1h +volume-data-source-validator 1/1 1 1 3d1h +``` + +- csi-snapshotter sidecar – runs inside the CSI driver controller deployment in my case, the KubeVirt CSI driver I've installed in the tenant cluster. It watches VolumeSnapshot/VolumeSnapshotContent and performs the CSI snapshot RPCs, and updates VolumeSnapshotContent objects. 
+ +### Solution + +I've deleted the associated CRDs: + +```code +oc delete crd \ + volumesnapshots.snapshot.storage.k8s.io \ + volumesnapshotcontents.snapshot.storage.k8s.io \ + volumesnapshotclasses.snapshot.storage.k8s.io +customresourcedefinition.apiextensions.k8s.io "volumesnapshots.snapshot.storage.k8s.io" deleted +customresourcedefinition.apiextensions.k8s.io "volumesnapshotcontents.snapshot.storage.k8s.io" deleted +customresourcedefinition.apiextensions.k8s.io "volumesnapshotclasses.snapshot.storage.k8s.io" deleted +``` diff --git a/hooks/__pycache__/sha256_filter.cpython-312.pyc b/hooks/__pycache__/sha256_filter.cpython-312.pyc new file mode 100644 index 00000000..6a9216e1 Binary files /dev/null and b/hooks/__pycache__/sha256_filter.cpython-312.pyc differ diff --git a/mkdocs.yml b/mkdocs.yml index 2d1c1dd6..05724851 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -282,6 +282,7 @@ nav: - Ansible: kubevirt/ansible/README.md - Networking: kubevirt/networking/index.md - Storage: kubevirt/storage.md + - KubeVirt CSI Driver: kubevirt/kubevirt-csi-driver/kubevirt-csi.md - NFS CSI Driver: kubevirt/nfs-csi-driver.md - Adjust domain.xml: kubevirt/adjust-domain-xml.md