From 09fc2fae424493ff9580c6d38e63f207b916529c Mon Sep 17 00:00:00 2001 From: melanie witt Date: Wed, 16 Apr 2025 15:20:23 -0700 Subject: [PATCH 1/5] libvirt: Use common naming convention for ephemeral disk labels The _create_ephemeral() method is responsible for creating ephemeral disks with image type "raw" and formatting them with mkfs. In the case of [libvirt]images_type "qcow2", _create_ephemeral() will create backing files. Currently we are not using a consistent naming convention for choosing the filesystem label for ephemeral disks. When we create a server for example, we go through the disks and label them "ephemeral0", "ephemeral1", "ephemeral2", etc. When we hard reboot a server, there is a check to create missing backing files and if so, a new backing file will be created but instead of being labeled "ephemeralN" the code attempts to label them with the name of the backing file itself for example "ephemeral_1_40d1d2c". This will fail if the filesystem used for ephemeral disks has limitations on the length of filesystem label names (VFAT, XFS, ...). For example: mkfs.vfat: Label can be no longer than 11 characters This adds a helper method for obtaining ephemeral disks filesystem label names and uses it the same way in the few places fs_label is specified. Closes-Bug: #2061701 Change-Id: Id033a5760272e4fb06dee2342414b26aa16ffe24 (cherry picked from commit 82856f95c69bb07bd2a61decae9abe827a2a1567) --- nova/tests/unit/virt/libvirt/test_driver.py | 5 ++++- nova/virt/libvirt/driver.py | 13 ++++++++++--- ...01-ephemeral-disk-fs-label-504484c4522e6d6a.yaml | 6 ++++++ 3 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/bug-2061701-ephemeral-disk-fs-label-504484c4522e6d6a.yaml diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 313a025bb4a..f2ad2654daa 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -15195,8 +15195,11 @@ def test_create_images_and_backing_ephemeral_gets_created( 'ephemeral_foo') ] + # This also asserts that the filesystem label name is generated + # correctly as 'ephemeral0' to help prevent regression of the + # related bug fix from https://launchpad.net/bugs/2061701 create_ephemeral_mock.assert_called_once_with( - ephemeral_size=1, fs_label='ephemeral_foo', + ephemeral_size=1, fs_label='ephemeral0', os_type='linux', target=ephemeral_backing) fetch_image_mock.assert_called_once_with( diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 68c461168c9..6ccd5609a0d 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -5153,6 +5153,13 @@ def _inject_data(self, disk, instance, injection_info): {'img_id': img_id, 'e': e}, instance=instance) + @staticmethod + def _get_fs_label_ephemeral(index: int) -> str: + # Use a consistent naming convention for FS labels. We need to be + # mindful of various filesystems label name length limitations. + # See for example: https://bugs.launchpad.net/nova/+bug/2061701 + return f'ephemeral{index}' + # NOTE(sileht): many callers of this method assume that this # method doesn't fail if an image already exists but instead # think that it will be reused (ie: (live)-migration/resize) @@ -5268,7 +5275,7 @@ def raw(fname, disk_info_mapping=None): created_disks = created_disks or not disk_image.exists() fn = functools.partial(self._create_ephemeral, - fs_label='ephemeral0', + fs_label=self._get_fs_label_ephemeral(0), os_type=instance.os_type, is_block_dev=disk_image.is_block_dev, vm_mode=vm_mode) @@ -5292,7 +5299,7 @@ def raw(fname, disk_info_mapping=None): raise exception.InvalidBDMFormat(details=msg) fn = functools.partial(self._create_ephemeral, - fs_label='ephemeral%d' % idx, + fs_label=self._get_fs_label_ephemeral(idx), os_type=instance.os_type, is_block_dev=disk_image.is_block_dev, vm_mode=vm_mode) @@ -11748,7 +11755,7 @@ def _create_images_and_backing(self, context, instance, instance_dir, # cached. disk.cache( fetch_func=self._create_ephemeral, - fs_label=cache_name, + fs_label=self._get_fs_label_ephemeral(0), os_type=instance.os_type, filename=cache_name, size=info['virt_disk_size'], diff --git a/releasenotes/notes/bug-2061701-ephemeral-disk-fs-label-504484c4522e6d6a.yaml b/releasenotes/notes/bug-2061701-ephemeral-disk-fs-label-504484c4522e6d6a.yaml new file mode 100644 index 00000000000..5f4c22ca248 --- /dev/null +++ b/releasenotes/notes/bug-2061701-ephemeral-disk-fs-label-504484c4522e6d6a.yaml @@ -0,0 +1,6 @@ +fixes: + - | + Fixed an issue where certain server actions could fail for servers with + ephemeral disks due to filesystem label name length limitations + (VFAT, XFS, ...). Filesystem label name generation has been fixed for these + cases. See https://launchpad.net/bugs/2061701 for more details. From 9662994cd019ebb65a6a87dd9f6c5eda43f2d3b9 Mon Sep 17 00:00:00 2001 From: Rajesh Tailor Date: Mon, 15 Sep 2025 16:00:41 +0530 Subject: [PATCH 2/5] Fix string format specifier This change fixes string format specifier from $ to % for correct formatting. Closes-Bug: #2123840 Signed-off-by: Rajesh Tailor Change-Id: I04f6e1ba3eff443d40a13c6fe2d0b77a78a020e6 (cherry picked from commit ca158f2da3c49ccea0c77ec0240ced44f0ec1d20) (cherry picked from commit b1da6b58af53b6ef58fa7c4256bf4e13b41f846f) --- nova/exception.py | 2 +- nova/tests/functional/test_report_client.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/nova/exception.py b/nova/exception.py index 34b4ff27181..5626885b67a 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2245,7 +2245,7 @@ class PlacementReshapeConflict(PlacementAPIConflict): """ msg_fmt = _( "A conflict was encountered attempting to reshape a provider tree: " - "$(error)s" + "%(error)s" ) diff --git a/nova/tests/functional/test_report_client.py b/nova/tests/functional/test_report_client.py index a1270723515..a3e81c7022f 100644 --- a/nova/tests/functional/test_report_client.py +++ b/nova/tests/functional/test_report_client.py @@ -1569,13 +1569,17 @@ def test_update_from_provider_tree_reshape_conflict_retry(self): # So we expect that it is signalled with an exception so that the # upper layer can re-drive the reshape process with a fresh tree that # now has the inventories - self.assertRaises( + ex = self.assertRaises( exception.PlacementReshapeConflict, self.client.update_from_provider_tree, self.context, ptree, allocations=allocs, ) + ex_msg = str(ex) + self.assertNotIn('$', ex_msg) + self.assertIn("A conflict was encountered attempting to reshape " + "a provider tree", ex_msg) # also we except that the internal caches is cleared so that the # re-drive will have a chance to load fresh data from placement self.assertEqual(0, len(self.client._provider_tree.roots)) From e69248933fca7ee12c5d6ba12e4395851ddf77ee Mon Sep 17 00:00:00 2001 From: melanie witt Date: Thu, 18 Sep 2025 11:57:08 -0700 Subject: [PATCH 3/5] Add functional reproducer for bug 2125030 This reproduces the bug where an attempt to revert a resize from a flavor with vTPM to a different flavor with vTPM results in the revert failing and the instance going into ERROR state when storage is not shared. Because of the lack of test coverage of vTPM with non-shared storage, this change also just adds a subclass to run all of the vTPM functional tests with the test environment mocked to behave as though storage is not shared between compute hosts. A bug fix will follow these functional tests. Related-Bug: #2125030 Change-Id: I49745a8ba78e1ea6a1b009bccab32a002cb6afb0 Signed-off-by: melanie witt (cherry picked from commit 650772d97efe98e578c18b8268b3bbed63ac4d18) (cherry picked from commit b4d0692f42878f6c300dfeda5511524f4e1a9d32) --- nova/tests/functional/libvirt/test_vtpm.py | 73 ++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/nova/tests/functional/libvirt/test_vtpm.py b/nova/tests/functional/libvirt/test_vtpm.py index 8c0fd9dbe14..e39c87288ae 100644 --- a/nova/tests/functional/libvirt/test_vtpm.py +++ b/nova/tests/functional/libvirt/test_vtpm.py @@ -17,6 +17,7 @@ from castellan.common.objects import passphrase from castellan.key_manager import key_manager +import fixtures from oslo_log import log as logging from oslo_utils import uuidutils @@ -239,6 +240,51 @@ def test_hard_reboot_server(self): # is still correct self.assertInstanceHasSecret(server) + def _test_resize_revert_server__vtpm_to_vtpm(self, extra_specs=None): + """Test behavior of revert when a vTPM is retained across a resize. + + Other tests cover going from no vTPM => vTPM and vice versa. + """ + for host in ('test_compute0', 'test_compute1'): + self.start_compute(host) + + server = self._create_server_with_vtpm() + + # Create a different flavor with a vTPM. + extra_specs = extra_specs or { + 'hw:tpm_model': 'tpm-tis', 'hw:tpm_version': '1.2'} + flavor_id = self._create_flavor(extra_spec=extra_specs) + + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._resize_server(server, flavor_id=flavor_id) + + # ensure our instance's system_metadata field and key manager inventory + # is updated to reflect the new vTPM requirement + self.assertInstanceHasSecret(server) + + # revert the instance rather than confirming it, and ensure the secret + # is correctly cleaned up + + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._revert_resize(server) + + # Should still have a secret because we had a vTPM before too. + self.assertInstanceHasSecret(server) + + def test_resize_revert_server__vtpm_to_vtpm_same_config(self): + self._test_resize_revert_server__vtpm_to_vtpm() + + def test_resize_revert_server__vtpm_to_vtpm_different_config(self): + extra_specs = {'hw:tpm_model': 'tpm-tis', 'hw:tpm_version': '2.0'} + self._test_resize_revert_server__vtpm_to_vtpm( + extra_specs=extra_specs) + def test_resize_server__no_vtpm_to_vtpm(self): for host in ('test_compute0', 'test_compute1'): self.start_compute(host) @@ -379,3 +425,30 @@ def test_shelve_server(self): self.assertRaises( client.OpenStackApiException, self._shelve_server, server) + + +class VTPMServersTestNonShared(VTPMServersTest): + + def setUp(self): + super().setUp() + self.useFixture(fixtures.MockPatch( + 'nova.compute.manager.ComputeManager._is_instance_storage_shared', + return_value=False)) + + # FIXME: Remove this entire method when + # https://bugs.launchpad.net/nova/+bug/2125030 is fixed. + def test_resize_revert_server__vtpm_to_vtpm_same_config(self): + ex = self.assertRaises( + client.OpenStackApiException, + self._test_resize_revert_server__vtpm_to_vtpm) + self.assertEqual(500, ex.response.status_code) + + # FIXME: Remove this entire method when + # https://bugs.launchpad.net/nova/+bug/2125030 is fixed. + def test_resize_revert_server__vtpm_to_vtpm_different_config(self): + extra_specs = {'hw:tpm_model': 'tpm-tis', 'hw:tpm_version': '2.0'} + ex = self.assertRaises( + client.OpenStackApiException, + self._test_resize_revert_server__vtpm_to_vtpm, + extra_specs=extra_specs) + self.assertEqual(500, ex.response.status_code) From 28c633faf48d73924fede5b57442ae0fb426f2e0 Mon Sep 17 00:00:00 2001 From: melanie witt Date: Mon, 22 Sep 2025 08:34:47 -0700 Subject: [PATCH 4/5] Move cleanup of vTPM secret from driver to compute Currently, vTPM secrets are deleted from Barbican any time instance disks are deleted when driver.destroy() is called. This is fine if the instance is also being deleted but if it's not, such as during a resize revert, it will fail with the following error: nova.exception.Invalid: Refusing to create an emulated TPM with no secret! which will bubble up to the API as a HTTP 500. This moves deletion of the vTPM secret from Barbican from the libvirt driver destroy() path to the compute manager _delete_instance() path so that the vTPM secret is deleted only if the instance is being deleted. Closes-Bug: #2125030 Change-Id: I1a43dc0502e1e65b4ef0348610f5eddb43dbff02 Signed-off-by: melanie witt (cherry picked from commit 787d2a130053bd369d35480d6534f01b07c2310d) (cherry picked from commit a9bcf1105baba46561bf0d88bc327c494d17beea) --- nova/compute/manager.py | 4 ++ nova/tests/functional/libvirt/test_vtpm.py | 18 -------- nova/tests/unit/compute/test_compute_mgr.py | 46 +++++++++++++++++++-- nova/tests/unit/virt/libvirt/test_driver.py | 6 ++- nova/virt/libvirt/driver.py | 8 ++-- 5 files changed, 54 insertions(+), 28 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 89d0d180d55..61808588fcf 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -72,6 +72,7 @@ from nova import conductor import nova.conf import nova.context +from nova import crypto from nova import exception from nova import exception_wrapper from nova.i18n import _ @@ -940,6 +941,9 @@ def _complete_deletion(self, context, instance): self._clean_instance_console_tokens(context, instance) self._delete_scheduler_instance_info(context, instance.uuid) + # Delete the vTPM secret in the key manager service if needed. + crypto.delete_vtpm_secret(context, instance) + def _validate_pinning_configuration(self, instances): if not self.driver.capabilities.get('supports_pcpus', False): return diff --git a/nova/tests/functional/libvirt/test_vtpm.py b/nova/tests/functional/libvirt/test_vtpm.py index e39c87288ae..6fb36dc404b 100644 --- a/nova/tests/functional/libvirt/test_vtpm.py +++ b/nova/tests/functional/libvirt/test_vtpm.py @@ -434,21 +434,3 @@ def setUp(self): self.useFixture(fixtures.MockPatch( 'nova.compute.manager.ComputeManager._is_instance_storage_shared', return_value=False)) - - # FIXME: Remove this entire method when - # https://bugs.launchpad.net/nova/+bug/2125030 is fixed. - def test_resize_revert_server__vtpm_to_vtpm_same_config(self): - ex = self.assertRaises( - client.OpenStackApiException, - self._test_resize_revert_server__vtpm_to_vtpm) - self.assertEqual(500, ex.response.status_code) - - # FIXME: Remove this entire method when - # https://bugs.launchpad.net/nova/+bug/2125030 is fixed. - def test_resize_revert_server__vtpm_to_vtpm_different_config(self): - extra_specs = {'hw:tpm_model': 'tpm-tis', 'hw:tpm_version': '2.0'} - ex = self.assertRaises( - client.OpenStackApiException, - self._test_resize_revert_server__vtpm_to_vtpm, - extra_specs=extra_specs) - self.assertEqual(500, ex.response.status_code) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 7c6a0b8f92a..6206fa5bb35 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -1766,6 +1766,40 @@ def test__get_power_state_NotFound(self): self.compute._get_power_state, instance) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'delete_allocation_for_instance') + @mock.patch('nova.crypto.delete_vtpm_secret') + @ddt.data(0, 3600) + def test__complete_deletion( + self, reclaim_instance_interval, mock_delete_vtpm, + mock_delete_alloc): + self.flags(reclaim_instance_interval=reclaim_instance_interval) + instance = objects.Instance(uuid=uuids.instance) + + with mock.patch.multiple( + self.compute, + _update_resource_tracker=mock.DEFAULT, + _clean_instance_console_tokens=mock.DEFAULT, + _delete_scheduler_instance_info=mock.DEFAULT) as mocks: + self.compute._complete_deletion(self.context, instance) + + mocks['_update_resource_tracker'].assert_called_once_with( + self.context, instance) + mocks['_clean_instance_console_tokens'].assert_called_once_with( + self.context, instance) + mocks['_delete_scheduler_instance_info'].assert_called_once_with( + self.context, instance.uuid) + mock_delete_vtpm.assert_called_once_with(self.context, instance) + # _complete_deletion() is only called at actual delete time (either + # regular delete or when reaping after soft delete). The force argument + # differs based on actual or reap delete for other reasons. + if reclaim_instance_interval > 0: + mock_delete_alloc.assert_called_once_with( + self.context, instance.uuid, force=False) + else: + mock_delete_alloc.assert_called_once_with( + self.context, instance.uuid, force=True) + @mock.patch.object(manager.ComputeManager, '_mount_all_shares') @mock.patch.object(manager.ComputeManager, '_get_share_info') @mock.patch.object(manager.ComputeManager, '_get_power_state') @@ -1806,6 +1840,7 @@ def test_init_instance_failed_resume_sets_error(self, mock_set_inst, mock_get_share_info.assert_called_once_with(mock.ANY, instance) mock_mount.assert_called_once_with(mock.ANY, instance, share_info) + @mock.patch('nova.crypto.delete_vtpm_secret') @mock.patch.object(objects.BlockDeviceMapping, 'destroy') @mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid') @mock.patch.object(objects.Instance, 'destroy') @@ -1814,7 +1849,7 @@ def test_init_instance_failed_resume_sets_error(self, mock_set_inst, def test_init_instance_complete_partial_deletion( self, mock_ids_from_instance, mock_inst_destroy, mock_obj_load_attr, mock_get_by_instance_uuid, - mock_bdm_destroy): + mock_bdm_destroy, mock_delete_vtpm): """Test to complete deletion for instances in DELETED status but not marked as deleted in the DB """ @@ -1847,9 +1882,10 @@ def fake_inst_destroy(): instance.user_id) mock_inst_destroy.side_effect = fake_inst_destroy() - with mock.patch( - "nova.compute.manager.ComputeManager._get_share_info", - return_value=objects.ShareMappingList(), + with mock.patch.multiple( + self.compute, + _get_share_info=mock.Mock(return_value=objects.ShareMappingList()), + _clean_instance_console_tokens=mock.DEFAULT, ): self.compute._init_instance(self.context, instance) @@ -1857,6 +1893,8 @@ def fake_inst_destroy(): # instance was deleted from db. self.assertNotEqual(0, instance.deleted) + mock_delete_vtpm.assert_called_once_with(self.context, instance) + @mock.patch('nova.compute.manager.LOG') def test_init_instance_complete_partial_deletion_raises_exception( self, mock_log): diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 6155176a499..68aae7fc9d0 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -21415,7 +21415,8 @@ def test_cleanup_pass( mock_unplug.assert_called_once_with(fake_inst, 'netinfo', True) mock_get_mapping.assert_called_once_with(None) mock_delete_files.assert_called_once_with(fake_inst) - mock_delete_vtpm.assert_called_once_with('ctxt', fake_inst) + # vTPM secret should not be deleted until instance is deleted. + mock_delete_vtpm.assert_not_called() mock_undefine.assert_called_once_with(fake_inst) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._undefine_domain') @@ -21439,7 +21440,8 @@ def test_cleanup_instance_marked_deleted( instance_save.side_effect = exception.InstanceNotFound( instance_id=uuids.instance) drvr.cleanup('ctxt', fake_inst, 'netinfo') - mock_delete_vtpm.assert_called_once_with('ctxt', fake_inst) + # vTPM secret should not be deleted until instance is deleted. + mock_delete_vtpm.assert_not_called() mock_undefine.assert_called_once_with(fake_inst) @mock.patch.object(libvirt_driver.LibvirtDriver, 'delete_instance_files', diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index ccd9b084418..a999b4a3f7c 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -1712,7 +1712,8 @@ def cleanup(self, context, instance, network_info, block_device_info=None, :param destroy_disks: if local ephemeral disks should be destroyed :param migrate_data: optional migrate_data object :param destroy_vifs: if plugged vifs should be unplugged - :param destroy_secrets: Indicates if secrets should be destroyed + :param destroy_secrets: Indicates if libvirt secrets for Cinder volume + encryption should be destroyed """ cleanup_instance_dir = False cleanup_instance_disks = False @@ -1766,8 +1767,8 @@ def _cleanup(self, context, instance, network_info, block_device_info=None, :param cleanup_instance_dir: If the instance dir should be removed :param cleanup_instance_disks: If the instance disks should be removed. Also removes ephemeral encryption secrets, if present. - :param destroy_secrets: If the cinder volume encryption secrets should - be deleted. + :param destroy_secrets: If the cinder volume encryption libvirt secrets + should be deleted. """ # zero the data on backend pmem device vpmems = self._get_vpmems(instance) @@ -1832,7 +1833,6 @@ def _cleanup(self, context, instance, network_info, block_device_info=None, pass if cleanup_instance_disks: - crypto.delete_vtpm_secret(context, instance) # Make sure that the instance directory files were successfully # deleted before destroying the encryption secrets in the case of # image backends that are not 'lvm' or 'rbd'. We don't want to From 0ca00dfa9ae4c95ec7dd8b9fbee4d9a94e6028b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Sch=C3=A4fer?= Date: Tue, 22 Jul 2025 12:26:32 +0200 Subject: [PATCH 5/5] Preserve vTPM state between power off and power on Without this patch, due to power_on calling _hard_reboot, which in turn undefines the VM to ensure a clean domain XML, the TPM data is erased by libvirt. This is very surprising to users who store persistent data in the TPM, such as keys required to decrypt the main disk of the VM. Conflicts: nova/virt/libvirt/driver.py NOTE(nicolairuckel): The conflict is because change I33b8fc0136b4c1783b5c493e8ca9a15110767f6c (Implement sound model extra spec for libvirt.) is not in Epoxy. Closes-Bug: #2118888 Signed-Off-By: jonas.schaefer@cloudandheat.com Change-Id: Iefb879428681003d6db604b70353a91913c92461 (cherry picked from commit 2399a296e3ff21e47e9b86b517b6ae9f8f525c01) (cherry picked from commit 687f2db6c6352040d49f3d31781ef8d186e0351a) --- nova/tests/fixtures/libvirt.py | 1 + nova/tests/unit/virt/libvirt/test_driver.py | 108 +++++++++++++++++++- nova/tests/unit/virt/libvirt/test_guest.py | 7 ++ nova/virt/libvirt/driver.py | 44 +++++++- nova/virt/libvirt/guest.py | 19 +++- 5 files changed, 170 insertions(+), 9 deletions(-) diff --git a/nova/tests/fixtures/libvirt.py b/nova/tests/fixtures/libvirt.py index dd161b1d700..423c1418933 100644 --- a/nova/tests/fixtures/libvirt.py +++ b/nova/tests/fixtures/libvirt.py @@ -100,6 +100,7 @@ def _reset(): VIR_DOMAIN_UNDEFINE_MANAGED_SAVE = 1 VIR_DOMAIN_UNDEFINE_NVRAM = 4 +VIR_DOMAIN_UNDEFINE_KEEP_TPM = 64 VIR_DOMAIN_AFFECT_CURRENT = 0 VIR_DOMAIN_AFFECT_LIVE = 1 diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 68aae7fc9d0..d21bb9c3269 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -1661,6 +1661,39 @@ def test__check_vtpm_support_supported( mock_which.assert_not_called() + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_register_all_undefined_instance_details', + new=mock.Mock()) + @mock.patch.object(host.Host, 'has_min_version', return_value=True) + def test_keep_tpm_supported(self, mock_version): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + drvr.init_host('dummyhost') + self.assertTrue( + drvr._may_keep_vtpm, + "LibvirtDriver did not correctly detect libvirt version " + "supporting KEEP_TPM" + ) + + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_register_all_undefined_instance_details', + new=mock.Mock()) + @mock.patch.object(host.Host, 'has_min_version') + def test_keep_tpm_unsupported(self, mock_version): + def version_check(lv_ver=None, **kwargs): + if lv_ver == libvirt_driver.MIN_VERSION_INT_FOR_KEEP_TPM: + return False + return True + + mock_version.side_effect = version_check + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + drvr.init_host('dummyhost') + self.assertFalse( + drvr._may_keep_vtpm, + "LibvirtDriver did not correctly detect libvirt version which " + "does not support KEEP_TPM" + ) + def test__check_multipath_misconfiguration(self): self.flags(volume_use_multipath=False, volume_enforce_multipath=True, group='libvirt') @@ -18846,6 +18879,51 @@ def test_undefine_domain_handles_libvirt_errors(self, mock_get): # ensure no raise for no such domain drvr._undefine_domain(instance) + @mock.patch.object(host.Host, "get_guest") + def test_undefine_domain_disarms_keep_vtpm_if_not_supported( + self, mock_get): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + drvr._may_keep_vtpm = False # normally set by init_host + instance = objects.Instance(**self.test_instance) + fake_guest = mock.Mock() + mock_get.return_value = fake_guest + + drvr._undefine_domain(instance, keep_vtpm=True) + + fake_guest.delete_configuration.assert_called_once_with( + keep_vtpm=False, + ) + + # Check that it truly forces it to False and doesn't do a `not` or + # something weird :-). + fake_guest.reset_mock() + drvr._undefine_domain(instance, keep_vtpm=False) + + fake_guest.delete_configuration.assert_called_once_with( + keep_vtpm=False, + ) + + @mock.patch.object(host.Host, "get_guest") + def test_undefine_domain_passes_keep_vtpm_if_supported(self, mock_get): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + drvr._may_keep_vtpm = True # normally set by init_host + instance = objects.Instance(**self.test_instance) + fake_guest = mock.Mock() + mock_get.return_value = fake_guest + + drvr._undefine_domain(instance, keep_vtpm=True) + + fake_guest.delete_configuration.assert_called_once_with(keep_vtpm=True) + + # Check that it does not force keep_vtpm to true, just because it is + # supported. + fake_guest.reset_mock() + drvr._undefine_domain(instance, keep_vtpm=False) + + fake_guest.delete_configuration.assert_called_once_with( + keep_vtpm=False, + ) + @mock.patch.object(host.Host, "list_instance_domains") @mock.patch.object(objects.BlockDeviceMappingList, "bdms_by_instance_uuid") @mock.patch.object(objects.InstanceList, "get_by_filters") @@ -21417,7 +21495,33 @@ def test_cleanup_pass( mock_delete_files.assert_called_once_with(fake_inst) # vTPM secret should not be deleted until instance is deleted. mock_delete_vtpm.assert_not_called() - mock_undefine.assert_called_once_with(fake_inst) + mock_undefine.assert_called_once_with(fake_inst, keep_vtpm=False) + + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._undefine_domain') + @mock.patch('nova.crypto.delete_vtpm_secret') + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.delete_instance_files') + @mock.patch('nova.virt.driver.block_device_info_get_mapping') + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._unplug_vifs') + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vpmems', + new=mock.Mock(return_value=None)) + def test_cleanup_preserves_tpm_if_not_destroying_disks( + self, mock_unplug, mock_get_mapping, mock_delete_files, + mock_delete_vtpm, mock_undefine, + ): + """Test with default parameters.""" + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI()) + fake_inst = objects.Instance(**self.test_instance) + mock_get_mapping.return_value = [] + mock_delete_files.return_value = True + + with mock.patch.object(fake_inst, 'save'): + drvr.cleanup('ctxt', fake_inst, 'netinfo', destroy_disks=False) + + mock_unplug.assert_called_once_with(fake_inst, 'netinfo', True) + mock_get_mapping.assert_called_once_with(None) + mock_delete_files.assert_not_called() + mock_delete_vtpm.assert_not_called() + mock_undefine.assert_called_once_with(fake_inst, keep_vtpm=True) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._undefine_domain') @mock.patch('nova.crypto.delete_vtpm_secret') @@ -21442,7 +21546,7 @@ def test_cleanup_instance_marked_deleted( drvr.cleanup('ctxt', fake_inst, 'netinfo') # vTPM secret should not be deleted until instance is deleted. mock_delete_vtpm.assert_not_called() - mock_undefine.assert_called_once_with(fake_inst) + mock_undefine.assert_called_once_with(fake_inst, keep_vtpm=False) @mock.patch.object(libvirt_driver.LibvirtDriver, 'delete_instance_files', return_value=True) diff --git a/nova/tests/unit/virt/libvirt/test_guest.py b/nova/tests/unit/virt/libvirt/test_guest.py index 6d9eb6ede50..359013c54ea 100644 --- a/nova/tests/unit/virt/libvirt/test_guest.py +++ b/nova/tests/unit/virt/libvirt/test_guest.py @@ -138,6 +138,13 @@ def test_delete_configuration(self): fakelibvirt.VIR_DOMAIN_UNDEFINE_MANAGED_SAVE | fakelibvirt.VIR_DOMAIN_UNDEFINE_NVRAM) + def test_delete_configuration_with_keep_vtpm_true(self): + self.guest.delete_configuration(keep_vtpm=True) + self.domain.undefineFlags.assert_called_once_with( + fakelibvirt.VIR_DOMAIN_UNDEFINE_MANAGED_SAVE | + fakelibvirt.VIR_DOMAIN_UNDEFINE_NVRAM | + fakelibvirt.VIR_DOMAIN_UNDEFINE_KEEP_TPM) + def test_delete_configuration_exception(self): self.domain.undefineFlags.side_effect = fakelibvirt.libvirtError( 'oops') diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index a999b4a3f7c..228301b2554 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -271,6 +271,9 @@ def repr_method(self): MIN_VFIO_PCI_VARIANT_LIBVIRT_VERSION = (10, 0, 0) MIN_VFIO_PCI_VARIANT_QEMU_VERSION = (8, 2, 2) +# Minimum version to preserve vTPM data +MIN_VERSION_INT_FOR_KEEP_TPM = (8, 9, 0) + REGISTER_IMAGE_PROPERTY_DEFAULTS = [ 'hw_machine_type', 'hw_cdrom_bus', @@ -584,6 +587,10 @@ def __init__(self, virtapi, read_only=False): # See also nova.virt.libvirt.cpu.api.API.core(). self.cpu_api = libvirt_cpu.API() + # Cache the availability of the VIR_DOMAIN_UNDEFINE_KEEP_TPM flag in + # this libvirt version. This is set in init_host. + self._may_keep_vtpm = False + def _discover_vpmems(self, vpmem_conf=None): """Discover vpmems on host and configuration. @@ -906,6 +913,12 @@ def init_host(self, host): self._check_vtpm_support() + # Cache the availability of the VIR_DOMAIN_UNDEFINE_KEEP_TPM flag in + # this libvirt version. + self._may_keep_vtpm = self._host.has_min_version( + MIN_VERSION_INT_FOR_KEEP_TPM, + ) + self._check_multipath() # Even if we already checked the whitelist at startup, this driver @@ -1675,11 +1688,32 @@ def destroy(self, context, instance, network_info, block_device_info=None, self.cleanup(context, instance, network_info, block_device_info, destroy_disks, destroy_secrets=destroy_secrets) - def _undefine_domain(self, instance): + def _delete_guest_configuration(self, guest, keep_vtpm): + """Wrapper around guest.delete_configuration which incorporates version + checks for the additional arguments. + + :param guest: The domain to undefine. + :param keep_vtpm: If set, the vTPM data (if any) is not deleted during + undefine. + + This flag may be ignored if libvirt is too old to support + preserving vTPM data (see bug #2118888). + """ + if keep_vtpm and not self._may_keep_vtpm: + LOG.warning( + "Temporary undefine operation is deleting vTPM contents. " + "Please upgrade libvirt to >= 8.9.0 to avoid this.", + instance=guest.uuid, + ) + keep_vtpm = False + + guest.delete_configuration(keep_vtpm=keep_vtpm) + + def _undefine_domain(self, instance, keep_vtpm=False): try: guest = self._host.get_guest(instance) try: - guest.delete_configuration() + self._delete_guest_configuration(guest, keep_vtpm=keep_vtpm) except libvirt.libvirtError as e: with excutils.save_and_reraise_exception() as ctxt: errcode = e.get_error_code() @@ -1842,7 +1876,7 @@ def _cleanup(self, context, instance, network_info, block_device_info=None, self._cleanup_ephemeral_encryption_secrets( context, instance, block_device_info) - self._undefine_domain(instance) + self._undefine_domain(instance, keep_vtpm=not cleanup_instance_disks) def _cleanup_ephemeral_encryption_secrets( self, context, instance, block_device_info @@ -2416,7 +2450,7 @@ def _swap_volume(self, guest, disk_dev, conf, resize_to): # undefine it. If any part of this block fails, the domain is # re-defined regardless. if guest.has_persistent_configuration(): - guest.delete_configuration() + self._delete_guest_configuration(guest, keep_vtpm=True) try: dev.copy(conf.to_xml(), reuse_ext=True) @@ -3545,7 +3579,7 @@ def _live_snapshot(self, context, instance, guest, disk_path, out_path, # If any part of this block fails, the domain is # re-defined regardless. if guest.has_persistent_configuration(): - guest.delete_configuration() + self._delete_guest_configuration(guest, keep_vtpm=True) # NOTE (rmk): Establish a temporary mirror of our root disk and # issue an abort once we have a complete copy. diff --git a/nova/virt/libvirt/guest.py b/nova/virt/libvirt/guest.py index 78ea60b39ef..6d7eb969df9 100644 --- a/nova/virt/libvirt/guest.py +++ b/nova/virt/libvirt/guest.py @@ -295,11 +295,26 @@ def get_vcpus_info(self): yield VCPUInfo( id=vcpu[0], cpu=vcpu[3], state=vcpu[1], time=vcpu[2]) - def delete_configuration(self): - """Undefines a domain from hypervisor.""" + def delete_configuration(self, keep_vtpm=False): + """Undefines a domain from hypervisor. + + :param keep_vtpm: If true, the vTPM data will be preserved. Otherwise, + it will be deleted. Defaults to false (that is, deleting the vTPM + data). + + Calling this with `keep_vtpm` set to True should, eventually, be + followed up with a call where it is set to False (after re-defining + the VM in libvirt with the same UUID), to prevent orphaning the vTPM + data in libvirt's data directory. + + It is the caller's responsibility to ensure that keep_vtpm is only set + to true on libvirt versions which support it, that is >= 8.9.0. + """ try: flags = libvirt.VIR_DOMAIN_UNDEFINE_MANAGED_SAVE flags |= libvirt.VIR_DOMAIN_UNDEFINE_NVRAM + if keep_vtpm: + flags |= libvirt.VIR_DOMAIN_UNDEFINE_KEEP_TPM self._domain.undefineFlags(flags) except libvirt.libvirtError: LOG.debug("Error from libvirt during undefineFlags for guest "