diff --git a/Cargo.lock b/Cargo.lock index 2b98022a5..74c8e5f86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1487,6 +1487,7 @@ dependencies = [ "hyperlight-component-macro", "hyperlight-guest-tracing", "hyperlight-testing", + "iced-x86", "kvm-bindings", "kvm-ioctls", "lazy_static", @@ -1582,6 +1583,15 @@ dependencies = [ "cc", ] +[[package]] +name = "iced-x86" +version = "1.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c447cff8c7f384a7d4f741cfcff32f75f3ad02b406432e8d6c878d56b1edf6b" +dependencies = [ + "lazy_static", +] + [[package]] name = "icu_collections" version = "2.1.1" diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 694175615..41bd7c3cc 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -84,6 +84,7 @@ mshv-ioctls = { version = "0.6", optional = true} uuid = { version = "1.20.0", features = ["v4"] } signal-hook-registry = "1.4.8" serde = "1.0" +iced-x86 = { version = "1.21", default-features = false, features = ["std", "code_asm"] } proptest = "1.9.0" tempfile = "3.24.0" crossbeam-queue = "0.3.12" diff --git a/src/hyperlight_host/src/error.rs b/src/hyperlight_host/src/error.rs index 4a3fc10ef..1bca9e944 100644 --- a/src/hyperlight_host/src/error.rs +++ b/src/hyperlight_host/src/error.rs @@ -326,7 +326,10 @@ impl HyperlightError { | HyperlightError::StackOverflow() | HyperlightError::MemoryAccessViolation(_, _, _) | HyperlightError::SnapshotSizeMismatch(_, _) - | HyperlightError::MemoryRegionSizeMismatch(_, _, _) => true, + | HyperlightError::MemoryRegionSizeMismatch(_, _, _) + // HyperlightVmError::Restore is already handled manually in restore(), but we mark it + // as poisoning here too for defense in depth. 
+ | HyperlightError::HyperlightVmError(HyperlightVmError::Restore(_)) => true, // These errors poison the sandbox because they can leave // it in an inconsistent state due to snapshot restore diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index 260aa8488..196ae918c 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -46,7 +46,7 @@ use crate::hypervisor::crashdump; use crate::hypervisor::gdb::{DebugError, DebugMemoryAccessError}; #[cfg(gdb)] use crate::hypervisor::hyperlight_vm::debug::ProcessDebugRequestError; -use crate::hypervisor::regs::CommonSpecialRegisters; +use crate::hypervisor::regs::{CommonDebugRegs, CommonSpecialRegisters}; #[cfg(not(gdb))] use crate::hypervisor::virtual_machine::VirtualMachine; #[cfg(kvm)] @@ -68,6 +68,7 @@ use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION}; use crate::sandbox::SandboxConfiguration; use crate::sandbox::host_funcs::FunctionRegistry; use crate::sandbox::outb::{HandleOutbError, handle_outb}; +use crate::sandbox::snapshot::GuestSegmentState; #[cfg(feature = "mem_profile")] use crate::sandbox::trace::MemTraceInfo; #[cfg(crashdump)] @@ -320,6 +321,8 @@ pub enum HyperlightVmError { Initialize(#[from] InitializeError), #[error("Map region error: {0}")] MapRegion(#[from] MapRegionError), + #[error("Restore VM (vcpu) error: {0}")] + Restore(#[from] RegisterError), #[error("Unmap region error: {0}")] UnmapRegion(#[from] UnmapRegionError), #[error("Update region error: {0}")] @@ -612,7 +615,27 @@ impl HyperlightVm { Ok(sregs.cr3) } - /// Set the current base page table physical address + /// Get the special registers that need to be stored in a snapshot. 
+ pub(crate) fn get_snapshot_sregs(&mut self) -> Result { + let sregs = self.vm.sregs()?; + Ok(GuestSegmentState { + gdtr: sregs.gdt, + idtr: sregs.idt, + tr: sregs.tr, + cs: sregs.cs, + ds: sregs.ds, + es: sregs.es, + fs: sregs.fs, + gs: sregs.gs, + ss: sregs.ss, + }) + } + + /// Set the current base page table physical address. + /// + /// Currently unused because `reset_vcpu()` accepts CR3 directly to avoid an extra + /// hypercall, but kept for API symmetry with `get_root_pt()` and potential future use. + #[expect(dead_code)] pub(crate) fn set_root_pt(&mut self, addr: u64) -> Result<(), AccessPageTableError> { let mut sregs = self.vm.sregs()?; sregs.cr3 = addr; @@ -904,6 +927,49 @@ impl HyperlightVm { Ok(()) } + /// Resets the following vCPU state: + /// - General purpose registers + /// - Debug registers + /// - XSAVE (includes FPU/SSE state with proper FCW and MXCSR defaults) + /// - Special registers (CR3 and segment state are kept from snapshot) + // TODO: check if other state needs to be reset + pub(crate) fn reset_vcpu( + &self, + _cr3: u64, + _segment_state: &GuestSegmentState, + ) -> std::result::Result<(), RegisterError> { + self.vm.set_regs(&CommonRegisters { + rflags: 1 << 1, // Reserved bit always set + ..Default::default() + })?; + self.vm.set_debug_regs(&CommonDebugRegs::default())?; + self.vm.reset_xsave()?; + + #[cfg(feature = "init-paging")] + { + let mut sregs = CommonSpecialRegisters::standard_64bit_defaults(_cr3); + sregs.gdt = _segment_state.gdtr; + sregs.idt = _segment_state.idtr; + sregs.tr = _segment_state.tr; + sregs.cs = _segment_state.cs; + sregs.ds = _segment_state.ds; + sregs.es = _segment_state.es; + sregs.fs = _segment_state.fs; + sregs.gs = _segment_state.gs; + sregs.ss = _segment_state.ss; + self.vm.set_sregs(&sregs)?; + } + #[cfg(not(feature = "init-paging"))] + { + // TODO: This is probably not correct. 
+ // Let's deal with it when we clean up the init-paging feature + self.vm + .set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults())?; + } + + Ok(()) + } + // Handle a debug exit #[cfg(gdb)] fn handle_debug( @@ -1413,3 +1479,1383 @@ mod debug { } } } + +#[cfg(test)] +#[cfg(feature = "init-paging")] +#[allow(clippy::needless_range_loop)] +mod tests { + use std::sync::{Arc, Mutex}; + + use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind}; + use rand::Rng; + + use super::*; + #[cfg(kvm)] + use crate::hypervisor::regs::FP_CONTROL_WORD_DEFAULT; + use crate::hypervisor::regs::{CommonSegmentRegister, CommonTableRegister, MXCSR_DEFAULT}; + use crate::hypervisor::virtual_machine::VirtualMachine; + use crate::mem::layout::SandboxMemoryLayout; + use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags}; + use crate::mem::mgr::{GuestPageTableBuffer, SandboxMemoryManager}; + use crate::mem::ptr::RawPtr; + use crate::mem::ptr_offset::Offset; + use crate::mem::shared_mem::ExclusiveSharedMemory; + use crate::sandbox::SandboxConfiguration; + use crate::sandbox::host_funcs::FunctionRegistry; + use crate::sandbox::snapshot::GuestSegmentState; + #[cfg(any(crashdump, gdb))] + use crate::sandbox::uninitialized::SandboxRuntimeConfig; + use crate::sandbox::uninitialized_evolve::set_up_hypervisor_partition; + + /// Test context holding an initialized VM with memory manager. + /// Used by tests that need to interact with guest memory after execution. + struct TestVmContext { + vm: HyperlightVm, + hshm: SandboxMemoryManager, + host_funcs: Arc>, + #[cfg(gdb)] + dbg_mem_access_hdl: Arc>>, + } + + // ========================================================================== + // Dirty State Builders - Create non-default vCPU state for testing reset + // ========================================================================== + + /// Build dirty general purpose registers for testing reset_vcpu. 
+ fn dirty_regs() -> CommonRegisters { + CommonRegisters { + rax: 0x1111111111111111, + rbx: 0x2222222222222222, + rcx: 0x3333333333333333, + rdx: 0x4444444444444444, + rsi: 0x5555555555555555, + rdi: 0x6666666666666666, + rsp: 0x7777777777777777, + rbp: 0x8888888888888888, + r8: 0x9999999999999999, + r9: 0xAAAAAAAAAAAAAAAA, + r10: 0xBBBBBBBBBBBBBBBB, + r11: 0xCCCCCCCCCCCCCCCC, + r12: 0xDDDDDDDDDDDDDDDD, + r13: 0xEEEEEEEEEEEEEEEE, + r14: 0xFFFFFFFFFFFFFFFF, + r15: 0x0123456789ABCDEF, + rip: 0xFEDCBA9876543210, + rflags: 0x202, // IF + reserved bit 1 + } + } + + /// Build dirty FPU state for testing reset_vcpu. + fn dirty_fpu() -> CommonFpu { + CommonFpu { + fpr: [[0xAB; 16]; 8], + fcw: 0x0F7F, // Different from default 0x037F + fsw: 0x1234, + ftwx: 0xAB, + last_opcode: 0x0123, + last_ip: 0xDEADBEEF00000000, + last_dp: 0xCAFEBABE00000000, + xmm: [[0xCD; 16]; 16], + mxcsr: 0x3F80, // Different from default 0x1F80 + } + } + + /// Build dirty special registers for testing reset_vcpu. + /// Must be consistent for 64-bit long mode (CR0/CR4/EFER). 
+ fn dirty_sregs(_pml4_addr: u64) -> CommonSpecialRegisters { + let segment = CommonSegmentRegister { + base: 0x1000, + limit: 0xFFFF, + selector: 0x10, + type_: 3, // data segment, read/write, accessed + present: 1, + dpl: 0, + db: 1, + s: 1, + l: 0, + g: 1, + avl: 1, + unusable: 0, + padding: 0, + }; + // CS segment - 64-bit code segment + let cs_segment = CommonSegmentRegister { + base: 0, + limit: 0xFFFF, + selector: 0x08, + type_: 0b1011, // code segment, execute/read, accessed + present: 1, + dpl: 0, + db: 0, // must be 0 in 64-bit mode + s: 1, + l: 1, // 64-bit mode + g: 1, + avl: 0, + unusable: 0, + padding: 0, + }; + let table = CommonTableRegister { + base: 0xDEAD0000, + limit: 0xFFFF, + }; + CommonSpecialRegisters { + cs: cs_segment, + ds: segment, + es: segment, + fs: segment, + gs: segment, + ss: segment, + tr: CommonSegmentRegister { + type_: 0b1011, // busy TSS + present: 1, + ..segment + }, + ldt: segment, + gdt: table, + idt: table, + cr0: 0x80000011, // PE + ET + PG + cr2: 0xBADC0DE, + // MSHV validates cr3 and rejects bogus values; use valid _pml4_addr for MSHV + cr3: match get_available_hypervisor() { + #[cfg(mshv3)] + Some(HypervisorType::Mshv) => _pml4_addr, + _ => 0x12345000, + }, + cr4: 0x20, // PAE + cr8: 0x5, + efer: 0x500, // LME + LMA + apic_base: 0xFEE00900, + interrupt_bitmap: [0; 4], // fails if non-zero on MSHV + } + } + + /// Build dirty debug registers for testing reset_vcpu. 
+ /// + /// DR6 bit layout (Intel SDM / AMD APM): + /// Bits 0-3 (B0-B3): Breakpoint condition detected - software writable/clearable + /// Bits 4-10: Reserved, read as 1s on modern processors (read-only) + /// Bit 11 (BLD): Bus Lock Trap - cleared by processor, read-only on older CPUs + /// Bit 12: Reserved, always 0 + /// Bit 13 (BD): Debug Register Access Detected - software clearable + /// Bit 14 (BS): Single-Step - software clearable + /// Bit 15 (BT): Task Switch breakpoint - software clearable + /// Bit 16 (RTM): TSX-related, read-only (1 if no TSX) + /// Bits 17-31: Reserved, read as 1s on modern processors (read-only) + /// Bits 32-63: Reserved, must be 0 + /// + /// Writable bits: 0-3, 13, 14, 15 = mask 0xE00F + /// Reserved 1s: 4-10, 11 (if no BLD), 16 (if no TSX), 17-31 = ~0xE00F on lower 32 bits + const DR6_WRITABLE_MASK: u64 = 0xE00F; // B0-B3, BD, BS, BT + + /// DR7 bit layout: + /// Bits 0-7 (L0-L3, G0-G3): Local/global breakpoint enables - writable + /// Bits 8-9 (LE, GE): Local/Global Exact (386 only, ignored on modern) - writable + /// Bit 10: Reserved, must be 1 (read-only) + /// Bits 11-12: Reserved (RTM/TSX on some CPUs), must be 0 (read-only) + /// Bit 13 (GD): General Detect Enable - writable + /// Bits 14-15: Reserved, must be 0 (read-only) + /// Bits 16-31 (R/W0-3, LEN0-3): Breakpoint conditions and lengths - writable + /// Bits 32-63: Reserved, must be 0 (read-only) + /// + /// Writable bits: 0-9, 13, 16-31 = mask 0xFFFF23FF + const DR7_WRITABLE_MASK: u64 = 0xFFFF_23FF; + + fn dirty_debug_regs() -> CommonDebugRegs { + CommonDebugRegs { + dr0: 0xDEADBEEF00001000, + dr1: 0xDEADBEEF00002000, + dr2: 0xDEADBEEF00003000, + dr3: 0xDEADBEEF00004000, + // Set all writable bits: B0-B3 (0-3), BD (13), BS (14), BT (15) + dr6: DR6_WRITABLE_MASK, + // Set writable bits: L0-L3, G0-G3 (0-7), LE/GE (8-9), GD (13), conditions (16-31) + dr7: DR7_WRITABLE_MASK, + } + } + + /// Returns default test values for reset_vcpu parameters. 
+ /// These tests use a simplified setup without a real GDT/IDT, so we use defaults. + fn default_segment_state() -> GuestSegmentState { + GuestSegmentState { + gdtr: CommonTableRegister::default(), + idtr: CommonTableRegister::default(), + tr: CommonSegmentRegister::default(), + cs: CommonSegmentRegister::default(), + ds: CommonSegmentRegister::default(), + es: CommonSegmentRegister::default(), + fs: CommonSegmentRegister::default(), + gs: CommonSegmentRegister::default(), + ss: CommonSegmentRegister::default(), + } + } + + // ========================================================================== + // Normalizers - Handle hypervisor-specific quirks when comparing vCPU state + // ========================================================================== + + /// Normalize debug registers for comparison by applying writable masks. + /// Reserved bits in DR6/DR7 are read-only (set by CPU), so we copy them from actual. + fn normalize_debug_regs(expected: &mut CommonDebugRegs, actual: &CommonDebugRegs) { + expected.dr6 = (expected.dr6 & DR6_WRITABLE_MASK) | (actual.dr6 & !DR6_WRITABLE_MASK); + expected.dr7 = (expected.dr7 & DR7_WRITABLE_MASK) | (actual.dr7 & !DR7_WRITABLE_MASK); + } + + /// Normalize segment hidden cache fields that hypervisors report differently. + /// Applies to: unusable, granularity (g), and ss.db fields. + /// Does NOT normalize type_ - use this when verifying explicitly-set dirty state. 
+ fn normalize_sregs_hidden_cache( + expected: &mut CommonSpecialRegisters, + actual: &CommonSpecialRegisters, + ) { + expected.ss.db = actual.ss.db; + expected.cs.unusable = actual.cs.unusable; + expected.cs.g = actual.cs.g; + expected.ds.unusable = actual.ds.unusable; + expected.ds.g = actual.ds.g; + expected.es.unusable = actual.es.unusable; + expected.es.g = actual.es.g; + expected.fs.unusable = actual.fs.unusable; + expected.fs.g = actual.fs.g; + expected.gs.unusable = actual.gs.unusable; + expected.gs.g = actual.gs.g; + expected.ss.unusable = actual.ss.unusable; + expected.ss.g = actual.ss.g; + expected.tr.unusable = actual.tr.unusable; + expected.tr.g = actual.tr.g; + expected.ldt.unusable = actual.ldt.unusable; + expected.ldt.g = actual.ldt.g; + } + + /// Normalize sregs for verifying reset state (used by reset_vcpu_simple). + /// Includes hidden cache fields, segment types, AND the segment state fields + /// (GDT/IDT/TR/CS/SS) that are passed explicitly to reset_vcpu. + fn normalize_sregs_for_reset( + expected: &mut CommonSpecialRegisters, + actual: &CommonSpecialRegisters, + ) { + normalize_sregs_hidden_cache(expected, actual); + // Also normalize type_ for reset verification since hypervisors + // have different defaults (KVM returns type_=1, WHP returns type_=0) + expected.ds.type_ = actual.ds.type_; + expected.es.type_ = actual.es.type_; + expected.fs.type_ = actual.fs.type_; + expected.gs.type_ = actual.gs.type_; + expected.ss.type_ = actual.ss.type_; + // Normalize GDT, IDT, TR, CS, SS since they are passed explicitly to reset_vcpu + // and test values may differ from standard_64bit_defaults + expected.gdt = actual.gdt; + expected.idt = actual.idt; + expected.tr = actual.tr; + expected.cs = actual.cs; + expected.ss = actual.ss; + } + + /// Normalize sregs for tests that run actual guest code (used by run_tests module). + /// Similar to reset normalization but doesn't normalize granularity bits. 
+ fn normalize_sregs_for_run_tests( + expected: &mut CommonSpecialRegisters, + actual: &CommonSpecialRegisters, + ) { + expected.ss.db = actual.ss.db; + expected.cs.unusable = actual.cs.unusable; + expected.ds.unusable = actual.ds.unusable; + expected.ds.type_ = actual.ds.type_; + expected.es.unusable = actual.es.unusable; + expected.es.type_ = actual.es.type_; + expected.fs.unusable = actual.fs.unusable; + expected.fs.type_ = actual.fs.type_; + expected.gs.unusable = actual.gs.unusable; + expected.gs.type_ = actual.gs.type_; + expected.ss.unusable = actual.ss.unusable; + expected.ss.type_ = actual.ss.type_; + expected.tr.unusable = actual.tr.unusable; + expected.ldt.unusable = actual.ldt.unusable; + // Normalize GDT, IDT, TR, CS, SS since they are passed explicitly to reset_vcpu + // and test values may differ from standard_64bit_defaults + expected.gdt = actual.gdt; + expected.idt = actual.idt; + expected.tr = actual.tr; + expected.cs = actual.cs; + expected.ss = actual.ss; + } + + /// Normalize FPU MXCSR for KVM quirk. + /// KVM doesn't preserve MXCSR via set_fpu/fpu(), so we need to set it manually + /// when comparing FPU state. + #[cfg_attr(not(kvm), allow(unused_variables))] + fn normalize_fpu_mxcsr_for_kvm(fpu: &mut CommonFpu, expected_mxcsr: u32) { + #[cfg(kvm)] + if *get_available_hypervisor().as_ref().unwrap() == HypervisorType::Kvm { + fpu.mxcsr = expected_mxcsr; + } + } + + /// Normalize FPU state for reset comparison. + /// + /// When ftwx == 0, all x87 FPU registers are marked as empty/unused. In this state: + /// - `fpr`: Hypervisors may not zero the register contents since they're unused. + /// - `last_ip`, `last_dp`, `last_opcode`: These track the last FPU instruction location. + /// On WHP, the register read API (WHvGetVirtualProcessorRegisters) may return stale + /// values even after reset_xsave() properly zeroes the XSAVE area. This is a WHP API + /// quirk - the guest-visible state (via FXSAVE/XSAVE instructions) IS properly reset. 
+ /// Since guests cannot observe stale values, this doesn't affect security. + /// This behavior is verified by the `reset_vcpu_fpu_guest_visible_state` test which + /// runs actual guest code with FXSAVE to confirm the guest sees zeroed values. + /// + /// We normalize by copying these fields from actual to expected since the values + /// are architecturally undefined when the FPU is in init/empty state. + fn normalize_fpu_for_reset(expected: &mut CommonFpu, actual: &CommonFpu) { + if actual.ftwx == 0 { + expected.fpr = actual.fpr; + expected.last_ip = actual.last_ip; + expected.last_dp = actual.last_dp; + expected.last_opcode = actual.last_opcode; + } + } + + // ========================================================================== + // Assertion Helpers - Verify vCPU state after reset + // ========================================================================== + + /// Assert that debug registers are in reset state. + /// Reserved bits in DR6/DR7 are read-only (set by CPU), so we only check + /// that writable bits are cleared to 0 and DR0-DR3 are zeroed. + fn assert_debug_regs_reset(vm: &dyn VirtualMachine) { + let debug_regs = vm.debug_regs().unwrap(); + let expected = CommonDebugRegs { + dr0: 0, + dr1: 0, + dr2: 0, + dr3: 0, + dr6: debug_regs.dr6 & !DR6_WRITABLE_MASK, + dr7: debug_regs.dr7 & !DR7_WRITABLE_MASK, + }; + assert_eq!(debug_regs, expected); + } + + /// Assert that general-purpose registers are in reset state. + /// After reset, all registers should be zeroed except rflags which has + /// reserved bit 1 always set. + fn assert_regs_reset(vm: &dyn VirtualMachine) { + assert_eq!( + vm.regs().unwrap(), + CommonRegisters { + rflags: 1 << 1, // Reserved bit 1 is always set + ..Default::default() + } + ); + } + + /// Assert that FPU state is in reset state. + /// Handles hypervisor-specific quirks (KVM MXCSR, empty FPU registers). 
+ fn assert_fpu_reset(vm: &dyn VirtualMachine) { + let fpu = vm.fpu().unwrap(); + let mut expected_fpu = CommonFpu::default(); + normalize_fpu_mxcsr_for_kvm(&mut expected_fpu, fpu.mxcsr); + normalize_fpu_for_reset(&mut expected_fpu, &fpu); + assert_eq!(fpu, expected_fpu); + } + + /// Assert that special registers are in reset state. + /// Handles hypervisor-specific differences in hidden descriptor cache fields. + fn assert_sregs_reset(vm: &dyn VirtualMachine, pml4_addr: u64) { + let defaults = CommonSpecialRegisters::standard_64bit_defaults(pml4_addr); + let sregs = vm.sregs().unwrap(); + let mut expected_sregs = defaults; + // Normalize hypervisor implementation-specific fields. + // These are part of the hidden descriptor cache. While guests can write them + // indirectly (by loading segments from a crafted GDT), guests cannot read them back + // (e.g., `mov ax, ds` only returns the selector, not the hidden cache). + // KVM and MSHV reset to different default values, but both properly reset so there's + // no information leakage between tenants. + normalize_sregs_for_reset(&mut expected_sregs, &sregs); + assert_eq!(sregs, expected_sregs); + } + + // ========================================================================== + // XSAVE Helpers - Build dirty XSAVE state for testing extended CPU state + // ========================================================================== + + /// Query CPUID.0DH.n for XSAVE component info. 
+ /// Returns (size, offset, align_64) for the given component: + /// - size: CPUID.0DH.n:EAX - size in bytes + /// - offset: CPUID.0DH.n:EBX - offset from XSAVE base (standard format only) + /// - align_64: CPUID.0DH.n:ECX bit 1 - true if 64-byte aligned (compacted format) + fn xsave_component_info(comp_id: u32) -> (usize, usize, bool) { + let result = unsafe { std::arch::x86_64::__cpuid_count(0xD, comp_id) }; + let size = result.eax as usize; + let offset = result.ebx as usize; + let align_64 = (result.ecx & 0b10) != 0; + (size, offset, align_64) + } + + /// Query CPUID.0DH.00H for the bitmap of supported user state components. + /// EDX:EAX forms a 64-bit bitmap where bit i indicates support for component i. + fn xsave_supported_components() -> u64 { + let result = unsafe { std::arch::x86_64::__cpuid_count(0xD, 0) }; + (result.edx as u64) << 32 | (result.eax as u64) + } + + /// Dirty extended state components using compacted XSAVE format (MSHV/WHP). + /// Components are stored contiguously starting at byte 576, with alignment + /// requirements from CPUID.0DH.n:ECX[1]. + /// Returns a bitmask of components that were actually dirtied. 
+ fn dirty_xsave_extended_compacted( + xsave: &mut [u32], + xcomp_bv: u64, + supported_components: u64, + ) -> u64 { + let mut dirtied_mask = 0u64; + let mut offset = 576usize; + + for comp_id in 2..63u32 { + // Skip if component not supported by CPU or not enabled in XCOMP_BV + if (supported_components & (1u64 << comp_id)) == 0 { + continue; + } + if (xcomp_bv & (1u64 << comp_id)) == 0 { + continue; + } + + let (size, _, align_64) = xsave_component_info(comp_id); + + // ECX[1]=1 means 64-byte aligned; ECX[1]=0 means immediately after previous + if align_64 { + offset = offset.next_multiple_of(64); + } + + // Dirty this component's data area (only if it fits in the buffer) + let start_idx = offset / 4; + let end_idx = (offset + size) / 4; + if end_idx <= xsave.len() { + for i in start_idx..end_idx { + xsave[i] = 0x12345678 ^ comp_id.wrapping_mul(0x11111111); + } + dirtied_mask |= 1u64 << comp_id; + } + + offset += size; + } + + dirtied_mask + } + + /// Dirty extended state components using standard XSAVE format (KVM). + /// Components are at fixed offsets from CPUID.0DH.n:EBX. + /// Returns a bitmask of components that were actually dirtied. + fn dirty_xsave_extended_standard(xsave: &mut [u32], supported_components: u64) -> u64 { + let mut dirtied_mask = 0u64; + + for comp_id in 2..63u32 { + // Skip if component not supported by CPU + if (supported_components & (1u64 << comp_id)) == 0 { + continue; + } + + let (size, fixed_offset, _) = xsave_component_info(comp_id); + + let start_idx = fixed_offset / 4; + let end_idx = (fixed_offset + size) / 4; + if end_idx <= xsave.len() { + for i in start_idx..end_idx { + xsave[i] = 0x12345678 ^ comp_id.wrapping_mul(0x11111111); + } + dirtied_mask |= 1u64 << comp_id; + } + } + + dirtied_mask + } + + /// Dirty the legacy XSAVE region (bytes 0-511) for testing reset_vcpu. + /// This includes FPU/x87 state, SSE state, and reserved areas. 
+ /// + /// Layout (from Intel SDM Table 13-1): + /// Bytes 0-1: FCW, 2-3: FSW, 4: FTW, 5: reserved, 6-7: FOP + /// Bytes 8-15: FIP, 16-23: FDP + /// Bytes 24-27: MXCSR, 28-31: MXCSR_MASK (preserve - hardware defined) + /// Bytes 32-159: ST0-ST7/MM0-MM7 (8 regs × 16 bytes) + /// Bytes 160-415: XMM0-XMM15 (16 regs × 16 bytes) + /// Bytes 416-511: Reserved + fn dirty_xsave_legacy(xsave: &mut [u32], current_xsave: &[u8]) { + // FCW (bytes 0-1) + FSW (bytes 2-3) - pack into xsave[0] + // FCW = 0x0F7F (different from default 0x037F), FSW = 0x1234 + xsave[0] = 0x0F7F | (0x1234 << 16); + // FTW (byte 4) + reserved (byte 5) + FOP (bytes 6-7) - pack into xsave[1] + // FTW = 0xAB, FOP = 0x0123 + xsave[1] = 0xAB | (0x0123 << 16); + // FIP (bytes 8-15) - xsave[2] and xsave[3] + xsave[2] = 0xDEAD0001; + xsave[3] = 0xBEEF0002; + // FDP (bytes 16-23) - xsave[4] and xsave[5] + xsave[4] = 0xCAFE0003; + xsave[5] = 0xBABE0004; + // MXCSR (bytes 24-27) - xsave[6], use valid value different from default + xsave[6] = 0x3F80; + // xsave[7] is MXCSR_MASK - preserve from current (hardware defined, read-only) + if current_xsave.len() >= 32 { + xsave[7] = u32::from_le_bytes(current_xsave[28..32].try_into().unwrap()); + } + + // ST0-ST7/MM0-MM7 (bytes 32-159, indices 8-39) + for i in 8..40 { + xsave[i] = 0xCAFEBABE; + } + // XMM0-XMM15 (bytes 160-415, indices 40-103) + for i in 40..104 { + xsave[i] = 0xDEADBEEF; + } + + // Reserved area (bytes 416-511, indices 104-127) + for i in 104..128 { + xsave[i] = 0xABCDEF12; + } + } + + /// Preserve XSAVE header (bytes 512-575) from current state. + /// This includes XSTATE_BV and XCOMP_BV which hypervisors require. 
+ fn preserve_xsave_header(xsave: &mut [u32], current_xsave: &[u8]) { + for i in 128..144 { + let byte_offset = i * 4; + xsave[i] = u32::from_le_bytes( + current_xsave[byte_offset..byte_offset + 4] + .try_into() + .unwrap(), + ); + } + } + + fn dirty_xsave(current_xsave: &[u8]) -> Vec { + let mut xsave = vec![0u32; current_xsave.len() / 4]; + + dirty_xsave_legacy(&mut xsave, current_xsave); + preserve_xsave_header(&mut xsave, current_xsave); + + let xcomp_bv = u64::from_le_bytes(current_xsave[520..528].try_into().unwrap()); + let supported_components = xsave_supported_components(); + + // Dirty extended components and get mask of what was actually dirtied + let extended_mask = if (xcomp_bv & (1u64 << 63)) != 0 { + // Compacted format (MSHV/WHP) + dirty_xsave_extended_compacted(&mut xsave, xcomp_bv, supported_components) + } else { + // Standard format (KVM) + dirty_xsave_extended_standard(&mut xsave, supported_components) + }; + + // UPDATE XSTATE_BV to indicate dirtied components have valid data. + // WHP validates consistency between XSTATE_BV and actual data in the buffer. + // Bits 0,1 = legacy x87/SSE (always set after dirty_xsave_legacy) + // Bits 2+ = extended components that we actually dirtied + let xstate_bv = 0x3 | extended_mask; + + // Write XSTATE_BV to bytes 512-519 (u32 indices 128-129) + xsave[128] = (xstate_bv & 0xFFFFFFFF) as u32; + xsave[129] = (xstate_bv >> 32) as u32; + + xsave + } + + // ========================================================================== + // Test VM Setup + // ========================================================================== + + /// Creates a test VM with the given code. This is the shared setup logic used by + /// both `hyperlight_vm()` and `create_test_vm_context()`. 
+ fn create_test_vm_context(code: &[u8]) -> TestVmContext { + let config: SandboxConfiguration = Default::default(); + #[cfg(any(crashdump, gdb))] + let rt_cfg: SandboxRuntimeConfig = Default::default(); + + let mut layout = + SandboxMemoryLayout::new(config, code.len(), 4096, 4096, 0x3000, None).unwrap(); + + let pt_base_gpa = SandboxMemoryLayout::BASE_ADDRESS + layout.get_pt_offset(); + let pt_buf = GuestPageTableBuffer::new(pt_base_gpa); + + for rgn in layout + .get_memory_regions_::(()) + .unwrap() + .iter() + { + let readable = rgn.flags.contains(MemoryRegionFlags::READ); + let writable = rgn.flags.contains(MemoryRegionFlags::WRITE) + || rgn.flags.contains(MemoryRegionFlags::STACK_GUARD); + let executable = rgn.flags.contains(MemoryRegionFlags::EXECUTE); + let mapping = Mapping { + phys_base: rgn.guest_region.start as u64, + virt_base: rgn.guest_region.start as u64, + len: rgn.guest_region.len() as u64, + kind: MappingKind::BasicMapping(BasicMapping { + readable, + writable, + executable, + }), + }; + unsafe { vmem::map(&pt_buf, mapping) }; + } + + let mut pt_size_mapped = 0; + while pt_buf.size() > pt_size_mapped { + let mapping = Mapping { + phys_base: (pt_base_gpa + pt_size_mapped) as u64, + virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64, + len: (pt_buf.size() - pt_size_mapped) as u64, + kind: MappingKind::BasicMapping(BasicMapping { + readable: true, + writable: true, + executable: false, + }), + }; + unsafe { vmem::map(&pt_buf, mapping) }; + pt_size_mapped = pt_buf.size(); + } + + // Map the scratch region at the top of the address space + let scratch_size = config.get_scratch_size(); + let scratch_gpa = hyperlight_common::layout::scratch_base_gpa(scratch_size); + let scratch_gva = hyperlight_common::layout::scratch_base_gva(scratch_size); + let scratch_mapping = Mapping { + phys_base: scratch_gpa, + virt_base: scratch_gva, + len: scratch_size as u64, + kind: MappingKind::BasicMapping(BasicMapping { + readable: true, + 
writable: true, + executable: true, // Match regular codepath (map_specials) + }), + }; + unsafe { vmem::map(&pt_buf, scratch_mapping) }; + + // Re-map page tables if they grew from scratch mapping + while pt_buf.size() > pt_size_mapped { + let mapping = Mapping { + phys_base: (pt_base_gpa + pt_size_mapped) as u64, + virt_base: (hyperlight_common::layout::SNAPSHOT_PT_GVA_MIN + pt_size_mapped) as u64, + len: (pt_buf.size() - pt_size_mapped) as u64, + kind: MappingKind::BasicMapping(BasicMapping { + readable: true, + writable: true, + executable: false, + }), + }; + unsafe { vmem::map(&pt_buf, mapping) }; + pt_size_mapped = pt_buf.size(); + } + + let pt_bytes = pt_buf.into_bytes(); + layout.set_pt_size(pt_bytes.len()); + + let mem_size = layout.get_memory_size().unwrap(); + let mut eshm = ExclusiveSharedMemory::new(mem_size).unwrap(); + eshm.copy_from_slice(&pt_bytes, layout.get_pt_offset()) + .unwrap(); + eshm.copy_from_slice(code, layout.get_guest_code_offset()) + .unwrap(); + + let load_addr = RawPtr::from(layout.get_guest_code_address() as u64); + let scratch_mem = ExclusiveSharedMemory::new(config.get_scratch_size()).unwrap(); + let mut mem_mgr = SandboxMemoryManager::new( + layout, + eshm, + scratch_mem, + load_addr, + Some(Offset::from(0u64)), + ); + mem_mgr.write_memory_layout().unwrap(); + + let (mut hshm, gshm) = mem_mgr.build().unwrap(); + + let peb_address = gshm.layout.peb_address; + let stack_top_gva = hyperlight_common::layout::MAX_GVA as u64 + - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET + + 1; + let mut vm = set_up_hypervisor_partition( + gshm, + &config, + stack_top_gva, + #[cfg(any(crashdump, gdb))] + &rt_cfg, + crate::mem::exe::LoadInfo::dummy(), + ) + .unwrap(); + + let seed = rand::rng().random::(); + let peb_addr = RawPtr::from(u64::try_from(peb_address).unwrap()); + let page_size = u32::try_from(page_size::get()).unwrap(); + + #[cfg(gdb)] + let dbg_mem_access_hdl = Arc::new(Mutex::new(hshm.clone())); + + let host_funcs = 
Arc::new(Mutex::new(FunctionRegistry::default())); + + vm.initialise( + peb_addr, + seed, + page_size, + &mut hshm, + &host_funcs, + None, + #[cfg(gdb)] + dbg_mem_access_hdl.clone(), + ) + .unwrap(); + + TestVmContext { + vm, + hshm, + host_funcs, + #[cfg(gdb)] + dbg_mem_access_hdl, + } + } + + /// Simple helper that returns just the VM for tests that don't need memory access. + fn hyperlight_vm(code: &[u8]) -> HyperlightVm { + create_test_vm_context(code).vm + } + + // ========================================================================== + // Tests + // ========================================================================== + + #[test] + fn reset_vcpu_simple() { + // push rax; hlt - aligns stack to 16 bytes + const CODE: [u8; 2] = [0x50, 0xf4]; + let hyperlight_vm = hyperlight_vm(&CODE); + let available_hv = *get_available_hypervisor().as_ref().unwrap(); + + // Get the initial CR3 value before dirtying sregs + let initial_cr3 = hyperlight_vm.vm.sregs().unwrap().cr3; + + // Set all vCPU state to dirty values + let regs = dirty_regs(); + let fpu = dirty_fpu(); + let sregs = dirty_sregs(initial_cr3); + let current_xsave = hyperlight_vm.vm.xsave().unwrap(); + let xsave = dirty_xsave(¤t_xsave); + let debug_regs = dirty_debug_regs(); + + hyperlight_vm.vm.set_xsave(&xsave).unwrap(); + hyperlight_vm.vm.set_regs(®s).unwrap(); + hyperlight_vm.vm.set_fpu(&fpu).unwrap(); + hyperlight_vm.vm.set_sregs(&sregs).unwrap(); + hyperlight_vm.vm.set_debug_regs(&debug_regs).unwrap(); + + // Verify regs were set + assert_eq!(hyperlight_vm.vm.regs().unwrap(), regs); + + // Verify fpu was set + let mut got_fpu = hyperlight_vm.vm.fpu().unwrap(); + let mut expected_fpu = fpu; + // KVM doesn't preserve mxcsr via set_fpu/fpu(), copy expected to got + normalize_fpu_mxcsr_for_kvm(&mut got_fpu, fpu.mxcsr); + // fpr only uses 80 bits per register. Normalize upper bits for comparison. 
+ for i in 0..8 { + expected_fpu.fpr[i][10..16].copy_from_slice(&got_fpu.fpr[i][10..16]); + } + assert_eq!(got_fpu, expected_fpu); + + // Verify xsave was set by checking key dirty values in the legacy region. + // Note: set_fpu() is called after set_xsave(), so XMM registers reflect fpu state (0xCD pattern). + let got_xsave = hyperlight_vm.vm.xsave().unwrap(); + // FCW (bytes 0-1) should be 0x0F7F (set by both xsave and fpu) + let got_fcw = u16::from_le_bytes(got_xsave[0..2].try_into().unwrap()); + assert_eq!(got_fcw, 0x0F7F, "xsave FCW should be dirty"); + // MXCSR (bytes 24-27) should be 0x3F80 (set by xsave; fpu doesn't update it on KVM) + let got_mxcsr = u32::from_le_bytes(got_xsave[24..28].try_into().unwrap()); + assert_eq!(got_mxcsr, 0x3F80, "xsave MXCSR should be dirty"); + // XMM0-XMM15 (bytes 160-415): set_fpu overwrites with 0xCD pattern from dirty_fpu() + for i in 0..16 { + let offset = 160 + i * 16; + let xmm_word = u32::from_le_bytes(got_xsave[offset..offset + 4].try_into().unwrap()); + assert_eq!( + xmm_word, 0xCDCDCDCD, + "xsave XMM{i} should match fpu dirty value" + ); + } + + // Verify debug regs were set + let got_debug_regs = hyperlight_vm.vm.debug_regs().unwrap(); + let mut expected_debug_regs = debug_regs; + normalize_debug_regs(&mut expected_debug_regs, &got_debug_regs); + assert_eq!(got_debug_regs, expected_debug_regs); + + // Verify sregs were set + let got_sregs = hyperlight_vm.vm.sregs().unwrap(); + let mut expected_sregs = sregs; + normalize_sregs_hidden_cache(&mut expected_sregs, &got_sregs); + assert_eq!(got_sregs, expected_sregs); + + // Reset the vCPU + hyperlight_vm + .reset_vcpu(0, &default_segment_state()) + .unwrap(); + + // Verify registers are reset to defaults + assert_regs_reset(hyperlight_vm.vm.as_ref()); + + // Verify FPU is reset to defaults + assert_fpu_reset(hyperlight_vm.vm.as_ref()); + + // Verify debug registers are reset to defaults + assert_debug_regs_reset(hyperlight_vm.vm.as_ref()); + + // Verify xsave is reset - 
should be zeroed except for hypervisor-specific fields + let reset_xsave = hyperlight_vm.vm.xsave().unwrap(); + // Build expected xsave: all zeros with fpu specific defaults. Then copy hypervisor-specific fields from actual + let mut expected_xsave = vec![0u8; reset_xsave.len()]; + #[cfg(mshv3)] + if available_hv == HypervisorType::Mshv { + // FCW (offset 0-1): When XSTATE_BV.LegacyX87 = 0 (init state), the hypervisor + // skips copying the FPU legacy region entirely, leaving zeros in the buffer. + // The actual guest FCW register is 0x037F (verified via fpu() assertion above), + // but xsave() doesn't report it because XSTATE_BV=0 means "init state, buffer + // contents undefined." We copy from actual to handle this. + expected_xsave[0..2].copy_from_slice(&reset_xsave[0..2]); + } + #[cfg(target_os = "windows")] + if available_hv == HypervisorType::Whp { + // FCW (offset 0-1): When XSTATE_BV.LegacyX87 = 0 (init state), the hypervisor + // skips copying the FPU legacy region entirely, leaving zeros in the buffer. + // The actual guest FCW register is 0x037F (verified via fpu() assertion above), + // but xsave() doesn't report it because XSTATE_BV=0 means "init state, buffer + // contents undefined." We copy from actual to handle this. + expected_xsave[0..2].copy_from_slice(&reset_xsave[0..2]); + } + #[cfg(kvm)] + if available_hv == HypervisorType::Kvm { + expected_xsave[0..2].copy_from_slice(&FP_CONTROL_WORD_DEFAULT.to_le_bytes()); + } + + // - MXCSR at offset 24-27: default FPU state set by hypervisor + expected_xsave[24..28].copy_from_slice(&MXCSR_DEFAULT.to_le_bytes()); + // - MXCSR_MASK at offset 28-31: hardware-defined, read-only + expected_xsave[28..32].copy_from_slice(&reset_xsave[28..32]); + // - Reserved bytes at offset 464-511: These are in the reserved/padding area of the legacy + // FXSAVE region (after XMM registers which end at byte 416). On KVM/Intel, these bytes + // may contain hypervisor-specific metadata that isn't cleared during vCPU reset. 
+ // Since this is not guest-visible computational state, we copy from actual to expected. + expected_xsave[464..512].copy_from_slice(&reset_xsave[464..512]); + // - XSAVE header at offset 512-575: contains XSTATE_BV and XCOMP_BV (hypervisor-managed) + // XSTATE_BV (512-519): Bitmap indicating which state components have valid data in the + // buffer. When a bit is 0, the hypervisor uses the architectural init value for that + // component. After reset, xsave() may still return non-zero XSTATE_BV since the + // hypervisor reports which components it manages, not which have been modified. + // XCOMP_BV (520-527): Compaction bitmap. Bit 63 indicates compacted format (used by MSHV/WHP). + // When set, the XSAVE area uses a compact layout where only enabled components are stored + // contiguously. This is a format indicator, not state data, so it's preserved across reset. + // Both fields are managed by the hypervisor to describe the XSAVE area format and capabilities, + // not guest-visible computational state, so they don't need to be zeroed on reset. + if reset_xsave.len() >= 576 { + expected_xsave[512..576].copy_from_slice(&reset_xsave[512..576]); + } + assert_eq!( + reset_xsave, expected_xsave, + "xsave should be zeroed except for hypervisor-specific fields" + ); + + // Verify sregs are reset to defaults (CR3 is 0 as passed to reset_vcpu) + assert_sregs_reset(hyperlight_vm.vm.as_ref(), 0); + } + + /// Tests that actually run code, as opposed to just setting vCPU state. 
+ mod run_tests { + use iced_x86::code_asm::*; + + use super::*; + + #[test] + fn reset_vcpu_regs() { + let mut a = CodeAssembler::new(64).unwrap(); + a.push(rax).unwrap(); // Align stack to 16 bytes + a.mov(rax, 0x1111111111111111u64).unwrap(); + a.mov(rbx, 0x2222222222222222u64).unwrap(); + a.mov(rcx, 0x3333333333333333u64).unwrap(); + a.mov(rdx, 0x4444444444444444u64).unwrap(); + a.mov(rsi, 0x5555555555555555u64).unwrap(); + a.mov(rdi, 0x6666666666666666u64).unwrap(); + a.mov(rbp, 0x7777777777777777u64).unwrap(); + a.mov(r8, 0x8888888888888888u64).unwrap(); + a.mov(r9, 0x9999999999999999u64).unwrap(); + a.mov(r10, 0xAAAAAAAAAAAAAAAAu64).unwrap(); + a.mov(r11, 0xBBBBBBBBBBBBBBBBu64).unwrap(); + a.mov(r12, 0xCCCCCCCCCCCCCCCCu64).unwrap(); + a.mov(r13, 0xDDDDDDDDDDDDDDDDu64).unwrap(); + a.mov(r14, 0xEEEEEEEEEEEEEEEEu64).unwrap(); + a.mov(r15, 0xFFFFFFFFFFFFFFFFu64).unwrap(); + a.hlt().unwrap(); + let code = a.assemble(0).unwrap(); + + let hyperlight_vm = hyperlight_vm(&code); + + // After run, check registers match expected dirty state + let regs = hyperlight_vm.vm.regs().unwrap(); + let mut expected_dirty = CommonRegisters { + rax: 0x1111111111111111, + rbx: 0x2222222222222222, + rcx: 0x3333333333333333, + rdx: 0x4444444444444444, + rsi: 0x5555555555555555, + rdi: 0x6666666666666666, + rsp: 0, + rbp: 0x7777777777777777, + r8: 0x8888888888888888, + r9: 0x9999999999999999, + r10: 0xAAAAAAAAAAAAAAAA, + r11: 0xBBBBBBBBBBBBBBBB, + r12: 0xCCCCCCCCCCCCCCCC, + r13: 0xDDDDDDDDDDDDDDDD, + r14: 0xEEEEEEEEEEEEEEEE, + r15: 0xFFFFFFFFFFFFFFFF, + rip: 0, + rflags: 0, + }; + // rip, rsp, and rflags are set by the CPU, we don't expect those to match our expected values + expected_dirty.rip = regs.rip; + expected_dirty.rsp = regs.rsp; + expected_dirty.rflags = regs.rflags; + assert_eq!(regs, expected_dirty); + + // Reset vcpu + hyperlight_vm + .reset_vcpu(0, &default_segment_state()) + .unwrap(); + + // Check registers are reset to defaults + 
assert_regs_reset(hyperlight_vm.vm.as_ref()); + } + + #[test] + fn reset_vcpu_fpu() { + #[cfg(kvm)] + use crate::hypervisor::regs::MXCSR_DEFAULT; + + #[cfg(kvm)] + let available_hv = *get_available_hypervisor().as_ref().unwrap(); + + // Build code to dirty XMM registers, x87 FPU, and MXCSR + let mut a = CodeAssembler::new(64).unwrap(); + a.push(rax).unwrap(); // Align stack to 16 bytes + + // xmm0-xmm7: use movd + pshufd to fill with pattern + let xmm_regs_low = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]; + let patterns_low: [u32; 8] = [ + 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, + 0x88888888, + ]; + for (xmm, pat) in xmm_regs_low.iter().zip(patterns_low.iter()) { + a.mov(eax, *pat).unwrap(); + a.movd(*xmm, eax).unwrap(); + a.pshufd(*xmm, *xmm, 0).unwrap(); + } + + // xmm8-xmm15: upper XMM registers + let xmm_regs_high = [xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15]; + let patterns_high: [u32; 8] = [ + 0x99999999, 0xAAAAAAAA, 0xBBBBBBBB, 0xCCCCCCCC, 0xDDDDDDDD, 0xEEEEEEEE, 0xFFFFFFFF, + 0x12345678, + ]; + for (xmm, pat) in xmm_regs_high.iter().zip(patterns_high.iter()) { + a.mov(eax, *pat).unwrap(); + a.movd(*xmm, eax).unwrap(); + a.pshufd(*xmm, *xmm, 0).unwrap(); + } + + // Use 7 FLDs so TOP=1 after execution, different from default TOP=0. + // This ensures reset properly clears TOP, not just register contents. 
+ a.fldz().unwrap(); // 0.0 + a.fldl2e().unwrap(); // log2(e) + a.fldl2t().unwrap(); // log2(10) + a.fldlg2().unwrap(); // log10(2) + a.fldln2().unwrap(); // ln(2) + a.fldpi().unwrap(); // pi + // Push a memory value to also dirty last_dp + a.mov(rax, 0xDEADBEEFu64).unwrap(); + a.push(rax).unwrap(); + a.fld(qword_ptr(rsp)).unwrap(); // dirties last_dp + a.pop(rax).unwrap(); + + // Dirty FCW (0x0F7F, different from default 0x037F) + a.mov(eax, 0x0F7Fu32).unwrap(); + a.push(rax).unwrap(); + a.fldcw(word_ptr(rsp)).unwrap(); + a.pop(rax).unwrap(); + + // Dirty MXCSR (0x3F80, different from default 0x1F80) + a.mov(eax, 0x3F80u32).unwrap(); + a.push(rax).unwrap(); + a.ldmxcsr(dword_ptr(rsp)).unwrap(); + a.pop(rax).unwrap(); + + a.hlt().unwrap(); + let code = a.assemble(0).unwrap(); + + let hyperlight_vm = hyperlight_vm(&code); + + // After run, check FPU state matches expected dirty values + let fpu = hyperlight_vm.vm.fpu().unwrap(); + + #[cfg_attr(not(kvm), allow(unused_mut))] + let mut expected_dirty = CommonFpu { + fcw: 0x0F7F, + ftwx: 0xFE, // 7 registers valid (bit 0 empty after 7 pushes with TOP=1) + xmm: [ + 0x11111111111111111111111111111111_u128.to_le_bytes(), + 0x22222222222222222222222222222222_u128.to_le_bytes(), + 0x33333333333333333333333333333333_u128.to_le_bytes(), + 0x44444444444444444444444444444444_u128.to_le_bytes(), + 0x55555555555555555555555555555555_u128.to_le_bytes(), + 0x66666666666666666666666666666666_u128.to_le_bytes(), + 0x77777777777777777777777777777777_u128.to_le_bytes(), + 0x88888888888888888888888888888888_u128.to_le_bytes(), + 0x99999999999999999999999999999999_u128.to_le_bytes(), + 0xAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA_u128.to_le_bytes(), + 0xBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB_u128.to_le_bytes(), + 0xCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC_u128.to_le_bytes(), + 0xDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD_u128.to_le_bytes(), + 0xEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE_u128.to_le_bytes(), + 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF_u128.to_le_bytes(), + 
0x12345678123456781234567812345678_u128.to_le_bytes(), + ], + mxcsr: 0x3F80, + fsw: 0x0802, // TOP=1 after 7 pushes (bits 11-13), DE flag from denormal load + // fpr: 80-bit values with 6 bytes padding; may vary between CPU vendors + fpr: fpu.fpr, + // last_opcode: FPU Opcode update varies by CPU (may only update on unmasked exceptions) + last_opcode: fpu.last_opcode, + // last_ip: code is loaded at runtime-determined address + last_ip: fpu.last_ip, + // last_dp: points to stack (rsp) which is runtime-determined + last_dp: fpu.last_dp, + }; + // KVM doesn't preserve mxcsr via fpu(), copy from actual + normalize_fpu_mxcsr_for_kvm(&mut expected_dirty, fpu.mxcsr); + assert_eq!(fpu, expected_dirty); + + // KVM's get_fpu/set_fpu ioctls don't include MXCSR (it's in the SSE state, + // not x87 FPU state). We must use xsave to verify MXCSR on KVM. + #[cfg(kvm)] + if available_hv == HypervisorType::Kvm { + let xsave = hyperlight_vm.vm.xsave().unwrap(); + let mxcsr = u32::from_le_bytes(xsave[24..28].try_into().unwrap()); + assert_eq!(mxcsr, 0x3F80, "MXCSR in XSAVE should be dirty"); + } + + // Reset vcpu + hyperlight_vm + .reset_vcpu(0, &default_segment_state()) + .unwrap(); + + // Check FPU is reset to defaults + assert_fpu_reset(hyperlight_vm.vm.as_ref()); + + // Verify MXCSR via xsave on KVM (fpu() doesn't include it) + #[cfg(kvm)] + if available_hv == HypervisorType::Kvm { + let xsave = hyperlight_vm.vm.xsave().unwrap(); + let mxcsr = u32::from_le_bytes(xsave[24..28].try_into().unwrap()); + assert_eq!(mxcsr, MXCSR_DEFAULT, "MXCSR in XSAVE should be reset"); + } + } + + #[test] + fn reset_vcpu_debug_regs() { + let mut a = CodeAssembler::new(64).unwrap(); + a.push(rax).unwrap(); // Align stack to 16 bytes + a.mov(rax, 0xDEAD_BEEF_0000_0000u64).unwrap(); + a.mov(dr0, rax).unwrap(); + a.mov(rax, 0xDEAD_BEEF_0000_0001u64).unwrap(); + a.mov(dr1, rax).unwrap(); + a.mov(rax, 0xDEAD_BEEF_0000_0002u64).unwrap(); + a.mov(dr2, rax).unwrap(); + a.mov(rax, 
0xDEAD_BEEF_0000_0003u64).unwrap(); + a.mov(dr3, rax).unwrap(); + a.mov(rax, 1u64).unwrap(); + a.mov(dr6, rax).unwrap(); + a.mov(rax, 0xFFu64).unwrap(); + a.mov(dr7, rax).unwrap(); + a.hlt().unwrap(); + let code = a.assemble(0).unwrap(); + + let hyperlight_vm = hyperlight_vm(&code); + + // Verify debug registers are dirty + let debug_regs = hyperlight_vm.vm.debug_regs().unwrap(); + let expected_dirty = CommonDebugRegs { + dr0: 0xDEAD_BEEF_0000_0000, + dr1: 0xDEAD_BEEF_0000_0001, + dr2: 0xDEAD_BEEF_0000_0002, + dr3: 0xDEAD_BEEF_0000_0003, + // dr6: guest set B0 (bit 0) = 1, reserved bits vary by CPU + dr6: (debug_regs.dr6 & !DR6_WRITABLE_MASK) | 0x1, + // dr7: guest set lower byte = 0xFF, reserved bits vary by CPU + dr7: (debug_regs.dr7 & !DR7_WRITABLE_MASK) | 0xFF, + }; + assert_eq!(debug_regs, expected_dirty); + + // Reset vcpu + hyperlight_vm + .reset_vcpu(0, &default_segment_state()) + .unwrap(); + + // Check debug registers are reset to default values + assert_debug_regs_reset(hyperlight_vm.vm.as_ref()); + } + + #[test] + fn reset_vcpu_sregs() { + // Build code that modifies special registers and halts + // We can modify CR0.WP, CR2, CR4.TSD, and CR8 from guest code in ring 0 + let mut a = CodeAssembler::new(64).unwrap(); + a.push(rax).unwrap(); // Align stack to 16 bytes + // Set CR0.WP (Write Protect, bit 16) + a.mov(rax, cr0).unwrap(); + a.or(rax, 0x10000i32).unwrap(); + a.mov(cr0, rax).unwrap(); + // Set CR2 + a.mov(rax, 0xDEADBEEFu64).unwrap(); + a.mov(cr2, rax).unwrap(); + // Set CR4.TSD (Time Stamp Disable, bit 2) + a.mov(rax, cr4).unwrap(); + a.or(rax, 0x4i32).unwrap(); + a.mov(cr4, rax).unwrap(); + // Set CR8 + a.mov(rax, 5u64).unwrap(); + a.mov(cr8, rax).unwrap(); + a.hlt().unwrap(); + let code = a.assemble(0).unwrap(); + + let hyperlight_vm = hyperlight_vm(&code); + + // Get the initial CR3 value and expected defaults + let initial_cr3 = hyperlight_vm.vm.sregs().unwrap().cr3; + let defaults = 
CommonSpecialRegisters::standard_64bit_defaults(initial_cr3); + + // Verify registers are dirty (CR0.WP, CR2, CR4.TSD and CR8 modified by our code) + let sregs = hyperlight_vm.vm.sregs().unwrap(); + let mut expected_dirty = CommonSpecialRegisters { + cr0: defaults.cr0 | 0x10000, // WP bit set + cr2: 0xDEADBEEF, + cr4: defaults.cr4 | 0x4, // TSD bit set + cr8: 0x5, + ..defaults + }; + normalize_sregs_for_run_tests(&mut expected_dirty, &sregs); + assert_eq!(sregs, expected_dirty); + + // Reset vcpu + hyperlight_vm + .reset_vcpu(0, &default_segment_state()) + .unwrap(); + + // Check registers are reset to defaults (CR3 is 0 as passed to reset_vcpu) + let sregs = hyperlight_vm.vm.sregs().unwrap(); + let mut expected_reset = CommonSpecialRegisters::standard_64bit_defaults(0); + normalize_sregs_for_run_tests(&mut expected_reset, &sregs); + assert_eq!(sregs, expected_reset); + } + + /// Verifies guest-visible FPU state (via FXSAVE) is properly reset. + /// Unlike tests using hypervisor API, this runs actual guest code with FXSAVE. 
+ #[test] + fn reset_vcpu_fpu_guest_visible_state() { + let mut ctx = hyperlight_vm_with_mem_mgr_fxsave(); + + // Verify FPU was dirtied after first run + let fpu_before_reset = ctx.ctx.vm.vm.fpu().unwrap(); + assert_eq!( + fpu_before_reset.fcw, 0x0F7F, + "FCW should be dirty after first run" + ); + assert_ne!( + fpu_before_reset.ftwx, 0, + "FTW should indicate valid registers after first run" + ); + + let fxsave_before = ctx.read_fxsave(); + let fcw_before = u16::from_le_bytes(fxsave_before[0..2].try_into().unwrap()); + assert_eq!(fcw_before, 0x0F7F, "Guest FXSAVE FCW should be dirty"); + let mxcsr_before = u32::from_le_bytes(fxsave_before[24..28].try_into().unwrap()); + assert_eq!(mxcsr_before, 0x3F80, "Guest FXSAVE MXCSR should be dirty"); + let xmm0_before = u32::from_le_bytes(fxsave_before[160..164].try_into().unwrap()); + assert_eq!(xmm0_before, 0x11111111, "Guest FXSAVE XMM0 should be dirty"); + + let root_pt_addr = ctx.ctx.vm.get_root_pt().unwrap(); + let segment_state = ctx.ctx.vm.get_snapshot_sregs().unwrap(); + + ctx.ctx.vm.reset_vcpu(root_pt_addr, &segment_state).unwrap(); + + // Re-run from entrypoint (flag=1 means guest skips dirty phase, just does FXSAVE) + // Use stack_top - 8 to match initialise()'s behavior (simulates call pushing return addr) + let regs = CommonRegisters { + rip: ctx.ctx.vm.entrypoint.expect("entrypoint should be set"), + rsp: ctx.stack_top_gva() - 8, + rflags: 1 << 1, + ..Default::default() + }; + ctx.ctx.vm.vm.set_regs(®s).unwrap(); + ctx.run(); + + // Verify guest-visible state is reset + let fxsave_after = ctx.read_fxsave(); + let fcw_after = u16::from_le_bytes(fxsave_after[0..2].try_into().unwrap()); + assert_eq!( + fcw_after, 0x037F, + "Guest FXSAVE FCW should be reset to default 0x037F, got 0x{:04X}", + fcw_after + ); + + let fsw_after = u16::from_le_bytes(fxsave_after[2..4].try_into().unwrap()); + assert_eq!(fsw_after, 0, "FSW should be reset"); + + let ftw_after = fxsave_after[4]; + assert_eq!(ftw_after, 0, "FTW should 
be 0 (all empty)"); + + let fop_after = u16::from_le_bytes(fxsave_after[6..8].try_into().unwrap()); + assert_eq!(fop_after, 0, "FOP should be 0"); + + let fip_after = u64::from_le_bytes(fxsave_after[8..16].try_into().unwrap()); + assert_eq!(fip_after, 0, "FIP should be 0"); + + let fdp_after = u64::from_le_bytes(fxsave_after[16..24].try_into().unwrap()); + assert_eq!(fdp_after, 0, "FDP should be 0"); + + let mxcsr_after = u32::from_le_bytes(fxsave_after[24..28].try_into().unwrap()); + assert_eq!( + mxcsr_after, MXCSR_DEFAULT, + "Guest FXSAVE MXCSR should be reset to 0x{:08X}, got 0x{:08X}", + MXCSR_DEFAULT, mxcsr_after + ); + + // ST0-ST7 should be zeroed + for i in 0..8 { + let offset = 32 + i * 16; + let st_bytes = &fxsave_after[offset..offset + 10]; + assert!(st_bytes.iter().all(|&b| b == 0), "ST{} should be zeroed", i); + } + + // XMM0-XMM15 should be zeroed + for i in 0..16 { + let offset = 160 + i * 16; + let xmm_bytes = &fxsave_after[offset..offset + 16]; + assert!( + xmm_bytes.iter().all(|&b| b == 0), + "XMM{} should be zeroed", + i + ); + } + } + + /// Extended test context for FXSAVE tests that need to read memory at a specific offset. + struct FxsaveTestContext { + ctx: TestVmContext, + /// Offset in shared memory where FXSAVE data is stored (output_data region) + fxsave_offset: usize, + } + + impl FxsaveTestContext { + fn run(&mut self) { + self.ctx + .vm + .run( + &mut self.ctx.hshm, + &self.ctx.host_funcs, + #[cfg(gdb)] + self.ctx.dbg_mem_access_hdl.clone(), + ) + .unwrap(); + } + + fn read_fxsave(&self) -> [u8; 512] { + let mut fxsave = [0u8; 512]; + self.ctx + .hshm + .shared_mem + .copy_to_slice(&mut fxsave, self.fxsave_offset) + .unwrap(); + fxsave + } + + /// Get the stack top GVA, same as the regular codepath. 
+ fn stack_top_gva(&self) -> u64 { + hyperlight_common::layout::MAX_GVA as u64 + - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET + + 1 + } + } + + /// Creates VM with guest code that: dirties FPU (if flag==0), does FXSAVE to buffer, sets flag=1. + /// Uses output_data region for FXSAVE buffer (like regular guest output), scratch for stack. + fn hyperlight_vm_with_mem_mgr_fxsave() -> FxsaveTestContext { + use iced_x86::code_asm::*; + + // Compute fixed addresses for FXSAVE buffer and flag. + // These are in the output_data region which starts at a known offset. + // We use a default SandboxConfiguration to get the same layout as create_test_vm_context. + let config: SandboxConfiguration = Default::default(); + let layout = SandboxMemoryLayout::new(config, 512, 4096, 4096, 0x3000, None).unwrap(); + let fxsave_offset = layout.get_output_data_offset(); + let fxsave_gva = (SandboxMemoryLayout::BASE_ADDRESS + fxsave_offset) as u64; + let flag_gva = fxsave_gva + 512; + + let mut a = CodeAssembler::new(64).unwrap(); + a.push(rax).unwrap(); // Align stack to 16 bytes + + // Check flag at fixed address: if flag != 0, skip dirty phase + a.mov(rax, flag_gva).unwrap(); + a.mov(al, byte_ptr(rax)).unwrap(); + a.test(al, al).unwrap(); + let mut skip_dirty = a.create_label(); + a.jnz(skip_dirty).unwrap(); + + // Dirty x87 FPU (7 pushes so TOP=1) + a.fldz().unwrap(); + a.fldl2e().unwrap(); + a.fldl2t().unwrap(); + a.fldlg2().unwrap(); + a.fldln2().unwrap(); + a.fldpi().unwrap(); + a.fld1().unwrap(); + + // Dirty FCW (0x0F7F vs default 0x037F) + a.sub(rsp, 16i32).unwrap(); + a.mov(dword_ptr(rsp), 0x0F7Fu32).unwrap(); + a.fldcw(word_ptr(rsp)).unwrap(); + a.add(rsp, 16i32).unwrap(); + + // Dirty MXCSR (0x3F80 vs default 0x1F80) + a.sub(rsp, 16i32).unwrap(); + a.mov(dword_ptr(rsp), 0x3F80u32).unwrap(); + a.ldmxcsr(dword_ptr(rsp)).unwrap(); + a.add(rsp, 16i32).unwrap(); + + // Dirty XMM0-7 + let xmm_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]; + for (i, xmm) in 
xmm_regs.iter().enumerate() { + a.mov(eax, 0x11111111u32 * (i as u32 + 1)).unwrap(); + a.movd(*xmm, eax).unwrap(); + a.pshufd(*xmm, *xmm, 0).unwrap(); + } + + // Set flag = 1 at fixed address + a.mov(rax, flag_gva).unwrap(); + a.mov(byte_ptr(rax), 1u32).unwrap(); + + // FXSAVE to buffer at fixed address (runs on both executions) + a.set_label(&mut skip_dirty).unwrap(); + a.mov(rax, fxsave_gva).unwrap(); + a.fxsave(ptr(rax)).unwrap(); + + a.hlt().unwrap(); + + let code = a.assemble(0).unwrap(); + + // Reuse common test setup - initialise() will run the code + let ctx = create_test_vm_context(&code); + + FxsaveTestContext { ctx, fxsave_offset } + } + } +} diff --git a/src/hyperlight_host/src/hypervisor/regs/fpu.rs b/src/hyperlight_host/src/hypervisor/regs/fpu.rs index 47ce8a853..93907c6a4 100644 --- a/src/hyperlight_host/src/hypervisor/regs/fpu.rs +++ b/src/hyperlight_host/src/hypervisor/regs/fpu.rs @@ -36,13 +36,11 @@ pub(crate) struct CommonFpu { pub fcw: u16, pub fsw: u16, pub ftwx: u8, - pub pad1: u8, pub last_opcode: u16, pub last_ip: u64, pub last_dp: u64, pub xmm: [[u8; 16]; 16], pub mxcsr: u32, - pub pad2: u32, } impl Default for CommonFpu { @@ -52,13 +50,11 @@ impl Default for CommonFpu { fcw: FP_CONTROL_WORD_DEFAULT, fsw: 0, ftwx: 0, - pad1: 0, last_opcode: 0, last_ip: 0, last_dp: 0, xmm: [[0u8; 16]; 16], mxcsr: MXCSR_DEFAULT, - pad2: 0, } } } @@ -71,13 +67,13 @@ impl From<&CommonFpu> for kvm_fpu { fcw: common_fpu.fcw, fsw: common_fpu.fsw, ftwx: common_fpu.ftwx, - pad1: common_fpu.pad1, + pad1: 0, last_opcode: common_fpu.last_opcode, last_ip: common_fpu.last_ip, last_dp: common_fpu.last_dp, xmm: common_fpu.xmm, mxcsr: common_fpu.mxcsr, - pad2: common_fpu.pad2, + pad2: 0, } } } @@ -90,13 +86,13 @@ impl From<&CommonFpu> for FloatingPointUnit { fcw: common_fpu.fcw, fsw: common_fpu.fsw, ftwx: common_fpu.ftwx, - pad1: common_fpu.pad1, + pad1: 0, last_opcode: common_fpu.last_opcode, last_ip: common_fpu.last_ip, last_dp: common_fpu.last_dp, xmm: common_fpu.xmm, 
mxcsr: common_fpu.mxcsr, - pad2: common_fpu.pad2, + pad2: 0, } } } @@ -109,13 +105,11 @@ impl From<&kvm_fpu> for CommonFpu { fcw: kvm_fpu.fcw, fsw: kvm_fpu.fsw, ftwx: kvm_fpu.ftwx, - pad1: kvm_fpu.pad1, last_opcode: kvm_fpu.last_opcode, last_ip: kvm_fpu.last_ip, last_dp: kvm_fpu.last_dp, xmm: kvm_fpu.xmm, mxcsr: kvm_fpu.mxcsr, - pad2: kvm_fpu.pad2, } } } @@ -128,13 +122,11 @@ impl From<&FloatingPointUnit> for CommonFpu { fcw: mshv_fpu.fcw, fsw: mshv_fpu.fsw, ftwx: mshv_fpu.ftwx, - pad1: mshv_fpu.pad1, last_opcode: mshv_fpu.last_opcode, last_ip: mshv_fpu.last_ip, last_dp: mshv_fpu.last_dp, xmm: mshv_fpu.xmm, mxcsr: mshv_fpu.mxcsr, - pad2: mshv_fpu.pad2, } } } @@ -174,7 +166,7 @@ impl From<&CommonFpu> for [(WHV_REGISTER_NAME, Align16); WHP FpControl: fpu.fcw, FpStatus: fpu.fsw, FpTag: fpu.ftwx, - Reserved: fpu.pad1, + Reserved: 0, LastFpOp: fpu.last_opcode, Anonymous: WHV_X64_FP_CONTROL_STATUS_REGISTER_0_0 { LastFpRip: fpu.last_ip, @@ -293,7 +285,6 @@ impl TryFrom<&[(WHV_REGISTER_NAME, Align16)]> for CommonFpu fpu.fcw = control.FpControl; fpu.fsw = control.FpStatus; fpu.ftwx = control.FpTag; - fpu.pad1 = control.Reserved; fpu.last_opcode = control.LastFpOp; fpu.last_ip = unsafe { control.Anonymous.LastFpRip }; } @@ -355,7 +346,6 @@ mod tests { fcw: 0x1234, fsw: 0x5678, ftwx: 0x9a, - pad1: 0xbc, last_opcode: 0xdef0, last_ip: 0xdeadbeefcafebabe, last_dp: 0xabad1deaf00dbabe, @@ -365,7 +355,6 @@ mod tests { [22u8; 16], [23u8; 16], ], mxcsr: 0x1f80, - pad2: 0, } } diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs index f2115b451..ecf3b6b91 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs @@ -18,7 +18,9 @@ use std::sync::LazyLock; #[cfg(gdb)] use kvm_bindings::kvm_guest_debug; -use kvm_bindings::{kvm_debugregs, kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region}; +use kvm_bindings::{ + kvm_debugregs, 
kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region, kvm_xsave, +}; use kvm_ioctls::Cap::UserMemory; use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd}; use tracing::{Span, instrument}; @@ -28,8 +30,11 @@ use tracing_opentelemetry::OpenTelemetrySpanExt; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; use crate::hypervisor::regs::{ - CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, + CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, + MXCSR_DEFAULT, }; +#[cfg(all(test, feature = "init-paging"))] +use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; use crate::hypervisor::virtual_machine::{ CreateVmError, MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, VirtualMachine, VmExit, @@ -206,6 +211,8 @@ impl VirtualMachine for KvmVm { } fn fpu(&self) -> std::result::Result { + // Note: On KVM this ignores MXCSR. + // See https://github.com/torvalds/linux/blob/d358e5254674b70f34c847715ca509e46eb81e6f/arch/x86/kvm/x86.c#L12554-L12599 let kvm_fpu = self .vcpu_fd .get_fpu() @@ -215,6 +222,8 @@ impl VirtualMachine for KvmVm { fn set_fpu(&self, fpu: &CommonFpu) -> std::result::Result<(), RegisterError> { let kvm_fpu: kvm_fpu = fpu.into(); + // Note: On KVM this ignores MXCSR. + // See https://github.com/torvalds/linux/blob/d358e5254674b70f34c847715ca509e46eb81e6f/arch/x86/kvm/x86.c#L12554-L12599 self.vcpu_fd .set_fpu(&kvm_fpu) .map_err(|e| RegisterError::SetFpu(e.into()))?; @@ -253,7 +262,7 @@ impl VirtualMachine for KvmVm { Ok(()) } - #[cfg(crashdump)] + #[allow(dead_code)] fn xsave(&self) -> std::result::Result, RegisterError> { let xsave = self .vcpu_fd @@ -265,6 +274,59 @@ impl VirtualMachine for KvmVm { .flat_map(u32::to_le_bytes) .collect()) } + + fn reset_xsave(&self) -> std::result::Result<(), RegisterError> { + let mut xsave = kvm_xsave::default(); // default is zeroed 4KB buffer with no FAM + + // XSAVE legacy region layout (Intel SDM Vol. 
1 Section 13.4.1): + // - Bytes 0-1: FCW, 2-3: FSW + // - Bytes 24-27: MXCSR + // - Bytes 512-519: XSTATE_BV + // - Bytes 520-527: XCOMP_BV (compaction format indicator) + // + // kvm_xsave.region is [u32], so region[0] covers FCW (low 16) and FSW (high 16, stays 0). + xsave.region[0] = FP_CONTROL_WORD_DEFAULT as u32; + xsave.region[6] = MXCSR_DEFAULT; + // XSTATE_BV = 0x3: bits 0,1 = x87 + SSE valid. This tells KVM to apply + // the legacy region from this buffer. Without this, some KVM versions + // may ignore set_xsave entirely when XSTATE_BV=0. + xsave.region[128] = 0x3; + // Note: Unlike MSHV/WHP, we don't preserve XCOMP_BV because KVM uses + // standard (non-compacted) XSAVE format where XCOMP_BV remains 0. + + // SAFETY: No dynamic features enabled, 4KB is sufficient + unsafe { + self.vcpu_fd + .set_xsave(&xsave) + .map_err(|e| RegisterError::SetXsave(e.into()))? + }; + + Ok(()) + } + + #[cfg(test)] + #[cfg(feature = "init-paging")] + fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError> { + if std::mem::size_of_val(xsave) != XSAVE_BUFFER_SIZE { + return Err(RegisterError::XsaveSizeMismatch { + expected: XSAVE_BUFFER_SIZE as u32, + actual: std::mem::size_of_val(xsave) as u32, + }); + } + let xsave = kvm_xsave { + region: xsave.try_into().expect("xsave slice has correct length"), + ..Default::default() + }; + // Safety: Safe because we only copy 4096 bytes + // and have not enabled any dynamic xsave features + unsafe { + self.vcpu_fd + .set_xsave(&xsave) + .map_err(|e| RegisterError::SetXsave(e.into()))? 
+ }; + + Ok(()) + } } #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index ced8a7dfc..82e05c104 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -88,7 +88,7 @@ pub fn is_hypervisor_present() -> bool { } /// The hypervisor types available for the current platform -#[derive(PartialEq, Eq, Debug)] +#[derive(PartialEq, Eq, Debug, Copy, Clone)] pub(crate) enum HypervisorType { #[cfg(kvm)] Kvm, @@ -100,6 +100,17 @@ pub(crate) enum HypervisorType { Whp, } +/// Minimum XSAVE buffer size: 512 bytes legacy region + 64 bytes header. +/// Only used by MSHV and WHP which use compacted XSAVE format and need to +/// validate buffer size before accessing XCOMP_BV. +#[cfg(any(mshv3, target_os = "windows"))] +pub(crate) const XSAVE_MIN_SIZE: usize = 576; + +/// Standard XSAVE buffer size (4KB) used by KVM and MSHV. +/// WHP queries the required size dynamically. 
+#[cfg(all(any(kvm, mshv3), test, feature = "init-paging"))] +pub(crate) const XSAVE_BUFFER_SIZE: usize = 4096; + // Compiler error if no hypervisor type is available #[cfg(not(any(kvm, mshv3, target_os = "windows")))] compile_error!( @@ -207,6 +218,8 @@ pub enum RegisterError { SetDebugRegs(HypervisorError), #[error("Failed to get xsave: {0}")] GetXsave(HypervisorError), + #[error("Failed to set xsave: {0}")] + SetXsave(HypervisorError), #[error("Xsave size mismatch: expected {expected} bytes, got {actual}")] XsaveSizeMismatch { /// Expected size in bytes @@ -214,6 +227,11 @@ pub enum RegisterError { /// Actual size in bytes actual: u32, }, + #[error("Invalid xsave alignment")] + InvalidXsaveAlignment, + #[cfg(target_os = "windows")] + #[error("Failed to get xsave size: {0}")] + GetXsaveSize(#[from] HypervisorError), #[cfg(target_os = "windows")] #[error("Failed to convert WHP registers: {0}")] ConversionFailed(String), @@ -314,12 +332,17 @@ pub(crate) trait VirtualMachine: Debug + Send { #[allow(dead_code)] fn debug_regs(&self) -> std::result::Result; /// Set the debug registers of the vCPU - #[allow(dead_code)] fn set_debug_regs(&self, drs: &CommonDebugRegs) -> std::result::Result<(), RegisterError>; - /// xsave - #[cfg(crashdump)] + /// Get xsave + #[allow(dead_code)] fn xsave(&self) -> std::result::Result, RegisterError>; + /// Reset xsave to default state + fn reset_xsave(&self) -> std::result::Result<(), RegisterError>; + /// Set xsave - only used for tests + #[cfg(test)] + #[cfg(feature = "init-paging")] + fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError>; /// Get partition handle #[cfg(target_os = "windows")] diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs index 1084c0c39..74d7834ee 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs @@ -21,7 +21,7 @@ use 
std::sync::LazyLock; #[cfg(gdb)] use mshv_bindings::{DebugRegisters, hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT}; use mshv_bindings::{ - FloatingPointUnit, SpecialRegisters, StandardRegisters, hv_message_type, + FloatingPointUnit, SpecialRegisters, StandardRegisters, XSave, hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, @@ -36,11 +36,14 @@ use tracing_opentelemetry::OpenTelemetrySpanExt; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; use crate::hypervisor::regs::{ - CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, + CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, + MXCSR_DEFAULT, }; +#[cfg(all(test, feature = "init-paging"))] +use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; use crate::hypervisor::virtual_machine::{ CreateVmError, MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, VirtualMachine, - VmExit, + VmExit, XSAVE_MIN_SIZE, }; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; #[cfg(feature = "trace_guest")] @@ -283,7 +286,7 @@ impl VirtualMachine for MshvVm { Ok(()) } - #[cfg(crashdump)] + #[allow(dead_code)] fn xsave(&self) -> std::result::Result, RegisterError> { let xsave = self .vcpu_fd @@ -291,6 +294,66 @@ impl VirtualMachine for MshvVm { .map_err(|e| RegisterError::GetXsave(e.into()))?; Ok(xsave.buffer.to_vec()) } + + fn reset_xsave(&self) -> std::result::Result<(), RegisterError> { + let current_xsave = self + .vcpu_fd + .get_xsave() + .map_err(|e| RegisterError::GetXsave(e.into()))?; + if current_xsave.buffer.len() < XSAVE_MIN_SIZE { + // Minimum: 512 legacy + 64 header + return Err(RegisterError::XsaveSizeMismatch { + expected: XSAVE_MIN_SIZE as u32, + actual: current_xsave.buffer.len() as u32, + }); + } + + let mut buf = 
XSave::default(); // default is zeroed 4KB buffer + + // Copy XCOMP_BV (offset 520-527) - preserves feature mask + compacted bit + buf.buffer[520..528].copy_from_slice(&current_xsave.buffer[520..528]); + + // XSAVE area layout from Intel SDM Vol. 1 Section 13.4.1: + // - Bytes 0-1: FCW (x87 FPU Control Word) + // - Bytes 24-27: MXCSR + // - Bytes 512-519: XSTATE_BV (bitmap of valid state components) + buf.buffer[0..2].copy_from_slice(&FP_CONTROL_WORD_DEFAULT.to_le_bytes()); + buf.buffer[24..28].copy_from_slice(&MXCSR_DEFAULT.to_le_bytes()); + // XSTATE_BV = 0x3: bits 0,1 = x87 + SSE valid. Explicitly tell hypervisor + // to apply the legacy region from this buffer for consistent behavior. + buf.buffer[512..520].copy_from_slice(&0x3u64.to_le_bytes()); + + self.vcpu_fd + .set_xsave(&buf) + .map_err(|e| RegisterError::SetXsave(e.into()))?; + Ok(()) + } + + #[cfg(test)] + #[cfg(feature = "init-paging")] + fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError> { + if std::mem::size_of_val(xsave) != XSAVE_BUFFER_SIZE { + return Err(RegisterError::XsaveSizeMismatch { + expected: XSAVE_BUFFER_SIZE as u32, + actual: std::mem::size_of_val(xsave) as u32, + }); + } + + // Safety: all valid u32 values are 4 valid u8 values + let (prefix, bytes, suffix) = unsafe { xsave.align_to() }; + if !prefix.is_empty() || !suffix.is_empty() { + return Err(RegisterError::InvalidXsaveAlignment); + } + let buf = XSave { + buffer: bytes + .try_into() + .expect("xsave slice has correct length and prefix and suffix are empty"), + }; + self.vcpu_fd + .set_xsave(&buf) + .map_err(|e| RegisterError::SetXsave(e.into()))?; + Ok(()) + } } #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 2bcad8bcd..9410e0d92 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -29,14 +29,15 @@ use windows_result::HRESULT; 
use crate::hypervisor::gdb::{DebugError, DebuggableVm}; use crate::hypervisor::regs::{ Align16, CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, - WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, WHP_FPU_NAMES, WHP_FPU_NAMES_LEN, - WHP_REGS_NAMES, WHP_REGS_NAMES_LEN, WHP_SREGS_NAMES, WHP_SREGS_NAMES_LEN, + FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, + WHP_FPU_NAMES, WHP_FPU_NAMES_LEN, WHP_REGS_NAMES, WHP_REGS_NAMES_LEN, WHP_SREGS_NAMES, + WHP_SREGS_NAMES_LEN, }; use crate::hypervisor::surrogate_process::SurrogateProcess; use crate::hypervisor::surrogate_process_manager::get_surrogate_process_manager; use crate::hypervisor::virtual_machine::{ CreateVmError, HypervisorError, MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, - VirtualMachine, VmExit, + VirtualMachine, VmExit, XSAVE_MIN_SIZE, }; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; #[cfg(feature = "trace_guest")] @@ -449,7 +450,7 @@ impl VirtualMachine for WhpVm { Ok(()) } - #[cfg(crashdump)] + #[allow(dead_code)] fn xsave(&self) -> std::result::Result, RegisterError> { // Get the required buffer size by calling with NULL buffer. // If the buffer is not large enough (0 won't be), WHvGetVirtualProcessorXsaveState returns @@ -500,6 +501,131 @@ impl VirtualMachine for WhpVm { Ok(xsave_buffer) } + fn reset_xsave(&self) -> std::result::Result<(), RegisterError> { + // WHP uses compacted XSAVE format (bit 63 of XCOMP_BV set). + // We cannot just zero out the xsave area, we need to preserve the XCOMP_BV. + + // Get the required buffer size by calling with NULL buffer. 
+ let mut buffer_size_needed: u32 = 0; + + let result = unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + std::ptr::null_mut(), + 0, + &mut buffer_size_needed, + ) + }; + + // Expect insufficient buffer error; any other error is unexpected + if let Err(e) = result + && e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER + { + return Err(RegisterError::GetXsaveSize(e.into())); + } + + if buffer_size_needed < XSAVE_MIN_SIZE as u32 { + return Err(RegisterError::XsaveSizeMismatch { + expected: XSAVE_MIN_SIZE as u32, + actual: buffer_size_needed, + }); + } + + // Create a buffer to hold the current state (to get the correct XCOMP_BV) + let mut current_state = vec![0u8; buffer_size_needed as usize]; + let mut written_bytes = 0; + unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + current_state.as_mut_ptr() as *mut std::ffi::c_void, + buffer_size_needed, + &mut written_bytes, + ) + .map_err(|e| RegisterError::GetXsave(e.into()))?; + }; + + // Zero out most of the buffer, preserving only XCOMP_BV (520-528). + // Extended components with XSTATE_BV bit=0 will use their init values. + // + // - Legacy region (0-512): x87 FPU + SSE state + // - XSTATE_BV (512-520): Feature bitmap + // - XCOMP_BV (520-528): Compaction bitmap + format bit (KEEP) + // - Reserved (528-576): Header padding + // - Extended (576+): AVX, AVX-512, MPX, PKRU, AMX, etc. + current_state[0..520].fill(0); + current_state[528..].fill(0); + + // XSAVE area layout from Intel SDM Vol. 1 Section 13.4.1: + // - Bytes 0-1: FCW (x87 FPU Control Word) + // - Bytes 24-27: MXCSR + // - Bytes 512-519: XSTATE_BV (bitmap of valid state components) + current_state[0..2].copy_from_slice(&FP_CONTROL_WORD_DEFAULT.to_le_bytes()); + current_state[24..28].copy_from_slice(&MXCSR_DEFAULT.to_le_bytes()); + // XSTATE_BV = 0x3: bits 0,1 = x87 + SSE valid. Explicitly tell hypervisor + // to apply the legacy region from this buffer for consistent behavior. 
+ current_state[512..520].copy_from_slice(&0x3u64.to_le_bytes()); + + unsafe { + WHvSetVirtualProcessorXsaveState( + self.partition, + 0, + current_state.as_ptr() as *const std::ffi::c_void, + buffer_size_needed, + ) + .map_err(|e| RegisterError::SetXsave(e.into()))?; + } + + Ok(()) + } + + #[cfg(test)] + #[cfg(feature = "init-paging")] + fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError> { + // Get the required buffer size by calling with NULL buffer. + // If the buffer is not large enough (0 won't be), WHvGetVirtualProcessorXsaveState returns + // WHV_E_INSUFFICIENT_BUFFER and sets buffer_size_needed to the required size. + let mut buffer_size_needed: u32 = 0; + + let result = unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + std::ptr::null_mut(), + 0, + &mut buffer_size_needed, + ) + }; + + // Expect insufficient buffer error; any other error is unexpected + if let Err(e) = result + && e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER + { + return Err(RegisterError::GetXsaveSize(e.into())); + } + + let provided_size = std::mem::size_of_val(xsave) as u32; + if provided_size != buffer_size_needed { + return Err(RegisterError::XsaveSizeMismatch { + expected: buffer_size_needed, + actual: provided_size, + }); + } + + unsafe { + WHvSetVirtualProcessorXsaveState( + self.partition, + 0, + xsave.as_ptr() as *const std::ffi::c_void, + buffer_size_needed, + ) + .map_err(|e| RegisterError::SetXsave(e.into()))?; + } + + Ok(()) + } + /// Get the partition handle for this VM fn partition_handle(&self) -> WHV_PARTITION_HANDLE { self.partition diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 314098774..d9c74eb78 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -27,7 +27,7 @@ use super::memory_region::MemoryRegion; use super::ptr::{GuestPtr, RawPtr}; use super::ptr_offset::Offset; use super::shared_mem::{ExclusiveSharedMemory, 
GuestSharedMemory, HostSharedMemory, SharedMemory}; -use crate::sandbox::snapshot::Snapshot; +use crate::sandbox::snapshot::{GuestSegmentState, Snapshot}; use crate::{Result, new_error}; /// A struct that is responsible for laying out and managing the memory @@ -182,6 +182,7 @@ where mapped_regions: Vec, root_pt_gpa: u64, rsp_gva: u64, + segment_state: GuestSegmentState, ) -> Result { Snapshot::new( &mut self.shared_mem, @@ -192,6 +193,7 @@ where mapped_regions, root_pt_gpa, rsp_gva, + segment_state, ) } } diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 08f3eef06..a94dcea96 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -174,9 +174,17 @@ impl MultiUseSandbox { .get_root_pt() .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; let stack_top_gpa = self.vm.get_stack_top(); - let memory_snapshot = - self.mem_mgr - .snapshot(self.id, mapped_regions_vec, root_pt_gpa, stack_top_gpa)?; + let segment_state = self + .vm + .get_snapshot_sregs() + .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; + let memory_snapshot = self.mem_mgr.snapshot( + self.id, + mapped_regions_vec, + root_pt_gpa, + stack_top_gpa, + segment_state, + )?; let snapshot = Arc::new(memory_snapshot); self.snapshot = Some(snapshot.clone()); Ok(snapshot) @@ -297,9 +305,21 @@ impl MultiUseSandbox { .update_scratch_mapping(gscratch) .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; } + + let segment_state = snapshot.segment_state().ok_or_else(|| { + HyperlightError::Error( + "snapshot from running sandbox should have segment_state".to_string(), + ) + })?; + // TODO (ludfjig): Go through the rest of possible errors in this `MultiUseSandbox::restore` function + // and determine if they should also poison the sandbox. 
self.vm - .set_root_pt(snapshot.root_pt_gpa()) - .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; + .reset_vcpu(snapshot.root_pt_gpa(), segment_state) + .map_err(|e| { + self.poisoned = true; + HyperlightVmError::Restore(e) + })?; + self.vm.set_stack_top(snapshot.stack_top_gva()); let current_regions: HashSet<_> = self.vm.get_mapped_regions().cloned().collect(); @@ -1358,6 +1378,41 @@ mod tests { assert_ne!(sandbox3.id, sandbox_id); } + /// Test that snapshot restore properly resets vCPU debug registers. This test verifies + /// that restore() calls reset_vcpu(). + #[test] + fn snapshot_restore_resets_debug_registers() { + let mut sandbox: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve().unwrap() + }; + + let snapshot = sandbox.snapshot().unwrap(); + + // Verify DR0 is initially 0 (clean state) + let dr0_initial: u64 = sandbox.call("GetDr0", ()).unwrap(); + assert_eq!(dr0_initial, 0, "DR0 should initially be 0"); + + // Dirty DR0 by setting it to a known non-zero value + const DIRTY_VALUE: u64 = 0xDEAD_BEEF_CAFE_BABE; + sandbox.call::<()>("SetDr0", DIRTY_VALUE).unwrap(); + let dr0_dirty: u64 = sandbox.call("GetDr0", ()).unwrap(); + assert_eq!( + dr0_dirty, DIRTY_VALUE, + "DR0 should be dirty after SetDr0 call" + ); + + // Restore to the snapshot - this should reset vCPU state including debug registers + sandbox.restore(snapshot).unwrap(); + + let dr0_after_restore: u64 = sandbox.call("GetDr0", ()).unwrap(); + assert_eq!( + dr0_after_restore, 0, + "DR0 should be 0 after restore (reset_vcpu should have been called)" + ); + } + /// Test that sandboxes can be created and evolved with different heap sizes #[test] fn test_sandbox_creation_various_sizes() { diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot.rs index b1b252428..687ff1db0 100644 --- 
a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot.rs @@ -22,6 +22,7 @@ use tracing::{Span, instrument}; use crate::HyperlightError::MemoryRegionSizeMismatch; use crate::Result; +use crate::hypervisor::regs::{CommonSegmentRegister, CommonTableRegister}; use crate::mem::exe::LoadInfo; use crate::mem::layout::SandboxMemoryLayout; use crate::mem::memory_region::MemoryRegion; @@ -29,8 +30,24 @@ use crate::mem::mgr::GuestPageTableBuffer; use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory}; use crate::sandbox::SandboxConfiguration; use crate::sandbox::uninitialized::{GuestBinary, GuestEnvironment}; + pub(super) static SANDBOX_CONFIGURATION_COUNTER: AtomicU64 = AtomicU64::new(0); +/// Segment register state initialized by the guest, used for snapshot restoration. +#[cfg_attr(not(feature = "init-paging"), expect(dead_code))] +#[derive(Debug, Clone, Copy)] +pub(crate) struct GuestSegmentState { + pub(crate) gdtr: CommonTableRegister, + pub(crate) idtr: CommonTableRegister, + pub(crate) tr: CommonSegmentRegister, + pub(crate) cs: CommonSegmentRegister, + pub(crate) ds: CommonSegmentRegister, + pub(crate) es: CommonSegmentRegister, + pub(crate) fs: CommonSegmentRegister, + pub(crate) gs: CommonSegmentRegister, + pub(crate) ss: CommonSegmentRegister, +} + /// A wrapper around a `SharedMemory` reference and a snapshot /// of the memory therein pub struct Snapshot { @@ -70,6 +87,11 @@ pub struct Snapshot { /// The address of the top of the guest stack stack_top_gva: u64, + /// Segment register state initialized by the guest during preinitialisation. + /// None for snapshots created directly from a binary (before guest runs). + /// Some for snapshots taken from a running sandbox. + segment_state: Option, + /// Preinitialisation entry point for snapshots created directly from a /// guest binary. 
/// @@ -430,6 +452,7 @@ impl Snapshot { hash, root_pt_gpa: pt_base_gpa as u64, stack_top_gva: exn_stack_top_gva, + segment_state: None, preinitialise: Some(load_addr + entrypoint_offset), }) } @@ -452,6 +475,7 @@ impl Snapshot { regions: Vec, root_pt_gpa: u64, stack_top_gva: u64, + segment_state: GuestSegmentState, ) -> Result { let (new_root_pt_gpa, memory) = shared_mem.with_exclusivity(|snap_e| { scratch_mem.with_exclusivity(|scratch_e| { @@ -500,8 +524,9 @@ impl Snapshot { regions, load_info, hash, - root_pt_gpa: new_root_pt_gpa as u64, stack_top_gva, + segment_state: Some(segment_state), + root_pt_gpa: new_root_pt_gpa as u64, preinitialise: None, }) } @@ -545,6 +570,13 @@ impl Snapshot { self.stack_top_gva } + /// Returns the segment state (GDT, IDT, TR, CS, SS) stored in this snapshot. + /// Returns None for snapshots created directly from a binary (before preinitialisation). + /// Returns Some for snapshots taken from a running sandbox. + pub(crate) fn segment_state(&self) -> Option<&GuestSegmentState> { + self.segment_state.as_ref() + } + pub(crate) fn preinitialise(&self) -> Option { self.preinitialise } @@ -560,11 +592,27 @@ impl PartialEq for Snapshot { mod tests { use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind, PAGE_SIZE}; + use super::GuestSegmentState; + use crate::hypervisor::regs::{CommonSegmentRegister, CommonTableRegister}; use crate::mem::exe::LoadInfo; use crate::mem::layout::SandboxMemoryLayout; use crate::mem::mgr::{GuestPageTableBuffer, SandboxMemoryManager}; use crate::mem::shared_mem::{ExclusiveSharedMemory, HostSharedMemory, SharedMemory}; + fn default_segment_state() -> GuestSegmentState { + GuestSegmentState { + gdtr: CommonTableRegister::default(), + idtr: CommonTableRegister::default(), + tr: CommonSegmentRegister::default(), + cs: CommonSegmentRegister::default(), + ds: CommonSegmentRegister::default(), + es: CommonSegmentRegister::default(), + fs: CommonSegmentRegister::default(), + gs: 
CommonSegmentRegister::default(), + ss: CommonSegmentRegister::default(), + } + } + fn make_simple_pt_mems() -> (SandboxMemoryManager, u64) { let scratch_mem = ExclusiveSharedMemory::new(PAGE_SIZE).unwrap(); let pt_base = PAGE_SIZE + SandboxMemoryLayout::BASE_ADDRESS; @@ -617,6 +665,7 @@ mod tests { Vec::new(), pt_base, 0, + default_segment_state(), ) .unwrap(); @@ -647,6 +696,7 @@ mod tests { Vec::new(), pt_base, 0, + default_segment_state(), ) .unwrap(); assert_eq!(snapshot.mem_size(), size); @@ -668,6 +718,7 @@ mod tests { Vec::new(), pt_base, 0, + default_segment_state(), ) .unwrap(); @@ -683,6 +734,7 @@ mod tests { Vec::new(), pt_base, 0, + default_segment_state(), ) .unwrap(); diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 59fa7c3bb..b6f2c3ed6 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -234,7 +234,7 @@ fn interrupt_same_thread_no_barrier() { // Only allow successful calls or interrupted. // The call can be successful in case the call is finished before kill() is called. 
Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => {} - _ => panic!("Unexpected return"), + other => panic!("Unexpected return: {:?}", other), }; if sbox2.poisoned() { sbox2.restore(snapshot2.clone()).unwrap(); diff --git a/src/tests/rust_guests/simpleguest/Cargo.lock b/src/tests/rust_guests/simpleguest/Cargo.lock index 17e3e12da..63f55ecee 100644 --- a/src/tests/rust_guests/simpleguest/Cargo.lock +++ b/src/tests/rust_guests/simpleguest/Cargo.lock @@ -8,17 +8,11 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - [[package]] name = "bitflags" -version = "2.9.3" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "buddy_system_allocator" @@ -31,18 +25,19 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.34" +version = "1.2.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" dependencies = [ + "find-msvc-tools", "shlex", ] [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "equivalent" @@ -50,6 +45,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + [[package]] name = "flatbuffers" version = "25.12.19" @@ -136,9 +137,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown", @@ -146,9 +147,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "linkme" @@ -172,11 +173,10 @@ dependencies = [ [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] @@ -188,9 +188,9 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" -version = "2.7.5" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "pin-project-lite" @@ -209,18 +209,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.106" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -234,12 +234,6 @@ dependencies = [ "semver", ] -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - [[package]] name = "scopeguard" version = "1.2.0" @@ -248,9 +242,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "serde" @@ -283,14 +277,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.143" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", + "serde_core", + "zmij", ] [[package]] @@ -322,9 +317,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.107" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" +checksum = 
"d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -353,18 +348,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.3" +version = "0.7.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.23.7" +version = "0.23.10+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" dependencies = [ "indexmap", "toml_datetime", @@ -374,9 +369,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.4" +version = "1.0.6+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" dependencies = [ "winnow", ] @@ -412,9 +407,9 @@ checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "winnow" @@ -424,3 +419,9 @@ checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ "memchr", ] + +[[package]] +name = "zmij" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" diff --git 
a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 2434836e4..09f55212a 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -533,6 +533,18 @@ fn use_sse2_registers() { unsafe { core::arch::asm!("movss xmm1, DWORD PTR [{0}]", in(reg) &val) }; } +#[guest_function("SetDr0")] +fn set_dr0(value: u64) { + unsafe { core::arch::asm!("mov dr0, {}", in(reg) value) }; +} + +#[guest_function("GetDr0")] +fn get_dr0() -> u64 { + let value: u64; + unsafe { core::arch::asm!("mov {}, dr0", out(reg) value) }; + value +} + #[guest_function("Add")] fn add(a: i32, b: i32) -> Result { #[host_function("HostAdd")]