diff --git a/src/hyperlight_host/src/func/call_ctx.rs b/src/hyperlight_host/src/func/call_ctx.rs index 619c45818..c04ee2ad6 100644 --- a/src/hyperlight_host/src/func/call_ctx.rs +++ b/src/hyperlight_host/src/func/call_ctx.rs @@ -19,7 +19,6 @@ use hyperlight_common::flatbuffer_wrappers::function_types::{ }; use tracing::{instrument, Span}; -use super::guest_dispatch::call_function_on_guest; use crate::{MultiUseSandbox, Result}; /// A context for calling guest functions. /// @@ -72,7 +71,8 @@ impl MultiUseGuestCallContext { // !Send (and !Sync), we also don't need to worry about // synchronization - call_function_on_guest(&mut self.sbox, func_name, func_ret_type, args) + self.sbox + .call_guest_function_by_name_no_reset(func_name, func_ret_type, args) } /// Close out the context and get back the internally-stored diff --git a/src/hyperlight_host/src/func/guest_dispatch.rs b/src/hyperlight_host/src/func/guest_dispatch.rs deleted file mode 100644 index 8b512cead..000000000 --- a/src/hyperlight_host/src/func/guest_dispatch.rs +++ /dev/null @@ -1,461 +0,0 @@ -/* -Copyright 2024 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType}; -use hyperlight_common::flatbuffer_wrappers::function_types::{ - ParameterValue, ReturnType, ReturnValue, -}; -use tracing::{instrument, Span}; - -use super::guest_err::check_for_guest_error; -use crate::hypervisor::hypervisor_handler::HypervisorHandlerAction; -use crate::sandbox::WrapperGetter; -use crate::HyperlightError::GuestExecutionHungOnHostFunctionCall; -use crate::{HyperlightError, Result}; - -/// Call a guest function by name, using the given `wrapper_getter`. -#[instrument( - err(Debug), - skip(wrapper_getter, args), - parent = Span::current(), - level = "Trace" -)] -pub(crate) fn call_function_on_guest( - wrapper_getter: &mut WrapperGetterT, - function_name: &str, - return_type: ReturnType, - args: Option>, -) -> Result { - let mut timedout = false; - - let fc = FunctionCall::new( - function_name.to_string(), - args, - FunctionCallType::Guest, - return_type, - ); - - let buffer: Vec = fc - .try_into() - .map_err(|_| HyperlightError::Error("Failed to serialize FunctionCall".to_string()))?; - - { - let mem_mgr = wrapper_getter.get_mgr_wrapper_mut(); - mem_mgr.as_mut().write_guest_function_call(&buffer)?; - } - - let mut hv_handler = wrapper_getter.get_hv_handler().clone(); - match hv_handler.execute_hypervisor_handler_action( - HypervisorHandlerAction::DispatchCallFromHost(function_name.to_string()), - ) { - Ok(()) => {} - Err(e) => match e { - HyperlightError::HypervisorHandlerMessageReceiveTimedout() => { - timedout = true; - match hv_handler.terminate_hypervisor_handler_execution_and_reinitialise( - wrapper_getter.get_mgr_wrapper_mut().unwrap_mgr_mut(), - )? 
{ - HyperlightError::HypervisorHandlerExecutionCancelAttemptOnFinishedExecution() => - {} - // ^^^ do nothing, we just want to actually get the Flatbuffer return value - // from shared memory in this case - e => return Err(e), - } - } - e => return Err(e), - }, - }; - - let mem_mgr = wrapper_getter.get_mgr_wrapper_mut(); - mem_mgr.check_stack_guard()?; // <- wrapper around mem_mgr `check_for_stack_guard` - check_for_guest_error(mem_mgr)?; - - mem_mgr - .as_mut() - .get_guest_function_call_result() - .map_err(|e| { - if timedout { - // if we timed-out, but still got here - // that means we had actually gotten stuck - // on the execution of a host function, and; - // hence, couldn't cancel guest execution. - // This particular check is needed now, because - // unlike w/ the previous scoped thread usage, - // we can't check if the thread completed or not. - log::error!("Guest execution hung on host function call"); - GuestExecutionHungOnHostFunctionCall() - } else { - e - } - }) -} - -#[cfg(test)] -mod tests { - use std::sync::{Arc, Mutex}; - use std::thread; - - use hyperlight_testing::{callback_guest_as_string, simple_guest_as_string}; - - use super::*; - use crate::func::call_ctx::MultiUseGuestCallContext; - use crate::sandbox::is_hypervisor_present; - use crate::sandbox::uninitialized::GuestBinary; - use crate::sandbox_state::sandbox::EvolvableSandbox; - use crate::sandbox_state::transition::Noop; - use crate::{new_error, HyperlightError, MultiUseSandbox, Result, UninitializedSandbox}; - - // simple function - fn test_function0(_: MultiUseGuestCallContext) -> Result { - Ok(42) - } - - struct GuestStruct; - - // function that return type unsupported by the host - fn test_function1(_: MultiUseGuestCallContext) -> Result { - Ok(GuestStruct) - } - - // function that takes a parameter - fn test_function2(_: MultiUseGuestCallContext, param: i32) -> Result { - Ok(param) - } - - #[test] - // TODO: Investigate why this test fails with an incorrect error when run alongside 
other tests - #[ignore] - #[cfg(target_os = "linux")] - fn test_violate_seccomp_filters() -> Result<()> { - if !is_hypervisor_present() { - panic!("Panic on create_multi_use_sandbox because no hypervisor is present"); - } - - fn make_get_pid_syscall() -> Result { - let pid = unsafe { libc::syscall(libc::SYS_getpid) }; - Ok(pid as u64) - } - - // First, run to make sure it fails. - { - let mut usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - usbox.register("MakeGetpidSyscall", make_get_pid_syscall)?; - - let mut sbox: MultiUseSandbox = usbox.evolve(Noop::default())?; - - let res = - sbox.call_guest_function_by_name("ViolateSeccompFilters", ReturnType::ULong, None); - - #[cfg(feature = "seccomp")] - match res { - Ok(_) => panic!("Expected to fail due to seccomp violation"), - Err(e) => match e { - HyperlightError::DisallowedSyscall => {} - _ => panic!("Expected DisallowedSyscall error: {}", e), - }, - } - - #[cfg(not(feature = "seccomp"))] - match res { - Ok(_) => (), - Err(e) => panic!("Expected to succeed without seccomp: {}", e), - } - } - - // Second, run with allowing `SYS_getpid` - #[cfg(feature = "seccomp")] - { - let mut usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - usbox.register_with_extra_allowed_syscalls( - "MakeGetpidSyscall", - make_get_pid_syscall, - vec![libc::SYS_getpid], - )?; - // ^^^ note, we are allowing SYS_getpid - - let mut sbox: MultiUseSandbox = usbox.evolve(Noop::default())?; - - let res = - sbox.call_guest_function_by_name("ViolateSeccompFilters", ReturnType::ULong, None); - - match res { - Ok(_) => {} - Err(e) => panic!("Expected to succeed due to seccomp violation: {}", e), - } - } - - Ok(()) - } - - #[test] - fn test_execute_in_host() { - let uninitialized_sandbox = || { - UninitializedSandbox::new( - 
GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap() - }; - - // test_function0 - { - let usbox = uninitialized_sandbox(); - let sandbox: MultiUseSandbox = usbox - .evolve(Noop::default()) - .expect("Failed to initialize sandbox"); - let result = test_function0(sandbox.new_call_context()); - assert_eq!(result.unwrap(), 42); - } - - // test_function1 - { - let usbox = uninitialized_sandbox(); - let sandbox: MultiUseSandbox = usbox - .evolve(Noop::default()) - .expect("Failed to initialize sandbox"); - let result = test_function1(sandbox.new_call_context()); - assert!(result.is_ok()); - } - - // test_function2 - { - let usbox = uninitialized_sandbox(); - let sandbox: MultiUseSandbox = usbox - .evolve(Noop::default()) - .expect("Failed to initialize sandbox"); - let result = test_function2(sandbox.new_call_context(), 42); - assert_eq!(result.unwrap(), 42); - } - - // test concurrent calls with a local closure that returns current count - { - let count = Arc::new(Mutex::new(0)); - let order = Arc::new(Mutex::new(vec![])); - - let mut handles = vec![]; - - for _ in 0..10 { - let usbox = uninitialized_sandbox(); - let sandbox: MultiUseSandbox = usbox - .evolve(Noop::default()) - .expect("Failed to initialize sandbox"); - let _ctx = sandbox.new_call_context(); - let count = Arc::clone(&count); - let order = Arc::clone(&order); - let handle = thread::spawn(move || { - // we're not actually using the context, but we're calling - // it here to test the mutual exclusion - let mut num = count - .try_lock() - .map_err(|_| new_error!("Error locking")) - .unwrap(); - *num += 1; - order - .try_lock() - .map_err(|_| new_error!("Error locking")) - .unwrap() - .push(*num); - }); - handles.push(handle); - } - - for handle in handles { - handle.join().unwrap(); - } - - // Check if the order of operations is sequential - let order = order - .try_lock() - .map_err(|_| new_error!("Error locking")) - .unwrap(); - for i in 0..10 { - 
assert_eq!(order[i], i + 1); - } - } - - // TODO: Add tests to ensure State has been reset. - } - - #[track_caller] - fn guest_bin() -> GuestBinary { - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")) - } - - #[track_caller] - fn test_call_guest_function_by_name(u_sbox: UninitializedSandbox) { - let mu_sbox: MultiUseSandbox = u_sbox.evolve(Noop::default()).unwrap(); - - let msg = "Hello, World!!\n".to_string(); - let len = msg.len() as i32; - let mut ctx = mu_sbox.new_call_context(); - let result = ctx - .call( - "PrintOutput", - ReturnType::Int, - Some(vec![ParameterValue::String(msg.clone())]), - ) - .unwrap(); - - assert_eq!(result, ReturnValue::Int(len)); - } - - fn call_guest_function_by_name_hv() { - // in-hypervisor mode - let u_sbox = UninitializedSandbox::new( - guest_bin(), - // for now, we're using defaults. In the future, we should get - // variability below - None, - // by default, the below represents in-hypervisor mode - ) - .unwrap(); - test_call_guest_function_by_name(u_sbox); - } - - #[test] - fn test_call_guest_function_by_name_hv() { - call_guest_function_by_name_hv(); - } - - fn terminate_vcpu_after_1000ms() -> Result<()> { - // This test relies upon a Hypervisor being present so for now - // we will skip it if there isn't one. 
- if !is_hypervisor_present() { - println!("Skipping terminate_vcpu_after_1000ms because no hypervisor is present"); - return Ok(()); - } - let usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - )?; - let sandbox: MultiUseSandbox = usbox.evolve(Noop::default())?; - let mut ctx = sandbox.new_call_context(); - let result = ctx.call("Spin", ReturnType::Void, None); - - assert!(result.is_err()); - match result.unwrap_err() { - HyperlightError::ExecutionCanceledByHost() => {} - e => panic!( - "Expected HyperlightError::ExecutionCanceledByHost() but got {:?}", - e - ), - } - Ok(()) - } - - // Test that we can terminate a VCPU that has been running the VCPU for too long. - #[test] - fn test_terminate_vcpu_spinning_cpu() -> Result<()> { - terminate_vcpu_after_1000ms()?; - Ok(()) - } - - // Test that we can terminate a VCPU that has been running the VCPU for too long and then call a guest function on the same host thread. - #[test] - fn test_terminate_vcpu_and_then_call_guest_function_on_the_same_host_thread() -> Result<()> { - terminate_vcpu_after_1000ms()?; - call_guest_function_by_name_hv(); - Ok(()) - } - - // This test is to capture the case where the guest execution is running a host function when cancelled and that host function - // is never going to return. - // The host function that is called will end after 5 seconds, but by this time the cancellation will have given up - // (using default timeout settings) , so this tests looks for the error "Failed to cancel guest execution". - - #[test] - fn test_terminate_vcpu_calling_host_spinning_cpu() { - // This test relies upon a Hypervisor being present so for now - // we will skip it if there isn't one. 
- if !is_hypervisor_present() { - println!("Skipping test_call_guest_function_by_name because no hypervisor is present"); - return; - } - let mut usbox = UninitializedSandbox::new( - GuestBinary::FilePath(callback_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - // Make this host call run for 5 seconds - - fn spin() -> Result<()> { - thread::sleep(std::time::Duration::from_secs(5)); - Ok(()) - } - - #[cfg(any(target_os = "windows", not(feature = "seccomp")))] - usbox.register("Spin", spin).unwrap(); - - #[cfg(all(target_os = "linux", feature = "seccomp"))] - usbox - .register_with_extra_allowed_syscalls("Spin", spin, vec![libc::SYS_clock_nanosleep]) - .unwrap(); - - let sandbox: MultiUseSandbox = usbox.evolve(Noop::default()).unwrap(); - let mut ctx = sandbox.new_call_context(); - let result = ctx.call("CallHostSpin", ReturnType::Void, None); - - assert!(result.is_err()); - match result.unwrap_err() { - HyperlightError::GuestExecutionHungOnHostFunctionCall() => {} - e => panic!( - "Expected HyperlightError::GuestExecutionHungOnHostFunctionCall but got {:?}", - e - ), - } - } - - #[test] - fn test_trigger_exception_on_guest() { - let usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - let mut multi_use_sandbox: MultiUseSandbox = usbox.evolve(Noop::default()).unwrap(); - - let res = multi_use_sandbox.call_guest_function_by_name( - "TriggerException", - ReturnType::Void, - None, - ); - - assert!(res.is_err()); - - match res.unwrap_err() { - HyperlightError::GuestAborted(_, msg) => { - // msg should indicate we got an invalid opcode exception - assert!(msg.contains("InvalidOpcode")); - } - e => panic!( - "Expected HyperlightError::GuestExecutionError but got {:?}", - e - ), - } - } -} diff --git a/src/hyperlight_host/src/func/mod.rs b/src/hyperlight_host/src/func/mod.rs index e813ea7d5..38d2a8098 100644 --- a/src/hyperlight_host/src/func/mod.rs 
+++ b/src/hyperlight_host/src/func/mod.rs @@ -18,8 +18,6 @@ limitations under the License. /// functions on the same Hyperlight sandbox instance, all from within the /// same state and mutual exclusion context. pub mod call_ctx; -/// Functionality to dispatch a call from the host to the guest -pub(crate) mod guest_dispatch; /// Functionality to check for errors after a guest call pub(crate) mod guest_err; /// Definitions and functionality to enable guest-to-host function calling, diff --git a/src/hyperlight_host/src/hypervisor/crashdump.rs b/src/hyperlight_host/src/hypervisor/crashdump.rs index a70dc34c1..093b50618 100644 --- a/src/hyperlight_host/src/hypervisor/crashdump.rs +++ b/src/hyperlight_host/src/hypervisor/crashdump.rs @@ -2,21 +2,21 @@ use std::io::Write; use tempfile::NamedTempFile; -use super::Hypervisor; +use super::hyperlight_vm::HyperlightSandbox; +use crate::hypervisor::HyperlightVm; use crate::{new_error, Result}; /// Dump registers + memory regions + raw memory to a tempfile -#[cfg(crashdump)] -pub(crate) fn crashdump_to_tempfile(hv: &dyn Hypervisor) -> Result<()> { +pub(crate) fn crashdump_to_tempfile(vm: &HyperlightSandbox) -> Result<()> { let mut temp_file = NamedTempFile::with_prefix("mem")?; - let hv_details = format!("{:#x?}", hv); + let hv_details = format!("{:#x?}", vm); // write hypervisor details such as registers, info about mapped memory regions, etc. 
temp_file.write_all(hv_details.as_bytes())?; temp_file.write_all(b"================ MEMORY DUMP =================\n")?; // write the raw memory dump for each memory region - for region in hv.get_memory_regions() { + for region in vm.get_memory_regions() { if region.host_region.start == 0 || region.host_region.is_empty() { continue; } diff --git a/src/hyperlight_host/src/hypervisor/fpu.rs b/src/hyperlight_host/src/hypervisor/fpu.rs deleted file mode 100644 index 3cdd39134..000000000 --- a/src/hyperlight_host/src/hypervisor/fpu.rs +++ /dev/null @@ -1,19 +0,0 @@ -/* -Copyright 2024 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -pub(crate) const FP_CONTROL_WORD_DEFAULT: u16 = 0x37f; // mask all fp-exception, set rounding to nearest, set precision to 64-bit -pub(crate) const FP_TAG_WORD_DEFAULT: u8 = 0xff; // each 8 of x87 fpu registers is empty -pub(crate) const MXCSR_DEFAULT: u32 = 0x1f80; // mask simd fp-exceptions, clear exception flags, set rounding to nearest, disable flush-to-zero mode, disable denormals-are-zero mode diff --git a/src/hyperlight_host/src/hypervisor/gdb/arch.rs b/src/hyperlight_host/src/hypervisor/gdb/arch.rs index 5938112e3..42208d701 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/arch.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/arch.rs @@ -16,16 +16,17 @@ limitations under the License. //! 
This file contains architecture specific code for the x86_64 -use std::collections::HashMap; - use super::VcpuStopReason; +use crate::hypervisor::regs::CommonRegisters; +use crate::hypervisor::vm::Vm; +use crate::Result; // Described in Table 6-1. Exceptions and Interrupts at Page 6-13 Vol. 1 // of Intel 64 and IA-32 Architectures Software Developer's Manual /// Exception id for #DB -const DB_EX_ID: u32 = 1; +pub(crate) const DB_EX_ID: u32 = 1; /// Exception id for #BP - triggered by the INT3 instruction -const BP_EX_ID: u32 = 3; +pub(crate) const BP_EX_ID: u32 = 3; /// Software Breakpoint size in memory pub(crate) const SW_BP_SIZE: usize = 1; @@ -54,58 +55,51 @@ pub(crate) const DR6_HW_BP_FLAGS_MASK: u64 = 0x0F << DR6_HW_BP_FLAGS_POS; /// NOTE: Additional checks are done for the entrypoint, stored hw_breakpoints /// and sw_breakpoints to ensure the stop reason is valid with internal state pub(crate) fn vcpu_stop_reason( - single_step: bool, - rip: u64, - dr6: u64, + vm: &mut dyn Vm, entrypoint: u64, + dr6: u64, exception: u32, - hw_breakpoints: &[u64], - sw_breakpoints: &HashMap, -) -> VcpuStopReason { +) -> Result { + let CommonRegisters { rip, .. } = vm.get_regs()?; if DB_EX_ID == exception { // If the BS flag in DR6 register is set, it means a single step // instruction triggered the exit // Check page 19-4 Vol. 3B of Intel 64 and IA-32 // Architectures Software Developer's Manual - if dr6 & DR6_BS_FLAG_MASK != 0 && single_step { - return VcpuStopReason::DoneStep; + if dr6 & DR6_BS_FLAG_MASK != 0 { + return Ok(VcpuStopReason::DoneStep); } // If any of the B0-B3 flags in DR6 register is set, it means a // hardware breakpoint triggered the exit // Check page 19-4 Vol. 
3B of Intel 64 and IA-32 // Architectures Software Developer's Manual - if DR6_HW_BP_FLAGS_MASK & dr6 != 0 && hw_breakpoints.contains(&rip) { + if DR6_HW_BP_FLAGS_MASK & dr6 != 0 { if rip == entrypoint { - return VcpuStopReason::EntryPointBp; + vm.remove_hw_breakpoint(entrypoint)?; + return Ok(VcpuStopReason::EntryPointBp); } - return VcpuStopReason::HwBp; + return Ok(VcpuStopReason::HwBp); } } - if BP_EX_ID == exception && sw_breakpoints.contains_key(&rip) { - return VcpuStopReason::SwBp; + if BP_EX_ID == exception { + return Ok(VcpuStopReason::SwBp); } // Log an error and provide internal debugging info log::error!( r"The vCPU exited because of an unknown reason: - single_step: {:?} rip: {:?} dr6: {:?} entrypoint: {:?} exception: {:?} - hw_breakpoints: {:?} - sw_breakpoints: {:?} ", - single_step, rip, dr6, entrypoint, exception, - hw_breakpoints, - sw_breakpoints, ); - VcpuStopReason::Unknown + Ok(VcpuStopReason::Unknown) } diff --git a/src/hyperlight_host/src/hypervisor/gdb/kvm_debug.rs b/src/hyperlight_host/src/hypervisor/gdb/kvm_debug.rs deleted file mode 100644 index 354b385b3..000000000 --- a/src/hyperlight_host/src/hypervisor/gdb/kvm_debug.rs +++ /dev/null @@ -1,243 +0,0 @@ -/* -Copyright 2024 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -use std::collections::HashMap; - -use kvm_bindings::{ - kvm_debug_exit_arch, kvm_guest_debug, kvm_regs, KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, - KVM_GUESTDBG_USE_HW_BP, KVM_GUESTDBG_USE_SW_BP, -}; -use kvm_ioctls::VcpuFd; - -use super::arch::{vcpu_stop_reason, MAX_NO_OF_HW_BP, SW_BP_SIZE}; -use super::{GuestDebug, VcpuStopReason, X86_64Regs}; -use crate::{new_error, HyperlightError, Result}; - -/// KVM Debug struct -/// This struct is used to abstract the internal details of the kvm -/// guest debugging settings -#[derive(Default)] -pub(crate) struct KvmDebug { - /// vCPU stepping state - single_step: bool, - - /// Array of addresses for HW breakpoints - hw_breakpoints: Vec, - /// Saves the bytes modified to enable SW breakpoints - sw_breakpoints: HashMap, - - /// Sent to KVM for enabling guest debug - dbg_cfg: kvm_guest_debug, -} - -impl KvmDebug { - pub(crate) fn new() -> Self { - let dbg = kvm_guest_debug { - control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP, - ..Default::default() - }; - - Self { - single_step: false, - hw_breakpoints: vec![], - sw_breakpoints: HashMap::new(), - dbg_cfg: dbg, - } - } - - /// Returns the instruction pointer from the stopped vCPU - fn get_instruction_pointer(&self, vcpu_fd: &VcpuFd) -> Result { - let regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?; - - Ok(regs.rip) - } - - /// This method sets the kvm debugreg fields to enable breakpoints at - /// specific addresses - /// - /// The first 4 debug registers are used to set the addresses - /// The 4th and 5th debug registers are obsolete and not used - /// The 7th debug register is used to enable the breakpoints - /// For more information see: DEBUG REGISTERS chapter in the architecture - /// manual - fn set_debug_config(&mut self, vcpu_fd: &VcpuFd, step: bool) -> Result<()> { - let addrs = &self.hw_breakpoints; - - self.dbg_cfg.arch.debugreg = [0; 8]; - for (k, addr) in addrs.iter().enumerate() { - 
self.dbg_cfg.arch.debugreg[k] = *addr; - self.dbg_cfg.arch.debugreg[7] |= 1 << (k * 2); - } - - if !addrs.is_empty() { - self.dbg_cfg.control |= KVM_GUESTDBG_USE_HW_BP; - } else { - self.dbg_cfg.control &= !KVM_GUESTDBG_USE_HW_BP; - } - - if step { - self.dbg_cfg.control |= KVM_GUESTDBG_SINGLESTEP; - } else { - self.dbg_cfg.control &= !KVM_GUESTDBG_SINGLESTEP; - } - - log::debug!("Setting bp: {:?} cfg: {:?}", addrs, self.dbg_cfg); - vcpu_fd - .set_guest_debug(&self.dbg_cfg) - .map_err(|e| new_error!("Could not set guest debug: {:?}", e))?; - - self.single_step = step; - - Ok(()) - } - - /// Get the reason the vCPU has stopped - pub(crate) fn get_stop_reason( - &mut self, - vcpu_fd: &VcpuFd, - debug_exit: kvm_debug_exit_arch, - entrypoint: u64, - ) -> Result { - let rip = self.get_instruction_pointer(vcpu_fd)?; - let rip = self.translate_gva(vcpu_fd, rip)?; - - // Check if the vCPU stopped because of a hardware breakpoint - let reason = vcpu_stop_reason( - self.single_step, - rip, - debug_exit.dr6, - entrypoint, - debug_exit.exception, - &self.hw_breakpoints, - &self.sw_breakpoints, - ); - - if let VcpuStopReason::EntryPointBp = reason { - // In case the hw breakpoint is the entry point, remove it to - // avoid hanging here as gdb does not remove breakpoints it - // has not set. - // Gdb expects the target to be stopped when connected. 
- self.remove_hw_breakpoint(vcpu_fd, entrypoint)?; - } - - Ok(reason) - } -} - -impl GuestDebug for KvmDebug { - type Vcpu = VcpuFd; - - fn is_hw_breakpoint(&self, addr: &u64) -> bool { - self.hw_breakpoints.contains(addr) - } - fn is_sw_breakpoint(&self, addr: &u64) -> bool { - self.sw_breakpoints.contains_key(addr) - } - fn save_hw_breakpoint(&mut self, addr: &u64) -> bool { - if self.hw_breakpoints.len() >= MAX_NO_OF_HW_BP { - false - } else { - self.hw_breakpoints.push(*addr); - - true - } - } - fn save_sw_breakpoint_data(&mut self, addr: u64, data: [u8; 1]) { - _ = self.sw_breakpoints.insert(addr, data); - } - fn delete_hw_breakpoint(&mut self, addr: &u64) { - self.hw_breakpoints.retain(|&a| a != *addr); - } - fn delete_sw_breakpoint_data(&mut self, addr: &u64) -> Option<[u8; 1]> { - self.sw_breakpoints.remove(addr) - } - - fn read_regs(&self, vcpu_fd: &Self::Vcpu, regs: &mut X86_64Regs) -> Result<()> { - log::debug!("Read registers"); - let vcpu_regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not read guest registers: {:?}", e))?; - - regs.rax = vcpu_regs.rax; - regs.rbx = vcpu_regs.rbx; - regs.rcx = vcpu_regs.rcx; - regs.rdx = vcpu_regs.rdx; - regs.rsi = vcpu_regs.rsi; - regs.rdi = vcpu_regs.rdi; - regs.rbp = vcpu_regs.rbp; - regs.rsp = vcpu_regs.rsp; - regs.r8 = vcpu_regs.r8; - regs.r9 = vcpu_regs.r9; - regs.r10 = vcpu_regs.r10; - regs.r11 = vcpu_regs.r11; - regs.r12 = vcpu_regs.r12; - regs.r13 = vcpu_regs.r13; - regs.r14 = vcpu_regs.r14; - regs.r15 = vcpu_regs.r15; - - regs.rip = vcpu_regs.rip; - regs.rflags = vcpu_regs.rflags; - - Ok(()) - } - - fn set_single_step(&mut self, vcpu_fd: &Self::Vcpu, enable: bool) -> Result<()> { - self.set_debug_config(vcpu_fd, enable) - } - - fn translate_gva(&self, vcpu_fd: &Self::Vcpu, gva: u64) -> Result { - let tr = vcpu_fd - .translate_gva(gva) - .map_err(|_| HyperlightError::TranslateGuestAddress(gva))?; - - if tr.valid == 0 { - Err(HyperlightError::TranslateGuestAddress(gva)) - } else { - 
Ok(tr.physical_address) - } - } - - fn write_regs(&self, vcpu_fd: &Self::Vcpu, regs: &X86_64Regs) -> Result<()> { - log::debug!("Write registers"); - let new_regs = kvm_regs { - rax: regs.rax, - rbx: regs.rbx, - rcx: regs.rcx, - rdx: regs.rdx, - rsi: regs.rsi, - rdi: regs.rdi, - rbp: regs.rbp, - rsp: regs.rsp, - r8: regs.r8, - r9: regs.r9, - r10: regs.r10, - r11: regs.r11, - r12: regs.r12, - r13: regs.r13, - r14: regs.r14, - r15: regs.r15, - - rip: regs.rip, - rflags: regs.rflags, - }; - - vcpu_fd - .set_regs(&new_regs) - .map_err(|e| new_error!("Could not write guest registers: {:?}", e)) - } -} diff --git a/src/hyperlight_host/src/hypervisor/gdb/mod.rs b/src/hyperlight_host/src/hypervisor/gdb/mod.rs index 46c26d1dc..ef9e32cbb 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/mod.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/mod.rs @@ -14,36 +14,23 @@ See the License for the specific language governing permissions and limitations under the License. */ -mod arch; +pub(crate) mod arch; mod event_loop; -#[cfg(kvm)] -mod kvm_debug; -#[cfg(mshv)] -mod mshv_debug; mod x86_64_target; use std::io::{self, ErrorKind}; use std::net::TcpListener; -use std::sync::{Arc, Mutex}; use std::thread; -use arch::{SW_BP, SW_BP_SIZE}; use crossbeam_channel::{Receiver, Sender, TryRecvError}; use event_loop::event_loop_thread; use gdbstub::conn::ConnectionExt; use gdbstub::stub::GdbStub; use gdbstub::target::TargetError; -use hyperlight_common::mem::PAGE_SIZE; -#[cfg(kvm)] -pub(crate) use kvm_debug::KvmDebug; -#[cfg(mshv)] -pub(crate) use mshv_debug::MshvDebug; use thiserror::Error; use x86_64_target::HyperlightSandboxTarget; -use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; -use crate::mem::layout::SandboxMemoryLayout; -use crate::{new_error, HyperlightError}; +use crate::hypervisor::regs::CommonRegisters; #[derive(Debug, Error)] pub(crate) enum GdbTargetError { @@ -82,29 +69,6 @@ impl From for TargetError { } } -/// Struct that contains the x86_64 core registers 
-#[derive(Debug, Default)] -pub(crate) struct X86_64Regs { - pub(crate) rax: u64, - pub(crate) rbx: u64, - pub(crate) rcx: u64, - pub(crate) rdx: u64, - pub(crate) rsi: u64, - pub(crate) rdi: u64, - pub(crate) rbp: u64, - pub(crate) rsp: u64, - pub(crate) r8: u64, - pub(crate) r9: u64, - pub(crate) r10: u64, - pub(crate) r11: u64, - pub(crate) r12: u64, - pub(crate) r13: u64, - pub(crate) r14: u64, - pub(crate) r15: u64, - pub(crate) rip: u64, - pub(crate) rflags: u64, -} - /// Defines the possible reasons for which a vCPU can be stopped when debugging #[derive(Debug)] pub enum VcpuStopReason { @@ -133,7 +97,7 @@ pub(crate) enum DebugMsg { RemoveSwBreakpoint(u64), Step, WriteAddr(u64, Vec), - WriteRegisters(X86_64Regs), + WriteRegisters(CommonRegisters), } /// Enumerates the possible responses that a hypervisor can provide to a debugger @@ -146,7 +110,7 @@ pub(crate) enum DebugResponse { ErrorOccurred, GetCodeSectionOffset(u64), ReadAddr(Vec), - ReadRegisters(X86_64Regs), + ReadRegisters(CommonRegisters), RemoveHwBreakpoint(bool), RemoveSwBreakpoint(bool), Step, @@ -155,185 +119,9 @@ pub(crate) enum DebugResponse { WriteRegisters, } -/// This trait is used to define common debugging functionality for Hypervisors -pub(crate) trait GuestDebug { - /// Type that wraps the vCPU functionality - type Vcpu; - - /// Returns true whether the provided address is a hardware breakpoint - fn is_hw_breakpoint(&self, addr: &u64) -> bool; - /// Returns true whether the provided address is a software breakpoint - fn is_sw_breakpoint(&self, addr: &u64) -> bool; - /// Stores the address of the hw breakpoint - fn save_hw_breakpoint(&mut self, addr: &u64) -> bool; - /// Stores the data that the sw breakpoint op code replaces - fn save_sw_breakpoint_data(&mut self, addr: u64, data: [u8; 1]); - /// Deletes the address of the hw breakpoint from storage - fn delete_hw_breakpoint(&mut self, addr: &u64); - /// Retrieves the saved data that the sw breakpoint op code replaces - fn 
delete_sw_breakpoint_data(&mut self, addr: &u64) -> Option<[u8; 1]>; - - /// Read registers - fn read_regs(&self, vcpu_fd: &Self::Vcpu, regs: &mut X86_64Regs) -> crate::Result<()>; - /// Enables or disables stepping and sets the vCPU debug configuration - fn set_single_step(&mut self, vcpu_fd: &Self::Vcpu, enable: bool) -> crate::Result<()>; - /// Translates the guest address to physical address - fn translate_gva(&self, vcpu_fd: &Self::Vcpu, gva: u64) -> crate::Result; - /// Write registers - fn write_regs(&self, vcpu_fd: &Self::Vcpu, regs: &X86_64Regs) -> crate::Result<()>; - - /// Adds hardware breakpoint - fn add_hw_breakpoint(&mut self, vcpu_fd: &Self::Vcpu, addr: u64) -> crate::Result<()> { - let addr = self.translate_gva(vcpu_fd, addr)?; - - if self.is_hw_breakpoint(&addr) { - return Ok(()); - } - - self.save_hw_breakpoint(&addr) - .then(|| self.set_single_step(vcpu_fd, false)) - .ok_or_else(|| new_error!("Failed to save hw breakpoint"))? - } - /// Overwrites the guest memory with the SW Breakpoint op code that instructs - /// the vCPU to stop when is executed and stores the overwritten data to be - /// able to restore it - fn add_sw_breakpoint( - &mut self, - vcpu_fd: &Self::Vcpu, - addr: u64, - dbg_mem_access_fn: Arc>, - ) -> crate::Result<()> { - let addr = self.translate_gva(vcpu_fd, addr)?; - - if self.is_sw_breakpoint(&addr) { - return Ok(()); - } - - // Write breakpoint OP code to write to guest memory - let mut save_data = [0; SW_BP_SIZE]; - self.read_addrs(vcpu_fd, addr, &mut save_data[..], dbg_mem_access_fn.clone())?; - self.write_addrs(vcpu_fd, addr, &SW_BP, dbg_mem_access_fn)?; - - // Save guest memory to restore when breakpoint is removed - self.save_sw_breakpoint_data(addr, save_data); - - Ok(()) - } - /// Copies the data from the guest memory address to the provided slice - /// The address is checked to be a valid guest address - fn read_addrs( - &mut self, - vcpu_fd: &Self::Vcpu, - mut gva: u64, - mut data: &mut [u8], - dbg_mem_access_fn: 
Arc>, - ) -> crate::Result<()> { - let data_len = data.len(); - log::debug!("Read addr: {:X} len: {:X}", gva, data_len); - - while !data.is_empty() { - let gpa = self.translate_gva(vcpu_fd, gva)?; - - let read_len = std::cmp::min( - data.len(), - (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), - ); - let offset = (gpa as usize) - .checked_sub(SandboxMemoryLayout::BASE_ADDRESS) - .ok_or_else(|| { - log::warn!( - "gva=0x{:#X} causes subtract with underflow: \"gpa - BASE_ADDRESS={:#X}-{:#X}\"", - gva, gpa, SandboxMemoryLayout::BASE_ADDRESS); - HyperlightError::TranslateGuestAddress(gva) - })?; - - dbg_mem_access_fn - .try_lock() - .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? - .read(offset, &mut data[..read_len])?; - - data = &mut data[read_len..]; - gva += read_len as u64; - } - - Ok(()) - } - /// Removes hardware breakpoint - fn remove_hw_breakpoint(&mut self, vcpu_fd: &Self::Vcpu, addr: u64) -> crate::Result<()> { - let addr = self.translate_gva(vcpu_fd, addr)?; - - self.is_hw_breakpoint(&addr) - .then(|| { - self.delete_hw_breakpoint(&addr); - self.set_single_step(vcpu_fd, false) - }) - .ok_or_else(|| new_error!("The address: {:?} is not a hw breakpoint", addr))? 
- } - /// Restores the overwritten data to the guest memory - fn remove_sw_breakpoint( - &mut self, - vcpu_fd: &Self::Vcpu, - addr: u64, - dbg_mem_access_fn: Arc>, - ) -> crate::Result<()> { - let addr = self.translate_gva(vcpu_fd, addr)?; - - if self.is_sw_breakpoint(&addr) { - let save_data = self - .delete_sw_breakpoint_data(&addr) - .ok_or_else(|| new_error!("Expected to contain the sw breakpoint address"))?; - - // Restore saved data to the guest's memory - self.write_addrs(vcpu_fd, addr, &save_data, dbg_mem_access_fn)?; - - Ok(()) - } else { - Err(new_error!("The address: {:?} is not a sw breakpoint", addr)) - } - } - /// Copies the data from the provided slice to the guest memory address - /// The address is checked to be a valid guest address - fn write_addrs( - &mut self, - vcpu_fd: &Self::Vcpu, - mut gva: u64, - mut data: &[u8], - dbg_mem_access_fn: Arc>, - ) -> crate::Result<()> { - let data_len = data.len(); - log::debug!("Write addr: {:X} len: {:X}", gva, data_len); - - while !data.is_empty() { - let gpa = self.translate_gva(vcpu_fd, gva)?; - - let write_len = std::cmp::min( - data.len(), - (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), - ); - let offset = (gpa as usize) - .checked_sub(SandboxMemoryLayout::BASE_ADDRESS) - .ok_or_else(|| { - log::warn!( - "gva=0x{:#X} causes subtract with underflow: \"gpa - BASE_ADDRESS={:#X}-{:#X}\"", - gva, gpa, SandboxMemoryLayout::BASE_ADDRESS); - HyperlightError::TranslateGuestAddress(gva) - })?; - - dbg_mem_access_fn - .try_lock() - .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? 
- .write(offset, data)?; - - data = &data[write_len..]; - gva += write_len as u64; - } - - Ok(()) - } -} - /// Debug communication channel that is used for sending a request type and /// receive a different response type +#[derive(Debug)] pub(crate) struct DebugCommChannel { /// Transmit channel tx: Sender, @@ -428,7 +216,7 @@ mod tests { let res = gdb_conn.try_recv(); assert!(res.is_err()); - let res = hyp_conn.send(DebugResponse::ReadRegisters(X86_64Regs::default())); + let res = hyp_conn.send(DebugResponse::ReadRegisters(CommonRegisters::default())); assert!(res.is_ok()); let res = gdb_conn.recv(); diff --git a/src/hyperlight_host/src/hypervisor/gdb/mshv_debug.rs b/src/hyperlight_host/src/hypervisor/gdb/mshv_debug.rs deleted file mode 100644 index 7d48fcae0..000000000 --- a/src/hyperlight_host/src/hypervisor/gdb/mshv_debug.rs +++ /dev/null @@ -1,267 +0,0 @@ -/* -Copyright 2024 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -#[cfg(mshv2)] -extern crate mshv_bindings2 as mshv_bindings; -#[cfg(mshv2)] -extern crate mshv_ioctls2 as mshv_ioctls; - -#[cfg(mshv3)] -extern crate mshv_bindings3 as mshv_bindings; -#[cfg(mshv3)] -extern crate mshv_ioctls3 as mshv_ioctls; - -use std::collections::HashMap; - -use mshv_bindings::{ - DebugRegisters, StandardRegisters, HV_TRANSLATE_GVA_VALIDATE_READ, - HV_TRANSLATE_GVA_VALIDATE_WRITE, -}; -use mshv_ioctls::VcpuFd; - -use super::arch::{vcpu_stop_reason, MAX_NO_OF_HW_BP, SW_BP_SIZE}; -use super::{GuestDebug, VcpuStopReason, X86_64Regs}; -use crate::{new_error, HyperlightError, Result}; - -#[derive(Debug, Default)] -pub(crate) struct MshvDebug { - /// vCPU stepping state - single_step: bool, - - /// Array of addresses for HW breakpoints - hw_breakpoints: Vec, - /// Saves the bytes modified to enable SW breakpoints - sw_breakpoints: HashMap, - - /// Debug registers - dbg_cfg: DebugRegisters, -} - -impl MshvDebug { - pub(crate) fn new() -> Self { - Self { - single_step: false, - hw_breakpoints: vec![], - sw_breakpoints: HashMap::new(), - dbg_cfg: DebugRegisters::default(), - } - } - - /// Returns the instruction pointer from the stopped vCPU - fn get_instruction_pointer(&self, vcpu_fd: &VcpuFd) -> Result { - let regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?; - - Ok(regs.rip) - } - - /// This method sets the vCPU debug register fields to enable breakpoints at - /// specific addresses - /// - /// The first 4 debug registers are used to set the addresses - /// The 4th and 5th debug registers are obsolete and not used - /// The 7th debug register is used to enable the breakpoints - /// For more information see: DEBUG REGISTERS chapter in the architecture - /// manual - fn set_debug_config(&mut self, vcpu_fd: &VcpuFd, step: bool) -> Result<()> { - let addrs = &self.hw_breakpoints; - - let mut dbg_cfg = DebugRegisters::default(); - for (k, addr) in addrs.iter().enumerate() { - match k { - 0 => 
{ - dbg_cfg.dr0 = *addr; - } - 1 => { - dbg_cfg.dr1 = *addr; - } - 2 => { - dbg_cfg.dr2 = *addr; - } - 3 => { - dbg_cfg.dr3 = *addr; - } - _ => { - Err(new_error!("Tried to set more than 4 HW breakpoints"))?; - } - } - dbg_cfg.dr7 |= 1 << (k * 2); - } - - self.dbg_cfg = dbg_cfg; - vcpu_fd - .set_debug_regs(&self.dbg_cfg) - .map_err(|e| new_error!("Could not set guest debug: {:?}", e))?; - - self.single_step = step; - - let mut regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not get registers: {:?}", e))?; - - // Set TF Flag to enable Traps - if self.single_step { - regs.rflags |= 1 << 8; - } else { - regs.rflags &= !(1 << 8); - } - - vcpu_fd - .set_regs(&regs) - .map_err(|e| new_error!("Could not set registers: {:?}", e))?; - - Ok(()) - } - - /// Returns the vCPU stop reason - pub(crate) fn get_stop_reason( - &mut self, - vcpu_fd: &VcpuFd, - exception: u16, - entrypoint: u64, - ) -> Result<VcpuStopReason> { - let regs = vcpu_fd - .get_debug_regs() - .map_err(|e| new_error!("Cannot retrieve debug registers from vCPU: {}", e))?; - - // DR6 register contains debug state related information - let debug_status = regs.dr6; - - let rip = self.get_instruction_pointer(vcpu_fd)?; - let rip = self.translate_gva(vcpu_fd, rip)?; - - let reason = vcpu_stop_reason( - self.single_step, - rip, - debug_status, - entrypoint, - exception as u32, - &self.hw_breakpoints, - &self.sw_breakpoints, - ); - - if let VcpuStopReason::EntryPointBp = reason { - // In case the hw breakpoint is the entry point, remove it to - // avoid hanging here as gdb does not remove breakpoints it - // has not set. - // Gdb expects the target to be stopped when connected.
- self.remove_hw_breakpoint(vcpu_fd, entrypoint)?; - } - - Ok(reason) - } -} - -impl GuestDebug for MshvDebug { - type Vcpu = VcpuFd; - - fn is_hw_breakpoint(&self, addr: &u64) -> bool { - self.hw_breakpoints.contains(addr) - } - fn is_sw_breakpoint(&self, addr: &u64) -> bool { - self.sw_breakpoints.contains_key(addr) - } - fn save_hw_breakpoint(&mut self, addr: &u64) -> bool { - if self.hw_breakpoints.len() >= MAX_NO_OF_HW_BP { - false - } else { - self.hw_breakpoints.push(*addr); - - true - } - } - fn save_sw_breakpoint_data(&mut self, addr: u64, data: [u8; 1]) { - _ = self.sw_breakpoints.insert(addr, data); - } - fn delete_hw_breakpoint(&mut self, addr: &u64) { - self.hw_breakpoints.retain(|&a| a != *addr); - } - fn delete_sw_breakpoint_data(&mut self, addr: &u64) -> Option<[u8; 1]> { - self.sw_breakpoints.remove(addr) - } - - fn read_regs(&self, vcpu_fd: &Self::Vcpu, regs: &mut X86_64Regs) -> Result<()> { - log::debug!("Read registers"); - let vcpu_regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not read guest registers: {:?}", e))?; - - regs.rax = vcpu_regs.rax; - regs.rbx = vcpu_regs.rbx; - regs.rcx = vcpu_regs.rcx; - regs.rdx = vcpu_regs.rdx; - regs.rsi = vcpu_regs.rsi; - regs.rdi = vcpu_regs.rdi; - regs.rbp = vcpu_regs.rbp; - regs.rsp = vcpu_regs.rsp; - regs.r8 = vcpu_regs.r8; - regs.r9 = vcpu_regs.r9; - regs.r10 = vcpu_regs.r10; - regs.r11 = vcpu_regs.r11; - regs.r12 = vcpu_regs.r12; - regs.r13 = vcpu_regs.r13; - regs.r14 = vcpu_regs.r14; - regs.r15 = vcpu_regs.r15; - - regs.rip = vcpu_regs.rip; - regs.rflags = vcpu_regs.rflags; - - Ok(()) - } - - fn set_single_step(&mut self, vcpu_fd: &Self::Vcpu, enable: bool) -> Result<()> { - self.set_debug_config(vcpu_fd, enable) - } - - fn translate_gva(&self, vcpu_fd: &Self::Vcpu, gva: u64) -> Result { - let flags = (HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE) as u64; - let (addr, _) = vcpu_fd - .translate_gva(gva, flags) - .map_err(|_| 
HyperlightError::TranslateGuestAddress(gva))?; - - Ok(addr) - } - - fn write_regs(&self, vcpu_fd: &Self::Vcpu, regs: &X86_64Regs) -> Result<()> { - log::debug!("Write registers"); - let new_regs = StandardRegisters { - rax: regs.rax, - rbx: regs.rbx, - rcx: regs.rcx, - rdx: regs.rdx, - rsi: regs.rsi, - rdi: regs.rdi, - rbp: regs.rbp, - rsp: regs.rsp, - r8: regs.r8, - r9: regs.r9, - r10: regs.r10, - r11: regs.r11, - r12: regs.r12, - r13: regs.r13, - r14: regs.r14, - r15: regs.r15, - - rip: regs.rip, - rflags: regs.rflags, - }; - - vcpu_fd - .set_regs(&new_regs) - .map_err(|e| new_error!("Could not write guest registers: {:?}", e)) - } -} diff --git a/src/hyperlight_host/src/hypervisor/gdb/x86_64_target.rs b/src/hyperlight_host/src/hypervisor/gdb/x86_64_target.rs index a25098bf4..21b0d3bdc 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/x86_64_target.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/x86_64_target.rs @@ -29,7 +29,8 @@ use gdbstub::target::ext::section_offsets::{Offsets, SectionOffsets}; use gdbstub::target::{Target, TargetError, TargetResult}; use gdbstub_arch::x86::X86_64_SSE as GdbTargetArch; -use super::{DebugCommChannel, DebugMsg, DebugResponse, GdbTargetError, X86_64Regs}; +use super::{DebugCommChannel, DebugMsg, DebugResponse, GdbTargetError}; +use crate::hypervisor::regs::CommonRegisters; /// Gdbstub target used by the gdbstub crate to provide GDB protocol implementation pub(crate) struct HyperlightSandboxTarget { @@ -222,7 +223,7 @@ impl SingleThreadBase for HyperlightSandboxTarget { ) -> TargetResult<(), Self> { log::debug!("Write regs"); - let regs = X86_64Regs { + let regs = CommonRegisters { rax: regs.regs[0], rbx: regs.regs[1], rcx: regs.regs[2], @@ -420,7 +421,7 @@ mod tests { // Check response to read registers - send the response first to not be blocked // by the recv call in the target - let msg = DebugResponse::ReadRegisters(X86_64Regs::default()); + let msg = DebugResponse::ReadRegisters(CommonRegisters::default()); let res = 
gdb_conn.send(msg); assert!(res.is_ok()); diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs new file mode 100644 index 000000000..794e32fbd --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -0,0 +1,710 @@ +/* +Copyright 2024 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +use std::convert::TryFrom; +use std::fmt::Debug; +use std::sync::{Arc, Mutex}; + +use log::LevelFilter; +use tracing::{instrument, Span}; + +#[cfg(gdb)] +use super::gdb::{arch, DebugCommChannel, DebugMsg, DebugResponse, VcpuStopReason}; +#[cfg(gdb)] +use super::handlers::DbgMemAccessHandlerWrapper; +use super::handlers::{ + MemAccessHandlerCaller, MemAccessHandlerWrapper, OutBHandlerCaller, OutBHandlerWrapper, +}; +#[cfg(kvm)] +use super::kvm::KvmVm; +#[cfg(mshv)] +use super::mshv::MshvVm; +use super::regs::{ + CommonFpu, CommonRegisters, FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT, +}; +use super::vm::{HyperlightExit, Vm}; +use super::{ + HyperlightVm, InterruptHandle, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, + CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, +}; +#[cfg(crashdump)] +use crate::hypervisor::crashdump; +#[cfg(gdb)] +use crate::hypervisor::vm::DebugExit; +#[cfg(target_os = "windows")] +use crate::hypervisor::whp::WhpVm; +#[cfg(target_os = "windows")] +use crate::hypervisor::wrappers::HandleWrapper; +use 
crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; +use crate::mem::ptr::{GuestPtr, RawPtr}; +use crate::metrics::METRIC_GUEST_CANCELLATION; +use crate::sandbox::hypervisor::HypervisorType; +use crate::HyperlightError::ExecutionCanceledByHost; +use crate::{log_then_return, new_error, HyperlightError, Result}; + +#[cfg(gdb)] +mod debug { + use std::sync::{Arc, Mutex}; + + use hyperlight_common::mem::PAGE_SIZE; + + use super::HyperlightSandbox; + use crate::hypervisor::gdb::{DebugMsg, DebugResponse}; + use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; + use crate::mem::layout::SandboxMemoryLayout; + use crate::{new_error, HyperlightError, Result}; + + impl HyperlightSandbox { + pub(crate) fn process_dbg_request( + &mut self, + req: DebugMsg, + dbg_mem_access_fn: Arc>, + ) -> Result { + if self.gdb_conn.is_some() { + match req { + DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( + self.vm + .add_hw_breakpoint(addr) + .map_err(|e| { + log::error!("Failed to add hw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( + self.vm + .add_sw_breakpoint(addr, dbg_mem_access_fn) + .map_err(|e| { + log::error!("Failed to add sw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::Continue => { + self.vm.set_single_step(false).map_err(|e| { + log::error!("Failed to continue execution: {:?}", e); + + e + })?; + + Ok(DebugResponse::Continue) + } + DebugMsg::DisableDebug => { + self.vm.set_debug(false).map_err(|e| { + log::error!("Failed to disable debugging: {:?}", e); + + e + })?; + + Ok(DebugResponse::DisableDebug) + } + DebugMsg::GetCodeSectionOffset => { + let offset = dbg_mem_access_fn + .try_lock() + .map_err(|e| { + new_error!("Error locking at {}:{}: {}", file!(), line!(), e) + })? 
+ .get_code_offset() + .map_err(|e| { + log::error!("Failed to get code offset: {:?}", e); + + e + })?; + + Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) + } + DebugMsg::ReadAddr(addr, len) => { + let mut data = vec![0u8; len]; + + self.read_addrs(addr, &mut data, dbg_mem_access_fn) + .map_err(|e| { + log::error!("Failed to read from address: {:?}", e); + + e + })?; + + Ok(DebugResponse::ReadAddr(data)) + } + DebugMsg::ReadRegisters => self + .vm + .get_regs() + .map_err(|e| { + log::error!("Failed to read registers: {:?}", e); + + e + }) + .map(DebugResponse::ReadRegisters), + DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( + self.vm + .remove_hw_breakpoint(addr) + .map_err(|e| { + log::error!("Failed to remove hw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( + self.vm + .remove_sw_breakpoint(addr, dbg_mem_access_fn) + .map_err(|e| { + log::error!("Failed to remove sw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::Step => { + self.vm.set_single_step(true).map_err(|e| { + log::error!("Failed to enable step instruction: {:?}", e); + + e + })?; + + Ok(DebugResponse::Step) + } + DebugMsg::WriteAddr(addr, data) => { + self.write_addrs(addr, &data, dbg_mem_access_fn) + .map_err(|e| { + log::error!("Failed to write to address: {:?}", e); + + e + })?; + + Ok(DebugResponse::WriteAddr) + } + DebugMsg::WriteRegisters(regs) => self + .vm + .set_regs(&regs) + .map_err(|e| { + log::error!("Failed to write registers: {:?}", e); + + e + }) + .map(|_| DebugResponse::WriteRegisters), + } + } else { + Err(new_error!("Debugging is not enabled")) + } + } + + pub(crate) fn recv_dbg_msg(&mut self) -> Result<DebugMsg> { + let gdb_conn = self + .gdb_conn + .as_mut() + .ok_or_else(|| new_error!("Debug is not enabled"))?; + + gdb_conn.recv().map_err(|e| { + new_error!( + "Got an error while waiting to receive a message from the gdb thread: {:?}", + e + ) + }) + } + +
pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> { + log::debug!("Sending {:?}", cmd); + + let gdb_conn = self + .gdb_conn + .as_mut() + .ok_or_else(|| new_error!("Debug is not enabled"))?; + + gdb_conn.send(cmd).map_err(|e| { + new_error!( + "Got an error while sending a response message to the gdb thread: {:?}", + e + ) + }) + } + + fn read_addrs( + &mut self, + mut gva: u64, + mut data: &mut [u8], + dbg_mem_access_fn: Arc>, + ) -> crate::Result<()> { + let data_len = data.len(); + log::debug!("Read addr: {:X} len: {:X}", gva, data_len); + + while !data.is_empty() { + let gpa = self.vm.translate_gva(gva)?; + + let read_len = std::cmp::min( + data.len(), + (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), + ); + let offset = (gpa as usize) + .checked_sub(SandboxMemoryLayout::BASE_ADDRESS) + .ok_or_else(|| { + log::warn!( + "gva=0x{:#X} causes subtract with underflow: \"gpa - BASE_ADDRESS={:#X}-{:#X}\"", + gva, gpa, SandboxMemoryLayout::BASE_ADDRESS); + HyperlightError::TranslateGuestAddress(gva) + })?; + + dbg_mem_access_fn + .try_lock() + .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? 
+ .read(offset, &mut data[..read_len])?; + + data = &mut data[read_len..]; + gva += read_len as u64; + } + + Ok(()) + } + + /// Copies the data from the provided slice to the guest memory address + /// The address is checked to be a valid guest address + fn write_addrs( + &mut self, + mut gva: u64, + mut data: &[u8], + dbg_mem_access_fn: Arc>, + ) -> crate::Result<()> { + let data_len = data.len(); + log::debug!("Write addr: {:X} len: {:X}", gva, data_len); + + while !data.is_empty() { + let gpa = self.vm.translate_gva(gva)?; + + let write_len = std::cmp::min( + data.len(), + (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), + ); + let offset = (gpa as usize) + .checked_sub(SandboxMemoryLayout::BASE_ADDRESS) + .ok_or_else(|| { + log::warn!( + "gva=0x{:#X} causes subtract with underflow: \"gpa - BASE_ADDRESS={:#X}-{:#X}\"", + gva, gpa, SandboxMemoryLayout::BASE_ADDRESS); + HyperlightError::TranslateGuestAddress(gva) + })?; + + dbg_mem_access_fn + .try_lock() + .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? 
+ .write(offset, data)?; + + data = &data[write_len..]; + gva += write_len as u64; + } + + Ok(()) + } + } +} + +#[derive(Debug)] +pub(crate) struct HyperlightSandbox { + vm: Box, + entrypoint: u64, + orig_rsp: GuestPtr, + mem_regions: Vec, + #[cfg(gdb)] + gdb_conn: Option>, +} + +impl HyperlightSandbox { + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn new( + hv: &HypervisorType, + mem_regions: Vec, + pml4_addr: u64, + entrypoint: u64, + rsp: u64, + #[cfg(gdb)] gdb_conn: Option>, + #[cfg(target_os = "windows")] handle: HandleWrapper, + ) -> Result { + #[allow(unused_mut)] // needs to be mutable when gdb is enabled + let mut vm: Box = match hv { + #[cfg(kvm)] + HypervisorType::Kvm => Box::new(KvmVm::new()?), + #[cfg(mshv)] + HypervisorType::Mshv => Box::new(MshvVm::new()?), + #[cfg(target_os = "windows")] + HypervisorType::Whp => Box::new(WhpVm::new(handle)?), + }; + + // Safety: We haven't called this before and the regions are valid + unsafe { + vm.map_memory(&mem_regions)?; + } + + let mut sregs = vm.get_sregs()?; + sregs.cr3 = pml4_addr; + sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT; + sregs.cr0 = CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP; + sregs.efer = EFER_LME | EFER_LMA | EFER_SCE | EFER_NX; + sregs.cs.type_ = 11; // 0b1011: Execute/Read, Accessed + sregs.cs.s = 1; // Code/data segment + sregs.cs.present = 1; // Segment is present + sregs.cs.l = 1; // 64-bit segment + vm.set_sregs(&sregs)?; + + #[cfg(gdb)] + let gdb_conn = if let Some(gdb_conn) = gdb_conn { + // Add breakpoint to the entry point address + vm.set_debug(true)?; + vm.add_hw_breakpoint(entrypoint)?; + + Some(gdb_conn) + } else { + None + }; + + let rsp_gp = GuestPtr::try_from(RawPtr::from(rsp))?; + + let ret = Self { + vm, + entrypoint, + orig_rsp: rsp_gp, + mem_regions, + + #[cfg(gdb)] + gdb_conn, + }; + + Ok(ret) + } +} + +impl HyperlightVm for HyperlightSandbox { + /// Implementation of initialise for Hypervisor trait. 
+ #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + fn initialise( + &mut self, + peb_addr: RawPtr, + seed: u64, + page_size: u32, + outb_hdl: OutBHandlerWrapper, + mem_access_hdl: MemAccessHandlerWrapper, + max_guest_log_level: Option<LevelFilter>, + #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, + ) -> Result<()> { + let max_guest_log_level: u64 = match max_guest_log_level { + Some(level) => level as u64, + None => self.get_max_log_level().into(), + }; + + let regs = CommonRegisters { + rip: self.entrypoint, + rsp: self.orig_rsp.absolute()?, + + // function args + rdi: peb_addr.into(), + rsi: seed, + rdx: page_size.into(), + rcx: max_guest_log_level, + rflags: 1 << 1, + ..Default::default() + }; + self.vm.set_regs(&regs)?; + + self.run( + outb_hdl, + mem_access_hdl, + #[cfg(gdb)] + dbg_mem_access_fn, + )?; + + Ok(()) + } + + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + fn dispatch_call_from_host( + &mut self, + dispatch_func_addr: RawPtr, + outb_handle_fn: OutBHandlerWrapper, + mem_access_fn: MemAccessHandlerWrapper, + #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, + ) -> Result<()> { + // Reset general purpose registers, then set RIP and RSP + let regs = CommonRegisters { + rip: dispatch_func_addr.into(), + rsp: self.orig_rsp.absolute()?, + rflags: 1 << 1, + ..Default::default() + }; + self.vm.set_regs(&regs)?; + + // reset fpu state + let fpu = CommonFpu { + fcw: FP_CONTROL_WORD_DEFAULT, + ftwx: FP_TAG_WORD_DEFAULT, + mxcsr: MXCSR_DEFAULT, + ..Default::default() // zero out the rest + }; + self.vm.set_fpu(&fpu)?; + + // run + self.run( + outb_handle_fn, + mem_access_fn, + #[cfg(gdb)] + dbg_mem_access_fn, + )?; + + Ok(()) + } + + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + fn handle_io( + &mut self, + port: u16, + data: Vec<u8>, + outb_handle_fn: OutBHandlerWrapper, + ) -> Result<()> { + // KVM does not need RIP or instruction length, as it automatically sets
the RIP + + let mut padded = [0u8; 4]; + let copy_len = data.len().min(4); + padded[..copy_len].copy_from_slice(&data[..copy_len]); + let val = u32::from_le_bytes(padded); + + if data.is_empty() { + log_then_return!("no data was given in IO interrupt"); + } else { + outb_handle_fn + .try_lock() + .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? + .call(port, val)?; + } + + Ok(()) + } + + fn run( + &mut self, + outb_handle_fn: Arc>, + mem_access_fn: Arc>, + #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, + ) -> Result<()> { + loop { + match self.vm.run_vcpu() { + #[cfg(gdb)] + Ok(HyperlightExit::Debug(debug_exit)) => { + match debug_exit { + DebugExit::Debug { dr6, exception } => { + // Handle debug event (breakpoints) + let stop_reason = arch::vcpu_stop_reason( + self.vm.as_mut(), + self.entrypoint, + dr6, + exception, + )?; + if let Err(e) = + self.handle_debug(dbg_mem_access_fn.clone(), stop_reason) + { + log_then_return!(e); + } + } + DebugExit::Interrupt => { + if let Err(e) = self + .handle_debug(dbg_mem_access_fn.clone(), VcpuStopReason::Interrupt) + { + log_then_return!(e); + } + } + } + } + + Ok(HyperlightExit::Halt()) => { + break; + } + Ok(HyperlightExit::IoOut(port, data)) => { + self.handle_io(port, data, outb_handle_fn.clone())? + } + Ok(HyperlightExit::MmioRead(addr)) => { + #[cfg(crashdump)] + crashdump::crashdump_to_tempfile(self)?; + + match get_memory_access_violation( + addr as usize, + MemoryRegionFlags::READ, + &self.mem_regions, + ) { + Some(MemoryAccess::StackGuardPageViolation) => { + return Err(HyperlightError::StackOverflow()); + } + Some(MemoryAccess::AccessViolation(region_flags)) => { + log_then_return!(HyperlightError::MemoryAccessViolation( + addr, + MemoryRegionFlags::READ, + region_flags + )); + } + None => { + mem_access_fn + .clone() + .try_lock() + .map_err(|e| { + new_error!("Error locking at {}:{}: {}", file!(), line!(), e) + })? 
+ .call()?; + + log_then_return!("MMIO READ access address {:#x}", addr); + } + } + } + Ok(HyperlightExit::MmioWrite(addr)) => { + #[cfg(crashdump)] + crashdump::crashdump_to_tempfile(self)?; + + match get_memory_access_violation( + addr as usize, + MemoryRegionFlags::WRITE, + &self.mem_regions, + ) { + Some(MemoryAccess::StackGuardPageViolation) => { + return Err(HyperlightError::StackOverflow()); + } + Some(MemoryAccess::AccessViolation(region_flags)) => { + log_then_return!(HyperlightError::MemoryAccessViolation( + addr, + MemoryRegionFlags::WRITE, + region_flags + )); + } + None => { + mem_access_fn + .clone() + .try_lock() + .map_err(|e| { + new_error!("Error locking at {}:{}: {}", file!(), line!(), e) + })? + .call()?; + + log_then_return!("MMIO WRITE access address {:#x}", addr); + } + } + } + Ok(HyperlightExit::Cancelled()) => { + // Shutdown is returned when the host has cancelled execution + // After termination, the main thread will re-initialize the VM + metrics::counter!(METRIC_GUEST_CANCELLATION).increment(1); + log_then_return!(ExecutionCanceledByHost()); + } + Ok(HyperlightExit::Unknown(reason)) => { + #[cfg(crashdump)] + crashdump::crashdump_to_tempfile(self)?; + + log_then_return!("Unexpected VM Exit {:?}", reason); + } + Ok(HyperlightExit::Retry()) => continue, + Err(e) => { + #[cfg(crashdump)] + crashdump::crashdump_to_tempfile(self)?; + + return Err(e); + } + } + } + + Ok(()) + } + + fn interrupt_handle(&self) -> Arc { + self.vm.interrupt_handle() + } + + #[cfg(crashdump)] + fn get_memory_regions(&self) -> &[MemoryRegion] { + &self.mem_regions + } + + #[cfg(gdb)] + fn handle_debug( + &mut self, + dbg_mem_access_fn: Arc>, + stop_reason: VcpuStopReason, + ) -> Result<()> { + self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) + .map_err(|e| new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e))?; + + loop { + log::debug!("Debug wait for event to resume vCPU"); + // Wait for a message from gdb + let req = 
self.recv_dbg_msg()?; + log::info!("GDB request: {:?}", req); + + let result = self.process_dbg_request(req, dbg_mem_access_fn.clone()); + + let response = match result { + Ok(response) => response, + // Treat non fatal errors separately so the guest doesn't fail + Err(HyperlightError::TranslateGuestAddress(_)) => DebugResponse::ErrorOccurred, + Err(e) => { + return Err(e); + } + }; + + // If the command was step, continue or disable-debug, we need to resume the vcpu + let cont = matches!( + response, + DebugResponse::Step | DebugResponse::Continue | DebugResponse::DisableDebug + ); + + self.send_dbg_msg(response) + .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; + + if cont { + break; + } + } + + Ok(()) + } + + #[cfg(target_os = "windows")] + fn get_partition_handle(&self) -> windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE { + self.vm.get_partition_handle() + } +} + +/// Describes why an access by the vCPU to a given address was a violation +enum MemoryAccess { + /// The accessed region has the given flags + AccessViolation(MemoryRegionFlags), + /// The accessed region is a stack guard page + StackGuardPageViolation, +} + +/// Determines if a memory access violation occurred at the given address with the given action type.
+fn get_memory_access_violation( + gpa: usize, + tried: MemoryRegionFlags, + mem_regions: &[MemoryRegion], +) -> Option { + // find the region containing the given gpa + let region = mem_regions + .iter() + .find(|region| region.guest_region.contains(&gpa)); + + if let Some(region) = region { + if region.region_type == MemoryRegionType::GuardPage { + return Some(MemoryAccess::StackGuardPageViolation); + } else if !region.flags.contains(tried) { + return Some(MemoryAccess::AccessViolation(region.flags)); + } + } + None +} + +#[cfg(test)] +mod tests {} diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs deleted file mode 100644 index 85dc514b5..000000000 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ /dev/null @@ -1,806 +0,0 @@ -/* -Copyright 2024 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -#[cfg(mshv2)] -extern crate mshv_bindings2 as mshv_bindings; -#[cfg(mshv2)] -extern crate mshv_ioctls2 as mshv_ioctls; - -#[cfg(mshv3)] -extern crate mshv_bindings3 as mshv_bindings; -#[cfg(mshv3)] -extern crate mshv_ioctls3 as mshv_ioctls; - -use std::fmt::{Debug, Formatter}; - -use log::{error, LevelFilter}; -#[cfg(mshv2)] -use mshv_bindings::hv_message; -#[cfg(gdb)] -use mshv_bindings::{ - hv_intercept_parameters, hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, - hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT, mshv_install_intercept, - HV_INTERCEPT_ACCESS_MASK_EXECUTE, -}; -use mshv_bindings::{ - hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, - hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, hv_register_assoc, - hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_user_mem_region, - FloatingPointUnit, SegmentRegister, SpecialRegisters, StandardRegisters, -}; -#[cfg(mshv3)] -use mshv_bindings::{ - hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, - hv_partition_synthetic_processor_features, -}; -use mshv_ioctls::{Mshv, VcpuFd, VmFd}; -use tracing::{instrument, Span}; - -use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT}; -#[cfg(gdb)] -use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, MshvDebug}; -#[cfg(gdb)] -use super::handlers::DbgMemAccessHandlerWrapper; -use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; -use super::{ - Hypervisor, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, - CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, -}; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; -use crate::hypervisor::HyperlightExit; -use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::mem::ptr::{GuestPtr, RawPtr}; -#[cfg(gdb)] -use crate::HyperlightError; -use crate::{log_then_return, new_error, Result}; - 
-#[cfg(gdb)] -mod debug { - use std::sync::{Arc, Mutex}; - - use super::mshv_bindings::hv_x64_exception_intercept_message; - use super::{HypervLinuxDriver, *}; - use crate::hypervisor::gdb::{DebugMsg, DebugResponse, VcpuStopReason, X86_64Regs}; - use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; - use crate::{new_error, Result}; - - impl HypervLinuxDriver { - /// Resets the debug information to disable debugging - fn disable_debug(&mut self) -> Result<()> { - let mut debug = MshvDebug::default(); - - debug.set_single_step(&self.vcpu_fd, false)?; - - self.debug = Some(debug); - - Ok(()) - } - - /// Get the reason the vCPU has stopped - pub(crate) fn get_stop_reason( - &mut self, - ex_info: hv_x64_exception_intercept_message, - ) -> Result { - let debug = self - .debug - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - debug.get_stop_reason(&self.vcpu_fd, ex_info.exception_vector, self.entrypoint) - } - - pub(crate) fn process_dbg_request( - &mut self, - req: DebugMsg, - dbg_mem_access_fn: Arc>, - ) -> Result { - if let Some(debug) = self.debug.as_mut() { - match req { - DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( - debug - .add_hw_breakpoint(&self.vcpu_fd, addr) - .map_err(|e| { - log::error!("Failed to add hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( - debug - .add_sw_breakpoint(&self.vcpu_fd, addr, dbg_mem_access_fn) - .map_err(|e| { - log::error!("Failed to add sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Continue => { - debug.set_single_step(&self.vcpu_fd, false).map_err(|e| { - log::error!("Failed to continue execution: {:?}", e); - - e - })?; - - Ok(DebugResponse::Continue) - } - DebugMsg::DisableDebug => { - self.disable_debug().map_err(|e| { - log::error!("Failed to disable debugging: {:?}", e); - - e - })?; - - Ok(DebugResponse::DisableDebug) - } - DebugMsg::GetCodeSectionOffset => { - let offset = 
dbg_mem_access_fn - .try_lock() - .map_err(|e| { - new_error!("Error locking at {}:{}: {}", file!(), line!(), e) - })? - .get_code_offset() - .map_err(|e| { - log::error!("Failed to get code offset: {:?}", e); - - e - })?; - - Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) - } - DebugMsg::ReadAddr(addr, len) => { - let mut data = vec![0u8; len]; - - debug - .read_addrs(&self.vcpu_fd, addr, &mut data, dbg_mem_access_fn) - .map_err(|e| { - log::error!("Failed to read from address: {:?}", e); - - e - })?; - - Ok(DebugResponse::ReadAddr(data)) - } - DebugMsg::ReadRegisters => { - let mut regs = X86_64Regs::default(); - - debug - .read_regs(&self.vcpu_fd, &mut regs) - .map_err(|e| { - log::error!("Failed to read registers: {:?}", e); - - e - }) - .map(|_| DebugResponse::ReadRegisters(regs)) - } - DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( - debug - .remove_hw_breakpoint(&self.vcpu_fd, addr) - .map_err(|e| { - log::error!("Failed to remove hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( - debug - .remove_sw_breakpoint(&self.vcpu_fd, addr, dbg_mem_access_fn) - .map_err(|e| { - log::error!("Failed to remove sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Step => { - debug.set_single_step(&self.vcpu_fd, true).map_err(|e| { - log::error!("Failed to enable step instruction: {:?}", e); - - e - })?; - - Ok(DebugResponse::Step) - } - DebugMsg::WriteAddr(addr, data) => { - debug - .write_addrs(&self.vcpu_fd, addr, &data, dbg_mem_access_fn) - .map_err(|e| { - log::error!("Failed to write to address: {:?}", e); - - e - })?; - - Ok(DebugResponse::WriteAddr) - } - DebugMsg::WriteRegisters(regs) => debug - .write_regs(&self.vcpu_fd, ®s) - .map_err(|e| { - log::error!("Failed to write registers: {:?}", e); - - e - }) - .map(|_| DebugResponse::WriteRegisters), - } - } else { - Err(new_error!("Debugging is not enabled")) - } - } - - pub(crate) 
fn recv_dbg_msg(&mut self) -> Result { - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - gdb_conn.recv().map_err(|e| { - new_error!( - "Got an error while waiting to receive a - message: {:?}", - e - ) - }) - } - - pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> { - log::debug!("Sending {:?}", cmd); - - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - gdb_conn - .send(cmd) - .map_err(|e| new_error!("Got an error while sending a response message {:?}", e)) - } - } -} - -/// Determine whether the HyperV for Linux hypervisor API is present -/// and functional. -#[instrument(skip_all, parent = Span::current(), level = "Trace")] -pub(crate) fn is_hypervisor_present() -> bool { - match Mshv::new() { - Ok(_) => true, - Err(_) => { - log::info!("MSHV is not available on this system"); - false - } - } -} - -/// A Hypervisor driver for HyperV-on-Linux. This hypervisor is often -/// called the Microsoft Hypervisor (MSHV) -pub(super) struct HypervLinuxDriver { - _mshv: Mshv, - vm_fd: VmFd, - vcpu_fd: VcpuFd, - entrypoint: u64, - mem_regions: Vec, - orig_rsp: GuestPtr, - - #[cfg(gdb)] - debug: Option, - #[cfg(gdb)] - gdb_conn: Option>, -} - -impl HypervLinuxDriver { - /// Create a new `HypervLinuxDriver`, complete with all registers - /// set up to execute a Hyperlight binary inside a HyperV-powered - /// sandbox on Linux. - /// - /// While registers are set up, they will not have been applied to - /// the underlying virtual CPU after this function returns. Call the - /// `apply_registers` method to do that, or more likely call - /// `initialise` to do it for you. 
- #[instrument(skip_all, parent = Span::current(), level = "Trace")] - pub(super) fn new( - mem_regions: Vec, - entrypoint_ptr: GuestPtr, - rsp_ptr: GuestPtr, - pml4_ptr: GuestPtr, - #[cfg(gdb)] gdb_conn: Option>, - ) -> Result { - let mshv = Mshv::new()?; - let pr = Default::default(); - #[cfg(mshv2)] - let vm_fd = mshv.create_vm_with_config(&pr)?; - #[cfg(mshv3)] - let vm_fd = { - // It's important to avoid create_vm() and explicitly use - // create_vm_with_args() with an empty arguments structure - // here, because otherwise the partition is set up with a SynIC. - - let vm_fd = mshv.create_vm_with_args(&pr)?; - let features: hv_partition_synthetic_processor_features = Default::default(); - vm_fd.hvcall_set_partition_property( - hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, - unsafe { features.as_uint64[0] }, - )?; - vm_fd.initialize()?; - vm_fd - }; - - let mut vcpu_fd = vm_fd.create_vcpu(0)?; - - #[cfg(gdb)] - let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn { - let mut debug = MshvDebug::new(); - debug.add_hw_breakpoint(&vcpu_fd, entrypoint_ptr.absolute()?)?; - - // The bellow intercepts make the vCPU exit with the Exception Intercept exit code - // Check Table 6-1. Exceptions and Interrupts at Page 6-13 Vol. 
1 - // of Intel 64 and IA-32 Architectures Software Developer's Manual - // Install intercept for #DB (1) exception - vm_fd - .install_intercept(mshv_install_intercept { - access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, - intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, - // Exception handler #DB (1) - intercept_parameter: hv_intercept_parameters { - exception_vector: 0x1, - }, - }) - .map_err(|e| new_error!("Cannot install debug exception intercept: {}", e))?; - - // Install intercept for #BP (3) exception - vm_fd - .install_intercept(mshv_install_intercept { - access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, - intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, - // Exception handler #BP (3) - intercept_parameter: hv_intercept_parameters { - exception_vector: 0x3, - }, - }) - .map_err(|e| new_error!("Cannot install breakpoint exception intercept: {}", e))?; - - (Some(debug), Some(gdb_conn)) - } else { - (None, None) - }; - - mem_regions.iter().try_for_each(|region| { - let mshv_region = region.to_owned().into(); - vm_fd.map_user_memory(mshv_region) - })?; - - Self::setup_initial_sregs(&mut vcpu_fd, pml4_ptr.absolute()?)?; - - Ok(Self { - _mshv: mshv, - vm_fd, - vcpu_fd, - mem_regions, - entrypoint: entrypoint_ptr.absolute()?, - orig_rsp: rsp_ptr, - - #[cfg(gdb)] - debug, - #[cfg(gdb)] - gdb_conn, - }) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn setup_initial_sregs(vcpu: &mut VcpuFd, pml4_addr: u64) -> Result<()> { - let sregs = SpecialRegisters { - cr0: CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP, - cr4: CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT, - cr3: pml4_addr, - efer: EFER_LME | EFER_LMA | EFER_SCE | EFER_NX, - cs: SegmentRegister { - type_: 11, - present: 1, - s: 1, - l: 1, - ..Default::default() - }, - tr: SegmentRegister { - limit: 65535, - type_: 11, - present: 1, - ..Default::default() - }, - ..Default::default() - }; - vcpu.set_sregs(&sregs)?; - Ok(()) - } -} - 
-impl Debug for HypervLinuxDriver { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let mut f = f.debug_struct("Hyperv Linux Driver"); - - f.field("Entrypoint", &self.entrypoint) - .field("Original RSP", &self.orig_rsp); - - for region in &self.mem_regions { - f.field("Memory Region", ®ion); - } - - let regs = self.vcpu_fd.get_regs(); - - if let Ok(regs) = regs { - f.field("Registers", ®s); - } - - let sregs = self.vcpu_fd.get_sregs(); - - if let Ok(sregs) = sregs { - f.field("Special Registers", &sregs); - } - - f.finish() - } -} - -impl Hypervisor for HypervLinuxDriver { - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn initialise( - &mut self, - peb_addr: RawPtr, - seed: u64, - page_size: u32, - outb_hdl: OutBHandlerWrapper, - mem_access_hdl: MemAccessHandlerWrapper, - hv_handler: Option, - max_guest_log_level: Option, - #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, - ) -> Result<()> { - let max_guest_log_level: u64 = match max_guest_log_level { - Some(level) => level as u64, - None => self.get_max_log_level().into(), - }; - - let regs = StandardRegisters { - rip: self.entrypoint, - rsp: self.orig_rsp.absolute()?, - rflags: 2, //bit 1 of rlags is required to be set - - // function args - rdi: peb_addr.into(), - rsi: seed, - rdx: page_size.into(), - rcx: max_guest_log_level, - - ..Default::default() - }; - self.vcpu_fd.set_regs(®s)?; - - VirtualCPU::run( - self.as_mut_hypervisor(), - hv_handler, - outb_hdl, - mem_access_hdl, - #[cfg(gdb)] - dbg_mem_access_fn, - )?; - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn dispatch_call_from_host( - &mut self, - dispatch_func_addr: RawPtr, - outb_handle_fn: OutBHandlerWrapper, - mem_access_fn: MemAccessHandlerWrapper, - hv_handler: Option, - #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, - ) -> Result<()> { - // Reset general purpose registers, then set RIP and RSP - let regs = StandardRegisters 
{ - rip: dispatch_func_addr.into(), - rsp: self.orig_rsp.absolute()?, - rflags: 2, //bit 1 of rlags is required to be set - ..Default::default() - }; - self.vcpu_fd.set_regs(®s)?; - - // reset fpu state - let fpu = FloatingPointUnit { - fcw: FP_CONTROL_WORD_DEFAULT, - ftwx: FP_TAG_WORD_DEFAULT, - mxcsr: MXCSR_DEFAULT, - ..Default::default() // zero out the rest - }; - self.vcpu_fd.set_fpu(&fpu)?; - - // run - VirtualCPU::run( - self.as_mut_hypervisor(), - hv_handler, - outb_handle_fn, - mem_access_fn, - #[cfg(gdb)] - dbg_mem_access_fn, - )?; - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn handle_io( - &mut self, - port: u16, - data: Vec, - rip: u64, - instruction_length: u64, - outb_handle_fn: OutBHandlerWrapper, - ) -> Result<()> { - let mut padded = [0u8; 4]; - let copy_len = data.len().min(4); - padded[..copy_len].copy_from_slice(&data[..copy_len]); - let val = u32::from_le_bytes(padded); - - outb_handle_fn - .try_lock() - .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? 
- .call(port, val)?; - - // update rip - self.vcpu_fd.set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_RIP, - value: hv_register_value { - reg64: rip + instruction_length, - }, - ..Default::default() - }])?; - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn run(&mut self) -> Result { - const HALT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_HALT; - const IO_PORT_INTERCEPT_MESSAGE: hv_message_type = - hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT; - const UNMAPPED_GPA_MESSAGE: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA; - const INVALID_GPA_ACCESS_MESSAGE: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT; - #[cfg(gdb)] - const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; - - #[cfg(mshv2)] - let run_result = { - let hv_message: hv_message = Default::default(); - &self.vcpu_fd.run(hv_message) - }; - #[cfg(mshv3)] - let run_result = &self.vcpu_fd.run(); - - let result = match run_result { - Ok(m) => match m.header.message_type { - HALT_MESSAGE => { - crate::debug!("mshv - Halt Details : {:#?}", &self); - HyperlightExit::Halt() - } - IO_PORT_INTERCEPT_MESSAGE => { - let io_message = m.to_ioport_info()?; - let port_number = io_message.port_number; - let rip = io_message.header.rip; - let rax = io_message.rax; - let instruction_length = io_message.header.instruction_length() as u64; - crate::debug!("mshv IO Details : \nPort : {}\n{:#?}", port_number, &self); - HyperlightExit::IoOut( - port_number, - rax.to_le_bytes().to_vec(), - rip, - instruction_length, - ) - } - UNMAPPED_GPA_MESSAGE => { - let mimo_message = m.to_memory_info()?; - let addr = mimo_message.guest_physical_address; - crate::debug!( - "mshv MMIO unmapped GPA -Details: Address: {} \n {:#?}", - addr, - &self - ); - HyperlightExit::Mmio(addr) - } - INVALID_GPA_ACCESS_MESSAGE => { - let mimo_message = m.to_memory_info()?; - let gpa = mimo_message.guest_physical_address; - let 
access_info = MemoryRegionFlags::try_from(mimo_message)?; - crate::debug!( - "mshv MMIO invalid GPA access -Details: Address: {} \n {:#?}", - gpa, - &self - ); - match self.get_memory_access_violation( - gpa as usize, - &self.mem_regions, - access_info, - ) { - Some(access_info_violation) => access_info_violation, - None => HyperlightExit::Mmio(gpa), - } - } - // The only case an intercept exit is expected is when debugging is enabled - // and the intercepts are installed. - // Provide the extra information about the exception to accurately determine - // the stop reason - #[cfg(gdb)] - EXCEPTION_INTERCEPT => { - // Extract exception info from the message so we can figure out - // more information about the vCPU state - let ex_info = match m.to_exception_info() { - Ok(info) => info, - Err(e) => { - log_then_return!("Error converting to exception info: {:?}", e); - } - }; - - match self.get_stop_reason(ex_info) { - Ok(reason) => HyperlightExit::Debug(reason), - Err(e) => { - log_then_return!("Error getting stop reason: {:?}", e); - } - } - } - other => { - crate::debug!("mshv Other Exit: Exit: {:#?} \n {:#?}", other, &self); - log_then_return!("unknown Hyper-V run message type {:?}", other); - } - }, - Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled - libc::EINTR => HyperlightExit::Cancelled(), - libc::EAGAIN => HyperlightExit::Retry(), - _ => { - crate::debug!("mshv Error - Details: Error: {} \n {:#?}", e, &self); - log_then_return!("Error running VCPU {:?}", e); - } - }, - }; - Ok(result) - } - - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor { - self as &mut dyn Hypervisor - } - - #[cfg(crashdump)] - fn get_memory_regions(&self) -> &[MemoryRegion] { - &self.mem_regions - } - - #[cfg(gdb)] - fn handle_debug( - &mut self, - dbg_mem_access_fn: std::sync::Arc< - std::sync::Mutex, - >, - 
stop_reason: super::gdb::VcpuStopReason, - ) -> Result<()> { - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) - .map_err(|e| new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e))?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - let result = self.process_dbg_request(req, dbg_mem_access_fn.clone()); - - let response = match result { - Ok(response) => response, - // Treat non fatal errors separately so the guest doesn't fail - Err(HyperlightError::TranslateGuestAddress(_)) => DebugResponse::ErrorOccurred, - Err(e) => { - return Err(e); - } - }; - - // If the command was either step or continue, we need to run the vcpu - let cont = matches!( - response, - DebugResponse::Step | DebugResponse::Continue | DebugResponse::DisableDebug - ); - - self.send_dbg_msg(response) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - - if cont { - break; - } - } - - Ok(()) - } -} - -impl Drop for HypervLinuxDriver { - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - fn drop(&mut self) { - for region in &self.mem_regions { - let mshv_region: mshv_user_mem_region = region.to_owned().into(); - match self.vm_fd.unmap_user_memory(mshv_region) { - Ok(_) => (), - Err(e) => error!("Failed to unmap user memory in HyperVOnLinux ({:?})", e), - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::mem::memory_region::MemoryRegionVecBuilder; - use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory}; - - #[rustfmt::skip] - const CODE: [u8; 12] = [ - 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ - 0x00, 0xd8, /* add %bl, %al */ - 0x04, b'0', /* add $'0', %al */ - 0xee, /* out %al, (%dx) */ - /* send a 0 to indicate we're done */ - 0xb0, b'\0', /* mov $'\0', %al */ - 0xee, /* out %al, (%dx) */ - 0xf4, /* HLT */ - ]; - - fn shared_mem_with_code( - code: &[u8], - mem_size: usize, - load_offset: usize, - ) -> Result> { - if 
load_offset > mem_size { - log_then_return!( - "code load offset ({}) > memory size ({})", - load_offset, - mem_size - ); - } - let mut shared_mem = ExclusiveSharedMemory::new(mem_size)?; - shared_mem.copy_from_slice(code, load_offset)?; - Ok(Box::new(shared_mem)) - } - - #[test] - fn create_driver() { - if !super::is_hypervisor_present() { - return; - } - const MEM_SIZE: usize = 0x3000; - let gm = shared_mem_with_code(CODE.as_slice(), MEM_SIZE, 0).unwrap(); - let rsp_ptr = GuestPtr::try_from(0).unwrap(); - let pml4_ptr = GuestPtr::try_from(0).unwrap(); - let entrypoint_ptr = GuestPtr::try_from(0).unwrap(); - let mut regions = MemoryRegionVecBuilder::new(0, gm.base_addr()); - regions.push_page_aligned( - MEM_SIZE, - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE, - crate::mem::memory_region::MemoryRegionType::Code, - ); - super::HypervLinuxDriver::new( - regions.build(), - entrypoint_ptr, - rsp_ptr, - pml4_ptr, - #[cfg(gdb)] - None, - ) - .unwrap(); - } -} diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs deleted file mode 100644 index abc2b77cf..000000000 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ /dev/null @@ -1,523 +0,0 @@ -/* -Copyright 2024 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -use core::ffi::c_void; -use std::fmt; -use std::fmt::{Debug, Formatter}; -use std::string::String; - -use hyperlight_common::mem::PAGE_SIZE_USIZE; -use log::LevelFilter; -use tracing::{instrument, Span}; -use windows::Win32::System::Hypervisor::{ - WHvX64RegisterCr0, WHvX64RegisterCr3, WHvX64RegisterCr4, WHvX64RegisterCs, WHvX64RegisterEfer, - WHV_MEMORY_ACCESS_TYPE, WHV_PARTITION_HANDLE, WHV_REGISTER_VALUE, WHV_RUN_VP_EXIT_CONTEXT, - WHV_RUN_VP_EXIT_REASON, WHV_X64_SEGMENT_REGISTER, WHV_X64_SEGMENT_REGISTER_0, -}; - -use super::fpu::{FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT}; -#[cfg(gdb)] -use super::handlers::DbgMemAccessHandlerWrapper; -use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; -use super::surrogate_process::SurrogateProcess; -use super::surrogate_process_manager::*; -use super::windows_hypervisor_platform::{VMPartition, VMProcessor}; -use super::wrappers::{HandleWrapper, WHvFPURegisters}; -use super::{ - HyperlightExit, Hypervisor, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, - CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, -}; -use crate::hypervisor::fpu::FP_CONTROL_WORD_DEFAULT; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; -use crate::hypervisor::wrappers::WHvGeneralRegisters; -use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::mem::ptr::{GuestPtr, RawPtr}; -use crate::{debug, new_error, Result}; - -/// A Hypervisor driver for HyperV-on-Windows. 
-pub(crate) struct HypervWindowsDriver { - size: usize, // this is the size of the memory region, excluding the 2 surrounding guard pages - processor: VMProcessor, - _surrogate_process: SurrogateProcess, // we need to keep a reference to the SurrogateProcess for the duration of the driver since otherwise it will dropped and the memory mapping will be unmapped and the surrogate process will be returned to the pool - source_address: *mut c_void, // this points into the first guard page - entrypoint: u64, - orig_rsp: GuestPtr, - mem_regions: Vec, -} -/* This does not automatically impl Send/Sync because the host - * address of the shared memory region is a raw pointer, which are - * marked as !Send and !Sync. However, the access patterns used - * here are safe. - */ -unsafe impl Send for HypervWindowsDriver {} -unsafe impl Sync for HypervWindowsDriver {} - -impl HypervWindowsDriver { - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn new( - mem_regions: Vec, - raw_size: usize, - raw_source_address: *mut c_void, - pml4_address: u64, - entrypoint: u64, - rsp: u64, - mmap_file_handle: HandleWrapper, - ) -> Result { - // create and setup hypervisor partition - let mut partition = VMPartition::new(1)?; - - // get a surrogate process with preallocated memory of size SharedMemory::raw_mem_size() - // with guard pages setup - let surrogate_process = { - let mgr = get_surrogate_process_manager()?; - mgr.get_surrogate_process(raw_size, raw_source_address, mmap_file_handle) - }?; - - partition.map_gpa_range(&mem_regions, surrogate_process.process_handle)?; - - let mut proc = VMProcessor::new(partition)?; - Self::setup_initial_sregs(&mut proc, pml4_address)?; - - // subtract 2 pages for the guard pages, since when we copy memory to and from surrogate process, - // we don't want to copy the guard pages themselves (that would cause access violation) - let mem_size = raw_size - 2 * PAGE_SIZE_USIZE; - Ok(Self { - size: mem_size, - 
processor: proc, - _surrogate_process: surrogate_process, - source_address: raw_source_address, - entrypoint, - orig_rsp: GuestPtr::try_from(RawPtr::from(rsp))?, - mem_regions, - }) - } - - fn setup_initial_sregs(proc: &mut VMProcessor, pml4_addr: u64) -> Result<()> { - proc.set_registers(&[ - (WHvX64RegisterCr3, WHV_REGISTER_VALUE { Reg64: pml4_addr }), - ( - WHvX64RegisterCr4, - WHV_REGISTER_VALUE { - Reg64: CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT, - }, - ), - ( - WHvX64RegisterCr0, - WHV_REGISTER_VALUE { - Reg64: CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP, - }, - ), - ( - WHvX64RegisterEfer, - WHV_REGISTER_VALUE { - Reg64: EFER_LME | EFER_LMA | EFER_SCE | EFER_NX, - }, - ), - ( - WHvX64RegisterCs, - WHV_REGISTER_VALUE { - Segment: WHV_X64_SEGMENT_REGISTER { - Anonymous: WHV_X64_SEGMENT_REGISTER_0 { - Attributes: 0b1011 | 1 << 4 | 1 << 7 | 1 << 13, // Type (11: Execute/Read, accessed) | L (64-bit mode) | P (present) | S (code segment) - }, - ..Default::default() // zero out the rest - }, - }, - ), - ])?; - Ok(()) - } - - #[inline] - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn get_exit_details(&self, exit_reason: WHV_RUN_VP_EXIT_REASON) -> Result { - let mut error = String::new(); - error.push_str(&format!( - "Did not receive a halt from Hypervisor as expected - Received {exit_reason:?}!\n" - )); - error.push_str(&format!("Registers: \n{:#?}", self.processor.get_regs()?)); - Ok(error) - } - - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn get_partition_hdl(&self) -> WHV_PARTITION_HANDLE { - self.processor.get_partition_hdl() - } -} - -impl Debug for HypervWindowsDriver { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let mut fs = f.debug_struct("HyperV Driver"); - - fs.field("Size", &self.size) - .field("Source Address", &self.source_address) - .field("Entrypoint", &self.entrypoint) - .field("Original RSP", &self.orig_rsp); - - for region in &self.mem_regions { - 
fs.field("Memory Region", ®ion); - } - - // Get the registers - - let regs = self.processor.get_regs(); - - if let Ok(regs) = regs { - { - fs.field("Registers", ®s); - } - } - - // Get the special registers - - let special_regs = self.processor.get_sregs(); - if let Ok(special_regs) = special_regs { - fs.field("CR0", unsafe { &special_regs.cr0.Reg64 }); - fs.field("CR2", unsafe { &special_regs.cr2.Reg64 }); - fs.field("CR3", unsafe { &special_regs.cr3.Reg64 }); - fs.field("CR4", unsafe { &special_regs.cr4.Reg64 }); - fs.field("CR8", unsafe { &special_regs.cr8.Reg64 }); - fs.field("EFER", unsafe { &special_regs.efer.Reg64 }); - fs.field("APIC_BASE", unsafe { &special_regs.apic_base.Reg64 }); - - // Segment registers - fs.field( - "CS", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Selector: {:?}, Attributes: {:?} }}", - unsafe { &special_regs.cs.Segment.Base }, - unsafe { &special_regs.cs.Segment.Limit }, - unsafe { &special_regs.cs.Segment.Selector }, - unsafe { &special_regs.cs.Segment.Anonymous.Attributes } - ), - ); - fs.field( - "DS", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Selector: {:?}, Attributes: {:?} }}", - unsafe { &special_regs.ds.Segment.Base }, - unsafe { &special_regs.ds.Segment.Limit }, - unsafe { &special_regs.ds.Segment.Selector }, - unsafe { &special_regs.ds.Segment.Anonymous.Attributes } - ), - ); - fs.field( - "ES", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Selector: {:?}, Attributes: {:?} }}", - unsafe { &special_regs.es.Segment.Base }, - unsafe { &special_regs.es.Segment.Limit }, - unsafe { &special_regs.es.Segment.Selector }, - unsafe { &special_regs.es.Segment.Anonymous.Attributes } - ), - ); - fs.field( - "FS", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Selector: {:?}, Attributes: {:?} }}", - unsafe { &special_regs.fs.Segment.Base }, - unsafe { &special_regs.fs.Segment.Limit }, - unsafe { &special_regs.fs.Segment.Selector }, - unsafe { &special_regs.fs.Segment.Anonymous.Attributes } - ), - ); - fs.field( - "GS", - 
&format_args!( - "{{ Base: {:?}, Limit: {:?}, Selector: {:?}, Attributes: {:?} }}", - unsafe { &special_regs.gs.Segment.Base }, - unsafe { &special_regs.gs.Segment.Limit }, - unsafe { &special_regs.gs.Segment.Selector }, - unsafe { &special_regs.gs.Segment.Anonymous.Attributes } - ), - ); - fs.field( - "SS", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Selector: {:?}, Attributes: {:?} }}", - unsafe { &special_regs.ss.Segment.Base }, - unsafe { &special_regs.ss.Segment.Limit }, - unsafe { &special_regs.ss.Segment.Selector }, - unsafe { &special_regs.ss.Segment.Anonymous.Attributes } - ), - ); - fs.field( - "TR", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Selector: {:?}, Attributes: {:?} }}", - unsafe { &special_regs.tr.Segment.Base }, - unsafe { &special_regs.tr.Segment.Limit }, - unsafe { &special_regs.tr.Segment.Selector }, - unsafe { &special_regs.tr.Segment.Anonymous.Attributes } - ), - ); - fs.field( - "LDTR", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Selector: {:?}, Attributes: {:?} }}", - unsafe { &special_regs.ldtr.Segment.Base }, - unsafe { &special_regs.ldtr.Segment.Limit }, - unsafe { &special_regs.ldtr.Segment.Selector }, - unsafe { &special_regs.ldtr.Segment.Anonymous.Attributes } - ), - ); - fs.field( - "GDTR", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Pad: {:?} }}", - unsafe { &special_regs.gdtr.Table.Base }, - unsafe { &special_regs.gdtr.Table.Limit }, - unsafe { &special_regs.gdtr.Table.Pad } - ), - ); - fs.field( - "IDTR", - &format_args!( - "{{ Base: {:?}, Limit: {:?}, Pad: {:?} }}", - unsafe { &special_regs.idtr.Table.Base }, - unsafe { &special_regs.idtr.Table.Limit }, - unsafe { &special_regs.idtr.Table.Pad } - ), - ); - }; - - fs.finish() - } -} - -impl Hypervisor for HypervWindowsDriver { - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn initialise( - &mut self, - peb_address: RawPtr, - seed: u64, - page_size: u32, - outb_hdl: OutBHandlerWrapper, - mem_access_hdl: 
MemAccessHandlerWrapper, - hv_handler: Option, - max_guest_log_level: Option, - #[cfg(gdb)] dbg_mem_access_hdl: DbgMemAccessHandlerWrapper, - ) -> Result<()> { - let max_guest_log_level: u64 = match max_guest_log_level { - Some(level) => level as u64, - None => self.get_max_log_level().into(), - }; - - let regs = WHvGeneralRegisters { - rip: self.entrypoint, - rsp: self.orig_rsp.absolute()?, - - // function args - rdi: peb_address.into(), - rsi: seed, - rdx: page_size.into(), - rcx: max_guest_log_level, - rflags: 1 << 1, // eflags bit index 1 is reserved and always needs to be 1 - - ..Default::default() - }; - self.processor.set_general_purpose_registers(®s)?; - - VirtualCPU::run( - self.as_mut_hypervisor(), - hv_handler, - outb_hdl, - mem_access_hdl, - #[cfg(gdb)] - dbg_mem_access_hdl, - )?; - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn dispatch_call_from_host( - &mut self, - dispatch_func_addr: RawPtr, - outb_hdl: OutBHandlerWrapper, - mem_access_hdl: MemAccessHandlerWrapper, - hv_handler: Option, - #[cfg(gdb)] dbg_mem_access_hdl: DbgMemAccessHandlerWrapper, - ) -> Result<()> { - // Reset general purpose registers, then set RIP and RSP - let regs = WHvGeneralRegisters { - rip: dispatch_func_addr.into(), - rsp: self.orig_rsp.absolute()?, - rflags: 1 << 1, // eflags bit index 1 is reserved and always needs to be 1 - ..Default::default() - }; - self.processor.set_general_purpose_registers(®s)?; - - // reset fpu state - self.processor.set_fpu(&WHvFPURegisters { - fp_control_word: FP_CONTROL_WORD_DEFAULT, - fp_tag_word: FP_TAG_WORD_DEFAULT, - mxcsr: MXCSR_DEFAULT, - ..Default::default() // zero out the rest - })?; - - VirtualCPU::run( - self.as_mut_hypervisor(), - hv_handler, - outb_hdl, - mem_access_hdl, - #[cfg(gdb)] - dbg_mem_access_hdl, - )?; - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn handle_io( - &mut self, - port: u16, - data: Vec, - rip: u64, - 
instruction_length: u64, - outb_handle_fn: OutBHandlerWrapper, - ) -> Result<()> { - let mut padded = [0u8; 4]; - let copy_len = data.len().min(4); - padded[..copy_len].copy_from_slice(&data[..copy_len]); - let val = u32::from_le_bytes(padded); - - outb_handle_fn - .try_lock() - .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? - .call(port, val)?; - - let mut regs = self.processor.get_regs()?; - regs.rip = rip + instruction_length; - self.processor.set_general_purpose_registers(®s) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn run(&mut self) -> Result { - let exit_context: WHV_RUN_VP_EXIT_CONTEXT = self.processor.run()?; - - let result = match exit_context.ExitReason { - // WHvRunVpExitReasonX64IoPortAccess - WHV_RUN_VP_EXIT_REASON(2i32) => { - // size of current instruction is in lower byte of _bitfield - // see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvexitcontextdatatypes) - let instruction_length = exit_context.VpContext._bitfield & 0xF; - unsafe { - debug!( - "HyperV IO Details :\n Port: {:#x} \n {:#?}", - exit_context.Anonymous.IoPortAccess.PortNumber, &self - ); - HyperlightExit::IoOut( - exit_context.Anonymous.IoPortAccess.PortNumber, - exit_context - .Anonymous - .IoPortAccess - .Rax - .to_le_bytes() - .to_vec(), - exit_context.VpContext.Rip, - instruction_length as u64, - ) - } - } - // HvRunVpExitReasonX64Halt - WHV_RUN_VP_EXIT_REASON(8i32) => { - debug!("HyperV Halt Details :\n {:#?}", &self); - HyperlightExit::Halt() - } - // WHvRunVpExitReasonMemoryAccess - WHV_RUN_VP_EXIT_REASON(1i32) => { - let gpa = unsafe { exit_context.Anonymous.MemoryAccess.Gpa }; - let access_info = unsafe { - WHV_MEMORY_ACCESS_TYPE( - // 2 first bits are the access type, see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/memoryaccess#syntax - (exit_context.Anonymous.MemoryAccess.AccessInfo.AsUINT32 & 0b11) as i32, - ) - }; - let 
access_info = MemoryRegionFlags::try_from(access_info)?; - debug!( - "HyperV Memory Access Details :\n GPA: {:#?}\n Access Info :{:#?}\n {:#?} ", - gpa, access_info, &self - ); - - match self.get_memory_access_violation(gpa as usize, &self.mem_regions, access_info) - { - Some(access_info) => access_info, - None => HyperlightExit::Mmio(gpa), - } - } - // WHvRunVpExitReasonCanceled - // Execution was cancelled by the host. - // This will happen when guest code runs for too long - WHV_RUN_VP_EXIT_REASON(8193i32) => { - debug!("HyperV Cancelled Details :\n {:#?}", &self); - HyperlightExit::Cancelled() - } - WHV_RUN_VP_EXIT_REASON(_) => { - debug!( - "HyperV Unexpected Exit Details :#nReason {:#?}\n {:#?}", - exit_context.ExitReason, &self - ); - match self.get_exit_details(exit_context.ExitReason) { - Ok(error) => HyperlightExit::Unknown(error), - Err(e) => HyperlightExit::Unknown(format!("Error getting exit details: {}", e)), - } - } - }; - - Ok(result) - } - - fn get_partition_handle(&self) -> WHV_PARTITION_HANDLE { - self.processor.get_partition_hdl() - } - - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor { - self as &mut dyn Hypervisor - } - - #[cfg(crashdump)] - fn get_memory_regions(&self) -> &[MemoryRegion] { - &self.mem_regions - } -} - -#[cfg(test)] -pub mod tests { - use std::sync::{Arc, Mutex}; - - use serial_test::serial; - - use crate::hypervisor::handlers::{MemAccessHandler, OutBHandler}; - use crate::hypervisor::tests::test_initialise; - use crate::Result; - - #[test] - #[serial] - fn test_init() { - let outb_handler = { - let func: Box Result<()> + Send> = - Box::new(|_, _| -> Result<()> { Ok(()) }); - Arc::new(Mutex::new(OutBHandler::from(func))) - }; - let mem_access_handler = { - let func: Box Result<()> + Send> = Box::new(|| -> Result<()> { Ok(()) }); - Arc::new(Mutex::new(MemAccessHandler::from(func))) - }; - test_initialise(outb_handler, mem_access_handler).unwrap(); - } 
-} diff --git a/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs b/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs index 8e351708c..54b4898bf 100644 --- a/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs +++ b/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs @@ -14,8 +14,6 @@ See the License for the specific language governing permissions and limitations under the License. */ -#[cfg(target_os = "windows")] -use core::ffi::c_void; use std::ops::DerefMut; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; @@ -37,12 +35,13 @@ use windows::Win32::System::Hypervisor::{WHvCancelRunVirtualProcessor, WHV_PARTI #[cfg(gdb)] use super::gdb::create_gdb_thread; +use super::hyperlight_vm::HyperlightSandbox; #[cfg(gdb)] use crate::hypervisor::handlers::DbgMemAccessHandlerWrapper; use crate::hypervisor::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; #[cfg(target_os = "windows")] use crate::hypervisor::wrappers::HandleWrapper; -use crate::hypervisor::Hypervisor; +use crate::hypervisor::HyperlightVm; use crate::mem::layout::SandboxMemoryLayout; use crate::mem::mgr::SandboxMemoryManager; use crate::mem::ptr::{GuestPtr, RawPtr}; @@ -50,7 +49,7 @@ use crate::mem::ptr_offset::Offset; use crate::mem::shared_mem::{GuestSharedMemory, HostSharedMemory, SharedMemory}; #[cfg(gdb)] use crate::sandbox::config::DebugInfo; -use crate::sandbox::hypervisor::{get_available_hypervisor, HypervisorType}; +use crate::sandbox::hypervisor::get_available_hypervisor; #[cfg(target_os = "linux")] use crate::signal_handlers::setup_signal_handlers; use crate::HyperlightError::{ @@ -292,7 +291,7 @@ impl HypervisorHandler { thread::Builder::new() .name("Hypervisor Handler".to_string()) .spawn(move || -> Result<()> { - let mut hv: Option> = None; + let mut hv: Option> = None; for action in to_handler_rx { match action { HypervisorHandlerAction::Initialise => { @@ -819,111 +818,6 @@ pub enum HandlerMsg { Error(HyperlightError), } -fn 
set_up_hypervisor_partition( - mgr: &mut SandboxMemoryManager, - #[cfg(gdb)] debug_info: &Option, -) -> Result> { - let mem_size = u64::try_from(mgr.shared_mem.mem_size())?; - let mut regions = mgr.layout.get_memory_regions(&mgr.shared_mem)?; - let rsp_ptr = { - let rsp_u64 = mgr.set_up_shared_memory(mem_size, &mut regions)?; - let rsp_raw = RawPtr::from(rsp_u64); - GuestPtr::try_from(rsp_raw) - }?; - let base_ptr = GuestPtr::try_from(Offset::from(0))?; - let pml4_ptr = { - let pml4_offset_u64 = u64::try_from(SandboxMemoryLayout::PML4_OFFSET)?; - base_ptr + Offset::from(pml4_offset_u64) - }; - let entrypoint_ptr = { - let entrypoint_total_offset = mgr.load_addr.clone() + mgr.entrypoint_offset; - GuestPtr::try_from(entrypoint_total_offset) - }?; - - if base_ptr != pml4_ptr { - log_then_return!( - "Error: base_ptr ({:#?}) does not equal pml4_ptr ({:#?})", - base_ptr, - pml4_ptr - ); - } - if entrypoint_ptr <= pml4_ptr { - log_then_return!( - "Error: entrypoint_ptr ({:#?}) is not greater than pml4_ptr ({:#?})", - entrypoint_ptr, - pml4_ptr - ); - } - - // Create gdb thread if gdb is enabled and the configuration is provided - #[cfg(gdb)] - let gdb_conn = if let Some(DebugInfo { port }) = debug_info { - let gdb_conn = create_gdb_thread(*port, unsafe { pthread_self() }); - - // in case the gdb thread creation fails, we still want to continue - // without gdb - match gdb_conn { - Ok(gdb_conn) => Some(gdb_conn), - Err(e) => { - log::error!("Could not create gdb connection: {:#}", e); - - None - } - } - } else { - None - }; - - match *get_available_hypervisor() { - #[cfg(mshv)] - Some(HypervisorType::Mshv) => { - let hv = crate::hypervisor::hyperv_linux::HypervLinuxDriver::new( - regions, - entrypoint_ptr, - rsp_ptr, - pml4_ptr, - #[cfg(gdb)] - gdb_conn, - )?; - Ok(Box::new(hv)) - } - - #[cfg(kvm)] - Some(HypervisorType::Kvm) => { - let hv = crate::hypervisor::kvm::KVMDriver::new( - regions, - pml4_ptr.absolute()?, - entrypoint_ptr.absolute()?, - rsp_ptr.absolute()?, - 
#[cfg(gdb)] - gdb_conn, - )?; - Ok(Box::new(hv)) - } - - #[cfg(target_os = "windows")] - Some(HypervisorType::Whp) => { - let mmap_file_handle = mgr - .shared_mem - .with_exclusivity(|e| e.get_mmap_file_handle())?; - let hv = crate::hypervisor::hyperv_windows::HypervWindowsDriver::new( - regions, - mgr.shared_mem.raw_mem_size(), // we use raw_* here because windows driver requires 64K aligned addresses, - mgr.shared_mem.raw_ptr() as *mut c_void, // and instead convert it to base_addr where needed in the driver itself - pml4_ptr.absolute()?, - entrypoint_ptr.absolute()?, - rsp_ptr.absolute()?, - HandleWrapper::from(mmap_file_handle), - )?; - Ok(Box::new(hv)) - } - - _ => { - log_then_return!(NoHypervisorFound()); - } - } -} - #[cfg(test)] mod tests { use std::sync::{Arc, Barrier}; diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index 3dd1cb1fc..4136df569 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -14,36 +14,26 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -use std::convert::TryFrom; -use std::fmt::Debug; #[cfg(gdb)] -use std::sync::{Arc, Mutex}; +use std::collections::HashMap; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::{Arc, LazyLock}; -use kvm_bindings::{kvm_fpu, kvm_regs, kvm_userspace_memory_region, KVM_MEM_READONLY}; +#[cfg(gdb)] +use kvm_bindings::kvm_guest_debug; +use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_READONLY}; use kvm_ioctls::Cap::UserMemory; use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd}; -use log::LevelFilter; use tracing::{instrument, Span}; -use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT}; -#[cfg(gdb)] -use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, KvmDebug, VcpuStopReason}; +use super::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; +use super::vm::{HyperlightExit, InterruptHandle, Vm}; #[cfg(gdb)] -use super::handlers::DbgMemAccessHandlerWrapper; -use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; -use super::{ - HyperlightExit, Hypervisor, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, - CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, -}; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; +use crate::hypervisor::vm::DebugExit; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::mem::ptr::{GuestPtr, RawPtr}; -#[cfg(gdb)] -use crate::HyperlightError; use crate::{log_then_return, new_error, Result}; /// Return `true` if the KVM API is available, version 12, and has UserMemory capability, or `false` otherwise -#[instrument(skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn is_hypervisor_present() -> bool { if let Ok(kvm) = Kvm::new() { let api_version = kvm.get_api_version(); @@ -64,509 +54,165 @@ pub(crate) fn is_hypervisor_present() -> bool { } } -#[cfg(gdb)] -mod debug { - use std::sync::{Arc, Mutex}; - - use kvm_bindings::kvm_debug_exit_arch; - - use 
super::KVMDriver; - use crate::hypervisor::gdb::{ - DebugMsg, DebugResponse, GuestDebug, KvmDebug, VcpuStopReason, X86_64Regs, - }; - use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; - use crate::{new_error, Result}; - - impl KVMDriver { - /// Resets the debug information to disable debugging - fn disable_debug(&mut self) -> Result<()> { - let mut debug = KvmDebug::default(); - - debug.set_single_step(&self.vcpu_fd, false)?; - - self.debug = Some(debug); - - Ok(()) - } - - /// Get the reason the vCPU has stopped - pub(crate) fn get_stop_reason( - &mut self, - debug_exit: kvm_debug_exit_arch, - ) -> Result { - let debug = self - .debug - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - debug.get_stop_reason(&self.vcpu_fd, debug_exit, self.entrypoint) - } - - pub(crate) fn process_dbg_request( - &mut self, - req: DebugMsg, - dbg_mem_access_fn: Arc>, - ) -> Result { - if let Some(debug) = self.debug.as_mut() { - match req { - DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( - debug - .add_hw_breakpoint(&self.vcpu_fd, addr) - .map_err(|e| { - log::error!("Failed to add hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( - debug - .add_sw_breakpoint(&self.vcpu_fd, addr, dbg_mem_access_fn) - .map_err(|e| { - log::error!("Failed to add sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Continue => { - debug.set_single_step(&self.vcpu_fd, false).map_err(|e| { - log::error!("Failed to continue execution: {:?}", e); - - e - })?; - - Ok(DebugResponse::Continue) - } - DebugMsg::DisableDebug => { - self.disable_debug().map_err(|e| { - log::error!("Failed to disable debugging: {:?}", e); - - e - })?; - - Ok(DebugResponse::DisableDebug) - } - DebugMsg::GetCodeSectionOffset => { - let offset = dbg_mem_access_fn - .try_lock() - .map_err(|e| { - new_error!("Error locking at {}:{}: {}", file!(), line!(), e) - })? 
- .get_code_offset() - .map_err(|e| { - log::error!("Failed to get code offset: {:?}", e); - - e - })?; - - Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) - } - DebugMsg::ReadAddr(addr, len) => { - let mut data = vec![0u8; len]; - - debug - .read_addrs(&self.vcpu_fd, addr, &mut data, dbg_mem_access_fn) - .map_err(|e| { - log::error!("Failed to read from address: {:?}", e); - - e - })?; - - Ok(DebugResponse::ReadAddr(data)) - } - DebugMsg::ReadRegisters => { - let mut regs = X86_64Regs::default(); - - debug - .read_regs(&self.vcpu_fd, &mut regs) - .map_err(|e| { - log::error!("Failed to read registers: {:?}", e); - - e - }) - .map(|_| DebugResponse::ReadRegisters(regs)) - } - DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( - debug - .remove_hw_breakpoint(&self.vcpu_fd, addr) - .map_err(|e| { - log::error!("Failed to remove hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( - debug - .remove_sw_breakpoint(&self.vcpu_fd, addr, dbg_mem_access_fn) - .map_err(|e| { - log::error!("Failed to remove sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Step => { - debug.set_single_step(&self.vcpu_fd, true).map_err(|e| { - log::error!("Failed to enable step instruction: {:?}", e); - - e - })?; - - Ok(DebugResponse::Step) - } - DebugMsg::WriteAddr(addr, data) => { - debug - .write_addrs(&self.vcpu_fd, addr, &data, dbg_mem_access_fn) - .map_err(|e| { - log::error!("Failed to write to address: {:?}", e); - - e - })?; - - Ok(DebugResponse::WriteAddr) - } - DebugMsg::WriteRegisters(regs) => debug - .write_regs(&self.vcpu_fd, ®s) - .map_err(|e| { - log::error!("Failed to write registers: {:?}", e); - - e - }) - .map(|_| DebugResponse::WriteRegisters), - } - } else { - Err(new_error!("Debugging is not enabled")) - } - } - - pub(crate) fn recv_dbg_msg(&mut self) -> Result { - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug 
is not enabled"))?; - - gdb_conn.recv().map_err(|e| { - new_error!( - "Got an error while waiting to receive a message from the gdb thread: {:?}", - e - ) - }) - } - - pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> { - log::debug!("Sending {:?}", cmd); +/// A KVM implementation of a single-vcpu VM +#[derive(Debug)] +pub(super) struct KvmVm { + vm_fd: VmFd, + vcpu_fd: VcpuFd, + interrupt_handle: Arc, + #[cfg(gdb)] + debug: KvmDebug, +} - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; +#[derive(Debug)] +pub(super) struct KvmInterruptHandle { + /// True when the vcpu is currently running and blocking the thread + running: AtomicBool, + /// The thread id on which the vcpu was most recently run on or is currently running on + tid: AtomicU64, + /// Whether the corresponding vm is dropped + dropped: AtomicBool, +} - gdb_conn.send(cmd).map_err(|e| { - new_error!( - "Got an error while sending a response message to the gdb thread: {:?}", - e - ) - }) +impl InterruptHandle for KvmInterruptHandle { + fn kill(&self) { + // The reason why we might need multiple signals is because if we deliver the signal right before + // the vm sets `running` to true, and before the vm calls `VcpuFd::run()`, then the signal is lost because + // the thread is still in userspace. 
+ while self.running.load(Ordering::Relaxed) { + unsafe { libc::pthread_kill(self.tid.load(Ordering::Relaxed) as _, libc::SIGRTMIN()) }; + std::thread::sleep(std::time::Duration::from_micros(50)); } } + fn dropped(&self) -> bool { + self.dropped.load(Ordering::Relaxed) + } } -/// A Hypervisor driver for KVM on Linux -pub(super) struct KVMDriver { - _kvm: Kvm, - _vm_fd: VmFd, - vcpu_fd: VcpuFd, - entrypoint: u64, - orig_rsp: GuestPtr, - mem_regions: Vec, - - #[cfg(gdb)] - debug: Option, - #[cfg(gdb)] - gdb_conn: Option>, +#[cfg(gdb)] +#[derive(Debug, Default)] +struct KvmDebug { + regs: kvm_guest_debug, + sw_breakpoints: HashMap, // addr -> original instruction } -impl KVMDriver { - /// Create a new instance of a `KVMDriver`, with only control registers - /// set. Standard registers will not be set, and `initialise` must - /// be called to do so. - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(super) fn new( - mem_regions: Vec, - pml4_addr: u64, - entrypoint: u64, - rsp: u64, - #[cfg(gdb)] gdb_conn: Option>, - ) -> Result { - let kvm = Kvm::new()?; - - let vm_fd = kvm.create_vm_with_type(0)?; - - let perm_flags = - MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE; - - mem_regions.iter().enumerate().try_for_each(|(i, region)| { - let perm_flags = perm_flags.intersection(region.flags); - let kvm_region = kvm_userspace_memory_region { - slot: i as u32, - guest_phys_addr: region.guest_region.start as u64, - memory_size: (region.guest_region.end - region.guest_region.start) as u64, - userspace_addr: region.host_region.start as u64, - flags: match perm_flags { - MemoryRegionFlags::READ => KVM_MEM_READONLY, - _ => 0, // normal, RWX - }, - }; - unsafe { vm_fd.set_user_memory_region(kvm_region) } - })?; - - let mut vcpu_fd = vm_fd.create_vcpu(0)?; - Self::setup_initial_sregs(&mut vcpu_fd, pml4_addr)?; - - #[cfg(gdb)] - let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn { - let mut debug = 
KvmDebug::new(); - // Add breakpoint to the entry point address - debug.add_hw_breakpoint(&vcpu_fd, entrypoint)?; +static KVM: LazyLock> = + LazyLock::new(|| Kvm::new().map_err(|e| new_error!("Failed to open /dev/kvm: {}", e))); - (Some(debug), Some(gdb_conn)) - } else { - (None, None) - }; - - let rsp_gp = GuestPtr::try_from(RawPtr::from(rsp))?; - - let ret = Self { - _kvm: kvm, - _vm_fd: vm_fd, +impl KvmVm { + /// Create a new instance of a `KvmVm` + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub(super) fn new() -> Result { + let hv = KVM + .as_ref() + .map_err(|e| new_error!("Failed to create KVM instance: {}", e))?; + let vm_fd = hv.create_vm_with_type(0)?; + let vcpu_fd = vm_fd.create_vcpu(0)?; + + Ok(Self { + vm_fd, vcpu_fd, - entrypoint, - orig_rsp: rsp_gp, - mem_regions, - - #[cfg(gdb)] - debug, #[cfg(gdb)] - gdb_conn, - }; - - Ok(ret) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn setup_initial_sregs(vcpu_fd: &mut VcpuFd, pml4_addr: u64) -> Result<()> { - // setup paging and IA-32e (64-bit) mode - let mut sregs = vcpu_fd.get_sregs()?; - sregs.cr3 = pml4_addr; - sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT; - sregs.cr0 = CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP; - sregs.efer = EFER_LME | EFER_LMA | EFER_SCE | EFER_NX; - sregs.cs.l = 1; // required for 64-bit mode - vcpu_fd.set_sregs(&sregs)?; - Ok(()) + debug: KvmDebug::default(), + interrupt_handle: Arc::new(KvmInterruptHandle { + running: AtomicBool::new(false), + tid: AtomicU64::new(unsafe { libc::pthread_self() }), + dropped: AtomicBool::new(false), + }), + }) } } -impl Debug for KVMDriver { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut f = f.debug_struct("KVM Driver"); - // Output each memory region - - for region in &self.mem_regions { - f.field("Memory Region", ®ion); - } - let regs = self.vcpu_fd.get_regs(); - // check that regs is OK and then set field in 
debug struct - - if let Ok(regs) = regs { - f.field("Registers", ®s); - } - - let sregs = self.vcpu_fd.get_sregs(); - - // check that sregs is OK and then set field in debug struct - - if let Ok(sregs) = sregs { - f.field("Special Registers", &sregs); - } - - f.finish() +impl Vm for KvmVm { + fn get_regs(&self) -> Result { + Ok((&self.vcpu_fd.get_regs()?).into()) } -} -impl Hypervisor for KVMDriver { - /// Implementation of initialise for Hypervisor trait. - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn initialise( - &mut self, - peb_addr: RawPtr, - seed: u64, - page_size: u32, - outb_hdl: OutBHandlerWrapper, - mem_access_hdl: MemAccessHandlerWrapper, - hv_handler: Option, - max_guest_log_level: Option, - #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, - ) -> Result<()> { - let max_guest_log_level: u64 = match max_guest_log_level { - Some(level) => level as u64, - None => self.get_max_log_level().into(), - }; - - let regs = kvm_regs { - rip: self.entrypoint, - rsp: self.orig_rsp.absolute()?, - - // function args - rdi: peb_addr.into(), - rsi: seed, - rdx: page_size.into(), - rcx: max_guest_log_level, - - ..Default::default() - }; - self.vcpu_fd.set_regs(®s)?; - - VirtualCPU::run( - self.as_mut_hypervisor(), - hv_handler, - outb_hdl, - mem_access_hdl, - #[cfg(gdb)] - dbg_mem_access_fn, - )?; + fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { + Ok(self.vcpu_fd.set_regs(®s.into())?) 
+ } - Ok(()) + fn get_sregs(&self) -> Result { + Ok((&self.vcpu_fd.get_sregs()?).into()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn dispatch_call_from_host( - &mut self, - dispatch_func_addr: RawPtr, - outb_handle_fn: OutBHandlerWrapper, - mem_access_fn: MemAccessHandlerWrapper, - hv_handler: Option, - #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, - ) -> Result<()> { - // Reset general purpose registers, then set RIP and RSP - let regs = kvm_regs { - rip: dispatch_func_addr.into(), - rsp: self.orig_rsp.absolute()?, - ..Default::default() - }; - self.vcpu_fd.set_regs(®s)?; - - // reset fpu state - let fpu = kvm_fpu { - fcw: FP_CONTROL_WORD_DEFAULT, - ftwx: FP_TAG_WORD_DEFAULT, - mxcsr: MXCSR_DEFAULT, - ..Default::default() // zero out the rest - }; - self.vcpu_fd.set_fpu(&fpu)?; - - // run - VirtualCPU::run( - self.as_mut_hypervisor(), - hv_handler, - outb_handle_fn, - mem_access_fn, - #[cfg(gdb)] - dbg_mem_access_fn, - )?; + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { + Ok(self.vcpu_fd.set_sregs(&sregs.into())?) + } - Ok(()) + fn get_fpu(&self) -> Result { + Ok((&self.vcpu_fd.get_fpu()?).into()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn handle_io( - &mut self, - port: u16, - data: Vec, - _rip: u64, - _instruction_length: u64, - outb_handle_fn: OutBHandlerWrapper, - ) -> Result<()> { - // KVM does not need RIP or instruction length, as it automatically sets the RIP + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { + Ok(self.vcpu_fd.set_fpu(&fpu.into())?) + } - // The payload param for the outb_handle_fn is the first byte - // of the data array cast to an u64. 
Thus, we need to make sure - // the data array has at least one u8, then convert that to an u64 - if data.is_empty() { - log_then_return!("no data was given in IO interrupt"); - } else { - let mut padded = [0u8; 4]; - let copy_len = data.len().min(4); - padded[..copy_len].copy_from_slice(&data[..copy_len]); - let value = u32::from_le_bytes(padded); - - outb_handle_fn - .try_lock() - .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? - .call(port, value)?; + unsafe fn map_memory(&mut self, regions: &[MemoryRegion]) -> Result<()> { + if regions.is_empty() { + return Err(new_error!("No memory regions to map")); } + regions.iter().enumerate().try_for_each(|(i, region)| { + let kvm_region = kvm_userspace_memory_region { + slot: i as u32, + guest_phys_addr: region.guest_region.start as u64, + memory_size: region.guest_region.len() as u64, + userspace_addr: region.host_region.start as u64, + flags: match region.flags { + MemoryRegionFlags::READ => KVM_MEM_READONLY, + _ => 0, // normal, RWX + }, + }; + unsafe { self.vm_fd.set_user_memory_region(kvm_region) } + })?; Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn run(&mut self) -> Result { - let exit_reason = self.vcpu_fd.run(); - let result = match exit_reason { - Ok(VcpuExit::Hlt) => { - crate::debug!("KVM - Halt Details : {:#?}", &self); - HyperlightExit::Halt() - } - Ok(VcpuExit::IoOut(port, data)) => { - // because vcpufd.run() mutably borrows self we cannot pass self to crate::debug! 
macro here - crate::debug!("KVM IO Details : \nPort : {}\nData : {:?}", port, data); - // KVM does not need to set RIP or instruction length so these are set to 0 - HyperlightExit::IoOut(port, data.to_vec(), 0, 0) - } - Ok(VcpuExit::MmioRead(addr, _)) => { - crate::debug!("KVM MMIO Read -Details: Address: {} \n {:#?}", addr, &self); - - match self.get_memory_access_violation( - addr as usize, - &self.mem_regions, - MemoryRegionFlags::READ, - ) { - Some(access_violation_exit) => access_violation_exit, - None => HyperlightExit::Mmio(addr), - } - } - Ok(VcpuExit::MmioWrite(addr, _)) => { - crate::debug!("KVM MMIO Write -Details: Address: {} \n {:#?}", addr, &self); - - match self.get_memory_access_violation( - addr as usize, - &self.mem_regions, - MemoryRegionFlags::WRITE, - ) { - Some(access_violation_exit) => access_violation_exit, - None => HyperlightExit::Mmio(addr), - } - } + fn run_vcpu(&mut self) -> Result { + self.interrupt_handle + .tid + .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); + self.interrupt_handle.running.store(true, Ordering::Relaxed); + + // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here** + // - after we've set the running to true, + // - before we've called `VcpuFd::run()` + // Then the individual signal is lost, because the signal is only processed after we've left userspace. + // Luckily, we keep sending the signal again and again until we see that the atomic `running` is set to false. + + let vcpu_result = self.vcpu_fd.run(); + + // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here** + // - after we've called `VcpuFd::run()` + // - before we've set the running to false + // Then this is fine because the call to `VcpuFd::run()` is already finished, + // the signal handler itself is a no-op, and the signals will stop being sent + // once we've set the `running` to false. 
+ self.interrupt_handle + .running + .store(false, Ordering::Relaxed); + match vcpu_result { + Ok(VcpuExit::Hlt) => Ok(HyperlightExit::Halt()), + Ok(VcpuExit::IoOut(port, data)) => Ok(HyperlightExit::IoOut(port, data.to_vec())), + Ok(VcpuExit::MmioRead(addr, _)) => Ok(HyperlightExit::MmioRead(addr)), + Ok(VcpuExit::MmioWrite(addr, _)) => Ok(HyperlightExit::MmioWrite(addr)), #[cfg(gdb)] // KVM provides architecture specific information about the vCPU state when exiting - Ok(VcpuExit::Debug(debug_exit)) => match self.get_stop_reason(debug_exit) { - Ok(reason) => HyperlightExit::Debug(reason), - Err(e) => { - log_then_return!("Error getting stop reason: {:?}", e); - } - }, + Ok(VcpuExit::Debug(debug_exit)) => Ok(HyperlightExit::Debug(DebugExit::Debug { + dr6: debug_exit.dr6, + exception: debug_exit.exception, + })), Err(e) => match e.errno() { // In case of the gdb feature, the timeout is not enabled, this // exit is because of a signal sent from the gdb thread to the - // hypervisor thread to cancel execution + // hypervisor thread to cancel execution (e.g. Ctrl+C from GDB) #[cfg(gdb)] - libc::EINTR => HyperlightExit::Debug(VcpuStopReason::Interrupt), - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + libc::EINTR => Ok(HyperlightExit::Debug(DebugExit::Interrupt)), + // we send a signal to the thread to cancel execution. 
This results in EINTR being returned #[cfg(not(gdb))] - libc::EINTR => HyperlightExit::Cancelled(), - libc::EAGAIN => HyperlightExit::Retry(), + libc::EINTR => Ok(HyperlightExit::Cancelled()), + libc::EAGAIN => Ok(HyperlightExit::Retry()), _ => { crate::debug!("KVM Error -Details: Address: {} \n {:#?}", e, &self); log_then_return!("Error running VCPU {:?}", e); @@ -575,117 +221,143 @@ impl Hypervisor for KVMDriver { Ok(other) => { let err_msg = format!("Unexpected KVM Exit {:?}", other); crate::debug!("KVM Other Exit Details: {:#?}", &self); - HyperlightExit::Unknown(err_msg) + Ok(HyperlightExit::Unknown(err_msg)) } - }; - Ok(result) + } } - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor { - self as &mut dyn Hypervisor + fn interrupt_handle(&self) -> Arc { + self.interrupt_handle.clone() } - #[cfg(crashdump)] - fn get_memory_regions(&self) -> &[MemoryRegion] { - &self.mem_regions - } + // --- DEBUGGING RELATED BELOW --- #[cfg(gdb)] - fn handle_debug( - &mut self, - dbg_mem_access_fn: Arc>, - stop_reason: VcpuStopReason, - ) -> Result<()> { - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) - .map_err(|e| new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e))?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - let result = self.process_dbg_request(req, dbg_mem_access_fn.clone()); - - let response = match result { - Ok(response) => response, - // Treat non fatal errors separately so the guest doesn't fail - Err(HyperlightError::TranslateGuestAddress(_)) => DebugResponse::ErrorOccurred, - Err(e) => { - return Err(e); - } - }; + fn translate_gva(&self, gva: u64) -> Result { + use crate::HyperlightError; - // If the command was either step or continue, we need to run the vcpu - let cont = matches!( - response, - DebugResponse::Step | DebugResponse::Continue | 
DebugResponse::DisableDebug - ); + let gpa = self.vcpu_fd.translate_gva(gva)?; + if gpa.valid == 0 { + Err(HyperlightError::TranslateGuestAddress(gva)) + } else { + Ok(gpa.physical_address) + } + } - self.send_dbg_msg(response) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; + #[cfg(gdb)] + fn set_debug(&mut self, enable: bool) -> Result<()> { + use kvm_bindings::{KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_USE_HW_BP, KVM_GUESTDBG_USE_SW_BP}; - if cont { - break; - } + log::info!("Setting debug to {}", enable); + if enable { + self.debug.regs.control |= + KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_USE_SW_BP; + } else { + self.debug.regs.control &= + !(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_USE_SW_BP); } - + self.vcpu_fd.set_guest_debug(&self.debug.regs)?; Ok(()) } -} - -#[cfg(test)] -mod tests { - use std::sync::{Arc, Mutex}; #[cfg(gdb)] - use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; - use crate::hypervisor::handlers::{MemAccessHandler, OutBHandler}; - use crate::hypervisor::tests::test_initialise; - use crate::Result; + fn set_single_step(&mut self, enable: bool) -> Result<()> { + use kvm_bindings::KVM_GUESTDBG_SINGLESTEP; + + log::info!("Setting single step to {}", enable); + if enable { + self.debug.regs.control |= KVM_GUESTDBG_SINGLESTEP; + } else { + self.debug.regs.control &= !KVM_GUESTDBG_SINGLESTEP; + } + // Set TF Flag to enable Traps + let mut regs = self.get_regs()?; + if enable { + regs.rflags |= 1 << 8; + } else { + regs.rflags &= !(1 << 8); + } + self.set_regs(®s)?; + Ok(()) + } #[cfg(gdb)] - struct DbgMemAccessHandler {} + fn add_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + use crate::hypervisor::gdb::arch::MAX_NO_OF_HW_BP; + use crate::new_error; + + // Find the first available LOCAL (L0–L3) slot + let i = (0..MAX_NO_OF_HW_BP) + .position(|i| self.debug.regs.arch.debugreg[7] & (1 << (i * 2)) == 0) + .ok_or_else(|| new_error!("Tried to add more than 4 hardware breakpoints"))?; 
+ + // Assign to corresponding debug register + self.debug.regs.arch.debugreg[i] = addr; + + // Enable LOCAL bit + self.debug.regs.arch.debugreg[7] |= 1 << (i * 2); + + self.vcpu_fd.set_guest_debug(&self.debug.regs)?; + Ok(()) + } #[cfg(gdb)] - impl DbgMemAccessHandlerCaller for DbgMemAccessHandler { - fn read(&mut self, _offset: usize, _data: &mut [u8]) -> Result<()> { - Ok(()) - } + fn remove_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + use crate::new_error; - fn write(&mut self, _offset: usize, _data: &[u8]) -> Result<()> { - Ok(()) - } + // Find the index of the breakpoint + let index = self.debug.regs.arch.debugreg[..4] + .iter() + .position(|&a| a == addr) + .ok_or_else(|| new_error!("Tried to remove non-existing hw-breakpoint"))?; - fn get_code_offset(&mut self) -> Result { - Ok(0) - } + // Clear the address + self.debug.regs.arch.debugreg[index] = 0; + // Disable LOCAL bit + self.debug.regs.arch.debugreg[7] &= !(1 << (index * 2)); + + self.vcpu_fd.set_guest_debug(&self.debug.regs)?; + Ok(()) } - #[test] - fn test_init() { - if !super::is_hypervisor_present() { - return; - } + #[cfg(gdb)] + fn add_sw_breakpoint( + &mut self, + addr: u64, + dbg_mem_access_fn: std::sync::Arc< + std::sync::Mutex, + >, + ) -> Result<()> { + use super::gdb::arch::SW_BP_SIZE; + use crate::hypervisor::gdb::arch::SW_BP; + + let mut save_data = [0; SW_BP_SIZE]; + let mut mem = dbg_mem_access_fn.lock().unwrap(); + mem.read(addr as usize, &mut save_data[..])?; + mem.write(addr as usize, &SW_BP)?; + self.debug.sw_breakpoints.insert(addr, save_data[0]); + Ok(()) + } - let outb_handler: Arc> = { - let func: Box Result<()> + Send> = - Box::new(|_, _| -> Result<()> { Ok(()) }); - Arc::new(Mutex::new(OutBHandler::from(func))) - }; - let mem_access_handler = { - let func: Box Result<()> + Send> = Box::new(|| -> Result<()> { Ok(()) }); - Arc::new(Mutex::new(MemAccessHandler::from(func))) - }; - #[cfg(gdb)] - let dbg_mem_access_handler = Arc::new(Mutex::new(DbgMemAccessHandler {})); - 
- test_initialise( - outb_handler, - mem_access_handler, - #[cfg(gdb)] - dbg_mem_access_handler, - ) - .unwrap(); + #[cfg(gdb)] + fn remove_sw_breakpoint( + &mut self, + addr: u64, + dbg_mem_access_fn: std::sync::Arc< + std::sync::Mutex, + >, + ) -> Result<()> { + let original_instr = self.debug.sw_breakpoints.remove(&addr).unwrap(); + dbg_mem_access_fn + .lock() + .unwrap() + .write(addr as usize, &[original_instr])?; + Ok(()) + } +} + +impl Drop for KvmVm { + fn drop(&mut self) { + self.interrupt_handle.dropped.store(true, Ordering::Relaxed); } } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 62cebe829..ffc352b94 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -15,42 +15,43 @@ limitations under the License. */ use log::LevelFilter; -use tracing::{instrument, Span}; -use crate::error::HyperlightError::ExecutionCanceledByHost; -use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::metrics::METRIC_GUEST_CANCELLATION; -use crate::{log_then_return, new_error, HyperlightError, Result}; +use crate::Result; -/// Util for handling x87 fpu state -#[cfg(any(kvm, mshv, target_os = "windows"))] -pub mod fpu; /// Handlers for Hypervisor custom logic pub mod handlers; -/// HyperV-on-linux functionality -#[cfg(mshv)] -pub mod hyperv_linux; +pub(crate) mod hyperlight_vm; + +/// Registers including general purpose registers, special registers, fpu registers +mod regs; +/// Vm trait +mod vm; +pub use vm::InterruptHandle; + +/// Implements vm::Vm trait on Windows using Windows Hypervisor Platform (WHP) #[cfg(target_os = "windows")] -/// Hyperv-on-windows functionality -pub(crate) mod hyperv_windows; -pub(crate) mod hypervisor_handler; +pub(crate) mod whp; + +/// Implements vm::Vm trait on Linux using Microsoft Hypervisor (MSHV) +#[cfg(mshv)] +pub mod mshv; + +/// Implements vm::Vm trait on Linux using Kernel-based Virtual Machine (KVM) 
+#[cfg(kvm)] +pub mod kvm; /// GDB debugging support #[cfg(gdb)] mod gdb; -#[cfg(kvm)] -/// Functionality to manipulate KVM-based virtual machines -pub mod kvm; -#[cfg(target_os = "windows")] /// Hyperlight Surrogate Process -pub(crate) mod surrogate_process; #[cfg(target_os = "windows")] +mod surrogate_process; + /// Hyperlight Surrogate Process -pub(crate) mod surrogate_process_manager; -/// WindowsHypervisorPlatform utilities #[cfg(target_os = "windows")] -pub(crate) mod windows_hypervisor_platform; +mod surrogate_process_manager; + /// Safe wrappers around windows types like `PSTR` #[cfg(target_os = "windows")] pub(crate) mod wrappers; @@ -70,7 +71,6 @@ use self::handlers::{DbgMemAccessHandlerCaller, DbgMemAccessHandlerWrapper}; use self::handlers::{ MemAccessHandlerCaller, MemAccessHandlerWrapper, OutBHandlerCaller, OutBHandlerWrapper, }; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; use crate::mem::ptr::RawPtr; pub(crate) const CR4_PAE: u64 = 1 << 5; @@ -88,34 +88,9 @@ pub(crate) const EFER_LMA: u64 = 1 << 10; pub(crate) const EFER_SCE: u64 = 1; pub(crate) const EFER_NX: u64 = 1 << 11; -/// These are the generic exit reasons that we can handle from a Hypervisor the Hypervisors run method is responsible for mapping from -/// the hypervisor specific exit reasons to these generic ones -pub enum HyperlightExit { - #[cfg(gdb)] - /// The vCPU has exited due to a debug event - Debug(VcpuStopReason), - /// The vCPU has halted - Halt(), - /// The vCPU has issued a write to the given port with the given value - IoOut(u16, Vec, u64, u64), - /// The vCPU has attempted to read or write from an unmapped address - Mmio(u64), - /// The vCPU tried to access memory but was missing the required permissions - AccessViolation(u64, MemoryRegionFlags, MemoryRegionFlags), - /// The vCPU execution has been cancelled - Cancelled(), - /// The vCPU has exited for a reason that is not handled by Hyperlight - Unknown(String), - /// The operation should be retried, for 
example this can happen on Linux where a call to run the CPU can return EAGAIN - Retry(), -} - -/// A common set of hypervisor functionality -/// -/// Note: a lot of these structures take in an `Option`. -/// This is because, if we are coming from the C API, we don't have a HypervisorHandler and have -/// to account for the fact the Hypervisor was set up beforehand. -pub(crate) trait Hypervisor: Debug + Sync + Send { +/// Functionality required by a Hyperlight VM. A Hyperlight VM is a VM capable of executing +/// guest function calls. +pub(crate) trait HyperlightVm: Debug + Sync + Send { /// Initialise the internally stored vCPU with the given PEB address and /// random number seed, then run it until a HLT instruction. #[allow(clippy::too_many_arguments)] @@ -126,7 +101,6 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { page_size: u32, outb_handle_fn: OutBHandlerWrapper, mem_access_fn: MemAccessHandlerWrapper, - hv_handler: Option, guest_max_log_level: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()>; @@ -143,7 +117,6 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { dispatch_func_addr: RawPtr, outb_handle_fn: OutBHandlerWrapper, mem_access_fn: MemAccessHandlerWrapper, - hv_handler: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()>; @@ -152,40 +125,19 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { &mut self, port: u16, data: Vec, - rip: u64, - instruction_length: u64, outb_handle_fn: OutBHandlerWrapper, ) -> Result<()>; /// Run the vCPU - fn run(&mut self) -> Result; - - /// Returns a Some(HyperlightExit::AccessViolation(..)) if the given gpa doesn't have - /// access its corresponding region. Returns None otherwise, or if the region is not found. 
- fn get_memory_access_violation( - &self, - gpa: usize, - mem_regions: &[MemoryRegion], - access_info: MemoryRegionFlags, - ) -> Option { - // find the region containing the given gpa - let region = mem_regions - .iter() - .find(|region| region.guest_region.contains(&gpa)); - - if let Some(region) = region { - if !region.flags.contains(access_info) - || region.flags.contains(MemoryRegionFlags::STACK_GUARD) - { - return Some(HyperlightExit::AccessViolation( - gpa as u64, - access_info, - region.flags, - )); - } - } - None - } + fn run( + &mut self, + outb_handle_fn: Arc>, + mem_access_fn: Arc>, + #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, + ) -> Result<()>; + + /// Get InterruptHandle to underlying VM + fn interrupt_handle(&self) -> Arc; /// Get the logging level to pass to the guest entrypoint fn get_max_log_level(&self) -> u32 { @@ -224,15 +176,12 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { LevelFilter::from_str(level).unwrap_or(LevelFilter::Error) as u32 } - /// get a mutable trait object from self - fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor; - /// Get the partition handle for WHP #[cfg(target_os = "windows")] fn get_partition_handle(&self) -> windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE; #[cfg(crashdump)] - fn get_memory_regions(&self) -> &[MemoryRegion]; + fn get_memory_regions(&self) -> &[crate::mem::memory_region::MemoryRegion]; #[cfg(gdb)] /// handles the cases when the vCPU stops due to a Debug event @@ -240,175 +189,8 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { &mut self, _dbg_mem_access_fn: Arc>, _stop_reason: VcpuStopReason, - ) -> Result<()> { - unimplemented!() - } -} - -/// A virtual CPU that can be run until an exit occurs -pub struct VirtualCPU {} - -impl VirtualCPU { - /// Run the given hypervisor until a halt instruction is reached - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub fn run( - hv: &mut dyn Hypervisor, - hv_handler: Option, - 
outb_handle_fn: Arc>, - mem_access_fn: Arc>, - #[cfg(gdb)] dbg_mem_access_fn: Arc>, - ) -> Result<()> { - loop { - match hv.run() { - #[cfg(gdb)] - Ok(HyperlightExit::Debug(stop_reason)) => { - if let Err(e) = hv.handle_debug(dbg_mem_access_fn.clone(), stop_reason) { - log_then_return!(e); - } - } - - Ok(HyperlightExit::Halt()) => { - break; - } - Ok(HyperlightExit::IoOut(port, data, rip, instruction_length)) => { - hv.handle_io(port, data, rip, instruction_length, outb_handle_fn.clone())? - } - Ok(HyperlightExit::Mmio(addr)) => { - #[cfg(crashdump)] - crashdump::crashdump_to_tempfile(hv)?; - - mem_access_fn - .clone() - .try_lock() - .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? - .call()?; - - log_then_return!("MMIO access address {:#x}", addr); - } - Ok(HyperlightExit::AccessViolation(addr, tried, region_permission)) => { - #[cfg(crashdump)] - crashdump::crashdump_to_tempfile(hv)?; - - if region_permission.intersects(MemoryRegionFlags::STACK_GUARD) { - return Err(HyperlightError::StackOverflow()); - } - log_then_return!(HyperlightError::MemoryAccessViolation( - addr, - tried, - region_permission - )); - } - Ok(HyperlightExit::Cancelled()) => { - // Shutdown is returned when the host has cancelled execution - // After termination, the main thread will re-initialize the VM - if let Some(hvh) = hv_handler { - // If hvh is None, then we are running from the C API, which doesn't use - // the HypervisorHandler - hvh.set_running(false); - #[cfg(target_os = "linux")] - hvh.set_run_cancelled(true); - } - metrics::counter!(METRIC_GUEST_CANCELLATION).increment(1); - log_then_return!(ExecutionCanceledByHost()); - } - Ok(HyperlightExit::Unknown(reason)) => { - #[cfg(crashdump)] - crashdump::crashdump_to_tempfile(hv)?; - - log_then_return!("Unexpected VM Exit {:?}", reason); - } - Ok(HyperlightExit::Retry()) => continue, - Err(e) => { - #[cfg(crashdump)] - crashdump::crashdump_to_tempfile(hv)?; - - return Err(e); - } - } - } - - Ok(()) - } + ) 
-> Result<()>; } -#[cfg(all(test, any(target_os = "windows", kvm)))] -pub(crate) mod tests { - use std::path::Path; - use std::sync::{Arc, Mutex}; - use std::time::Duration; - - use hyperlight_testing::dummy_guest_as_string; - - #[cfg(gdb)] - use super::handlers::DbgMemAccessHandlerWrapper; - use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; - use crate::hypervisor::hypervisor_handler::{ - HvHandlerConfig, HypervisorHandler, HypervisorHandlerAction, - }; - use crate::mem::ptr::RawPtr; - use crate::sandbox::uninitialized::GuestBinary; - use crate::sandbox::{SandboxConfiguration, UninitializedSandbox}; - use crate::{new_error, Result}; - - pub(crate) fn test_initialise( - outb_hdl: OutBHandlerWrapper, - mem_access_hdl: MemAccessHandlerWrapper, - #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, - ) -> Result<()> { - let filename = dummy_guest_as_string().map_err(|e| new_error!("{}", e))?; - if !Path::new(&filename).exists() { - return Err(new_error!( - "test_initialise: file {} does not exist", - filename - )); - } - - let sandbox = UninitializedSandbox::new(GuestBinary::FilePath(filename.clone()), None)?; - let (hshm, gshm) = sandbox.mgr.build(); - drop(hshm); - - let hv_handler_config = HvHandlerConfig { - outb_handler: outb_hdl, - mem_access_handler: mem_access_hdl, - #[cfg(gdb)] - dbg_mem_access_handler: dbg_mem_access_fn, - seed: 1234567890, - page_size: 4096, - peb_addr: RawPtr::from(0x230000), - dispatch_function_addr: Arc::new(Mutex::new(None)), - max_init_time: Duration::from_millis( - SandboxConfiguration::DEFAULT_MAX_INITIALIZATION_TIME as u64, - ), - max_exec_time: Duration::from_millis( - SandboxConfiguration::DEFAULT_MAX_EXECUTION_TIME as u64, - ), - max_wait_for_cancellation: Duration::from_millis( - SandboxConfiguration::DEFAULT_MAX_WAIT_FOR_CANCELLATION as u64, - ), - max_guest_log_level: None, - }; - - let mut hv_handler = HypervisorHandler::new(hv_handler_config); - - // call initialise on the hypervisor implementation 
with specific values - // for PEB (process environment block) address, seed and page size. - // - // these values are not actually used, they're just checked inside - // the dummy guest, and if they don't match these values, the dummy - // guest issues a write to an invalid memory address, which in turn - // fails this test. - // - // in this test, we're not actually testing whether a guest can issue - // memory operations, call functions, etc... - we're just testing - // whether we can configure the shared memory region, load a binary - // into it, and run the CPU to completion (e.g., a HLT interrupt) - - hv_handler.start_hypervisor_handler( - gshm, - #[cfg(gdb)] - None, - )?; - - hv_handler.execute_hypervisor_handler_action(HypervisorHandlerAction::Initialise) - } -} +#[cfg(test)] +mod tests {} diff --git a/src/hyperlight_host/src/hypervisor/mshv.rs b/src/hyperlight_host/src/hypervisor/mshv.rs new file mode 100644 index 000000000..0bb8be5fa --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/mshv.rs @@ -0,0 +1,476 @@ +/* +Copyright 2024 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#[cfg(mshv2)] +extern crate mshv_bindings2 as mshv_bindings; +#[cfg(mshv2)] +extern crate mshv_ioctls2 as mshv_ioctls; + +#[cfg(mshv3)] +extern crate mshv_bindings3 as mshv_bindings; +#[cfg(mshv3)] +extern crate mshv_ioctls3 as mshv_ioctls; + +#[cfg(gdb)] +use std::collections::HashMap; +#[cfg(gdb)] +use std::fmt::Debug; +use std::sync::Arc; +use std::sync::LazyLock; + +#[cfg(mshv2)] +use mshv_bindings::hv_message; +#[cfg(gdb)] +use mshv_bindings::hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; +#[cfg(gdb)] +use mshv_bindings::DebugRegisters; +use mshv_bindings::{ + hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, + hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, hv_register_assoc, + hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, +}; +#[cfg(mshv3)] +use mshv_bindings::{ + hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, + hv_partition_synthetic_processor_features, +}; +use mshv_ioctls::{Mshv, VcpuFd, VmFd}; +use tracing::{instrument, Span}; + +#[cfg(gdb)] +use super::handlers::DbgMemAccessHandlerCaller; +use super::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; +use super::vm::{HyperlightExit, Vm}; +use super::InterruptHandle; +#[cfg(gdb)] +use crate::hypervisor::vm::DebugExit; +use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; +use crate::{log_then_return, new_error, Result}; + +/// Determine whether the HyperV for Linux hypervisor API is present +/// and functional. 
+#[instrument(skip_all, parent = Span::current(), level = "Trace")] +pub(crate) fn is_hypervisor_present() -> bool { + match Mshv::new() { + Ok(_) => true, + Err(_) => { + log::info!("MSHV is not available on this system"); + false + } + } +} + +/// A MSHV implementation of a single-vcpu VM +#[derive(Debug)] +pub(super) struct MshvVm { + vm_fd: VmFd, + vcpu_fd: VcpuFd, + + #[cfg(gdb)] + debug: MshvDebug, +} + +#[cfg(gdb)] +#[derive(Debug, Default)] +struct MshvDebug { + regs: DebugRegisters, + sw_breakpoints: HashMap, // addr -> original instruction +} + +static MSHV: LazyLock> = + LazyLock::new(|| Mshv::new().map_err(|e| new_error!("Failed to open /dev/mshv: {}", e))); + +impl MshvVm { + /// Create a new instance of a MshvVm + #[instrument(skip_all, parent = Span::current(), level = "Trace")] + pub(super) fn new() -> Result { + let hv = MSHV + .as_ref() + .map_err(|e| new_error!("Failed to create MSHV instance: {}", e))?; + let pr = Default::default(); + #[cfg(mshv2)] + let vm_fd = hv.create_vm_with_config(&pr)?; + #[cfg(mshv3)] + let vm_fd = { + // It's important to avoid create_vm() and explicitly use + // create_vm_with_args() with an empty arguments structure + // here, because otherwise the partition is set up with a SynIC. + + let vm_fd = hv.create_vm_with_args(&pr)?; + let features: hv_partition_synthetic_processor_features = Default::default(); + vm_fd.hvcall_set_partition_property( + hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, + unsafe { features.as_uint64[0] }, + )?; + vm_fd.initialize()?; + vm_fd + }; + + let vcpu_fd = vm_fd.create_vcpu(0)?; + + Ok(Self { + vm_fd, + vcpu_fd, + #[cfg(gdb)] + debug: MshvDebug::default(), + }) + } +} + +impl Vm for MshvVm { + fn get_regs(&self) -> Result { + Ok((&self.vcpu_fd.get_regs()?).into()) + } + + fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { + Ok(self.vcpu_fd.set_regs(®s.into())?) 
+ } + + fn get_sregs(&self) -> Result { + Ok((&self.vcpu_fd.get_sregs()?).into()) + } + + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { + self.vcpu_fd.set_sregs(&sregs.into())?; + Ok(()) + } + + fn get_fpu(&self) -> Result { + Ok((&self.vcpu_fd.get_fpu()?).into()) + } + + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { + self.vcpu_fd.set_fpu(&fpu.into())?; + Ok(()) + } + + unsafe fn map_memory(&mut self, regions: &[MemoryRegion]) -> Result<()> { + if regions.is_empty() { + return Err(new_error!("No memory regions to map")); + } + + regions.iter().try_for_each(|region| { + let mshv_region = region.clone().into(); + self.vm_fd.map_user_memory(mshv_region) + })?; + Ok(()) + } + + fn run_vcpu(&mut self) -> Result { + const HALT: hv_message_type = hv_message_type_HVMSG_X64_HALT; + const IO_PORT: hv_message_type = hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT; + const UNMAPPED_GPA: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA; + const INVALID_GPA: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT; + #[cfg(gdb)] + const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; + + #[cfg(mshv2)] + let run_result = { + let hv_message: hv_message = Default::default(); + &self.vcpu_fd.run(hv_message) + }; + #[cfg(mshv3)] + let run_result = &self.vcpu_fd.run(); + + let result = match run_result { + Ok(m) => match m.header.message_type { + HALT => { + crate::debug!("mshv - Halt Details : {:#?}", &self); + HyperlightExit::Halt() + } + IO_PORT => { + let io_message = m.to_ioport_info()?; + let port_number = io_message.port_number; + let rax = io_message.rax; + // mshv, unlike kvm, does not automatically increment RIP + self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_RIP, + value: hv_register_value { + reg64: io_message.header.rip + + io_message.header.instruction_length() as u64, + }, + ..Default::default() + }])?; + crate::debug!("mshv IO Details : \nPort : {}\n{:#?}", port_number, 
&self); + HyperlightExit::IoOut(port_number, rax.to_le_bytes().to_vec()) + } + UNMAPPED_GPA => { + let mimo_message = m.to_memory_info()?; + let addr = mimo_message.guest_physical_address; + crate::debug!( + "mshv MMIO unmapped GPA -Details: Address: {} \n {:#?}", + addr, + &self + ); + match MemoryRegionFlags::try_from(mimo_message)? { + MemoryRegionFlags::READ => HyperlightExit::MmioRead(addr), + MemoryRegionFlags::WRITE => HyperlightExit::MmioWrite(addr), + _ => HyperlightExit::Unknown("Unknown MMIO access".to_string()), + } + } + INVALID_GPA => { + let mimo_message = m.to_memory_info()?; + let gpa = mimo_message.guest_physical_address; + let access_info = MemoryRegionFlags::try_from(mimo_message)?; + match access_info { + MemoryRegionFlags::READ => HyperlightExit::MmioRead(gpa), + MemoryRegionFlags::WRITE => HyperlightExit::MmioWrite(gpa), + _ => HyperlightExit::Unknown("Unknown MMIO access".to_string()), + } + } + // The only case an intercept exit is expected is when debugging is enabled + // and the intercepts are installed. + // Provide the extra information about the exception to accurately determine + // the stop reason + #[cfg(gdb)] + EXCEPTION_INTERCEPT => { + let exception_message = m.to_exception_info()?; + let DebugRegisters { dr6, .. } = self.vcpu_fd.get_debug_regs()?; + HyperlightExit::Debug(DebugExit::Debug { + dr6, + exception: exception_message.exception_vector as u32, + }) + } + other => { + crate::debug!("mshv Other Exit: Exit: {:#?} \n {:#?}", other, &self); + log_then_return!("unknown Hyper-V run message type {:?}", other); + } + }, + Err(e) => match e.errno() { + // In case of the gdb feature, the timeout is not enabled, this + // exit is because of a signal sent from the gdb thread to the + // hypervisor thread to cancel execution (e.g. Ctrl+C from GDB) + #[cfg(gdb)] + libc::EINTR => HyperlightExit::Debug(DebugExit::Interrupt), + // we send a signal to the thread to cancel execution. 
This results in EINTR being returned + #[cfg(not(gdb))] + libc::EINTR => HyperlightExit::Cancelled(), + libc::EAGAIN => HyperlightExit::Retry(), + _ => { + crate::debug!("mshv Error - Details: Error: {} \n {:#?}", e, &self); + log_then_return!("Error running VCPU {:?}", e); + } + }, + }; + Ok(result) + } + + fn interrupt_handle(&self) -> Arc { + todo!() + } + + // -- DEBUGGING RELATED BELOW --- + + #[cfg(gdb)] + fn translate_gva(&self, gva: u64) -> Result { + use mshv_bindings::{HV_TRANSLATE_GVA_VALIDATE_READ, HV_TRANSLATE_GVA_VALIDATE_WRITE}; + + use crate::HyperlightError; + + let flags = (HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE) as u64; + let (addr, _) = self + .vcpu_fd + .translate_gva(gva, flags) + .map_err(|_| HyperlightError::TranslateGuestAddress(gva))?; + + Ok(addr) + } + + #[cfg(gdb)] + fn set_debug(&mut self, enabled: bool) -> Result<()> { + use mshv_bindings::{ + hv_intercept_parameters, hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, + mshv_install_intercept, HV_INTERCEPT_ACCESS_MASK_EXECUTE, + }; + + use crate::hypervisor::gdb::arch::{BP_EX_ID, DB_EX_ID}; + use crate::new_error; + + if enabled { + self.vm_fd + .install_intercept(mshv_install_intercept { + access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, + intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, + // Exception handler #DB (1) + intercept_parameter: hv_intercept_parameters { + exception_vector: DB_EX_ID as u16, + }, + }) + .map_err(|e| new_error!("Cannot install debug exception intercept: {}", e))?; + + // Install intercept for #BP (3) exception + self.vm_fd + .install_intercept(mshv_install_intercept { + access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, + intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, + // Exception handler #BP (3) + intercept_parameter: hv_intercept_parameters { + exception_vector: BP_EX_ID as u16, + }, + }) + .map_err(|e| new_error!("Cannot install breakpoint exception intercept: {}", e))?; + } else { + // There 
doesn't seem to be any way to remove installed intercepts. But that seems fine. + } + Ok(()) + } + + #[cfg(gdb)] + fn set_single_step(&mut self, enable: bool) -> Result<()> { + let mut regs = self.get_regs()?; + if enable { + regs.rflags |= 1 << 8; + } else { + regs.rflags &= !(1 << 8); + } + self.set_regs(®s)?; + Ok(()) + } + + #[cfg(gdb)] + fn add_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + use crate::hypervisor::gdb::arch::MAX_NO_OF_HW_BP; + use crate::new_error; + + // Find the first available LOCAL (L0–L3) slot + let i = (0..MAX_NO_OF_HW_BP) + .position(|i| self.debug.regs.dr7 & (1 << (i * 2)) == 0) + .ok_or_else(|| new_error!("Tried to add more than 4 hardware breakpoints"))?; + + // Assign to corresponding debug register + *[ + &mut self.debug.regs.dr0, + &mut self.debug.regs.dr1, + &mut self.debug.regs.dr2, + &mut self.debug.regs.dr3, + ][i] = addr; + + // Enable LOCAL bit + self.debug.regs.dr7 |= 1 << (i * 2); + + self.vcpu_fd.set_debug_regs(&self.debug.regs)?; + Ok(()) + } + + #[cfg(gdb)] + fn remove_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + use crate::new_error; + + let regs = [ + &mut self.debug.regs.dr0, + &mut self.debug.regs.dr1, + &mut self.debug.regs.dr2, + &mut self.debug.regs.dr3, + ]; + + if let Some(i) = regs.iter().position(|&&mut reg| reg == addr) { + // Clear the address + *regs[i] = 0; + // Disable LOCAL bit + self.debug.regs.dr7 &= !(1 << (i * 2)); + self.vcpu_fd.set_debug_regs(&self.debug.regs)?; + Ok(()) + } else { + Err(new_error!("Tried to remove non-existing hw-breakpoint")) + } + } + + #[cfg(gdb)] + fn add_sw_breakpoint( + &mut self, + addr: u64, + dbg_mem_access_fn: std::sync::Arc>, + ) -> Result<()> { + use super::gdb::arch::SW_BP_SIZE; + use crate::hypervisor::gdb::arch::SW_BP; + + let mut save_data = [0; SW_BP_SIZE]; + let mut mem = dbg_mem_access_fn.lock().unwrap(); + mem.read(addr as usize, &mut save_data[..])?; + mem.write(addr as usize, &SW_BP)?; + self.debug.sw_breakpoints.insert(addr, save_data[0]); + 
Ok(()) + } + + #[cfg(gdb)] + fn remove_sw_breakpoint( + &mut self, + addr: u64, + dbg_mem_access_fn: std::sync::Arc>, + ) -> Result<()> { + let original_instr = self.debug.sw_breakpoints.remove(&addr).unwrap(); + dbg_mem_access_fn + .lock() + .unwrap() + .write(addr as usize, &[original_instr])?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mem::memory_region::MemoryRegionVecBuilder; + use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory}; + + #[rustfmt::skip] + const CODE: [u8; 12] = [ + 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ + 0x00, 0xd8, /* add %bl, %al */ + 0x04, b'0', /* add $'0', %al */ + 0xee, /* out %al, (%dx) */ + /* send a 0 to indicate we're done */ + 0xb0, b'\0', /* mov $'\0', %al */ + 0xee, /* out %al, (%dx) */ + 0xf4, /* HLT */ + ]; + + fn shared_mem_with_code( + code: &[u8], + mem_size: usize, + load_offset: usize, + ) -> Result> { + if load_offset > mem_size { + log_then_return!( + "code load offset ({}) > memory size ({})", + load_offset, + mem_size + ); + } + let mut shared_mem = ExclusiveSharedMemory::new(mem_size)?; + shared_mem.copy_from_slice(code, load_offset)?; + Ok(Box::new(shared_mem)) + } + + #[test] + fn create_mshv_vm() { + if !super::is_hypervisor_present() { + return; + } + const MEM_SIZE: usize = 0x3000; + let gm = shared_mem_with_code(CODE.as_slice(), MEM_SIZE, 0).unwrap(); + let mut regions = MemoryRegionVecBuilder::new(0, gm.base_addr()); + regions.push_page_aligned( + MEM_SIZE, + MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE, + crate::mem::memory_region::MemoryRegionType::Code, + ); + let mut mshv_vm = super::MshvVm::new().unwrap(); + unsafe { + mshv_vm.map_memory(®ions.build()).unwrap(); + } + } +} diff --git a/src/hyperlight_host/src/hypervisor/regs/fpu.rs b/src/hyperlight_host/src/hypervisor/regs/fpu.rs new file mode 100644 index 000000000..65d2a77f9 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/regs/fpu.rs @@ -0,0 +1,437 @@ +/* +Copyright 
2024 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#[cfg(mshv2)] +extern crate mshv_bindings2 as mshv_bindings; +#[cfg(mshv2)] +extern crate mshv_ioctls2 as mshv_ioctls; + +#[cfg(mshv3)] +extern crate mshv_bindings3 as mshv_bindings; +#[cfg(mshv3)] +extern crate mshv_ioctls3 as mshv_ioctls; + +#[cfg(target_os = "windows")] +use std::collections::HashSet; + +#[cfg(kvm)] +use kvm_bindings::kvm_fpu; +#[cfg(mshv)] +use mshv_bindings::FloatingPointUnit; + +#[cfg(target_os = "windows")] +use crate::hypervisor::regs::FromWhpRegisterError; + +pub(crate) const FP_CONTROL_WORD_DEFAULT: u16 = 0x37f; // mask all fp-exception, set rounding to nearest, set precision to 64-bit +pub(crate) const FP_TAG_WORD_DEFAULT: u8 = 0xff; // each 8 of x87 fpu registers is empty +pub(crate) const MXCSR_DEFAULT: u32 = 0x1f80; // mask simd fp-exceptions, clear exception flags, set rounding to nearest, disable flush-to-zero mode, disable denormals-are-zero mode + +#[derive(Debug, Default, Clone, Copy, PartialEq)] +pub(crate) struct CommonFpu { + pub fpr: [[u8; 16]; 8], + pub fcw: u16, + pub fsw: u16, + pub ftwx: u8, + pub pad1: u8, + pub last_opcode: u16, + pub last_ip: u64, + pub last_dp: u64, + pub xmm: [[u8; 16]; 16], + pub mxcsr: u32, + pub pad2: u32, +} + +#[cfg(kvm)] +impl From<&CommonFpu> for kvm_fpu { + fn from(common_fpu: &CommonFpu) -> Self { + kvm_fpu { + fpr: common_fpu.fpr, + fcw: common_fpu.fcw, + fsw: common_fpu.fsw, + ftwx: common_fpu.ftwx, + pad1: common_fpu.pad1, + 
last_opcode: common_fpu.last_opcode, + last_ip: common_fpu.last_ip, + last_dp: common_fpu.last_dp, + xmm: common_fpu.xmm, + mxcsr: common_fpu.mxcsr, + pad2: common_fpu.pad2, + } + } +} + +#[cfg(mshv)] +impl From<&CommonFpu> for FloatingPointUnit { + fn from(common_fpu: &CommonFpu) -> FloatingPointUnit { + FloatingPointUnit { + fpr: common_fpu.fpr, + fcw: common_fpu.fcw, + fsw: common_fpu.fsw, + ftwx: common_fpu.ftwx, + pad1: common_fpu.pad1, + last_opcode: common_fpu.last_opcode, + last_ip: common_fpu.last_ip, + last_dp: common_fpu.last_dp, + xmm: common_fpu.xmm, + mxcsr: common_fpu.mxcsr, + pad2: common_fpu.pad2, + } + } +} + +#[cfg(kvm)] +impl From<&kvm_fpu> for CommonFpu { + fn from(kvm_fpu: &kvm_fpu) -> Self { + Self { + fpr: kvm_fpu.fpr, + fcw: kvm_fpu.fcw, + fsw: kvm_fpu.fsw, + ftwx: kvm_fpu.ftwx, + pad1: kvm_fpu.pad1, + last_opcode: kvm_fpu.last_opcode, + last_ip: kvm_fpu.last_ip, + last_dp: kvm_fpu.last_dp, + xmm: kvm_fpu.xmm, + mxcsr: kvm_fpu.mxcsr, + pad2: kvm_fpu.pad2, + } + } +} + +#[cfg(mshv)] +impl From<&FloatingPointUnit> for CommonFpu { + fn from(mshv_fpu: &FloatingPointUnit) -> Self { + Self { + fpr: mshv_fpu.fpr, + fcw: mshv_fpu.fcw, + fsw: mshv_fpu.fsw, + ftwx: mshv_fpu.ftwx, + pad1: mshv_fpu.pad1, + last_opcode: mshv_fpu.last_opcode, + last_ip: mshv_fpu.last_ip, + last_dp: mshv_fpu.last_dp, + xmm: mshv_fpu.xmm, + mxcsr: mshv_fpu.mxcsr, + pad2: mshv_fpu.pad2, + } + } +} + +#[cfg(target_os = "windows")] +use windows::Win32::System::Hypervisor::*; + +#[cfg(target_os = "windows")] +impl From<&CommonFpu> for [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 26] { + fn from(fpu: &CommonFpu) -> Self { + let mut regs: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 26] = [Default::default(); 26]; + let mut idx = 0; + + // FPU/MMX registers (8 x 128-bit) + for (i, reg) in fpu.fpr.iter().enumerate() { + let mut value = WHV_REGISTER_VALUE::default(); + value.Reg128 = WHV_UINT128 { + Dword: [ + u32::from_le_bytes([reg[0], reg[1], reg[2], reg[3]]), + 
u32::from_le_bytes([reg[4], reg[5], reg[6], reg[7]]), + u32::from_le_bytes([reg[8], reg[9], reg[10], reg[11]]), + u32::from_le_bytes([reg[12], reg[13], reg[14], reg[15]]), + ], + }; + regs[idx] = (WHV_REGISTER_NAME(WHvX64RegisterFpMmx0.0 + i as i32), value); + idx += 1; + } + + // FpControlStatus + let mut fp_control_status = WHV_REGISTER_VALUE::default(); + fp_control_status.FpControlStatus = WHV_X64_FP_CONTROL_STATUS_REGISTER { + Anonymous: WHV_X64_FP_CONTROL_STATUS_REGISTER_0 { + FpControl: fpu.fcw, + FpStatus: fpu.fsw, + FpTag: fpu.ftwx, + Reserved: fpu.pad1, + LastFpOp: fpu.last_opcode, + Anonymous: WHV_X64_FP_CONTROL_STATUS_REGISTER_0_0 { + LastFpRip: fpu.last_ip, + }, + }, + }; + regs[idx] = (WHvX64RegisterFpControlStatus, fp_control_status); + idx += 1; + + // XMM registers (16 x 128-bit) + for (i, reg) in fpu.xmm.iter().enumerate() { + let mut value = WHV_REGISTER_VALUE::default(); + value.Reg128 = WHV_UINT128 { + Dword: [ + u32::from_le_bytes([reg[0], reg[1], reg[2], reg[3]]), + u32::from_le_bytes([reg[4], reg[5], reg[6], reg[7]]), + u32::from_le_bytes([reg[8], reg[9], reg[10], reg[11]]), + u32::from_le_bytes([reg[12], reg[13], reg[14], reg[15]]), + ], + }; + regs[idx] = (WHV_REGISTER_NAME(WHvX64RegisterXmm0.0 + i as i32), value); + idx += 1; + } + + // XmmControlStatus + let mut xmm_control_status = WHV_REGISTER_VALUE::default(); + xmm_control_status.XmmControlStatus = WHV_X64_XMM_CONTROL_STATUS_REGISTER { + Anonymous: WHV_X64_XMM_CONTROL_STATUS_REGISTER_0 { + XmmStatusControl: fpu.mxcsr, + XmmStatusControlMask: !0, + Anonymous: WHV_X64_XMM_CONTROL_STATUS_REGISTER_0_0 { + LastFpRdp: fpu.last_dp, + }, + }, + }; + regs[idx] = (WHvX64RegisterXmmControlStatus, xmm_control_status); + + regs + } +} + +#[cfg(target_os = "windows")] +pub(crate) const WHP_FPU_NAMES_LEN: usize = 26; +#[expect(dead_code, reason = "Used in get_fpu, but get_fpu is currently unused")] +#[cfg(target_os = "windows")] +pub(crate) const WHP_FPU_NAMES: [WHV_REGISTER_NAME; 
WHP_FPU_NAMES_LEN] = [ + WHvX64RegisterFpMmx0, + WHvX64RegisterFpMmx1, + WHvX64RegisterFpMmx2, + WHvX64RegisterFpMmx3, + WHvX64RegisterFpMmx4, + WHvX64RegisterFpMmx5, + WHvX64RegisterFpMmx6, + WHvX64RegisterFpMmx7, + WHvX64RegisterFpControlStatus, + WHvX64RegisterXmm0, + WHvX64RegisterXmm1, + WHvX64RegisterXmm2, + WHvX64RegisterXmm3, + WHvX64RegisterXmm4, + WHvX64RegisterXmm5, + WHvX64RegisterXmm6, + WHvX64RegisterXmm7, + WHvX64RegisterXmm8, + WHvX64RegisterXmm9, + WHvX64RegisterXmm10, + WHvX64RegisterXmm11, + WHvX64RegisterXmm12, + WHvX64RegisterXmm13, + WHvX64RegisterXmm14, + WHvX64RegisterXmm15, + WHvX64RegisterXmmControlStatus, +]; + +#[cfg(target_os = "windows")] +impl TryFrom<&[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]> for CommonFpu { + type Error = FromWhpRegisterError; + + fn try_from(regs: &[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]) -> Result { + if regs.len() != WHP_FPU_NAMES_LEN { + return Err(FromWhpRegisterError::InvalidLength(regs.len())); + } + + let mut fpu = CommonFpu::default(); + let mut seen_registers = HashSet::new(); + + for (name, value) in regs { + let name_id = name.0; + + // Check for duplicates + if !seen_registers.insert(name_id) { + return Err(FromWhpRegisterError::DuplicateRegister(name_id)); + } + + match name_id { + id if (WHvX64RegisterFpMmx0.0..WHvX64RegisterFpMmx0.0 + 8).contains(&id) => { + let idx = (id - WHvX64RegisterFpMmx0.0) as usize; + let dwords = unsafe { value.Reg128.Dword }; + fpu.fpr[idx] = [ + dwords[0].to_le_bytes(), + dwords[1].to_le_bytes(), + dwords[2].to_le_bytes(), + dwords[3].to_le_bytes(), + ] + .concat() + .try_into() + .map_err(|_| FromWhpRegisterError::InvalidEncoding)?; + } + + id if id == WHvX64RegisterFpControlStatus.0 => { + let control = unsafe { value.FpControlStatus.Anonymous }; + fpu.fcw = control.FpControl; + fpu.fsw = control.FpStatus; + fpu.ftwx = control.FpTag; + fpu.pad1 = control.Reserved; + fpu.last_opcode = control.LastFpOp; + fpu.last_ip = unsafe { control.Anonymous.LastFpRip }; + } + + id 
if (WHvX64RegisterXmm0.0..WHvX64RegisterXmm0.0 + 16).contains(&id) => { + let idx = (id - WHvX64RegisterXmm0.0) as usize; + let dwords = unsafe { value.Reg128.Dword }; + fpu.xmm[idx] = [ + dwords[0].to_le_bytes(), + dwords[1].to_le_bytes(), + dwords[2].to_le_bytes(), + dwords[3].to_le_bytes(), + ] + .concat() + .try_into() + .map_err(|_| FromWhpRegisterError::InvalidEncoding)?; + } + + id if id == WHvX64RegisterXmmControlStatus.0 => { + let control = unsafe { value.XmmControlStatus.Anonymous }; + fpu.mxcsr = control.XmmStatusControl; + fpu.last_dp = unsafe { control.Anonymous.LastFpRdp }; + } + + _ => { + return Err(FromWhpRegisterError::InvalidRegister(name_id)); + } + } + } + + // Set of all expected register names + let expected_registers: HashSet = [ + WHvX64RegisterFpMmx0.0, + WHvX64RegisterFpMmx1.0, + WHvX64RegisterFpMmx2.0, + WHvX64RegisterFpMmx3.0, + WHvX64RegisterFpMmx4.0, + WHvX64RegisterFpMmx5.0, + WHvX64RegisterFpMmx6.0, + WHvX64RegisterFpMmx7.0, + WHvX64RegisterFpControlStatus.0, + WHvX64RegisterXmm0.0, + WHvX64RegisterXmm1.0, + WHvX64RegisterXmm2.0, + WHvX64RegisterXmm3.0, + WHvX64RegisterXmm4.0, + WHvX64RegisterXmm5.0, + WHvX64RegisterXmm6.0, + WHvX64RegisterXmm7.0, + WHvX64RegisterXmm8.0, + WHvX64RegisterXmm9.0, + WHvX64RegisterXmm10.0, + WHvX64RegisterXmm11.0, + WHvX64RegisterXmm12.0, + WHvX64RegisterXmm13.0, + WHvX64RegisterXmm14.0, + WHvX64RegisterXmm15.0, + WHvX64RegisterXmmControlStatus.0, + ] + .into_iter() + .collect(); + + // Technically it should not be possible to have any missing registers at this point + // since we are guaranteed to have 26 (WHP_FPU_NAMES_LEN) non-duplicate registers that have passed the match-arm above, but leaving this here for safety anyway + let missing: HashSet = expected_registers + .difference(&seen_registers) + .cloned() + .collect(); + + if !missing.is_empty() { + return Err(FromWhpRegisterError::MissingRegister(missing)); + } + + Ok(fpu) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_common_fpu() -> CommonFpu { 
+ CommonFpu { + fpr: [ + [1u8; 16], [2u8; 16], [3u8; 16], [4u8; 16], [5u8; 16], [6u8; 16], [7u8; 16], + [8u8; 16], + ], + fcw: 0x1234, + fsw: 0x5678, + ftwx: 0x9a, + pad1: 0xbc, + last_opcode: 0xdef0, + last_ip: 0xdeadbeefcafebabe, + last_dp: 0xabad1deaf00dbabe, + xmm: [ + [8u8; 16], [9u8; 16], [10u8; 16], [11u8; 16], [12u8; 16], [13u8; 16], [14u8; 16], + [15u8; 16], [16u8; 16], [17u8; 16], [18u8; 16], [19u8; 16], [20u8; 16], [21u8; 16], + [22u8; 16], [23u8; 16], + ], + mxcsr: 0x1f80, + pad2: 0, + } + } + + #[cfg(kvm)] + #[test] + fn round_trip_kvm_fpu() { + use kvm_bindings::kvm_fpu; + + let original = sample_common_fpu(); + let kvm: kvm_fpu = (&original).into(); + let round_tripped = CommonFpu::from(&kvm); + + assert_eq!(original, round_tripped); + } + + #[cfg(mshv)] + #[test] + fn round_trip_mshv_fpu() { + use mshv_bindings::FloatingPointUnit; + + let original = sample_common_fpu(); + let mshv: FloatingPointUnit = (&original).into(); + let round_tripped = CommonFpu::from(&mshv); + + assert_eq!(original, round_tripped); + } + + #[cfg(target_os = "windows")] + #[test] + fn round_trip_windows_fpu() { + use windows::Win32::System::Hypervisor::*; + + let original = sample_common_fpu(); + let windows: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 26] = (&original).into(); + let round_tripped = CommonFpu::try_from(windows.as_ref()).unwrap(); + assert_eq!(original, round_tripped); + + // test for duplicate register error handling + let original = sample_common_fpu(); + let mut windows: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 26] = (&original).into(); + windows[0].0 = WHvX64RegisterFpMmx1; + let err = CommonFpu::try_from(windows.as_ref()).unwrap_err(); + assert_eq!( + err, + FromWhpRegisterError::DuplicateRegister(WHvX64RegisterFpMmx1.0) + ); + + // test for passing non-fpu register (e.g. 
RAX) + let original = sample_common_fpu(); + let mut windows: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 26] = (&original).into(); + windows[0] = (WHvX64RegisterRax, windows[0].1); + let err = CommonFpu::try_from(windows.as_ref()).unwrap_err(); + assert_eq!( + err, + FromWhpRegisterError::InvalidRegister(WHvX64RegisterRax.0) + ); + } +} diff --git a/src/hyperlight_host/src/hypervisor/regs/mod.rs b/src/hyperlight_host/src/hypervisor/regs/mod.rs new file mode 100644 index 000000000..b2ccf770a --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/regs/mod.rs @@ -0,0 +1,20 @@ +mod fpu; +mod special_regs; +mod standard_regs; + +#[cfg(target_os = "windows")] +use std::collections::HashSet; + +pub(crate) use fpu::*; +pub(crate) use special_regs::*; +pub(crate) use standard_regs::*; + +#[cfg(target_os = "windows")] +#[derive(Debug, PartialEq)] +pub(crate) enum FromWhpRegisterError { + MissingRegister(HashSet), + InvalidLength(usize), + InvalidEncoding, + DuplicateRegister(i32), + InvalidRegister(i32), +} diff --git a/src/hyperlight_host/src/hypervisor/regs/special_regs.rs b/src/hyperlight_host/src/hypervisor/regs/special_regs.rs new file mode 100644 index 000000000..d1639ef29 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/regs/special_regs.rs @@ -0,0 +1,637 @@ +#[cfg(mshv2)] +extern crate mshv_bindings2 as mshv_bindings; +#[cfg(mshv2)] +extern crate mshv_ioctls2 as mshv_ioctls; + +#[cfg(mshv3)] +extern crate mshv_bindings3 as mshv_bindings; +#[cfg(mshv3)] +extern crate mshv_ioctls3 as mshv_ioctls; + +#[cfg(target_os = "windows")] +use std::collections::HashSet; +#[cfg(target_os = "windows")] +use std::mem::offset_of; + +#[cfg(kvm)] +use kvm_bindings::{kvm_dtable, kvm_segment, kvm_sregs}; +#[cfg(mshv)] +use mshv_bindings::{SegmentRegister, SpecialRegisters, TableRegister}; +#[cfg(target_os = "windows")] +use windows::Win32::System::Hypervisor::*; + +#[cfg(target_os = "windows")] +use super::FromWhpRegisterError; +#[cfg(target_os = "windows")] +use 
crate::hypervisor::regs::{WHP_FPU_NAMES_LEN, WHP_REGS_NAMES_LEN}; + +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(crate) struct CommonSpecialRegisters { + pub cs: CommonSegmentRegister, + pub ds: CommonSegmentRegister, + pub es: CommonSegmentRegister, + pub fs: CommonSegmentRegister, + pub gs: CommonSegmentRegister, + pub ss: CommonSegmentRegister, + pub tr: CommonSegmentRegister, + pub ldt: CommonSegmentRegister, + pub gdt: CommonTableRegister, + pub idt: CommonTableRegister, + pub cr0: u64, + pub cr2: u64, + pub cr3: u64, + pub cr4: u64, + pub cr8: u64, + pub efer: u64, + pub apic_base: u64, + pub interrupt_bitmap: [u64; 4], +} + +#[cfg(mshv)] +impl From<&SpecialRegisters> for CommonSpecialRegisters { + fn from(value: &SpecialRegisters) -> Self { + CommonSpecialRegisters { + cs: value.cs.into(), + ds: value.ds.into(), + es: value.es.into(), + fs: value.fs.into(), + gs: value.gs.into(), + ss: value.ss.into(), + tr: value.tr.into(), + ldt: value.ldt.into(), + gdt: value.gdt.into(), + idt: value.idt.into(), + cr0: value.cr0, + cr2: value.cr2, + cr3: value.cr3, + cr4: value.cr4, + cr8: value.cr8, + efer: value.efer, + apic_base: value.apic_base, + interrupt_bitmap: value.interrupt_bitmap, + } + } +} + +#[cfg(mshv)] +impl From<&CommonSpecialRegisters> for SpecialRegisters { + fn from(other: &CommonSpecialRegisters) -> Self { + SpecialRegisters { + cs: other.cs.into(), + ds: other.ds.into(), + es: other.es.into(), + fs: other.fs.into(), + gs: other.gs.into(), + ss: other.ss.into(), + tr: other.tr.into(), + ldt: other.ldt.into(), + gdt: other.gdt.into(), + idt: other.idt.into(), + cr0: other.cr0, + cr2: other.cr2, + cr3: other.cr3, + cr4: other.cr4, + cr8: other.cr8, + efer: other.efer, + apic_base: other.apic_base, + interrupt_bitmap: other.interrupt_bitmap, + } + } +} + +#[cfg(kvm)] +impl From<&kvm_sregs> for CommonSpecialRegisters { + fn from(kvm_sregs: &kvm_sregs) -> Self { + CommonSpecialRegisters { + cs: kvm_sregs.cs.into(), + ds: kvm_sregs.ds.into(), + 
es: kvm_sregs.es.into(), + fs: kvm_sregs.fs.into(), + gs: kvm_sregs.gs.into(), + ss: kvm_sregs.ss.into(), + tr: kvm_sregs.tr.into(), + ldt: kvm_sregs.ldt.into(), + gdt: kvm_sregs.gdt.into(), + idt: kvm_sregs.idt.into(), + cr0: kvm_sregs.cr0, + cr2: kvm_sregs.cr2, + cr3: kvm_sregs.cr3, + cr4: kvm_sregs.cr4, + cr8: kvm_sregs.cr8, + efer: kvm_sregs.efer, + apic_base: kvm_sregs.apic_base, + interrupt_bitmap: kvm_sregs.interrupt_bitmap, + } + } +} + +#[cfg(kvm)] +impl From<&CommonSpecialRegisters> for kvm_sregs { + fn from(common_sregs: &CommonSpecialRegisters) -> Self { + kvm_sregs { + cs: common_sregs.cs.into(), + ds: common_sregs.ds.into(), + es: common_sregs.es.into(), + fs: common_sregs.fs.into(), + gs: common_sregs.gs.into(), + ss: common_sregs.ss.into(), + tr: common_sregs.tr.into(), + ldt: common_sregs.ldt.into(), + gdt: common_sregs.gdt.into(), + idt: common_sregs.idt.into(), + cr0: common_sregs.cr0, + cr2: common_sregs.cr2, + cr3: common_sregs.cr3, + cr4: common_sregs.cr4, + cr8: common_sregs.cr8, + efer: common_sregs.efer, + apic_base: common_sregs.apic_base, + interrupt_bitmap: common_sregs.interrupt_bitmap, + } + } +} + +#[repr(C, align(16))] +#[cfg(target_os = "windows")] +pub(crate) struct AlignedRegisterValues(pub(crate) [WHV_REGISTER_VALUE; N]); + +#[cfg(target_os = "windows")] +#[allow(clippy::disallowed_macros)] // this is at compile time +const _: () = { + // WHP_SREGS_NAMES_LEN + assert!(std::mem::align_of::>() % 16 == 0); + assert!(offset_of!(AlignedRegisterValues, 0) % 16 == 0); + // WHP_REGS_NAMES_LEN + assert!(std::mem::align_of::>() % 16 == 0,); + assert!(offset_of!(AlignedRegisterValues, 0) % 16 == 0); + // WHP_FPU_NAMES_LEN + assert!(std::mem::align_of::>() % 16 == 0,); + assert!(offset_of!(AlignedRegisterValues, 0) % 16 == 0); +}; + +#[cfg(target_os = "windows")] +pub(crate) const WHP_SREGS_NAMES_LEN: usize = 17; +#[cfg(target_os = "windows")] +pub(crate) static WHP_SREGS_NAMES: [WHV_REGISTER_NAME; WHP_SREGS_NAMES_LEN] = [ + 
WHvX64RegisterCs, + WHvX64RegisterDs, + WHvX64RegisterEs, + WHvX64RegisterFs, + WHvX64RegisterGs, + WHvX64RegisterSs, + WHvX64RegisterTr, + WHvX64RegisterLdtr, + WHvX64RegisterGdtr, + WHvX64RegisterIdtr, + WHvX64RegisterCr0, + WHvX64RegisterCr2, + WHvX64RegisterCr3, + WHvX64RegisterCr4, + WHvX64RegisterCr8, + WHvX64RegisterEfer, + WHvX64RegisterApicBase, +]; + +#[cfg(target_os = "windows")] +impl From<&CommonSpecialRegisters> + for [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); WHP_SREGS_NAMES_LEN] +{ + fn from(other: &CommonSpecialRegisters) -> Self { + [ + (WHvX64RegisterCs, other.cs.into()), + (WHvX64RegisterDs, other.ds.into()), + (WHvX64RegisterEs, other.es.into()), + (WHvX64RegisterFs, other.fs.into()), + (WHvX64RegisterGs, other.gs.into()), + (WHvX64RegisterSs, other.ss.into()), + (WHvX64RegisterTr, other.tr.into()), + (WHvX64RegisterLdtr, other.ldt.into()), + (WHvX64RegisterGdtr, other.gdt.into()), + (WHvX64RegisterIdtr, other.idt.into()), + (WHvX64RegisterCr0, WHV_REGISTER_VALUE { Reg64: other.cr0 }), + (WHvX64RegisterCr2, WHV_REGISTER_VALUE { Reg64: other.cr2 }), + (WHvX64RegisterCr3, WHV_REGISTER_VALUE { Reg64: other.cr3 }), + (WHvX64RegisterCr4, WHV_REGISTER_VALUE { Reg64: other.cr4 }), + (WHvX64RegisterCr8, WHV_REGISTER_VALUE { Reg64: other.cr8 }), + (WHvX64RegisterEfer, WHV_REGISTER_VALUE { Reg64: other.efer }), + ( + WHvX64RegisterApicBase, + WHV_REGISTER_VALUE { + Reg64: other.apic_base, + }, + ), + ] + } +} + +#[cfg(target_os = "windows")] +impl TryFrom<&[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]> for CommonSpecialRegisters { + type Error = FromWhpRegisterError; + + #[expect( + non_upper_case_globals, + reason = "Windows API has lowercase register names" + )] + fn try_from(regs: &[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]) -> Result { + if regs.len() != WHP_SREGS_NAMES_LEN { + return Err(FromWhpRegisterError::InvalidLength(regs.len())); + } + let mut registers = CommonSpecialRegisters::default(); + let mut seen_registers = HashSet::new(); + + for &(name, 
value) in regs { + let name_id = name.0; + + // Check for duplicates + if !seen_registers.insert(name_id) { + return Err(FromWhpRegisterError::DuplicateRegister(name_id)); + } + + unsafe { + match name { + WHvX64RegisterCs => registers.cs = value.into(), + WHvX64RegisterDs => registers.ds = value.into(), + WHvX64RegisterEs => registers.es = value.into(), + WHvX64RegisterFs => registers.fs = value.into(), + WHvX64RegisterGs => registers.gs = value.into(), + WHvX64RegisterSs => registers.ss = value.into(), + WHvX64RegisterTr => registers.tr = value.into(), + WHvX64RegisterLdtr => registers.ldt = value.into(), + WHvX64RegisterGdtr => registers.gdt = value.into(), + WHvX64RegisterIdtr => registers.idt = value.into(), + WHvX64RegisterCr0 => registers.cr0 = value.Reg64, + WHvX64RegisterCr2 => registers.cr2 = value.Reg64, + WHvX64RegisterCr3 => registers.cr3 = value.Reg64, + WHvX64RegisterCr4 => registers.cr4 = value.Reg64, + WHvX64RegisterCr8 => registers.cr8 = value.Reg64, + WHvX64RegisterEfer => registers.efer = value.Reg64, + WHvX64RegisterApicBase => registers.apic_base = value.Reg64, + _ => { + // Given unexpected register + return Err(FromWhpRegisterError::InvalidRegister(name_id)); + } + } + } + } + + // TODO: I'm not sure how to get this from WHP at the moment + registers.interrupt_bitmap = Default::default(); + + // Set of all expected register names + let expected_registers: HashSet = + WHP_SREGS_NAMES.map(|name| name.0).into_iter().collect(); + + // Technically it should not be possible to have any missing registers at this point + // since we are guaranteed to have 17 (WHP_SREGS_NAMES_LEN) non-duplicate registers that have passed the match-arm above, but leaving this here for safety anyway + let missing: HashSet<_> = expected_registers + .difference(&seen_registers) + .cloned() + .collect(); + + if !missing.is_empty() { + return Err(FromWhpRegisterError::MissingRegister(missing)); + } + + Ok(registers) + } +} + +// --- Segment Register --- + +#[derive(Debug, Default, Copy, Clone, 
PartialEq)] +pub(crate) struct CommonSegmentRegister { + pub base: u64, + pub limit: u32, + pub selector: u16, + pub type_: u8, + pub present: u8, + pub dpl: u8, + pub db: u8, + pub s: u8, + pub l: u8, + pub g: u8, + pub avl: u8, + pub unusable: u8, + pub padding: u8, +} + +#[cfg(mshv)] +impl From for CommonSegmentRegister { + fn from(other: SegmentRegister) -> Self { + CommonSegmentRegister { + base: other.base, + limit: other.limit, + selector: other.selector, + type_: other.type_, + present: other.present, + dpl: other.dpl, + db: other.db, + s: other.s, + l: other.l, + g: other.g, + avl: other.avl, + unusable: other.unusable, + padding: other.padding, + } + } +} + +#[cfg(mshv)] +impl From for SegmentRegister { + fn from(other: CommonSegmentRegister) -> Self { + SegmentRegister { + base: other.base, + limit: other.limit, + selector: other.selector, + type_: other.type_, + present: other.present, + dpl: other.dpl, + db: other.db, + s: other.s, + l: other.l, + g: other.g, + avl: other.avl, + unusable: other.unusable, + padding: other.padding, + } + } +} + +#[cfg(kvm)] +impl From for CommonSegmentRegister { + fn from(kvm_segment: kvm_segment) -> Self { + CommonSegmentRegister { + base: kvm_segment.base, + limit: kvm_segment.limit, + selector: kvm_segment.selector, + type_: kvm_segment.type_, + present: kvm_segment.present, + dpl: kvm_segment.dpl, + db: kvm_segment.db, + s: kvm_segment.s, + l: kvm_segment.l, + g: kvm_segment.g, + avl: kvm_segment.avl, + unusable: kvm_segment.unusable, + padding: kvm_segment.padding, + } + } +} + +#[cfg(kvm)] +impl From for kvm_segment { + fn from(common_segment: CommonSegmentRegister) -> Self { + kvm_segment { + base: common_segment.base, + limit: common_segment.limit, + selector: common_segment.selector, + type_: common_segment.type_, + present: common_segment.present, + dpl: common_segment.dpl, + db: common_segment.db, + s: common_segment.s, + l: common_segment.l, + g: common_segment.g, + avl: common_segment.avl, + unusable: 
common_segment.unusable, + padding: common_segment.padding, + } + } +} + +#[cfg(target_os = "windows")] +impl From for CommonSegmentRegister { + fn from(other: WHV_REGISTER_VALUE) -> Self { + unsafe { + let segment = other.Segment; + let bits = segment.Anonymous.Attributes; + + // Source of bit layout: https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvvirtualprocessordatatypes + CommonSegmentRegister { + base: segment.Base, + limit: segment.Limit, + selector: segment.Selector, + type_: (bits & 0b1111) as u8, // bits 0–3: SegmentType + s: ((bits >> 4) & 0b1) as u8, // bit 4: NonSystemSegment + dpl: ((bits >> 5) & 0b11) as u8, // bits 5–6: DPL + present: ((bits >> 7) & 0b1) as u8, // bit 7: Present + // bits 8–11: Reserved + avl: ((bits >> 12) & 0b1) as u8, // bit 12: Available + l: ((bits >> 13) & 0b1) as u8, // bit 13: Long mode + db: ((bits >> 14) & 0b1) as u8, // bit 14: Default + g: ((bits >> 15) & 0b1) as u8, // bit 15: Granularity + unusable: 0, + padding: 0, + } + } + } +} + +#[cfg(target_os = "windows")] +impl From for WHV_REGISTER_VALUE { + fn from(other: CommonSegmentRegister) -> Self { + // Truncate each field to its valid bit width before composing `Attributes`. 
+ let type_ = other.type_ & 0xF; // 4 bits + let s = other.s & 0x1; // 1 bit + let dpl = other.dpl & 0x3; // 2 bits + let present = other.present & 0x1; // 1 bit + let avl = other.avl & 0x1; // 1 bit + let l = other.l & 0x1; // 1 bit + let db = other.db & 0x1; // 1 bit + let g = other.g & 0x1; // 1 bit + + WHV_REGISTER_VALUE { + Segment: WHV_X64_SEGMENT_REGISTER { + Base: other.base, + Limit: other.limit, + Selector: other.selector, + Anonymous: WHV_X64_SEGMENT_REGISTER_0 { + Attributes: (type_ as u16) // bit 0-3 + | ((s as u16) << 4) // bit 4 + | ((dpl as u16) << 5) // bit 5-6 + | ((present as u16) << 7) // bit 7 + | ((avl as u16) << 12) // bit 12 + | ((l as u16) << 13) // bit 13 + | ((db as u16) << 14) // bit 14 + | ((g as u16) << 15), // bit 15 + }, + }, + } + } +} + +// --- Table Register --- + +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(crate) struct CommonTableRegister { + pub base: u64, + pub limit: u16, +} + +#[cfg(mshv)] +impl From for CommonTableRegister { + fn from(other: TableRegister) -> Self { + CommonTableRegister { + base: other.base, + limit: other.limit, + } + } +} + +#[cfg(mshv)] +impl From for TableRegister { + fn from(other: CommonTableRegister) -> Self { + TableRegister { + base: other.base, + limit: other.limit, + } + } +} + +#[cfg(kvm)] +impl From for CommonTableRegister { + fn from(kvm_dtable: kvm_dtable) -> Self { + CommonTableRegister { + base: kvm_dtable.base, + limit: kvm_dtable.limit, + } + } +} + +#[cfg(kvm)] +impl From for kvm_dtable { + fn from(common_dtable: CommonTableRegister) -> Self { + kvm_dtable { + base: common_dtable.base, + limit: common_dtable.limit, + padding: Default::default(), + } + } +} + +#[cfg(target_os = "windows")] +impl From for CommonTableRegister { + fn from(other: WHV_REGISTER_VALUE) -> Self { + unsafe { + let table = other.Table; + CommonTableRegister { + base: table.Base, + limit: table.Limit, + } + } + } +} + +#[cfg(target_os = "windows")] +impl From for WHV_REGISTER_VALUE { + fn from(other: 
CommonTableRegister) -> Self { + WHV_REGISTER_VALUE { + Table: WHV_X64_TABLE_REGISTER { + Base: other.base, + Limit: other.limit, + Pad: Default::default(), + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_common_special_registers() -> CommonSpecialRegisters { + let sample_segment = CommonSegmentRegister { + base: 0x1000, + limit: 0xFFFF, + selector: 0x10, + type_: 0xB, + present: 1, + dpl: 0, + db: 1, + s: 1, + l: 0, + g: 1, + avl: 0, + unusable: 0, + padding: 0, + }; + + let sample_table = CommonTableRegister { + base: 0x2000, + limit: 0x1000, + }; + + CommonSpecialRegisters { + cs: sample_segment, + ds: sample_segment, + es: sample_segment, + fs: sample_segment, + gs: sample_segment, + ss: sample_segment, + tr: sample_segment, + ldt: sample_segment, + gdt: sample_table, + idt: sample_table, + cr0: 0xDEAD_BEEF, + cr2: 0xBAD_C0DE, + cr3: 0xC0FFEE, + cr4: 0xFACE_CAFE, + cr8: 0x1234, + efer: 0x5678, + apic_base: 0x9ABC, + interrupt_bitmap: [0; 4], + } + } + + #[cfg(kvm)] + #[test] + fn round_trip_kvm_sregs() { + let original = sample_common_special_registers(); + let kvm_sregs: kvm_sregs = (&original).into(); + let roundtrip = CommonSpecialRegisters::from(&kvm_sregs); + + assert_eq!(original, roundtrip); + } + + #[cfg(mshv)] + #[test] + fn round_trip_mshv_sregs() { + let original = sample_common_special_registers(); + let mshv_sregs: SpecialRegisters = (&original).into(); + let roundtrip = CommonSpecialRegisters::from(&mshv_sregs); + + assert_eq!(original, roundtrip); + } + + #[cfg(target_os = "windows")] + #[test] + fn round_trip_whp_sregs() { + let original = sample_common_special_registers(); + let whp_sregs: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); WHP_SREGS_NAMES_LEN] = + (&original).into(); + let roundtrip = CommonSpecialRegisters::try_from(whp_sregs.as_ref()).unwrap(); + assert_eq!(original, roundtrip); + + // Test duplicate register error + let original = sample_common_special_registers(); + let mut whp_sregs: [(WHV_REGISTER_NAME, 
WHV_REGISTER_VALUE); WHP_SREGS_NAMES_LEN] = + (&original).into(); + whp_sregs[0].0 = WHvX64RegisterDs; + let err = CommonSpecialRegisters::try_from(whp_sregs.as_ref()).unwrap_err(); + assert_eq!( + err, + FromWhpRegisterError::DuplicateRegister(WHvX64RegisterDs.0) + ); + + // Test passing non-sregs register (e.g. RIP) + let original = sample_common_special_registers(); + let mut whp_sregs: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); WHP_SREGS_NAMES_LEN] = + (&original).into(); + whp_sregs[0].0 = WHvX64RegisterRip; + let err = CommonSpecialRegisters::try_from(whp_sregs.as_ref()).unwrap_err(); + assert_eq!( + err, + FromWhpRegisterError::InvalidRegister(WHvX64RegisterRip.0) + ); + } +} diff --git a/src/hyperlight_host/src/hypervisor/regs/standard_regs.rs b/src/hyperlight_host/src/hypervisor/regs/standard_regs.rs new file mode 100644 index 000000000..fcc15605e --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/regs/standard_regs.rs @@ -0,0 +1,349 @@ +#[cfg(mshv2)] +extern crate mshv_bindings2 as mshv_bindings; +#[cfg(mshv2)] +extern crate mshv_ioctls2 as mshv_ioctls; + +#[cfg(mshv3)] +extern crate mshv_bindings3 as mshv_bindings; +#[cfg(mshv3)] +extern crate mshv_ioctls3 as mshv_ioctls; + +#[cfg(kvm)] +use kvm_bindings::kvm_regs; +#[cfg(mshv)] +use mshv_bindings::StandardRegisters; + +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(crate) struct CommonRegisters { + pub rax: u64, + pub rbx: u64, + pub rcx: u64, + pub rdx: u64, + pub rsi: u64, + pub rdi: u64, + pub rsp: u64, + pub rbp: u64, + pub r8: u64, + pub r9: u64, + pub r10: u64, + pub r11: u64, + pub r12: u64, + pub r13: u64, + pub r14: u64, + pub r15: u64, + pub rip: u64, + pub rflags: u64, +} + +// --- KVM --- +#[cfg(kvm)] +impl From<&kvm_regs> for CommonRegisters { + fn from(kvm_regs: &kvm_regs) -> Self { + CommonRegisters { + rax: kvm_regs.rax, + rbx: kvm_regs.rbx, + rcx: kvm_regs.rcx, + rdx: kvm_regs.rdx, + rsi: kvm_regs.rsi, + rdi: kvm_regs.rdi, + rsp: kvm_regs.rsp, + rbp: kvm_regs.rbp, + r8: 
kvm_regs.r8, + r9: kvm_regs.r9, + r10: kvm_regs.r10, + r11: kvm_regs.r11, + r12: kvm_regs.r12, + r13: kvm_regs.r13, + r14: kvm_regs.r14, + r15: kvm_regs.r15, + rip: kvm_regs.rip, + rflags: kvm_regs.rflags, + } + } +} + +#[cfg(kvm)] +impl From<&CommonRegisters> for kvm_regs { + fn from(regs: &CommonRegisters) -> Self { + kvm_regs { + rax: regs.rax, + rbx: regs.rbx, + rcx: regs.rcx, + rdx: regs.rdx, + rsi: regs.rsi, + rdi: regs.rdi, + rsp: regs.rsp, + rbp: regs.rbp, + r8: regs.r8, + r9: regs.r9, + r10: regs.r10, + r11: regs.r11, + r12: regs.r12, + r13: regs.r13, + r14: regs.r14, + r15: regs.r15, + rip: regs.rip, + rflags: regs.rflags, + } + } +} + +// --- MSHV --- + +#[cfg(mshv)] +impl From<&StandardRegisters> for CommonRegisters { + fn from(mshv_regs: &StandardRegisters) -> Self { + CommonRegisters { + rax: mshv_regs.rax, + rbx: mshv_regs.rbx, + rcx: mshv_regs.rcx, + rdx: mshv_regs.rdx, + rsi: mshv_regs.rsi, + rdi: mshv_regs.rdi, + rsp: mshv_regs.rsp, + rbp: mshv_regs.rbp, + r8: mshv_regs.r8, + r9: mshv_regs.r9, + r10: mshv_regs.r10, + r11: mshv_regs.r11, + r12: mshv_regs.r12, + r13: mshv_regs.r13, + r14: mshv_regs.r14, + r15: mshv_regs.r15, + rip: mshv_regs.rip, + rflags: mshv_regs.rflags, + } + } +} + +#[cfg(mshv)] +impl From<&CommonRegisters> for StandardRegisters { + fn from(regs: &CommonRegisters) -> Self { + StandardRegisters { + rax: regs.rax, + rbx: regs.rbx, + rcx: regs.rcx, + rdx: regs.rdx, + rsi: regs.rsi, + rdi: regs.rdi, + rsp: regs.rsp, + rbp: regs.rbp, + r8: regs.r8, + r9: regs.r9, + r10: regs.r10, + r11: regs.r11, + r12: regs.r12, + r13: regs.r13, + r14: regs.r14, + r15: regs.r15, + rip: regs.rip, + rflags: regs.rflags, + } + } +} + +#[cfg(target_os = "windows")] +use windows::Win32::System::Hypervisor::*; + +#[cfg(target_os = "windows")] +impl From<&CommonRegisters> for [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 18] { + fn from(regs: &CommonRegisters) -> Self { + [ + (WHvX64RegisterRax, WHV_REGISTER_VALUE { Reg64: regs.rax }), + (WHvX64RegisterRbx, 
WHV_REGISTER_VALUE { Reg64: regs.rbx }), + (WHvX64RegisterRcx, WHV_REGISTER_VALUE { Reg64: regs.rcx }), + (WHvX64RegisterRdx, WHV_REGISTER_VALUE { Reg64: regs.rdx }), + (WHvX64RegisterRsi, WHV_REGISTER_VALUE { Reg64: regs.rsi }), + (WHvX64RegisterRdi, WHV_REGISTER_VALUE { Reg64: regs.rdi }), + (WHvX64RegisterRsp, WHV_REGISTER_VALUE { Reg64: regs.rsp }), + (WHvX64RegisterRbp, WHV_REGISTER_VALUE { Reg64: regs.rbp }), + (WHvX64RegisterR8, WHV_REGISTER_VALUE { Reg64: regs.r8 }), + (WHvX64RegisterR9, WHV_REGISTER_VALUE { Reg64: regs.r9 }), + (WHvX64RegisterR10, WHV_REGISTER_VALUE { Reg64: regs.r10 }), + (WHvX64RegisterR11, WHV_REGISTER_VALUE { Reg64: regs.r11 }), + (WHvX64RegisterR12, WHV_REGISTER_VALUE { Reg64: regs.r12 }), + (WHvX64RegisterR13, WHV_REGISTER_VALUE { Reg64: regs.r13 }), + (WHvX64RegisterR14, WHV_REGISTER_VALUE { Reg64: regs.r14 }), + (WHvX64RegisterR15, WHV_REGISTER_VALUE { Reg64: regs.r15 }), + (WHvX64RegisterRip, WHV_REGISTER_VALUE { Reg64: regs.rip }), + ( + WHvX64RegisterRflags, + WHV_REGISTER_VALUE { Reg64: regs.rflags }, + ), + ] + } +} + +#[cfg(target_os = "windows")] +use std::collections::HashSet; + +#[cfg(target_os = "windows")] +use super::FromWhpRegisterError; + +#[cfg(target_os = "windows")] +pub(crate) const WHP_REGS_NAMES_LEN: usize = 18; +#[cfg(target_os = "windows")] +pub(crate) const WHP_REGS_NAMES: [WHV_REGISTER_NAME; WHP_REGS_NAMES_LEN] = [ + WHvX64RegisterRax, + WHvX64RegisterRbx, + WHvX64RegisterRcx, + WHvX64RegisterRdx, + WHvX64RegisterRsi, + WHvX64RegisterRdi, + WHvX64RegisterRsp, + WHvX64RegisterRbp, + WHvX64RegisterR8, + WHvX64RegisterR9, + WHvX64RegisterR10, + WHvX64RegisterR11, + WHvX64RegisterR12, + WHvX64RegisterR13, + WHvX64RegisterR14, + WHvX64RegisterR15, + WHvX64RegisterRip, + WHvX64RegisterRflags, +]; + +#[cfg(target_os = "windows")] +impl TryFrom<&[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]> for CommonRegisters { + type Error = FromWhpRegisterError; + + #[expect( + non_upper_case_globals, + reason = "Windows API has 
lowercase register names" + )] + fn try_from(regs: &[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]) -> Result { + if regs.len() != WHP_REGS_NAMES_LEN { + return Err(FromWhpRegisterError::InvalidLength(regs.len())); + } + let mut registers = CommonRegisters::default(); + let mut seen_registers = HashSet::new(); + + for &(name, value) in regs { + let name_id = name.0; + + // Check for duplicates + if !seen_registers.insert(name_id) { + return Err(FromWhpRegisterError::DuplicateRegister(name_id)); + } + + unsafe { + match name { + WHvX64RegisterRax => registers.rax = value.Reg64, + WHvX64RegisterRbx => registers.rbx = value.Reg64, + WHvX64RegisterRcx => registers.rcx = value.Reg64, + WHvX64RegisterRdx => registers.rdx = value.Reg64, + WHvX64RegisterRsi => registers.rsi = value.Reg64, + WHvX64RegisterRdi => registers.rdi = value.Reg64, + WHvX64RegisterRsp => registers.rsp = value.Reg64, + WHvX64RegisterRbp => registers.rbp = value.Reg64, + WHvX64RegisterR8 => registers.r8 = value.Reg64, + WHvX64RegisterR9 => registers.r9 = value.Reg64, + WHvX64RegisterR10 => registers.r10 = value.Reg64, + WHvX64RegisterR11 => registers.r11 = value.Reg64, + WHvX64RegisterR12 => registers.r12 = value.Reg64, + WHvX64RegisterR13 => registers.r13 = value.Reg64, + WHvX64RegisterR14 => registers.r14 = value.Reg64, + WHvX64RegisterR15 => registers.r15 = value.Reg64, + WHvX64RegisterRip => registers.rip = value.Reg64, + WHvX64RegisterRflags => registers.rflags = value.Reg64, + _ => { + // Given unexpected register + return Err(FromWhpRegisterError::InvalidRegister(name_id)); + } + } + } + } + + // Set of all expected register names + let expected_registers: HashSet = + WHP_REGS_NAMES.map(|name| name.0).into_iter().collect(); + + // Technically it should not be possible to have any missing registers at this point + // since we are guaranteed to have 18 non-duplicate registers that have passed the match-arm above, but leaving this here for safety anyway + let missing: HashSet<_> = expected_registers + 
.difference(&seen_registers) + .cloned() + .collect(); + + if !missing.is_empty() { + return Err(FromWhpRegisterError::MissingRegister(missing)); + } + + Ok(registers) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn common_regs() -> CommonRegisters { + CommonRegisters { + rax: 1, + rbx: 2, + rcx: 3, + rdx: 4, + rsi: 5, + rdi: 6, + rsp: 7, + rbp: 8, + r8: 9, + r9: 10, + r10: 11, + r11: 12, + r12: 13, + r13: 14, + r14: 15, + r15: 16, + rip: 17, + rflags: 18, + } + } + #[cfg(kvm)] + #[test] + fn round_trip_kvm_regs() { + let original = common_regs(); + let kvm_regs: kvm_regs = (&original).into(); + let converted: CommonRegisters = (&kvm_regs).into(); + assert_eq!(original, converted); + } + + #[cfg(mshv)] + #[test] + fn round_trip_mshv_regs() { + let original = common_regs(); + let mshv_regs: StandardRegisters = (&original).into(); + let converted: CommonRegisters = (&mshv_regs).into(); + assert_eq!(original, converted); + } + + #[cfg(target_os = "windows")] + #[test] + fn round_trip_whp_regs() { + let original = common_regs(); + let whp_regs: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 18] = (&original).into(); + let converted: CommonRegisters = whp_regs.as_ref().try_into().unwrap(); + assert_eq!(original, converted); + + // test for duplicate register error handling + let original = common_regs(); + let mut whp_regs: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 18] = (&original).into(); + whp_regs[0].0 = WHvX64RegisterRbx; + let err = CommonRegisters::try_from(whp_regs.as_ref()).unwrap_err(); + assert_eq!( + err, + FromWhpRegisterError::DuplicateRegister(WHvX64RegisterRbx.0) + ); + + // test for passing non-standard register (e.g. 
CR8) + let original = common_regs(); + let mut whp_regs: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); 18] = (&original).into(); + whp_regs[0].0 = WHvX64RegisterCr8; + let err = CommonRegisters::try_from(whp_regs.as_ref()).unwrap_err(); + assert_eq!( + err, + FromWhpRegisterError::InvalidRegister(WHvX64RegisterCr8.0) + ); + } +} diff --git a/src/hyperlight_host/src/hypervisor/vm.rs b/src/hyperlight_host/src/hypervisor/vm.rs new file mode 100644 index 000000000..0e3516af3 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/vm.rs @@ -0,0 +1,127 @@ +use std::fmt::Debug; +use std::sync::Arc; + +#[cfg(gdb)] +use std::sync::Mutex; + +use super::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; +#[cfg(gdb)] +use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; +use crate::mem::memory_region::MemoryRegion; +use crate::Result; + +pub(crate) trait Vm: Send + Sync + Debug { + /// Get the standard registers of the vCPU + #[allow(dead_code)] + fn get_regs(&self) -> Result; + /// Set the standard registers of the vCPU + fn set_regs(&self, regs: &CommonRegisters) -> Result<()>; + + /// Get the special registers of the vCPU + fn get_sregs(&self) -> Result; + /// Set the special registers of the vCPU + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()>; + + /// Get the FPU registers of the vCPU + #[allow(dead_code)] + fn get_fpu(&self) -> Result; + /// Set the FPU registers of the vCPU + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()>; + + /// Get a handle which is able to interrupt a blocking running vCPU + fn interrupt_handle(&self) -> Arc; + + /// Map memory regions into this VM + /// + /// Safety: Should only be called once, since memory slots will otherwise be overwritten on KVM + unsafe fn map_memory(&mut self, region: &[MemoryRegion]) -> Result<()>; + + /// Runs the vCPU until it exits + fn run_vcpu(&mut self) -> Result; + + #[cfg(target_os = "windows")] + fn get_partition_handle(&self) -> 
windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE; + + // --- DEBUGGING ------------ + + /// Translates a guest virtual address to a guest physical address + #[cfg(gdb)] + fn translate_gva(&self, gva: u64) -> Result; + + /// Enable/disable debugging + #[cfg(gdb)] + fn set_debug(&mut self, enable: bool) -> Result<()>; + + /// Enable/disable single stepping + #[cfg(gdb)] + fn set_single_step(&mut self, enable: bool) -> Result<()>; + + /// Add a hardware breakpoint at the given address + #[cfg(gdb)] + fn add_hw_breakpoint(&mut self, addr: u64) -> Result<()>; + + /// Remove a hardware breakpoint at the given address + #[cfg(gdb)] + fn remove_hw_breakpoint(&mut self, addr: u64) -> Result<()>; + + /// Add a software breakpoint at the given address + #[cfg(gdb)] + fn add_sw_breakpoint( + &mut self, + addr: u64, + dbg_mem_access_fn: Arc>, + ) -> Result<()>; + + /// Remove a software breakpoint at the given address + #[cfg(gdb)] + fn remove_sw_breakpoint( + &mut self, + addr: u64, + dbg_mem_access_fn: Arc>, + ) -> Result<()>; +} + +#[derive(Debug)] +#[cfg(gdb)] +pub(super) enum DebugExit { + /// The vCPU has exited due to a debug event (usually breakpoint) + Debug { dr6: u64, exception: u32 }, + /// The user has requested to stop the VM during execution (e.g. 
via Ctrl+C inside GDB) + Interrupt, +} + +/// Possible exit reasons of a VM's vCPU +pub(super) enum HyperlightExit { + #[cfg(gdb)] + /// The vCPU has exited due to a debug event + Debug(DebugExit), + /// The vCPU has halted + Halt(), + /// The vCPU has issued a write to the given port with the given value + IoOut(u16, Vec), + /// The vCPU tried to read from the given (unmapped) addr + MmioRead(u64), + /// The vCPU tried to write to the given (unmapped) addr + MmioWrite(u64), + /// The vCPU execution has been cancelled + #[allow(dead_code)] + Cancelled(), + /// The vCPU has exited for a reason that is not handled by Hyperlight + Unknown(String), + /// The operation should be retried, for example this can happen on Linux where a call to run the CPU can return EAGAIN + #[cfg_attr( + target_os = "windows", + expect( + dead_code, + reason = "Retry() is never constructed on Windows, but it is still matched on (which dead_code lint ignores)" + ) + )] + Retry(), +} + +pub trait InterruptHandle: Send + Sync { + /// Interrupt the corresponding sandbox's vcpu if it's running + fn kill(&self); + /// Returns true iff the corresponding sandbox has been dropped + fn dropped(&self) -> bool; +} diff --git a/src/hyperlight_host/src/hypervisor/whp.rs b/src/hyperlight_host/src/hypervisor/whp.rs new file mode 100644 index 000000000..753eb87c7 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/whp.rs @@ -0,0 +1,410 @@ +/* +Copyright 2024 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +use std::os::raw::c_void; + +use hyperlight_common::mem::PAGE_SIZE_USIZE; +use windows::core::s; +use windows::Win32::Foundation::{FreeLibrary, HANDLE}; +use windows::Win32::System::Hypervisor::*; +use windows::Win32::System::LibraryLoader::*; +use windows_result::HRESULT; + +#[cfg(gdb)] +use super::handlers::DbgMemAccessHandlerWrapper; +use super::regs::{ + AlignedRegisterValues, WHP_FPU_NAMES, WHP_FPU_NAMES_LEN, WHP_REGS_NAMES, WHP_REGS_NAMES_LEN, + WHP_SREGS_NAMES, WHP_SREGS_NAMES_LEN, +}; +use super::vm::HyperlightExit; +use super::wrappers::HandleWrapper; +use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; +use crate::hypervisor::surrogate_process::SurrogateProcess; +use crate::hypervisor::surrogate_process_manager::get_surrogate_process_manager; +use crate::hypervisor::vm::Vm; +use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; +use crate::{new_error, Result}; + +pub(crate) fn is_hypervisor_present() -> bool { + let mut capability: WHV_CAPABILITY = Default::default(); + let written_size: Option<*mut u32> = None; + + match unsafe { + WHvGetCapability( + WHvCapabilityCodeHypervisorPresent, + &mut capability as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + written_size, + ) + } { + Ok(_) => unsafe { capability.HypervisorPresent.as_bool() }, + Err(_) => { + log::info!("Windows Hypervisor Platform is not available on this system"); + false + } + } +} + +// This function dynamically loads the WHvMapGpaRange2 function from the winhvplatform.dll +// WHvMapGpaRange2 only available on Windows 11 or Windows Server 2022 and later +// we do things this way to allow a user trying to load hyperlight on an older version of windows to +// get an error message saying that hyperlight requires a newer version of windows, rather than just failing +// with an error about a missing entrypoint +// This function should 
always succeed since before we get here we have already checked that the hypervisor is present and +// that we are on a supported version of windows. +type WHvMapGpaRange2Func = unsafe extern "cdecl" fn( + WHV_PARTITION_HANDLE, + HANDLE, + *const c_void, + u64, + u64, + WHV_MAP_GPA_RANGE_FLAGS, +) -> HRESULT; + +/// A Hypervisor driver for HyperV-on-Windows. +#[derive(Debug)] +pub(crate) struct WhpVm { + partition: WHV_PARTITION_HANDLE, + // Lazily create the surrogate process when we need to map memory + surrogate_process: Option, + mmap_file_handle: HandleWrapper, +} + +unsafe impl Send for WhpVm {} +unsafe impl Sync for WhpVm {} + +#[repr(C, align(16))] +struct Align16(T); +#[allow(clippy::disallowed_macros)] // compile time +const _: () = { + assert!( + std::mem::size_of::>() + == std::mem::size_of::() + ); +}; + +impl WhpVm { + pub(crate) fn new(mmap_file_handle: HandleWrapper) -> Result { + const NUM_CPU: u32 = 1; + let partition = unsafe { + let partition = WHvCreatePartition()?; + WHvSetPartitionProperty( + partition, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + )?; + WHvSetupPartition(partition)?; + WHvCreateVirtualProcessor(partition, 0, 0)?; + partition + }; + + Ok(WhpVm { + partition, + surrogate_process: None, + mmap_file_handle, + }) + } + + /// Helper for setting arbitrary registers. 
+ fn set_registers(&self, registers: &[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]) -> Result<()> { + let register_count = registers.len(); + + // Prepare register names (no special alignment needed) + let mut register_names = Vec::with_capacity(register_count); + let mut register_values = Vec::with_capacity(register_count); + + for (key, value) in registers.iter() { + register_names.push(*key); + register_values.push(Align16(*value)); + } + + unsafe { + WHvSetVirtualProcessorRegisters( + self.partition, + 0, + register_names.as_ptr(), + register_count as u32, + register_values.as_ptr() as *const WHV_REGISTER_VALUE, + )?; + } + + Ok(()) + } +} + +impl Vm for WhpVm { + fn get_regs(&self) -> Result { + let mut whv_regs_values = + AlignedRegisterValues::(unsafe { std::mem::zeroed() }); + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_REGS_NAMES.as_ptr(), + whv_regs_values.0.len() as u32, + whv_regs_values.0.as_mut_ptr(), + )?; + } + + WHP_REGS_NAMES + .into_iter() + .zip(whv_regs_values.0) + .collect::>() + .as_slice() + .try_into() + .map_err(|e| { + new_error!( + "Failed to convert WHP registers to CommonRegisters: {:?}", + e + ) + }) + } + + fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { + let whp_regs: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); WHP_REGS_NAMES_LEN] = regs.into(); + self.set_registers(&whp_regs)?; + Ok(()) + } + + fn get_sregs(&self) -> Result { + let mut whp_sregs_values = + AlignedRegisterValues::(unsafe { std::mem::zeroed() }); + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_SREGS_NAMES.as_ptr(), + whp_sregs_values.0.len() as u32, + whp_sregs_values.0.as_mut_ptr(), + )?; + } + + WHP_SREGS_NAMES + .into_iter() + .zip(whp_sregs_values.0) + .collect::>() + .as_slice() + .try_into() + .map_err(|e| { + new_error!( + "Failed to convert WHP registers to CommonSpecialRegisters: {:?}", + e + ) + }) + } + + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { + let whp_regs: 
[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); WHP_SREGS_NAMES_LEN] = sregs.into(); + self.set_registers(&whp_regs)?; + Ok(()) + } + + fn get_fpu(&self) -> Result { + let mut whp_fpu_values = + AlignedRegisterValues::(unsafe { std::mem::zeroed() }); + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_FPU_NAMES.as_ptr(), + whp_fpu_values.0.len() as u32, + whp_fpu_values.0.as_mut_ptr(), + )?; + } + + WHP_FPU_NAMES + .into_iter() + .zip(whp_fpu_values.0) + .collect::>() + .as_slice() + .try_into() + .map_err(|e| new_error!("Failed to convert WHP registers to CommonFpu: {:?}", e)) + } + + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { + let whp_fpu: [(WHV_REGISTER_NAME, WHV_REGISTER_VALUE); WHP_FPU_NAMES_LEN] = fpu.into(); + self.set_registers(&whp_fpu)?; + Ok(()) + } + + unsafe fn map_memory(&mut self, regions: &[MemoryRegion]) -> Result<()> { + if regions.is_empty() { + return Err(new_error!("No memory regions to map")); + } + if self.surrogate_process.is_some() { + return Err(new_error!("Memory has already been mapped")); + } + + let size: usize = regions.iter().map(|r| r.host_region.len()).sum(); + let raw_size = size + 2 * PAGE_SIZE_USIZE; + let raw_source_address = regions[0].host_region.start - PAGE_SIZE_USIZE; + + // get a surrogate process with preallocated memory of size SharedMemory::raw_mem_size() + // with guard pages setup + let surrogate_process = { + let mgr = get_surrogate_process_manager()?; + mgr.get_surrogate_process( + raw_size, + raw_source_address as *const c_void, + self.mmap_file_handle, + ) + }?; + + let process_handle: HANDLE = surrogate_process.process_handle.into(); + // The function pointer to WHvMapGpaRange2 is resolved dynamically to allow us to detect + // when we are running on older versions of windows that do not support this API and + // return a more informative error message, rather than failing with an error about a missing entrypoint + let whvmapgparange2_func = unsafe { + match 
try_load_whv_map_gpa_range2() { + Ok(func) => func, + Err(e) => return Err(new_error!("Can't find API: {}", e)), + } + }; + + regions.iter().try_for_each(|region| unsafe { + let flags = region + .flags + .iter() + .map(|flag| match flag { + MemoryRegionFlags::NONE => Ok(WHvMapGpaRangeFlagNone), + MemoryRegionFlags::READ => Ok(WHvMapGpaRangeFlagRead), + MemoryRegionFlags::WRITE => Ok(WHvMapGpaRangeFlagWrite), + MemoryRegionFlags::EXECUTE => Ok(WHvMapGpaRangeFlagExecute), + _ => Err(new_error!("Invalid Memory Region Flag")), + }) + .collect::>>()? + .iter() + .fold(WHvMapGpaRangeFlagNone, |acc, flag| acc | *flag); // collect using bitwise OR + + let res = whvmapgparange2_func( + self.partition, + process_handle, + region.host_region.start as *const c_void, + region.guest_region.start as u64, + region.guest_region.len() as u64, + flags, + ); + if res.is_err() { + return Err(new_error!("Call to WHvMapGpaRange2 failed")); + } + Ok(()) + })?; + + self.surrogate_process = Some(surrogate_process); + + Ok(()) + } + + #[expect(non_upper_case_globals, reason = "Windows API constant are lower case")] + fn run_vcpu(&mut self) -> Result { + let mut exit_context: WHV_RUN_VP_EXIT_CONTEXT = Default::default(); + + unsafe { + WHvRunVirtualProcessor( + self.partition, + 0, + &mut exit_context as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + )?; + } + + let result = match exit_context.ExitReason { + WHvRunVpExitReasonX64IoPortAccess => unsafe { + let instruction_length = exit_context.VpContext._bitfield & 0xF; + let rip = exit_context.VpContext.Rip + instruction_length as u64; + self.set_registers(&[(WHvX64RegisterRip, WHV_REGISTER_VALUE { Reg64: rip })])?; + HyperlightExit::IoOut( + exit_context.Anonymous.IoPortAccess.PortNumber, + exit_context + .Anonymous + .IoPortAccess + .Rax + .to_le_bytes() + .to_vec(), + ) + }, + WHvRunVpExitReasonX64Halt => HyperlightExit::Halt(), + WHvRunVpExitReasonMemoryAccess => { + let gpa = unsafe { exit_context.Anonymous.MemoryAccess.Gpa }; 
+ let access_info = unsafe { + WHV_MEMORY_ACCESS_TYPE( + // 2 first bits are the access type, see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/memoryaccess#syntax + (exit_context.Anonymous.MemoryAccess.AccessInfo.AsUINT32 & 0b11) as i32, + ) + }; + let access_info = MemoryRegionFlags::try_from(access_info)?; + match access_info { + MemoryRegionFlags::READ => HyperlightExit::MmioRead(gpa), + MemoryRegionFlags::WRITE => HyperlightExit::MmioWrite(gpa), + _ => HyperlightExit::Unknown("Unknown memory access type".to_string()), + } + } + // Execution was cancelled by the host. + // This will happen when guest code runs for too long + WHvRunVpExitReasonCanceled => HyperlightExit::Cancelled(), + WHV_RUN_VP_EXIT_REASON(_) => HyperlightExit::Unknown(format!( + "Unknown exit reason '{}'", + exit_context.ExitReason.0 + )), + }; + Ok(result) + } + + fn get_partition_handle(&self) -> WHV_PARTITION_HANDLE { + self.partition + } +} + +impl Drop for WhpVm { + fn drop(&mut self) { + if let Err(e) = unsafe { WHvDeletePartition(self.partition) } { + log::error!("Failed to delete partition: {}", e); + } + } +} + +unsafe fn try_load_whv_map_gpa_range2() -> Result { + let library = unsafe { + LoadLibraryExA( + s!("winhvplatform.dll"), + None, + LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, + ) + }; + + if let Err(e) = library { + return Err(new_error!("{}", e)); + } + + #[allow(clippy::unwrap_used)] + // We know this will succeed because we just checked for an error above + let library = library.unwrap(); + + let address = unsafe { GetProcAddress(library, s!("WHvMapGpaRange2")) }; + + if address.is_none() { + unsafe { FreeLibrary(library)? 
}; + return Err(new_error!( + "Failed to find WHvMapGpaRange2 in winhvplatform.dll" + )); + } + + unsafe { Ok(std::mem::transmute_copy(&address)) } +} diff --git a/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs b/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs deleted file mode 100644 index 14fd86b1d..000000000 --- a/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs +++ /dev/null @@ -1,533 +0,0 @@ -/* -Copyright 2024 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -use core::ffi::c_void; - -use tracing::{instrument, Span}; -use windows::core::s; -use windows::Win32::Foundation::{FreeLibrary, HANDLE}; -use windows::Win32::System::Hypervisor::*; -use windows::Win32::System::LibraryLoader::*; -use windows_result::HRESULT; - -use super::wrappers::HandleWrapper; -use crate::hypervisor::wrappers::{WHvFPURegisters, WHvGeneralRegisters, WHvSpecialRegisters}; -use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::{new_error, Result}; - -/// Interop calls for Windows Hypervisor Platform APIs -/// -/// Documentation can be found at: -/// - https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/hypervisor-platform -/// - https://microsoft.github.io/windows-docs-rs/doc/windows/Win32/System/Hypervisor/index.html -#[instrument(skip_all, parent = Span::current(), level= "Trace")] -pub(crate) fn is_hypervisor_present() -> bool { - let mut capability: WHV_CAPABILITY = Default::default(); - let written_size: Option<*mut u32> = None; - - match unsafe { - WHvGetCapability( - WHvCapabilityCodeHypervisorPresent, - &mut capability as *mut _ as *mut c_void, - std::mem::size_of::() as u32, - written_size, - ) - } { - Ok(_) => unsafe { capability.HypervisorPresent.as_bool() }, - Err(_) => { - log::info!("Windows Hypervisor Platform is not available on this system"); - false - } - } -} - -#[derive(Debug)] -pub(super) struct VMPartition(WHV_PARTITION_HANDLE); - -impl VMPartition { - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn new(proc_count: u32) -> Result { - let hdl = unsafe { WHvCreatePartition() }?; - Self::set_processor_count(&hdl, proc_count)?; - unsafe { WHvSetupPartition(hdl) }?; - Ok(Self(hdl)) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - fn set_processor_count( - partition_handle: &WHV_PARTITION_HANDLE, - processor_count: u32, - ) -> Result<()> { - unsafe { - WHvSetPartitionProperty( - *partition_handle, 
- WHvPartitionPropertyCodeProcessorCount, - &processor_count as *const u32 as *const c_void, - std::mem::size_of_val(&processor_count) as u32, - )?; - } - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn map_gpa_range( - &mut self, - regions: &[MemoryRegion], - process_handle: HandleWrapper, - ) -> Result<()> { - let process_handle: HANDLE = process_handle.into(); - // The function pointer to WHvMapGpaRange2 is resolved dynamically to allow us to detect - // when we are running on older versions of windows that do not support this API and - // return a more informative error message, rather than failing with an error about a missing entrypoint - let whvmapgparange2_func = unsafe { - match try_load_whv_map_gpa_range2() { - Ok(func) => func, - Err(e) => return Err(new_error!("Can't find API: {}", e)), - } - }; - - regions.iter().try_for_each(|region| unsafe { - let flags = region - .flags - .iter() - .map(|flag| match flag { - MemoryRegionFlags::NONE => Ok(WHvMapGpaRangeFlagNone), - MemoryRegionFlags::READ => Ok(WHvMapGpaRangeFlagRead), - MemoryRegionFlags::WRITE => Ok(WHvMapGpaRangeFlagWrite), - MemoryRegionFlags::EXECUTE => Ok(WHvMapGpaRangeFlagExecute), - MemoryRegionFlags::STACK_GUARD => Ok(WHvMapGpaRangeFlagNone), - _ => Err(new_error!("Invalid Memory Region Flag")), - }) - .collect::>>()? 
- .iter() - .fold(WHvMapGpaRangeFlagNone, |acc, flag| acc | *flag); // collect using bitwise OR - - let res = whvmapgparange2_func( - self.0, - process_handle, - region.host_region.start as *const c_void, - region.guest_region.start as u64, - (region.guest_region.end - region.guest_region.start) as u64, - flags, - ); - if res.is_err() { - return Err(new_error!("Call to WHvMapGpaRange2 failed")); - } - Ok(()) - })?; - Ok(()) - } -} - -// This function dynamically loads the WHvMapGpaRange2 function from the winhvplatform.dll -// WHvMapGpaRange2 only available on Windows 11 or Windows Server 2022 and later -// we do things this way to allow a user trying to load hyperlight on an older version of windows to -// get an error message saying that hyperlight requires a newer version of windows, rather than just failing -// with an error about a missing entrypoint -// This function should always succeed since before we get here we have already checked that the hypervisor is present and -// that we are on a supported version of windows. -type WHvMapGpaRange2Func = unsafe extern "cdecl" fn( - WHV_PARTITION_HANDLE, - HANDLE, - *const c_void, - u64, - u64, - WHV_MAP_GPA_RANGE_FLAGS, -) -> HRESULT; - -pub unsafe fn try_load_whv_map_gpa_range2() -> Result { - let library = unsafe { - LoadLibraryExA( - s!("winhvplatform.dll"), - None, - LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, - ) - }; - - if let Err(e) = library { - return Err(new_error!("{}", e)); - } - - #[allow(clippy::unwrap_used)] - // We know this will succeed because we just checked for an error above - let library = library.unwrap(); - - let address = unsafe { GetProcAddress(library, s!("WHvMapGpaRange2")) }; - - if address.is_none() { - unsafe { FreeLibrary(library)? 
}; - return Err(new_error!( - "Failed to find WHvMapGpaRange2 in winhvplatform.dll" - )); - } - - unsafe { Ok(std::mem::transmute_copy(&address)) } -} - -impl Drop for VMPartition { - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn drop(&mut self) { - if let Err(e) = unsafe { WHvDeletePartition(self.0) } { - tracing::error!( - "Failed to delete partition (WHvDeletePartition failed): {:?}", - e - ); - } - } -} - -#[derive(Debug)] -pub(super) struct VMProcessor(VMPartition); -impl VMProcessor { - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn new(part: VMPartition) -> Result { - unsafe { WHvCreateVirtualProcessor(part.0, 0, 0) }?; - Ok(Self(part)) - } - - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_partition_hdl(&self) -> WHV_PARTITION_HANDLE { - let part = &self.0; - part.0 - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn set_registers( - &mut self, - registers: &[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)], - ) -> Result<()> { - let partition_handle = self.get_partition_hdl(); - let register_count = registers.len(); - let mut register_names: Vec = vec![]; - let mut register_values: Vec = vec![]; - - for (key, value) in registers.iter() { - register_names.push(*key); - register_values.push(*value); - } - - unsafe { - WHvSetVirtualProcessorRegisters( - partition_handle, - 0, - register_names.as_ptr(), - register_count as u32, - register_values.as_ptr(), - )?; - } - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_sregs(&self) -> Result { - const LEN: usize = 17; - - let names: [WHV_REGISTER_NAME; LEN] = [ - WHvX64RegisterCr0, - WHvX64RegisterCr2, - WHvX64RegisterCr3, - WHvX64RegisterCr4, - WHvX64RegisterCr8, - WHvX64RegisterEfer, - WHvX64RegisterApicBase, - WHvX64RegisterCs, - WHvX64RegisterDs, - WHvX64RegisterEs, - WHvX64RegisterFs, - 
WHvX64RegisterGs, - WHvX64RegisterSs, - WHvX64RegisterTr, - WHvX64RegisterLdtr, - WHvX64RegisterGdtr, - WHvX64RegisterIdtr, - ]; - - let mut out: [WHV_REGISTER_VALUE; LEN] = unsafe { std::mem::zeroed() }; - unsafe { - WHvGetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - names.as_ptr(), - LEN as u32, - out.as_mut_ptr(), - )?; - } - - let res: WHvSpecialRegisters = WHvSpecialRegisters { - cr0: out[0], - cr2: out[1], - cr3: out[2], - cr4: out[3], - cr8: out[4], - efer: out[5], - apic_base: out[6], - cs: out[7], - ds: out[8], - es: out[9], - fs: out[10], - gs: out[11], - ss: out[12], - tr: out[13], - ldtr: out[14], - gdtr: out[15], - idtr: out[16], - }; - - Ok(res) - } - - // Sets the registers for the VMProcessor to the given general purpose registers. - // If you want to set other registers, use `set_registers` instead. - pub(super) fn set_general_purpose_registers( - &mut self, - regs: &WHvGeneralRegisters, - ) -> Result<()> { - const LEN: usize = 18; - - let names: [WHV_REGISTER_NAME; LEN] = [ - WHvX64RegisterRax, - WHvX64RegisterRbx, - WHvX64RegisterRcx, - WHvX64RegisterRdx, - WHvX64RegisterRsi, - WHvX64RegisterRdi, - WHvX64RegisterRsp, - WHvX64RegisterRbp, - WHvX64RegisterR8, - WHvX64RegisterR9, - WHvX64RegisterR10, - WHvX64RegisterR11, - WHvX64RegisterR12, - WHvX64RegisterR13, - WHvX64RegisterR14, - WHvX64RegisterR15, - WHvX64RegisterRip, - WHvX64RegisterRflags, - ]; - - let values: [WHV_REGISTER_VALUE; LEN] = [ - WHV_REGISTER_VALUE { Reg64: regs.rax }, - WHV_REGISTER_VALUE { Reg64: regs.rbx }, - WHV_REGISTER_VALUE { Reg64: regs.rcx }, - WHV_REGISTER_VALUE { Reg64: regs.rdx }, - WHV_REGISTER_VALUE { Reg64: regs.rsi }, - WHV_REGISTER_VALUE { Reg64: regs.rdi }, - WHV_REGISTER_VALUE { Reg64: regs.rsp }, - WHV_REGISTER_VALUE { Reg64: regs.rbp }, - WHV_REGISTER_VALUE { Reg64: regs.r8 }, - WHV_REGISTER_VALUE { Reg64: regs.r9 }, - WHV_REGISTER_VALUE { Reg64: regs.r10 }, - WHV_REGISTER_VALUE { Reg64: regs.r11 }, - WHV_REGISTER_VALUE { Reg64: regs.r12 }, - 
WHV_REGISTER_VALUE { Reg64: regs.r13 }, - WHV_REGISTER_VALUE { Reg64: regs.r14 }, - WHV_REGISTER_VALUE { Reg64: regs.r15 }, - WHV_REGISTER_VALUE { Reg64: regs.rip }, - WHV_REGISTER_VALUE { Reg64: regs.rflags }, - ]; - - unsafe { - WHvSetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - names.as_ptr(), - LEN as u32, - values.as_ptr(), - )?; - } - Ok(()) - } - - pub(super) fn get_regs(&self) -> Result { - const LEN: usize = 18; - - let names: [WHV_REGISTER_NAME; LEN] = [ - WHvX64RegisterRax, - WHvX64RegisterRbx, - WHvX64RegisterRcx, - WHvX64RegisterRdx, - WHvX64RegisterRsi, - WHvX64RegisterRdi, - WHvX64RegisterRsp, - WHvX64RegisterRbp, - WHvX64RegisterR8, - WHvX64RegisterR9, - WHvX64RegisterR10, - WHvX64RegisterR11, - WHvX64RegisterR12, - WHvX64RegisterR13, - WHvX64RegisterR14, - WHvX64RegisterR15, - WHvX64RegisterRip, - WHvX64RegisterRflags, - ]; - - let mut out: [WHV_REGISTER_VALUE; LEN] = unsafe { std::mem::zeroed() }; - unsafe { - WHvGetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - names.as_ptr(), - LEN as u32, - out.as_mut_ptr(), - )?; - Ok(WHvGeneralRegisters { - rax: out[0].Reg64, - rbx: out[1].Reg64, - rcx: out[2].Reg64, - rdx: out[3].Reg64, - rsi: out[4].Reg64, - rdi: out[5].Reg64, - rsp: out[6].Reg64, - rbp: out[7].Reg64, - r8: out[8].Reg64, - r9: out[9].Reg64, - r10: out[10].Reg64, - r11: out[11].Reg64, - r12: out[12].Reg64, - r13: out[13].Reg64, - r14: out[14].Reg64, - r15: out[15].Reg64, - rip: out[16].Reg64, - rflags: out[17].Reg64, - }) - } - } - - pub(super) fn set_fpu(&mut self, regs: &WHvFPURegisters) -> Result<()> { - const LEN: usize = 26; - - let names: [WHV_REGISTER_NAME; LEN] = [ - WHvX64RegisterXmm0, - WHvX64RegisterXmm1, - WHvX64RegisterXmm2, - WHvX64RegisterXmm3, - WHvX64RegisterXmm4, - WHvX64RegisterXmm5, - WHvX64RegisterXmm6, - WHvX64RegisterXmm7, - WHvX64RegisterXmm8, - WHvX64RegisterXmm9, - WHvX64RegisterXmm10, - WHvX64RegisterXmm11, - WHvX64RegisterXmm12, - WHvX64RegisterXmm13, - WHvX64RegisterXmm14, - 
WHvX64RegisterXmm15, - WHvX64RegisterFpMmx0, - WHvX64RegisterFpMmx1, - WHvX64RegisterFpMmx2, - WHvX64RegisterFpMmx3, - WHvX64RegisterFpMmx4, - WHvX64RegisterFpMmx5, - WHvX64RegisterFpMmx6, - WHvX64RegisterFpMmx7, - WHvX64RegisterFpControlStatus, - WHvX64RegisterXmmControlStatus, - ]; - - let xmm_regs = [ - regs.xmm0, regs.xmm1, regs.xmm2, regs.xmm3, regs.xmm4, regs.xmm5, regs.xmm6, regs.xmm7, - regs.xmm8, regs.xmm9, regs.xmm10, regs.xmm11, regs.xmm12, regs.xmm13, regs.xmm14, - regs.xmm15, - ]; - - let mut values: Vec = xmm_regs - .iter() - .map(|®| WHV_REGISTER_VALUE { - Fp: WHV_X64_FP_REGISTER { - AsUINT128: WHV_UINT128 { - Anonymous: WHV_UINT128_0 { - Low64: reg as u64, - High64: (reg >> 64) as u64, - }, - }, - }, - }) - .collect(); - - values.extend_from_slice(&[ - WHV_REGISTER_VALUE { Reg64: regs.mmx0 }, - WHV_REGISTER_VALUE { Reg64: regs.mmx1 }, - WHV_REGISTER_VALUE { Reg64: regs.mmx2 }, - WHV_REGISTER_VALUE { Reg64: regs.mmx3 }, - WHV_REGISTER_VALUE { Reg64: regs.mmx4 }, - WHV_REGISTER_VALUE { Reg64: regs.mmx5 }, - WHV_REGISTER_VALUE { Reg64: regs.mmx6 }, - WHV_REGISTER_VALUE { Reg64: regs.mmx7 }, - WHV_REGISTER_VALUE { - FpControlStatus: WHV_X64_FP_CONTROL_STATUS_REGISTER { - Anonymous: WHV_X64_FP_CONTROL_STATUS_REGISTER_0 { - FpControl: regs.fp_control_word, - FpTag: regs.fp_tag_word, - ..Default::default() - }, - }, - }, - WHV_REGISTER_VALUE { - XmmControlStatus: WHV_X64_XMM_CONTROL_STATUS_REGISTER { - Anonymous: WHV_X64_XMM_CONTROL_STATUS_REGISTER_0 { - XmmStatusControl: regs.mxcsr, - ..Default::default() - }, - }, - }, - ]); - - unsafe { - WHvSetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - names.as_ptr(), - LEN as u32, - values.as_ptr(), - )?; - } - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn run(&mut self) -> Result { - let partition_handle = self.get_partition_hdl(); - let mut exit_context: WHV_RUN_VP_EXIT_CONTEXT = Default::default(); - - unsafe { - 
WHvRunVirtualProcessor( - partition_handle, - 0, - &mut exit_context as *mut _ as *mut c_void, - std::mem::size_of::() as u32, - )?; - } - - Ok(exit_context) - } -} - -impl Drop for VMProcessor { - #[instrument(parent = Span::current(), level= "Trace")] - fn drop(&mut self) { - let part_hdl = self.get_partition_hdl(); - if let Err(e) = unsafe { WHvDeleteVirtualProcessor(part_hdl, 0) } { - tracing::error!( - "Failed to delete virtual processor (WHvDeleteVirtualProcessor failed): {:?}", - e - ); - } - } -} diff --git a/src/hyperlight_host/src/hypervisor/wrappers.rs b/src/hyperlight_host/src/hypervisor/wrappers.rs index aa9943d6a..c6f64937f 100644 --- a/src/hyperlight_host/src/hypervisor/wrappers.rs +++ b/src/hyperlight_host/src/hypervisor/wrappers.rs @@ -19,7 +19,6 @@ use std::ffi::CString; use tracing::{instrument, Span}; use windows::core::PSTR; use windows::Win32::Foundation::{HANDLE, HMODULE}; -use windows::Win32::System::Hypervisor::WHV_REGISTER_VALUE; use crate::{HyperlightError, Result}; @@ -57,84 +56,6 @@ impl From<&PSTRWrapper> for PSTR { } } -// only used on windows. 
mshv and kvm already has this implemented -#[derive(Debug, Default, Copy, Clone, PartialEq)] -pub(super) struct WHvGeneralRegisters { - pub rax: u64, - pub rbx: u64, - pub rcx: u64, - pub rdx: u64, - pub rsi: u64, - pub rdi: u64, - pub rsp: u64, - pub rbp: u64, - pub r8: u64, - pub r9: u64, - pub r10: u64, - pub r11: u64, - pub r12: u64, - pub r13: u64, - pub r14: u64, - pub r15: u64, - pub rip: u64, - pub rflags: u64, -} - -#[derive(Debug, Default, Copy, Clone, PartialEq)] -pub(super) struct WHvFPURegisters { - pub xmm0: u128, - pub xmm1: u128, - pub xmm2: u128, - pub xmm3: u128, - pub xmm4: u128, - pub xmm5: u128, - pub xmm6: u128, - pub xmm7: u128, - pub xmm8: u128, - pub xmm9: u128, - pub xmm10: u128, - pub xmm11: u128, - pub xmm12: u128, - pub xmm13: u128, - pub xmm14: u128, - pub xmm15: u128, - - pub mmx0: u64, - pub mmx1: u64, - pub mmx2: u64, - pub mmx3: u64, - pub mmx4: u64, - pub mmx5: u64, - pub mmx6: u64, - pub mmx7: u64, - - pub fp_control_word: u16, - pub fp_tag_word: u8, - - pub mxcsr: u32, -} - -#[derive(Default, Copy, Clone)] -pub(super) struct WHvSpecialRegisters { - pub cr0: WHV_REGISTER_VALUE, - pub cr2: WHV_REGISTER_VALUE, - pub cr3: WHV_REGISTER_VALUE, - pub cr4: WHV_REGISTER_VALUE, - pub cr8: WHV_REGISTER_VALUE, - pub efer: WHV_REGISTER_VALUE, - pub apic_base: WHV_REGISTER_VALUE, - pub cs: WHV_REGISTER_VALUE, - pub ds: WHV_REGISTER_VALUE, - pub es: WHV_REGISTER_VALUE, - pub fs: WHV_REGISTER_VALUE, - pub gs: WHV_REGISTER_VALUE, - pub ss: WHV_REGISTER_VALUE, - pub tr: WHV_REGISTER_VALUE, - pub ldtr: WHV_REGISTER_VALUE, - pub gdtr: WHV_REGISTER_VALUE, - pub idtr: WHV_REGISTER_VALUE, -} - /// Wrapper for HANDLE, required since HANDLE is no longer Send. 
#[derive(Debug, Copy, Clone)] pub struct HandleWrapper(HANDLE); diff --git a/src/hyperlight_host/src/lib.rs b/src/hyperlight_host/src/lib.rs index c18027d63..1c55f13c9 100644 --- a/src/hyperlight_host/src/lib.rs +++ b/src/hyperlight_host/src/lib.rs @@ -13,7 +13,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#![deny(dead_code, missing_docs, unused_mut)] //! This crate contains an SDK that is used to execute specially- // compiled binaries within a very lightweight hypervisor environment. diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 7dea36cde..3d8da07b0 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -604,11 +604,8 @@ impl SandboxMemoryLayout { } // guard page - let stack_offset = builder.push_page_aligned( - PAGE_SIZE_USIZE, - MemoryRegionFlags::READ | MemoryRegionFlags::STACK_GUARD, - GuardPage, - ); + let stack_offset = + builder.push_page_aligned(PAGE_SIZE_USIZE, MemoryRegionFlags::READ, GuardPage); let expected_stack_offset = TryInto::::try_into(self.guest_user_stack_buffer_offset)?; diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index 46177bb4b..1be20bd17 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -55,8 +55,6 @@ bitflags! { const WRITE = 2; /// allow guest to execute const EXECUTE = 4; - /// identifier that this is a stack guard page - const STACK_GUARD = 8; } } diff --git a/src/hyperlight_host/src/sandbox/hypervisor.rs b/src/hyperlight_host/src/sandbox/hypervisor.rs index 4ea4b68d7..04282a708 100644 --- a/src/hyperlight_host/src/sandbox/hypervisor.rs +++ b/src/hyperlight_host/src/sandbox/hypervisor.rs @@ -17,10 +17,12 @@ limitations under the License. 
use std::fmt::Debug; use std::sync::OnceLock; -#[cfg(mshv)] -use crate::hypervisor::hyperv_linux; #[cfg(kvm)] use crate::hypervisor::kvm; +#[cfg(mshv)] +use crate::hypervisor::mshv; +#[cfg(target_os = "windows")] +use crate::hypervisor::whp; static AVAILABLE_HYPERVISOR: OnceLock> = OnceLock::new(); @@ -31,7 +33,7 @@ pub fn get_available_hypervisor() -> &'static Option { // If both features are enabled, we need to determine hypervisor at runtime. // Currently /dev/kvm and /dev/mshv cannot exist on the same machine, so the first one // that works is guaranteed to be correct. - if hyperv_linux::is_hypervisor_present() { + if mshv::is_hypervisor_present() { Some(HypervisorType::Mshv) } else if kvm::is_hypervisor_present() { Some(HypervisorType::Kvm) @@ -45,15 +47,13 @@ pub fn get_available_hypervisor() -> &'static Option { None } } else if #[cfg(mshv)] { - if hyperv_linux::is_hypervisor_present() { + if mshv::is_hypervisor_present() { Some(HypervisorType::Mshv) } else { None } } else if #[cfg(target_os = "windows")] { - use crate::sandbox::windows_hypervisor_platform; - - if windows_hypervisor_platform::is_hypervisor_present() { + if whp::is_hypervisor_present() { Some(HypervisorType::Whp) } else { None diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 8769e1643..0e34db462 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -16,6 +16,7 @@ limitations under the License. 
use std::sync::{Arc, Mutex}; +use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType}; use hyperlight_common::flatbuffer_wrappers::function_types::{ ParameterValue, ReturnType, ReturnValue, }; @@ -24,12 +25,15 @@ use tracing::{instrument, Span}; use super::host_funcs::FunctionRegistry; use super::{MemMgrWrapper, WrapperGetter}; use crate::func::call_ctx::MultiUseGuestCallContext; -use crate::func::guest_dispatch::call_function_on_guest; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; +use crate::func::guest_err::check_for_guest_error; +use crate::hypervisor::handlers::{MemAccessHandlerCaller, OutBHandlerCaller}; +use crate::hypervisor::{HyperlightVm, InterruptHandle}; +use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::HostSharedMemory; +use crate::metrics::maybe_time_and_emit_guest_call; use crate::sandbox_state::sandbox::{DevolvableSandbox, EvolvableSandbox, Sandbox}; use crate::sandbox_state::transition::{MultiUseContextCallback, Noop}; -use crate::Result; +use crate::{HyperlightError, Result}; /// A sandbox that supports being used Multiple times. /// The implication of being used multiple times is two-fold: @@ -43,26 +47,10 @@ pub struct MultiUseSandbox { // We need to keep a reference to the host functions, even if the compiler marks it as unused. The compiler cannot detect our dynamic usages of the host function in `HyperlightFunction::call`. pub(super) _host_funcs: Arc>, pub(crate) mem_mgr: MemMgrWrapper, - hv_handler: HypervisorHandler, -} - -// We need to implement drop to join the -// threads, because, otherwise, we will -// be leaking a thread with every -// sandbox that is dropped. This was initially -// caught by our benchmarks that created a ton of -// sandboxes and caused the system to run out of -// resources. Now, this is covered by the test: -// `create_1000_sandboxes`. 
-impl Drop for MultiUseSandbox { - fn drop(&mut self) { - match self.hv_handler.kill_hypervisor_handler_thread() { - Ok(_) => {} - Err(e) => { - log::error!("[POTENTIAL THREAD LEAK] Potentially failed to kill hypervisor handler thread when dropping MultiUseSandbox: {:?}", e); - } - } - } + vm: Box, + out_hdl: Arc>, + mem_hdl: Arc>, + dispatch_ptr: RawPtr, } impl MultiUseSandbox { @@ -75,12 +63,18 @@ impl MultiUseSandbox { pub(super) fn from_uninit( host_funcs: Arc>, mgr: MemMgrWrapper, - hv_handler: HypervisorHandler, + vm: Box, + out_hdl: Arc>, + mem_hdl: Arc>, + dispatch_ptr: RawPtr, ) -> MultiUseSandbox { Self { _host_funcs: host_funcs, mem_mgr: mgr, - hv_handler, + vm, + out_hdl, + mem_hdl, + dispatch_ptr, } } @@ -161,9 +155,11 @@ impl MultiUseSandbox { func_ret_type: ReturnType, args: Option>, ) -> Result { - let res = call_function_on_guest(self, func_name, func_ret_type, args); - self.restore_state()?; - res + maybe_time_and_emit_guest_call(func_name, move || { + let res = self.call_guest_function_by_name_no_reset(func_name, func_ret_type, args); + self.restore_state()?; + res + }) } /// Restore the Sandbox's state @@ -172,6 +168,45 @@ impl MultiUseSandbox { let mem_mgr = self.mem_mgr.unwrap_mgr_mut(); mem_mgr.restore_state_from_last_snapshot() } + + pub(crate) fn call_guest_function_by_name_no_reset( + &mut self, + function_name: &str, + return_type: ReturnType, + args: Option>, + ) -> Result { + let fc = FunctionCall::new( + function_name.to_string(), + args, + FunctionCallType::Guest, + return_type, + ); + + let buffer: Vec = fc + .try_into() + .map_err(|_| HyperlightError::Error("Failed to serialize FunctionCall".to_string()))?; + + self.get_mgr_wrapper_mut() + .as_mut() + .write_guest_function_call(&buffer)?; + + self.vm.dispatch_call_from_host( + self.dispatch_ptr.clone(), + self.out_hdl.clone(), + self.mem_hdl.clone(), + )?; + + self.check_stack_guard()?; + check_for_guest_error(self.get_mgr_wrapper_mut())?; + + self.get_mgr_wrapper_mut() + .as_mut() 
+ .get_guest_function_call_result() + } + + pub fn interrupt_handle(&self) -> Arc { + self.vm.interrupt_handle() + } } impl WrapperGetter for MultiUseSandbox { @@ -181,12 +216,6 @@ impl WrapperGetter for MultiUseSandbox { fn get_mgr_wrapper_mut(&mut self) -> &mut MemMgrWrapper { &mut self.mem_mgr } - fn get_hv_handler(&self) -> &HypervisorHandler { - &self.hv_handler - } - fn get_hv_handler_mut(&mut self) -> &mut HypervisorHandler { - &mut self.hv_handler - } } impl Sandbox for MultiUseSandbox { diff --git a/src/hyperlight_host/src/sandbox/mod.rs b/src/hyperlight_host/src/sandbox/mod.rs index 3cad349d6..5566533ad 100644 --- a/src/hyperlight_host/src/sandbox/mod.rs +++ b/src/hyperlight_host/src/sandbox/mod.rs @@ -48,9 +48,6 @@ pub use uninitialized::GuestBinary; pub use uninitialized::UninitializedSandbox; use self::mem_mgr::MemMgrWrapper; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; -#[cfg(target_os = "windows")] -use crate::hypervisor::windows_hypervisor_platform; use crate::mem::shared_mem::HostSharedMemory; // In case its not obvious why there are separate is_supported_platform and is_hypervisor_present functions its because @@ -86,9 +83,6 @@ pub(crate) trait WrapperGetter { #[allow(dead_code)] fn get_mgr_wrapper(&self) -> &MemMgrWrapper; fn get_mgr_wrapper_mut(&mut self) -> &mut MemMgrWrapper; - fn get_hv_handler(&self) -> &HypervisorHandler; - #[allow(dead_code)] - fn get_hv_handler_mut(&mut self) -> &mut HypervisorHandler; } #[cfg(test)] diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index 30c136c15..aa0dda0c2 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -18,7 +18,6 @@ use std::fmt::Debug; use std::option::Option; use std::path::Path; use std::sync::{Arc, Mutex}; -use std::time::Duration; use log::LevelFilter; use tracing::{instrument, Span}; @@ -67,9 +66,6 @@ pub struct UninitializedSandbox { 
pub(crate) host_funcs: Arc>, /// The memory manager for the sandbox. pub(crate) mgr: MemMgrWrapper, - pub(crate) max_initialization_time: Duration, - pub(crate) max_execution_time: Duration, - pub(crate) max_wait_for_cancellation: Duration, pub(crate) max_guest_log_level: Option, #[cfg(gdb)] pub(crate) debug_info: Option, @@ -180,13 +176,6 @@ impl UninitializedSandbox { let mut sandbox = Self { host_funcs, mgr: mem_mgr_wrapper, - max_initialization_time: Duration::from_millis( - sandbox_cfg.get_max_initialization_time() as u64, - ), - max_execution_time: Duration::from_millis(sandbox_cfg.get_max_execution_time() as u64), - max_wait_for_cancellation: Duration::from_millis( - sandbox_cfg.get_max_wait_for_cancellation() as u64, - ), max_guest_log_level: None, #[cfg(gdb)] debug_info, diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 05c445790..ea92bb79a 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -14,21 +14,24 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -use core::time::Duration; +use crate::hypervisor::handlers::{MemAccessHandlerCaller, OutBHandlerCaller}; +use crate::signal_handlers::setup_signal_handlers; +use crate::HyperlightError::NoHypervisorFound; use std::sync::{Arc, Mutex}; -use log::LevelFilter; use rand::Rng; use tracing::{instrument, Span}; +use super::hypervisor::get_available_hypervisor; #[cfg(gdb)] use super::mem_access::dbg_mem_access_handler_wrapper; -use crate::hypervisor::hypervisor_handler::{ - HvHandlerConfig, HypervisorHandler, HypervisorHandlerAction, -}; +use crate::hypervisor::hyperlight_vm::HyperlightSandbox; +use crate::hypervisor::HyperlightVm; +use crate::mem::layout::SandboxMemoryLayout; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::RawPtr; -use crate::mem::shared_mem::GuestSharedMemory; +use crate::mem::ptr::{GuestPtr, RawPtr}; +use crate::mem::ptr_offset::Offset; +use crate::mem::shared_mem::{GuestSharedMemory, SharedMemory}; #[cfg(gdb)] use crate::sandbox::config::DebugInfo; use crate::sandbox::host_funcs::FunctionRegistry; @@ -36,7 +39,7 @@ use crate::sandbox::mem_access::mem_access_handler_wrapper; use crate::sandbox::outb::outb_handler_wrapper; use crate::sandbox::{HostSharedMemory, MemMgrWrapper}; use crate::sandbox_state::sandbox::Sandbox; -use crate::{new_error, MultiUseSandbox, Result, UninitializedSandbox}; +use crate::{log_then_return, new_error, MultiUseSandbox, Result, UninitializedSandbox}; /// The implementation for evolving `UninitializedSandbox`es to /// `Sandbox`es. 
@@ -58,65 +61,15 @@ where TransformFunc: Fn( Arc>, MemMgrWrapper, - HypervisorHandler, + Box, + Arc>, + Arc>, + RawPtr, ) -> Result, { - let (hshm, gshm) = u_sbox.mgr.build(); - - let hv_handler = { - let mut hv_handler = hv_init( - &hshm, - gshm, - u_sbox.host_funcs.clone(), - u_sbox.max_initialization_time, - u_sbox.max_execution_time, - u_sbox.max_wait_for_cancellation, - u_sbox.max_guest_log_level, - #[cfg(gdb)] - u_sbox.debug_info, - )?; - - { - let dispatch_function_addr = hshm.as_ref().get_pointer_to_dispatch_function()?; - if dispatch_function_addr == 0 { - return Err(new_error!("Dispatch function address is null")); - } - hv_handler.set_dispatch_function_addr(RawPtr::from(dispatch_function_addr))?; - } - - hv_handler - }; - - transform(u_sbox.host_funcs, hshm, hv_handler) -} - -#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] -pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result { - evolve_impl(u_sbox, |hf, mut hshm, hv_handler| { - { - hshm.as_mut().push_state()?; - } - Ok(MultiUseSandbox::from_uninit(hf, hshm, hv_handler)) - }) -} - -#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] -#[allow(clippy::too_many_arguments)] -fn hv_init( - hshm: &MemMgrWrapper, - gshm: SandboxMemoryManager, - host_funcs: Arc>, - max_init_time: Duration, - max_exec_time: Duration, - max_wait_for_cancellation: Duration, - max_guest_log_level: Option, - #[cfg(gdb)] debug_info: Option, -) -> Result { - let outb_hdl = outb_handler_wrapper(hshm.clone(), host_funcs); - let mem_access_hdl = mem_access_handler_wrapper(hshm.clone()); - #[cfg(gdb)] - let dbg_mem_access_hdl = dbg_mem_access_handler_wrapper(hshm.clone()); - + let (hshm, mut gshm) = u_sbox.mgr.build(); + let mut vm = set_up_hypervisor_partition(&mut gshm)?; + let outb_hdl = outb_handler_wrapper(hshm.clone(), u_sbox.host_funcs.clone()); let seed = { let mut rng = rand::rng(); rng.random::() @@ -125,40 +78,138 @@ fn hv_init( let peb_u64 = 
u64::try_from(gshm.layout.peb_address)?; RawPtr::from(peb_u64) }; + let page_size = u32::try_from(page_size::get())?; - let hv_handler_config = HvHandlerConfig { - outb_handler: outb_hdl, - mem_access_handler: mem_access_hdl, - #[cfg(gdb)] - dbg_mem_access_handler: dbg_mem_access_hdl, + let mem_access_hdl = mem_access_handler_wrapper(hshm.clone()); + + #[cfg(gdb)] + let dbg_mem_access_hdl = dbg_mem_access_handler_wrapper(hshm.clone()); + + setup_signal_handlers()?; + + vm.initialise( + peb_addr, seed, page_size, - peb_addr, - dispatch_function_addr: Arc::new(Mutex::new(None)), - max_init_time, - max_exec_time, - max_wait_for_cancellation, - max_guest_log_level, + outb_hdl.clone(), + mem_access_hdl.clone(), + u_sbox.max_guest_log_level, + #[cfg(gdb)] + u_sbox.debug_info, + )?; + + let dispatch_function_addr = hshm.as_ref().get_pointer_to_dispatch_function()?; + if dispatch_function_addr == 0 { + return Err(new_error!("Dispatch function address is null")); + } + + transform( + u_sbox.host_funcs, + hshm, + vm, + outb_hdl, + mem_access_hdl, + RawPtr::from(dispatch_function_addr), + ) +} + +#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] +pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result { + evolve_impl( + u_sbox, + |hf, mut hshm, vm, out_hdl, mem_hdl, dispatch_ptr| { + { + hshm.as_mut().push_state()?; + } + Ok(MultiUseSandbox::from_uninit( + hf, + hshm, + vm, + out_hdl, + mem_hdl, + dispatch_ptr, + )) + }, + ) +} + +fn set_up_hypervisor_partition( + mgr: &mut SandboxMemoryManager, + #[cfg(gdb)] debug_info: &Option, +) -> Result> { + let mem_size = u64::try_from(mgr.shared_mem.mem_size())?; + let mut regions = mgr.layout.get_memory_regions(&mgr.shared_mem)?; + let rsp_ptr = { + let rsp_u64 = mgr.set_up_shared_memory(mem_size, &mut regions)?; + let rsp_raw = RawPtr::from(rsp_u64); + GuestPtr::try_from(rsp_raw) + }?; + let base_ptr = GuestPtr::try_from(Offset::from(0))?; + let pml4_ptr = { + let pml4_offset_u64 = 
u64::try_from(SandboxMemoryLayout::PML4_OFFSET)?; + base_ptr + Offset::from(pml4_offset_u64) }; - // Note: `dispatch_function_addr` is set by the Hyperlight guest library, and so it isn't in - // shared memory at this point in time. We will set it after the execution of `hv_init`. + let entrypoint_ptr = { + let entrypoint_total_offset = mgr.load_addr.clone() + mgr.entrypoint_offset; + GuestPtr::try_from(entrypoint_total_offset) + }?; + + if base_ptr != pml4_ptr { + log_then_return!( + "Error: base_ptr ({:#?}) does not equal pml4_ptr ({:#?})", + base_ptr, + pml4_ptr + ); + } + if entrypoint_ptr <= pml4_ptr { + log_then_return!( + "Error: entrypoint_ptr ({:#?}) is not greater than pml4_ptr ({:#?})", + entrypoint_ptr, + pml4_ptr + ); + } - let mut hv_handler = HypervisorHandler::new(hv_handler_config); + // Create gdb thread if gdb is enabled and the configuration is provided + #[cfg(gdb)] + let gdb_conn = if let Some(DebugInfo { port }) = debug_info { + let gdb_conn = create_gdb_thread(*port, unsafe { libc::pthread_self() }); - hv_handler.start_hypervisor_handler( - gshm, - #[cfg(gdb)] - debug_info, - )?; + // in case the gdb thread creation fails, we still want to continue + // without gdb + match gdb_conn { + Ok(gdb_conn) => Some(gdb_conn), + Err(e) => { + log::error!("Could not create gdb connection: {:#}", e); - hv_handler - .execute_hypervisor_handler_action(HypervisorHandlerAction::Initialise) - .map_err(|exec_e| match hv_handler.kill_hypervisor_handler_thread() { - Ok(_) => exec_e, - Err(kill_e) => new_error!("{}", format!("{}, {}", exec_e, kill_e)), - })?; + None + } + } + } else { + None + }; - Ok(hv_handler) + match get_available_hypervisor() { + Some(hv_type) => { + let hv = HyperlightSandbox::new( + hv_type, + regions, + pml4_ptr.absolute()?, + entrypoint_ptr.absolute()?, + rsp_ptr.absolute()?, + #[cfg(gdb)] + gdb_conn, + #[cfg(target_os = "windows")] + HandleWrapper::from( + mgr.shared_mem + .with_exclusivity(|e| e.get_mmap_file_handle())?, + ), + )?; + 
Ok(Box::new(hv)) + } + None => { + log_then_return!(NoHypervisorFound()); + } + } } #[cfg(test)] diff --git a/src/hyperlight_host/src/signal_handlers/mod.rs b/src/hyperlight_host/src/signal_handlers/mod.rs index 29c367dc0..e363a467e 100644 --- a/src/hyperlight_host/src/signal_handlers/mod.rs +++ b/src/hyperlight_host/src/signal_handlers/mod.rs @@ -45,7 +45,7 @@ pub(crate) fn setup_signal_handlers() -> crate::Result<()> { original_hook(panic_info); })); } - vmm_sys_util::signal::register_signal_handler(libc::SIGRTMIN(), handle_hltimeout)?; + vmm_sys_util::signal::register_signal_handler(libc::SIGRTMIN(), vm_kill_signal)?; // Note: For libraries registering signal handlers, it's important to keep in mind that // the user of the library could have their own signal handlers that we don't want to @@ -60,6 +60,6 @@ pub(crate) fn setup_signal_handlers() -> crate::Result<()> { Ok(()) } -extern "C" fn handle_hltimeout(_: libc::c_int, _: *mut libc::siginfo_t, _: *mut libc::c_void) { - // Do nothing. SIGRTMIN is just used to issue a VM exit to the underlying VMM. +extern "C" fn vm_kill_signal(_: libc::c_int, _: *mut libc::siginfo_t, _: *mut libc::c_void) { + // Do nothing. SIGRTMIN is just used to issue a VM exit to the underlying VM. } diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 2df993070..9f3b5be3f 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -14,6 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #![allow(clippy::disallowed_macros)] +use std::thread; +use std::time::Duration; + use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_common::mem::PAGE_SIZE; use hyperlight_host::func::{ParameterValue, ReturnType, ReturnValue}; @@ -28,6 +31,28 @@ use log::LevelFilter; pub mod common; // pub to disable dead_code warning use crate::common::{new_uninit, new_uninit_rust}; +#[test] +fn kill_running_vm() { + // this test is rust-guest only + let mut sbox1: MultiUseSandbox = new_uninit().unwrap().evolve(Noop::default()).unwrap(); + + let interrupt_handle = sbox1.interrupt_handle(); + assert!(!interrupt_handle.dropped()); + + // kill vm after 1 second + thread::spawn(|| { + thread::sleep(Duration::from_secs(1)); + interrupt_handle.kill(); + thread::sleep(Duration::from_secs(1)); + assert!(interrupt_handle.dropped()); + }); + let res = sbox1 + .call_guest_function_by_name("Spin", ReturnType::Int, None) + .unwrap_err(); + assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + println!("{:?}", res); +} + #[test] fn print_four_args_c_guest() { let path = c_simple_guest_as_string().unwrap(); diff --git a/typos.toml b/typos.toml index 32d9e9fde..bc0abd614 100644 --- a/typos.toml +++ b/typos.toml @@ -7,3 +7,5 @@ extend-exclude = ["**/*.patch", "src/hyperlight_guest/third_party/**/*", "NOTICE [default.extend-words] # typ is used for field name as type is a reserved keyword typ="typ" +# fpr is Floating Point Register +fpr="fpr"