diff --git a/docs/mmds/mmds-user-guide.md b/docs/mmds/mmds-user-guide.md index c6bac05100e..5f9b3f012d0 100644 --- a/docs/mmds/mmds-user-guide.md +++ b/docs/mmds/mmds-user-guide.md @@ -249,7 +249,7 @@ The session must start with an HTTP `PUT` request that generates the session tok In order to be successful, the request must respect the following constraints: - must be directed towards `/latest/api/token` path -- must contain a `X-ametadata-token-ttl-seconds` header specifying the token lifetime +- must contain a `X-metadata-token-ttl-seconds` header specifying the token lifetime in seconds. The value cannot be lower than 1 or greater than 21600 (6 hours). - must not contain a `X-Forwarded-For` header. diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json index ca0e51381a5..5a2204f41d6 100644 --- a/resources/seccomp/aarch64-unknown-linux-musl.json +++ b/resources/seccomp/aarch64-unknown-linux-musl.json @@ -596,6 +596,19 @@ } ] }, + { + "syscall": "msync", + "comment": "Used to sync memory from mmap to disk", + "args": [ + { + "index": 2, + "type": "dword", + "op": "eq", + "val": 4, + "comment": "MS_SYNC" + } + ] + }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index b419581ea3f..02bc9629a6a 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -248,6 +248,19 @@ } ] }, + { + "syscall": "msync", + "comment": "Used to sync memory from mmap to disk", + "args": [ + { + "index": 2, + "type": "dword", + "op": "eq", + "val": 4, + "comment": "MS_SYNC" + } + ] + }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", diff --git a/src/api_server/src/parsed_request.rs b/src/api_server/src/parsed_request.rs index 9b4cd6e64b9..7565853e1ab 100644 --- a/src/api_server/src/parsed_request.rs +++ b/src/api_server/src/parsed_request.rs @@ -17,6 +17,7 @@ use crate::request::logger::parse_put_logger; use crate::request::machine_configuration::{ parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config, }; +use crate::request::memory_backing_file::parse_put_memory_backing_file; use crate::request::metrics::parse_put_metrics; use crate::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use crate::request::net::{parse_patch_net, parse_put_net}; @@ -112,6 +113,7 @@ impl ParsedRequest { (Method::Put, "network-interfaces", Some(body)) => { parse_put_net(body, path_tokens.get(1)) } + (Method::Put, "memory-backing-file", Some(body)) => parse_put_memory_backing_file(body), (Method::Put, "shutdown-internal", None) => { Ok(ParsedRequest::new(RequestAction::ShutdownInternal)) } diff --git a/src/api_server/src/request/memory_backing_file.rs b/src/api_server/src/request/memory_backing_file.rs new file mode 100644 index 00000000000..e29798039bc --- /dev/null +++ b/src/api_server/src/request/memory_backing_file.rs @@ -0,0 +1,42 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use super::super::VmmAction; +use crate::parsed_request::{Error, ParsedRequest}; +use crate::request::Body; +use logger::{IncMetric, METRICS}; +use vmm::vmm_config::memory_backing_file::MemoryBackingFileConfig; + +pub(crate) fn parse_put_memory_backing_file(body: &Body) -> Result { + METRICS.put_api_requests.memory_backing_file_cfg_count.inc(); + Ok(ParsedRequest::new_sync(VmmAction::SetMemoryBackingFile( + serde_json::from_slice::(body.raw()).map_err(|e| { + METRICS.put_api_requests.memory_backing_file_cfg_fails.inc(); + Error::SerdeJson(e) + })?, + ))) +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use super::*; + + #[test] + fn test_parse_memory_backing_file() { + assert!(parse_put_memory_backing_file(&Body::new("invalid_payload")).is_err()); + + let body = r#"{ + "path": "./memory.snap" + }"#; + let same_body = MemoryBackingFileConfig { + path: PathBuf::from("./memory.snap"), + }; + let result = parse_put_memory_backing_file(&Body::new(body)); + assert!(result.is_ok()); + let parsed_req = result.unwrap_or_else(|_e| panic!("Failed test.")); + + assert!(parsed_req == ParsedRequest::new_sync(VmmAction::SetMemoryBackingFile(same_body))); + } +} diff --git a/src/api_server/src/request/mod.rs b/src/api_server/src/request/mod.rs index 75f9a0daef3..c2f52110bf0 100644 --- a/src/api_server/src/request/mod.rs +++ b/src/api_server/src/request/mod.rs @@ -8,6 +8,7 @@ pub mod drive; pub mod instance_info; pub mod logger; pub mod machine_configuration; +pub mod memory_backing_file; pub mod metrics; pub mod mmds; pub mod net; diff --git a/src/api_server/swagger/firecracker.yaml b/src/api_server/swagger/firecracker.yaml index 07d242532e6..14cae2b7a69 100644 --- a/src/api_server/swagger/firecracker.yaml +++ b/src/api_server/swagger/firecracker.yaml @@ -350,6 +350,29 @@ paths: description: Internal server error schema: $ref: "#/definitions/Error" + + /memory-backing-file: + put: + summary: Configures a memory backing file to sync the memory changes to during the runtime of the vm + operationId: putMemoryBackingFile + parameters: + - name: body + in: body + description: Path to memory backing file + required: true + schema: + $ref: "#/definitions/MemoryBackingFile" + responses: + 204: + description: Memory backing file configured + 400: + description: Memory backing file failed + schema: + $ref: "#/definitions/Error" + default: + description: Internal server error. + schema: + $ref: "#/definitions/Error" /metrics: put: @@ -1047,6 +1070,14 @@ definitions: tx_rate_limiter: $ref: "#/definitions/RateLimiter" + MemoryBackingFile: + type: object + required: + - path + properties: + path: + type: string + PartialDrive: type: object required: diff --git a/src/cpuid/src/transformer/amd.rs b/src/cpuid/src/transformer/amd.rs index c1db38260a5..261e88ef4e4 100644 --- a/src/cpuid/src/transformer/amd.rs +++ b/src/cpuid/src/transformer/amd.rs @@ -147,6 +147,9 @@ impl CpuidTransformer for AmdCpuidTransformer { leaf_0x8000001d::LEAF_NUM => Some(amd::update_extended_cache_topology_entry), leaf_0x8000001e::LEAF_NUM => Some(amd::update_extended_apic_id_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), + + // hypervisor stuff + 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } } diff --git a/src/cpuid/src/transformer/common.rs b/src/cpuid/src/transformer/common.rs index ba89fc35f74..59de658fba5 100644 --- a/src/cpuid/src/transformer/common.rs +++ b/src/cpuid/src/transformer/common.rs @@ -69,6 +69,19 @@ pub fn update_brand_string_entry( Ok(()) } +// KVM feature bits +#[cfg(target_arch = "x86_64")] +const KVM_FEATURE_ASYNC_PF_INT_BIT: u32 = 14; + +pub fn disable_kvm_feature_async_pf( + entry: &mut kvm_cpuid_entry2, + vm_spec: &VmSpec, +) -> Result<(), Error> { + entry.eax.write_bit(KVM_FEATURE_ASYNC_PF_INT_BIT, false); + + Ok(()) +} + pub fn update_cache_parameters_entry( entry: &mut kvm_cpuid_entry2, vm_spec: &VmSpec, diff --git a/src/cpuid/src/transformer/intel.rs b/src/cpuid/src/transformer/intel.rs index 8505b668932..439058c8780 100644 --- a/src/cpuid/src/transformer/intel.rs +++ b/src/cpuid/src/transformer/intel.rs @@ -126,6 +126,8 @@ impl CpuidTransformer for IntelCpuidTransformer { leaf_0xa::LEAF_NUM => Some(intel::update_perf_mon_entry), leaf_0xb::LEAF_NUM => Some(intel::update_extended_topology_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), + // hypervisor stuff + 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } } diff --git a/src/logger/src/metrics.rs b/src/logger/src/metrics.rs index 779689f04b8..aba286cc4d8 100644 --- a/src/logger/src/metrics.rs +++ b/src/logger/src/metrics.rs @@ -403,6 +403,10 @@ pub struct PutRequestsMetrics { pub machine_cfg_count: SharedIncMetric, /// Number of failures in configuring the machine. pub machine_cfg_fails: SharedIncMetric, + /// Number of PUTs for setting memory backing file. + pub memory_backing_file_cfg_count: SharedIncMetric, + /// Number of failures in configuring the machine. + pub memory_backing_file_cfg_fails: SharedIncMetric, /// Number of PUTs for initializing the metrics system. pub metrics_count: SharedIncMetric, /// Number of failures in initializing the metrics system. diff --git a/src/mmds/src/lib.rs b/src/mmds/src/lib.rs index 5360c5a0f0a..ae206ab7d6e 100644 --- a/src/mmds/src/lib.rs +++ b/src/mmds/src/lib.rs @@ -42,7 +42,7 @@ impl fmt::Display for Error { ), Error::NoTtlProvided => write!( f, - "Token time to live value not found. Use `X-metadata-token-ttl_seconds` header to \ + "Token time to live value not found. Use `X-metadata-token-ttl-seconds` header to \ specify the token's lifetime." ), Error::ResourceNotFound(ref uri) => { @@ -705,8 +705,8 @@ mod tests { assert_eq!( Error::NoTtlProvided.to_string(), - "Token time to live value not found. Use `X-metadata-token-ttl_seconds` header to \ - specify the token's lifetime." + "Token time to live value not found. Use `X-metadata-token-ttl-seconds` header to \ + specify the token's lifetime." ); assert_eq!( diff --git a/src/mmds/src/token_headers.rs b/src/mmds/src/token_headers.rs index 5d13d9a4245..1f03d0b4f4e 100644 --- a/src/mmds/src/token_headers.rs +++ b/src/mmds/src/token_headers.rs @@ -39,12 +39,19 @@ impl TokenHeaders { /// Return `TokenHeaders` from headers map. pub fn try_from(map: &HashMap) -> Result { let mut headers = Self::default(); + let lowercased_headers: HashMap = map + .iter() + .map(|(k, v)| (k.to_lowercase(), v.clone())) + .collect(); - if let Some(token) = map.get(TokenHeaders::X_METADATA_TOKEN) { + if let Some(token) = lowercased_headers.get(&TokenHeaders::X_METADATA_TOKEN.to_lowercase()) + { headers.x_metadata_token = Some(token.to_string()); } - if let Some(value) = map.get(TokenHeaders::X_METADATA_TOKEN_TTL_SECONDS) { + if let Some(value) = + lowercased_headers.get(&TokenHeaders::X_METADATA_TOKEN_TTL_SECONDS.to_lowercase()) + { match value.parse::() { Ok(seconds) => { headers.x_metadata_token_ttl_seconds = Some(seconds); @@ -127,6 +134,17 @@ mod tests { let headers = TokenHeaders::try_from(&map).unwrap(); assert_eq!(*headers.x_metadata_token().unwrap(), "".to_string()); + // Lowercased headers + let mut map: HashMap = HashMap::default(); + map.insert( + TokenHeaders::X_METADATA_TOKEN_TTL_SECONDS + .to_string() + .to_lowercase(), + "60".to_string(), + ); + let headers = TokenHeaders::try_from(&map).unwrap(); + assert_eq!(headers.x_metadata_token_ttl_seconds().unwrap(), 60); + // Invalid value. let mut map: HashMap = HashMap::default(); map.insert( diff --git a/src/vm-memory/src/lib.rs b/src/vm-memory/src/lib.rs index 814f79098b4..1d18139dd99 100644 --- a/src/vm-memory/src/lib.rs +++ b/src/vm-memory/src/lib.rs @@ -117,7 +117,7 @@ pub fn create_guest_memory( for region in regions { let flags = match region.0 { None => libc::MAP_NORESERVE | libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, - Some(_) => libc::MAP_NORESERVE | libc::MAP_PRIVATE, + Some(_) => libc::MAP_NORESERVE | libc::MAP_SHARED, }; let mmap_region = diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index bffb43e47cf..c096d8cb132 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -5,9 +5,11 @@ use std::convert::TryFrom; use std::fmt::{Display, Formatter}; +use std::fs::File; use std::io::{self, Read, Seek, SeekFrom}; use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::{Arc, Mutex}; +use vm_memory::FileOffset; use arch::InitrdConfig; #[cfg(target_arch = "x86_64")] @@ -28,7 +30,6 @@ use linux_loader::loader::KernelLoader; use logger::{error, warn, METRICS}; use seccompiler::BpfThreadMap; use snapshot::Persist; -use userfaultfd::Uffd; use utils::eventfd::EventFd; use utils::terminal::Terminal; use utils::time::TimestampUs; @@ -43,7 +44,7 @@ use crate::construct_kvm_mpidrs; use crate::device_manager::legacy::PortIODeviceManager; use crate::device_manager::mmio::MMIODeviceManager; use crate::device_manager::persist::MMIODevManagerConstructorArgs; -use crate::persist::{MicrovmState, MicrovmStateError}; +use crate::persist::{MemoryDescriptor, MicrovmState, MicrovmStateError}; use crate::resources::VmResources; use crate::vmm_config::boot_source::BootConfig; use crate::vmm_config::instance_info::InstanceInfo; @@ -58,6 +59,8 @@ use crate::{device_manager, mem_size_mib, Error, EventManager, Vmm, VmmEventsObs pub enum StartMicrovmError { /// Unable to attach block device to Vmm. AttachBlockDevice(io::Error), + /// Unable to create the memory backing file. + BackingMemoryFile(io::Error), /// This error is thrown by the minimal boot loader implementation. ConfigureSystem(arch::Error), /// Internal errors are due to resource exhaustion. @@ -112,6 +115,9 @@ impl Display for StartMicrovmError { write!(f, "Unable to attach block device to Vmm: {}", err) } ConfigureSystem(err) => write!(f, "System configuration error: {:?}", err), + BackingMemoryFile(err) => { + write!(f, "Unable to create the memory backing file: {}", err) + } CreateRateLimiter(err) => write!(f, "Cannot create RateLimiter: {}", err), CreateNetDevice(err) => { let mut err_msg = format!("{:?}", err); @@ -231,7 +237,7 @@ fn create_vmm_and_vcpus( instance_info: &InstanceInfo, event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, - uffd: Option, + memory_descriptor: Option, track_dirty_pages: bool, vcpu_count: u8, ) -> std::result::Result<(Vmm, Vec), StartMicrovmError> { @@ -297,7 +303,7 @@ fn create_vmm_and_vcpus( shutdown_exit_code: None, vm, guest_memory, - uffd, + memory_descriptor, vcpus_handles: Vec::new(), vcpus_exit_evt, mmio_device_manager, @@ -329,8 +335,23 @@ pub fn build_microvm_for_boot( let boot_config = vm_resources.boot_source().ok_or(MissingKernelConfig)?; let track_dirty_pages = vm_resources.track_dirty_pages(); - let guest_memory = - create_guest_memory(vm_resources.vm_config().mem_size_mib, track_dirty_pages)?; + + let backing_memory_file = if let Some(ref file) = vm_resources.backing_memory_file { + file.set_len((vm_resources.vm_config().mem_size_mib * 1024 * 1024) as u64) + .map_err(|e| { + error!("Failed to set backing memory file size: {}", e); + StartMicrovmError::BackingMemoryFile(e) + })?; + + Some(file.clone()) + } else { + None + }; + let guest_memory = create_guest_memory( + vm_resources.vm_config().mem_size_mib, + backing_memory_file.clone(), + track_dirty_pages, + )?; let vcpu_config = vm_resources.vcpu_config(); let entry_addr = load_kernel(boot_config, &guest_memory)?; let initrd = load_initrd_from_config(boot_config, &guest_memory)?; @@ -362,7 +383,7 @@ pub fn build_microvm_for_boot( instance_info, event_manager, guest_memory, - None, + backing_memory_file.map(MemoryDescriptor::File), track_dirty_pages, vcpu_config.vcpu_count, )?; @@ -451,7 +472,7 @@ pub fn build_microvm_from_snapshot( event_manager: &mut EventManager, microvm_state: MicrovmState, guest_memory: GuestMemoryMmap, - uffd: Option, + memory_descriptor: Option, track_dirty_pages: bool, seccomp_filters: &BpfThreadMap, vm_resources: &mut VmResources, @@ -466,7 +487,7 @@ pub fn build_microvm_from_snapshot( instance_info, event_manager, guest_memory.clone(), - uffd, + memory_descriptor, track_dirty_pages, vcpu_count, )?; @@ -581,15 +602,24 @@ pub fn build_microvm_from_snapshot( /// Creates GuestMemory of `mem_size_mib` MiB in size. pub fn create_guest_memory( mem_size_mib: usize, + backing_memory_file: Option>, track_dirty_pages: bool, ) -> std::result::Result { let mem_size = mem_size_mib << 20; let arch_mem_regions = arch::arch_memory_regions(mem_size); + let mut offset = 0_u64; vm_memory::create_guest_memory( &arch_mem_regions .iter() - .map(|(addr, size)| (None, *addr, *size)) + .map(|(addr, size)| { + let file_offset = backing_memory_file + .clone() + .map(|file| FileOffset::from_arc(file, offset)); + offset += *size as u64; + + (file_offset, *addr, *size) + }) .collect::>()[..], track_dirty_pages, ) @@ -1068,7 +1098,7 @@ pub mod tests { } pub(crate) fn default_vmm() -> Vmm { - let guest_memory = create_guest_memory(128, false).unwrap(); + let guest_memory = create_guest_memory(128, None, false).unwrap(); let vcpus_exit_evt = EventFd::new(libc::EFD_NONBLOCK) .map_err(Error::EventFd) @@ -1096,12 +1126,12 @@ pub mod tests { shutdown_exit_code: None, vm, guest_memory, - uffd: None, vcpus_handles: Vec::new(), vcpus_exit_evt, mmio_device_manager, #[cfg(target_arch = "x86_64")] pio_device_manager, + memory_descriptor: None, } } @@ -1283,13 +1313,13 @@ pub mod tests { // Case 1: create guest memory without dirty page tracking { - let guest_memory = create_guest_memory(mem_size, false).unwrap(); + let guest_memory = create_guest_memory(mem_size, None, false).unwrap(); assert!(!is_dirty_tracking_enabled(&guest_memory)); } // Case 2: create guest memory with dirty page tracking { - let guest_memory = create_guest_memory(mem_size, true).unwrap(); + let guest_memory = create_guest_memory(mem_size, None, true).unwrap(); assert!(is_dirty_tracking_enabled(&guest_memory)); } } @@ -1297,7 +1327,7 @@ pub mod tests { #[test] fn test_create_vcpus() { let vcpu_count = 2; - let guest_memory = create_guest_memory(128, false).unwrap(); + let guest_memory = create_guest_memory(128, None, false).unwrap(); #[allow(unused_mut)] let mut vm = setup_kvm_vm(&guest_memory, false).unwrap(); diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index d71932525b5..90975ac6acc 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -50,10 +50,10 @@ use devices::virtio::{ use devices::BusDevice; use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEventSubscriber}; use logger::{error, info, warn, LoggerError, MetricsError, METRICS}; +use persist::MemoryDescriptor; use rate_limiter::BucketUpdate; use seccompiler::BpfProgram; use snapshot::Persist; -use userfaultfd::Uffd; use utils::epoll::EventSet; use utils::eventfd::EventFd; use vm_memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; @@ -260,10 +260,6 @@ pub struct Vmm { // Guest VM core resources. vm: Vm, guest_memory: GuestMemoryMmap, - // Save UFFD in order to keep it open in the Firecracker process, as well. - // Since this field is never read again, we need to allow `dead_code`. - #[allow(dead_code)] - uffd: Option, vcpus_handles: Vec, // Used by Vcpus and devices to initiate teardown; Vmm should never write here. vcpus_exit_evt: EventFd, @@ -272,6 +268,11 @@ pub struct Vmm { mmio_device_manager: MMIODeviceManager, #[cfg(target_arch = "x86_64")] pio_device_manager: PortIODeviceManager, + + // The mem file that should be mmaped. We need to keep a reference of the UFFD in the + // process so we allow dead_code + #[allow(dead_code)] + memory_descriptor: Option, } impl Vmm { diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 309cbce46b3..0ff5ca1f9da 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -325,6 +325,27 @@ fn snapshot_memory_to_file( snapshot_type: &SnapshotType, ) -> std::result::Result<(), CreateSnapshotError> { use self::CreateSnapshotError::*; + if OpenOptions::new().read(true).open(mem_file_path).is_ok() + && snapshot_type == &SnapshotType::Diff + { + // // The memory file already exists. + // // We're going to use the msync behaviour + // for region in vmm.guest_memory().iter() { + // info!("msyncing memory region"); + // unsafe { + // if libc::msync(region.as_ptr() as _, region.len() as _, libc::MS_SYNC) == -1 { + // return Err(CreateSnapshotError::Memory( + // memory_snapshot::Error::CreateRegion(vm_memory::MmapRegionError::Mmap( + // std::io::Error::last_os_error(), + // )), + // )); + // } + // }; + // } + + return Ok(()); + } + let mut file = OpenOptions::new() .write(true) .create(true) @@ -483,6 +504,16 @@ pub fn snapshot_state_sanity_check( Ok(()) } +/// Describes a descriptor that connects to the memory used by the VM. This could either be the a file descriptor +/// or a UFFD descriptor. +#[derive(Debug)] +pub enum MemoryDescriptor { + /// A file descriptor that connects to the user fault process. + Uffd(Uffd), + /// A file descriptor of the backing memory file. + File(Arc), +} + /// Loads a Microvm snapshot producing a 'paused' Microvm. pub fn restore_from_snapshot( instance_info: &InstanceInfo, @@ -501,26 +532,31 @@ pub fn restore_from_snapshot( let mem_backend_path = ¶ms.mem_backend.backend_path; let mem_state = µvm_state.memory_state; let track_dirty_pages = params.enable_diff_snapshots; - let (guest_memory, uffd) = match params.mem_backend.backend_type { - MemBackendType::File => ( - guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages)?, - None, - ), - MemBackendType::Uffd => guest_memory_from_uffd( - mem_backend_path, - mem_state, - track_dirty_pages, - // We enable the UFFD_FEATURE_EVENT_REMOVE feature only if a balloon device - // is present in the microVM state. - microvm_state.device_states.balloon_device.is_some(), - )?, + let (guest_memory, memory_descriptor) = match params.mem_backend.backend_type { + MemBackendType::File => { + let (guest_memory, file) = + guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages)?; + (guest_memory, Some(MemoryDescriptor::File(Arc::new(file)))) + } + MemBackendType::Uffd => { + let (guest_memory, uffd) = guest_memory_from_uffd( + mem_backend_path, + mem_state, + track_dirty_pages, + // We enable the UFFD_FEATURE_EVENT_REMOVE feature only if a balloon device + // is present in the microVM state. + microvm_state.device_states.balloon_device.is_some(), + )?; + + (guest_memory, uffd.map(MemoryDescriptor::Uffd)) + } }; builder::build_microvm_from_snapshot( instance_info, event_manager, microvm_state, guest_memory, - uffd, + memory_descriptor, track_dirty_pages, seccomp_filters, vm_resources, @@ -545,11 +581,19 @@ fn guest_memory_from_file( mem_file_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, -) -> std::result::Result { +) -> std::result::Result<(GuestMemoryMmap, File), LoadSnapshotError> { use self::LoadSnapshotError::{DeserializeMemory, MemoryBackingFile}; - let mem_file = File::open(mem_file_path).map_err(MemoryBackingFile)?; - GuestMemoryMmap::restore(Some(&mem_file), mem_state, track_dirty_pages) - .map_err(DeserializeMemory) + let mem_file = OpenOptions::new() + .write(true) + .read(true) + .open(mem_file_path) + .map_err(MemoryBackingFile)?; + + Ok(( + GuestMemoryMmap::restore(Some(&mem_file), mem_state, track_dirty_pages) + .map_err(DeserializeMemory)?, + mem_file, + )) } fn guest_memory_from_uffd( diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 9a5257ac307..fb9c3b11e61 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use std::convert::From; +use std::fs::File; use std::sync::{Arc, Mutex, MutexGuard}; use logger::info; @@ -117,6 +118,8 @@ pub struct VmResources { pub mmds_size_limit: usize, /// Whether or not to load boot timer device. pub boot_timer: bool, + /// When backed by a memory file, this should be set + pub backing_memory_file: Option>, } impl VmResources { @@ -236,6 +239,16 @@ impl VmResources { self.vm_config.track_dirty_pages = dirty_page_tracking; } + /// Returns the config for the backing memory file + pub fn backing_memory_file(&self) -> Option> { + self.backing_memory_file.clone() + } + + /// Sets the backing memory file + pub fn set_backing_memory_file(&mut self, backing_mem_file: Arc) { + self.backing_memory_file.get_or_insert(backing_mem_file); + } + /// Returns the VmConfig. pub fn vm_config(&self) -> &VmConfig { &self.vm_config @@ -575,6 +588,7 @@ mod tests { mmds: None, boot_timer: false, mmds_size_limit: HTTP_MAX_PAYLOAD_SIZE, + backing_memory_file: None, } } diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 2a79237d3f1..87f9f2aae2a 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt::{Display, Formatter}; -use std::result; use std::sync::{Arc, Mutex, MutexGuard}; +use std::{fs, result}; use logger::*; use mmds::data_store::{self, Mmds}; @@ -34,6 +34,7 @@ use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, Drive use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::logger::{LoggerConfig, LoggerConfigError}; use crate::vmm_config::machine_config::{VmConfig, VmConfigError, VmUpdateConfig}; +use crate::vmm_config::memory_backing_file::{MemoryBackingFileConfig, MemoryBackingFileError}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ @@ -99,6 +100,9 @@ pub enum VmmAction { /// `BalloonDeviceConfig` as input. This action can only be called before the microVM /// has booted. SetBalloonDevice(BalloonDeviceConfig), + /// Set the memory backing file for the VM. The VM will use this backing file to store its + /// memory. This action can only be called before the microVM has booted. + SetMemoryBackingFile(MemoryBackingFileConfig), /// Set the MMDS configuration. SetMmdsConfiguration(MmdsConfig), /// Set the vsock device or update the one that already exists using the @@ -130,6 +134,8 @@ pub enum VmmAction { pub enum VmmActionError { /// The action `SetBalloonDevice` failed because of bad user input. BalloonConfig(BalloonConfigError), + /// The action `SetMemoryBackingFile` failed because of bad user input. + MemoryBackingFile(MemoryBackingFileError), /// The action `ConfigureBootSource` failed because of bad user input. BootSource(BootSourceConfigError), /// The action `CreateSnapshot` failed. @@ -182,6 +188,7 @@ impl Display for VmmActionError { match self { BalloonConfig(err) => err.to_string(), BootSource(err) => err.to_string(), + MemoryBackingFile(err) => err.to_string(), CreateSnapshot(err) => err.to_string(), DriveConfig(err) => err.to_string(), InternalVmm(err) => format!("Internal Vmm error: {}", err), @@ -422,6 +429,7 @@ impl<'a> PrebootApiController<'a> { SetBalloonDevice(config) => self.set_balloon_device(config), SetVsockDevice(config) => self.set_vsock_device(config), SetMmdsConfiguration(config) => self.set_mmds_config(config), + SetMemoryBackingFile(config) => self.set_backing_memory_file(config), StartMicroVm => self.start_microvm(), UpdateVmConfiguration(config) => self.update_vm_config(config), // Operations not allowed pre-boot. @@ -447,6 +455,23 @@ impl<'a> PrebootApiController<'a> { .map_err(VmmActionError::BalloonConfig) } + fn set_backing_memory_file(&mut self, cfg: MemoryBackingFileConfig) -> ActionResult { + self.boot_path = true; + let file = fs::OpenOptions::new() + .create(true) + .read(true) + .write(true) + .open(cfg.path) + .map(Arc::new) + .map_err(|e| { + VmmActionError::MemoryBackingFile(MemoryBackingFileError::CreateFile(e)) + })?; + + self.vm_resources.backing_memory_file = Some(file); + + Ok(VmmData::Empty) + } + fn insert_block_device(&mut self, cfg: BlockDeviceConfig) -> ActionResult { self.boot_path = true; self.vm_resources @@ -654,6 +679,7 @@ impl RuntimeApiController { | InsertNetworkDevice(_) | LoadSnapshot(_) | SetBalloonDevice(_) + | SetMemoryBackingFile(_) | SetVsockDevice(_) | SetMmdsConfiguration(_) | StartMicroVm @@ -720,14 +746,14 @@ impl RuntimeApiController { fn create_snapshot(&mut self, create_params: &CreateSnapshotParams) -> ActionResult { log_dev_preview_warning("Virtual machine snapshots", None); - if create_params.snapshot_type == SnapshotType::Diff - && !self.vm_resources.track_dirty_pages() - { - return Err(VmmActionError::NotSupported( - "Diff snapshots are not allowed on uVMs with dirty page tracking disabled." - .to_string(), - )); - } + // if create_params.snapshot_type == SnapshotType::Diff + // && !self.vm_resources.track_dirty_pages() + // { + // return Err(VmmActionError::NotSupported( + // "Diff snapshots are not allowed on uVMs with dirty page tracking disabled." + // .to_string(), + // )); + // } let mut locked_vmm = self.vmm.lock().unwrap(); let create_start_us = utils::time::get_time_us(utils::time::ClockType::Monotonic); @@ -862,6 +888,7 @@ mod tests { pub boot_timer: bool, // when `true`, all self methods are forced to fail pub force_errors: bool, + pub backing_memory_file: Option>, } impl MockVmRes { diff --git a/src/vmm/src/vmm_config/memory_backing_file.rs b/src/vmm/src/vmm_config/memory_backing_file.rs new file mode 100644 index 00000000000..f5f5bd9ce6f --- /dev/null +++ b/src/vmm/src/vmm_config/memory_backing_file.rs @@ -0,0 +1,31 @@ +use std::{ + fmt::{Display, Formatter}, + io, + path::PathBuf, +}; + +use serde::{Deserialize, Serialize}; + +/// Keeps the Memory Backing file configuration. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[serde(deny_unknown_fields)] +pub struct MemoryBackingFileConfig { + /// Location of the memory backing file. + pub path: PathBuf, +} + +/// Errors associated with the operations allowed on a memory backing file. +#[derive(Debug)] +pub enum MemoryBackingFileError { + /// Failed to create the block device + CreateFile(io::Error), +} + +impl Display for MemoryBackingFileError { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + use self::MemoryBackingFileError::*; + match self { + CreateFile(e) => write!(f, "Unable to create the memory backing file: {}", e), + } + } +} diff --git a/src/vmm/src/vmm_config/mod.rs b/src/vmm/src/vmm_config/mod.rs index c509fce888f..61b235a0fbb 100644 --- a/src/vmm/src/vmm_config/mod.rs +++ b/src/vmm/src/vmm_config/mod.rs @@ -23,6 +23,8 @@ pub mod instance_info; pub mod logger; /// Wrapper for configuring the memory and CPU of the microVM. pub mod machine_config; +/// Wrapper for configuring the backing memory file. +pub mod memory_backing_file; /// Wrapper for configuring the metrics. pub mod metrics; /// Wrapper for configuring the MMDS. diff --git a/tests/integration_tests/functional/test_mmds.py b/tests/integration_tests/functional/test_mmds.py index 870bce0a70a..92f735436df 100644 --- a/tests/integration_tests/functional/test_mmds.py +++ b/tests/integration_tests/functional/test_mmds.py @@ -832,7 +832,7 @@ def test_mmds_v2_negative(test_microvm_with_api, network_config): cmd = f"curl -m 2 -s -X PUT http://{DEFAULT_IPV4}/latest/api/token" expected = ( "Token time to live value not found. Use " - "`X-metadata-token-ttl_seconds` header to specify " + "`X-metadata-token-ttl-seconds` header to specify " "the token's lifetime." ) _run_guest_cmd(ssh_connection, cmd, expected)