diff --git a/docs/how-to/README.md b/docs/how-to/README.md index df6a060fce1d..874efc01af89 100644 --- a/docs/how-to/README.md +++ b/docs/how-to/README.md @@ -45,3 +45,4 @@ - [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md) - [How to run Kata Containers with AMD SEV-SNP](how-to-run-kata-containers-with-SNP-VMs.md) - [How to use EROFS to build rootfs in Kata Containers](how-to-use-erofs-build-rootfs.md) +- [How to run Kata Containers with different kinds of Block Volumes](how-to-run-kata-containers-with-kinds-of-Block-Volumes.md) \ No newline at end of file diff --git a/docs/how-to/how-to-run-kata-containers-with-kinds-of-Block-Volumes.md b/docs/how-to/how-to-run-kata-containers-with-kinds-of-Block-Volumes.md new file mode 100644 index 000000000000..d4fd048ec9bf --- /dev/null +++ b/docs/how-to/how-to-run-kata-containers-with-kinds-of-Block-Volumes.md @@ -0,0 +1,78 @@ +# A new way for Kata Containers to use different kinds of Block Volumes + +> **Note:** This guide only applies to runtime-rs with the default hypervisor, Dragonball. +> Support for other hypervisors is still in progress, and this guide will be updated when they are ready. + + +## Background + +Currently, there is no widely applicable and convenient way for users to use some kinds of backend storage with Kata Containers, such as a block volume backed by a file on the host, an SPDK based volume, or a VFIO device based volume, so we adopt [Proposal: Direct Block Device Assignment](https://github.com/kata-containers/kata-containers/blob/main/docs/design/direct-blk-device-assignment.md) to address it. + +## Solution + +According to the proposal, the `kata-ctl direct-volume` command must be used to add a directly assigned block volume device to the Kata Containers runtime. 
+ +Then, with the help of the method [get_volume_mount_info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L95), the information is read from the JSON file `mountinfo.json` and parsed into the structure [Direct Volume Info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L70), which is used to save device-related information. + +We only need to fill in the fields of `mountinfo.json`, such as `device`, `volume_type`, `fs_type`, `metadata` and `options`, which correspond to the fields in [Direct Volume Info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L70), to describe a device. + +The JSON file `mountinfo.json` is placed in a sub-path `/kubelet/kata-test-vol-001/volume001` under the fixed path `/run/kata-containers/shared/direct-volumes/`. +The full path looks like `/run/kata-containers/shared/direct-volumes/kubelet/kata-test-vol-001/volume001`, but for security reasons the sub-path is +Base64-encoded, giving `/run/kata-containers/shared/direct-volumes/L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx`. + +Finally, when running a Kata container with `ctr run --mount type=X,src=Y,dst=Z,options=rbind:rw`, `type=X` should be set to the dedicated type designed for that kind of volume. + +Currently supported types: + +- `directvol` for direct volume +- `spdkvol` for SPDK volume (TBD) +- `vfiovol` for VFIO device based volume (TBD) + + +## Set up a Device and Run a Kata Container + +### Direct Block Device Based Volume + +#### Create raw block based backend storage + +> **Tip:** Raw block based backend storage MUST be formatted with `mkfs`. 
+ +```bash +$ sudo dd if=/dev/zero of=/tmp/stor/rawdisk01.20g bs=1M count=20480 +$ sudo mkfs.ext4 /tmp/stor/rawdisk01.20g +``` + +#### Set up direct block device for Kata Containers + +```json +{ + "device": "/tmp/stor/rawdisk01.20g", + "volume_type": "directvol", + "fs_type": "ext4", + "metadata": {}, + "options": [] +} +``` + +```bash +$ sudo ./kata-ctl direct-volume add /kubelet/kata-direct-vol-002/directvol002 "{\"device\": \"/tmp/stor/rawdisk01.20g\", \"volume_type\": \"directvol\", \"fs_type\": \"ext4\", \"metadata\": {}, \"options\": []}" +$ # /kubelet/kata-direct-vol-002/directvol002 <==> /run/kata-containers/shared/direct-volumes/W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx +$ cat W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx/mountInfo.json +{"volume_type":"directvol","device":"/tmp/stor/rawdisk01.20g","fs_type":"ext4","metadata":{},"options":[]} +``` + +#### Run a Kata container with direct block device volume + +```bash +$ # type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw +$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw "$image" kata-direct-vol-xx05302045 /bin/bash +``` + + +### SPDK Device Based Volume + +TBD + +### VFIO Device Based Volume + +TBD \ No newline at end of file diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 9759e0e54f1f..0b5cd7ca9fed 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -241,6 +241,17 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "debc29dde2e69f9e47506b525f639ed42300fc014a3e007832592448fa8e4599" +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = 
"autocfg" version = "1.1.0" @@ -827,6 +838,27 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if 1.0.0", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "dlv-list" version = "0.3.0" @@ -1212,6 +1244,15 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.2.6" @@ -1619,6 +1660,7 @@ dependencies = [ "slog-async", "slog-json", "slog-scope", + "slog-term", ] [[package]] @@ -1846,7 +1888,16 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ - "hermit-abi", + "hermit-abi 0.2.6", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ "libc", ] @@ -2434,6 +2485,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom 0.2.8", + 
"redox_syscall", + "thiserror", +] + [[package]] name = "regex" version = "1.7.1" @@ -2845,6 +2907,19 @@ dependencies = [ "slog-scope", ] +[[package]] +name = "slog-term" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" +dependencies = [ + "atty", + "slog", + "term", + "thread_local", + "time 0.3.20", +] + [[package]] name = "smallvec" version = "1.10.0" @@ -2963,6 +3038,17 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + [[package]] name = "termcolor" version = "1.2.0" @@ -3043,6 +3129,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" dependencies = [ "itoa", + "libc", + "num_threads", "serde", "time-core", "time-macros", diff --git a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs index c633cf918131..62d9f7e52f73 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs @@ -8,19 +8,21 @@ use std::{collections::HashMap, sync::Arc}; use anyhow::{anyhow, Context, Result}; use kata_sys_util::rand::RandomBytes; -use tokio::sync::Mutex; +use tokio::sync::{Mutex, RwLock}; +use super::{ + util::{get_host_path, get_virt_drive_name}, + Device, DeviceConfig, DeviceType, +}; use crate::{ BlockConfig, BlockDevice, Hypervisor, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, }; -use super::{ - util::{get_host_path, get_virt_drive_name}, - Device, DeviceConfig, -}; pub type ArcMutexDevice = Arc>; +const DEVICE_TYPE_BLOCK: &str = 
"b"; + /// block_index and released_block_index are used to search an available block index /// in Sandbox. /// @@ -75,35 +77,27 @@ impl DeviceManager { }) } - pub async fn new_device(&mut self, device_config: &DeviceConfig) -> Result { - let device_id = if let Some(dev) = self.find_device(device_config).await { - dev - } else { - self.create_device(device_config) - .await - .context("failed to create device")? - }; - Ok(device_id) - } - - pub async fn try_add_device(&mut self, device_id: &str) -> Result<()> { + async fn try_add_device(&mut self, device_id: &str) -> Result<()> { // find the device let device = self .devices .get(device_id) .context("failed to find device")?; - let mut device_guard = device.lock().await; + // attach device + let mut device_guard = device.lock().await; let result = device_guard.attach(self.hypervisor.as_ref()).await; + // handle attach error if let Err(e) = result { - if let DeviceConfig::BlockCfg(config) = device_guard.get_device_info().await { - self.shared_info.release_device_index(config.index); + if let DeviceType::Block(device) = device_guard.get_device_info().await { + self.shared_info.release_device_index(device.config.index); }; drop(device_guard); self.devices.remove(device_id); return Err(e); } + Ok(()) } @@ -120,66 +114,97 @@ impl DeviceManager { } Err(e) => Err(e), }; + + // if detach success, remove it from device manager if result.is_ok() { drop(device_guard); - // if detach success, remove it from device manager self.devices.remove(device_id); } + return result; } + Err(anyhow!( "device with specified ID hasn't been created. {}", device_id )) } - pub async fn get_device_info(&self, device_id: &str) -> Result { + async fn get_device_info(&self, device_id: &str) -> Result { if let Some(dev) = self.devices.get(device_id) { return Ok(dev.lock().await.get_device_info().await); } + Err(anyhow!( "device with specified ID hasn't been created. 
{}", device_id )) } - async fn find_device(&self, device_config: &DeviceConfig) -> Option { + async fn find_device(&self, host_path: String) -> Option { for (device_id, dev) in &self.devices { match dev.lock().await.get_device_info().await { - DeviceConfig::BlockCfg(config) => match device_config { - DeviceConfig::BlockCfg(ref config_new) => { - if config_new.path_on_host == config.path_on_host { - return Some(device_id.to_string()); - } + DeviceType::Block(device) => { + if device.config.path_on_host == host_path { + return Some(device_id.to_string()); } - _ => { - continue; - } - }, + } _ => { // TODO: support find other device type continue; } } } + None } - async fn create_device(&mut self, device_config: &DeviceConfig) -> Result { + fn get_dev_virt_path(&mut self, dev_type: &str) -> Result> { + let virt_path = if dev_type == DEVICE_TYPE_BLOCK { + // generate virt path + let current_index = self.shared_info.declare_device_index()?; + let drive_name = get_virt_drive_name(current_index as i32)?; + let virt_path_name = format!("/dev/{}", drive_name); + + Some((current_index, virt_path_name)) + } else { + // only dev_type is block, otherwise, it's useless. + None + }; + + Ok(virt_path) + } + + async fn new_device(&mut self, device_config: &DeviceConfig) -> Result { // device ID must be generated by manager instead of device itself // in case of ID collision let device_id = self.new_device_id()?; let dev: ArcMutexDevice = match device_config { - DeviceConfig::BlockCfg(config) => self - .create_block_device(config, device_id.clone()) - .await - .context("failed to create device")?, + DeviceConfig::BlockCfg(config) => { + // try to find the device, found and just return id. + if let Some(dev_id_matched) = self.find_device(config.path_on_host.clone()).await { + info!( + sl!(), + "device with host path:{:?} found. 
just return device id: {:?}", + config.path_on_host.clone(), + dev_id_matched + ); + + return Ok(dev_id_matched); + } + + self.create_block_device(config, device_id.clone()) + .await + .context("failed to create device")? + } _ => { return Err(anyhow!("invliad device type")); } }; + // register device to devices self.devices.insert(device_id.clone(), dev.clone()); + Ok(device_id) } @@ -204,17 +229,23 @@ impl DeviceManager { _ => "".to_string(), }; block_config.driver_option = block_driver; - // generate virt path - let current_index = self.shared_info.declare_device_index()?; - block_config.index = current_index; - let drive_name = get_virt_drive_name(current_index as i32)?; - block_config.virt_path = format!("/dev/{}", drive_name); + + // generate block device index and virt path + // safe here, Block device always has virt_path. + if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? { + block_config.index = virt_path.0; + block_config.virt_path = virt_path.1; + } + // if the path on host is empty, we need to get device host path from the device major and minor number - // Otherwise, it might be rawfile based block device, the host path is already passed from the runtime, so we don't need to do anything here + // Otherwise, it might be rawfile based block device, the host path is already passed from the runtime, + // so we don't need to do anything here if block_config.path_on_host.is_empty() { - block_config.path_on_host = get_host_path("b".to_owned(), config.major, config.minor) - .context("failed to get host path")?; + block_config.path_on_host = + get_host_path(DEVICE_TYPE_BLOCK.to_owned(), config.major, config.minor) + .context("failed to get host path")?; } + Ok(Arc::new(Mutex::new(BlockDevice::new( device_id, block_config, @@ -237,3 +268,36 @@ impl DeviceManager { Err(anyhow!("ID are exhausted")) } } + +// Many scenarios have similar steps when adding devices. 
so to reduce duplicated code, +// we should create a common method abstracted and use it in various scenarios. +// do_handle_device: +// (1) new_device with DeviceConfig and return device_id; +// (2) try_add_device with device_id and do really add device; +// (3) return device info of device's info; +pub async fn do_handle_device( + d: &RwLock, + dev_info: &DeviceConfig, +) -> Result { + let device_id = d + .write() + .await + .new_device(dev_info) + .await + .context("failed to create deviec")?; + + d.write() + .await + .try_add_device(&device_id) + .await + .context("failed to add deivce")?; + + let device_info = d + .read() + .await + .get_device_info(&device_id) + .await + .context("failed to get device info")?; + + Ok(device_info) +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index ff3a579a81cb..63fe40022694 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -7,7 +7,7 @@ use std::{fs, path::Path, process::Command}; use crate::device::Device; -use crate::device::DeviceConfig; +use crate::device::DeviceType; use crate::Hypervisor as hypervisor; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use anyhow::anyhow; @@ -166,7 +166,7 @@ impl Device for VfioConfig { todo!() } - async fn get_device_info(&self) -> DeviceConfig { + async fn get_device_info(&self) -> DeviceType { todo!() } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user.rs index d778c4459751..a105672d572d 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user.rs @@ -5,7 +5,7 @@ // use crate::device::Device; -use crate::device::DeviceConfig; +use crate::device::DeviceType; use crate::Hypervisor as hypervisor; use anyhow::Result; use 
async_trait::async_trait; @@ -47,7 +47,7 @@ impl Device for VhostUserConfig { todo!() } - async fn get_device_info(&self) -> DeviceConfig { + async fn get_device_info(&self) -> DeviceType { todo!() } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs index 2ff98a1e703f..da5d50ea7bd8 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs @@ -6,7 +6,7 @@ pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio"; use crate::device::Device; -use crate::device::{DeviceConfig, DeviceType}; +use crate::device::DeviceType; use crate::Hypervisor as hypervisor; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; @@ -98,8 +98,8 @@ impl Device for BlockDevice { Ok(Some(self.config.index)) } - async fn get_device_info(&self) -> DeviceConfig { - DeviceConfig::BlockCfg(self.config.clone()) + async fn get_device_info(&self) -> DeviceType { + DeviceType::Block(self.clone()) } async fn increase_attach_count(&mut self) -> Result { diff --git a/src/runtime-rs/crates/hypervisor/src/device/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/mod.rs index d341d9a127e3..d4996a3e6154 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/mod.rs @@ -53,7 +53,7 @@ pub trait Device: Send + Sync { // detach is to unplug device from VM async fn detach(&mut self, h: &dyn hypervisor) -> Result>; // get_device_info returns device config - async fn get_device_info(&self) -> DeviceConfig; + async fn get_device_info(&self) -> DeviceType; // increase_attach_count is used to increase the attach count for a device // return values: // * true: no need to do real attach when current attach count is zero, skip following actions. 
diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index 18e74dacf464..0c77ec142335 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -12,7 +12,10 @@ use anyhow::{anyhow, Context, Ok, Result}; use async_trait::async_trait; use hypervisor::{ - device::{device_manager::DeviceManager, DeviceConfig}, + device::{ + device_manager::{do_handle_device, DeviceManager}, + DeviceConfig, DeviceType, + }, BlockConfig, Hypervisor, }; use kata_types::config::TomlConfig; @@ -266,42 +269,23 @@ impl ResourceManagerInner { for d in linux.devices.iter() { match d.r#type.as_str() { "b" => { - let device_info = DeviceConfig::BlockCfg(BlockConfig { + let dev_info = DeviceConfig::BlockCfg(BlockConfig { major: d.major, minor: d.minor, ..Default::default() }); - let device_id = self - .device_manager - .write() - .await - .new_device(&device_info) - .await - .context("failed to create deviec")?; - - self.device_manager - .write() - .await - .try_add_device(&device_id) - .await - .context("failed to add deivce")?; - // get complete device information - let dev_info = self - .device_manager - .read() - .await - .get_device_info(&device_id) + let device_info = do_handle_device(&self.device_manager, &dev_info) .await - .context("failed to get device info")?; + .context("do handle device")?; // create agent device - if let DeviceConfig::BlockCfg(config) = dev_info { + if let DeviceType::Block(device) = device_info { let agent_device = Device { - id: device_id.clone(), + id: device.device_id.clone(), container_path: d.path.clone(), - field_type: config.driver_option, - vm_path: config.virt_path, + field_type: device.config.driver_option, + vm_path: device.config.virt_path, ..Default::default() }; devices.push(agent_device); diff --git a/src/runtime-rs/crates/resource/src/rootfs/block_rootfs.rs b/src/runtime-rs/crates/resource/src/rootfs/block_rootfs.rs 
index 0e161fcb3c85..bcb31d957c19 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/block_rootfs.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/block_rootfs.rs @@ -10,7 +10,10 @@ use agent::Storage; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; use hypervisor::{ - device::{device_manager::DeviceManager, DeviceConfig}, + device::{ + device_manager::{do_handle_device, DeviceManager}, + DeviceConfig, DeviceType, + }, BlockConfig, }; use kata_types::mount::Mount; @@ -46,18 +49,10 @@ impl BlockRootfs { ..Default::default() }; - let device_id = d - .write() - .await - .new_device(&DeviceConfig::BlockCfg(block_device_config.clone())) - .await - .context("failed to create deviec")?; - - d.write() + // create and insert block device into Kata VM + let device_info = do_handle_device(d, &DeviceConfig::BlockCfg(block_device_config.clone())) .await - .try_add_device(device_id.as_str()) - .await - .context("failed to add deivce")?; + .context("do handle device failed.")?; let mut storage = Storage { fs_type: rootfs.fs_type.clone(), @@ -66,17 +61,11 @@ impl BlockRootfs { ..Default::default() }; - // get complete device information - let dev_info = d - .read() - .await - .get_device_info(device_id.as_str()) - .await - .context("failed to get device info")?; - - if let DeviceConfig::BlockCfg(config) = dev_info { - storage.driver = config.driver_option; - storage.source = config.virt_path; + let mut device_id: String = "".to_owned(); + if let DeviceType::Block(device) = device_info { + storage.driver = device.config.driver_option; + storage.source = device.config.virt_path; + device_id = device.device_id; } Ok(Self { diff --git a/src/runtime-rs/crates/resource/src/volume/block_volume.rs b/src/runtime-rs/crates/resource/src/volume/block_volume.rs index 14f09fc81e5a..ad1c01f17f1c 100644 --- a/src/runtime-rs/crates/resource/src/volume/block_volume.rs +++ b/src/runtime-rs/crates/resource/src/volume/block_volume.rs @@ -4,29 +4,32 @@ // 
SPDX-License-Identifier: Apache-2.0 // -use anyhow::Result; +use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; -use std::{collections::HashMap, fs, path::Path}; - -use crate::share_fs::{do_get_guest_path, do_get_host_path}; +use nix::sys::{stat, stat::SFlag}; +use tokio::sync::RwLock; -use super::{share_fs_volume::generate_mount_path, Volume}; -use agent::Storage; -use anyhow::{anyhow, Context}; +use super::Volume; +use crate::volume::utils::{ + generate_shared_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, KATA_DIRECT_VOLUME_TYPE, + KATA_MOUNT_BIND_TYPE, +}; use hypervisor::{ - device::{device_manager::DeviceManager, DeviceConfig}, + device::{ + device_manager::{do_handle_device, DeviceManager}, + DeviceConfig, DeviceType, + }, BlockConfig, }; -use nix::sys::stat::{self, SFlag}; -use tokio::sync::RwLock; -#[derive(Debug)] + +#[derive(Clone)] pub(crate) struct BlockVolume { storage: Option, mount: oci::Mount, device_id: String, } -/// BlockVolume: block device volume +/// BlockVolume for bind-mount block volume and direct block volume impl BlockVolume { pub(crate) async fn new( d: &RwLock, @@ -35,54 +38,71 @@ impl BlockVolume { cid: &str, sid: &str, ) -> Result { - let fstat = stat::stat(m.source.as_str()).context(format!("stat {}", m.source))?; - info!(sl!(), "device stat: {:?}", fstat); - let mut options = HashMap::new(); - if read_only { - options.insert("read_only".to_string(), "true".to_string()); - } - - let block_device_config = &mut BlockConfig { - major: stat::major(fstat.st_rdev) as i64, - minor: stat::minor(fstat.st_rdev) as i64, - ..Default::default() + let mnt_src: &str = &m.source; + // default block device fs type: ext4. 
+ let mut blk_dev_fstype = DEFAULT_VOLUME_FS_TYPE.to_string(); + + let block_device_config = match m.r#type.as_str() { + KATA_MOUNT_BIND_TYPE => { + let fstat = stat::stat(mnt_src).context(format!("stat {}", m.source))?; + + BlockConfig { + major: stat::major(fstat.st_rdev) as i64, + minor: stat::minor(fstat.st_rdev) as i64, + ..Default::default() + } + } + KATA_DIRECT_VOLUME_TYPE => { + // get volume mountinfo from mountinfo.json + let v = volume_mount_info(mnt_src) + .context("deserde information from mountinfo.json")?; + // check volume type + if v.volume_type != KATA_DIRECT_VOLUME_TYPE { + return Err(anyhow!("volume type {:?} is invalid", v.volume_type)); + } + + let fstat = stat::stat(v.device.as_str()) + .with_context(|| format!("stat volume device file: {}", v.device.clone()))?; + if SFlag::from_bits_truncate(fstat.st_mode) != SFlag::S_IFREG + && SFlag::from_bits_truncate(fstat.st_mode) != SFlag::S_IFBLK + { + return Err(anyhow!( + "invalid volume device {:?} for volume type {:?}", + v.device, + v.volume_type + )); + } + + blk_dev_fstype = v.fs_type.clone(); + + BlockConfig { + path_on_host: v.device, + ..Default::default() + } + } + _ => { + return Err(anyhow!( + "unsupport direct block volume r#type: {:?}", + m.r#type.as_str() + )) + } }; - let device_id = d - .write() - .await - .new_device(&DeviceConfig::BlockCfg(block_device_config.clone())) + // create and insert block device into Kata VM + let device_info = do_handle_device(d, &DeviceConfig::BlockCfg(block_device_config.clone())) .await - .context("failed to create deviec")?; + .context("do handle device failed.")?; - d.write() + // generate host guest shared path + let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid) .await - .try_add_device(device_id.as_str()) - .await - .context("failed to add deivce")?; - - let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap(); - let file_name = generate_mount_path(cid, file_name); - let guest_path = 
do_get_guest_path(&file_name, cid, true, false); - let host_path = do_get_host_path(&file_name, sid, cid, true, read_only); - fs::create_dir_all(&host_path) - .map_err(|e| anyhow!("failed to create rootfs dir {}: {:?}", host_path, e))?; - - // get complete device information - let dev_info = d - .read() - .await - .get_device_info(&device_id) - .await - .context("failed to get device info")?; + .context("generate host-guest shared path failed")?; // storage - let mut storage = Storage::default(); - - if let DeviceConfig::BlockCfg(config) = dev_info { - storage.driver = config.driver_option; - storage.source = config.virt_path; - } + let mut storage = agent::Storage { + mount_point: guest_path.clone(), + ..Default::default() + }; storage.options = if read_only { vec!["ro".to_string()] @@ -90,21 +110,32 @@ impl BlockVolume { Vec::new() }; - storage.mount_point = guest_path.clone(); + // As the true Block Device wrapped in DeviceType, we need to + // get it out from the wrapper, and the device_id will be for + // BlockVolume. + // safe here, device_info is correct and only unwrap it. + let mut device_id = String::new(); + if let DeviceType::Block(device) = device_info { + // blk, mmioblk + storage.driver = device.config.driver_option; + // /dev/vdX + storage.source = device.config.virt_path; + device_id = device.device_id; + } - // If the volume had specified the filesystem type, use it. Otherwise, set it - // to ext4 since but right now we only support it. - if m.r#type != "bind" { - storage.fs_type = m.r#type.clone(); + // In some case, dest is device /dev/xxx + if m.destination.clone().starts_with("/dev") { + storage.fs_type = "bind".to_string(); + storage.options.append(&mut m.options.clone()); } else { - storage.fs_type = "ext4".to_string(); + // usually, the dest is directory. 
+ storage.fs_type = blk_dev_fstype; } - // mount let mount = oci::Mount { destination: m.destination.clone(), - r#type: m.r#type.clone(), - source: guest_path.clone(), + r#type: storage.fs_type.clone(), + source: guest_path, options: m.options.clone(), }; @@ -128,6 +159,7 @@ impl Volume for BlockVolume { } else { vec![] }; + Ok(s) } @@ -144,13 +176,22 @@ impl Volume for BlockVolume { } } -pub(crate) fn is_block_volume(m: &oci::Mount) -> bool { - if m.r#type != "bind" { - return false; +pub(crate) fn is_block_volume(m: &oci::Mount) -> Result { + let vol_types = vec![KATA_MOUNT_BIND_TYPE, KATA_DIRECT_VOLUME_TYPE]; + if !vol_types.contains(&m.r#type.as_str()) { + return Ok(false); } - if let Ok(fstat) = stat::stat(m.source.as_str()).context(format!("stat {}", m.source)) { - info!(sl!(), "device stat: {:?}", fstat); - return SFlag::from_bits_truncate(fstat.st_mode) == SFlag::S_IFBLK; + + let fstat = + stat::stat(m.source.as_str()).context(format!("stat mount source {} failed.", m.source))?; + let s_flag = SFlag::from_bits_truncate(fstat.st_mode); + + match m.r#type.as_str() { + // case: mount bind and block device + KATA_MOUNT_BIND_TYPE if s_flag == SFlag::S_IFBLK => Ok(true), + // case: directvol and directory + KATA_DIRECT_VOLUME_TYPE if s_flag == SFlag::S_IFDIR => Ok(true), + // else: unsupported or todo for other volume type. 
+ _ => Ok(false), } - false } diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs index 52b826b2db32..6fc6e3bc81a2 100644 --- a/src/runtime-rs/crates/resource/src/volume/mod.rs +++ b/src/runtime-rs/crates/resource/src/volume/mod.rs @@ -9,16 +9,18 @@ mod default_volume; pub mod hugepage; mod share_fs_volume; mod shm_volume; -use async_trait::async_trait; +pub mod utils; -use crate::{share_fs::ShareFs, volume::block_volume::is_block_volume}; -use agent::Agent; -use anyhow::{Context, Result}; -use hypervisor::device::device_manager::DeviceManager; use std::{sync::Arc, vec::Vec}; + +use anyhow::{Context, Result}; +use async_trait::async_trait; use tokio::sync::RwLock; use self::hugepage::{get_huge_page_limits_map, get_huge_page_option}; +use crate::{share_fs::ShareFs, volume::block_volume::is_block_volume}; +use agent::Agent; +use hypervisor::device::device_manager::DeviceManager; const BIND: &str = "bind"; @@ -66,7 +68,7 @@ impl VolumeResource { shm_volume::ShmVolume::new(m, shm_size) .with_context(|| format!("new shm volume {:?}", m))?, ) - } else if is_block_volume(m) { + } else if is_block_volume(m).context("block volume type")? 
{ // handle block volume Arc::new( block_volume::BlockVolume::new(d, m, read_only, cid, sid) diff --git a/src/runtime-rs/crates/resource/src/volume/utils.rs b/src/runtime-rs/crates/resource/src/volume/utils.rs new file mode 100644 index 000000000000..892724e1e432 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/utils.rs @@ -0,0 +1,65 @@ +// Copyright (c) 2022-2023 Alibaba Cloud +// Copyright (c) 2022-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{fs, path::Path}; + +use anyhow::{anyhow, Context, Result}; + +use crate::{ + share_fs::{do_get_guest_path, do_get_host_path}, + volume::share_fs_volume::generate_mount_path, +}; +use kata_sys_util::eother; +use kata_types::mount::{get_volume_mount_info, DirectVolumeMountInfo}; + +pub const DEFAULT_VOLUME_FS_TYPE: &str = "ext4"; +pub const KATA_MOUNT_BIND_TYPE: &str = "bind"; +pub const KATA_DIRECT_VOLUME_TYPE: &str = "directvol"; +pub const KATA_VFIO_VOLUME_TYPE: &str = "vfiovol"; +pub const KATA_SPDK_VOLUME_TYPE: &str = "spdkvol"; + +// volume mount info load infomation from mountinfo.json +pub fn volume_mount_info(volume_path: &str) -> Result { + get_volume_mount_info(volume_path) +} + +pub fn get_file_name>(src: P) -> Result { + let file_name = src + .as_ref() + .file_name() + .map(|v| v.to_os_string()) + .ok_or_else(|| { + eother!( + "failed to get file name of path {}", + src.as_ref().to_string_lossy() + ) + })? 
+ .into_string() + .map_err(|e| anyhow!("failed to convert to string {:?}", e))?; + + Ok(file_name) +} + +pub(crate) async fn generate_shared_path( + dest: String, + read_only: bool, + cid: &str, + sid: &str, +) -> Result { + let file_name = get_file_name(&dest).context("failed to get file name.")?; + let mount_name = generate_mount_path(cid, file_name.as_str()); + let guest_path = do_get_guest_path(&mount_name, cid, true, false); + let host_path = do_get_host_path(&mount_name, sid, cid, true, read_only); + + if dest.starts_with("/dev") { + fs::File::create(&host_path).context(format!("failed to create file {:?}", &host_path))?; + } else { + std::fs::create_dir_all(&host_path) + .map_err(|e| anyhow!("failed to create dir {}: {:?}", host_path, e))?; + } + + Ok(guest_path) +}