Add block cache #469

Open · wants to merge 1 commit into base: main
src/block_cache.rs: 390 additions & 0 deletions

@@ -0,0 +1,390 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use crate::block_index::FsBlockIndex;
use crate::block_size::BlockSize;
use crate::error::Ext4Error;
use crate::util::usize_from_u32;
use alloc::boxed::Box;
use alloc::collections::VecDeque;
use alloc::vec;

/// Entry for a single block in the cache.
#[derive(Clone)]
struct CacheEntry {
    /// Absolute block index within the filesystem.
    block_index: FsBlockIndex,

    /// Block data. The length is always equal to the filesystem block size.
    data: Box<[u8]>,
}

/// LRU block cache.
///
/// This is a fairly simple cache that holds a fixed number of blocks in
/// a deque. The front of the deque is for most-recently accessed
/// blocks, the back for least-recently accessed.
///
/// When a block in the cache is accessed, it's moved to the front of
/// the cache, and new blocks are also added directly to the front.
///
/// When new blocks are added, an equal number of blocks are popped off
/// the back. At the end of insertion, the total number of cache entries
/// remains unchanged. The block allocations within each entry are
/// reused, so allocation only occurs when initializing the cache.
///
/// Blocks are read in a group. Depending on the underlying data source,
/// this can be much more efficient than reading one by one.
///
/// The number of entries in the cache, and the size of the read buffer,
/// are controlled by the block size. The intent is to strike a
/// reasonable balance between speed and memory usage.
pub(crate) struct BlockCache {
    /// Contiguous buffer of multiple blocks.
    ///
    /// Depending on the underlying data source, it can be much more
    /// efficient to do a single read of N blocks, instead of N reads
    /// each one block in length. And it's a good bet that if we read
    /// block X, we'll soon need blocks X+1, X+2, etc.
    ///
    /// Immediately after blocks are read into this buffer, they are
    /// individually copied to an entry in `entries`.
    read_buf: Box<[u8]>,

    /// Maximum number of blocks that can be read into `read_buf`. The
    /// length of `read_buf` is `max_blocks_per_read * block_size`.
    max_blocks_per_read: u32,

    /// Cache entries, sorted from most-recently-used to least.
    ///
    /// The entries are fully allocated when the cache is
    /// created. During regular operation no additional allocation or
    /// deallocation occurs; data is just copied around.
    entries: VecDeque<CacheEntry>,

    /// Filesystem block size.
    block_size: BlockSize,

    /// Total number of blocks in the filesystem.
    ///
    /// This is used to ensure that when reading multiple blocks we
    /// don't go past the end of the filesystem.
    num_fs_blocks: u64,
}

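The move-to-front policy described above can be pictured with a plain deque. A toy illustration, separate from the diff (bare u64 keys stand in for full cache entries; assumes VecDeque is in scope):

// Toy model: front = most recently used, back = least recently used.
let mut lru: VecDeque<u64> = VecDeque::from([1, 2, 3, 4]);

// Accessing key 3 moves it to the front.
if let Some(pos) = lru.iter().position(|&k| k == 3) {
    let key = lru.remove(pos).unwrap();
    lru.push_front(key);
}
assert_eq!(lru, [3, 1, 2, 4]);

// Adding a new key first pops one off the back, so the entry count
// stays fixed.
let _evicted = lru.pop_back().unwrap(); // key 4, least recently used
lru.push_front(5);
assert_eq!(lru, [5, 3, 1, 2]);
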
impl BlockCache {
    /// Create a block cache with sensible defaults.
    pub(crate) fn new(
        block_size: BlockSize,
        num_fs_blocks: u64,
    ) -> Result<Self, Ext4Error> {
        Self::with_opts(CacheOpts::new(block_size), num_fs_blocks)
    }

    /// Create a block cache with control over the number of entries and
    /// the read size.
    ///
    /// # Preconditions
    ///
    /// `max_blocks_per_read` must be less than or equal to `num_entries`.
    fn with_opts(
        opts: CacheOpts,
        num_fs_blocks: u64,
    ) -> Result<Self, Ext4Error> {
        assert!(usize_from_u32(opts.max_blocks_per_read) <= opts.num_entries);

        let read_buf_len = opts.read_buf_size_in_bytes();

        let entries = vec![
            CacheEntry {
                block_index: 0,
                data: vec![0; opts.block_size.to_usize()].into_boxed_slice(),
            };
            opts.num_entries
        ];
        Ok(Self {
            entries: VecDeque::from(entries),
            max_blocks_per_read: opts.max_blocks_per_read,
            read_buf: vec![0; read_buf_len].into_boxed_slice(),
            block_size: opts.block_size,
            num_fs_blocks,
        })
    }

    /// Get the number of blocks to read.
    ///
    /// Normally this returns `max_blocks_per_read`. If reading that
    /// many blocks would go past the end of the filesystem, the number
    /// is clamped to avoid that.
    ///
    /// # Preconditions
    ///
    /// `block_index` must be less than `num_fs_blocks`.
    fn num_blocks_to_read(&self, block_index: FsBlockIndex) -> u32 {
        assert!(block_index < self.num_fs_blocks);

        // Get the index of the block right after the last block to read.
        let end_block = block_index
            .saturating_add(u64::from(self.max_blocks_per_read))
            .min(self.num_fs_blocks);

        // OK to unwrap: `end_block` can't be less than `block_index`.
        let num_blocks = end_block.checked_sub(block_index).unwrap();

        // OK to unwrap: the number is at most `max_blocks_per_read`,
        // which is a `u32`.
        u32::try_from(num_blocks).unwrap()
    }

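Worked example (mirroring test_num_blocks_to_read below): with max_blocks_per_read = 4 and num_fs_blocks = 8, a request at block 5 reads min(5 + 4, 8) - 5 = 3 blocks.
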
    /// Get the cache entry for `block_size`, reading and inserting
    /// blocks into the cache if not already present.
    ///
    /// If the entry is already present, it is moved to the front of the
    /// cache to indicate it was accessed most recently.
    ///
    /// Otherwise, `f` is called to read a contiguous group of
    /// blocks. Each block is inserted into the cache, with the
    /// requested `block_index` at the front of the cache. `f` is called
    /// only once.
    ///
    /// # Preconditions
    ///
    /// `block_index` must be less than `num_fs_blocks`.

Collaborator commented on the first doc line above:

Get the cache entry for block_size -> block_index?
    pub(crate) fn get_or_insert_blocks<F>(
        &mut self,
        block_index: FsBlockIndex,
        f: F,
    ) -> Result<&[u8], Ext4Error>
    where
        F: FnOnce(&mut [u8]) -> Result<(), Ext4Error>,
    {
        assert!(block_index < self.num_fs_blocks);

        // Check if the block is already cached.
        if let Some(index) = self
            .entries
            .iter()
            .position(|entry| entry.block_index == block_index)
        {
            // Move the entry to the front of the cache if it's not
            // already there.
            if index != 0 {
                let entry = self.entries.remove(index).unwrap();
                self.entries.push_front(entry);
            }

            // Return the cached block data.
            return Ok(&*self.entries[0].data);
        }

        let block_size = self.block_size.to_usize();

        // Get the number of blocks/bytes to read.
        let num_blocks = self.num_blocks_to_read(block_index);
        let num_bytes = usize_from_u32(num_blocks)
            .checked_mul(block_size)
            .unwrap_or(block_size);
tedbrandston (Collaborator) commented on Apr 7, 2025:

Why unwrap_or(block_size)? Probably wants a comment, because checking that this makes sense is taking me a bit...

  • If the multiplication overflows, we'll have num_blocks (almost definitely) > 1, and num_bytes == one block's worth.
  • At L194 we'll read one block's worth of bytes into read_buf... I'm actually unclear what happens to the rest of the buffer: I don't see anything that indicates that it's zeroed.
  • When we iterate on L199, we'll call insert_block for an index outside what we actually read. Not yet clear to me what happens.

Collaborator commented:

Okay, it looks like this can't possibly overflow if usize == u64, and would require a goofily large max_blocks_per_read on usize == u32.

Instead of unwrap_or(block_size), which I think is wrong if we could hit it, can this panic or something?

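A sketch of the loud-failure alternative the reviewer suggests (an assumption about the intended behavior, not part of the diff):

// Hypothetical alternative: panic on overflow instead of silently
// clamping num_bytes to a single block's worth.
let num_bytes = usize_from_u32(num_blocks)
    .checked_mul(block_size)
    .expect("num_blocks * block_size overflows usize");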

        // Read blocks into the read buffer.
        f(&mut self.read_buf[..num_bytes])?;

        // Add blocks to the cache. Blocks are added to the front in
        // reverse order, so that the requested `block_index` is at the
        // very front of the cache.
        for i in (0..num_blocks).rev() {
            // OK to unwrap: the function precondition requires that the
            // requested blocks are valid (i.e. within the filesystem).
            // Valid block indices fit in a `u64`, so this can't
            // overflow.
            let block_index = block_index.checked_add(u64::from(i)).unwrap();

            self.insert_block(block_index, i);
        }

        // Get the requested block data, which should be at the front of
        // the cache now.
        let entry = &self.entries[0];
        assert_eq!(entry.block_index, block_index);
        Ok(&*entry.data)
    }

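A minimal sketch of how a caller might use this method (the disk source, its read_at method, and the surrounding variables are hypothetical placeholders, not part of this diff):

// On a miss, the closure is handed a buffer spanning the whole group
// of blocks to read; on a hit, it is not called at all.
let data = cache.get_or_insert_blocks(block, |buf| {
    // Hypothetical data source: fill `buf` starting at the byte
    // offset of `block`.
    disk.read_at(block * block_size_in_bytes, buf)
})?;
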
    /// Add a block to the front of the cache. The block data is read
    /// from the `read_buf` at an offset of `block_within_read_buf *
    /// block_size`.
    ///
    /// # Preconditions
    ///
    /// `block_within_read_buf` must be a valid block index within the
    /// read buf.
    fn insert_block(
        &mut self,
        block_index: FsBlockIndex,
        block_within_read_buf: u32,
    ) {
        assert!(block_within_read_buf < self.max_blocks_per_read);

        // OK to unwrap: precondition says that `block_within_read_buf`
        // is valid.
        let start = usize_from_u32(block_within_read_buf)
            .checked_mul(self.block_size.to_usize())
            .unwrap();
        let end = start.checked_add(self.block_size.to_usize()).unwrap();
        let src = &self.read_buf[start..end];

        // Take an entry from the back of the cache. Note that although
        // this removes the entry from the deque, the entry is just
        // being moved, so the large block allocation within the entry
        // is not freed or reallocated.
        let mut entry = self.entries.pop_back().unwrap();

        entry.block_index = block_index;
        entry.data.copy_from_slice(src);

        // Move the entry to the front of the cache.
        self.entries.push_front(entry);
    }
}

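For example, with a 1024-byte block size, block_within_read_buf = 1 selects bytes 1024..2048 of read_buf; test_insert_block below exercises exactly this offset arithmetic.
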
#[derive(Debug, PartialEq)]
struct CacheOpts {
    block_size: BlockSize,
    max_blocks_per_read: u32,
    num_entries: usize,
}

impl CacheOpts {
    /// Create `CacheOpts` with sensible values based on the block size.
    fn new(block_size: BlockSize) -> Self {
        // On a typical 4K-blocksize filesystem, read 8 blocks at a
        // time.
        let max_bytes_per_read = 8 * 4096;
        // Ensure that at least one block is read at a time.
        let max_blocks_per_read =
            1.max(max_bytes_per_read / block_size.to_nz_u32());

        // OK to unwrap: the smallest block size is 1024, so
        // `max_blocks_per_read` cannot exceed
        // ((8*4096)/1024)=32. `num_entries` is therefore at most
        // 32*8=256, which fits in `u32`.
        let num_entries: u32 = max_blocks_per_read.checked_mul(8).unwrap();

        Self {
            block_size,
            max_blocks_per_read,
            num_entries: usize_from_u32(num_entries),
        }
    }

    fn read_buf_size_in_bytes(&self) -> usize {
        // OK to unwrap: outside of tests, `CacheOpts` is always created
        // by the `new` method. For any large block size,
        // `max_blocks_per_read` is capped to 1, so the multiplication
        // cannot cause overflow.
        usize_from_u32(self.max_blocks_per_read)
            .checked_mul(self.block_size.to_usize())
            .unwrap()
    }
}

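For example, a 1024-byte block size gives max_blocks_per_read = max(1, 32768 / 1024) = 32 and num_entries = 32 * 8 = 256, so the read buffer is 32 KiB and the cache entries hold 256 KiB of block data in total; test_cache_opts below checks these values.
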
#[cfg(test)]
mod tests {
    use super::*;

    /// Convert block size in bytes to a `BlockSize`.
    fn get_block_size(sz: u32) -> BlockSize {
        let bs = BlockSize::from_superblock_value(sz.ilog2() - 10).unwrap();
        assert_eq!(bs.to_u32(), sz);
        bs
    }

    #[test]
    fn test_cache_opts() {
        let block_size = get_block_size(1024);
        assert_eq!(
            CacheOpts::new(block_size),
            CacheOpts {
                block_size,
                max_blocks_per_read: 32,
                num_entries: 256,
            }
        );

        let block_size = get_block_size(4096);
        assert_eq!(
            CacheOpts::new(block_size),
            CacheOpts {
                block_size,
                max_blocks_per_read: 8,
                num_entries: 64,
            }
        );

        let block_size = get_block_size(65536);
        assert_eq!(
            CacheOpts::new(block_size),
            CacheOpts {
                block_size,
                max_blocks_per_read: 1,
                num_entries: 8,
            }
        );
    }

    #[test]
    fn test_num_blocks_to_read() {
        let num_fs_blocks = 8;
        let cache = BlockCache::with_opts(
            CacheOpts {
                block_size: get_block_size(1024),
                max_blocks_per_read: 4,
                num_entries: 4,
            },
            num_fs_blocks,
        )
        .unwrap();
        assert_eq!(cache.num_blocks_to_read(0), 4);
        assert_eq!(cache.num_blocks_to_read(4), 4);
        assert_eq!(cache.num_blocks_to_read(5), 3);
        assert_eq!(cache.num_blocks_to_read(7), 1);
    }

    #[test]
    fn test_insert_block() {
        let num_fs_blocks = 8;
        let mut cache = BlockCache::with_opts(
            CacheOpts {
                block_size: get_block_size(1024),
                max_blocks_per_read: 4,
                num_entries: 4,
            },
            num_fs_blocks,
        )
        .unwrap();

        cache.read_buf[0] = 6;
        cache.read_buf[1024] = 7;

        // Insert a block and check that it's in the front of the cache.
        cache.insert_block(123, 0);
        assert_eq!(cache.entries[0].block_index, 123);
        assert_eq!(cache.entries[0].data[0], 6);
        let block123_ptr = cache.entries[0].data.as_ptr();

        // Insert another block, which is now the front of the cache.
        cache.insert_block(456, 1);
        assert_eq!(cache.entries[0].block_index, 456);
        assert_eq!(cache.entries[0].data[0], 7);

        // Check that the previous front of the cache is now in the
        // second entry.
        assert_eq!(cache.entries[1].block_index, 123);
        assert_eq!(cache.entries[1].data[0], 6);
        // And verify that the underlying allocation hasn't changed.
        assert_eq!(cache.entries[1].data.as_ptr(), block123_ptr);
    }
Collaborator commented:

Why not test get_or_insert?
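
A sketch of what such a test could look like (not part of the diff; the expected behavior is inferred from the get_or_insert_blocks doc comment):

    #[test]
    fn test_get_or_insert_blocks() {
        let mut cache = BlockCache::with_opts(
            CacheOpts {
                block_size: get_block_size(1024),
                max_blocks_per_read: 2,
                num_entries: 4,
            },
            8,
        )
        .unwrap();

        // Miss: the closure fills the two-block read buffer.
        let data = cache
            .get_or_insert_blocks(0, |buf| {
                assert_eq!(buf.len(), 2 * 1024);
                buf.fill(0xab);
                Ok(())
            })
            .unwrap();
        assert_eq!(data[0], 0xab);

        // Hit: the block is cached, so the closure must not run again.
        let data = cache
            .get_or_insert_blocks(0, |_| panic!("unexpected read"))
            .unwrap();
        assert_eq!(data[0], 0xab);
    }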

}