From 745f25addb2960a2fc6ff1841b2329925af687a9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 17 Feb 2026 20:08:14 -0800 Subject: [PATCH 01/41] Mega Cleanup --- CLAUDE.md | 5 + Cargo.lock | 74 +- Cargo.toml | 1 + lib/cache/async_backed.rs | 391 +++++++++ lib/cache/mod.rs | 2 + lib/drop_ward.rs | 133 +++ lib/fs/async_fs.rs | 432 ++++++++++ lib/fs/dcache.rs | 65 ++ lib/fs/fuser.rs | 425 ++++++++++ lib/fs/mod.rs | 188 +++++ lib/lib.rs | 3 + src/daemon.rs | 25 +- src/fs/fuser.rs | 351 -------- src/fs/icache/async_cache.rs | 1410 -------------------------------- src/fs/icache/bridge.rs | 138 ---- src/fs/icache/file_table.rs | 22 - src/fs/icache/inode_factory.rs | 19 - src/fs/icache/mod.rs | 21 - src/fs/mescloud/common.rs | 106 +-- src/fs/mescloud/composite.rs | 634 ++++++++------ src/fs/mescloud/icache.rs | 437 ---------- src/fs/mescloud/mod.rs | 440 +++++----- src/fs/mescloud/org.rs | 449 +++------- src/fs/mescloud/repo.rs | 903 ++++++++++---------- src/fs/mod.rs | 3 - src/fs/trait.rs | 375 --------- tests/async_fs_correctness.rs | 609 ++++++++++++++ tests/common/async_fs_mocks.rs | 104 +++ tests/common/mod.rs | 4 +- 29 files changed, 3706 insertions(+), 4063 deletions(-) create mode 100644 lib/cache/async_backed.rs create mode 100644 lib/drop_ward.rs create mode 100644 lib/fs/async_fs.rs create mode 100644 lib/fs/dcache.rs create mode 100644 lib/fs/fuser.rs create mode 100644 lib/fs/mod.rs delete mode 100644 src/fs/fuser.rs delete mode 100644 src/fs/icache/async_cache.rs delete mode 100644 src/fs/icache/bridge.rs delete mode 100644 src/fs/icache/file_table.rs delete mode 100644 src/fs/icache/inode_factory.rs delete mode 100644 src/fs/icache/mod.rs delete mode 100644 src/fs/mescloud/icache.rs delete mode 100644 src/fs/trait.rs create mode 100644 tests/async_fs_correctness.rs create mode 100644 tests/common/async_fs_mocks.rs diff --git a/CLAUDE.md b/CLAUDE.md index 9ba3f68b..653c07a6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -43,6 +43,11 @@ cargo fmt --all && 
cargo clippy --all-targets --all-features -- -D warnings && c - Channels: `tokio::sync::mpsc` for multi-producer, `tokio::sync::oneshot` for request-response - Never block the async runtime — offload blocking work with `tokio::task::spawn_blocking` +## Testing + +- Avoid writing tests in-line in the same file as production code; use separate `tests/` directory + for tests. + ## Dependencies - Check for existing deps with `cargo tree` before adding new crates diff --git a/Cargo.lock b/Cargo.lock index d4cf1499..1050f46b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -228,7 +234,7 @@ version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -758,6 +764,7 @@ dependencies = [ "opentelemetry", "opentelemetry-otlp", "opentelemetry_sdk", + "ouroboros", "rand", "reqwest", "reqwest-middleware", @@ -839,6 +846,12 @@ dependencies = [ "hashbrown 0.16.1", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -1497,6 +1510,30 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ouroboros" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" +dependencies = [ + "aliasable", 
+ "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + [[package]] name = "page_size" version = "0.6.0" @@ -1623,6 +1660,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", + "yansi", +] + [[package]] name = "prost" version = "0.13.5" @@ -2312,6 +2362,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -2865,6 +2921,12 @@ dependencies = [ "rustversion", ] +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "vt100" version = "0.16.2" @@ -3309,7 +3371,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "wit-parser", ] @@ -3320,7 +3382,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck", + 
"heck 0.5.0", "indexmap 2.13.0", "prettyplease", "syn", @@ -3387,6 +3449,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yoke" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index d837f7fe..dcf7b555 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ tracing-indicatif = "0.3.14" opentelemetry = { version = "0.29" } opentelemetry_sdk = { version = "0.29", features = ["rt-tokio"] } opentelemetry-otlp = { version = "0.29", default-features = false, features = ["http-proto", "trace", "reqwest-blocking-client"] } +ouroboros = "0.18" tracing-opentelemetry = { version = "0.30" } hashlink = "0.11.0" diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs new file mode 100644 index 00000000..c3fddd05 --- /dev/null +++ b/lib/cache/async_backed.rs @@ -0,0 +1,391 @@ +//! Concurrent deduplication cache for async computations. +//! +//! Given a key and an async factory, ensures the factory runs at most once per key. Subsequent +//! callers for the same key await the already-in-flight computation via a [`Shared`] future, +//! avoiding the race conditions inherent in `Notify`-based signalling. +//! +//! Note that this cache does not support automatic eviction. + +use std::panic::AssertUnwindSafe; +use std::{fmt::Debug, future::Future, hash::Hash, pin::Pin}; + +use futures::FutureExt as _; +use futures::future::Shared; + +type SharedFut = Shared> + Send>>>; + +/// Two-state slot: `InFlight` while a factory future is running, then promoted to `Ready` once +/// the future completes. 
+/// +/// The `InFlight` variant holds a `Shared<..., Output = Option>` where `None` signals that the +/// factory panicked (caught by `catch_unwind`). On `None`, callers remove the entry and retry. +enum Slot { + InFlight(SharedFut), + Ready(V), +} + +/// Deduplicating async cache. +/// +/// If [`get_or_init`](Self::get_or_init) is called concurrently for the same key, only one +/// invocation of the factory runs. All callers receive a clone of the result. +pub struct FutureBackedCache { + map: scc::HashMap>, +} + +impl Default for FutureBackedCache +where + K: Eq + Hash, + V: Clone + Send + 'static, +{ + fn default() -> Self { + Self { + map: scc::HashMap::default(), + } + } +} + +impl FutureBackedCache +where + K: Eq + Hash + Debug + Clone + Send + Sync + 'static, + V: Clone + Send + Sync + 'static, +{ + /// Get the cached value for `key`, or initialize it by running `factory`. + /// + /// If another caller is already computing the value for this key, this awaits the in-flight + /// computation instead of spawning a duplicate. If the factory panics, the entry is removed + /// and the next caller retries with a fresh factory invocation. + /// + /// # Panics + /// + /// Panics if this caller joins an in-flight factory that itself panicked (i.e. the caller + /// lost the race to insert a fresh entry after the poisoned slot was removed). + pub async fn get_or_init(&self, key: K, factory: F) -> V + where + F: FnOnce() -> Fut, + Fut: Future + Send + 'static, + { + // Fast path: value already cached. + let existing = self + .map + .read_async(&key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => return v, + Some(Err(shared)) => { + if let Some(v) = self.await_shared(&key, shared).await { + return v; + } + // Factory panicked; entry removed. Fall through to re-insert below. + } + None => {} + } + + // Slow path: use entry_async for atomic check-and-insert. 
+ let shared = match self.map.entry_async(key.clone()).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => return v.clone(), + Slot::InFlight(shared) => shared.clone(), + }, + scc::hash_map::Entry::Vacant(vac) => { + let shared = Self::make_shared(factory); + let ret = shared.clone(); + vac.insert_entry(Slot::InFlight(shared)); + ret + } + }; + + if let Some(v) = self.await_shared(&key, shared).await { + return v; + } + + panic!("FutureBackedCache: joined an in-flight factory that panicked for key {key:?}"); + } + + /// Like [`get_or_init`](Self::get_or_init), but for fallible factories. + /// + /// If the factory returns `Ok(v)`, the value is cached and returned. If it returns `Err(e)`, + /// **nothing is cached** and the error is propagated to the caller. + /// + /// Unlike `get_or_init`, concurrent callers are **not** deduplicated — each caller that + /// finds the key absent will invoke the factory independently. However, if a value was + /// previously cached (by either `get_or_init` or a successful `get_or_try_init`), it is + /// returned immediately without calling the factory. + pub async fn get_or_try_init(&self, key: K, factory: F) -> Result + where + F: FnOnce() -> Fut, + Fut: Future> + Send + 'static, + { + // Fast path: value already cached or in-flight from an infallible init. + let existing = self + .map + .read_async(&key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => return Ok(v), + Some(Err(shared)) => { + if let Some(v) = self.await_shared(&key, shared).await { + return Ok(v); + } + // Factory panicked; entry was removed. Fall through to run our own factory. + } + None => {} + } + + // Run the fallible factory (not deduplicated). + let val = factory().await?; + + // Attempt to cache. If another caller raced us and already inserted, + // return the existing value and discard ours. 
+ match self.map.entry_async(key).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Ok(self + .await_shared(occ.key(), shared.clone()) + .await + .unwrap_or(val)), + }, + scc::hash_map::Entry::Vacant(vac) => { + vac.insert_entry(Slot::Ready(val.clone())); + Ok(val) + } + } + } + + /// Get the cached value for `key` if it exists. + /// + /// - If the value is `Ready`, returns `Some(v)` immediately. + /// - If the value is `InFlight`, awaits the in-flight computation and returns `Some(v)`. + /// - If the key is absent, returns `None`. + /// - If the in-flight factory panicked, returns `None` (and removes the poisoned entry). + pub async fn get(&self, key: &K) -> Option { + let existing = self + .map + .read_async(key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => Some(v), + Some(Err(shared)) => self.await_shared(key, shared).await, + None => None, + } + } + + /// Await a `Shared` future, handle promotion to `Ready`, and handle panic recovery. + /// + /// Returns `Some(v)` on success. Returns `None` if the factory panicked, after removing + /// the poisoned entry from the map. + async fn await_shared(&self, key: &K, shared: SharedFut) -> Option { + let mut guard = PromoteGuard { + map: &self.map, + key, + value: None, + }; + + let result = shared.await; + + if let Some(v) = result { + guard.value = Some(v.clone()); + + self.map + .update_async(key, |_, slot| { + if matches!(slot, Slot::InFlight(_)) { + *slot = Slot::Ready(v.clone()); + } + }) + .await; + + guard.value = None; + Some(v) + } else { + // Factory panicked. Remove the poisoned InFlight entry so the next caller + // can retry. 
+ drop( + self.map + .remove_if_sync(key, |slot| matches!(slot, Slot::InFlight(_))), + ); + None + } + } + + /// Wrap a factory future in `catch_unwind`, producing a `Shared` with `Output = Option`. + fn make_shared(factory: F) -> SharedFut + where + F: FnOnce() -> Fut, + Fut: Future + Send + 'static, + { + let fut = AssertUnwindSafe(factory()).catch_unwind(); + let boxed: Pin> + Send>> = + Box::pin(async move { fut.await.ok() }); + boxed.shared() + } + + /// Returns the number of entries in the cache (both `Ready` and `InFlight`). + #[must_use] + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns `true` if the cache contains no entries. + #[must_use] + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// Synchronously insert a value, overwriting any existing entry. + /// + /// Suitable for seeding the cache before async operations begin (e.g. + /// inside an ouroboros builder where async is unavailable). + pub fn insert_sync(&self, key: K, value: V) { + drop(self.map.insert_sync(key, Slot::Ready(value))); + } + + /// Synchronously remove the entry for `key`, returning `true` if it was present. + /// + /// Suitable for use in contexts where async is not available (e.g. inside + /// [`StatelessDrop::delete`](crate::drop_ward::StatelessDrop::delete)). + pub fn remove_sync(&self, key: &K) -> bool { + self.map.remove_sync(key).is_some() + } +} + +/// Drop guard that synchronously promotes an `InFlight` entry to `Ready` if the caller +/// is cancelled between `shared.await` completing and the async promotion running. +/// +/// Set `value = None` to defuse after successful promotion. 
+struct PromoteGuard<'a, K, V> +where + K: Eq + Hash, + V: Clone + Send + Sync + 'static, +{ + map: &'a scc::HashMap>, + key: &'a K, + value: Option, +} + +impl Drop for PromoteGuard<'_, K, V> +where + K: Eq + Hash, + V: Clone + Send + Sync + 'static, +{ + fn drop(&mut self) { + if let Some(v) = self.value.take() { + self.map.update_sync(self.key, |_, slot| { + if matches!(slot, Slot::InFlight(_)) { + *slot = Slot::Ready(v); + } + }); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn try_init_ok_caches_value() { + let cache = FutureBackedCache::::default(); + let result: Result = cache + .get_or_try_init(1, || async { Ok("hello".to_owned()) }) + .await; + assert_eq!(result.unwrap(), "hello", "should return Ok value"); + + // Value should now be cached (get returns it without factory) + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "hello", "value should be in cache"); + } + + #[tokio::test] + async fn try_init_err_does_not_cache() { + let cache = FutureBackedCache::::default(); + let result: Result = cache.get_or_try_init(1, || async { Err("boom") }).await; + assert_eq!(result.unwrap_err(), "boom", "should return the error"); + + // Cache should be empty — error was not stored + assert!(cache.is_empty(), "cache should have no entries after error"); + assert!(cache.get(&1).await.is_none(), "key should not exist"); + } + + #[tokio::test] + async fn try_init_err_then_retry_ok() { + let cache = FutureBackedCache::::default(); + + // First call: factory fails + let r1: Result = cache.get_or_try_init(1, || async { Err("fail") }).await; + assert!(r1.is_err(), "first call should fail"); + + // Second call: factory succeeds + let r2: Result = cache + .get_or_try_init(1, || async { Ok("recovered".to_owned()) }) + .await; + assert_eq!(r2.unwrap(), "recovered", "retry should succeed"); + + // Value should now be cached + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "recovered"); + } + + #[tokio::test] + 
async fn try_init_returns_value_cached_by_init() { + let cache = FutureBackedCache::::default(); + + // Populate via infallible get_or_init + cache + .get_or_init(1, || async { "from_init".to_owned() }) + .await; + + // get_or_try_init should return the cached value without running factory + let result: Result = cache + .get_or_try_init(1, || async { panic!("factory should not run") }) + .await; + assert_eq!(result.unwrap(), "from_init"); + } + + #[tokio::test] + async fn panic_in_factory_is_recovered() { + use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + + let cache = Arc::new(FutureBackedCache::::default()); + let call_count = Arc::new(AtomicUsize::new(0)); + + // Spawn a task whose factory panics. tokio::spawn catches the panic. + let cache2 = Arc::clone(&cache); + let call_count2 = Arc::clone(&call_count); + let handle = tokio::spawn(async move { + cache2 + .get_or_init(1, || { + call_count2.fetch_add(1, Ordering::Relaxed); + async { panic!("boom") } + }) + .await + }); + // The spawned task panics internally; JoinHandle returns Err. + assert!(handle.await.is_err(), "task should have panicked"); + + // The key should NOT be permanently bricked. A new caller should succeed. + let v = cache + .get_or_init(1, || { + call_count.fetch_add(1, Ordering::Relaxed); + async { "recovered".to_owned() } + }) + .await; + assert_eq!(v, "recovered", "should recover after panic"); + assert_eq!( + call_count.load(Ordering::Relaxed), + 2, + "factory called twice" + ); + } +} diff --git a/lib/cache/mod.rs b/lib/cache/mod.rs index e0c1c97f..5c48ee22 100644 --- a/lib/cache/mod.rs +++ b/lib/cache/mod.rs @@ -1,3 +1,5 @@ +/// Async-backed cache implementation. +pub mod async_backed; /// Cache eviction policies. pub mod eviction; /// File-backed cache implementation. diff --git a/lib/drop_ward.rs b/lib/drop_ward.rs new file mode 100644 index 00000000..4922e13c --- /dev/null +++ b/lib/drop_ward.rs @@ -0,0 +1,133 @@ +//! 
Automatic, type-directed cleanup driven by reference counting. +//! +//! [`DropWard`] tracks how many live references exist for a given key and invokes a cleanup +//! callback when a key's count reaches zero. The cleanup logic is selected at the type level +//! through a zero-sized "tag" type that implements [`StatelessDrop`], keeping the ward itself +//! generic over *what* it manages without storing per-key values. +//! +//! This is designed for resources whose lifecycle is bound to an external context (e.g. GPU device +//! handles, connection pools, graphics pipelines) where Rust's built-in `Drop` cannot be used +//! because cleanup requires access to that context. +//! +//! # Design rationale +//! +//! The tag type `T` is constrained to be zero-sized. It exists only to carry the [`StatelessDrop`] +//! implementation at the type level — no `T` value is ever constructed or stored. This means a +//! single `DropWard` instance adds no per-key overhead beyond the key and its `usize` count. +//! +//! # Example +//! +//! ```ignore +//! struct GpuTextureDrop; +//! +//! impl StatelessDrop for GpuTextureDrop { +//! fn delete(device: &wgpu::Device, _key: &TextureId) { +//! // e.g. flush a deferred-destruction queue +//! device.poll(wgpu::Maintain::Wait); +//! } +//! } +//! +//! let mut ward: DropWard = DropWard::new(device); +//! +//! ward.inc(texture_id); // → 1 +//! ward.inc(texture_id); // → 2 +//! ward.dec(&texture_id); // → Some(1) +//! ward.dec(&texture_id); // → Some(0), calls GpuTextureDrop::delete(&device, &texture_id) +//! ``` + +use std::marker::PhantomData; + +use rustc_hash::FxHashMap; + +/// Type-level hook for cleanup that requires an external context. +/// +/// Implement this on a zero-sized tag type. The tag is never instantiated — it only selects which +/// `delete` implementation a [`DropWard`] will call. +pub trait StatelessDrop { + /// Called exactly once when a key's reference count reaches zero. 
+ /// + /// `ctx` is the shared context owned by the [`DropWard`]. `key` is the key whose count just + /// reached zero. This callback fires synchronously inside [`DropWard::dec`]; avoid blocking or + /// panicking if the ward is used on a hot path. + fn delete(ctx: &Ctx, key: &K); +} + +/// A reference-counted key set that triggers [`StatelessDrop::delete`] on the associated context +/// when any key's count drops to zero. +/// +/// # Type parameters +/// +/// - `Ctx` — shared context passed to `T::delete` (e.g. a device handle). +/// - `K` — the key type being reference-counted. +/// - `T` — a **zero-sized** tag type carrying the cleanup logic. +/// Will fail to compile if `size_of::() != 0`. +/// +/// # Concurrency +/// +/// Not thread-safe. All access requires `&mut self`. Wrap in a `Mutex` or similar if shared across +/// threads. +/// +#[derive(Debug, Clone)] +pub struct DropWard { + map: FxHashMap, + ctx: Ctx, + _marker: PhantomData, +} + +impl DropWard +where + K: Eq + std::hash::Hash, + T: StatelessDrop, +{ + /// Compile-time guard: `T` must be zero-sized. + const _ASSERT_ZST: () = assert!(size_of::() == 0, "T must be zero-sized"); + + /// Create a new ward that will pass `ctx` to `T::delete` on cleanup. + pub fn new(ctx: Ctx) -> Self { + Self { + map: FxHashMap::default(), + ctx, + _marker: PhantomData, + } + } + + /// Increment the reference count for `key`, inserting it with a count + /// of 1 if it does not exist. + /// + /// Returns the count **after** incrementing. + pub fn inc(&mut self, key: K) -> usize { + *self + .map + .entry(key) + .and_modify(|count| *count += 1) + .or_insert(1) + } + + fn dec_by(&mut self, key: &K, by: usize) -> Option { + let curr = *self.map.get(key)?; + let new_count = curr.saturating_sub(by); + if new_count == 0 { + self.map.remove(key); + T::delete(&self.ctx, key); + } else if let Some(slot) = self.map.get_mut(key) { + *slot = new_count; + } + Some(new_count) + } + + /// Decrement the reference count for `key`. 
+ /// + /// If the count reaches zero, the key is removed and `T::delete` is + /// called synchronously with the ward's context. Returns `Some(0)` in + /// this case — the key will no longer be tracked. + /// + /// Returns `None` if `key` was not present (no-op). + pub fn dec(&mut self, key: &K) -> Option { + self.dec_by(key, 1) + } + + /// Decrement the reference count for `key` by `count`. + pub fn dec_count(&mut self, key: &K, count: usize) -> Option { + self.dec_by(key, count) + } +} diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs new file mode 100644 index 00000000..7626578f --- /dev/null +++ b/lib/fs/async_fs.rs @@ -0,0 +1,432 @@ +//! Async `INode` Table which supports concurrent access and modification. + +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; + +use bytes::Bytes; + +use crate::cache::async_backed::FutureBackedCache; +use crate::drop_ward::StatelessDrop; +use crate::fs::{ + AsyncFsStats, DirEntry, FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags, + dcache::DCache, +}; + +/// A reader for an open file, returned by [`FsDataProvider::open`]. +/// +/// Implementors provide the actual data for read operations. The FUSE +/// adapter calls [`close`](Self::close) to release resources explicitly. +pub trait FileReader: Send + Sync + 'static { + /// Read up to `size` bytes starting at byte `offset`. + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send; + + /// Release any resources held by this reader. + /// + /// Called explicitly by the FUSE adapter during `release`. Implementations + /// that hold inner file handles should release them here. The default + /// implementation is a no-op. + fn close(&self) -> impl Future> + Send { + async { Ok(()) } + } +} + +/// A data provider for [`AsyncFs`] that fetches inode data on cache misses. 
+pub trait FsDataProvider: Clone + Send + Sync + 'static { + /// The reader type returned by [`open`](Self::open). + type Reader: FileReader; + + /// Look up a child inode by name within the given parent directory. + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send; + + /// List all children of a directory. + /// + /// Called by [`AsyncFs::readdir`] on a cache miss. The returned + /// children are inserted into the directory cache and inode table + /// so subsequent reads are served from cache. + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send; + + /// Open a file and return a reader for subsequent read calls. + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send; +} + +/// Zero-sized tag whose [`StatelessDrop`] implementation automatically evicts +/// an inode from the inode table when its reference count reaches zero. +pub struct InodeForget; + +impl<'a> StatelessDrop<&'a FutureBackedCache, InodeAddr> for InodeForget { + fn delete(inode_table: &&'a FutureBackedCache, addr: &InodeAddr) { + inode_table.remove_sync(addr); + } +} + +/// A looked-up inode whose lifetime must be managed by the caller. +/// +/// Each `TrackedINode` returned by [`AsyncFs::lookup`] represents one +/// reference that the FUSE kernel holds. The caller must balance it by +/// decrementing the [`InodeLifecycle`] ward when the kernel sends `forget`. +#[derive(Debug, Clone, Copy)] +pub struct TrackedINode { + /// The resolved inode data. + pub inode: INode, +} + +/// An open file that provides read access. +/// +/// Returned by [`AsyncFs::open`]. The caller owns this handle and uses +/// [`read`](Self::read) to fetch data. Dropping the handle releases +/// the underlying reader when the last `Arc` clone is gone. +#[derive(Debug, Clone)] +pub struct OpenFile { + /// The raw file handle number, suitable for returning to the FUSE kernel. + pub fh: FileHandle, + /// The reader backing this open file. 
+ pub reader: Arc, +} + +impl OpenFile { + /// Read up to `size` bytes starting at byte `offset`. + pub async fn read(&self, offset: u64, size: u32) -> Result { + self.reader.read(offset, size).await + } +} + +mod inode_lifecycle_impl { + #![allow(clippy::future_not_send, clippy::mem_forget)] + use ouroboros::self_referencing; + + use crate::cache::async_backed::FutureBackedCache; + use crate::drop_ward::DropWard; + use crate::fs::InodeAddr; + + use super::{INode, InodeForget}; + + /// Co-located inode table and reference-count ward. + /// + /// The ward borrows the table directly (no `Arc`) via `ouroboros`. + /// When `dec` reaches zero for a key, [`InodeForget::delete`] synchronously + /// removes that inode from the table. + #[self_referencing] + pub struct InodeLifecycle { + pub(super) table: FutureBackedCache, + #[borrows(table)] + #[not_covariant] + pub(super) ward: + DropWard<&'this FutureBackedCache, InodeAddr, InodeForget>, + } + + impl InodeLifecycle { + /// Create a new lifecycle managing the given inode table. + pub fn from_table(table: FutureBackedCache) -> Self { + Self::new(table, |tbl| DropWard::new(tbl)) + } + } +} + +pub use inode_lifecycle_impl::InodeLifecycle; + +impl InodeLifecycle { + /// Increment the reference count for an inode address. + pub fn inc(&mut self, addr: InodeAddr) -> usize { + self.with_ward_mut(|ward| ward.inc(addr)) + } + + /// Decrement the reference count for an inode address. + /// + /// When the count reaches zero, the inode is automatically evicted + /// from the table via [`InodeForget::delete`]. + pub fn dec(&mut self, addr: &InodeAddr) -> Option { + self.with_ward_mut(|ward| ward.dec(addr)) + } + + /// Decrement the reference count by `count`. + /// + /// When the count reaches zero, the inode is automatically evicted. + pub fn dec_count(&mut self, addr: &InodeAddr, count: usize) -> Option { + self.with_ward_mut(|ward| ward.dec_count(addr, count)) + } + + /// Read-only access to the underlying inode table. 
+ #[must_use] + pub fn table(&self) -> &FutureBackedCache { + self.borrow_table() + } +} + +/// An asynchronous filesystem cache mapping `InodeAddr` to `INode`. +/// +/// Uses two [`FutureBackedCache`] layers: +/// - `inode_table` stores resolved inodes by address, used by [`loaded_inode`](Self::loaded_inode). +/// - `lookup_cache` stores lookup results by `(parent_addr, name)`, ensuring `dp.lookup()` is only +/// called on a true cache miss (not already cached or in-flight). +/// +/// The [`DCache`] sits in front as a synchronous fast path mapping `(parent, name)` to child addr. +pub struct AsyncFs<'tbl, DP: FsDataProvider> { + /// Canonical addr -> `INode` map. Used by `loaded_inode()` to retrieve inodes by address. + inode_table: &'tbl FutureBackedCache, + + /// Deduplicating lookup cache keyed by `(parent_addr, child_name)`. The factory is + /// `dp.lookup()`, so the data provider is only called on a true cache miss. + lookup_cache: FutureBackedCache<(InodeAddr, OsString), INode>, + + /// Directory entry cache, mapping `(parent, name)` to child inode address. + directory_cache: DCache, + + /// The data provider used to fetch inode data on cache misses. + data_provider: DP, + + /// Monotonically increasing file handle counter. Starts at 1 (0 is reserved). + next_fh: AtomicU64, + + /// Tracks which directories have had their children fetched via `dp.readdir`. + readdir_populated: FutureBackedCache, +} + +impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { + /// Create a new `AsyncFs`, seeding the root inode into the table. 
+ pub async fn new( + data_provider: DP, + root: INode, + inode_table: &'tbl FutureBackedCache, + ) -> Self { + inode_table + .get_or_init(root.addr, || async move { root }) + .await; + + Self { + inode_table, + lookup_cache: FutureBackedCache::default(), + directory_cache: DCache::new(), + data_provider, + next_fh: AtomicU64::new(1), + readdir_populated: FutureBackedCache::default(), + } + } + + /// Create a new `AsyncFs`, assuming the root inode is already in the table. + /// + /// This synchronous constructor is needed for ouroboros builders where + /// async is unavailable. The caller must ensure the root inode has already + /// been inserted into `inode_table` (e.g. via [`FutureBackedCache::insert_sync`]). + #[must_use] + pub fn new_preseeded( + data_provider: DP, + inode_table: &'tbl FutureBackedCache, + ) -> Self { + Self { + inode_table, + lookup_cache: FutureBackedCache::default(), + directory_cache: DCache::new(), + data_provider, + next_fh: AtomicU64::new(1), + readdir_populated: FutureBackedCache::default(), + } + } + + /// Get the total number of inodes currently stored in the inode table. + #[must_use] + pub fn inode_count(&self) -> usize { + self.inode_table.len() + } + + /// Return filesystem statistics. + /// + /// Reports the current inode count from the cache. Block-related + /// fields default to values appropriate for a virtual read-only + /// filesystem (4 KiB blocks, no free space). + #[must_use] + pub fn statfs(&self) -> AsyncFsStats { + AsyncFsStats { + block_size: 4096, + total_blocks: 0, + free_blocks: 0, + available_blocks: 0, + total_inodes: self.inode_count() as u64, + free_inodes: 0, + max_filename_length: 255, + } + } + + /// Asynchronously look up an inode by name within a parent directory. + /// + /// Resolution order: + /// 1. Directory cache (synchronous fast path) + /// 2. Lookup cache (`get_or_try_init` — calls `dp.lookup()` only on a true miss) + /// 3. 
On success, populates inode table and directory cache + pub async fn lookup( + &self, + parent: LoadedAddr, + name: &OsStr, + ) -> Result { + let parent_ino = self.loaded_inode(parent).await?; + debug_assert!( + matches!(parent_ino.itype, INodeType::Directory), + "parent inode should be a directory" + ); + + if let Some(dentry) = self.directory_cache.lookup(parent, name) + && let Some(inode) = self.inode_table.get(&dentry.ino.0).await + { + return Ok(TrackedINode { inode }); + } + // Inode was evicted from the table — fall through to the slow path. + + let name_owned = name.to_os_string(); + let name_for_cache = name_owned.clone(); + let lookup_key = (parent.0, name_owned.clone()); + let dp = self.data_provider.clone(); + + let child = self + .lookup_cache + .get_or_try_init(lookup_key, || async move { + dp.lookup(parent_ino, &name_owned).await + }) + .await?; + + self.inode_table + .get_or_init(child.addr, || async move { child }) + .await; + + self.directory_cache + .insert( + parent, + name_for_cache, + LoadedAddr(child.addr), + matches!(child.itype, INodeType::Directory), + ) + .await; + + Ok(TrackedINode { inode: child }) + } + + /// Retrieve an inode that is expected to already be loaded. + /// + /// If the inode is currently in-flight (being loaded by another caller), this awaits + /// completion. Returns an error if the inode is not in the table at all. + pub async fn loaded_inode(&self, addr: LoadedAddr) -> Result { + self.inode_table.get(&addr.0).await.ok_or_else(|| { + tracing::error!( + inode = ?addr.0, + "inode not found in table — this is a programming bug" + ); + std::io::Error::from_raw_os_error(libc::ENOENT) + }) + } + + /// Return the attributes of the inode at `addr`. + /// + /// This is the getattr entry point for the filesystem. Returns the + /// cached [`INode`] directly — callers at the FUSE boundary are + /// responsible for converting to `fuser::FileAttr`. 
+ pub async fn getattr(&self, addr: LoadedAddr) -> Result { + self.loaded_inode(addr).await + } + + /// Open a file for reading. + /// + /// Validates the inode is not a directory, delegates to the data provider + /// to create a [`FileReader`], and returns an [`OpenFile`] that the caller + /// owns. Reads go through [`OpenFile::read`]. + pub async fn open( + &self, + addr: LoadedAddr, + flags: OpenFlags, + ) -> Result, std::io::Error> { + let inode = self.loaded_inode(addr).await?; + if inode.itype == INodeType::Directory { + return Err(std::io::Error::from_raw_os_error(libc::EISDIR)); + } + let reader = self.data_provider.open(inode, flags).await?; + let fh = self.next_fh.fetch_add(1, Ordering::Relaxed); + Ok(OpenFile { + fh, + reader: Arc::new(reader), + }) + } + + /// Iterate directory entries for `parent`, starting from `offset`. + /// + /// On the first call for a given parent, fetches the directory listing + /// from the data provider and populates the directory cache and inode + /// table. Subsequent calls serve entries directly from cache. + /// + /// Entries are yielded in name-sorted order. For each entry, `filler` is + /// called with the [`DirEntry`] and the next offset value. If `filler` + /// returns `true` (indicating the caller's buffer is full), iteration + /// stops early. + /// + /// # Concurrency + /// + /// The `readdir_populated` check-then-populate is **not** atomic. If two + /// concurrent callers invoke `readdir` for the same parent, both may call + /// `dp.readdir()` and insert duplicate children. This is safe when the + /// caller serializes access (e.g. via `&mut self` on the `Fs` trait). + /// + /// TODO(MES-746): Implement `opendir` and `releasedir` to snapshot directory contents and + /// avoid racing with `lookup`/`createfile`. 
+ pub async fn readdir( + &self, + parent: LoadedAddr, + offset: u64, + mut filler: impl FnMut(DirEntry<'_>, u64) -> bool, + ) -> Result<(), std::io::Error> { + let parent_inode = self.loaded_inode(parent).await?; + if parent_inode.itype != INodeType::Directory { + return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); + } + + // Populate the directory cache on first readdir for this parent. + if self.readdir_populated.get(&parent).await.is_none() { + let children = self.data_provider.readdir(parent_inode).await?; + for (name, child_inode) in children { + self.inode_table + .get_or_init(child_inode.addr, || async move { child_inode }) + .await; + self.directory_cache + .insert( + parent, + name, + LoadedAddr(child_inode.addr), + child_inode.itype == INodeType::Directory, + ) + .await; + } + self.readdir_populated + .get_or_init(parent, || async {}) + .await; + } + + let mut children = self.directory_cache.readdir(parent).await; + children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + #[expect( + clippy::cast_possible_truncation, + reason = "offset fits in usize on supported 64-bit platforms" + )] + for (i, (name, dvalue)) in children.iter().enumerate().skip(offset as usize) { + let inode = self.loaded_inode(dvalue.ino).await?; + let next_offset = (i + 1) as u64; + if filler(DirEntry { name, inode }, next_offset) { + break; + } + } + + Ok(()) + } +} diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs new file mode 100644 index 00000000..5138e802 --- /dev/null +++ b/lib/fs/dcache.rs @@ -0,0 +1,65 @@ +use std::ffi::{OsStr, OsString}; + +use crate::fs::LoadedAddr; + +/// Cached metadata for a directory entry. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DValue { + /// Inode address of this entry. + pub ino: LoadedAddr, + /// Whether this entry is itself a directory. + pub is_dir: bool, +} + +/// In-memory directory entry cache mapping `(parent, name)` to child metadata. +/// +/// Backed by [`scc::HashMap`] for atomic upsert on insert. 
The `readdir` +/// implementation scans the entire map and filters by parent — this is O(n) +/// over the cache size rather than O(log n + k) with an ordered index, but +/// guarantees that `insert` never creates a window where an entry is absent. +#[derive(Default)] +pub struct DCache { + cache: scc::HashMap<(LoadedAddr, OsString), DValue>, +} + +impl DCache { + /// Creates an empty directory cache. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Looks up a single child entry by parent inode and name. + #[must_use] + pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option { + let key = (parent_ino, name.to_os_string()); + self.cache.read_sync(&key, |_, v| v.clone()) + } + + /// Atomically inserts or overwrites a child entry in the cache. + pub async fn insert( + &self, + parent_ino: LoadedAddr, + name: OsString, + ino: LoadedAddr, + is_dir: bool, + ) { + let key = (parent_ino, name); + let value = DValue { ino, is_dir }; + self.cache.upsert_async(key, value).await; + } + + /// Returns all cached children of `parent_ino` as `(name, value)` pairs. + pub async fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> { + let mut entries = Vec::new(); + self.cache + .iter_async(|key, value| { + if key.0 == parent_ino { + entries.push((key.1.clone(), value.clone())); + } + true + }) + .await; + entries + } +} diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs new file mode 100644 index 00000000..50042a24 --- /dev/null +++ b/lib/fs/fuser.rs @@ -0,0 +1,425 @@ +//! FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`AsyncFs`](super::async_fs::AsyncFs). + +use std::collections::HashMap; +use std::ffi::OsStr; +use std::sync::Arc; + +use super::async_fs::{FileReader as _, FsDataProvider}; +use super::{FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags}; +use crate::cache::async_backed::FutureBackedCache; +use tracing::{debug, error, instrument}; + +/// Wrapper converting [`std::io::Error`] to errno. 
+#[derive(Debug, thiserror::Error)] +#[error("{0}")] +struct FuseIoError(std::io::Error); + +#[expect( + clippy::wildcard_enum_match_arm, + reason = "ErrorKind is non_exhaustive; EIO is the safe default" +)] +impl From for i32 { + fn from(e: FuseIoError) -> Self { + e.0.raw_os_error().unwrap_or_else(|| match e.0.kind() { + std::io::ErrorKind::NotFound => libc::ENOENT, + std::io::ErrorKind::PermissionDenied => libc::EACCES, + std::io::ErrorKind::AlreadyExists => libc::EEXIST, + _ => libc::EIO, + }) + } +} + +/// Error for read operations. +#[derive(Debug, thiserror::Error)] +enum FuseReadError { + /// The file handle was not open. + #[error("file handle not open")] + NotOpen, + /// An I/O error occurred during the read. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), +} + +impl From for i32 { + fn from(e: FuseReadError) -> Self { + match e { + FuseReadError::NotOpen => libc::EBADF, + FuseReadError::Io(ref io) => io.raw_os_error().unwrap_or(libc::EIO), + } + } +} + +/// Error for release operations. +#[derive(Debug, thiserror::Error)] +enum FuseReleaseError { + /// The file handle was not open. + #[error("file handle not open")] + NotOpen, +} + +impl From for i32 { + fn from(e: FuseReleaseError) -> Self { + match e { + FuseReleaseError::NotOpen => libc::EBADF, + } + } +} + +mod inner { + #![allow(clippy::future_not_send, clippy::mem_forget)] + + use ouroboros::self_referencing; + + use crate::cache::async_backed::FutureBackedCache; + use crate::drop_ward::DropWard; + use crate::fs::async_fs::{AsyncFs, FsDataProvider, InodeForget}; + use crate::fs::{INode, InodeAddr}; + + /// Self-referential struct holding the inode table, refcount ward, and `AsyncFs`. + /// + /// Both `ward` and `fs` borrow from `table`. The ward manages inode + /// refcounts; the fs serves lookup/readdir/open/read operations. 
+ #[self_referencing] + pub(super) struct FuseBridgeInner { + table: FutureBackedCache, + #[borrows(table)] + #[not_covariant] + ward: DropWard<&'this FutureBackedCache, InodeAddr, InodeForget>, + #[borrows(table)] + #[covariant] + fs: AsyncFs<'this, DP>, + } + + impl FuseBridgeInner { + pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { + FuseBridgeInnerBuilder { + table, + ward_builder: |tbl| DropWard::new(tbl), + fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), + } + .build() + } + + pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> { + self.borrow_fs() + } + + pub(super) fn ward_inc(&mut self, addr: InodeAddr) -> usize { + self.with_ward_mut(|ward| ward.inc(addr)) + } + + pub(super) fn ward_dec_count(&mut self, addr: InodeAddr, count: usize) -> Option { + self.with_ward_mut(|ward| ward.dec_count(&addr, count)) + } + } +} + +use inner::FuseBridgeInner; + +/// Convert an `INode` to the fuser-specific `FileAttr`. +fn inode_to_fuser_attr(inode: &INode, block_size: u32) -> fuser::FileAttr { + fuser::FileAttr { + ino: inode.addr, + size: inode.size, + blocks: inode.size.div_ceil(512), + atime: inode.last_modified_at, + mtime: inode.last_modified_at, + ctime: inode.last_modified_at, + crtime: inode.create_time, + kind: inode_type_to_fuser(inode.itype), + perm: inode.permissions.bits(), + nlink: 1, + uid: inode.uid, + gid: inode.gid, + rdev: 0, + blksize: block_size, + flags: 0, + } +} + +#[expect( + clippy::wildcard_enum_match_arm, + reason = "INodeType is non_exhaustive; File is the safe default" +)] +fn inode_type_to_fuser(itype: INodeType) -> fuser::FileType { + match itype { + INodeType::Directory => fuser::FileType::Directory, + INodeType::Symlink => fuser::FileType::Symlink, + _ => fuser::FileType::RegularFile, + } +} + +const BLOCK_SIZE: u32 = 4096; + +/// Bridges a generic [`FsDataProvider`] to the [`fuser::Filesystem`] trait. 
+/// +/// Owns a self-referential inode table + ward + [`AsyncFs`](super::async_fs::AsyncFs), +/// plus an open-file map and a tokio runtime handle for blocking on async ops. +pub struct FuserAdapter { + inner: FuseBridgeInner, + open_files: HashMap>, + runtime: tokio::runtime::Handle, +} + +impl FuserAdapter { + // TODO(markovejnovic): This low TTL is really not ideal. It slows us down a lot, since the + // kernel has to ask us for every single lookup all the time. + // + // I think a better implementation is to implement + // + // notify_inval_inode(ino, offset, len) + // notify_inval_entry(parent_ino, name) + // + // These two functions can be used to invalidate specific entries in the kernel cache when we + // know they have changed. This would allow us to set a much higher TTL here. + const SHAMEFUL_TTL: std::time::Duration = std::time::Duration::from_secs(1); + + /// Create a new adapter from a pre-seeded inode table and data provider. + /// + /// The `table` must already have the root inode inserted. 
+ pub fn new( + table: FutureBackedCache, + provider: DP, + runtime: tokio::runtime::Handle, + ) -> Self { + Self { + inner: FuseBridgeInner::create(table, provider), + open_files: HashMap::new(), + runtime, + } + } +} + +impl fuser::Filesystem for FuserAdapter { + #[instrument(name = "FuserAdapter::lookup", skip(self, _req, reply))] + fn lookup( + &mut self, + _req: &fuser::Request<'_>, + parent: u64, + name: &OsStr, + reply: fuser::ReplyEntry, + ) { + let result = self.runtime.block_on(async { + let tracked = self + .inner + .get_fs() + .lookup(LoadedAddr(parent), name) + .await + .map_err(FuseIoError)?; + self.inner.ward_inc(tracked.inode.addr); + Ok::<_, FuseIoError>(tracked.inode) + }); + match result { + Ok(inode) => { + let f_attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); + debug!(?f_attr, "replying..."); + reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument(name = "FuserAdapter::getattr", skip(self, _req, _fh, reply))] + fn getattr( + &mut self, + _req: &fuser::Request<'_>, + ino: u64, + _fh: Option, + reply: fuser::ReplyAttr, + ) { + let result = self.runtime.block_on(async { + self.inner + .get_fs() + .getattr(LoadedAddr(ino)) + .await + .map_err(FuseIoError) + }); + match result { + Ok(inode) => { + let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); + debug!(?attr, "replying..."); + reply.attr(&Self::SHAMEFUL_TTL, &attr); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] + fn readdir( + &mut self, + _req: &fuser::Request<'_>, + ino: u64, + _fh: u64, + offset: i64, + mut reply: fuser::ReplyDirectory, + ) { + let offset_u64 = offset.cast_unsigned(); + let result = self.runtime.block_on(async { + let mut entries = Vec::new(); + self.inner + .get_fs() + .readdir(LoadedAddr(ino), offset_u64, |de, _next_offset| { + 
entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); + false + }) + .await + .map_err(FuseIoError)?; + Ok::<_, FuseIoError>(entries) + }); + + let entries = match result { + Ok(entries) => entries, + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + return; + } + }; + + #[expect( + clippy::cast_possible_truncation, + reason = "offset fits in usize on supported 64-bit platforms" + )] + for (i, (entry_ino, entry_name, entry_itype)) in entries.iter().enumerate() { + let kind = inode_type_to_fuser(*entry_itype); + let abs_idx = offset_u64 as usize + i + 1; + let Ok(idx): Result = abs_idx.try_into() else { + error!("Directory entry index {} too large for fuser", abs_idx); + reply.error(libc::EIO); + return; + }; + + debug!(?entry_name, ino = entry_ino, "adding entry to reply..."); + if reply.add(*entry_ino, idx, kind, entry_name) { + debug!("buffer full for now, stopping readdir"); + break; + } + } + + debug!("finalizing reply..."); + reply.ok(); + } + + #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] + fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { + let flags = OpenFlags::from_bits_truncate(flags); + let result = self.runtime.block_on(async { + let open_file = self + .inner + .get_fs() + .open(LoadedAddr(ino), flags) + .await + .map_err(FuseIoError)?; + let fh = open_file.fh; + self.open_files.insert(fh, Arc::clone(&open_file.reader)); + Ok::<_, FuseIoError>(fh) + }); + match result { + Ok(fh) => { + debug!(handle = fh, "replying..."); + reply.opened(fh, 0); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument( + name = "FuserAdapter::read", + skip(self, _req, _ino, fh, offset, size, _flags, _lock_owner, reply) + )] + fn read( + &mut self, + _req: &fuser::Request<'_>, + _ino: u64, + fh: u64, + offset: i64, + size: u32, + _flags: i32, + _lock_owner: Option, + reply: fuser::ReplyData, + ) { + 
let result: Result<_, FuseReadError> = self.runtime.block_on(async { + let reader = self.open_files.get(&fh).ok_or(FuseReadError::NotOpen)?; + Ok(reader.read(offset.cast_unsigned(), size).await?) + }); + match result { + Ok(data) => { + debug!(read_bytes = data.len(), "replying..."); + reply.data(&data); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument( + name = "FuserAdapter::release", + skip(self, _req, _ino, fh, _flags, _lock_owner, _flush, reply) + )] + fn release( + &mut self, + _req: &fuser::Request<'_>, + _ino: u64, + fh: u64, + _flags: i32, + _lock_owner: Option, + _flush: bool, + reply: fuser::ReplyEmpty, + ) { + let result: Result<_, FuseReleaseError> = match self.open_files.remove(&fh) { + Some(reader) => { + if let Err(e) = self.runtime.block_on(reader.close()) { + debug!(error = %e, "reader close reported error"); + } + Ok(()) + } + None => Err(FuseReleaseError::NotOpen), + }; + match result { + Ok(()) => { + debug!("replying ok"); + reply.ok(); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[expect( + clippy::cast_possible_truncation, + reason = "nlookups fits in usize on supported 64-bit platforms" + )] + #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] + fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { + self.inner.ward_dec_count(ino, nlookup as usize); + } + + #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] + fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { + let stats = self.inner.get_fs().statfs(); + debug!(?stats, "replying..."); + reply.statfs( + stats.total_blocks, + stats.free_blocks, + stats.available_blocks, + stats.total_inodes, + stats.free_inodes, + stats.block_size, + stats.max_filename_length, + 0, + ); + } +} diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs new file mode 100644 index 00000000..e8f971b4 --- 
/dev/null +++ b/lib/fs/mod.rs @@ -0,0 +1,188 @@ +//! Useful filesystem generalizations. +/// Async filesystem cache with concurrent inode management. +pub mod async_fs; +/// Directory entry cache for fast parent-child lookups. +pub mod dcache; +/// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. +pub mod fuser; + +pub use async_fs::{InodeForget, InodeLifecycle, OpenFile, TrackedINode}; + +use std::ffi::OsStr; +use std::time::SystemTime; + +use bitflags::bitflags; + +/// Type representing an inode identifier. +pub type InodeAddr = u64; + +/// Represents an inode address that has been loaded into the inode table. +/// +/// This newtype wrapper distinguishes inode addresses that are known to exist +/// in the [`async_fs::AsyncFs`] inode table from raw [`InodeAddr`] values. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct LoadedAddr(pub InodeAddr); + +/// Type representing a file handle. +pub type FileHandle = u64; + +bitflags! { + /// Permission bits for an inode, similar to Unix file permissions. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct InodePerms: u16 { + /// Other: execute permission. + const OTHER_EXECUTE = 1 << 0; + /// Other: write permission. + const OTHER_WRITE = 1 << 1; + /// Other: read permission. + const OTHER_READ = 1 << 2; + + /// Group: execute permission. + const GROUP_EXECUTE = 1 << 3; + /// Group: write permission. + const GROUP_WRITE = 1 << 4; + /// Group: read permission. + const GROUP_READ = 1 << 5; + + /// Owner: execute permission. + const OWNER_EXECUTE = 1 << 6; + /// Owner: write permission. + const OWNER_WRITE = 1 << 7; + /// Owner: read permission. + const OWNER_READ = 1 << 8; + + /// Sticky bit. + const STICKY = 1 << 9; + /// Set-group-ID bit. + const SETGID = 1 << 10; + /// Set-user-ID bit. + const SETUID = 1 << 11; + + /// Other: read, write, and execute. 
+ const OTHER_RWX = Self::OTHER_READ.bits() + | Self::OTHER_WRITE.bits() + | Self::OTHER_EXECUTE.bits(); + /// Group: read, write, and execute. + const GROUP_RWX = Self::GROUP_READ.bits() + | Self::GROUP_WRITE.bits() + | Self::GROUP_EXECUTE.bits(); + /// Owner: read, write, and execute. + const OWNER_RWX = Self::OWNER_READ.bits() + | Self::OWNER_WRITE.bits() + | Self::OWNER_EXECUTE.bits(); + } +} + +bitflags! { + /// Flags for opening a file, similar to Unix open(2) flags. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct OpenFlags: i32 { + /// Open for reading only. + const RDONLY = libc::O_RDONLY; + /// Open for writing only. + const WRONLY = libc::O_WRONLY; + /// Open for reading and writing. + const RDWR = libc::O_RDWR; + + /// Append on each write. + const APPEND = libc::O_APPEND; + /// Truncate to zero length. + const TRUNC = libc::O_TRUNC; + /// Create file if it does not exist. + const CREAT = libc::O_CREAT; + /// Error if file already exists (with `CREAT`). + const EXCL = libc::O_EXCL; + + /// Non-blocking mode. + const NONBLOCK = libc::O_NONBLOCK; + /// Synchronous writes. + const SYNC = libc::O_SYNC; + /// Synchronous data integrity writes. + const DSYNC = libc::O_DSYNC; + /// Do not follow symlinks. + const NOFOLLOW = libc::O_NOFOLLOW; + /// Set close-on-exec. + const CLOEXEC = libc::O_CLOEXEC; + /// Fail if not a directory. + const DIRECTORY = libc::O_DIRECTORY; + + /// Do not update access time (Linux only). + #[cfg(target_os = "linux")] + const NOATIME = libc::O_NOATIME; + } +} + +/// The type of an inode entry in the filesystem. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum INodeType { + /// A regular file. + File, + /// A directory. + Directory, + /// A symbolic link. + Symlink, +} + +/// Representation of an inode. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct INode { + /// The address of this inode, which serves as its unique identifier. 
+ pub addr: InodeAddr, + /// The permissions associated with this inode, represented as a bitfield. + pub permissions: InodePerms, + /// The user ID of the owner of this inode. + pub uid: u32, + /// The group ID of the owner of this inode. + pub gid: u32, + /// The time this inode was created at. + pub create_time: SystemTime, + /// The time this inode was last modified at. + pub last_modified_at: SystemTime, + /// The parent inode address, if any. This is `None` for the root inode. + pub parent: Option, + /// The size of the file represented by this inode, in bytes. + pub size: u64, + /// Additional information about the type of this inode (e.g., file vs directory). + pub itype: INodeType, +} + +impl INode { + /// Check if this inode is the root inode (i.e., has no parent). + #[must_use] + pub fn is_root(&self) -> bool { + self.parent.is_none() + } +} + +/// A directory entry yielded by [`async_fs::AsyncFs::readdir`]. +/// +/// Borrows the entry name from the directory cache's iteration buffer. +#[derive(Debug, Clone, Copy)] +pub struct DirEntry<'a> { + /// The name of this entry within its parent directory. + pub name: &'a OsStr, + /// The full inode data for this entry. + pub inode: INode, +} + +/// Filesystem statistics returned by [`async_fs::AsyncFs::statfs`]. +/// +/// Block-related sizes are in units of `block_size` bytes. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct AsyncFsStats { + /// Filesystem block size (bytes). + pub block_size: u32, + /// Total number of data blocks. + pub total_blocks: u64, + /// Number of free blocks. + pub free_blocks: u64, + /// Number of blocks available to unprivileged users. + pub available_blocks: u64, + /// Total number of file nodes (inodes). + pub total_inodes: u64, + /// Number of free file nodes. + pub free_inodes: u64, + /// Maximum filename length (bytes). 
+ pub max_filename_length: u32, +} diff --git a/lib/lib.rs b/lib/lib.rs index f7388bd5..40b1e8f2 100644 --- a/lib/lib.rs +++ b/lib/lib.rs @@ -2,4 +2,7 @@ /// Caching primitives for git-fs. pub mod cache; +pub mod drop_ward; +/// Filesystem abstractions and caching layers. +pub mod fs; pub mod io; diff --git a/src/daemon.rs b/src/daemon.rs index dac2d052..0a7a9f31 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -14,9 +14,13 @@ mod managed_fuse { use nix::errno::Errno; + use git_fs::cache::async_backed::FutureBackedCache; + use git_fs::fs::{INode, INodeType, InodePerms}; + use super::{MesaFS, OrgConfig, app_config, debug, error}; - use crate::fs::fuser::FuserAdapter; + use crate::fs::mescloud::MesaFsProvider; use fuser::BackgroundSession; + use git_fs::fs::fuser::FuserAdapter; pub struct FuseCoreScope { _session: BackgroundSession, @@ -44,7 +48,24 @@ mod managed_fuse { api_key: org.api_key.clone(), }); let mesa_fs = MesaFS::new(orgs, (config.uid, config.gid), &config.cache); - let fuse_adapter = FuserAdapter::new(mesa_fs, handle); + + let table = FutureBackedCache::default(); + let now = std::time::SystemTime::now(); + let root = INode { + addr: 1, + permissions: InodePerms::from_bits_truncate(0o755), + uid: config.uid, + gid: config.gid, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + table.insert_sync(1, root); + + let provider = MesaFsProvider::new(mesa_fs); + let fuse_adapter = FuserAdapter::new(table, provider, handle); let mount_opts = [ fuser::MountOption::FSName("git-fs".to_owned()), fuser::MountOption::RO, diff --git a/src/fs/fuser.rs b/src/fs/fuser.rs deleted file mode 100644 index 86ddabb6..00000000 --- a/src/fs/fuser.rs +++ /dev/null @@ -1,351 +0,0 @@ -use std::ffi::OsStr; - -use crate::fs::r#trait::{CommonFileAttr, DirEntryType, FileAttr, Fs, LockOwner, OpenFlags}; -use tracing::{debug, error, instrument}; - -impl From for fuser::FileAttr { - fn from(val: FileAttr) -> Self { - fn 
common_to_fuser(common: CommonFileAttr) -> fuser::FileAttr { - fuser::FileAttr { - ino: common.ino, - size: 0, - blocks: 0, - atime: common.atime, - mtime: common.mtime, - ctime: common.ctime, - crtime: common.crtime, - kind: fuser::FileType::RegularFile, - perm: common.perm.bits(), - nlink: common.nlink, - uid: common.uid, - gid: common.gid, - rdev: 0, - blksize: common.blksize, - flags: 0, - } - } - - match val { - FileAttr::RegularFile { - common, - size, - blocks, - } => { - let mut attr = common_to_fuser(common); - attr.size = size; - attr.blocks = blocks; - attr.kind = fuser::FileType::RegularFile; - attr - } - FileAttr::Directory { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::Directory; - attr - } - FileAttr::Symlink { common, size } => { - let mut attr = common_to_fuser(common); - attr.size = size; - attr.kind = fuser::FileType::Symlink; - attr - } - FileAttr::CharDevice { common, rdev } => { - let mut attr = common_to_fuser(common); - debug_assert!(u32::try_from(rdev).is_ok(), "rdev value {rdev} too large"); - attr.rdev = rdev - .try_into() - .map_err(|_| { - error!("rdev value {rdev} too large for fuser::FileAttr"); - }) - .unwrap_or(0); - attr.kind = fuser::FileType::CharDevice; - attr - } - FileAttr::BlockDevice { common, rdev } => { - let mut attr = common_to_fuser(common); - debug_assert!(u32::try_from(rdev).is_ok(), "rdev value {rdev} too large"); - attr.rdev = rdev - .try_into() - .map_err(|_| { - error!("rdev value {rdev} too large for fuser::FileAttr"); - }) - .unwrap_or(0); - attr.kind = fuser::FileType::BlockDevice; - attr - } - FileAttr::NamedPipe { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::NamedPipe; - attr - } - FileAttr::Socket { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::Socket; - attr - } - } - } -} - -impl From for fuser::FileType { - fn from(val: DirEntryType) -> Self { - match val { - 
DirEntryType::RegularFile => Self::RegularFile, - DirEntryType::Directory => Self::Directory, - DirEntryType::Symlink => Self::Symlink, - DirEntryType::CharDevice => Self::CharDevice, - DirEntryType::BlockDevice => Self::BlockDevice, - DirEntryType::NamedPipe => Self::NamedPipe, - DirEntryType::Socket => Self::Socket, - } - } -} - -impl From for OpenFlags { - fn from(val: i32) -> Self { - Self::from_bits_truncate(val) - } -} - -pub struct FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - fs: F, - runtime: tokio::runtime::Handle, -} - -impl FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - // TODO(markovejnovic): This low TTL is really not ideal. It slows us down a lot, since the - // kernel has to ask us for every single lookup all the time. - // - // I think a better implementation is to implement - // - // notify_inval_inode(ino, offset, len) - // notify_inval_entry(parent_ino, name) - // - // These two functions can be used to invalidate specific entries in the kernel cache when we - // know they have changed. This would allow us to set a much higher TTL here. 
- const SHAMEFUL_TTL: std::time::Duration = std::time::Duration::from_secs(1); - - pub fn new(fs: F, runtime: tokio::runtime::Handle) -> Self { - Self { fs, runtime } - } -} - -impl fuser::Filesystem for FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - #[instrument(name = "FuserAdapter::lookup", skip(self, _req, reply))] - fn lookup( - &mut self, - _req: &fuser::Request<'_>, - parent: u64, - name: &OsStr, - reply: fuser::ReplyEntry, - ) { - match self.runtime.block_on(self.fs.lookup(parent, name)) { - Ok(attr) => { - // TODO(markovejnovic): Passing generation = 0 here is a recipe for disaster. - // Someone with A LOT of files will likely see inode reuse which will lead to a - // disaster. - let f_attr: fuser::FileAttr = attr.into(); - debug!(?f_attr, "replying..."); - reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::getattr", skip(self, _req, fh, reply))] - fn getattr( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: Option, - reply: fuser::ReplyAttr, - ) { - match self.runtime.block_on(self.fs.getattr(ino, fh)) { - Ok(attr) => { - debug!(?attr, "replying..."); - reply.attr(&Self::SHAMEFUL_TTL, &attr.into()); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] - fn readdir( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - _fh: u64, - offset: i64, - mut reply: fuser::ReplyDirectory, - ) { - let entries = match self.runtime.block_on(self.fs.readdir(ino)) { - Ok(entries) => entries, - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - return; - } - }; - - #[expect( - clippy::cast_possible_truncation, - reason = "fuser offset is i64 but always 
non-negative" - )] - for (i, entry) in entries - .iter() - .enumerate() - .skip(offset.cast_unsigned() as usize) - { - let kind: fuser::FileType = entry.kind.into(); - let Ok(idx): Result = (i + 1).try_into() else { - error!("Directory entry index {} too large for fuser", i + 1); - reply.error(libc::EIO); - return; - }; - - debug!(?entry, "adding entry to reply..."); - if reply.add(entry.ino, idx, kind, &entry.name) { - debug!("buffer full for now, stopping readdir"); - break; - } - } - - debug!("finalizing reply..."); - reply.ok(); - } - - #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] - fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { - match self.runtime.block_on(self.fs.open(ino, flags.into())) { - Ok(open_file) => { - debug!(handle = open_file.handle, "replying..."); - reply.opened(open_file.handle, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument( - name = "FuserAdapter::read", - skip(self, _req, fh, offset, size, flags, lock_owner, reply) - )] - fn read( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: u64, - offset: i64, - size: u32, - flags: i32, - lock_owner: Option, - reply: fuser::ReplyData, - ) { - let flags: OpenFlags = flags.into(); - let lock_owner = lock_owner.map(LockOwner); - match self.runtime.block_on(self.fs.read( - ino, - fh, - offset.cast_unsigned(), - size, - flags, - lock_owner, - )) { - Ok(data) => { - debug!(read_bytes = data.len(), "replying..."); - reply.data(&data); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::release", skip(self, _req, _lock_owner, reply))] - fn release( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: u64, - flags: i32, - _lock_owner: Option, - flush: bool, - reply: fuser::ReplyEmpty, - ) { - match self - .runtime - .block_on(self.fs.release(ino, fh, flags.into(), 
flush)) - { - Ok(()) => { - debug!("replying ok"); - reply.ok(); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] - fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { - self.runtime.block_on(self.fs.forget(ino, nlookup)); - } - - #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] - fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { - self.runtime.block_on(async { - match self.fs.statfs().await { - Ok(statvfs) => { - debug!(?statvfs, "replying..."); - reply.statfs( - statvfs.total_blocks, - statvfs.free_blocks, - statvfs.available_blocks, - statvfs.total_inodes, - statvfs.free_inodes, - statvfs.block_size, - statvfs.max_filename_length, - 0, - ); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.raw_os_error().unwrap_or(libc::EIO)); - } - } - }); - } -} diff --git a/src/fs/icache/async_cache.rs b/src/fs/icache/async_cache.rs deleted file mode 100644 index 84003da3..00000000 --- a/src/fs/icache/async_cache.rs +++ /dev/null @@ -1,1410 +0,0 @@ -//! Async inode cache with InFlight/Available state machine. - -use std::future::Future; - -use scc::HashMap as ConcurrentHashMap; -use tokio::sync::watch; - -use tracing::{instrument, trace, warn}; - -use crate::fs::r#trait::Inode; - -use super::IcbLike; - -/// State of an entry in the async inode cache. -pub enum IcbState { - /// Entry is being loaded; waiters clone the receiver and `.changed().await`. - /// - /// The channel carries `()` rather than the resolved value because the map - /// is the single source of truth: ICBs are mutated in-place (rc, attrs) so - /// a snapshot in the channel would immediately go stale. Sender-drop also - /// gives us implicit, leak-proof signalling on both success and error paths. - InFlight(watch::Receiver<()>), - /// Entry is ready for use. 
- Available(I), -} - -impl IcbState { - /// Consume `self`, returning the inner value if `Available`, or `None` if `InFlight`. - fn into_available(self) -> Option { - match self { - Self::Available(inner) => Some(inner), - Self::InFlight(_) => None, - } - } -} - -/// Trait for resolving an inode to its control block. -/// -/// Implementations act as a "promise" that an ICB will eventually be produced -/// for a given inode. The cache calls `resolve` when it needs to populate a -/// missing entry. -pub trait IcbResolver: Send + Sync { - /// The inode control block type this resolver produces. - type Icb: IcbLike + Send + Sync; - /// Error type returned when resolution fails. - type Error: Send; - - /// Resolve an inode to a fully-populated control block. - /// - /// - `stub`: `Some(icb)` if upgrading an existing stub entry, `None` if creating - /// from scratch. The stub typically has `parent` and `path` set but `attr` missing. - /// - `cache`: reference to the cache, useful for walking parent chains to build paths. - fn resolve( - &self, - ino: Inode, - stub: Option, - cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized; -} - -/// Async, concurrency-safe inode cache. -/// -/// All methods take `&self` — internal synchronization is provided by -/// `scc::HashMap` (sharded lock-free map). -pub struct AsyncICache { - resolver: R, - inode_table: ConcurrentHashMap>, -} - -impl AsyncICache { - /// Create a new cache with a root ICB at `root_ino` (rc = 1). - pub fn new(resolver: R, root_ino: Inode, root_path: impl Into) -> Self { - let table = ConcurrentHashMap::new(); - // insert_sync is infallible for a fresh map - drop(table.insert_sync( - root_ino, - IcbState::Available(R::Icb::new_root(root_path.into())), - )); - Self { - resolver, - inode_table: table, - } - } - - /// Number of entries (`InFlight` + `Available`) in the table. - pub fn inode_count(&self) -> usize { - self.inode_table.len() - } - - /// Wait until `ino` is `Available`. 
- /// Returns `true` if the entry exists and is Available, - /// `false` if the entry does not exist. - #[instrument(name = "AsyncICache::wait_for_available", skip(self))] - async fn wait_for_available(&self, ino: Inode) -> bool { - loop { - let rx = self - .inode_table - .read_async(&ino, |_, s| match s { - IcbState::InFlight(rx) => Some(rx.clone()), - IcbState::Available(_) => None, - }) - .await; - - match rx { - None => return false, // key missing - Some(None) => return true, // Available - Some(Some(mut rx)) => { - // Wait for the resolver to complete (or fail/drop sender). - // changed() returns Err(RecvError) when sender is dropped, - // which is fine — it means resolution finished. - let _ = rx.changed().await; - // Loop back — the entry might be InFlight again if another - // resolution cycle started between our wakeup and re-read. - } - } - } - } - - /// Check whether `ino` has an entry in the table (either `InFlight` or `Available`). - /// - /// This is a non-blocking, synchronous check. It does **not** wait for - /// `InFlight` entries to resolve. - pub fn contains(&self, ino: Inode) -> bool { - self.inode_table.contains_sync(&ino) - } - - /// Read an ICB via closure. **Awaits** if `InFlight`. - /// Returns `None` if `ino` doesn't exist. - #[instrument(name = "AsyncICache::get_icb", skip(self, f))] - // `Sync` is required because `f` is held across `.await` points in the - // loop body; for the resulting future to be `Send`, the captured closure - // must be `Sync` (clippy::future_not_send). 
- pub async fn get_icb( - &self, - ino: Inode, - f: impl Fn(&R::Icb) -> T + Send + Sync, - ) -> Option { - loop { - if !self.wait_for_available(ino).await { - return None; - } - let result = self - .inode_table - .read_async(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .await; - match result { - Some(Some(val)) => return Some(val), - Some(None) => {} // was InFlight, retry - None => return None, // key missing - } - } - } - - /// Mutate an ICB via closure. **Awaits** if `InFlight`. - /// Returns `None` if `ino` doesn't exist. - #[instrument(name = "AsyncICache::get_icb_mut", skip(self, f))] - pub async fn get_icb_mut( - &self, - ino: Inode, - mut f: impl FnMut(&mut R::Icb) -> T + Send, - ) -> Option { - loop { - if !self.wait_for_available(ino).await { - return None; - } - let result = self - .inode_table - .update_async(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .await; - match result { - Some(Some(val)) => return Some(val), - Some(None) => {} // was InFlight, retry - None => return None, // key missing - } - } - } - - /// Insert an ICB directly as `Available`. If the entry is currently - /// `InFlight`, waits for resolution before overwriting. 
- #[instrument(name = "AsyncICache::insert_icb", skip(self, icb))] - pub async fn insert_icb(&self, ino: Inode, icb: R::Icb) { - use scc::hash_map::Entry; - let mut icb = Some(icb); - loop { - match self.inode_table.entry_async(ino).await { - Entry::Vacant(vac) => { - let val = icb - .take() - .unwrap_or_else(|| unreachable!("icb consumed more than once")); - vac.insert_entry(IcbState::Available(val)); - return; - } - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - IcbState::Available(_) => { - let val = icb - .take() - .unwrap_or_else(|| unreachable!("icb consumed more than once")); - *occ.get_mut() = IcbState::Available(val); - return; - } - }, - } - } - } - - /// Get-or-insert pattern. If `ino` exists (awaits `InFlight`), runs `then` - /// on it. If absent, calls `factory` to create, inserts, then runs `then`. - /// - /// Both `factory` and `then` are `FnOnce` — wrapped in `Option` internally - /// to satisfy the borrow checker across the await-loop. 
- #[instrument(name = "AsyncICache::entry_or_insert_icb", skip(self, factory, then))] - pub async fn entry_or_insert_icb( - &self, - ino: Inode, - factory: impl FnOnce() -> R::Icb, - then: impl FnOnce(&mut R::Icb) -> T, - ) -> T { - use scc::hash_map::Entry; - let mut factory = Some(factory); - let mut then_fn = Some(then); - - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - return t(icb); - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); // release shard lock before awaiting - let _ = rx.changed().await; - } - }, - Entry::Vacant(vac) => { - let f = factory - .take() - .unwrap_or_else(|| unreachable!("factory consumed more than once")); - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - let mut icb = f(); - let result = t(&mut icb); - vac.insert_entry(IcbState::Available(icb)); - return result; - } - } - } - } - - /// Write an ICB back to the table only if the entry still exists. - /// - /// If the entry was evicted (vacant) during resolution, the result is - /// silently dropped — this prevents resurrecting entries that a concurrent - /// `forget` has already removed. - async fn write_back_if_present(&self, ino: Inode, icb: R::Icb) { - use scc::hash_map::Entry; - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => { - *occ.get_mut() = IcbState::Available(icb); - } - Entry::Vacant(_) => { - tracing::debug!( - ino, - "resolved inode was evicted during resolution, dropping result" - ); - } - } - } - - /// Look up `ino`. If `Available` and fully resolved, run `then` and return - /// `Ok(T)`. If `Available` but `needs_resolve()` is true (stub), extract - /// the stub, resolve it, cache the result, then run `then`. 
If absent, call - /// the resolver to fetch the ICB, cache it, then run `then`. If another task - /// is already resolving this inode (`InFlight`), wait for it. - /// - /// Returns `Err(R::Error)` if resolution fails. On error the `InFlight` - /// entry is removed so subsequent calls can retry. - #[instrument(name = "AsyncICache::get_or_resolve", skip(self, then))] - pub async fn get_or_resolve( - &self, - ino: Inode, - then: impl FnOnce(&R::Icb) -> T, - ) -> Result { - use scc::hash_map::Entry; - - let mut then_fn = Some(then); - - // Fast path: Available and fully resolved - { - let hit = self - .inode_table - .read_async(&ino, |_, s| match s { - IcbState::Available(icb) if !icb.needs_resolve() => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - Some(t(icb)) - } - IcbState::InFlight(_) | IcbState::Available(_) => None, - }) - .await; - if let Some(Some(r)) = hit { - return Ok(r); - } - } - - // Slow path: missing, InFlight, or stub needing resolution - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) if !icb.needs_resolve() => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - return Ok(t(icb)); - } - IcbState::Available(_) => { - // Stub needing resolution — extract stub, replace with InFlight - let (tx, rx) = watch::channel(()); - let old = std::mem::replace(occ.get_mut(), IcbState::InFlight(rx)); - let stub = old.into_available().unwrap_or_else(|| { - unreachable!("matched Available arm, replaced value must be Available") - }); - let fallback = stub.clone(); - drop(occ); // release shard lock before awaiting - - match self.resolver.resolve(ino, Some(stub), self).await { - Ok(icb) => { - let t = then_fn.take().unwrap_or_else(|| { - unreachable!("then_fn consumed more than once") - }); - let result = t(&icb); - self.write_back_if_present(ino, icb).await; - drop(tx); - 
return Ok(result); - } - Err(e) => { - if fallback.rc() > 0 { - self.write_back_if_present(ino, fallback).await; - } else { - self.inode_table.remove_async(&ino).await; - } - drop(tx); - return Err(e); - } - } - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - }, - Entry::Vacant(vac) => { - let (tx, rx) = watch::channel(()); - vac.insert_entry(IcbState::InFlight(rx)); - - match self.resolver.resolve(ino, None, self).await { - Ok(icb) => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - let result = t(&icb); - self.write_back_if_present(ino, icb).await; - drop(tx); - return Ok(result); - } - Err(e) => { - self.inode_table.remove_async(&ino).await; - drop(tx); - return Err(e); - } - } - } - } - } - } - - /// Increment rc. **Awaits** `InFlight`. - /// - /// Returns `None` if the inode does not exist or was evicted concurrently. - /// This can happen when a concurrent `forget` removes the entry between the - /// caller's insert/cache and this `inc_rc` call, or when a concurrent - /// `get_or_resolve` swaps the entry to `InFlight` and the entry is then - /// evicted on resolution failure. Callers in FUSE `lookup` paths should - /// treat `None` as a lookup failure to avoid ref-count leaks (the kernel - /// would hold a reference the cache no longer tracks). - #[instrument(name = "AsyncICache::inc_rc", skip(self))] - pub async fn inc_rc(&self, ino: Inode) -> Option { - loop { - if !self.wait_for_available(ino).await { - warn!(ino, "inc_rc: inode not in table"); - return None; - } - let result = self - .inode_table - .update_async(&ino, |_, state| match state { - IcbState::Available(icb) => { - *icb.rc_mut() += 1; - Some(icb.rc()) - } - IcbState::InFlight(_) => None, - }) - .await - .flatten(); - - match result { - Some(rc) => return Some(rc), - None => { - // Entry was concurrently replaced with InFlight or evicted. 
- if !self.contains(ino) { - warn!(ino, "inc_rc: inode evicted concurrently"); - return None; - } - // Entry exists but became InFlight — retry. - } - } - } - } - - /// Decrement rc by `nlookups`. If rc drops to zero, evicts and returns - /// the ICB. **Awaits** `InFlight` entries. - #[instrument(name = "AsyncICache::forget", skip(self))] - pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { - use scc::hash_map::Entry; - - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) => { - if icb.rc() <= nlookups { - trace!(ino, "evicting inode"); - let (_, state) = occ.remove_entry(); - return state.into_available(); - } - *icb.rc_mut() -= nlookups; - trace!(ino, new_rc = icb.rc(), "decremented rc"); - return None; - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - }, - Entry::Vacant(_) => { - warn!(ino, "forget on unknown inode"); - return None; - } - } - } - } - - /// Synchronous mutable access to an `Available` entry. - /// Does **not** wait for `InFlight`. Intended for initialization. - pub fn get_icb_mut_sync(&self, ino: Inode, f: impl FnOnce(&mut R::Icb) -> T) -> Option { - self.inode_table - .update_sync(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .flatten() - } - - /// Iterate over all `Available` entries (skips `InFlight`). - /// Async-safe iteration using `iter_async` to avoid contention on single-threaded runtimes. 
- pub async fn for_each(&self, mut f: impl FnMut(&Inode, &R::Icb)) { - self.inode_table - .iter_async(|ino, state| { - if let IcbState::Available(icb) = state { - f(ino, icb); - } - true // continue iteration - }) - .await; - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::collections::HashMap as StdHashMap; - use std::path::PathBuf; - use std::sync::atomic::Ordering; - use std::sync::{Arc, Mutex}; - - #[derive(Debug, Clone, PartialEq)] - struct TestIcb { - rc: u64, - path: PathBuf, - resolved: bool, - } - - impl IcbLike for TestIcb { - fn new_root(path: PathBuf) -> Self { - Self { - rc: 1, - path, - resolved: true, - } - } - fn rc(&self) -> u64 { - self.rc - } - fn rc_mut(&mut self) -> &mut u64 { - &mut self.rc - } - fn needs_resolve(&self) -> bool { - !self.resolved - } - } - - struct TestResolver { - responses: Mutex>>, - } - - impl TestResolver { - fn new() -> Self { - Self { - responses: Mutex::new(StdHashMap::new()), - } - } - - fn add(&self, ino: Inode, icb: TestIcb) { - self.responses - .lock() - .expect("test mutex") - .insert(ino, Ok(icb)); - } - - fn add_err(&self, ino: Inode, err: impl Into) { - self.responses - .lock() - .expect("test mutex") - .insert(ino, Err(err.into())); - } - } - - impl IcbResolver for TestResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - let result = self - .responses - .lock() - .expect("test mutex") - .remove(&ino) - .unwrap_or_else(|| Err(format!("no response for inode {ino}"))); - async move { result } - } - } - - fn test_cache() -> AsyncICache { - AsyncICache::new(TestResolver::new(), 1, "/root") - } - - fn test_cache_with(resolver: TestResolver) -> AsyncICache { - AsyncICache::new(resolver, 1, "/root") - } - - #[tokio::test] - async fn contains_returns_true_for_root() { - let cache = test_cache(); - assert!(cache.contains(1), "root should exist"); - } - - #[tokio::test] - async fn 
contains_returns_false_for_missing() { - let cache = test_cache(); - assert!(!cache.contains(999), "missing inode should not exist"); - } - - #[tokio::test] - async fn contains_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/test".into(), - resolved: true, - }, - ); - let cache = Arc::new(test_cache_with(resolver)); - - // Trigger resolve in background - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); - - handle - .await - .expect("task panicked") - .expect("resolve failed"); - assert!(cache.contains(42), "should be true after resolve"); - } - - #[tokio::test] - async fn new_creates_root_entry() { - let cache = test_cache(); - assert_eq!(cache.inode_count(), 1, "should have exactly 1 entry"); - } - - #[tokio::test] - async fn get_icb_returns_value() { - let cache = test_cache(); - let path = cache.get_icb(1, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/root"))); - } - - #[tokio::test] - async fn get_icb_returns_none_for_missing() { - let cache = test_cache(); - let result = cache.get_icb(999, IcbLike::rc).await; - assert_eq!(result, None, "missing inode should return None"); - } - - #[tokio::test] - async fn get_icb_mut_modifies_value() { - let cache = test_cache(); - cache - .get_icb_mut(1, |icb| { - *icb.rc_mut() += 10; - }) - .await; - let rc = cache.get_icb(1, IcbLike::rc).await; - assert_eq!(rc, Some(11), "root starts at rc=1, +10 = 11"); - } - - #[tokio::test] - async fn get_icb_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/loaded".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - // Resolve inode 42 - cache - .get_or_resolve(42, |_| ()) - .await - .expect("resolve failed"); - - let path = cache.get_icb(42, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/loaded"))); - 
} - - #[tokio::test] - async fn insert_icb_adds_entry() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/foo".into(), - resolved: true, - }, - ) - .await; - assert!(cache.contains(42), "inserted entry should exist"); - assert_eq!(cache.inode_count(), 2, "root + inserted = 2"); - } - - #[tokio::test] - async fn insert_icb_does_not_clobber_inflight() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - // Spawn insert_icb in background — should wait for InFlight to resolve - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { - cache2 - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/inserted".into(), - resolved: true, - }, - ) - .await; - }); - - // Give insert_icb time to start waiting - tokio::task::yield_now().await; - - // Complete the InFlight from the resolver side (write directly) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }), - ) - .await; - drop(tx); // signal watchers - - handle.await.expect("task panicked"); - - // After insert_icb completes, it should have overwritten the resolved value - let path = cache.get_icb(42, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/inserted"))); - } - - #[tokio::test] - async fn entry_or_insert_creates_new() { - let cache = test_cache(); - let rc = cache - .entry_or_insert_icb( - 42, - || TestIcb { - rc: 0, - path: "/new".into(), - resolved: true, - }, - |icb| { - *icb.rc_mut() += 1; - icb.rc() - }, - ) - .await; - assert_eq!(rc, 1, "factory creates rc=0, then +1 = 1"); - } - - #[tokio::test] - async fn entry_or_insert_returns_existing() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/existing".into(), - resolved: true, - }, - ) - .await; - - let rc = cache - .entry_or_insert_icb( - 42, - || 
panic!("factory should not be called"), - |icb| icb.rc(), - ) - .await; - assert_eq!(rc, 5, "existing entry rc should be 5"); - } - - #[tokio::test] - async fn entry_or_insert_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = Arc::new(test_cache_with(resolver)); - - // Start resolve in background - let cache2 = Arc::clone(&cache); - let resolve_handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); - - // Wait for resolve to finish - resolve_handle - .await - .expect("task panicked") - .expect("resolve failed"); - - // Now entry_or_insert should find the existing entry - let rc = cache - .entry_or_insert_icb( - 42, - || panic!("factory should not be called"), - |icb| icb.rc(), - ) - .await; - assert_eq!(rc, 1, "should find the resolved entry"); - } - - #[tokio::test] - async fn inc_rc_increments() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }, - ) - .await; - let new_rc = cache.inc_rc(42).await; - assert_eq!(new_rc, Some(2), "rc 1 + 1 = 2"); - } - - #[tokio::test] - async fn forget_decrements_rc() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/a".into(), - resolved: true, - }, - ) - .await; - - let evicted = cache.forget(42, 2).await; - assert!(evicted.is_none(), "rc 5 - 2 = 3, should not evict"); - - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(3), "rc should be 3 after forget(2)"); - } - - #[tokio::test] - async fn forget_evicts_when_rc_drops_to_zero() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 3, - path: "/a".into(), - resolved: true, - }, - ) - .await; - - let evicted = cache.forget(42, 3).await; - assert!(evicted.is_some(), "rc 3 - 3 = 0, should evict"); - assert!(!cache.contains(42), "evicted entry should be gone"); - 
assert_eq!(cache.inode_count(), 1, "only root remains"); - } - - #[tokio::test] - async fn forget_unknown_inode_returns_none() { - let cache = test_cache(); - let evicted = cache.forget(999, 1).await; - assert!(evicted.is_none(), "unknown inode should return None"); - } - - #[tokio::test] - async fn for_each_iterates_available_entries() { - let cache = test_cache(); - cache - .insert_icb( - 2, - TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }, - ) - .await; - cache - .insert_icb( - 3, - TestIcb { - rc: 1, - path: "/b".into(), - resolved: true, - }, - ) - .await; - - let mut seen = std::collections::HashSet::new(); - cache - .for_each(|ino, _icb| { - seen.insert(*ino); - }) - .await; - assert_eq!(seen.len(), 3, "should see all 3 entries"); - assert!(seen.contains(&1), "should contain root"); - assert!(seen.contains(&2), "should contain inode 2"); - assert!(seen.contains(&3), "should contain inode 3"); - } - - #[tokio::test] - async fn for_each_skips_inflight() { - let cache = test_cache(); - // Directly insert an InFlight entry for testing iteration - let (_tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let mut count = 0; - cache - .for_each(|_, _| { - count += 1; - }) - .await; - assert_eq!(count, 1, "only root, not the InFlight entry"); - } - - #[tokio::test] - async fn wait_does_not_miss_signal_on_immediate_complete() { - let cache = Arc::new(test_cache()); - - // Insert InFlight manually, then immediately complete before anyone waits - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - // Complete before any waiter (simulate resolver by writing directly) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/fast".into(), - resolved: true, - }), - ) - .await; - drop(tx); - - assert!(cache.contains(42), "entry should exist in table"); - } - - // -- get_or_resolve tests -- - - 
#[tokio::test] - async fn get_or_resolve_returns_existing() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/existing".into(), - resolved: true, - }, - ) - .await; - - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/existing"))); - } - - #[tokio::test] - async fn get_or_resolve_resolves_missing() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/resolved"))); - // Should now be cached - assert!(cache.contains(42)); - } - - #[tokio::test] - async fn get_or_resolve_propagates_error() { - let resolver = TestResolver::new(); - resolver.add_err(42, "network error"); - let cache = test_cache_with(resolver); - - let result: Result = - cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(result, Err("network error".to_owned())); - // Entry should be cleaned up on error - assert!(!cache.contains(42)); - } - - struct CountingResolver { - count: Arc, - } - - impl IcbResolver for CountingResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - self.count.fetch_add(1, Ordering::SeqCst); - async { - tokio::task::yield_now().await; - Ok(TestIcb { - rc: 1, - path: "/coalesced".into(), - resolved: true, - }) - } - } - } - - #[tokio::test] - async fn get_or_resolve_coalesces_concurrent_requests() { - use std::sync::atomic::AtomicUsize; - - let resolve_count = Arc::new(AtomicUsize::new(0)); - - let cache = Arc::new(AsyncICache::new( - CountingResolver { - count: Arc::clone(&resolve_count), - }, - 1, - "/root", - )); - - let mut handles = Vec::new(); - for _ in 0..5 { - let c = Arc::clone(&cache); - 
handles.push(tokio::spawn(async move { - c.get_or_resolve(42, |icb| icb.path.clone()).await - })); - } - - for h in handles { - assert_eq!( - h.await.expect("task panicked"), - Ok(PathBuf::from("/coalesced")), - ); - } - - // Resolver should only have been called ONCE (not 5 times) - assert_eq!( - resolve_count.load(Ordering::SeqCst), - 1, - "should coalesce to 1 resolve call" - ); - } - - #[test] - fn icb_state_into_available_returns_inner() { - let state = IcbState::Available(TestIcb { - rc: 1, - path: "/test".into(), - resolved: true, - }); - assert!(state.into_available().is_some()); - } - - #[test] - fn icb_state_into_available_returns_none_for_inflight() { - let (_tx, rx) = watch::channel(()); - let state: IcbState = IcbState::InFlight(rx); - assert!(state.into_available().is_none()); - } - - #[tokio::test] - async fn get_or_resolve_resolves_stub_entry() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - // Insert unresolved stub - cache - .insert_icb( - 42, - TestIcb { - rc: 0, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // get_or_resolve should trigger resolution because needs_resolve() == true - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/resolved"))); - } - - #[tokio::test] - async fn forget_handles_inflight_entry() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.forget(42, 1).await }); - - // Give forget time to start waiting - tokio::task::yield_now().await; - - // Simulate resolver completing (write directly to inode_table) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 3, - path: "/inflight".into(), - 
resolved: true, - }), - ) - .await; - drop(tx); - - let evicted = handle.await.expect("task panicked"); - assert!(evicted.is_none(), "rc=3 - 1 = 2, should not evict"); - - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(2), "rc should be 2 after forget(1) on rc=3"); - } - - #[tokio::test] - async fn get_or_resolve_error_preserves_stub_with_nonzero_rc() { - let resolver = TestResolver::new(); - resolver.add_err(42, "resolve failed"); - let cache = test_cache_with(resolver); - - // Insert a stub with rc=2 (simulates a looked-up entry needing resolution) - cache - .insert_icb( - 42, - TestIcb { - rc: 2, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // get_or_resolve should fail - let result: Result = - cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert!(result.is_err(), "should propagate resolver error"); - - // The stub should be preserved since rc > 0 - assert!(cache.contains(42), "entry with rc=2 should survive error"); - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(2), "rc should be preserved"); - } - - #[tokio::test] - async fn inc_rc_missing_inode_returns_none() { - let cache = test_cache(); - assert_eq!(cache.inc_rc(999).await, None); - } - - #[tokio::test] - async fn inc_rc_waits_for_inflight() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); - - // Simulate resolver completing by writing directly to inode_table - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }), - ) - .await; - drop(tx); - - let result = handle - .await - .unwrap_or_else(|e| panic!("task panicked: {e}")); - assert_eq!( - result, - Some(2), - "waited for Available, then incremented 1 -> 2" - ); - } - - #[tokio::test] - async fn 
inc_rc_returns_none_after_concurrent_eviction() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); - - // Evict instead of completing - cache.inode_table.remove_async(&42).await; - drop(tx); - - let result = handle - .await - .unwrap_or_else(|e| panic!("task panicked: {e}")); - assert_eq!(result, None, "evicted entry should return None"); - } - - /// Resolver that pauses mid-resolution via a `Notify`, allowing the test - /// to interleave a `forget` while the resolve future is suspended. - struct SlowResolver { - /// Signalled by the resolver once it has started (so the test knows - /// resolution is in progress). - started: Arc, - /// The resolver waits on this before returning (the test signals it - /// after calling `forget`). - proceed: Arc, - } - - impl IcbResolver for SlowResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - let started = Arc::clone(&self.started); - let proceed = Arc::clone(&self.proceed); - async move { - started.notify_one(); - proceed.notified().await; - Ok(TestIcb { - rc: 1, - path: "/slow-resolved".into(), - resolved: true, - }) - } - } - } - - /// Regression test: `get_icb` must survive the entry cycling back to - /// `InFlight` between when `wait_for_available` returns and when - /// `read_async` runs. The loop in `get_icb` should retry and eventually - /// return the final resolved value. - #[tokio::test] - async fn wait_for_available_retries_on_re_inflight() { - let cache = Arc::new(test_cache()); - let ino: Inode = 42; - - // Phase 1: insert an InFlight entry. 
- let (tx1, rx1) = watch::channel(()); - cache - .inode_table - .upsert_async(ino, IcbState::InFlight(rx1)) - .await; - - // Spawn get_icb — it will wait for InFlight to resolve. - let cache_get = Arc::clone(&cache); - let get_handle = - tokio::spawn(async move { cache_get.get_icb(ino, |icb| icb.path.clone()).await }); - - // Give get_icb time to start waiting on the watch channel. - tokio::task::yield_now().await; - - // Phase 1 complete: transition to Available briefly, then immediately - // back to InFlight (simulates get_or_resolve finding a stub and - // re-entering InFlight for a second resolution). - let (tx2, rx2) = watch::channel(()); - cache - .inode_table - .upsert_async(ino, IcbState::InFlight(rx2)) - .await; - // Signal phase-1 watchers so get_icb wakes up; it will re-read the - // entry and find InFlight again, then loop back to wait. - drop(tx1); - - // Give get_icb time to re-enter the wait loop. - tokio::task::yield_now().await; - - // Phase 2 complete: write the final resolved value. - cache - .inode_table - .upsert_async( - ino, - IcbState::Available(TestIcb { - rc: 1, - path: "/fully-resolved".into(), - resolved: true, - }), - ) - .await; - drop(tx2); - - // get_icb should return the final resolved value (not None). - let result = get_handle.await.expect("get_icb task panicked"); - assert_eq!( - result, - Some(PathBuf::from("/fully-resolved")), - "get_icb must survive re-InFlight and return the final resolved value" - ); - } - - /// Regression test: an entry evicted by `forget` during an in-progress - /// `get_or_resolve` must NOT be resurrected when resolution completes. 
- #[tokio::test] - async fn get_or_resolve_does_not_resurrect_evicted_entry() { - let started = Arc::new(tokio::sync::Notify::new()); - let proceed = Arc::new(tokio::sync::Notify::new()); - - let cache = Arc::new(AsyncICache::new( - SlowResolver { - started: Arc::clone(&started), - proceed: Arc::clone(&proceed), - }, - 1, - "/root", - )); - - let ino: Inode = 42; - - // Insert a stub with rc=1 (simulates a looked-up, unresolved entry). - cache - .insert_icb( - ino, - TestIcb { - rc: 1, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // Spawn get_or_resolve which will trigger slow resolution. - let cache2 = Arc::clone(&cache); - let resolve_handle = - tokio::spawn(async move { cache2.get_or_resolve(ino, |icb| icb.path.clone()).await }); - - // Wait until the resolver has started (entry is now InFlight). - started.notified().await; - - // Evict the entry while resolution is in progress. - // forget waits for InFlight, so we need to complete resolution for - // forget to proceed. Instead, remove the InFlight entry directly to - // simulate a concurrent eviction (e.g., by another path that already - // removed the entry). - cache.inode_table.remove_async(&ino).await; - - // Let the resolver finish. - proceed.notify_one(); - - // Wait for get_or_resolve to complete. - drop(resolve_handle.await.expect("task panicked")); - - // The entry must NOT have been resurrected by write_back_if_present. - assert!( - !cache.contains(ino), - "evicted entry must not be resurrected after resolution completes" - ); - } -} diff --git a/src/fs/icache/bridge.rs b/src/fs/icache/bridge.rs deleted file mode 100644 index e674a564..00000000 --- a/src/fs/icache/bridge.rs +++ /dev/null @@ -1,138 +0,0 @@ -use crate::fs::r#trait::{FileAttr, FileHandle, Inode}; - -/// Bidirectional bridge for both inodes and file handles between two Fs layers. -/// -/// Convention: **left = outer (caller), right = inner (callee)**. -/// `forward(left)` → right, `backward(right)` → left. 
-pub struct HashMapBridge { - inode_map: bimap::BiMap, - fh_map: bimap::BiMap, -} - -impl HashMapBridge { - pub fn new() -> Self { - Self { - inode_map: bimap::BiMap::new(), - fh_map: bimap::BiMap::new(), - } - } - - // ── Inode methods ──────────────────────────────────────────────────── - - pub fn insert_inode(&mut self, left: Inode, right: Inode) { - self.inode_map.insert(left, right); - } - - /// Look up right→left, or allocate a new left inode if unmapped. - pub fn backward_or_insert_inode( - &mut self, - right: Inode, - allocate: impl FnOnce() -> Inode, - ) -> Inode { - if let Some(&left) = self.inode_map.get_by_right(&right) { - left - } else { - let left = allocate(); - self.inode_map.insert(left, right); - left - } - } - - /// Look up left→right, or allocate a new right inode if unmapped. - pub fn forward_or_insert_inode( - &mut self, - left: Inode, - allocate: impl FnOnce() -> Inode, - ) -> Inode { - if let Some(&right) = self.inode_map.get_by_left(&left) { - right - } else { - let right = allocate(); - self.inode_map.insert(left, right); - right - } - } - - /// Remove an inode mapping by its left (outer) key. - pub fn remove_inode_by_left(&mut self, left: Inode) { - self.inode_map.remove_by_left(&left); - } - - /// Look up left→right directly. - pub fn inode_map_get_by_left(&self, left: Inode) -> Option<&Inode> { - self.inode_map.get_by_left(&left) - } - - /// Rewrite the `ino` field in a [`FileAttr`] from right (inner) to left (outer) namespace. 
- pub fn attr_backward(&self, attr: FileAttr) -> FileAttr { - let backward = |ino: Inode| -> Inode { - if let Some(&left) = self.inode_map.get_by_right(&ino) { - left - } else { - tracing::warn!( - inner_ino = ino, - "attr_backward: no bridge mapping, using raw inner inode" - ); - ino - } - }; - rewrite_attr_ino(attr, backward) - } - - // ── File handle methods ────────────────────────────────────────────── - - pub fn insert_fh(&mut self, left: FileHandle, right: FileHandle) { - self.fh_map.insert(left, right); - } - - pub fn fh_forward(&self, left: FileHandle) -> Option { - self.fh_map.get_by_left(&left).copied() - } - - /// Remove a file handle mapping by its left (outer) key. - pub fn remove_fh_by_left(&mut self, left: FileHandle) { - self.fh_map.remove_by_left(&left); - } -} - -/// Rewrite the `ino` field in a [`FileAttr`] using the given translation function. -fn rewrite_attr_ino(attr: FileAttr, translate: impl Fn(Inode) -> Inode) -> FileAttr { - match attr { - FileAttr::RegularFile { - mut common, - size, - blocks, - } => { - common.ino = translate(common.ino); - FileAttr::RegularFile { - common, - size, - blocks, - } - } - FileAttr::Directory { mut common } => { - common.ino = translate(common.ino); - FileAttr::Directory { common } - } - FileAttr::Symlink { mut common, size } => { - common.ino = translate(common.ino); - FileAttr::Symlink { common, size } - } - FileAttr::CharDevice { mut common, rdev } => { - common.ino = translate(common.ino); - FileAttr::CharDevice { common, rdev } - } - FileAttr::BlockDevice { mut common, rdev } => { - common.ino = translate(common.ino); - FileAttr::BlockDevice { common, rdev } - } - FileAttr::NamedPipe { mut common } => { - common.ino = translate(common.ino); - FileAttr::NamedPipe { common } - } - FileAttr::Socket { mut common } => { - common.ino = translate(common.ino); - FileAttr::Socket { common } - } - } -} diff --git a/src/fs/icache/file_table.rs b/src/fs/icache/file_table.rs deleted file mode 100644 index 
332a6ffb..00000000 --- a/src/fs/icache/file_table.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::sync::atomic::{AtomicU64, Ordering}; - -use crate::fs::r#trait::FileHandle; - -/// Monotonically increasing file handle allocator. -#[must_use] -pub struct FileTable { - next_fh: AtomicU64, -} - -impl FileTable { - pub fn new() -> Self { - Self { - next_fh: AtomicU64::new(1), - } - } - - #[must_use] - pub fn allocate(&self) -> FileHandle { - self.next_fh.fetch_add(1, Ordering::Relaxed) - } -} diff --git a/src/fs/icache/inode_factory.rs b/src/fs/icache/inode_factory.rs deleted file mode 100644 index 1a603388..00000000 --- a/src/fs/icache/inode_factory.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::fs::r#trait::Inode; -use std::sync::atomic::{AtomicU64, Ordering}; - -/// Monotonically increasing inode allocator. -pub struct InodeFactory { - next_inode: AtomicU64, -} - -impl InodeFactory { - pub fn new(start: Inode) -> Self { - Self { - next_inode: AtomicU64::new(start), - } - } - - pub fn allocate(&self) -> Inode { - self.next_inode.fetch_add(1, Ordering::Relaxed) - } -} diff --git a/src/fs/icache/mod.rs b/src/fs/icache/mod.rs deleted file mode 100644 index 2ccd80bd..00000000 --- a/src/fs/icache/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Generic directory cache and inode management primitives. - -pub mod async_cache; -pub mod bridge; -mod file_table; -mod inode_factory; - -pub use async_cache::AsyncICache; -pub use async_cache::IcbResolver; -pub use file_table::FileTable; -pub use inode_factory::InodeFactory; - -/// Common interface for inode control block types usable with `ICache`. -pub trait IcbLike: Clone { - /// Create an ICB with rc=1, the given path, and no children. - fn new_root(path: std::path::PathBuf) -> Self; - fn rc(&self) -> u64; - fn rc_mut(&mut self) -> &mut u64; - /// Returns true if this entry needs resolution (e.g., attr not yet fetched). 
- fn needs_resolve(&self) -> bool; -} diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index 340b5887..6e9c8bf8 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -1,12 +1,12 @@ //! Shared types and helpers used by both `MesaFS` and `RepoFs`. +use std::ffi::{OsStr, OsString}; + +use bytes::Bytes; +use git_fs::fs::{FileHandle, INode, InodeAddr, OpenFlags as LibOpenFlags}; use mesa_dev::low_level::apis; use thiserror::Error; -use crate::fs::r#trait::{FileAttr, Inode}; - -pub(super) use super::icache::InodeControlBlock; - /// A concrete error type that preserves the structure of `mesa_dev::low_level::apis::Error` /// without the generic parameter. #[derive(Debug, Error)] @@ -51,50 +51,22 @@ pub enum LookupError { #[error("inode not found")] InodeNotFound, - #[error("file does not exist")] - FileDoesNotExist, - #[error("remote mesa error")] RemoteMesaError(#[from] MesaApiError), } -impl From for i32 { - fn from(e: LookupError) -> Self { - match e { - LookupError::InodeNotFound | LookupError::FileDoesNotExist => libc::ENOENT, - LookupError::RemoteMesaError(_) => libc::EIO, - } - } -} - #[derive(Debug, Error)] pub enum GetAttrError { #[error("inode not found")] InodeNotFound, } -impl From for i32 { - fn from(e: GetAttrError) -> Self { - match e { - GetAttrError::InodeNotFound => libc::ENOENT, - } - } -} - -#[derive(Debug, Error)] +#[derive(Debug, Clone, Copy, Error)] pub enum OpenError { #[error("inode not found")] InodeNotFound, } -impl From for i32 { - fn from(e: OpenError) -> Self { - match e { - OpenError::InodeNotFound => libc::ENOENT, - } - } -} - #[derive(Debug, Error)] pub enum ReadError { #[error("file not open")] @@ -113,17 +85,6 @@ pub enum ReadError { Base64Decode(#[from] base64::DecodeError), } -impl From for i32 { - fn from(e: ReadError) -> Self { - match e { - ReadError::FileNotOpen => libc::EBADF, - ReadError::InodeNotFound => libc::ENOENT, - ReadError::RemoteMesaError(_) | ReadError::Base64Decode(_) => libc::EIO, 
- ReadError::NotAFile => libc::EISDIR, - } - } -} - #[derive(Debug, Error)] pub enum ReadDirError { #[error("inode not found")] @@ -143,18 +104,7 @@ impl From for ReadDirError { fn from(e: LookupError) -> Self { match e { LookupError::RemoteMesaError(api) => Self::RemoteMesaError(api), - LookupError::InodeNotFound | LookupError::FileDoesNotExist => Self::InodeNotFound, - } - } -} - -impl From for i32 { - fn from(e: ReadDirError) -> Self { - match e { - ReadDirError::InodeNotFound => libc::ENOENT, - ReadDirError::RemoteMesaError(_) => libc::EIO, - ReadDirError::NotADirectory => libc::ENOTDIR, - ReadDirError::NotPermitted => libc::EPERM, + LookupError::InodeNotFound => Self::InodeNotFound, } } } @@ -165,18 +115,38 @@ pub enum ReleaseError { FileNotOpen, } -impl From for i32 { - fn from(e: ReleaseError) -> Self { - match e { - ReleaseError::FileNotOpen => libc::EBADF, - } - } +/// A directory entry for readdir results, using lib types. +pub struct FsDirEntry { + pub ino: InodeAddr, + pub name: OsString, } -/// Allows a parent compositor to peek at cached attrs from a child filesystem. +/// Trait for child filesystems composed by [`CompositeFs`](super::composite::CompositeFs). +/// +/// Uses lib types (`INode`, `InodeAddr`) directly — no conversion to/from `FileAttr`. +/// Replaces the old `Fs + InodeCachePeek` bound. #[async_trait::async_trait] -pub(super) trait InodeCachePeek { - async fn peek_attr(&self, ino: Inode) -> Option; +pub(super) trait ChildFs: Send + Sync { + /// Look up a child by name within the given parent directory. + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result; + + /// List all children of a directory, returning full `INode` data for each. + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError>; + + /// Open a file for reading. + async fn open(&mut self, ino: InodeAddr, flags: LibOpenFlags) -> Result; + + /// Read data from an open file. 
+ async fn read( + &mut self, + ino: InodeAddr, + fh: FileHandle, + offset: u64, + size: u32, + ) -> Result; + + /// Release (close) a file handle. + async fn release(&mut self, ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError>; } #[cfg(test)] @@ -189,12 +159,6 @@ mod tests { assert!(matches!(err, ReadDirError::InodeNotFound)); } - #[test] - fn lookup_file_does_not_exist_converts_to_readdir_inode_not_found() { - let err: ReadDirError = LookupError::FileDoesNotExist.into(); - assert!(matches!(err, ReadDirError::InodeNotFound)); - } - #[test] fn lookup_remote_error_converts_to_readdir_remote_error() { let api_err = MesaApiError::Response { diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs index 6dbac250..3356b7b5 100644 --- a/src/fs/mescloud/composite.rs +++ b/src/fs/mescloud/composite.rs @@ -1,308 +1,460 @@ use std::collections::HashMap; use std::ffi::OsStr; +use std::sync::atomic::{AtomicU64, Ordering}; use bytes::Bytes; -use tracing::{instrument, trace, warn}; - -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, OpenFlags, +use git_fs::cache::async_backed::FutureBackedCache; +use git_fs::fs::dcache::DCache; +use git_fs::fs::{ + AsyncFsStats, FileHandle, INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags, }; +use rustc_hash::FxHashMap; +use tracing::{instrument, trace}; use super::common::{ - GetAttrError, InodeCachePeek, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, + ChildFs, FsDirEntry, GetAttrError, LookupError, OpenError, ReadDirError, ReadError, + ReleaseError, }; -use super::icache::{InodeControlBlock, MescloudICache}; -/// A child filesystem slot: inner filesystem + bidirectional inode/fh bridge. +/// Bidirectional inode mapping between outer (composite) and inner (child) address spaces. +/// +/// Convention: **outer = left, inner = right**. 
+pub(super) struct InodeBridge { + map: bimap::BiMap, +} + +impl InodeBridge { + pub fn new() -> Self { + Self { + map: bimap::BiMap::new(), + } + } + + pub fn insert(&mut self, outer: InodeAddr, inner: InodeAddr) { + self.map.insert(outer, inner); + } + + pub fn forward(&self, outer: InodeAddr) -> Option { + self.map.get_by_left(&outer).copied() + } + + #[expect(dead_code, reason = "will be needed by future callers")] + pub fn backward(&self, inner: InodeAddr) -> Option { + self.map.get_by_right(&inner).copied() + } + + /// Look up inner->outer, or allocate a new outer address if unmapped. + pub fn backward_or_insert( + &mut self, + inner: InodeAddr, + allocate: impl FnOnce() -> InodeAddr, + ) -> InodeAddr { + if let Some(&outer) = self.map.get_by_right(&inner) { + outer + } else { + let outer = allocate(); + self.map.insert(outer, inner); + outer + } + } + + pub fn remove_by_outer(&mut self, outer: InodeAddr) { + self.map.remove_by_left(&outer); + } + + #[expect(dead_code, reason = "will be needed by future callers")] + pub fn get_inner(&self, outer: InodeAddr) -> Option<&InodeAddr> { + self.map.get_by_left(&outer) + } +} + pub(super) struct ChildSlot { pub inner: Inner, - pub bridge: HashMapBridge, + pub bridge: InodeBridge, } -/// Layered filesystem that presents multiple child filesystems under a single -/// inode namespace. -/// -/// `MesaCloud`'s filesystem is a hierarchy of compositions: -/// -/// ```text -/// MesaFS (CompositeFs<_, OrgFs>) -/// └─ OrgFs (CompositeFs<_, RepoFs>) -/// └─ RepoFs (leaf — backed by git) -/// ``` -/// -/// Each child filesystem numbers its inodes starting from 1, so the composite -/// maintains a bidirectional inode/file-handle bridge per child (see -/// [`ChildSlot`]) to translate between the outer namespace visible to FUSE and -/// each child's internal namespace. 
-pub(super) struct CompositeFs -where - R: IcbResolver, -{ - pub icache: MescloudICache, - pub file_table: FileTable, - pub readdir_buf: Vec, - /// Maps outer inode to index into `slots` for child-root inodes. - pub child_inodes: HashMap, - /// Maps every translated outer inode to its owning slot index. - pub inode_to_slot: HashMap, - pub slots: Vec>, +/// Tracks an open file: which child slot owns it and the inner fh. +struct OpenFileEntry { + slot_idx: usize, + inner_ino: InodeAddr, + inner_fh: FileHandle, +} + +pub(super) struct CompositeFs { + pub(super) inode_table: FutureBackedCache, + pub(super) directory_cache: DCache, + readdir_populated: FutureBackedCache, + next_ino: AtomicU64, + next_fh: AtomicU64, + refcounts: FxHashMap, + pub(super) readdir_buf: Vec, + open_files: HashMap, + pub(super) child_inodes: HashMap, + pub(super) inode_to_slot: HashMap, + pub(super) slots: Vec>, + fs_owner: (u32, u32), + block_size: u32, } -impl CompositeFs -where - R: IcbResolver, - Inner: Fs< - LookupError = LookupError, - GetAttrError = GetAttrError, - OpenError = OpenError, - ReadError = ReadError, - ReaddirError = ReadDirError, - ReleaseError = ReleaseError, - > + InodeCachePeek - + Send - + Sync, -{ - /// Look up which child slot owns an inode via direct map. 
- #[instrument(name = "CompositeFs::slot_for_inode", skip(self))] - pub fn slot_for_inode(&self, ino: Inode) -> Option { +impl CompositeFs { + pub const ROOT_INO: InodeAddr = 1; + + pub fn new(fs_owner: (u32, u32), block_size: u32) -> Self { + let inode_table = FutureBackedCache::default(); + let now = std::time::SystemTime::now(); + let root = INode { + addr: Self::ROOT_INO, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + inode_table.insert_sync(Self::ROOT_INO, root); + + let mut refcounts = FxHashMap::default(); + refcounts.insert(Self::ROOT_INO, 1); + + Self { + inode_table, + directory_cache: DCache::new(), + readdir_populated: FutureBackedCache::default(), + next_ino: AtomicU64::new(Self::ROOT_INO + 1), + next_fh: AtomicU64::new(1), + refcounts, + readdir_buf: Vec::new(), + open_files: HashMap::new(), + child_inodes: HashMap::new(), + inode_to_slot: HashMap::new(), + slots: Vec::new(), + fs_owner, + block_size, + } + } + + pub fn allocate_inode(&self) -> InodeAddr { + self.next_ino.fetch_add(1, Ordering::Relaxed) + } + + pub fn fs_owner(&self) -> (u32, u32) { + self.fs_owner + } + + #[expect(dead_code, reason = "available for future use")] + pub fn block_size(&self) -> u32 { + self.block_size + } + + pub fn add_child(&mut self, inner: Inner, child_root_ino: InodeAddr) -> InodeAddr { + self.add_child_with_parent(inner, child_root_ino, Self::ROOT_INO) + } + + pub fn cache_inode(&self, inode: INode) { + self.inode_table.insert_sync(inode.addr, inode); + } + + /// Insert the inode into the table and initialise its refcount to zero. + /// + /// The caller is responsible for bumping the refcount via [`inc_rc`](Self::inc_rc). 
+ pub fn cache_inode_and_init_rc(&mut self, inode: INode) { + let addr = inode.addr; + self.inode_table.insert_sync(addr, inode); + self.refcounts.entry(addr).or_insert(0); + } + + pub fn inc_rc(&mut self, addr: InodeAddr) -> Option { + let rc = self.refcounts.get_mut(&addr)?; + *rc += 1; + Some(*rc) + } + + pub fn slot_for_inode(&self, ino: InodeAddr) -> Option { self.inode_to_slot.get(&ino).copied() } - /// Allocate an outer file handle and map it through the bridge. - #[must_use] - pub fn alloc_fh(&mut self, slot_idx: usize, inner_fh: FileHandle) -> FileHandle { - let fh = self.file_table.allocate(); - self.slots[slot_idx].bridge.insert_fh(fh, inner_fh); - fh + /// Like [`add_child`](Self::add_child) but sets a custom parent inode + /// instead of always using `ROOT_INO`. + pub fn add_child_with_parent( + &mut self, + inner: Inner, + child_root_ino: InodeAddr, + parent_ino: InodeAddr, + ) -> InodeAddr { + let outer_ino = self.allocate_inode(); + let now = std::time::SystemTime::now(); + let inode = INode { + addr: outer_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.fs_owner.0, + gid: self.fs_owner.1, + create_time: now, + last_modified_at: now, + parent: Some(parent_ino), + size: 0, + itype: INodeType::Directory, + }; + self.inode_table.insert_sync(outer_ino, inode); + + let mut bridge = InodeBridge::new(); + bridge.insert(outer_ino, child_root_ino); + + let idx = self.slots.len(); + self.slots.push(ChildSlot { inner, bridge }); + self.child_inodes.insert(outer_ino, idx); + self.inode_to_slot.insert(outer_ino, idx); + + outer_ino } +} - /// Translate an inner inode to an outer inode, allocating if needed. - /// Also inserts a stub ICB into the outer icache when the inode is new. 
- #[instrument(name = "CompositeFs::translate_inner_ino", skip(self, name))] - pub async fn translate_inner_ino( +impl CompositeFs { + #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] + pub async fn delegated_lookup( &mut self, - slot_idx: usize, - inner_ino: Inode, - parent_outer_ino: Inode, + parent: InodeAddr, name: &OsStr, - ) -> Inode { - let outer_ino = self.slots[slot_idx] + ) -> Result { + // Fast path: DCache hit + inode still in table + if let Some(dentry) = self.directory_cache.lookup(LoadedAddr(parent), name) + && let Some(inode) = self.inode_table.get(&dentry.ino.0).await + { + *self.refcounts.entry(inode.addr).or_insert(0) += 1; + return Ok(inode); + } + + // Slow path: delegate to child + let idx = self + .inode_to_slot + .get(&parent) + .copied() + .ok_or(LookupError::InodeNotFound)?; + let inner_parent = self.slots[idx] .bridge - .backward_or_insert_inode(inner_ino, || self.icache.allocate_inode()); - self.inode_to_slot.insert(outer_ino, slot_idx); - self.icache - .entry_or_insert_icb( - outer_ino, - || InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent_outer_ino), - attr: None, - children: None, - }, - |_| {}, + .forward(parent) + .ok_or(LookupError::InodeNotFound)?; + let inner_inode = self.slots[idx].inner.lookup(inner_parent, name).await?; + + let next_ino = &self.next_ino; + let outer_ino = self.slots[idx] + .bridge + .backward_or_insert(inner_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }); + self.inode_to_slot.insert(outer_ino, idx); + + let remapped = INode { + addr: outer_ino, + ..inner_inode + }; + self.inode_table + .get_or_init(outer_ino, || async move { remapped }) + .await; + + let is_dir = matches!(inner_inode.itype, INodeType::Directory); + self.directory_cache + .insert( + LoadedAddr(parent), + name.to_os_string(), + LoadedAddr(outer_ino), + is_dir, ) .await; - outer_ino + + *self.refcounts.entry(outer_ino).or_insert(0) += 1; + let rc = self.refcounts[&outer_ino]; + 
trace!( + outer_ino, + inner_ino = inner_inode.addr, + rc, + "lookup: resolved via delegation" + ); + + Ok(remapped) + } + + #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] + pub async fn delegated_readdir( + &mut self, + ino: InodeAddr, + ) -> Result<&[FsDirEntry], ReadDirError> { + let idx = self + .inode_to_slot + .get(&ino) + .copied() + .ok_or(ReadDirError::InodeNotFound)?; + + if self.readdir_populated.get(&LoadedAddr(ino)).await.is_none() { + let inner_ino = self.slots[idx] + .bridge + .forward(ino) + .ok_or(ReadDirError::InodeNotFound)?; + let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; + + for (name, child_inode) in &inner_entries { + let next_ino = &self.next_ino; + let outer_child = self.slots[idx] + .bridge + .backward_or_insert(child_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }); + self.inode_to_slot.insert(outer_child, idx); + + let remapped = INode { + addr: outer_child, + ..*child_inode + }; + self.inode_table + .get_or_init(outer_child, || async move { remapped }) + .await; + + let is_dir = matches!(child_inode.itype, INodeType::Directory); + self.directory_cache + .insert( + LoadedAddr(ino), + name.clone(), + LoadedAddr(outer_child), + is_dir, + ) + .await; + } + + self.readdir_populated + .get_or_init(LoadedAddr(ino), || async {}) + .await; + } + + let mut children = self.directory_cache.readdir(LoadedAddr(ino)).await; + children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + let mut entries = Vec::with_capacity(children.len()); + for (name, dvalue) in &children { + if let Some(inode) = self.inode_table.get(&dvalue.ino.0).await { + entries.push(FsDirEntry { + ino: inode.addr, + name: name.clone(), + }); + } + } + + self.readdir_buf = entries; + Ok(&self.readdir_buf) } - /// Get cached file attributes for an inode. 
#[instrument(name = "CompositeFs::delegated_getattr", skip(self))] - pub async fn delegated_getattr(&self, ino: Inode) -> Result { - self.icache.get_attr(ino).await.ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) + pub async fn delegated_getattr(&self, ino: InodeAddr) -> Result { + self.inode_table + .get(&ino) + .await + .ok_or(GetAttrError::InodeNotFound) + } + + #[expect(dead_code, reason = "will be needed by future callers")] + #[must_use] + pub fn delegated_statfs(&self) -> AsyncFsStats { + AsyncFsStats { + block_size: self.block_size, + total_blocks: 0, + free_blocks: 0, + available_blocks: 0, + total_inodes: self.inode_table.len() as u64, + free_inodes: 0, + max_filename_length: 255, + } } - /// Find slot, forward inode, delegate to inner, allocate outer file handle. #[instrument(name = "CompositeFs::delegated_open", skip(self))] pub async fn delegated_open( &mut self, - ino: Inode, + ino: InodeAddr, flags: OpenFlags, - ) -> Result { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "open on inode not belonging to any child"); - OpenError::InodeNotFound - })?; + ) -> Result { + let idx = self + .inode_to_slot + .get(&ino) + .copied() + .ok_or(OpenError::InodeNotFound)?; let inner_ino = self.slots[idx] .bridge - .forward_or_insert_inode(ino, || unreachable!("open: ino should be mapped")); - let inner_open = self.slots[idx].inner.open(inner_ino, flags).await?; - let outer_fh = self.alloc_fh(idx, inner_open.handle); - trace!( - ino, + .forward(ino) + .ok_or(OpenError::InodeNotFound)?; + let inner_fh = self.slots[idx].inner.open(inner_ino, flags).await?; + + let outer_fh = self.next_fh.fetch_add(1, Ordering::Relaxed); + self.open_files.insert( outer_fh, - inner_fh = inner_open.handle, - "open: assigned file handle" + OpenFileEntry { + slot_idx: idx, + inner_ino, + inner_fh, + }, ); - Ok(OpenFile { - handle: outer_fh, - options: inner_open.options, - }) + + trace!(ino, outer_fh, inner_fh, "open: 
assigned fh"); + Ok(outer_fh) } - /// Find slot, forward inode and file handle, delegate read to inner. - #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] #[instrument(name = "CompositeFs::delegated_read", skip(self))] pub async fn delegated_read( &mut self, - ino: Inode, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "read on inode not belonging to any child"); - ReadError::InodeNotFound - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("read: ino should be mapped")); - let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "read: no fh mapping found"); - ReadError::FileNotOpen - })?; - self.slots[idx] + let entry = self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; + let slot_idx = entry.slot_idx; + let inner_ino = entry.inner_ino; + let inner_fh = entry.inner_fh; + self.slots[slot_idx] .inner - .read(inner_ino, inner_fh, offset, size, flags, lock_owner) + .read(inner_ino, inner_fh, offset, size) .await } - /// Find slot, forward inode and file handle, delegate release to inner, - /// then clean up the file handle mapping. 
#[instrument(name = "CompositeFs::delegated_release", skip(self))] - pub async fn delegated_release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "release on inode not belonging to any child"); - ReleaseError::FileNotOpen - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("release: ino should be mapped")); - let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "release: no fh mapping found"); - ReleaseError::FileNotOpen - })?; - let result = self.slots[idx] + pub async fn delegated_release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { + let entry = self + .open_files + .remove(&fh) + .ok_or(ReleaseError::FileNotOpen)?; + let result = self.slots[entry.slot_idx] .inner - .release(inner_ino, inner_fh, flags, flush) + .release(entry.inner_ino, entry.inner_fh) .await; - self.slots[idx].bridge.remove_fh_by_left(fh); - trace!(ino, fh, "release: cleaned up fh mapping"); + trace!(fh, "release: cleaned up fh mapping"); result } - /// Propagate forget to the inner filesystem, evict from icache, and clean - /// up bridge mappings. Returns `true` if the inode was evicted. + /// Returns `true` if the inode was evicted. /// - /// Child-root inodes (those in `child_inodes`) do NOT propagate forget to - /// the inner filesystem: the inner root's `rc=1` is an initialization - /// invariant unrelated to outer FUSE lookup counts. Propagating would - /// evict the inner root, breaking all subsequent operations on that child. + /// The composite only manages its own refcounts and inode table. + /// Inner filesystem inodes are managed by the inner FS itself through + /// its own lifecycle; the composite does not propagate forget to children. 
+ #[expect(dead_code, reason = "will be needed by future callers")] #[must_use] #[instrument(name = "CompositeFs::delegated_forget", skip(self))] - pub async fn delegated_forget(&mut self, ino: Inode, nlookups: u64) -> bool { - let slot_idx = self.slot_for_inode(ino); - let is_child_root = self.child_inodes.contains_key(&ino); - if !is_child_root - && let Some(idx) = slot_idx - && let Some(&inner_ino) = self.slots[idx].bridge.inode_map_get_by_left(ino) - { - self.slots[idx].inner.forget(inner_ino, nlookups).await; - } - if self.icache.forget(ino, nlookups).await.is_some() { - self.child_inodes.remove(&ino); - self.inode_to_slot.remove(&ino); - if let Some(idx) = slot_idx { - self.slots[idx].bridge.remove_inode_by_left(ino); + pub fn delegated_forget(&mut self, ino: InodeAddr, nlookups: u64) -> bool { + let slot_idx = self.inode_to_slot.get(&ino).copied(); + + if let Some(rc) = self.refcounts.get_mut(&ino) { + *rc = rc.saturating_sub(nlookups); + if *rc > 0 { + return false; } - true + self.refcounts.remove(&ino); } else { - false + return false; } - } - - /// Return filesystem statistics from the icache. - #[must_use] - pub fn delegated_statfs(&self) -> FilesystemStats { - self.icache.statfs() - } - - /// Delegation branch for lookup when the parent is owned by a child slot. 
- #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] - pub async fn delegated_lookup( - &mut self, - parent: Inode, - name: &OsStr, - ) -> Result { - let idx = self - .slot_for_inode(parent) - .ok_or(LookupError::InodeNotFound)?; - let inner_parent = self.slots[idx] - .bridge - .forward_or_insert_inode(parent, || unreachable!("lookup: parent should be mapped")); - let inner_attr = self.slots[idx].inner.lookup(inner_parent, name).await?; - let inner_ino = inner_attr.common().ino; - let outer_ino = self.translate_inner_ino(idx, inner_ino, parent, name).await; - let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); - self.icache.cache_attr(outer_ino, outer_attr).await; - // None means the entry was concurrently evicted; fail the lookup so - // the kernel doesn't hold a ref the cache no longer tracks. - let rc = self - .icache - .inc_rc(outer_ino) - .await - .ok_or(LookupError::InodeNotFound)?; - trace!(outer_ino, inner_ino, rc, "lookup: resolved via delegation"); - Ok(outer_attr) - } - /// Delegation branch for readdir when the inode is owned by a child slot. 
- #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] - pub async fn delegated_readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - let idx = self - .slot_for_inode(ino) - .ok_or(ReadDirError::InodeNotFound)?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("readdir: ino should be mapped")); - let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; - let inner_entries: Vec = inner_entries.to_vec(); - let evicted = self.icache.evict_zero_rc_children(ino).await; - for evicted_ino in evicted { - if let Some(slot) = self.inode_to_slot.remove(&evicted_ino) { - self.slots[slot].bridge.remove_inode_by_left(evicted_ino); - } - self.child_inodes.remove(&evicted_ino); + self.inode_table.remove_sync(&ino); + self.child_inodes.remove(&ino); + self.inode_to_slot.remove(&ino); + if let Some(idx) = slot_idx { + self.slots[idx].bridge.remove_by_outer(ino); } - let mut outer_entries = Vec::with_capacity(inner_entries.len()); - for entry in &inner_entries { - let outer_child_ino = self - .translate_inner_ino(idx, entry.ino, ino, &entry.name) - .await; - if let Some(inner_attr) = self.slots[idx].inner.peek_attr(entry.ino).await { - let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); - self.icache.cache_attr(outer_child_ino, outer_attr).await; - } - outer_entries.push(DirEntry { - ino: outer_child_ino, - name: entry.name.clone(), - kind: entry.kind, - }); - } - self.readdir_buf = outer_entries; - Ok(&self.readdir_buf) + + true } } diff --git a/src/fs/mescloud/icache.rs b/src/fs/mescloud/icache.rs deleted file mode 100644 index 15f1f5d7..00000000 --- a/src/fs/mescloud/icache.rs +++ /dev/null @@ -1,437 +0,0 @@ -//! Mescloud-specific inode control block, helpers, and directory cache wrapper. 
- -use std::ffi::OsStr; -use std::time::SystemTime; - -use crate::fs::icache::{AsyncICache, IcbLike, IcbResolver, InodeFactory}; -use crate::fs::r#trait::{ - CommonFileAttr, DirEntryType, FileAttr, FilesystemStats, Inode, Permissions, -}; - -/// Inode control block for mescloud filesystem layers. -#[derive(Clone)] -pub struct InodeControlBlock { - pub parent: Option, - pub rc: u64, - pub path: std::path::PathBuf, - /// Cached file attributes from the last lookup. - pub attr: Option, - /// Cached directory children from the resolver (directories only). - pub children: Option>, -} - -impl IcbLike for InodeControlBlock { - fn new_root(path: std::path::PathBuf) -> Self { - Self { - rc: 1, - parent: None, - path, - attr: None, - children: None, - } - } - - fn rc(&self) -> u64 { - self.rc - } - - fn rc_mut(&mut self) -> &mut u64 { - &mut self.rc - } - - fn needs_resolve(&self) -> bool { - match self.attr { - None => true, - Some(FileAttr::Directory { .. }) => self.children.is_none(), - Some(_) => false, - } - } -} - -/// Calculate the number of blocks needed for a given size. -pub fn blocks_of_size(block_size: u32, size: u64) -> u64 { - size.div_ceil(u64::from(block_size)) -} - -/// Free function -- usable by both `MescloudICache` and resolvers. -pub fn make_common_file_attr( - ino: Inode, - perm: u16, - atime: SystemTime, - mtime: SystemTime, - fs_owner: (u32, u32), - block_size: u32, -) -> CommonFileAttr { - CommonFileAttr { - ino, - atime, - mtime, - ctime: SystemTime::UNIX_EPOCH, - crtime: SystemTime::UNIX_EPOCH, - perm: Permissions::from_bits_truncate(perm), - nlink: 1, - uid: fs_owner.0, - gid: fs_owner.1, - blksize: block_size, - } -} - -/// Mescloud-specific directory cache wrapper over `AsyncICache`. -pub struct MescloudICache> { - inner: AsyncICache, - inode_factory: InodeFactory, - fs_owner: (u32, u32), - block_size: u32, -} - -impl> MescloudICache { - /// Create a new `MescloudICache`. Initializes root ICB (rc=1), caches root dir attr. 
- pub fn new(resolver: R, root_ino: Inode, fs_owner: (u32, u32), block_size: u32) -> Self { - let cache = Self { - inner: AsyncICache::new(resolver, root_ino, "/"), - inode_factory: InodeFactory::new(root_ino + 1), - fs_owner, - block_size, - }; - - // Set root directory attr synchronously during initialization - let now = SystemTime::now(); - let root_attr = FileAttr::Directory { - common: make_common_file_attr(root_ino, 0o755, now, now, fs_owner, block_size), - }; - cache.inner.get_icb_mut_sync(root_ino, |icb| { - icb.attr = Some(root_attr); - }); - - cache - } - - // -- Delegated from AsyncICache (async) -- - - pub fn contains(&self, ino: Inode) -> bool { - self.inner.contains(ino) - } - - pub async fn get_icb( - &self, - ino: Inode, - // `Sync` required: see comment on `AsyncICache::get_icb`. - f: impl Fn(&InodeControlBlock) -> T + Send + Sync, - ) -> Option { - self.inner.get_icb(ino, f).await - } - - pub async fn insert_icb(&self, ino: Inode, icb: InodeControlBlock) { - self.inner.insert_icb(ino, icb).await; - } - - pub async fn entry_or_insert_icb( - &self, - ino: Inode, - factory: impl FnOnce() -> InodeControlBlock, - then: impl FnOnce(&mut InodeControlBlock) -> T, - ) -> T { - self.inner.entry_or_insert_icb(ino, factory, then).await - } - - pub async fn inc_rc(&self, ino: Inode) -> Option { - self.inner.inc_rc(ino).await - } - - pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { - self.inner.forget(ino, nlookups).await - } - - pub async fn get_or_resolve( - &self, - ino: Inode, - then: impl FnOnce(&InodeControlBlock) -> T, - ) -> Result { - self.inner.get_or_resolve(ino, then).await - } - - // -- Domain-specific -- - - /// Allocate a new inode number. 
- pub fn allocate_inode(&self) -> Inode { - self.inode_factory.allocate() - } - - pub async fn get_attr(&self, ino: Inode) -> Option { - self.inner.get_icb(ino, |icb| icb.attr).await.flatten() - } - - pub async fn cache_attr(&self, ino: Inode, attr: FileAttr) { - self.inner - .get_icb_mut(ino, |icb| { - icb.attr = Some(attr); - }) - .await; - } - - pub fn fs_owner(&self) -> (u32, u32) { - self.fs_owner - } - - pub fn block_size(&self) -> u32 { - self.block_size - } - - pub fn statfs(&self) -> FilesystemStats { - FilesystemStats { - block_size: self.block_size, - fragment_size: u64::from(self.block_size), - total_blocks: 0, - free_blocks: 0, - available_blocks: 0, - total_inodes: self.inner.inode_count() as u64, - free_inodes: 0, - available_inodes: 0, - filesystem_id: 0, - mount_flags: 0, - max_filename_length: 255, - } - } - - /// Evict all `Available` children of `parent` that have `rc == 0`. - /// Returns the list of evicted inode numbers so callers can clean up - /// associated state (e.g., bridge mappings, slot tracking). - pub async fn evict_zero_rc_children(&self, parent: Inode) -> Vec { - let mut to_evict = Vec::new(); - self.inner - .for_each(|&ino, icb| { - if icb.rc == 0 && icb.parent == Some(parent) { - to_evict.push(ino); - } - }) - .await; - let mut evicted = Vec::new(); - for ino in to_evict { - if self.inner.forget(ino, 0).await.is_some() { - evicted.push(ino); - } - } - evicted - } - - /// Find an existing child by (parent, name) or allocate a new inode. - /// If new, inserts a stub ICB (parent+path set, attr=None, children=None, rc=0). - /// Does NOT bump rc. Returns the inode number. - /// - /// # Safety invariant - /// - /// The `for_each` scan and `insert_icb` are **not** atomic. If two callers - /// race with the same `(parent, name)`, both may allocate distinct inodes - /// for the same logical child. This is currently safe because all callers - /// go through `&mut self` on the owning `Fs` implementation. 
- pub async fn ensure_child_ino(&self, parent: Inode, name: &OsStr) -> Inode { - // Search for existing child by parent + name - let mut existing_ino = None; - self.inner - .for_each(|&ino, icb| { - if icb.parent == Some(parent) && icb.path.as_os_str() == name { - existing_ino = Some(ino); - } - }) - .await; - - if let Some(ino) = existing_ino { - return ino; - } - - // Allocate new inode and insert stub - let ino = self.inode_factory.allocate(); - self.inner - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent), - attr: None, - children: None, - }, - ) - .await; - ino - } -} - -#[cfg(test)] -mod tests { - use std::future::Future; - - use super::*; - use crate::fs::icache::async_cache::AsyncICache; - use crate::fs::r#trait::DirEntryType; - - fn dummy_dir_attr(ino: Inode) -> FileAttr { - let now = SystemTime::now(); - FileAttr::Directory { - common: make_common_file_attr(ino, 0o755, now, now, (0, 0), 4096), - } - } - - fn dummy_file_attr(ino: Inode) -> FileAttr { - let now = SystemTime::now(); - FileAttr::RegularFile { - common: make_common_file_attr(ino, 0o644, now, now, (0, 0), 4096), - size: 100, - blocks: 1, - } - } - - #[test] - fn needs_resolve_stub_returns_true() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 0, - path: "stub".into(), - attr: None, - children: None, - }; - assert!(icb.needs_resolve()); - } - - #[test] - fn needs_resolve_file_with_attr_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "file.txt".into(), - attr: Some(dummy_file_attr(2)), - children: None, - }; - assert!(!icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_without_children_returns_true() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "dir".into(), - attr: Some(dummy_dir_attr(3)), - children: None, - }; - assert!(icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_with_children_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - 
rc: 1, - path: "dir".into(), - attr: Some(dummy_dir_attr(3)), - children: Some(vec![("README.md".to_owned(), DirEntryType::RegularFile)]), - }; - assert!(!icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_with_empty_children_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "empty-dir".into(), - attr: Some(dummy_dir_attr(4)), - children: Some(vec![]), - }; - assert!(!icb.needs_resolve()); - } - - struct NoOpResolver; - - impl IcbResolver for NoOpResolver { - type Icb = InodeControlBlock; - type Error = std::convert::Infallible; - - #[expect( - clippy::manual_async_fn, - reason = "must match IcbResolver trait signature" - )] - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - async { unreachable!("NoOpResolver should not be called") } - } - } - - fn test_mescloud_cache() -> MescloudICache { - MescloudICache::new(NoOpResolver, 1, (0, 0), 4096) - } - - #[tokio::test] - async fn evict_zero_rc_children_removes_stubs() { - let cache = test_mescloud_cache(); - - // Insert stubs as children of root (ino=1) with rc=0 - cache - .insert_icb( - 10, - InodeControlBlock { - rc: 0, - path: "child_a".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - cache - .insert_icb( - 11, - InodeControlBlock { - rc: 0, - path: "child_b".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - - // Insert a child with rc > 0 — should survive - cache - .insert_icb( - 12, - InodeControlBlock { - rc: 1, - path: "active".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - - // Insert a stub under a different parent — should survive - cache - .insert_icb( - 20, - InodeControlBlock { - rc: 0, - path: "other".into(), - parent: Some(12), - attr: None, - children: None, - }, - ) - .await; - - let evicted = cache.evict_zero_rc_children(1).await; - assert_eq!(evicted.len(), 2, "should evict 2 zero-rc children of root"); - 
- assert!(!cache.contains(10), "child_a should be evicted"); - assert!(!cache.contains(11), "child_b should be evicted"); - assert!(cache.contains(12), "active child should survive"); - assert!( - cache.contains(20), - "child of different parent should survive" - ); - } -} diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index 1a3cce80..15a70725 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -1,24 +1,23 @@ -use std::collections::HashMap; -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::future::Future; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; use std::time::SystemTime; use bytes::Bytes; +use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; use mesa_dev::MesaClient; use opentelemetry::propagation::Injector; use secrecy::ExposeSecret as _; -use tracing::{Instrument as _, instrument, trace, warn}; +use tracing::{instrument, trace, warn}; use tracing_opentelemetry::OpenTelemetrySpanExt as _; use crate::app_config::CacheConfig; -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, - OpenFlags, -}; -use composite::{ChildSlot, CompositeFs}; +pub use common::FsDirEntry; +use composite::CompositeFs; + +pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; #[cfg(feature = "staging")] const MESA_API_BASE_URL: &str = "https://staging.depot.mesa.dev/api/v1"; @@ -27,17 +26,11 @@ const MESA_API_BASE_URL: &str = "https://depot.mesa.dev/api/v1"; mod common; mod composite; -use common::InodeControlBlock; -pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; - -use icache as mescloud_icache; -use icache::MescloudICache; mod org; pub use org::OrgConfig; use org::OrgFs; -pub mod icache; pub mod repo; struct 
HeaderInjector<'a>(&'a mut reqwest::header::HeaderMap); @@ -89,50 +82,6 @@ fn build_mesa_client(api_key: &str) -> MesaClient { .build() } -struct MesaResolver { - fs_owner: (u32, u32), - block_size: u32, -} - -impl IcbResolver for MesaResolver { - type Icb = InodeControlBlock; - type Error = std::convert::Infallible; - - fn resolve( - &self, - ino: Inode, - stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let fs_owner = self.fs_owner; - let block_size = self.block_size; - async move { - let stub = stub.unwrap_or_else(|| InodeControlBlock { - parent: None, - path: "/".into(), - rc: 0, - attr: None, - children: None, - }); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }; - Ok(InodeControlBlock { - attr: Some(attr), - children: Some(vec![]), - ..stub - }) - } - .instrument(tracing::info_span!("MesaResolver::resolve", ino)) - } -} - /// Classifies an inode by its role in the mesa hierarchy. enum InodeRole { /// The filesystem root (ino == 1). @@ -146,11 +95,11 @@ enum InodeRole { /// Composes multiple [`OrgFs`] instances, each with its own inode namespace, /// delegating to [`CompositeFs`] for inode/fh translation at each boundary. pub struct MesaFS { - composite: CompositeFs, + composite: CompositeFs, } impl MesaFS { - const ROOT_NODE_INO: Inode = 1; + const ROOT_NODE_INO: InodeAddr = CompositeFs::::ROOT_INO; const BLOCK_SIZE: u32 = 4096; /// Create a new `MesaFS` instance. 
@@ -160,38 +109,17 @@ impl MesaFS { fs_owner: (u32, u32), cache: &CacheConfig, ) -> Self { - let resolver = MesaResolver { - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - Self { - composite: CompositeFs { - icache: MescloudICache::new( - resolver, - Self::ROOT_NODE_INO, - fs_owner, - Self::BLOCK_SIZE, - ), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - child_inodes: HashMap::new(), - inode_to_slot: HashMap::new(), - slots: orgs - .map(|org_conf| { - let client = build_mesa_client(org_conf.api_key.expose_secret()); - let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); - ChildSlot { - inner: org, - bridge: HashMapBridge::new(), - } - }) - .collect(), - }, + let mut composite = CompositeFs::new(fs_owner, Self::BLOCK_SIZE); + for org_conf in orgs { + let client = build_mesa_client(org_conf.api_key.expose_secret()); + let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); + composite.add_child(org, OrgFs::ROOT_INO); } + Self { composite } } /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> Option { + fn inode_role(&self, ino: InodeAddr) -> Option { if ino == Self::ROOT_NODE_INO { return Some(InodeRole::Root); } @@ -205,10 +133,8 @@ impl MesaFS { } /// Ensure a mesa-level inode exists for the org at `org_idx`. - /// Seeds the bridge with (`mesa_org_ino`, `OrgFs::ROOT_INO`). /// Does NOT bump rc. - async fn ensure_org_inode(&mut self, org_idx: usize) -> (Inode, FileAttr) { - // Check if an inode already exists. 
+ async fn ensure_org_inode(&mut self, org_idx: usize) -> (InodeAddr, INode) { let existing_ino = self .composite .child_inodes @@ -217,104 +143,62 @@ impl MesaFS { .map(|(&ino, _)| ino); if let Some(existing_ino) = existing_ino { - if let Some(attr) = self.composite.icache.get_attr(existing_ino).await { - let rc = self - .composite - .icache - .get_icb(existing_ino, |icb| icb.rc) - .await - .unwrap_or(0); + if let Ok(inode) = self.composite.delegated_getattr(existing_ino).await { trace!( ino = existing_ino, - org_idx, rc, "ensure_org_inode: reusing existing inode" - ); - return (existing_ino, attr); - } - if self.composite.icache.contains(existing_ino) { - // ICB exists but attr missing — rebuild and cache. - warn!( - ino = existing_ino, - org_idx, "ensure_org_inode: attr missing, rebuilding" + org_idx, "ensure_org_inode: reusing existing inode" ); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - existing_ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(existing_ino, attr).await; - return (existing_ino, attr); + return (existing_ino, inode); } - // ICB was evicted — clean up stale tracking entries. 
warn!( ino = existing_ino, - org_idx, "ensure_org_inode: ICB evicted, cleaning up stale entry" + org_idx, "ensure_org_inode: evicted, rebuilding" ); - self.composite.child_inodes.remove(&existing_ino); - self.composite.inode_to_slot.remove(&existing_ino); + let now = SystemTime::now(); + let inode = INode { + addr: existing_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_NODE_INO), + size: 0, + itype: INodeType::Directory, + }; + self.composite.cache_inode(inode); + self.composite.inode_to_slot.insert(existing_ino, org_idx); + self.composite.child_inodes.insert(existing_ino, org_idx); + return (existing_ino, inode); } - // Allocate new. + warn!( + org_idx, + "ensure_org_inode: no child_inodes entry for org slot" + ); let org_name = self.composite.slots[org_idx].inner.name().to_owned(); - let ino = self.composite.icache.allocate_inode(); - trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); - + let ino = self.composite.allocate_inode(); let now = SystemTime::now(); - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: org_name.as_str().into(), - parent: Some(Self::ROOT_NODE_INO), - attr: None, - children: None, - }, - ) - .await; - + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_NODE_INO), + size: 0, + itype: INodeType::Directory, + }; + self.composite.cache_inode(inode); self.composite.child_inodes.insert(ino, org_idx); self.composite.inode_to_slot.insert(ino, org_idx); - - // Reset bridge (may have stale mappings from a previous eviction cycle) - // and seed: mesa org-root <-> OrgFs::ROOT_INO. 
- self.composite.slots[org_idx].bridge = HashMapBridge::new(); - self.composite.slots[org_idx] - .bridge - .insert_inode(ino, OrgFs::ROOT_INO); - - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); + (ino, inode) } -} - -#[async_trait::async_trait] -impl Fs for MesaFS { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; #[instrument(name = "MesaFS::lookup", skip(self))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { + pub async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; match role { InodeRole::Root => { @@ -327,31 +211,23 @@ impl Fs for MesaFS { .ok_or(LookupError::InodeNotFound)?; trace!(org = org_name, "lookup: matched org"); - let (ino, attr) = self.ensure_org_inode(org_idx).await; - let rc = self - .composite - .icache + let (ino, inode) = self.ensure_org_inode(org_idx).await; + self.composite .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - trace!(ino, org = org_name, rc, "lookup: resolved org inode"); - Ok(attr) + Ok(inode) } InodeRole::OrgOwned => self.composite.delegated_lookup(parent, name).await, } } #[instrument(name = "MesaFS::getattr", skip(self))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { + pub async fn getattr(&self, ino: InodeAddr) -> Result { self.composite.delegated_getattr(ino).await } #[instrument(name = "MesaFS::readdir", skip(self))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { + pub async fn readdir(&mut 
self, ino: InodeAddr) -> Result<&[FsDirEntry], ReadDirError> { let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; match role { InodeRole::Root => { @@ -365,11 +241,10 @@ impl Fs for MesaFS { let mut entries = Vec::with_capacity(org_info.len()); for (org_idx, name) in &org_info { - let (org_ino, _) = self.ensure_org_inode(*org_idx).await; - entries.push(DirEntry { - ino: org_ino, + let (entry_ino, _) = self.ensure_org_inode(*org_idx).await; + entries.push(FsDirEntry { + ino: entry_ino, name: name.clone().into(), - kind: DirEntryType::Directory, }); } @@ -382,45 +257,178 @@ impl Fs for MesaFS { } #[instrument(name = "MesaFS::open", skip(self))] - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { + pub async fn open( + &mut self, + ino: InodeAddr, + flags: OpenFlags, + ) -> Result { self.composite.delegated_open(ino, flags).await } #[instrument(name = "MesaFS::read", skip(self))] - async fn read( + pub async fn read( &mut self, - ino: Inode, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - self.composite - .delegated_read(ino, fh, offset, size, flags, lock_owner) - .await + self.composite.delegated_read(fh, offset, size).await } #[instrument(name = "MesaFS::release", skip(self))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - self.composite - .delegated_release(ino, fh, flags, flush) - .await + pub async fn release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { + self.composite.delegated_release(fh).await } +} + +/// A file reader that delegates reads to `MesaFS` through a shared mutex. +/// +/// Resources are released via [`FileReader::close`](git_fs::fs::async_fs::FileReader::close), +/// which is called by the FUSE adapter during `release`. Dropping without +/// calling `close()` emits a diagnostic warning. 
+pub struct MesaFsReader { + inner: Arc>, + fh: FileHandle, + closed: AtomicBool, +} - #[instrument(name = "MesaFS::forget", skip(self))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - // MesaFS has no extra state to clean up on eviction (unlike OrgFs::owner_inodes). - let _ = self.composite.delegated_forget(ino, nlookups).await; +impl git_fs::fs::async_fs::FileReader for MesaFsReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let fh = self.fh; + async move { + let mut guard = inner.lock().await; + guard + .read(fh, offset, size) + .await + .map_err(|e| std::io::Error::other(e.to_string())) + } } - async fn statfs(&mut self) -> Result { - Ok(self.composite.delegated_statfs()) + fn close(&self) -> impl Future> + Send { + self.closed.store(true, Ordering::Relaxed); + let inner = Arc::clone(&self.inner); + let fh = self.fh; + async move { + let mut guard = inner.lock().await; + guard + .release(fh) + .await + .map_err(|e| std::io::Error::other(e.to_string())) + } + } +} + +impl Drop for MesaFsReader { + fn drop(&mut self) { + if !self.closed.load(Ordering::Relaxed) { + tracing::warn!(fh = self.fh, "MesaFsReader dropped without close()"); + } + } +} + +/// A [`FsDataProvider`](git_fs::fs::async_fs::FsDataProvider) that wraps +/// `MesaFS` behind a shared mutex. +#[derive(Clone)] +pub struct MesaFsProvider { + inner: Arc>, +} + +impl MesaFsProvider { + /// Create a new provider wrapping the given `MesaFS`. 
+ pub fn new(mesa_fs: MesaFS) -> Self { + Self { + inner: Arc::new(tokio::sync::Mutex::new(mesa_fs)), + } + } +} + +fn lookup_error_to_io(e: LookupError) -> std::io::Error { + match e { + LookupError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + LookupError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), + } +} + +fn readdir_error_to_io(e: ReadDirError) -> std::io::Error { + match e { + ReadDirError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + ReadDirError::NotADirectory => std::io::Error::from_raw_os_error(libc::ENOTDIR), + ReadDirError::NotPermitted => std::io::Error::from_raw_os_error(libc::EPERM), + ReadDirError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), + } +} + +fn open_error_to_io(e: OpenError) -> std::io::Error { + match e { + OpenError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + } +} + +impl git_fs::fs::async_fs::FsDataProvider for MesaFsProvider { + type Reader = MesaFsReader; + + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let name = name.to_os_string(); + async move { + let mut guard = inner.lock().await; + guard + .lookup(parent.addr, &name) + .await + .map_err(lookup_error_to_io) + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let inner = Arc::clone(&self.inner); + async move { + let mut guard = inner.lock().await; + let dir_entries: Vec<(OsString, InodeAddr)> = { + let entries = guard + .readdir(parent.addr) + .await + .map_err(readdir_error_to_io)?; + entries.iter().map(|e| (e.name.clone(), e.ino)).collect() + }; + let mut result = Vec::with_capacity(dir_entries.len()); + for (name, ino) in dir_entries { + if let Ok(inode) = guard.getattr(ino).await { + result.push((name, inode)); + } + } + Ok(result) + } + } + + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send { + let inner = 
Arc::clone(&self.inner); + async move { + let mut guard = inner.lock().await; + let fh = guard + .open(inode.addr, flags) + .await + .map_err(open_error_to_io)?; + Ok(MesaFsReader { + inner: Arc::clone(&inner), + fh, + closed: AtomicBool::new(false), + }) + } } } diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs index 1f3b8b5f..feefaf8e 100644 --- a/src/fs/mescloud/org.rs +++ b/src/fs/mescloud/org.rs @@ -1,73 +1,19 @@ use std::collections::HashMap; -use std::ffi::OsStr; -use std::future::Future; +use std::ffi::{OsStr, OsString}; use std::time::SystemTime; use bytes::Bytes; use futures::TryStreamExt as _; +use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; use mesa_dev::MesaClient; use secrecy::SecretString; -use tracing::{Instrument as _, instrument, trace, warn}; - -pub use super::common::{ - GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::common::{InodeControlBlock, MesaApiError}; -use super::composite::{ChildSlot, CompositeFs}; -use super::icache as mescloud_icache; -use super::icache::MescloudICache; +use tracing::{instrument, trace, warn}; + +use super::common::{ChildFs, MesaApiError}; +pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; +use super::composite::CompositeFs; use super::repo::RepoFs; use crate::app_config::CacheConfig; -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, - OpenFlags, -}; - -pub(super) struct OrgResolver { - fs_owner: (u32, u32), - block_size: u32, -} - -impl IcbResolver for OrgResolver { - type Icb = InodeControlBlock; - type Error = LookupError; - - fn resolve( - &self, - ino: Inode, - stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let fs_owner = self.fs_owner; - let block_size = 
self.block_size; - async move { - let stub = stub.unwrap_or_else(|| InodeControlBlock { - parent: None, - path: "/".into(), - rc: 0, - attr: None, - children: None, - }); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }; - Ok(InodeControlBlock { - attr: Some(attr), - children: Some(vec![]), - ..stub - }) - } - .instrument(tracing::info_span!("OrgResolver::resolve", ino)) - } -} #[derive(Debug, Clone)] pub struct OrgConfig { @@ -81,7 +27,7 @@ enum InodeRole { OrgRoot, /// A virtual owner directory (github only). OwnerDir, - /// An inode owned by some repo. + /// An inode owned by some repo (either a child-root or delegated). RepoOwned, } @@ -92,14 +38,14 @@ enum InodeRole { pub struct OrgFs { name: String, client: MesaClient, - composite: CompositeFs, + composite: CompositeFs, /// Maps org-level owner-dir inodes to owner name (github only). - owner_inodes: HashMap, + owner_inodes: HashMap, cache_config: CacheConfig, } impl OrgFs { - pub(crate) const ROOT_INO: Inode = 1; + pub(crate) const ROOT_INO: InodeAddr = CompositeFs::::ROOT_INO; const BLOCK_SIZE: u32 = 4096; /// The name of the organization. @@ -123,31 +69,14 @@ impl OrgFs { /// Ensure an inode exists for a virtual owner directory (github only). Does NOT bump rc. /// TODO(MES-674): Cleanup "special" casing for github. 
- async fn ensure_owner_inode(&mut self, owner: &str) -> (Inode, FileAttr) { + async fn ensure_owner_inode(&mut self, owner: &str) -> (InodeAddr, INode) { // Check existing let mut stale_ino = None; for (&ino, existing_owner) in &self.owner_inodes { if existing_owner == owner { - if let Some(attr) = self.composite.icache.get_attr(ino).await { - return (ino, attr); - } - if self.composite.icache.contains(ino) { - // ICB exists but attr missing — rebuild and cache - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - return (ino, attr); + if let Ok(inode) = self.composite.delegated_getattr(ino).await { + return (ino, inode); } - // ICB was evicted — mark for cleanup stale_ino = Some(ino); break; } @@ -156,35 +85,22 @@ impl OrgFs { self.owner_inodes.remove(&ino); } - // Allocate new - let ino = self.composite.icache.allocate_inode(); + let ino = self.composite.allocate_inode(); let now = SystemTime::now(); - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: owner.into(), - parent: Some(Self::ROOT_INO), - attr: None, - children: None, - }, - ) - .await; - self.owner_inodes.insert(ino, owner.to_owned()); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_INO), + size: 0, + itype: INodeType::Directory, }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + self.composite.cache_inode_and_init_rc(inode); + 
self.owner_inodes.insert(ino, owner.to_owned()); + (ino, inode) } #[must_use] @@ -194,28 +110,17 @@ impl OrgFs { fs_owner: (u32, u32), cache_config: CacheConfig, ) -> Self { - let resolver = OrgResolver { - fs_owner, - block_size: Self::BLOCK_SIZE, - }; Self { name, client, - composite: CompositeFs { - icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - child_inodes: HashMap::new(), - inode_to_slot: HashMap::new(), - slots: Vec::new(), - }, + composite: CompositeFs::new(fs_owner, Self::BLOCK_SIZE), owner_inodes: HashMap::new(), cache_config, } } /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> Option { + fn inode_role(&self, ino: InodeAddr) -> Option { if ino == Self::ROOT_INO { return Some(InodeRole::OrgRoot); } @@ -242,144 +147,92 @@ impl OrgFs { repo_name: &str, display_name: &str, default_branch: &str, - parent_ino: Inode, - ) -> (Inode, FileAttr) { + parent_ino: InodeAddr, + ) -> (InodeAddr, INode) { // Check existing repos. for (&ino, &idx) in &self.composite.child_inodes { if self.composite.slots[idx].inner.repo_name() == repo_name { - if let Some(attr) = self.composite.icache.get_attr(ino).await { - let rc = self - .composite - .icache - .get_icb(ino, |icb| icb.rc) - .await - .unwrap_or(0); - trace!(ino, repo = repo_name, rc, "ensure_repo_inode: reusing"); - return (ino, attr); + if let Ok(inode) = self.composite.delegated_getattr(ino).await { + trace!(ino, repo = repo_name, "ensure_repo_inode: reusing"); + return (ino, inode); } warn!( ino, repo = repo_name, "ensure_repo_inode: attr missing, rebuilding" ); - return self.make_repo_dir_attr(ino).await; + return self.make_repo_dir_inode(ino); } } - // Check for orphaned slot (slot exists but not in child_inodes). 
- if let Some(idx) = self - .composite - .slots - .iter() - .position(|s| s.inner.repo_name() == repo_name) - { - return self.register_repo_slot(idx, display_name, parent_ino).await; - } - - // Allocate truly new slot. - let ino = self.composite.icache.allocate_inode(); - trace!( - ino, - repo = repo_name, - "ensure_repo_inode: allocated new inode" - ); - - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - attr: None, - children: None, - }, - ) - .await; - + // Create new RepoFs and register as child. let repo = RepoFs::new( self.client.clone(), self.name.clone(), repo_name.to_owned(), default_branch.to_owned(), - self.composite.icache.fs_owner(), - // TODO(markovejnovic): Unnecessary clone. Refactoring for clearer ownership semantics - // would be ideal. + self.composite.fs_owner(), self.cache_config.clone(), ) .await; - let mut bridge = HashMapBridge::new(); - bridge.insert_inode(ino, RepoFs::ROOT_INO); - - let idx = self.composite.slots.len(); - self.composite.slots.push(ChildSlot { - inner: repo, - bridge, - }); - self.composite.child_inodes.insert(ino, idx); - self.composite.inode_to_slot.insert(ino, idx); - - self.make_repo_dir_attr(ino).await - } - - /// Allocate a new inode, register it in an existing (orphaned) slot, and - /// return `(ino, attr)`. - async fn register_repo_slot( - &mut self, - idx: usize, - display_name: &str, - parent_ino: Inode, - ) -> (Inode, FileAttr) { - let ino = self.composite.icache.allocate_inode(); - trace!(ino, idx, "register_repo_slot: reusing orphaned slot"); + let outer_ino = self + .composite + .add_child_with_parent(repo, RepoFs::ROOT_INO, parent_ino); + trace!( + ino = outer_ino, + repo = repo_name, + "ensure_repo_inode: allocated new inode" + ); + // Register in directory cache so readdir sees it. 
self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - attr: None, - children: None, - }, + .directory_cache + .insert( + git_fs::fs::LoadedAddr(parent_ino), + OsString::from(display_name), + git_fs::fs::LoadedAddr(outer_ino), + true, ) .await; - warn!( - ino, - idx, - "register_repo_slot: resetting bridge for orphaned slot; \ - inner filesystem will not receive forget for stale inode mappings" - ); - self.composite.slots[idx].bridge = HashMapBridge::new(); - self.composite.slots[idx] - .bridge - .insert_inode(ino, RepoFs::ROOT_INO); - self.composite.child_inodes.insert(ino, idx); - self.composite.inode_to_slot.insert(ino, idx); - - self.make_repo_dir_attr(ino).await + let inode = self + .composite + .delegated_getattr(outer_ino) + .await + .unwrap_or_else(|_| { + let now = SystemTime::now(); + INode { + addr: outer_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(parent_ino), + size: 0, + itype: INodeType::Directory, + } + }); + (outer_ino, inode) } - /// Build and cache a directory attr for `ino`, returning `(ino, attr)`. - async fn make_repo_dir_attr(&self, ino: Inode) -> (Inode, FileAttr) { + /// Build a directory inode for `ino`, returning `(ino, inode)`. 
+ fn make_repo_dir_inode(&self, ino: InodeAddr) -> (InodeAddr, INode) { let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + self.composite.cache_inode(inode); + (ino, inode) } /// Fetch a repo by name via the API. @@ -398,62 +251,36 @@ impl OrgFs { } #[async_trait::async_trait] -impl super::common::InodeCachePeek for OrgFs { - async fn peek_attr(&self, ino: Inode) -> Option { - self.composite.icache.get_attr(ino).await - } -} - -#[async_trait::async_trait] -impl Fs for OrgFs { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - +impl ChildFs for OrgFs { #[instrument(name = "OrgFs::lookup", skip(self), fields(org = %self.name))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; match role { InodeRole::OrgRoot => { - // TODO(MES-674): Cleanup "special" casing for github. let name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; if self.is_github() { - // name is an owner like "torvalds" — create lazily, no API validation. 
trace!(owner = name_str, "lookup: resolving github owner dir"); - let (ino, attr) = self.ensure_owner_inode(name_str).await; + let (ino, inode) = self.ensure_owner_inode(name_str).await; self.composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - Ok(attr) + Ok(inode) } else { - // Children of org root are repos. trace!(repo = name_str, "lookup: resolving repo"); - - // Validate repo exists via API. let repo = self.wait_for_sync(name_str).await?; - - let (ino, attr) = self + let (ino, inode) = self .ensure_repo_inode(name_str, name_str, &repo.default_branch, Self::ROOT_INO) .await; let rc = self .composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; trace!(ino, repo = name_str, rc, "lookup: resolved repo inode"); - Ok(attr) + Ok(inode) } } InodeRole::OwnerDir => { - // TODO(MES-674): Cleanup "special" casing for github. - // Parent is an owner dir, name is a repo like "linux". let owner = self .owner_inodes .get(&parent) @@ -464,49 +291,32 @@ impl Fs for OrgFs { let encoded = Self::encode_github_repo_name(&full_decoded); trace!( - owner = %owner, - repo = repo_name_str, - encoded = %encoded, + owner = %owner, repo = repo_name_str, encoded = %encoded, "lookup: resolving github repo via owner dir" ); - // Validate via API (uses encoded name). 
let repo = self.wait_for_sync(&encoded).await?; - - let (ino, attr) = self + let (ino, inode) = self .ensure_repo_inode(&encoded, repo_name_str, &repo.default_branch, parent) .await; self.composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - Ok(attr) + Ok(inode) } InodeRole::RepoOwned => self.composite.delegated_lookup(parent, name).await, } } - #[instrument(name = "OrgFs::getattr", skip(self), fields(org = %self.name))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.composite.delegated_getattr(ino).await - } - #[instrument(name = "OrgFs::readdir", skip(self), fields(org = %self.name))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; match role { InodeRole::OrgRoot => { - // TODO(MES-674): Cleanup "special" casing for github. if self.is_github() { return Err(ReadDirError::NotPermitted); } - // List repos via API. let repos: Vec = self .client .org(&self.name) @@ -528,70 +338,53 @@ impl Fs for OrgFs { let mut entries = Vec::with_capacity(repo_infos.len()); for (repo_name, default_branch) in &repo_infos { - let (repo_ino, _) = self + let (_, inode) = self .ensure_repo_inode(repo_name, repo_name, default_branch, Self::ROOT_INO) .await; - entries.push(DirEntry { - ino: repo_ino, - name: repo_name.clone().into(), - kind: DirEntryType::Directory, - }); + entries.push((OsString::from(repo_name), inode)); } - self.composite.readdir_buf = entries; - Ok(&self.composite.readdir_buf) - } - InodeRole::OwnerDir if self.is_github() => { - // TODO(MES-674): Cleanup "special" casing for github. 
- Err(ReadDirError::NotPermitted) + Ok(entries) } + InodeRole::OwnerDir if self.is_github() => Err(ReadDirError::NotPermitted), InodeRole::OwnerDir => Err(ReadDirError::NotADirectory), - InodeRole::RepoOwned => self.composite.delegated_readdir(ino).await, + InodeRole::RepoOwned => { + let dir_entries: Vec<_> = self + .composite + .delegated_readdir(ino) + .await? + .iter() + .map(|e| (e.name.clone(), e.ino)) + .collect(); + let mut entries = Vec::with_capacity(dir_entries.len()); + for (name, child_ino) in dir_entries { + if let Some(inode) = self.composite.inode_table.get(&child_ino).await { + entries.push((name, inode)); + } + } + Ok(entries) + } } } #[instrument(name = "OrgFs::open", skip(self), fields(org = %self.name))] - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { + async fn open(&mut self, ino: InodeAddr, flags: OpenFlags) -> Result { self.composite.delegated_open(ino, flags).await } #[instrument(name = "OrgFs::read", skip(self), fields(org = %self.name))] async fn read( &mut self, - ino: Inode, + _ino: InodeAddr, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - self.composite - .delegated_read(ino, fh, offset, size, flags, lock_owner) - .await + self.composite.delegated_read(fh, offset, size).await } #[instrument(name = "OrgFs::release", skip(self), fields(org = %self.name))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - self.composite - .delegated_release(ino, fh, flags, flush) - .await - } - - #[instrument(name = "OrgFs::forget", skip(self), fields(org = %self.name))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - let evicted = self.composite.delegated_forget(ino, nlookups).await; - if evicted { - self.owner_inodes.remove(&ino); - } - } - - async fn statfs(&mut self) -> Result { - Ok(self.composite.delegated_statfs()) + async fn release(&mut self, _ino: InodeAddr, fh: FileHandle) -> 
Result<(), ReleaseError> { + self.composite.delegated_release(fh).await } } diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index 11b334a7..acff3d04 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -2,197 +2,436 @@ //! //! This module directly accesses the mesa repo through the Rust SDK, on a per-repo basis. +use std::collections::HashMap; +use std::ffi::OsString; use std::future::Future; -use std::{collections::HashMap, ffi::OsStr, path::PathBuf, time::SystemTime}; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::SystemTime; +use std::{ffi::OsStr, path::PathBuf}; use base64::Engine as _; use bytes::Bytes; use mesa_dev::MesaClient; use mesa_dev::low_level::content::{Content, DirEntry as MesaDirEntry}; use num_traits::cast::ToPrimitive as _; -use tracing::{Instrument as _, instrument, trace, warn}; +use tracing::warn; use git_fs::cache::fcache::FileCache; use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _}; +use git_fs::fs::async_fs::{FileReader, FsDataProvider}; +use git_fs::fs::{ + INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags as AsyncOpenFlags, +}; use crate::app_config::CacheConfig; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FileOpenOptions, FilesystemStats, Fs, Inode, - LockOwner, OpenFile, OpenFlags, -}; use super::common::MesaApiError; -pub use super::common::{ - GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::icache as mescloud_icache; -use super::icache::{InodeControlBlock, MescloudICache}; +pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; + +fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { + match &e { + MesaApiError::Response { status, .. 
} if *status == 404 => { + std::io::Error::from_raw_os_error(libc::ENOENT) + } + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. } => std::io::Error::other(e), + } +} -pub(super) struct RepoResolver { +#[derive(Clone)] +pub(super) struct MesRepoProvider { + inner: Arc, +} + +struct MesRepoProviderInner { client: MesaClient, org_name: String, repo_name: String, ref_: String, fs_owner: (u32, u32), - block_size: u32, + next_addr: AtomicU64, + /// Maps inode addresses to repo-relative paths (e.g., "src/main.rs"). + /// Root directory maps to an empty `PathBuf`. + path_map: scc::HashMap, + file_cache: Option>>, +} + +impl MesRepoProvider { + pub(super) fn new( + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + fs_owner: (u32, u32), + file_cache: Option>>, + ) -> Self { + Self { + inner: Arc::new(MesRepoProviderInner { + client, + org_name, + repo_name, + ref_, + fs_owner, + next_addr: AtomicU64::new(2), // 1 is reserved for root + path_map: scc::HashMap::new(), + file_cache, + }), + } + } + + /// Store the path for the root inode address. + pub(super) fn seed_root_path(&self, root_addr: InodeAddr) { + // Root maps to empty PathBuf (no path prefix for API calls) + drop(self.inner.path_map.insert_sync(root_addr, PathBuf::new())); + } + + /// Remove the path entry for an inode. Called during forget/cleanup. + #[expect(dead_code, reason = "will be needed when child forget is implemented")] + pub(super) fn remove_path(&self, addr: InodeAddr) { + self.inner.path_map.remove_sync(&addr); + } + + /// The name of the repository. 
+ pub(super) fn repo_name(&self) -> &str { + &self.inner.repo_name + } } -impl IcbResolver for RepoResolver { - type Icb = InodeControlBlock; - type Error = LookupError; +impl FsDataProvider for MesRepoProvider { + type Reader = MesFileReader; + + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let name = name.to_os_string(); + async move { + let parent_path = inner + .path_map + .get_async(&parent.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let child_path = parent_path.join(&name); + let child_path_str = child_path.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })?; + + let content = inner + .client + .org(&inner.org_name) + .repos() + .at(&inner.repo_name) + .content() + .get(Some(inner.ref_.as_str()), Some(child_path_str), Some(1u64)) + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let now = SystemTime::now(); + let (uid, gid) = inner.fs_owner; + + let (itype, size) = match &content { + Content::File(f) => (INodeType::File, f.size.to_u64().unwrap_or(0)), + Content::Symlink(s) => (INodeType::File, s.size.to_u64().unwrap_or(0)), + Content::Dir(_) => (INodeType::Directory, 0), + }; + + let perms = if itype == INodeType::Directory { + InodePerms::from_bits_truncate(0o755) + } else { + InodePerms::from_bits_truncate(0o644) + }; + + let addr = inner.next_addr.fetch_add(1, Ordering::Relaxed); + drop(inner.path_map.insert_async(addr, child_path).await); + + Ok(INode { + addr, + permissions: perms, + uid, + gid, + create_time: now, + last_modified_at: now, + parent: Some(parent.addr), + size, + itype, + }) + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let inner = Arc::clone(&self.inner); + async move { + let parent_path = inner + .path_map + .get_async(&parent.addr) 
+ .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let api_path = if parent_path.as_os_str().is_empty() { + None + } else { + Some( + parent_path + .to_str() + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })? + .to_owned(), + ) + }; + + let content = inner + .client + .org(&inner.org_name) + .repos() + .at(&inner.repo_name) + .content() + .get(Some(inner.ref_.as_str()), api_path.as_deref(), Some(1u64)) + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let dir = match content { + Content::Dir(d) => d, + Content::File(_) | Content::Symlink(_) => { + return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); + } + }; + + let now = SystemTime::now(); + let (uid, gid) = inner.fs_owner; + let mut entries = Vec::with_capacity(dir.entries.len()); + + for entry in dir.entries { + let (name, itype, size) = match entry { + MesaDirEntry::File(f) => { + let Some(name) = f.name else { continue }; + (name, INodeType::File, f.size.to_u64().unwrap_or(0)) + } + MesaDirEntry::Symlink(s) => { + let Some(name) = s.name else { continue }; + (name, INodeType::File, s.size.to_u64().unwrap_or(0)) + } + MesaDirEntry::Dir(d) => { + let Some(name) = d.name else { continue }; + (name, INodeType::Directory, 0) + } + }; + + let perms = if itype == INodeType::Directory { + InodePerms::from_bits_truncate(0o755) + } else { + InodePerms::from_bits_truncate(0o644) + }; + + let addr = inner.next_addr.fetch_add(1, Ordering::Relaxed); + let child_path = parent_path.join(&name); + drop(inner.path_map.insert_async(addr, child_path).await); + + let inode = INode { + addr, + permissions: perms, + uid, + gid, + create_time: now, + last_modified_at: now, + parent: Some(parent.addr), + size, + itype, + }; + + entries.push((OsString::from(name), inode)); + } + + Ok(entries) + } + } - fn resolve( + fn open( &self, - ino: Inode, - stub: Option, - 
cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { + inode: INode, + _flags: AsyncOpenFlags, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + async move { + let path = inner + .path_map + .get_async(&inode.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + Ok(MesFileReader { + client: inner.client.clone(), + org_name: inner.org_name.clone(), + repo_name: inner.repo_name.clone(), + ref_: inner.ref_.clone(), + path, + file_cache: inner.file_cache.clone(), + inode_addr: inode.addr, + }) + } + } +} + +pub(super) struct MesFileReader { + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + path: PathBuf, + file_cache: Option>>, + inode_addr: InodeAddr, +} + +impl FileReader for MesFileReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { let client = self.client.clone(); let org_name = self.org_name.clone(); let repo_name = self.repo_name.clone(); let ref_ = self.ref_.clone(); - let fs_owner = self.fs_owner; - let block_size = self.block_size; + let path = self.path.clone(); + let file_cache = self.file_cache.clone(); + let inode_addr = self.inode_addr; async move { - let stub = stub.ok_or(LookupError::InodeNotFound)?; - let file_path = build_repo_path(stub.parent, &stub.path, cache, RepoFs::ROOT_INO).await; - - // Non-root inodes must have a resolvable path. - if stub.parent.is_some() && file_path.is_none() { - return Err(LookupError::InodeNotFound); + // Try the file cache first. + if let Some(cache) = &file_cache + && let Some(data) = cache.get(&inode_addr).await + { + let start = usize::try_from(offset) + .unwrap_or(data.len()) + .min(data.len()); + let end = start.saturating_add(size as usize).min(data.len()); + return Ok(Bytes::copy_from_slice(&data[start..end])); } + // Cache miss -- fetch from the Mesa API. 
+ let path_str = path.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })?; + + let api_path = if path_str.is_empty() { + None + } else { + Some(path_str) + }; + let content = client .org(&org_name) .repos() .at(&repo_name) .content() - .get(Some(ref_.as_str()), file_path.as_deref(), Some(1u64)) + .get(Some(ref_.as_str()), api_path, None) .await - .map_err(MesaApiError::from)?; - - let now = SystemTime::now(); - let attr = match &content { - Content::File(f) => { - let size = f.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: mescloud_icache::make_common_file_attr( - ino, 0o644, now, now, fs_owner, block_size, - ), - size, - blocks: mescloud_icache::blocks_of_size(block_size, size), - } + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let encoded_content = match content { + Content::File(f) => f.content.unwrap_or_default(), + Content::Symlink(s) => s.content.unwrap_or_default(), + Content::Dir(_) => { + return Err(std::io::Error::from_raw_os_error(libc::EISDIR)); } - Content::Symlink(s) => { - let size = s.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: mescloud_icache::make_common_file_attr( - ino, 0o644, now, now, fs_owner, block_size, - ), - size, - blocks: mescloud_icache::blocks_of_size(block_size, size), - } - } - Content::Dir(_) => FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }, }; - let children = match content { - Content::Dir(d) => Some( - d.entries - .into_iter() - .filter_map(|e| { - let (name, kind) = match e { - MesaDirEntry::File(f) => (f.name?, DirEntryType::RegularFile), - // TODO(MES-712): return DirEntryType::Symlink once readlink is wired up. 
- MesaDirEntry::Symlink(s) => (s.name?, DirEntryType::RegularFile), - MesaDirEntry::Dir(d) => (d.name?, DirEntryType::Directory), - }; - Some((name, kind)) - }) - .collect(), - ), - Content::File(_) | Content::Symlink(_) => None, - }; + let decoded = base64::engine::general_purpose::STANDARD + .decode(&encoded_content) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - Ok(InodeControlBlock { - parent: stub.parent, - path: stub.path, - rc: stub.rc, - attr: Some(attr), - children, - }) + let start = usize::try_from(offset) + .unwrap_or(decoded.len()) + .min(decoded.len()); + let end = start.saturating_add(size as usize).min(decoded.len()); + let result = Bytes::copy_from_slice(&decoded[start..end]); + + // Store the decoded content in the cache for future reads. + if let Some(cache) = &file_cache + && let Err(e) = cache.insert(&inode_addr, decoded).await + { + warn!(error = ?e, inode_addr, "failed to cache file content"); + } + + Ok(result) } - .instrument(tracing::info_span!("RepoResolver::resolve", ino)) } } -/// Walk the parent chain in the cache to build the repo-relative path. -/// Returns `None` for the root inode (maps to `path=None` in the mesa content API). -async fn build_repo_path( - parent: Option, - name: &std::path::Path, - cache: &AsyncICache, - root_ino: Inode, -) -> Option { - /// Maximum parent-chain depth before bailing out. Prevents infinite loops - /// if a bug creates a cycle in the parent pointers. 
- const MAX_DEPTH: usize = 1024;
-
-    let parent = parent?;
-    if parent == root_ino {
-        return name.to_str().map(String::from);
+mod repo_fs_inner {
+    #![allow(clippy::future_not_send, clippy::mem_forget)]
+    use git_fs::cache::async_backed::FutureBackedCache;
+    use git_fs::fs::async_fs::AsyncFs;
+    use git_fs::fs::{INode, InodeAddr};
+    use ouroboros::self_referencing;
+
+    use super::MesRepoProvider;
+
+    #[self_referencing]
+    pub struct RepoFsInner {
+        pub(super) inode_table: FutureBackedCache,
+        #[borrows(inode_table)]
+        #[covariant]
+        pub(super) fs: AsyncFs<'this, MesRepoProvider>,
    }

-    let mut components = vec![name.to_path_buf()];
-    let mut current = parent;
-    for _ in 0..MAX_DEPTH {
-        if current == root_ino {
-            break;
+    impl RepoFsInner {
+        pub fn create(
+            inode_table: FutureBackedCache,
+            provider: MesRepoProvider,
+        ) -> Self {
+            RepoFsInnerBuilder {
+                inode_table,
+                fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl),
+            }
+            .build()
        }
-        let (path, next_parent) = cache
-            .get_icb(current, |icb| (icb.path.clone(), icb.parent))
-            .await?;
-        components.push(path);
-        current = next_parent?;
    }
-    if current != root_ino {
-        tracing::warn!("build_repo_path: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle");
-        return None;
-    }
-    components.reverse();
-    let joined: PathBuf = components.iter().collect();
-    joined.to_str().map(String::from)
}
+use repo_fs_inner::RepoFsInner;

/// A filesystem rooted at a single mesa repository.
///
-/// Implements [`Fs`] for navigating files and directories within one repo.
-/// Does not handle organizations or multi-repo hierarchy — that is [`super::MesaFS`]'s job.
+/// Wraps [`AsyncFs`] via ouroboros to co-locate the inode table
+/// and the filesystem that borrows it. Implements [`ChildFs`] as a thin adapter.
pub struct RepoFs { - client: MesaClient, - org_name: String, - repo_name: String, - ref_: String, - - icache: MescloudICache, - file_table: FileTable, - readdir_buf: Vec, - open_files: HashMap, - file_cache: Option>, + inner: RepoFsInner, + /// Reference counts for inodes held by the kernel. + refcounts: rustc_hash::FxHashMap, + /// Open file handles mapped to readers. + open_files: HashMap>, + /// Provider clone for accessing `repo_name` and `path_map` cleanup. + provider: MesRepoProvider, } impl RepoFs { - pub(crate) const ROOT_INO: Inode = 1; - const BLOCK_SIZE: u32 = 4096; + pub(crate) const ROOT_INO: InodeAddr = 1; /// Create a new `RepoFs` for a specific org and repo. pub async fn new( @@ -203,24 +442,15 @@ impl RepoFs { fs_owner: (u32, u32), cache_config: CacheConfig, ) -> Self { - let resolver = RepoResolver { - client: client.clone(), - org_name: org_name.clone(), - repo_name: repo_name.clone(), - ref_: ref_.clone(), - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - let file_cache = match cache_config.max_size { Some(max_size) if max_size.as_u64() > 0 => { let cache_dir = cache_config.path.join(&org_name).join(&repo_name); let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); match FileCache::new(&cache_dir, max_bytes).await { - Ok(cache) => Some(cache), + Ok(cache) => Some(Arc::new(cache)), Err(e) => { warn!(error = ?e, org = %org_name, repo = %repo_name, - "failed to create file cache, continuing without caching",); + "failed to create file cache, continuing without caching"); None } } @@ -228,317 +458,140 @@ impl RepoFs { _ => None, }; + let provider = + MesRepoProvider::new(client, org_name, repo_name, ref_, fs_owner, file_cache); + provider.seed_root_path(Self::ROOT_INO); + + let root = INode { + addr: Self::ROOT_INO, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: SystemTime::now(), + last_modified_at: SystemTime::now(), + parent: None, + size: 0, + itype: 
INodeType::Directory, + }; + + let inode_table = git_fs::cache::async_backed::FutureBackedCache::default(); + inode_table.insert_sync(root.addr, root); + + let inner = RepoFsInner::create(inode_table, provider.clone()); + + let mut refcounts = rustc_hash::FxHashMap::default(); + refcounts.insert(Self::ROOT_INO, 1); + Self { - client, - org_name, - repo_name, - ref_, - icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - file_table: FileTable::new(), - readdir_buf: Vec::new(), + inner, + refcounts, open_files: HashMap::new(), - file_cache, + provider, } } /// The name of the repository this filesystem is rooted at. pub(crate) fn repo_name(&self) -> &str { - &self.repo_name - } - - /// Build the repo-relative path for an inode by walking up the parent chain. - /// - /// Returns `None` for the root inode (the repo top-level maps to `path=None` in the - /// mesa content API). - async fn path_of_inode(&self, ino: Inode) -> Option { - /// Maximum parent-chain depth before bailing out. 
- const MAX_DEPTH: usize = 1024; - - if ino == Self::ROOT_INO { - return None; - } - - let mut components = Vec::new(); - let mut current = ino; - for _ in 0..MAX_DEPTH { - if current == Self::ROOT_INO { - break; - } - let (path, parent) = self - .icache - .get_icb(current, |icb| (icb.path.clone(), icb.parent)) - .await?; - components.push(path); - current = parent?; - } - if current != Self::ROOT_INO { - tracing::warn!( - ino, - "path_of_inode: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle" - ); - return None; - } - components.reverse(); - let joined: PathBuf = components.iter().collect(); - joined.to_str().map(String::from) + self.provider.repo_name() } } -#[async_trait::async_trait] -impl super::common::InodeCachePeek for RepoFs { - async fn peek_attr(&self, ino: Inode) -> Option { - self.icache.get_attr(ino).await - } +#[expect( + clippy::wildcard_enum_match_arm, + reason = "mapping all ErrorKind variants is impractical; EIO is the sensible default" +)] +fn io_error_to_errno(e: &std::io::Error) -> i32 { + e.raw_os_error().unwrap_or_else(|| match e.kind() { + std::io::ErrorKind::NotFound => libc::ENOENT, + std::io::ErrorKind::PermissionDenied => libc::EACCES, + std::io::ErrorKind::AlreadyExists => libc::EEXIST, + _ => libc::EIO, + }) } #[async_trait::async_trait] -impl Fs for RepoFs { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - - #[instrument(name = "RepoFs::lookup", skip(self), fields(repo = %self.repo_name))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { - debug_assert!( - self.icache.contains(parent), - "lookup: parent inode {parent} not in inode table" - ); - - let ino = self.icache.ensure_child_ino(parent, name).await; - let attr = self - .icache - .get_or_resolve(ino, |icb| icb.attr) - .await? 
- .ok_or(LookupError::InodeNotFound)?; - - let rc = self - .icache - .inc_rc(ino) +impl super::common::ChildFs for RepoFs { + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { + let tracked = self + .inner + .borrow_fs() + .lookup(LoadedAddr(parent), name) .await - .ok_or(LookupError::InodeNotFound)?; - trace!(ino, ?name, rc, "resolved inode"); - Ok(attr) - } - - #[instrument(name = "RepoFs::getattr", skip(self), fields(repo = %self.repo_name))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.icache.get_attr(ino).await.ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) + .map_err(|e| { + if io_error_to_errno(&e) == libc::ENOENT { + LookupError::InodeNotFound + } else { + LookupError::RemoteMesaError(MesaApiError::Io(e)) + } + })?; + *self.refcounts.entry(tracked.inode.addr).or_insert(0) += 1; + Ok(tracked.inode) } - #[instrument(name = "RepoFs::readdir", skip(self), fields(repo = %self.repo_name))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - debug_assert!( - self.icache.contains(ino), - "readdir: inode {ino} not in inode table" - ); - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::Directory { .. }) | None - ), - "readdir: inode {ino} has non-directory cached attr" - ); - - let children = self - .icache - .get_or_resolve(ino, |icb| icb.children.clone()) - .await? - .ok_or(ReadDirError::NotADirectory)?; - - trace!( - ino, - count = children.len(), - "readdir: resolved directory listing from icache" - ); - - self.icache.evict_zero_rc_children(ino).await; - - let mut entries = Vec::with_capacity(children.len()); - for (name, kind) in &children { - let child_ino = self.icache.ensure_child_ino(ino, OsStr::new(name)).await; - // Only cache directory attrs in readdir. File attrs are left as - // None so that lookup triggers the resolver to fetch the real file - // size. 
Caching placeholder file attrs (size=0) would poison - // needs_resolve(), preventing resolution on subsequent lookups. - if *kind == DirEntryType::Directory { - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - child_ino, - 0o755, - now, - now, - self.icache.fs_owner(), - self.icache.block_size(), - ), - }; - self.icache.cache_attr(child_ino, attr).await; - } - entries.push(DirEntry { - ino: child_ino, - name: name.clone().into(), - kind: *kind, - }); - } - - self.readdir_buf = entries; - Ok(&self.readdir_buf) + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { + let mut entries = Vec::new(); + self.inner + .borrow_fs() + .readdir(LoadedAddr(ino), 0, |de, _offset| { + entries.push((de.name.to_os_string(), de.inode)); + false + }) + .await + .map_err(|e| { + if io_error_to_errno(&e) == libc::ENOTDIR { + ReadDirError::NotADirectory + } else if io_error_to_errno(&e) == libc::ENOENT { + ReadDirError::InodeNotFound + } else { + ReadDirError::RemoteMesaError(MesaApiError::Io(e)) + } + })?; + Ok(entries) } - #[instrument(name = "RepoFs::open", skip(self), fields(repo = %self.repo_name))] - async fn open(&mut self, ino: Inode, _flags: OpenFlags) -> Result { - if !self.icache.contains(ino) { - warn!(ino, "open on unknown inode"); - return Err(OpenError::InodeNotFound); - } - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::RegularFile { .. 
}) | None - ), - "open: inode {ino} has non-file cached attr" - ); - let fh = self.file_table.allocate(); - self.open_files.insert(fh, ino); - trace!(ino, fh, "assigned file handle"); - Ok(OpenFile { - handle: fh, - options: FileOpenOptions::empty(), - }) + async fn open( + &mut self, + ino: InodeAddr, + flags: AsyncOpenFlags, + ) -> Result { + let open_file = self + .inner + .borrow_fs() + .open(LoadedAddr(ino), flags) + .await + .map_err(|_| OpenError::InodeNotFound)?; + self.open_files + .insert(open_file.fh, Arc::clone(&open_file.reader)); + Ok(open_file.fh) } - #[instrument(name = "RepoFs::read", skip(self), fields(repo = %self.repo_name))] async fn read( &mut self, - ino: Inode, - fh: FileHandle, + _ino: InodeAddr, + fh: git_fs::fs::FileHandle, offset: u64, size: u32, - _flags: OpenFlags, - _lock_owner: Option, ) -> Result { - let &file_ino = self.open_files.get(&fh).ok_or_else(|| { - warn!(fh, "read on unknown file handle"); - ReadError::FileNotOpen - })?; - debug_assert!( - file_ino == ino, - "read: file handle {fh} maps to inode {file_ino}, but caller passed inode {ino}" - ); - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::RegularFile { .. }) | None - ), - "read: inode {ino} has non-file cached attr" - ); - - // Try the file cache first. - if let Some(cache) = &self.file_cache - && let Some(data) = cache.get(&ino).await - { - let start = usize::try_from(offset) - .unwrap_or(data.len()) - .min(data.len()); - let end = start.saturating_add(size as usize).min(data.len()); - trace!( - ino, - fh, - cached = true, - decoded_len = data.len(), - start, - end, - "read content" - ); - return Ok(Bytes::copy_from_slice(&data[start..end])); - } - - // Cache miss — fetch from the Mesa API. 
- let file_path = self.path_of_inode(ino).await; - - if ino != Self::ROOT_INO && file_path.is_none() { - warn!(ino, "read: path_of_inode returned None for non-root inode"); - return Err(ReadError::InodeNotFound); - } - - let content = self - .client - .org(&self.org_name) - .repos() - .at(&self.repo_name) - .content() - .get(Some(self.ref_.as_str()), file_path.as_deref(), None) - .await - .map_err(MesaApiError::from)?; - - let encoded_content = match content { - Content::File(f) => f.content.unwrap_or_default(), - // TODO(MES-712): return ReadError::NotAFile once symlinks are surfaced as - // DirEntryType::Symlink, and implement readlink to return the link target. - Content::Symlink(s) => s.content.unwrap_or_default(), - Content::Dir(_) => return Err(ReadError::NotAFile), - }; - - let decoded = base64::engine::general_purpose::STANDARD.decode(&encoded_content)?; - - let start = usize::try_from(offset) - .unwrap_or(decoded.len()) - .min(decoded.len()); - let end = start.saturating_add(size as usize).min(decoded.len()); - let result = Bytes::copy_from_slice(&decoded[start..end]); - trace!(ino, fh, cached = false, path = ?file_path, decoded_len = decoded.len(), start, end, "read content"); - - // Store the decoded content in the cache for future reads. 
- if let Some(cache) = &self.file_cache - && let Err(e) = cache.insert(&ino, decoded).await - { - warn!(error = ?e, ino, "failed to cache file content"); - } - - Ok(result) + let reader = self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; + reader.read(offset, size).await.map_err(|e| { + if io_error_to_errno(&e) == libc::EISDIR { + ReadError::NotAFile + } else if io_error_to_errno(&e) == libc::ENOENT { + ReadError::InodeNotFound + } else { + ReadError::RemoteMesaError(MesaApiError::Io(e)) + } + }) } - #[instrument(name = "RepoFs::release", skip(self), fields(repo = %self.repo_name))] async fn release( &mut self, - ino: Inode, - fh: FileHandle, - _flags: OpenFlags, - _flush: bool, + _ino: InodeAddr, + fh: git_fs::fs::FileHandle, ) -> Result<(), ReleaseError> { - let released_ino = self.open_files.remove(&fh).ok_or_else(|| { - warn!(fh, "release on unknown file handle"); - ReleaseError::FileNotOpen - })?; - debug_assert!( - released_ino == ino, - "release: file handle {fh} mapped to inode {released_ino}, but caller passed inode {ino}" - ); - trace!(ino = released_ino, fh, "closed file handle"); + self.open_files + .remove(&fh) + .ok_or(ReleaseError::FileNotOpen)?; Ok(()) } - - #[instrument(name = "RepoFs::forget", skip(self), fields(repo = %self.repo_name))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - debug_assert!( - self.icache.contains(ino), - "forget: inode {ino} not in inode table" - ); - - self.icache.forget(ino, nlookups).await; - } - - async fn statfs(&mut self) -> Result { - Ok(self.icache.statfs()) - } } diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 003e1b04..a696e56f 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,4 +1 @@ -pub mod fuser; -pub mod icache; pub mod mescloud; -pub mod r#trait; diff --git a/src/fs/trait.rs b/src/fs/trait.rs deleted file mode 100644 index f4d98529..00000000 --- a/src/fs/trait.rs +++ /dev/null @@ -1,375 +0,0 @@ -//! Generic trait for implementing filesystems. -//! -//! 
Note that this is a slightly cleaner interface than directly using fuser. The whole point of -//! this is to abstract away fuser-specific details. -use async_trait::async_trait; -use std::{ - ffi::{OsStr, OsString}, - time::{Duration, SystemTime}, -}; -use tracing::error; - -use bitflags::bitflags; -use bytes::Bytes; - -/// Type representing an inode. -pub type Inode = u64; - -pub type FileHandle = u64; - -/// An opaque lock owner identifier provided by the kernel. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct LockOwner(pub u64); - -bitflags! { - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct Permissions: u16 { - // Other - const OTHER_EXECUTE = 1 << 0; - const OTHER_WRITE = 1 << 1; - const OTHER_READ = 1 << 2; - - // Group - const GROUP_EXECUTE = 1 << 3; - const GROUP_WRITE = 1 << 4; - const GROUP_READ = 1 << 5; - - // Owner - const OWNER_EXECUTE = 1 << 6; - const OWNER_WRITE = 1 << 7; - const OWNER_READ = 1 << 8; - - // Special bits - const STICKY = 1 << 9; - const SETGID = 1 << 10; - const SETUID = 1 << 11; - - const OTHER_RWX = Self::OTHER_READ.bits() - | Self::OTHER_WRITE.bits() - | Self::OTHER_EXECUTE.bits(); - const GROUP_RWX = Self::GROUP_READ.bits() - | Self::GROUP_WRITE.bits() - | Self::GROUP_EXECUTE.bits(); - const OWNER_RWX = Self::OWNER_READ.bits() - | Self::OWNER_WRITE.bits() - | Self::OWNER_EXECUTE.bits(); - } -} - -bitflags! 
{ - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct OpenFlags: i32 { - // Access modes (mutually exclusive) - const RDONLY = libc::O_RDONLY; - const WRONLY = libc::O_WRONLY; - const RDWR = libc::O_RDWR; - - // Creation/status flags - const APPEND = libc::O_APPEND; - const TRUNC = libc::O_TRUNC; - const CREAT = libc::O_CREAT; - const EXCL = libc::O_EXCL; - - // Behavior flags - const NONBLOCK = libc::O_NONBLOCK; - const SYNC = libc::O_SYNC; - const DSYNC = libc::O_DSYNC; - const NOFOLLOW = libc::O_NOFOLLOW; - const CLOEXEC = libc::O_CLOEXEC; - const DIRECTORY = libc::O_DIRECTORY; - - #[cfg(target_os = "linux")] - const NOATIME = libc::O_NOATIME; - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct CommonFileAttr { - pub ino: Inode, - pub atime: SystemTime, - pub mtime: SystemTime, - pub ctime: SystemTime, - pub crtime: SystemTime, - pub perm: Permissions, - pub nlink: u32, - pub uid: u32, - pub gid: u32, - pub blksize: u32, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum FileAttr { - RegularFile { - common: CommonFileAttr, - size: u64, - blocks: u64, - }, - Directory { - common: CommonFileAttr, - }, - Symlink { - common: CommonFileAttr, - size: u64, - }, - CharDevice { - common: CommonFileAttr, - rdev: u64, - }, - BlockDevice { - common: CommonFileAttr, - rdev: u64, - }, - NamedPipe { - common: CommonFileAttr, - }, - Socket { - common: CommonFileAttr, - }, -} - -impl FileAttr { - pub fn common(&self) -> &CommonFileAttr { - match self { - Self::RegularFile { common, .. } - | Self::Directory { common } - | Self::Symlink { common, .. } - | Self::CharDevice { common, .. } - | Self::BlockDevice { common, .. } - | Self::NamedPipe { common } - | Self::Socket { common } => common, - } - } -} - -bitflags! 
{ - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub (crate) struct FileOpenOptions: u32 { - const DIRECT_IO = 1 << 0; - const KEEP_CACHE = 1 << 1; - const NONSEEKABLE = 1 << 2; - const STREAM = 1 << 4; - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct OpenFile { - pub handle: FileHandle, - pub options: FileOpenOptions, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum DirEntryType { - RegularFile, - Directory, - Symlink, - CharDevice, - BlockDevice, - NamedPipe, - Socket, -} - -impl TryFrom for FileAttr { - type Error = (); - - #[expect( - clippy::cast_possible_truncation, - reason = "metadata mode/nlink/blksize narrowing is intentional" - )] - #[expect( - clippy::cast_sign_loss, - reason = "nsecs from MetadataExt is always in [0, 999_999_999]" - )] - fn try_from(meta: std::fs::Metadata) -> Result { - use std::os::unix::fs::FileTypeExt as _; - use std::os::unix::fs::MetadataExt as _; - - fn to_systime(secs: i64, nsecs: i64) -> SystemTime { - if secs >= 0 { - std::time::UNIX_EPOCH + Duration::new(secs.cast_unsigned(), nsecs as u32) - } else { - // nsecs is always in [0, 999_999_999] from MetadataExt. - // For negative secs, subtract whole seconds then add back nsecs. 
- std::time::UNIX_EPOCH - Duration::from_secs((-secs).cast_unsigned()) - + Duration::from_nanos(nsecs.cast_unsigned()) - } - } - - let common_attr = CommonFileAttr { - ino: meta.ino(), - atime: to_systime(meta.atime(), meta.atime_nsec()), - mtime: to_systime(meta.mtime(), meta.mtime_nsec()), - ctime: to_systime(meta.ctime(), meta.ctime_nsec()), - crtime: to_systime(0, 0), // Not available in std::fs::Metadata - perm: Permissions::from_bits_truncate(meta.mode() as u16), - nlink: meta.nlink() as u32, - uid: meta.uid(), - gid: meta.gid(), - blksize: meta.blksize() as u32, - }; - - let ft = meta.file_type(); - if ft.is_file() { - Ok(Self::RegularFile { - common: common_attr, - size: meta.len(), - blocks: meta.blocks(), - }) - } else if ft.is_dir() { - Ok(Self::Directory { - common: common_attr, - }) - } else if ft.is_symlink() { - Ok(Self::Symlink { - common: common_attr, - size: meta.len(), - }) - } else if ft.is_char_device() { - Ok(Self::CharDevice { - common: common_attr, - rdev: meta.rdev(), - }) - } else if ft.is_block_device() { - Ok(Self::BlockDevice { - common: common_attr, - rdev: meta.rdev(), - }) - } else if ft.is_fifo() { - Ok(Self::NamedPipe { - common: common_attr, - }) - } else if ft.is_socket() { - Ok(Self::Socket { - common: common_attr, - }) - } else { - debug_assert!( - false, - "Unknown file type encountered in FileAttr conversion" - ); - Err(()) - } - } -} - -impl From for DirEntryType { - fn from(attr: FileAttr) -> Self { - match attr { - FileAttr::RegularFile { .. } => Self::RegularFile, - FileAttr::Directory { .. } => Self::Directory, - FileAttr::Symlink { .. } => Self::Symlink, - FileAttr::CharDevice { .. } => Self::CharDevice, - FileAttr::BlockDevice { .. } => Self::BlockDevice, - FileAttr::NamedPipe { .. } => Self::NamedPipe, - FileAttr::Socket { .. 
} => Self::Socket, - } - } -} - -impl TryFrom for DirEntryType { - type Error = (); - - fn try_from(ft: std::fs::FileType) -> Result { - use std::os::unix::fs::FileTypeExt as _; - - if ft.is_file() { - Ok(Self::RegularFile) - } else if ft.is_dir() { - Ok(Self::Directory) - } else if ft.is_symlink() { - Ok(Self::Symlink) - } else if ft.is_char_device() { - Ok(Self::CharDevice) - } else if ft.is_block_device() { - Ok(Self::BlockDevice) - } else if ft.is_fifo() { - Ok(Self::NamedPipe) - } else if ft.is_socket() { - Ok(Self::Socket) - } else { - debug_assert!( - false, - "Unknown file type encountered in DirEntryType conversion" - ); - error!(ft = ?ft, "Unknown file type encountered in DirEntryType conversion"); - Err(()) - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DirEntry { - pub ino: Inode, - // TODO(markovejnovic): This OsString is hella expensive - pub name: OsString, - pub kind: DirEntryType, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct FilesystemStats { - pub block_size: u32, - pub fragment_size: u64, - pub total_blocks: u64, - pub free_blocks: u64, - pub available_blocks: u64, - pub total_inodes: u64, - pub free_inodes: u64, - pub available_inodes: u64, - pub filesystem_id: u64, - pub mount_flags: u32, - pub max_filename_length: u32, -} - -#[async_trait] -pub trait Fs { - type LookupError: std::error::Error; - type GetAttrError: std::error::Error; - type OpenError: std::error::Error; - type ReadError: std::error::Error; - type ReaddirError: std::error::Error; - type ReleaseError: std::error::Error; - - /// For each lookup call made by the kernel, it expects the icache to be updated with the - /// returned `FileAttr`. - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result; - - /// Can be called in two contexts -- the file is not open (in which case `fh` is `None`), - /// or the file is open (in which case `fh` is `Some`). 
- async fn getattr( - &mut self, - ino: Inode, - fh: Option, - ) -> Result; - - /// Read the contents of a directory. - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], Self::ReaddirError>; - - /// Open a file for reading. - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result; - - /// Read data from an open file. - #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] - async fn read( - &mut self, - ino: Inode, - fh: FileHandle, - offset: u64, - size: u32, - flags: OpenFlags, - lock_owner: Option, - ) -> Result; - - /// Called when the kernel closes a file handle. - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), Self::ReleaseError>; - - /// Called when the kernel is done with an inode. - async fn forget(&mut self, ino: Inode, nlookups: u64); - - /// Get filesystem statistics. - async fn statfs(&mut self) -> Result; -} diff --git a/tests/async_fs_correctness.rs b/tests/async_fs_correctness.rs new file mode 100644 index 00000000..5fe27a28 --- /dev/null +++ b/tests/async_fs_correctness.rs @@ -0,0 +1,609 @@ +#![allow(clippy::unwrap_used, clippy::expect_used, missing_docs)] + +mod common; + +use std::ffi::{OsStr, OsString}; + +use git_fs::cache::async_backed::FutureBackedCache; +use git_fs::fs::async_fs::{AsyncFs, InodeLifecycle}; +use git_fs::fs::{INode, INodeType, LoadedAddr, OpenFlags}; + +use common::async_fs_mocks::{MockFsDataProvider, MockFsState, make_inode}; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_inc_returns_count_after_increment() { + let table = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + + assert_eq!(lifecycle.inc(100), 1, "first inc should return 1"); + assert_eq!(lifecycle.inc(100), 2, "second inc should return 2"); + assert_eq!(lifecycle.inc(100), 3, "third inc 
should return 3"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_returns_remaining_count() { + let table = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + lifecycle.inc(100); + lifecycle.inc(100); + + assert_eq!(lifecycle.dec(&100), Some(1), "dec from 2 should give 1"); + assert_eq!(lifecycle.dec(&100), Some(0), "dec from 1 should give 0"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_unknown_addr_returns_none() { + let table: FutureBackedCache = FutureBackedCache::default(); + let mut lifecycle = InodeLifecycle::from_table(table); + + assert_eq!( + lifecycle.dec(&999), + None, + "dec on unknown key should return None" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_to_zero_evicts_from_table() { + let table = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + lifecycle.inc(100); + + assert_eq!(lifecycle.dec(&100), Some(0)); + // The inode should have been evicted from the table. 
+ assert!( + lifecycle.table().get(&100).await.is_none(), + "inode should be evicted after refcount hits zero" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_count_decrements_by_n() { + let table: FutureBackedCache = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + lifecycle.inc(100); + lifecycle.inc(100); + lifecycle.inc(100); // count = 3 + + assert_eq!( + lifecycle.dec_count(&100, 2), + Some(1), + "dec_count(3, 2) should give 1" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_count_to_zero_evicts() { + let table = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + lifecycle.inc(100); + lifecycle.inc(100); // count = 2 + + assert_eq!(lifecycle.dec_count(&100, 2), Some(0)); + assert!( + lifecycle.table().get(&100).await.is_none(), + "inode should be evicted after dec_count to zero" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_table_returns_underlying_cache() { + let table = FutureBackedCache::default(); + let inode = make_inode(42, INodeType::Directory, 0, None); + table.insert_sync(42, inode); + + let lifecycle = InodeLifecycle::from_table(table); + + let fetched = lifecycle.table().get(&42).await; + assert_eq!( + fetched.map(|n| n.addr), + Some(42), + "table() should expose the underlying cache" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn new_seeds_root_inode_into_table() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + + assert_eq!(fs.inode_count(), 1, "root 
should be the only inode"); + let fetched = table.get(&1).await; + assert_eq!( + fetched.map(|n| n.addr), + Some(1), + "root inode should be in the table" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn new_preseeded_does_not_insert_root() { + let table: FutureBackedCache = FutureBackedCache::default(); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new_preseeded(dp, &table); + + assert_eq!( + fs.inode_count(), + 0, + "preseeded constructor should not insert anything" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn statfs_reports_inode_count() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + let stats = fs.statfs(); + + assert_eq!(stats.block_size, 4096); + assert_eq!(stats.total_inodes, 1, "should reflect the root inode"); + assert_eq!(stats.free_blocks, 0); + assert_eq!(stats.max_filename_length, 255); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn loaded_inode_returns_seeded_inode() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + + let inode = fs.loaded_inode(LoadedAddr(1)).await.unwrap(); + assert_eq!(inode.addr, 1); + assert_eq!(inode.itype, INodeType::Directory); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn loaded_inode_returns_enoent_for_missing_addr() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs.loaded_inode(LoadedAddr(999)).await.unwrap_err(); + assert_eq!(err.raw_os_error(), 
Some(libc::ENOENT)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn getattr_delegates_to_loaded_inode() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 4096, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + + let inode = fs.getattr(LoadedAddr(1)).await.unwrap(); + assert_eq!(inode.addr, 1); + assert_eq!(inode.size, 4096); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_resolves_child_via_data_provider() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "readme.md".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let tracked = fs + .lookup(LoadedAddr(1), OsStr::new("readme.md")) + .await + .unwrap(); + + assert_eq!(tracked.inode.addr, 10); + assert_eq!(tracked.inode.size, 42); + assert_eq!(tracked.inode.itype, INodeType::File); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_populates_inode_table() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 100, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "file.txt".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + fs.lookup(LoadedAddr(1), OsStr::new("file.txt")) + .await + .unwrap(); + + // The child should now be in the inode table. 
+ let cached = table.get(&10).await; + assert_eq!( + cached.map(|n| n.addr), + Some(10), + "child inode should be cached in the table after lookup" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_second_call_uses_cache() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 100, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "cached.txt".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let first = fs + .lookup(LoadedAddr(1), OsStr::new("cached.txt")) + .await + .unwrap(); + let second = fs + .lookup(LoadedAddr(1), OsStr::new("cached.txt")) + .await + .unwrap(); + + assert_eq!(first.inode.addr, second.inode.addr); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_propagates_provider_error() { + let root = make_inode(1, INodeType::Directory, 0, None); + // No lookups configured — provider will return ENOENT. 
+ let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs + .lookup(LoadedAddr(1), OsStr::new("nonexistent")) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); +} + +// open and OpenFile::read tests + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_file_handle_and_reader() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 5, Some(1)); + + let mut state = MockFsState::default(); + state + .file_contents + .insert(10, bytes::Bytes::from_static(b"hello")); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, &table).await; + + let open_file = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap(); + + assert!(open_file.fh >= 1, "file handle should start at 1"); + let data = open_file.read(0, 5).await.unwrap(); + assert_eq!(&data[..], b"hello"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_eisdir_for_directory() { + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs.open(LoadedAddr(1), OpenFlags::RDONLY).await.unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::EISDIR)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_enoent_for_missing_inode() { + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs + .open(LoadedAddr(999), OpenFlags::RDONLY) + .await + .unwrap_err(); + 
assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_assigns_unique_file_handles() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 0, Some(1)); + + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, &table).await; + + let fh1 = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap().fh; + let fh2 = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap().fh; + + assert_ne!(fh1, fh2, "each open should produce a unique file handle"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_file_read_with_offset() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 11, Some(1)); + + let mut state = MockFsState::default(); + state + .file_contents + .insert(10, bytes::Bytes::from_static(b"hello world")); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, &table).await; + + let open_file = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap(); + + let data = open_file.read(6, 5).await.unwrap(); + assert_eq!(&data[..], b"world"); +} + +// readdir tests + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_lists_children_sorted_by_name() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_b = make_inode(10, INodeType::File, 10, Some(1)); + let child_a = make_inode(11, INodeType::File, 20, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("b.txt"), child_b), + (OsString::from("a.txt"), child_a), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, 
&table).await; + + let mut entries: Vec<(OsString, u64)> = Vec::new(); + fs.readdir(LoadedAddr(1), 0, |entry, _offset| { + entries.push((entry.name.to_os_string(), entry.inode.addr)); + false // don't stop + }) + .await + .unwrap(); + + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].0, "a.txt", "entries should be sorted by name"); + assert_eq!(entries[0].1, 11); + assert_eq!(entries[1].0, "b.txt"); + assert_eq!(entries[1].1, 10); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_respects_offset() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 10, Some(1)); + let child_b = make_inode(11, INodeType::File, 20, Some(1)); + let child_c = make_inode(12, INodeType::File, 30, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + (OsString::from("b"), child_b), + (OsString::from("c"), child_c), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + // First readdir to populate cache + fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + + // Second readdir starting at offset 2 (skip first two) + let mut entries: Vec = Vec::new(); + fs.readdir(LoadedAddr(1), 2, |entry, _| { + entries.push(entry.name.to_os_string()); + false + }) + .await + .unwrap(); + + assert_eq!(entries, vec![OsString::from("c")]); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_stops_when_filler_returns_true() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 10, Some(1)); + let child_b = make_inode(11, INodeType::File, 20, Some(1)); + let child_c = make_inode(12, INodeType::File, 30, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + 
(OsString::from("b"), child_b), + (OsString::from("c"), child_c), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let mut count = 0; + fs.readdir(LoadedAddr(1), 0, |_, _| { + count += 1; + count >= 2 // stop after 2 entries + }) + .await + .unwrap(); + + assert_eq!(count, 2, "filler should have been called exactly twice"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_returns_enotdir_for_file() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 100, Some(1)); + + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs + .readdir(LoadedAddr(10), 0, |_, _| false) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOTDIR)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_populates_inode_table_with_children() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + state + .directories + .insert(1, vec![(OsString::from("child.txt"), child)]); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + + let cached = table.get(&10).await; + assert_eq!( + cached.map(|n| n.addr), + Some(10), + "readdir should populate children into the inode table" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_empty_directory() { + let root = make_inode(1, INodeType::Directory, 0, None); + + let mut state = MockFsState::default(); + state.directories.insert(1, vec![]); + let dp = 
MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let mut count = 0; + fs.readdir(LoadedAddr(1), 0, |_, _| { + count += 1; + false + }) + .await + .unwrap(); + + assert_eq!(count, 0, "empty directory should yield no entries"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_provides_correct_next_offsets() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 0, Some(1)); + let child_b = make_inode(11, INodeType::File, 0, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + (OsString::from("b"), child_b), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let mut offsets: Vec = Vec::new(); + fs.readdir(LoadedAddr(1), 0, |_, next_offset| { + offsets.push(next_offset); + false + }) + .await + .unwrap(); + + assert_eq!( + offsets, + vec![1, 2], + "offsets should be 1-indexed and sequential" + ); +} + +// lookup-after-readdir integration test + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_after_readdir_uses_directory_cache() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + // Only configure readdir — no lookup entry. If the directory cache + // fast path is broken, the lookup will fail with ENOENT. + state + .directories + .insert(1, vec![(OsString::from("file.txt"), child)]); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + // readdir populates the directory cache. + fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + + // lookup should hit the directory cache fast path. 
+ let tracked = fs + .lookup(LoadedAddr(1), OsStr::new("file.txt")) + .await + .unwrap(); + assert_eq!(tracked.inode.addr, 10); +} diff --git a/tests/common/async_fs_mocks.rs b/tests/common/async_fs_mocks.rs new file mode 100644 index 00000000..5c132eec --- /dev/null +++ b/tests/common/async_fs_mocks.rs @@ -0,0 +1,104 @@ +#![allow(missing_docs, clippy::unwrap_used)] + +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::sync::Arc; +use std::time::SystemTime; + +use bytes::Bytes; + +use git_fs::fs::async_fs::{FileReader, FsDataProvider}; +use git_fs::fs::{INode, INodeType, InodePerms, OpenFlags}; + +/// Builds an `INode` with sensible defaults. Only `addr` and `itype` are required. +pub fn make_inode(addr: u64, itype: INodeType, size: u64, parent: Option) -> INode { + INode { + addr, + permissions: InodePerms::OWNER_RWX | InodePerms::GROUP_READ | InodePerms::OTHER_READ, + uid: 1000, + gid: 1000, + create_time: SystemTime::UNIX_EPOCH, + last_modified_at: SystemTime::UNIX_EPOCH, + parent, + size, + itype, + } +} + +/// A mock `FileReader` that returns a fixed byte slice for any read. +#[derive(Debug, Clone)] +pub struct MockFileReader { + pub data: Bytes, +} + +impl FileReader for MockFileReader { + #[expect( + clippy::cast_possible_truncation, + reason = "test mock — offsets stay small" + )] + async fn read(&self, offset: u64, size: u32) -> Result { + let start = (offset as usize).min(self.data.len()); + let end = (start + size as usize).min(self.data.len()); + Ok(self.data.slice(start..end)) + } +} + +/// Shared state backing `MockFsDataProvider`. +#[derive(Debug, Default)] +pub struct MockFsState { + /// `(parent_addr, child_name) -> child_inode` + pub lookups: HashMap<(u64, OsString), INode>, + /// `parent_addr -> vec of (child_name, child_inode)` + pub directories: HashMap>, + /// `inode_addr -> file content bytes` + pub file_contents: HashMap, +} + +/// A clonable mock data provider for `AsyncFs` tests. 
+#[derive(Debug, Clone)] +pub struct MockFsDataProvider { + pub state: Arc, +} + +impl MockFsDataProvider { + pub fn new(state: MockFsState) -> Self { + Self { + state: Arc::new(state), + } + } +} + +impl FsDataProvider for MockFsDataProvider { + type Reader = MockFileReader; + + async fn lookup(&self, parent: INode, name: &OsStr) -> Result { + let key = (parent.addr, name.to_os_string()); + self.state + .lookups + .get(&key) + .copied() + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT)) + } + + async fn readdir(&self, parent: INode) -> Result, std::io::Error> { + self.state + .directories + .get(&parent.addr) + .cloned() + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT)) + } + + async fn open( + &self, + inode: INode, + _flags: OpenFlags, + ) -> Result { + let data = self + .state + .file_contents + .get(&inode.addr) + .cloned() + .unwrap_or_default(); + Ok(MockFileReader { data }) + } +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 101f9295..2729c866 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,4 +1,6 @@ -#![allow(missing_docs, clippy::unwrap_used)] +#![allow(dead_code, missing_docs, clippy::unwrap_used)] + +pub mod async_fs_mocks; use std::sync::{Arc, Mutex}; use std::time::Duration; From c80c09c88794589c18d1d062a0c0949a4abc64d9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:20:18 -0800 Subject: [PATCH 02/41] refactor: remove redundant FUSE error types, use io_to_errno helper --- lib/fs/fuser.rs | 135 +++++++++++++----------------------------------- 1 file changed, 36 insertions(+), 99 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 50042a24..61814119 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -9,60 +9,18 @@ use super::{FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags}; use crate::cache::async_backed::FutureBackedCache; use tracing::{debug, error, instrument}; -/// Wrapper converting [`std::io::Error`] to errno. 
-#[derive(Debug, thiserror::Error)] -#[error("{0}")] -struct FuseIoError(std::io::Error); - +/// Convert an I/O error to the corresponding errno value for FUSE replies. #[expect( clippy::wildcard_enum_match_arm, reason = "ErrorKind is non_exhaustive; EIO is the safe default" )] -impl From for i32 { - fn from(e: FuseIoError) -> Self { - e.0.raw_os_error().unwrap_or_else(|| match e.0.kind() { - std::io::ErrorKind::NotFound => libc::ENOENT, - std::io::ErrorKind::PermissionDenied => libc::EACCES, - std::io::ErrorKind::AlreadyExists => libc::EEXIST, - _ => libc::EIO, - }) - } -} - -/// Error for read operations. -#[derive(Debug, thiserror::Error)] -enum FuseReadError { - /// The file handle was not open. - #[error("file handle not open")] - NotOpen, - /// An I/O error occurred during the read. - #[error("I/O error: {0}")] - Io(#[from] std::io::Error), -} - -impl From for i32 { - fn from(e: FuseReadError) -> Self { - match e { - FuseReadError::NotOpen => libc::EBADF, - FuseReadError::Io(ref io) => io.raw_os_error().unwrap_or(libc::EIO), - } - } -} - -/// Error for release operations. -#[derive(Debug, thiserror::Error)] -enum FuseReleaseError { - /// The file handle was not open. 
- #[error("file handle not open")] - NotOpen, -} - -impl From for i32 { - fn from(e: FuseReleaseError) -> Self { - match e { - FuseReleaseError::NotOpen => libc::EBADF, - } - } +fn io_to_errno(e: &std::io::Error) -> i32 { + e.raw_os_error().unwrap_or_else(|| match e.kind() { + std::io::ErrorKind::NotFound => libc::ENOENT, + std::io::ErrorKind::PermissionDenied => libc::EACCES, + std::io::ErrorKind::AlreadyExists => libc::EEXIST, + _ => libc::EIO, + }) } mod inner { @@ -200,14 +158,9 @@ impl fuser::Filesystem for FuserAdapter { reply: fuser::ReplyEntry, ) { let result = self.runtime.block_on(async { - let tracked = self - .inner - .get_fs() - .lookup(LoadedAddr(parent), name) - .await - .map_err(FuseIoError)?; + let tracked = self.inner.get_fs().lookup(LoadedAddr(parent), name).await?; self.inner.ward_inc(tracked.inode.addr); - Ok::<_, FuseIoError>(tracked.inode) + Ok::<_, std::io::Error>(tracked.inode) }); match result { Ok(inode) => { @@ -217,7 +170,7 @@ impl fuser::Filesystem for FuserAdapter { } Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); } } } @@ -230,13 +183,9 @@ impl fuser::Filesystem for FuserAdapter { _fh: Option, reply: fuser::ReplyAttr, ) { - let result = self.runtime.block_on(async { - self.inner - .get_fs() - .getattr(LoadedAddr(ino)) - .await - .map_err(FuseIoError) - }); + let result = self + .runtime + .block_on(async { self.inner.get_fs().getattr(LoadedAddr(ino)).await }); match result { Ok(inode) => { let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); @@ -245,7 +194,7 @@ impl fuser::Filesystem for FuserAdapter { } Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); } } } @@ -268,16 +217,15 @@ impl fuser::Filesystem for FuserAdapter { entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); false }) - .await - .map_err(FuseIoError)?; - Ok::<_, FuseIoError>(entries) + .await?; + Ok::<_, std::io::Error>(entries) }); let 
entries = match result { Ok(entries) => entries, Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); return; } }; @@ -310,15 +258,10 @@ impl fuser::Filesystem for FuserAdapter { fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { let flags = OpenFlags::from_bits_truncate(flags); let result = self.runtime.block_on(async { - let open_file = self - .inner - .get_fs() - .open(LoadedAddr(ino), flags) - .await - .map_err(FuseIoError)?; + let open_file = self.inner.get_fs().open(LoadedAddr(ino), flags).await?; let fh = open_file.fh; self.open_files.insert(fh, Arc::clone(&open_file.reader)); - Ok::<_, FuseIoError>(fh) + Ok::<_, std::io::Error>(fh) }); match result { Ok(fh) => { @@ -327,7 +270,7 @@ impl fuser::Filesystem for FuserAdapter { } Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); } } } @@ -347,9 +290,12 @@ impl fuser::Filesystem for FuserAdapter { _lock_owner: Option, reply: fuser::ReplyData, ) { - let result: Result<_, FuseReadError> = self.runtime.block_on(async { - let reader = self.open_files.get(&fh).ok_or(FuseReadError::NotOpen)?; - Ok(reader.read(offset.cast_unsigned(), size).await?) 
+ let result = self.runtime.block_on(async { + let reader = self + .open_files + .get(&fh) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EBADF))?; + reader.read(offset.cast_unsigned(), size).await }); match result { Ok(data) => { @@ -358,7 +304,7 @@ impl fuser::Filesystem for FuserAdapter { } Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); } } } @@ -377,24 +323,15 @@ impl fuser::Filesystem for FuserAdapter { _flush: bool, reply: fuser::ReplyEmpty, ) { - let result: Result<_, FuseReleaseError> = match self.open_files.remove(&fh) { - Some(reader) => { - if let Err(e) = self.runtime.block_on(reader.close()) { - debug!(error = %e, "reader close reported error"); - } - Ok(()) - } - None => Err(FuseReleaseError::NotOpen), - }; - match result { - Ok(()) => { - debug!("replying ok"); - reply.ok(); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); + if let Some(reader) = self.open_files.remove(&fh) { + if let Err(e) = self.runtime.block_on(reader.close()) { + debug!(error = %e, "reader close reported error"); } + debug!("replying ok"); + reply.ok(); + } else { + debug!("file handle not open, replying error"); + reply.error(libc::EBADF); } } From 7a63d496779e22ec75d86269a661f5853213b93f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:30:13 -0800 Subject: [PATCH 03/41] feat: add FuseReply trait and FuseResultExt for centralized FUSE error handling --- lib/fs/fuser.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 61814119..a24397de 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -23,6 +23,56 @@ fn io_to_errno(e: &std::io::Error) -> i32 { }) } +/// Trait abstracting the `.error(errno)` method common to all fuser reply types. +trait FuseReply { + fn error(self, errno: i32); +} + +macro_rules! impl_fuse_reply { + ($($ty:ty),* $(,)?) 
=> { + $(impl FuseReply for $ty { + fn error(self, errno: i32) { + // Calls the inherent fuser method (not this trait method). + self.error(errno); + } + })* + }; +} + +// ReplyEmpty and ReplyStatfs are excluded: release and statfs +// do not follow the block_on -> fuse_reply pattern. +impl_fuse_reply!( + fuser::ReplyEntry, + fuser::ReplyAttr, + fuser::ReplyDirectory, + fuser::ReplyOpen, + fuser::ReplyData, +); + +/// Extension trait on `Result` for FUSE reply handling. +/// +/// Centralizes the error-logging + errno-reply path so each FUSE callback +/// only has to express its success path. +#[expect( + dead_code, + reason = "will be used by FUSE callbacks in upcoming commits" +)] +trait FuseResultExt { + fn fuse_reply(self, reply: R, on_ok: impl FnOnce(T, R)); +} + +impl FuseResultExt for Result { + fn fuse_reply(self, reply: R, on_ok: impl FnOnce(T, R)) { + match self { + Ok(val) => on_ok(val, reply), + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(io_to_errno(&e)); + } + } + } +} + mod inner { #![allow(clippy::future_not_send, clippy::mem_forget)] From 98e906f9f5ea3ce684214de8164e937c13aeba56 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:37:32 -0800 Subject: [PATCH 04/41] refactor: use fuse_reply in getattr --- lib/fs/fuser.rs | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index a24397de..41ff2140 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -53,10 +53,6 @@ impl_fuse_reply!( /// /// Centralizes the error-logging + errno-reply path so each FUSE callback /// only has to express its success path. 
-#[expect( - dead_code, - reason = "will be used by FUSE callbacks in upcoming commits" -)] trait FuseResultExt { fn fuse_reply(self, reply: R, on_ok: impl FnOnce(T, R)); } @@ -233,20 +229,13 @@ impl fuser::Filesystem for FuserAdapter { _fh: Option, reply: fuser::ReplyAttr, ) { - let result = self - .runtime - .block_on(async { self.inner.get_fs().getattr(LoadedAddr(ino)).await }); - match result { - Ok(inode) => { + self.runtime + .block_on(async { self.inner.get_fs().getattr(LoadedAddr(ino)).await }) + .fuse_reply(reply, |inode, reply| { let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); debug!(?attr, "replying..."); reply.attr(&Self::SHAMEFUL_TTL, &attr); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - } - } + }); } #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] From 3d26de286e5c41523718810c553d5dea1b23ec76 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:37:58 -0800 Subject: [PATCH 05/41] refactor: use fuse_reply in lookup --- lib/fs/fuser.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 41ff2140..704cddaf 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -203,22 +203,17 @@ impl fuser::Filesystem for FuserAdapter { name: &OsStr, reply: fuser::ReplyEntry, ) { - let result = self.runtime.block_on(async { - let tracked = self.inner.get_fs().lookup(LoadedAddr(parent), name).await?; - self.inner.ward_inc(tracked.inode.addr); - Ok::<_, std::io::Error>(tracked.inode) - }); - match result { - Ok(inode) => { + self.runtime + .block_on(async { + let tracked = self.inner.get_fs().lookup(LoadedAddr(parent), name).await?; + self.inner.ward_inc(tracked.inode.addr); + Ok::<_, std::io::Error>(tracked.inode) + }) + .fuse_reply(reply, |inode, reply| { let f_attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); debug!(?f_attr, "replying..."); reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); - } - 
Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - } - } + }); } #[instrument(name = "FuserAdapter::getattr", skip(self, _req, _fh, reply))] From 44ffc1fac2be9d6fa593d6fe5c251173c4e3ddc3 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:38:24 -0800 Subject: [PATCH 06/41] refactor: use fuse_reply in open --- lib/fs/fuser.rs | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 704cddaf..7d648598 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -291,22 +291,17 @@ impl fuser::Filesystem for FuserAdapter { #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { let flags = OpenFlags::from_bits_truncate(flags); - let result = self.runtime.block_on(async { - let open_file = self.inner.get_fs().open(LoadedAddr(ino), flags).await?; - let fh = open_file.fh; - self.open_files.insert(fh, Arc::clone(&open_file.reader)); - Ok::<_, std::io::Error>(fh) - }); - match result { - Ok(fh) => { + self.runtime + .block_on(async { + let open_file = self.inner.get_fs().open(LoadedAddr(ino), flags).await?; + let fh = open_file.fh; + self.open_files.insert(fh, Arc::clone(&open_file.reader)); + Ok::<_, std::io::Error>(fh) + }) + .fuse_reply(reply, |fh, reply| { debug!(handle = fh, "replying..."); reply.opened(fh, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - } - } + }); } #[instrument( From 74904cd4e9db84068c781be50c089cacf4f07e19 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:38:51 -0800 Subject: [PATCH 07/41] refactor: use fuse_reply in read --- lib/fs/fuser.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 7d648598..824fafa2 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ 
-319,23 +319,18 @@ impl fuser::Filesystem for FuserAdapter { _lock_owner: Option, reply: fuser::ReplyData, ) { - let result = self.runtime.block_on(async { - let reader = self - .open_files - .get(&fh) - .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EBADF))?; - reader.read(offset.cast_unsigned(), size).await - }); - match result { - Ok(data) => { + self.runtime + .block_on(async { + let reader = self + .open_files + .get(&fh) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EBADF))?; + reader.read(offset.cast_unsigned(), size).await + }) + .fuse_reply(reply, |data, reply| { debug!(read_bytes = data.len(), "replying..."); reply.data(&data); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - } - } + }); } #[instrument( From cc820ca8c8a5b666bf8110f649630a0cc7d3e3e0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:39:43 -0800 Subject: [PATCH 08/41] refactor: use fuse_reply in readdir --- lib/fs/fuser.rs | 81 ++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 824fafa2..886a5f6f 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -240,52 +240,45 @@ impl fuser::Filesystem for FuserAdapter { ino: u64, _fh: u64, offset: i64, - mut reply: fuser::ReplyDirectory, + reply: fuser::ReplyDirectory, ) { let offset_u64 = offset.cast_unsigned(); - let result = self.runtime.block_on(async { - let mut entries = Vec::new(); - self.inner - .get_fs() - .readdir(LoadedAddr(ino), offset_u64, |de, _next_offset| { - entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); - false - }) - .await?; - Ok::<_, std::io::Error>(entries) - }); - - let entries = match result { - Ok(entries) => entries, - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - return; - } - }; - - #[expect( - clippy::cast_possible_truncation, - reason = "offset fits in usize on supported 64-bit 
platforms" - )] - for (i, (entry_ino, entry_name, entry_itype)) in entries.iter().enumerate() { - let kind = inode_type_to_fuser(*entry_itype); - let abs_idx = offset_u64 as usize + i + 1; - let Ok(idx): Result = abs_idx.try_into() else { - error!("Directory entry index {} too large for fuser", abs_idx); - reply.error(libc::EIO); - return; - }; - - debug!(?entry_name, ino = entry_ino, "adding entry to reply..."); - if reply.add(*entry_ino, idx, kind, entry_name) { - debug!("buffer full for now, stopping readdir"); - break; - } - } - - debug!("finalizing reply..."); - reply.ok(); + self.runtime + .block_on(async { + let mut entries = Vec::new(); + self.inner + .get_fs() + .readdir(LoadedAddr(ino), offset_u64, |de, _next_offset| { + entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); + false + }) + .await?; + Ok::<_, std::io::Error>(entries) + }) + .fuse_reply(reply, |entries, mut reply| { + for (i, (entry_ino, entry_name, entry_itype)) in entries.iter().enumerate() { + let kind = inode_type_to_fuser(*entry_itype); + #[expect( + clippy::cast_possible_truncation, + reason = "offset fits in usize on supported 64-bit platforms" + )] + let abs_idx = offset_u64 as usize + i + 1; + let Ok(idx): Result = abs_idx.try_into() else { + error!("Directory entry index {} too large for fuser", abs_idx); + reply.error(libc::EIO); + return; + }; + + debug!(?entry_name, ino = entry_ino, "adding entry to reply..."); + if reply.add(*entry_ino, idx, kind, entry_name) { + debug!("buffer full for now, stopping readdir"); + break; + } + } + + debug!("finalizing reply..."); + reply.ok(); + }); } #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] From e7d59095b520ace0e7a35e67ca7cbff2dc40999f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 13:26:29 -0800 Subject: [PATCH 09/41] DCache with per-parent info --- lib/fs/dcache.rs | 184 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 167 insertions(+), 17 deletions(-) 
diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index 5138e802..fab36c7b 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -1,4 +1,6 @@ use std::ffi::{OsStr, OsString}; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; use crate::fs::LoadedAddr; @@ -11,29 +13,69 @@ pub struct DValue { pub is_dir: bool, } -/// In-memory directory entry cache mapping `(parent, name)` to child metadata. +/// Per-parent directory state holding child entries and a population flag. +struct DirState { + children: scc::HashMap, + populated: AtomicBool, +} + +impl DirState { + fn new() -> Self { + Self { + children: scc::HashMap::new(), + populated: AtomicBool::new(false), + } + } +} + +/// In-memory directory entry cache with per-parent child maps. /// -/// Backed by [`scc::HashMap`] for atomic upsert on insert. The `readdir` -/// implementation scans the entire map and filters by parent — this is O(n) -/// over the cache size rather than O(log n + k) with an ordered index, but -/// guarantees that `insert` never creates a window where an entry is absent. -#[derive(Default)] +/// Each parent directory gets its own [`DirState`] containing a +/// [`scc::HashMap`] of child entries and an [`AtomicBool`] population flag. +/// This makes `readdir` O(k) in the number of children rather than O(n) +/// over the entire cache. pub struct DCache { - cache: scc::HashMap<(LoadedAddr, OsString), DValue>, + dirs: scc::HashMap>, +} + +impl Default for DCache { + fn default() -> Self { + Self::new() + } } impl DCache { /// Creates an empty directory cache. #[must_use] pub fn new() -> Self { - Self::default() + Self { + dirs: scc::HashMap::new(), + } + } + + /// Returns the [`DirState`] for `parent_ino`, creating one if absent. 
+ fn dir_state(&self, parent_ino: LoadedAddr) -> Arc { + if let Some(entry) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) { + return entry; + } + let state = Arc::new(DirState::new()); + match self.dirs.entry_sync(parent_ino) { + scc::hash_map::Entry::Occupied(occ) => Arc::clone(occ.get()), + scc::hash_map::Entry::Vacant(vac) => { + let cloned = Arc::clone(&state); + vac.insert_entry(state); + cloned + } + } } /// Looks up a single child entry by parent inode and name. #[must_use] pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option { - let key = (parent_ino, name.to_os_string()); - self.cache.read_sync(&key, |_, v| v.clone()) + let state = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v))?; + state + .children + .read_sync(&name.to_os_string(), |_, v| v.clone()) } /// Atomically inserts or overwrites a child entry in the cache. @@ -44,22 +86,130 @@ impl DCache { ino: LoadedAddr, is_dir: bool, ) { - let key = (parent_ino, name); + let state = self.dir_state(parent_ino); let value = DValue { ino, is_dir }; - self.cache.upsert_async(key, value).await; + state.children.upsert_async(name, value).await; } /// Returns all cached children of `parent_ino` as `(name, value)` pairs. pub async fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> { + let Some(state) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) else { + return Vec::new(); + }; let mut entries = Vec::new(); - self.cache - .iter_async(|key, value| { - if key.0 == parent_ino { - entries.push((key.1.clone(), value.clone())); - } + state + .children + .iter_async(|k, v| { + entries.push((k.clone(), v.clone())); true }) .await; entries } + + /// Returns `true` if the directory at `parent_ino` has been fully populated. 
+ #[must_use] + pub fn is_populated(&self, parent_ino: LoadedAddr) -> bool { + self.dirs + .read_sync(&parent_ino, |_, v| v.populated.load(Ordering::Acquire)) + .unwrap_or(false) + } + + /// Marks the directory at `parent_ino` as fully populated. + pub fn mark_populated(&self, parent_ino: LoadedAddr) { + let state = self.dir_state(parent_ino); + state.populated.store(true, Ordering::Release); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::OsString; + + #[tokio::test] + async fn lookup_returns_none_for_missing_entry() { + let cache = DCache::new(); + assert!(cache.lookup(LoadedAddr(1), OsStr::new("foo")).is_none()); + } + + #[tokio::test] + async fn insert_then_lookup() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + assert!(dv.is_some(), "entry should be present after insert"); + let dv = dv.expect("checked above"); + assert_eq!(dv.ino, LoadedAddr(10)); + assert!(!dv.is_dir); + } + + #[tokio::test] + async fn readdir_returns_only_children_of_parent() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("a"), LoadedAddr(10), false) + .await; + cache + .insert(LoadedAddr(1), OsString::from("b"), LoadedAddr(11), true) + .await; + cache + .insert(LoadedAddr(2), OsString::from("c"), LoadedAddr(12), false) + .await; + let children = cache.readdir(LoadedAddr(1)).await; + assert_eq!(children.len(), 2); + let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); + assert!(names.contains(&OsString::from("a"))); + assert!(names.contains(&OsString::from("b"))); + } + + #[tokio::test] + async fn readdir_empty_parent_returns_empty() { + let cache = DCache::new(); + let children = cache.readdir(LoadedAddr(1)).await; + assert!(children.is_empty()); + } + + #[tokio::test] + async fn is_populated_false_by_default() { + let cache = DCache::new(); + assert!(!cache.is_populated(LoadedAddr(1))); + 
} + + #[tokio::test] + async fn mark_populated_then_check() { + let cache = DCache::new(); + cache.mark_populated(LoadedAddr(1)); + assert!(cache.is_populated(LoadedAddr(1))); + } + + #[tokio::test] + async fn insert_does_not_mark_populated() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + assert!( + !cache.is_populated(LoadedAddr(1)), + "insert alone should not mark a directory as populated" + ); + } + + #[tokio::test] + async fn upsert_overwrites_existing_entry() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(20), true) + .await; + let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + assert!(dv.is_some(), "entry should still be present after upsert"); + let dv = dv.expect("checked above"); + assert_eq!(dv.ino, LoadedAddr(20)); + assert!(dv.is_dir); + } } From 44d5f0751e56a22fbe7f1d678c0e8502135f8842 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 13:28:37 -0800 Subject: [PATCH 10/41] refactor: use DCache population tracking, remove readdir_populated from AsyncFs --- lib/fs/async_fs.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 7626578f..3bf3b0f3 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -193,9 +193,6 @@ pub struct AsyncFs<'tbl, DP: FsDataProvider> { /// Monotonically increasing file handle counter. Starts at 1 (0 is reserved). next_fh: AtomicU64, - - /// Tracks which directories have had their children fetched via `dp.readdir`. 
- readdir_populated: FutureBackedCache, } impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { @@ -215,7 +212,6 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { directory_cache: DCache::new(), data_provider, next_fh: AtomicU64::new(1), - readdir_populated: FutureBackedCache::default(), } } @@ -235,7 +231,6 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { directory_cache: DCache::new(), data_provider, next_fh: AtomicU64::new(1), - readdir_populated: FutureBackedCache::default(), } } @@ -392,7 +387,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { } // Populate the directory cache on first readdir for this parent. - if self.readdir_populated.get(&parent).await.is_none() { + if !self.directory_cache.is_populated(parent) { let children = self.data_provider.readdir(parent_inode).await?; for (name, child_inode) in children { self.inode_table @@ -407,9 +402,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { ) .await; } - self.readdir_populated - .get_or_init(parent, || async {}) - .await; + self.directory_cache.mark_populated(parent); } let mut children = self.directory_cache.readdir(parent).await; From f07db8be693508b4d51fa1b96f154317db448cf0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 13:30:48 -0800 Subject: [PATCH 11/41] refactor: use DCache population tracking in CompositeFs --- src/fs/mescloud/composite.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs index 3356b7b5..91c35806 100644 --- a/src/fs/mescloud/composite.rs +++ b/src/fs/mescloud/composite.rs @@ -83,7 +83,6 @@ struct OpenFileEntry { pub(super) struct CompositeFs { pub(super) inode_table: FutureBackedCache, pub(super) directory_cache: DCache, - readdir_populated: FutureBackedCache, next_ino: AtomicU64, next_fh: AtomicU64, refcounts: FxHashMap, @@ -121,7 +120,6 @@ impl CompositeFs { Self { inode_table, directory_cache: DCache::new(), - readdir_populated: 
FutureBackedCache::default(), next_ino: AtomicU64::new(Self::ROOT_INO + 1), next_fh: AtomicU64::new(1), refcounts, @@ -286,7 +284,7 @@ impl CompositeFs { .copied() .ok_or(ReadDirError::InodeNotFound)?; - if self.readdir_populated.get(&LoadedAddr(ino)).await.is_none() { + if !self.directory_cache.is_populated(LoadedAddr(ino)) { let inner_ino = self.slots[idx] .bridge .forward(ino) @@ -321,9 +319,7 @@ impl CompositeFs { .await; } - self.readdir_populated - .get_or_init(LoadedAddr(ino), || async {}) - .await; + self.directory_cache.mark_populated(LoadedAddr(ino)); } let mut children = self.directory_cache.readdir(LoadedAddr(ino)).await; From 7558e8624380775aac8b7c175d9e7fcfeafe3ca0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 13:31:36 -0800 Subject: [PATCH 12/41] fix: update stale readdir_populated comment in async_fs.rs --- lib/fs/async_fs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 3bf3b0f3..761149d2 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -368,7 +368,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { /// /// # Concurrency /// - /// The `readdir_populated` check-then-populate is **not** atomic. If two + /// The `is_populated` check-then-populate is **not** atomic. If two /// concurrent callers invoke `readdir` for the same parent, both may call /// `dp.readdir()` and insert duplicate children. This is safe when the /// caller serializes access (e.g. via `&mut self` on the `Fs` trait). 
From bcf2f1eb46b5b2f90d1b335a9cfb76463ddc66f9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:08:22 -0800 Subject: [PATCH 13/41] feat: add ConcurrentBridge for lock-free inode address translation --- lib/fs/bridge.rs | 76 +++++++++++++++++++++++++++++++++++++++++++ lib/fs/mod.rs | 2 ++ tests/bridge_tests.rs | 49 ++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 lib/fs/bridge.rs create mode 100644 tests/bridge_tests.rs diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs new file mode 100644 index 00000000..5bb1b028 --- /dev/null +++ b/lib/fs/bridge.rs @@ -0,0 +1,76 @@ +//! Lock-free bidirectional inode address mapping. +//! +//! [`ConcurrentBridge`] maps between "outer" (composite) and "inner" (child) +//! inode address spaces using two [`scc::HashMap`]s. + +use crate::fs::InodeAddr; + +/// Bidirectional inode mapping between outer (composite) and inner (child) address spaces. +/// +/// Uses two lock-free `scc::HashMap`s. Insertion order: forward map first, +/// then backward map, so any observer that discovers an outer addr via +/// `backward` can immediately resolve it via `forward`. +pub struct ConcurrentBridge { + /// outer -> inner + fwd: scc::HashMap, + /// inner -> outer + bwd: scc::HashMap, +} + +impl ConcurrentBridge { + /// Creates an empty bridge. + #[must_use] + pub fn new() -> Self { + Self { + fwd: scc::HashMap::new(), + bwd: scc::HashMap::new(), + } + } + + /// Insert a mapping from outer to inner. + /// + /// Inserts into the forward map first (see module docs for ordering rationale). + pub fn insert(&self, outer: InodeAddr, inner: InodeAddr) { + let _ = self.fwd.insert_sync(outer, inner); + let _ = self.bwd.insert_sync(inner, outer); + } + + /// Resolve outer -> inner. + #[must_use] + pub fn forward(&self, outer: InodeAddr) -> Option { + self.fwd.read_sync(&outer, |_, &v| v) + } + + /// Resolve inner -> outer. 
+ #[must_use] + pub fn backward(&self, inner: InodeAddr) -> Option { + self.bwd.read_sync(&inner, |_, &v| v) + } + + /// Look up inner -> outer, or allocate a new outer address if unmapped. + pub fn backward_or_insert( + &self, + inner: InodeAddr, + allocate: impl FnOnce() -> InodeAddr, + ) -> InodeAddr { + if let Some(outer) = self.backward(inner) { + return outer; + } + let outer = allocate(); + self.insert(outer, inner); + outer + } + + /// Remove the mapping for the given outer address. + pub fn remove_by_outer(&self, outer: InodeAddr) { + if let Some((_, inner)) = self.fwd.remove_sync(&outer) { + self.bwd.remove_sync(&inner); + } + } +} + +impl Default for ConcurrentBridge { + fn default() -> Self { + Self::new() + } +} diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs index e8f971b4..f5d42961 100644 --- a/lib/fs/mod.rs +++ b/lib/fs/mod.rs @@ -1,6 +1,8 @@ //! Useful filesystem generalizations. /// Async filesystem cache with concurrent inode management. pub mod async_fs; +/// Lock-free bidirectional inode address mapping. +pub mod bridge; /// Directory entry cache for fast parent-child lookups. pub mod dcache; /// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. 
diff --git a/tests/bridge_tests.rs b/tests/bridge_tests.rs new file mode 100644 index 00000000..b0598e4d --- /dev/null +++ b/tests/bridge_tests.rs @@ -0,0 +1,49 @@ +#![allow(clippy::unwrap_used, missing_docs)] + +use git_fs::fs::bridge::ConcurrentBridge; + +#[test] +fn insert_then_forward_returns_inner() { + let bridge = ConcurrentBridge::new(); + bridge.insert(10, 100); + assert_eq!(bridge.forward(10), Some(100)); +} + +#[test] +fn insert_then_backward_returns_outer() { + let bridge = ConcurrentBridge::new(); + bridge.insert(10, 100); + assert_eq!(bridge.backward(100), Some(10)); +} + +#[test] +fn forward_missing_returns_none() { + let bridge = ConcurrentBridge::new(); + assert_eq!(bridge.forward(42), None); +} + +#[test] +fn backward_or_insert_existing_returns_cached() { + let bridge = ConcurrentBridge::new(); + bridge.insert(10, 100); + let outer = bridge.backward_or_insert(100, || 999); + assert_eq!(outer, 10, "should return existing outer addr"); +} + +#[test] +fn backward_or_insert_new_allocates() { + let bridge = ConcurrentBridge::new(); + let outer = bridge.backward_or_insert(200, || 50); + assert_eq!(outer, 50, "should use allocator"); + assert_eq!(bridge.forward(50), Some(200)); + assert_eq!(bridge.backward(200), Some(50)); +} + +#[test] +fn remove_by_outer_clears_both_directions() { + let bridge = ConcurrentBridge::new(); + bridge.insert(10, 100); + bridge.remove_by_outer(10); + assert_eq!(bridge.forward(10), None); + assert_eq!(bridge.backward(100), None); +} From a19e91d1d7e395d11501f4a422213c4800518bac Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:12:17 -0800 Subject: [PATCH 14/41] fix: eliminate TOCTOU race in ConcurrentBridge::backward_or_insert Use `scc::HashMap::entry_sync` for atomic check-and-insert instead of separate backward() + insert() calls that allowed two concurrent callers to both allocate for the same inner address. Also add #[must_use]. 
--- lib/fs/bridge.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs index 5bb1b028..350d8750 100644 --- a/lib/fs/bridge.rs +++ b/lib/fs/bridge.rs @@ -48,17 +48,22 @@ impl ConcurrentBridge { } /// Look up inner -> outer, or allocate a new outer address if unmapped. + #[must_use] pub fn backward_or_insert( &self, inner: InodeAddr, allocate: impl FnOnce() -> InodeAddr, ) -> InodeAddr { - if let Some(outer) = self.backward(inner) { - return outer; + match self.bwd.entry_sync(inner) { + scc::hash_map::Entry::Occupied(occ) => *occ.get(), + scc::hash_map::Entry::Vacant(vac) => { + let outer = allocate(); + vac.insert_entry(outer); + // Populate forward map after backward is committed. + let _ = self.fwd.insert_sync(outer, inner); + outer + } } - let outer = allocate(); - self.insert(outer, inner); - outer } /// Remove the mapping for the given outer address. From d9fdc04b8c75e6016502889c5a67e7e4b0584fc8 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:15:20 -0800 Subject: [PATCH 15/41] feat: add CompositeRoot trait, ChildInner, and CompositeReader --- lib/fs/composite.rs | 125 ++++++++++++++++++++++++++++++++++++++++++++ lib/fs/mod.rs | 2 + 2 files changed, 127 insertions(+) create mode 100644 lib/fs/composite.rs diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs new file mode 100644 index 00000000..36969c67 --- /dev/null +++ b/lib/fs/composite.rs @@ -0,0 +1,125 @@ +//! Generic composite filesystem types. +//! +//! A composite filesystem presents multiple child filesystems under a single +//! virtual root directory. The [`CompositeRoot`] trait describes how children +//! are discovered, [`ChildInner`] co-locates an inode table with an +//! [`AsyncFs`](super::async_fs::AsyncFs), and [`CompositeReader`] wraps a +//! child reader so the composite layer can expose it through [`FileReader`]. 
+ +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; + +use bytes::Bytes; + +use crate::fs::INode; +use crate::fs::async_fs::{FileReader, FsDataProvider}; + +/// Descriptor for a child filesystem returned by [`CompositeRoot`]. +pub struct ChildDescriptor { + /// The name this child is listed as in the composite root directory. + pub name: OsString, + /// The data provider for this child. + pub provider: DP, + /// The root inode of the child filesystem. + pub root_ino: INode, +} + +/// Describes the children that a composite filesystem exposes at its root. +/// +/// Implementors define domain-specific child resolution: what children exist, +/// and what [`FsDataProvider`] backs each child. +pub trait CompositeRoot: Send + Sync + 'static { + /// The data provider type for child filesystems. + type ChildDP: FsDataProvider; + + /// Resolve a child by name, returning its data provider and root inode. + /// + /// Called on lookup at the composite root. Returns `None` if the name + /// does not correspond to a known child. + fn resolve_child( + &self, + name: &OsStr, + ) -> impl Future>, std::io::Error>> + Send; + + /// List all children at the composite root. + /// + /// Called on readdir at the composite root. + fn list_children( + &self, + ) -> impl Future>, std::io::Error>> + Send; +} + +mod child_inner_impl { + #![allow(clippy::future_not_send, clippy::mem_forget)] + + use ouroboros::self_referencing; + + use crate::cache::async_backed::FutureBackedCache; + use crate::fs::async_fs::{AsyncFs, FsDataProvider}; + use crate::fs::{INode, InodeAddr}; + + /// Self-referential struct co-locating an inode table and [`AsyncFs`]. + /// + /// The `AsyncFs` borrows from the table directly, avoiding an extra + /// indirection. This mirrors the [`FuseBridgeInner`](super::super::fuser) + /// pattern. 
+ #[self_referencing] + pub struct ChildInner { + pub(super) table: FutureBackedCache, + #[borrows(table)] + #[covariant] + pub(super) fs: AsyncFs<'this, DP>, + } + + impl ChildInner { + #[expect(dead_code, reason = "used by CompositeFs in a follow-up commit")] + pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { + ChildInnerBuilder { + table, + fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), + } + .build() + } + + #[expect(dead_code, reason = "used by CompositeFs in a follow-up commit")] + pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> { + self.borrow_fs() + } + } +} + +pub use child_inner_impl::ChildInner; + +/// Wraps a child's reader so that the composite layer can expose it as its own +/// [`FileReader`]. +pub struct CompositeReader { + inner: Arc, +} + +impl CompositeReader { + /// Create a new `CompositeReader` wrapping the given reader. + pub fn new(inner: Arc) -> Self { + Self { inner } + } +} + +impl std::fmt::Debug for CompositeReader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CompositeReader").finish_non_exhaustive() + } +} + +impl FileReader for CompositeReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { + self.inner.read(offset, size) + } + + fn close(&self) -> impl Future> + Send { + self.inner.close() + } +} diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs index f5d42961..ed93bd25 100644 --- a/lib/fs/mod.rs +++ b/lib/fs/mod.rs @@ -3,6 +3,8 @@ pub mod async_fs; /// Lock-free bidirectional inode address mapping. pub mod bridge; +/// Generic composite filesystem types. +pub mod composite; /// Directory entry cache for fast parent-child lookups. pub mod dcache; /// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. 
From e2f8215b48bbd62a978d21f78815eee9e293dd4c Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:18:21 -0800 Subject: [PATCH 16/41] test: extract async_backed inline tests to tests/async_backed_correctness.rs --- lib/cache/async_backed.rs | 101 ------------------------------ tests/async_backed_correctness.rs | 99 +++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 101 deletions(-) create mode 100644 tests/async_backed_correctness.rs diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index c3fddd05..8f15803b 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -288,104 +288,3 @@ where } } } - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn try_init_ok_caches_value() { - let cache = FutureBackedCache::::default(); - let result: Result = cache - .get_or_try_init(1, || async { Ok("hello".to_owned()) }) - .await; - assert_eq!(result.unwrap(), "hello", "should return Ok value"); - - // Value should now be cached (get returns it without factory) - let cached = cache.get(&1).await; - assert_eq!(cached.unwrap(), "hello", "value should be in cache"); - } - - #[tokio::test] - async fn try_init_err_does_not_cache() { - let cache = FutureBackedCache::::default(); - let result: Result = cache.get_or_try_init(1, || async { Err("boom") }).await; - assert_eq!(result.unwrap_err(), "boom", "should return the error"); - - // Cache should be empty — error was not stored - assert!(cache.is_empty(), "cache should have no entries after error"); - assert!(cache.get(&1).await.is_none(), "key should not exist"); - } - - #[tokio::test] - async fn try_init_err_then_retry_ok() { - let cache = FutureBackedCache::::default(); - - // First call: factory fails - let r1: Result = cache.get_or_try_init(1, || async { Err("fail") }).await; - assert!(r1.is_err(), "first call should fail"); - - // Second call: factory succeeds - let r2: Result = cache - .get_or_try_init(1, || async { Ok("recovered".to_owned()) 
}) - .await; - assert_eq!(r2.unwrap(), "recovered", "retry should succeed"); - - // Value should now be cached - let cached = cache.get(&1).await; - assert_eq!(cached.unwrap(), "recovered"); - } - - #[tokio::test] - async fn try_init_returns_value_cached_by_init() { - let cache = FutureBackedCache::::default(); - - // Populate via infallible get_or_init - cache - .get_or_init(1, || async { "from_init".to_owned() }) - .await; - - // get_or_try_init should return the cached value without running factory - let result: Result = cache - .get_or_try_init(1, || async { panic!("factory should not run") }) - .await; - assert_eq!(result.unwrap(), "from_init"); - } - - #[tokio::test] - async fn panic_in_factory_is_recovered() { - use std::sync::Arc; - use std::sync::atomic::{AtomicUsize, Ordering}; - - let cache = Arc::new(FutureBackedCache::::default()); - let call_count = Arc::new(AtomicUsize::new(0)); - - // Spawn a task whose factory panics. tokio::spawn catches the panic. - let cache2 = Arc::clone(&cache); - let call_count2 = Arc::clone(&call_count); - let handle = tokio::spawn(async move { - cache2 - .get_or_init(1, || { - call_count2.fetch_add(1, Ordering::Relaxed); - async { panic!("boom") } - }) - .await - }); - // The spawned task panics internally; JoinHandle returns Err. - assert!(handle.await.is_err(), "task should have panicked"); - - // The key should NOT be permanently bricked. A new caller should succeed. 
- let v = cache - .get_or_init(1, || { - call_count.fetch_add(1, Ordering::Relaxed); - async { "recovered".to_owned() } - }) - .await; - assert_eq!(v, "recovered", "should recover after panic"); - assert_eq!( - call_count.load(Ordering::Relaxed), - 2, - "factory called twice" - ); - } -} diff --git a/tests/async_backed_correctness.rs b/tests/async_backed_correctness.rs new file mode 100644 index 00000000..457ba948 --- /dev/null +++ b/tests/async_backed_correctness.rs @@ -0,0 +1,99 @@ +#![allow(clippy::unwrap_used, missing_docs)] + +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use git_fs::cache::async_backed::FutureBackedCache; + +#[tokio::test] +async fn try_init_ok_caches_value() { + let cache = FutureBackedCache::::default(); + let result: Result = cache + .get_or_try_init(1, || async { Ok("hello".to_owned()) }) + .await; + assert_eq!(result.unwrap(), "hello", "should return Ok value"); + + // Value should now be cached (get returns it without factory) + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "hello", "value should be in cache"); +} + +#[tokio::test] +async fn try_init_err_does_not_cache() { + let cache = FutureBackedCache::::default(); + let result: Result = cache.get_or_try_init(1, || async { Err("boom") }).await; + assert_eq!(result.unwrap_err(), "boom", "should return the error"); + + // Cache should be empty — error was not stored + assert!(cache.is_empty(), "cache should have no entries after error"); + assert!(cache.get(&1).await.is_none(), "key should not exist"); +} + +#[tokio::test] +async fn try_init_err_then_retry_ok() { + let cache = FutureBackedCache::::default(); + + // First call: factory fails + let r1: Result = cache.get_or_try_init(1, || async { Err("fail") }).await; + assert!(r1.is_err(), "first call should fail"); + + // Second call: factory succeeds + let r2: Result = cache + .get_or_try_init(1, || async { Ok("recovered".to_owned()) }) + .await; + assert_eq!(r2.unwrap(), "recovered", "retry 
should succeed"); + + // Value should now be cached + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "recovered"); +} + +#[tokio::test] +async fn try_init_returns_value_cached_by_init() { + let cache = FutureBackedCache::::default(); + + // Populate via infallible get_or_init + cache + .get_or_init(1, || async { "from_init".to_owned() }) + .await; + + // get_or_try_init should return the cached value without running factory + let result: Result = cache + .get_or_try_init(1, || async { panic!("factory should not run") }) + .await; + assert_eq!(result.unwrap(), "from_init"); +} + +#[tokio::test] +async fn panic_in_factory_is_recovered() { + let cache = Arc::new(FutureBackedCache::::default()); + let call_count = Arc::new(AtomicUsize::new(0)); + + // Spawn a task whose factory panics. tokio::spawn catches the panic. + let cache2 = Arc::clone(&cache); + let call_count2 = Arc::clone(&call_count); + let handle = tokio::spawn(async move { + cache2 + .get_or_init(1, || { + call_count2.fetch_add(1, Ordering::Relaxed); + async { panic!("boom") } + }) + .await + }); + // The spawned task panics internally; JoinHandle returns Err. + assert!(handle.await.is_err(), "task should have panicked"); + + // The key should NOT be permanently bricked. A new caller should succeed. 
+ let v = cache + .get_or_init(1, || { + call_count.fetch_add(1, Ordering::Relaxed); + async { "recovered".to_owned() } + }) + .await; + assert_eq!(v, "recovered", "should recover after panic"); + assert_eq!( + call_count.load(Ordering::Relaxed), + 2, + "factory called twice" + ); +} From 6fe9dd52afe722fe6e6898db49dd9c3334138e2e Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:18:51 -0800 Subject: [PATCH 17/41] fix: add #[must_use] to CompositeReader::new --- lib/fs/composite.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index 36969c67..d8237dcb 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -99,6 +99,7 @@ pub struct CompositeReader { impl CompositeReader { /// Create a new `CompositeReader` wrapping the given reader. + #[must_use] pub fn new(inner: Arc) -> Self { Self { inner } } From 781d7bb28376cbfaffacfc7a8f17ef43e62fdc4b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:21:15 -0800 Subject: [PATCH 18/41] test: extract dcache inline tests to tests/dcache_correctness.rs --- lib/fs/dcache.rs | 92 ------------------------------------- tests/dcache_correctness.rs | 92 +++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 92 deletions(-) create mode 100644 tests/dcache_correctness.rs diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index fab36c7b..4870a401 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -121,95 +121,3 @@ impl DCache { state.populated.store(true, Ordering::Release); } } - -#[cfg(test)] -mod tests { - use super::*; - use std::ffi::OsString; - - #[tokio::test] - async fn lookup_returns_none_for_missing_entry() { - let cache = DCache::new(); - assert!(cache.lookup(LoadedAddr(1), OsStr::new("foo")).is_none()); - } - - #[tokio::test] - async fn insert_then_lookup() { - let cache = DCache::new(); - cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) - .await; - let dv = cache.lookup(LoadedAddr(1), 
OsStr::new("foo")); - assert!(dv.is_some(), "entry should be present after insert"); - let dv = dv.expect("checked above"); - assert_eq!(dv.ino, LoadedAddr(10)); - assert!(!dv.is_dir); - } - - #[tokio::test] - async fn readdir_returns_only_children_of_parent() { - let cache = DCache::new(); - cache - .insert(LoadedAddr(1), OsString::from("a"), LoadedAddr(10), false) - .await; - cache - .insert(LoadedAddr(1), OsString::from("b"), LoadedAddr(11), true) - .await; - cache - .insert(LoadedAddr(2), OsString::from("c"), LoadedAddr(12), false) - .await; - let children = cache.readdir(LoadedAddr(1)).await; - assert_eq!(children.len(), 2); - let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); - assert!(names.contains(&OsString::from("a"))); - assert!(names.contains(&OsString::from("b"))); - } - - #[tokio::test] - async fn readdir_empty_parent_returns_empty() { - let cache = DCache::new(); - let children = cache.readdir(LoadedAddr(1)).await; - assert!(children.is_empty()); - } - - #[tokio::test] - async fn is_populated_false_by_default() { - let cache = DCache::new(); - assert!(!cache.is_populated(LoadedAddr(1))); - } - - #[tokio::test] - async fn mark_populated_then_check() { - let cache = DCache::new(); - cache.mark_populated(LoadedAddr(1)); - assert!(cache.is_populated(LoadedAddr(1))); - } - - #[tokio::test] - async fn insert_does_not_mark_populated() { - let cache = DCache::new(); - cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) - .await; - assert!( - !cache.is_populated(LoadedAddr(1)), - "insert alone should not mark a directory as populated" - ); - } - - #[tokio::test] - async fn upsert_overwrites_existing_entry() { - let cache = DCache::new(); - cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) - .await; - cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(20), true) - .await; - let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); - assert!(dv.is_some(), "entry should still be 
present after upsert"); - let dv = dv.expect("checked above"); - assert_eq!(dv.ino, LoadedAddr(20)); - assert!(dv.is_dir); - } -} diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs new file mode 100644 index 00000000..59731d28 --- /dev/null +++ b/tests/dcache_correctness.rs @@ -0,0 +1,92 @@ +#![allow(clippy::unwrap_used, missing_docs)] + +use std::ffi::{OsStr, OsString}; + +use git_fs::fs::LoadedAddr; +use git_fs::fs::dcache::DCache; + +#[tokio::test] +async fn lookup_returns_none_for_missing_entry() { + let cache = DCache::new(); + assert!(cache.lookup(LoadedAddr(1), OsStr::new("foo")).is_none()); +} + +#[tokio::test] +async fn insert_then_lookup() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + assert!(dv.is_some(), "entry should be present after insert"); + let dv = dv.expect("checked above"); + assert_eq!(dv.ino, LoadedAddr(10)); + assert!(!dv.is_dir); +} + +#[tokio::test] +async fn readdir_returns_only_children_of_parent() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("a"), LoadedAddr(10), false) + .await; + cache + .insert(LoadedAddr(1), OsString::from("b"), LoadedAddr(11), true) + .await; + cache + .insert(LoadedAddr(2), OsString::from("c"), LoadedAddr(12), false) + .await; + let children = cache.readdir(LoadedAddr(1)).await; + assert_eq!(children.len(), 2); + let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); + assert!(names.contains(&OsString::from("a"))); + assert!(names.contains(&OsString::from("b"))); +} + +#[tokio::test] +async fn readdir_empty_parent_returns_empty() { + let cache = DCache::new(); + let children = cache.readdir(LoadedAddr(1)).await; + assert!(children.is_empty()); +} + +#[tokio::test] +async fn is_populated_false_by_default() { + let cache = DCache::new(); + assert!(!cache.is_populated(LoadedAddr(1))); +} + +#[tokio::test] +async fn 
mark_populated_then_check() { + let cache = DCache::new(); + cache.mark_populated(LoadedAddr(1)); + assert!(cache.is_populated(LoadedAddr(1))); +} + +#[tokio::test] +async fn insert_does_not_mark_populated() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + assert!( + !cache.is_populated(LoadedAddr(1)), + "insert alone should not mark a directory as populated" + ); +} + +#[tokio::test] +async fn upsert_overwrites_existing_entry() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(20), true) + .await; + let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + assert!(dv.is_some(), "entry should still be present after upsert"); + let dv = dv.expect("checked above"); + assert_eq!(dv.ino, LoadedAddr(20)); + assert!(dv.is_dir); +} From 903392f23572fa882a9c1f415fcd4c77cd0c8981 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:24:26 -0800 Subject: [PATCH 19/41] feat: add CompositeFs struct with FsDataProvider impl --- lib/fs/composite.rs | 350 +++++++++++++++++++++++++++++++++++++- src/fs/mescloud/common.rs | 3 + 2 files changed, 349 insertions(+), 4 deletions(-) diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index d8237dcb..ceb29308 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -9,11 +9,14 @@ use std::ffi::{OsStr, OsString}; use std::future::Future; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; use bytes::Bytes; -use crate::fs::INode; -use crate::fs::async_fs::{FileReader, FsDataProvider}; +use crate::cache::async_backed::FutureBackedCache; +use crate::fs::async_fs::{FileReader, FsDataProvider, OpenFile}; +use crate::fs::bridge::ConcurrentBridge; +use crate::fs::{INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags}; /// Descriptor for a child filesystem returned by [`CompositeRoot`]. 
pub struct ChildDescriptor { @@ -73,7 +76,6 @@ mod child_inner_impl { } impl ChildInner { - #[expect(dead_code, reason = "used by CompositeFs in a follow-up commit")] pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { ChildInnerBuilder { table, @@ -82,7 +84,6 @@ mod child_inner_impl { .build() } - #[expect(dead_code, reason = "used by CompositeFs in a follow-up commit")] pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> { self.borrow_fs() } @@ -124,3 +125,344 @@ impl FileReader for CompositeReader { self.inner.close() } } + +struct ChildSlot { + inner: Arc>, + bridge: ConcurrentBridge, +} + +struct CompositeFsInner { + root: R, + /// Child slots, indexed by slot number. + slots: scc::HashMap>, + /// Maps a composite-level outer inode to its child slot index. + addr_to_slot: scc::HashMap, + /// Maps child name to slot index (for dedup on concurrent resolve). + name_to_slot: scc::HashMap, + /// Monotonically increasing slot counter. + next_slot: AtomicU64, + /// Monotonically increasing inode counter. Starts at 2 (1 = root). + next_ino: AtomicU64, + /// The filesystem owner uid/gid. + fs_owner: (u32, u32), +} + +/// A generic composite filesystem that routes to child `AsyncFs` instances. +/// +/// Implements [`FsDataProvider`] so it can be used inside another `AsyncFs`. +/// Clone is cheap (shared `Arc`). +pub struct CompositeFs { + inner: Arc>, +} + +impl Clone for CompositeFs { + fn clone(&self) -> Self { + Self { + inner: Arc::clone(&self.inner), + } + } +} + +impl CompositeFs { + /// Root inode address for this composite level. + pub const ROOT_INO: InodeAddr = 1; + + /// Create a new composite filesystem. 
+ #[must_use] + pub fn new(root: R, fs_owner: (u32, u32)) -> Self { + Self { + inner: Arc::new(CompositeFsInner { + root, + slots: scc::HashMap::new(), + addr_to_slot: scc::HashMap::new(), + name_to_slot: scc::HashMap::new(), + next_slot: AtomicU64::new(0), + next_ino: AtomicU64::new(2), // 1 = root + fs_owner, + }), + } + } + + /// Build the root inode for this composite filesystem. + #[must_use] + pub fn make_root_inode(&self) -> INode { + let now = std::time::SystemTime::now(); + INode { + addr: Self::ROOT_INO, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.inner.fs_owner.0, + gid: self.inner.fs_owner.1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + } + } + + fn allocate_ino(&self) -> InodeAddr { + self.inner.next_ino.fetch_add(1, Ordering::Relaxed) + } + + fn make_child_dir_inode(&self, addr: InodeAddr) -> INode { + let now = std::time::SystemTime::now(); + INode { + addr, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.inner.fs_owner.0, + gid: self.inner.fs_owner.1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_INO), + size: 0, + itype: INodeType::Directory, + } + } + + /// Register a child, returning the composite-level outer inode address. + /// + /// If the child is already registered by name, the existing outer address + /// is returned. Otherwise a new slot is created with a fresh inode table + /// and bridge mapping. + fn register_child(&self, desc: &ChildDescriptor) -> InodeAddr + where + R::ChildDP: Clone, + { + // Fast path: already registered by name. + match self.inner.name_to_slot.entry_sync(desc.name.clone()) { + scc::hash_map::Entry::Occupied(occ) => { + let slot_idx = *occ.get(); + // Return existing outer address for this child's root inode. 
+ if let Some(outer) = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + slot.bridge.backward(desc.root_ino.addr) + }) + .flatten() + { + return outer; + } + // Slot exists but bridge has no mapping — should not happen, + // but fall through to create a fresh slot below. + // (Remove stale name entry so the vacant path can re-insert.) + drop(occ); + self.inner.name_to_slot.remove_sync(&desc.name); + } + scc::hash_map::Entry::Vacant(vac) => { + // Claim the name slot atomically. + let outer_ino = self.allocate_ino(); + #[expect( + clippy::cast_possible_truncation, + reason = "slot index fits in usize on 64-bit" + )] + let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; + + let table = FutureBackedCache::default(); + table.insert_sync(desc.root_ino.addr, desc.root_ino); + let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); + + let bridge = ConcurrentBridge::new(); + bridge.insert(outer_ino, desc.root_ino.addr); + + drop(self.inner.slots.insert_sync( + slot_idx, + ChildSlot { + inner: child_inner, + bridge, + }, + )); + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + vac.insert_entry(slot_idx); + + return outer_ino; + } + } + + // Fallback: name was stale, create fresh. This path is rare. 
+ let outer_ino = self.allocate_ino(); + #[expect( + clippy::cast_possible_truncation, + reason = "slot index fits in usize on 64-bit" + )] + let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; + + let table = FutureBackedCache::default(); + table.insert_sync(desc.root_ino.addr, desc.root_ino); + let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); + + let bridge = ConcurrentBridge::new(); + bridge.insert(outer_ino, desc.root_ino.addr); + + drop(self.inner.slots.insert_sync( + slot_idx, + ChildSlot { + inner: child_inner, + bridge, + }, + )); + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + drop( + self.inner + .name_to_slot + .insert_sync(desc.name.clone(), slot_idx), + ); + + outer_ino + } +} + +impl FsDataProvider for CompositeFs +where + R::ChildDP: Clone, + <::ChildDP as FsDataProvider>::Reader: 'static, +{ + type Reader = CompositeReader<<::ChildDP as FsDataProvider>::Reader>; + + async fn lookup(&self, parent: INode, name: &OsStr) -> Result { + if parent.addr == Self::ROOT_INO { + let desc = self + .inner + .root + .resolve_child(name) + .await? + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let outer_ino = self.register_child(&desc); + Ok(self.make_child_dir_inode(outer_ino)) + } else { + let slot_idx = self + .inner + .addr_to_slot + .read_sync(&parent.addr, |_, &v| v) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + // Extract Arc and inner parent address under the guard. + let (child, inner_parent) = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + (Arc::clone(&slot.inner), slot.bridge.forward(parent.addr)) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let inner_parent = + inner_parent.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + // Await the lookup outside any scc guard. 
+ let tracked = child + .get_fs() + .lookup(LoadedAddr(inner_parent), name) + .await?; + let child_inode = tracked.inode; + + // Translate inner address back to composite-level address. + let outer_ino = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + let next_ino = &self.inner.next_ino; + slot.bridge.backward_or_insert(child_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + + Ok(INode { + addr: outer_ino, + ..child_inode + }) + } + } + + async fn readdir(&self, parent: INode) -> Result, std::io::Error> { + if parent.addr == Self::ROOT_INO { + let children = self.inner.root.list_children().await?; + let mut entries = Vec::with_capacity(children.len()); + for desc in &children { + let outer_ino = self.register_child(desc); + entries.push((desc.name.clone(), self.make_child_dir_inode(outer_ino))); + } + Ok(entries) + } else { + let slot_idx = self + .inner + .addr_to_slot + .read_sync(&parent.addr, |_, &v| v) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let (child, inner_parent) = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + (Arc::clone(&slot.inner), slot.bridge.forward(parent.addr)) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let inner_parent = + inner_parent.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + // Collect child entries outside the guard. + let mut child_entries = Vec::new(); + child + .get_fs() + .readdir(LoadedAddr(inner_parent), 0, |de, _offset| { + child_entries.push((de.name.to_os_string(), de.inode)); + false + }) + .await?; + + // Translate all inner addresses to composite-level addresses. 
+ let mut entries = Vec::with_capacity(child_entries.len()); + for (name, child_inode) in child_entries { + let outer_ino = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + let next_ino = &self.inner.next_ino; + slot.bridge.backward_or_insert(child_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + entries.push(( + name, + INode { + addr: outer_ino, + ..child_inode + }, + )); + } + Ok(entries) + } + } + + async fn open(&self, inode: INode, flags: OpenFlags) -> Result { + let slot_idx = self + .inner + .addr_to_slot + .read_sync(&inode.addr, |_, &v| v) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let (child, inner_ino) = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + (Arc::clone(&slot.inner), slot.bridge.forward(inode.addr)) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let inner_ino = inner_ino.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let open_file: OpenFile<<::ChildDP as FsDataProvider>::Reader> = + child.get_fs().open(LoadedAddr(inner_ino), flags).await?; + + Ok(CompositeReader { + inner: open_file.reader, + }) + } +} diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index 6e9c8bf8..473b5e54 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -149,6 +149,9 @@ pub(super) trait ChildFs: Send + Sync { async fn release(&mut self, ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError>; } +// Tests kept inline: these types live in the binary crate and are not +// re-exported through the `git_fs` lib, so integration tests in `tests/` +// cannot access them. 
#[cfg(test)] mod tests { use super::*; From 4c55565e46733798ce61689a4c1a718b1a6e17d2 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:29:23 -0800 Subject: [PATCH 20/41] refactor: extract slot creation helper in register_child --- lib/fs/composite.rs | 89 ++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 46 deletions(-) diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index ceb29308..bf063307 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -219,6 +219,39 @@ impl CompositeFs { } } + /// Allocate a new child slot with a fresh inode table and bridge mapping. + /// + /// Returns `(outer_ino, slot_idx)` for the newly created slot. + fn create_child_slot(&self, desc: &ChildDescriptor) -> (InodeAddr, usize) + where + R::ChildDP: Clone, + { + let outer_ino = self.allocate_ino(); + #[expect( + clippy::cast_possible_truncation, + reason = "slot index fits in usize on 64-bit" + )] + let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; + + let table = FutureBackedCache::default(); + table.insert_sync(desc.root_ino.addr, desc.root_ino); + let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); + + let bridge = ConcurrentBridge::new(); + bridge.insert(outer_ino, desc.root_ino.addr); + + drop(self.inner.slots.insert_sync( + slot_idx, + ChildSlot { + inner: child_inner, + bridge, + }, + )); + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + + (outer_ino, slot_idx) + } + /// Register a child, returning the composite-level outer inode address. /// /// If the child is already registered by name, the existing outer address @@ -246,62 +279,26 @@ impl CompositeFs { // Slot exists but bridge has no mapping — should not happen, // but fall through to create a fresh slot below. // (Remove stale name entry so the vacant path can re-insert.) 
+ // + // Race window: between `drop(occ)` and the `remove_sync` below, + // another thread could read the stale entry and resolve to a + // broken slot. In the worst case two threads create separate + // slots for the same child — the last writer to `name_to_slot` + // wins and the other slot becomes orphaned. This is functionally + // harmless: the orphaned slot is never reached via name lookup + // and will not serve any future requests. drop(occ); self.inner.name_to_slot.remove_sync(&desc.name); } scc::hash_map::Entry::Vacant(vac) => { - // Claim the name slot atomically. - let outer_ino = self.allocate_ino(); - #[expect( - clippy::cast_possible_truncation, - reason = "slot index fits in usize on 64-bit" - )] - let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; - - let table = FutureBackedCache::default(); - table.insert_sync(desc.root_ino.addr, desc.root_ino); - let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); - - let bridge = ConcurrentBridge::new(); - bridge.insert(outer_ino, desc.root_ino.addr); - - drop(self.inner.slots.insert_sync( - slot_idx, - ChildSlot { - inner: child_inner, - bridge, - }, - )); - let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + let (outer_ino, slot_idx) = self.create_child_slot(desc); vac.insert_entry(slot_idx); - return outer_ino; } } // Fallback: name was stale, create fresh. This path is rare. 
- let outer_ino = self.allocate_ino(); - #[expect( - clippy::cast_possible_truncation, - reason = "slot index fits in usize on 64-bit" - )] - let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; - - let table = FutureBackedCache::default(); - table.insert_sync(desc.root_ino.addr, desc.root_ino); - let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); - - let bridge = ConcurrentBridge::new(); - bridge.insert(outer_ino, desc.root_ino.addr); - - drop(self.inner.slots.insert_sync( - slot_idx, - ChildSlot { - inner: child_inner, - bridge, - }, - )); - let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + let (outer_ino, slot_idx) = self.create_child_slot(desc); drop( self.inner .name_to_slot From 5e31225a2e11801bcfffba031d55441e4ad0461f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:32:54 -0800 Subject: [PATCH 21/41] test: add integration tests for generic CompositeFs --- tests/common/composite_mocks.rs | 55 ++++++ tests/common/mod.rs | 1 + tests/composite_fs_tests.rs | 285 ++++++++++++++++++++++++++++++++ 3 files changed, 341 insertions(+) create mode 100644 tests/common/composite_mocks.rs create mode 100644 tests/composite_fs_tests.rs diff --git a/tests/common/composite_mocks.rs b/tests/common/composite_mocks.rs new file mode 100644 index 00000000..413621d3 --- /dev/null +++ b/tests/common/composite_mocks.rs @@ -0,0 +1,55 @@ +#![allow(missing_docs, clippy::unwrap_used)] + +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::sync::Arc; + +use git_fs::fs::INode; +use git_fs::fs::composite::{ChildDescriptor, CompositeRoot}; + +use super::async_fs_mocks::MockFsDataProvider; + +/// A mock `CompositeRoot` that resolves children from a fixed map. 
+pub struct MockRoot { + pub children: Arc>, +} + +impl MockRoot { + pub fn new(children: HashMap) -> Self { + Self { + children: Arc::new(children), + } + } +} + +impl CompositeRoot for MockRoot { + type ChildDP = MockFsDataProvider; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + Ok(self + .children + .get(name) + .map(|(provider, root_ino)| ChildDescriptor { + name: name.to_os_string(), + provider: provider.clone(), + root_ino: *root_ino, + })) + } + + async fn list_children( + &self, + ) -> Result>, std::io::Error> { + Ok(self + .children + .iter() + .map(|(name, (provider, root_ino))| ChildDescriptor { + name: name.clone(), + provider: provider.clone(), + root_ino: *root_ino, + }) + .collect()) + } +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 2729c866..96aedec1 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,6 +1,7 @@ #![allow(dead_code, missing_docs, clippy::unwrap_used)] pub mod async_fs_mocks; +pub mod composite_mocks; use std::sync::{Arc, Mutex}; use std::time::Duration; diff --git a/tests/composite_fs_tests.rs b/tests/composite_fs_tests.rs new file mode 100644 index 00000000..d6470a6a --- /dev/null +++ b/tests/composite_fs_tests.rs @@ -0,0 +1,285 @@ +#![allow(clippy::unwrap_used, clippy::expect_used, missing_docs)] + +mod common; + +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; + +use bytes::Bytes; + +use git_fs::cache::async_backed::FutureBackedCache; +use git_fs::fs::async_fs::AsyncFs; +use git_fs::fs::composite::CompositeFs; +use git_fs::fs::{INode, INodeType, LoadedAddr, OpenFlags}; + +use common::async_fs_mocks::{MockFsDataProvider, MockFsState, make_inode}; +use common::composite_mocks::MockRoot; + +/// Build a child data provider with a root directory and a set of children. +/// +/// Each child is `(name, addr, itype, size)`. Files get auto-generated content +/// of the form `"content of {name}"`. 
+fn make_child_provider( + root_addr: u64, + children: &[(&str, u64, INodeType, u64)], +) -> (MockFsDataProvider, INode) { + let root = make_inode(root_addr, INodeType::Directory, 0, None); + let mut state = MockFsState::default(); + let mut dir_entries = Vec::new(); + for (name, addr, itype, size) in children { + let child = make_inode(*addr, *itype, *size, Some(root_addr)); + state + .lookups + .insert((root_addr, OsString::from(name)), child); + dir_entries.push((OsString::from(name), child)); + if *itype == INodeType::File { + state + .file_contents + .insert(*addr, Bytes::from(format!("content of {name}"))); + } + } + state.directories.insert(root_addr, dir_entries); + (MockFsDataProvider::new(state), root) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_root_lookup_resolves_child() { + let (provider, root_ino) = make_child_provider(100, &[("file.txt", 101, INodeType::File, 42)]); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo-a"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + let tracked = afs + .lookup(LoadedAddr(1), OsStr::new("repo-a")) + .await + .unwrap(); + + assert_eq!( + tracked.inode.itype, + INodeType::Directory, + "child should appear as a directory at composite level" + ); + assert_ne!( + tracked.inode.addr, 1, + "child should have a composite-level address different from root" + ); + assert_eq!( + tracked.inode.parent, + Some(1), + "child directory should have the composite root as parent" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_root_readdir_lists_children() { + let (prov_a, root_a) = make_child_provider(100, &[]); + let (prov_b, root_b) = 
make_child_provider(200, &[]); + + let mut children = HashMap::new(); + children.insert(OsString::from("alpha"), (prov_a, root_a)); + children.insert(OsString::from("beta"), (prov_b, root_b)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + let mut entries = Vec::new(); + afs.readdir(LoadedAddr(1), 0, |de, _offset| { + entries.push(de.name.to_os_string()); + false + }) + .await + .unwrap(); + + entries.sort(); + assert_eq!(entries.len(), 2, "should list both children"); + assert_eq!(entries[0], "alpha"); + assert_eq!(entries[1], "beta"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_delegated_lookup_reaches_child() { + let (provider, root_ino) = make_child_provider( + 100, + &[ + ("readme.md", 101, INodeType::File, 256), + ("src", 102, INodeType::Directory, 0), + ], + ); + + let mut children = HashMap::new(); + children.insert(OsString::from("my-repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + // First, lookup the child at root level. + let child_dir = afs + .lookup(LoadedAddr(1), OsStr::new("my-repo")) + .await + .unwrap(); + let child_addr = child_dir.inode.addr; + + // Then, lookup a file inside the child. + let file = afs + .lookup(LoadedAddr(child_addr), OsStr::new("readme.md")) + .await + .unwrap(); + + assert_eq!(file.inode.itype, INodeType::File); + assert_eq!(file.inode.size, 256); + + // Also lookup a subdirectory inside the child. 
+ let subdir = afs + .lookup(LoadedAddr(child_addr), OsStr::new("src")) + .await + .unwrap(); + + assert_eq!(subdir.inode.itype, INodeType::Directory); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_open_and_read_through_child() { + let (provider, root_ino) = make_child_provider(100, &[("hello.txt", 101, INodeType::File, 20)]); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + // Navigate to the file. + let child_dir = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + let file_tracked = afs + .lookup(LoadedAddr(child_dir.inode.addr), OsStr::new("hello.txt")) + .await + .unwrap(); + let file_addr = file_tracked.inode.addr; + + // Open and read. 
+ let open_file = afs + .open(LoadedAddr(file_addr), OpenFlags::empty()) + .await + .unwrap(); + let data = open_file.read(0, 1024).await.unwrap(); + + assert_eq!( + data, + Bytes::from("content of hello.txt"), + "should read the file content through the composite layer" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_lookup_unknown_child_returns_enoent() { + let (provider, root_ino) = make_child_provider(100, &[]); + + let mut children = HashMap::new(); + children.insert(OsString::from("existing"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + let err = afs + .lookup(LoadedAddr(1), OsStr::new("nonexistent")) + .await + .unwrap_err(); + + assert_eq!( + err.raw_os_error(), + Some(libc::ENOENT), + "looking up a nonexistent child at root should return ENOENT" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_readdir_delegated_lists_child_contents() { + let (provider, root_ino) = make_child_provider( + 100, + &[ + ("a.rs", 101, INodeType::File, 10), + ("b.rs", 102, INodeType::File, 20), + ("lib", 103, INodeType::Directory, 0), + ], + ); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + // Navigate into the child. + let child_dir = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + + // Readdir inside the child. 
+ let mut entries = Vec::new(); + afs.readdir(LoadedAddr(child_dir.inode.addr), 0, |de, _offset| { + entries.push((de.name.to_os_string(), de.inode.itype)); + false + }) + .await + .unwrap(); + + entries.sort_by(|(a, _), (b, _)| a.cmp(b)); + assert_eq!(entries.len(), 3); + assert_eq!(entries[0], (OsString::from("a.rs"), INodeType::File)); + assert_eq!(entries[1], (OsString::from("b.rs"), INodeType::File)); + assert_eq!(entries[2], (OsString::from("lib"), INodeType::Directory)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_repeated_lookup_returns_same_addr() { + let (provider, root_ino) = make_child_provider(100, &[]); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + let first = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + let second = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + + assert_eq!( + first.inode.addr, second.inode.addr, + "repeated lookups for the same child should return the same composite address" + ); +} From aa989f7118c8159bce364af0b9d8f535022c6955 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 15:03:06 -0800 Subject: [PATCH 22/41] feat: add domain roots (MesaRoot, StandardOrgRoot, GithubOrgRoot) and OrgChildDP enum --- src/fs/mescloud/mod.rs | 1 + src/fs/mescloud/roots.rs | 483 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 484 insertions(+) create mode 100644 src/fs/mescloud/roots.rs diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index 15a70725..a9e5e155 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -32,6 +32,7 @@ pub use org::OrgConfig; use org::OrgFs; pub mod 
repo; +mod roots; struct HeaderInjector<'a>(&'a mut reqwest::header::HeaderMap); diff --git a/src/fs/mescloud/roots.rs b/src/fs/mescloud/roots.rs new file mode 100644 index 00000000..aafe0c4c --- /dev/null +++ b/src/fs/mescloud/roots.rs @@ -0,0 +1,483 @@ +//! Domain-specific [`CompositeRoot`] implementations and the [`OrgChildDP`] enum. +//! +//! Bridges the generic `CompositeFs` from `lib/fs/composite.rs` with +//! Mesa/GitHub-specific org and repo resolution logic. +//! +//! These types are not yet wired into the daemon entry point; they will be +//! connected in a follow-up change that replaces the old `MesaFS` + `OrgFs` +//! pipeline. +#![expect(dead_code, reason = "wired in the follow-up daemon change")] + +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; +use std::time::SystemTime; + +use base64::Engine as _; +use futures::TryStreamExt as _; +use mesa_dev::MesaClient; +use tracing::warn; + +use git_fs::cache::fcache::FileCache; +use git_fs::fs::async_fs::{FileReader, FsDataProvider}; +use git_fs::fs::composite::{ChildDescriptor, CompositeFs, CompositeReader, CompositeRoot}; +use git_fs::fs::{INode, INodeType, InodeAddr, InodePerms, OpenFlags}; + +use super::common::MesaApiError; +use super::repo::{MesFileReader, MesRepoProvider}; +use crate::app_config::CacheConfig; + +const CHILD_ROOT_ADDR: InodeAddr = 1; + +fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { + match &e { + MesaApiError::Response { status, .. } if *status == 404 => { + std::io::Error::from_raw_os_error(libc::ENOENT) + } + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. } => std::io::Error::other(e), + } +} + +/// Create a [`MesRepoProvider`] and its root [`INode`] for a given repo. 
+async fn create_repo_provider( + client: &MesaClient, + org_name: &str, + repo_name: &str, + ref_: &str, + fs_owner: (u32, u32), + cache_config: &CacheConfig, +) -> (MesRepoProvider, INode) { + let file_cache = match cache_config.max_size { + Some(max_size) if max_size.as_u64() > 0 => { + let cache_dir = cache_config.path.join(org_name).join(repo_name); + let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); + match FileCache::new(&cache_dir, max_bytes).await { + Ok(cache) => Some(Arc::new(cache)), + Err(e) => { + warn!(error = ?e, org = %org_name, repo = %repo_name, + "failed to create file cache, continuing without caching"); + None + } + } + } + _ => None, + }; + + let provider = MesRepoProvider::new( + client.clone(), + org_name.to_owned(), + repo_name.to_owned(), + ref_.to_owned(), + fs_owner, + file_cache, + ); + + provider.seed_root_path(CHILD_ROOT_ADDR); + + let now = SystemTime::now(); + let root_ino = INode { + addr: CHILD_ROOT_ADDR, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + + (provider, root_ino) +} + +/// Returns `Ok(())` if the error is a 404; otherwise returns the IO error. +/// +/// Callers use this to treat 404 as "not found" (return `Ok(None)`) while +/// propagating all other API errors. +fn check_not_found(e: MesaApiError) -> Result<(), std::io::Error> { + match &e { + MesaApiError::Response { status, .. } if *status == 404 => Ok(()), + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. 
} => Err(mesa_api_error_to_io(e)), + } +} + +pub(super) struct StandardOrgRoot { + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl StandardOrgRoot { + pub(super) fn new( + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), + ) -> Self { + Self { + client, + org_name, + cache_config, + fs_owner, + } + } +} + +impl CompositeRoot for StandardOrgRoot { + type ChildDP = MesRepoProvider; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let name_str = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "repo name contains non-UTF-8 characters", + ) + })?; + + let repo = match self + .client + .org(&self.org_name) + .repos() + .at(name_str) + .get() + .await + .map_err(MesaApiError::from) + { + Ok(repo) => repo, + Err(e) => { + check_not_found(e)?; + return Ok(None); + } + }; + + // Single-repo GET returns `default_branch: String` (non-optional), + // unlike the list endpoint which returns `Option`. 
+ let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + name_str, + &repo.default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + let repos: Vec = self + .client + .org(&self.org_name) + .repos() + .list(None) + .try_collect() + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let mut children = Vec::with_capacity(repos.len()); + for repo in repos { + let Some(repo_name) = repo.name else { + continue; + }; + let default_branch = repo.default_branch.unwrap_or_else(|| "main".to_owned()); + + let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + &repo_name, + &default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + children.push(ChildDescriptor { + name: OsString::from(repo_name), + provider, + root_ino, + }); + } + + Ok(children) + } +} + +pub(super) struct GithubRepoRoot { + client: MesaClient, + org_name: String, + owner: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl CompositeRoot for GithubRepoRoot { + type ChildDP = MesRepoProvider; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let repo_name = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "repo name contains non-UTF-8 characters", + ) + })?; + + let full_decoded = format!("{}/{}", self.owner, repo_name); + let encoded = base64::engine::general_purpose::STANDARD.encode(&full_decoded); + + let repo = match self + .client + .org(&self.org_name) + .repos() + .at(&encoded) + .get() + .await + .map_err(MesaApiError::from) + { + Ok(repo) => repo, + Err(e) => { + check_not_found(e)?; + return Ok(None); + } + }; + + // Single-repo GET returns `default_branch: String` (non-optional). 
+ let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + &encoded, + &repo.default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Err(std::io::Error::from_raw_os_error(libc::EPERM)) + } +} + +pub(super) struct GithubOrgRoot { + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl GithubOrgRoot { + pub(super) fn new( + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), + ) -> Self { + Self { + client, + org_name, + cache_config, + fs_owner, + } + } +} + +impl CompositeRoot for GithubOrgRoot { + type ChildDP = CompositeFs; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let owner = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "owner name contains non-UTF-8 characters", + ) + })?; + + let repo_root = GithubRepoRoot { + client: self.client.clone(), + org_name: self.org_name.clone(), + owner: owner.to_owned(), + cache_config: self.cache_config.clone(), + fs_owner: self.fs_owner, + }; + + let composite = CompositeFs::new(repo_root, self.fs_owner); + let root_ino = composite.make_root_inode(); + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider: composite, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Err(std::io::Error::from_raw_os_error(libc::EPERM)) + } +} + +#[derive(Clone)] +pub(super) enum OrgChildDP { + Standard(CompositeFs), + Github(CompositeFs), +} + +impl OrgChildDP { + fn make_root_inode(&self) -> INode { + match self { + Self::Standard(c) => c.make_root_inode(), + Self::Github(c) => c.make_root_inode(), + } + } +} + +impl FsDataProvider for OrgChildDP { + type Reader = OrgChildReader; + + fn lookup( + &self, + parent: INode, 
+ name: &OsStr, + ) -> impl Future> + Send { + let this = self.clone(); + let name = name.to_os_string(); + async move { + match this { + Self::Standard(c) => c.lookup(parent, &name).await, + Self::Github(c) => c.lookup(parent, &name).await, + } + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let this = self.clone(); + async move { + match this { + Self::Standard(c) => c.readdir(parent).await, + Self::Github(c) => c.readdir(parent).await, + } + } + } + + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send { + let this = self.clone(); + async move { + match this { + Self::Standard(c) => c.open(inode, flags).await.map(OrgChildReader::Standard), + Self::Github(c) => c.open(inode, flags).await.map(OrgChildReader::Github), + } + } + } +} + +pub(super) enum OrgChildReader { + Standard(CompositeReader), + Github(CompositeReader>), +} + +impl std::fmt::Debug for OrgChildReader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Standard(_) => f.debug_tuple("Standard").finish(), + Self::Github(_) => f.debug_tuple("Github").finish(), + } + } +} + +impl FileReader for OrgChildReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { + match self { + Self::Standard(r) => futures::future::Either::Left(r.read(offset, size)), + Self::Github(r) => futures::future::Either::Right(r.read(offset, size)), + } + } + + fn close(&self) -> impl Future> + Send { + match self { + Self::Standard(r) => futures::future::Either::Left(r.close()), + Self::Github(r) => futures::future::Either::Right(r.close()), + } + } +} + +pub(super) struct MesaRoot { + orgs: Vec<(OsString, OrgChildDP)>, +} + +impl MesaRoot { + pub(super) fn new(orgs: Vec<(OsString, OrgChildDP)>) -> Self { + Self { orgs } + } +} + +impl CompositeRoot for MesaRoot { + type ChildDP = OrgChildDP; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + 
let found = self.orgs.iter().find(|(n, _)| n == name); + match found { + Some((_, dp)) => Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider: dp.clone(), + root_ino: dp.make_root_inode(), + })), + None => Ok(None), + } + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Ok(self + .orgs + .iter() + .map(|(name, dp)| ChildDescriptor { + name: name.clone(), + provider: dp.clone(), + root_ino: dp.make_root_inode(), + }) + .collect()) + } +} From 9885de0dc3d56f41a9c055549b35b5d47faf6154 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 15:23:43 -0800 Subject: [PATCH 23/41] refactor: wire CompositeFs into daemon, delete old composite.rs and ChildFs --- lib/fs/async_fs.rs | 3 +- src/daemon.rs | 66 ++--- src/fs/mescloud/common.rs | 139 +---------- src/fs/mescloud/composite.rs | 456 ----------------------------------- src/fs/mescloud/mod.rs | 378 +---------------------------- src/fs/mescloud/org.rs | 390 ------------------------------ src/fs/mescloud/repo.rs | 248 +------------------ src/fs/mescloud/roots.rs | 40 +-- 8 files changed, 73 insertions(+), 1647 deletions(-) delete mode 100644 src/fs/mescloud/composite.rs delete mode 100644 src/fs/mescloud/org.rs diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 761149d2..1f81a87e 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -370,8 +370,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { /// /// The `is_populated` check-then-populate is **not** atomic. If two /// concurrent callers invoke `readdir` for the same parent, both may call - /// `dp.readdir()` and insert duplicate children. This is safe when the - /// caller serializes access (e.g. via `&mut self` on the `Fs` trait). + /// `dp.readdir()` and insert duplicate children. /// /// TODO(MES-746): Implement `opendir` and `releasedir` to snapshot directory contents and /// avoid racing with `lookup`/`createfile`. 
diff --git a/src/daemon.rs b/src/daemon.rs index 0a7a9f31..102e476b 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -1,7 +1,6 @@ use tokio::select; use crate::app_config; -use crate::fs::mescloud::{MesaFS, OrgConfig}; use tracing::{debug, error, info}; mod managed_fuse { @@ -15,12 +14,11 @@ mod managed_fuse { use nix::errno::Errno; use git_fs::cache::async_backed::FutureBackedCache; - use git_fs::fs::{INode, INodeType, InodePerms}; - use super::{MesaFS, OrgConfig, app_config, debug, error}; - use crate::fs::mescloud::MesaFsProvider; + use super::{app_config, debug, error}; use fuser::BackgroundSession; use git_fs::fs::fuser::FuserAdapter; + use secrecy::ExposeSecret as _; pub struct FuseCoreScope { _session: BackgroundSession, @@ -40,32 +38,44 @@ mod managed_fuse { config: app_config::Config, handle: tokio::runtime::Handle, ) -> Result { - let orgs = config - .organizations - .iter() - .map(|(org_name, org)| OrgConfig { - name: org_name.clone(), - api_key: org.api_key.clone(), - }); - let mesa_fs = MesaFS::new(orgs, (config.uid, config.gid), &config.cache); + let fs_owner = (config.uid, config.gid); + + let mut org_children = Vec::new(); + for (org_name, org_conf) in &config.organizations { + let client = + crate::fs::mescloud::build_mesa_client(org_conf.api_key.expose_secret()); + let dp = if org_name == "github" { + let github_org_root = crate::fs::mescloud::roots::GithubOrgRoot::new( + client, + org_name.clone(), + config.cache.clone(), + fs_owner, + ); + crate::fs::mescloud::roots::OrgChildDP::Github( + git_fs::fs::composite::CompositeFs::new(github_org_root, fs_owner), + ) + } else { + let standard_org_root = crate::fs::mescloud::roots::StandardOrgRoot::new( + client, + org_name.clone(), + config.cache.clone(), + fs_owner, + ); + crate::fs::mescloud::roots::OrgChildDP::Standard( + git_fs::fs::composite::CompositeFs::new(standard_org_root, fs_owner), + ) + }; + org_children.push((std::ffi::OsString::from(org_name), dp)); + } + + let mesa_root = 
crate::fs::mescloud::roots::MesaRoot::new(org_children); + let composite = git_fs::fs::composite::CompositeFs::new(mesa_root, fs_owner); let table = FutureBackedCache::default(); - let now = std::time::SystemTime::now(); - let root = INode { - addr: 1, - permissions: InodePerms::from_bits_truncate(0o755), - uid: config.uid, - gid: config.gid, - create_time: now, - last_modified_at: now, - parent: None, - size: 0, - itype: INodeType::Directory, - }; - table.insert_sync(1, root); - - let provider = MesaFsProvider::new(mesa_fs); - let fuse_adapter = FuserAdapter::new(table, provider, handle); + let root_inode = composite.make_root_inode(); + table.insert_sync(1, root_inode); + + let fuse_adapter = FuserAdapter::new(table, composite, handle); let mount_opts = [ fuser::MountOption::FSName("git-fs".to_owned()), fuser::MountOption::RO, diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index 473b5e54..cf57e392 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -1,9 +1,3 @@ -//! Shared types and helpers used by both `MesaFS` and `RepoFs`. 
- -use std::ffi::{OsStr, OsString}; - -use bytes::Bytes; -use git_fs::fs::{FileHandle, INode, InodeAddr, OpenFlags as LibOpenFlags}; use mesa_dev::low_level::apis; use thiserror::Error; @@ -46,129 +40,16 @@ impl From> for MesaAp } } -#[derive(Debug, Error)] -pub enum LookupError { - #[error("inode not found")] - InodeNotFound, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), -} - -#[derive(Debug, Error)] -pub enum GetAttrError { - #[error("inode not found")] - InodeNotFound, -} - -#[derive(Debug, Clone, Copy, Error)] -pub enum OpenError { - #[error("inode not found")] - InodeNotFound, -} - -#[derive(Debug, Error)] -pub enum ReadError { - #[error("file not open")] - FileNotOpen, - - #[error("inode not found")] - InodeNotFound, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), - - #[error("content is not a file")] - NotAFile, - - #[error("base64 decode error: {0}")] - Base64Decode(#[from] base64::DecodeError), -} - -#[derive(Debug, Error)] -pub enum ReadDirError { - #[error("inode not found")] - InodeNotFound, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), - - #[error("inode is not a directory")] - NotADirectory, - - #[error("operation not permitted")] - NotPermitted, -} - -impl From for ReadDirError { - fn from(e: LookupError) -> Self { - match e { - LookupError::RemoteMesaError(api) => Self::RemoteMesaError(api), - LookupError::InodeNotFound => Self::InodeNotFound, +pub(super) fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { + match &e { + MesaApiError::Response { status, .. } if *status == 404 => { + std::io::Error::from_raw_os_error(libc::ENOENT) } - } -} - -#[derive(Debug, Error)] -pub enum ReleaseError { - #[error("file not open")] - FileNotOpen, -} - -/// A directory entry for readdir results, using lib types. 
-pub struct FsDirEntry { - pub ino: InodeAddr, - pub name: OsString, -} - -/// Trait for child filesystems composed by [`CompositeFs`](super::composite::CompositeFs). -/// -/// Uses lib types (`INode`, `InodeAddr`) directly — no conversion to/from `FileAttr`. -/// Replaces the old `Fs + InodeCachePeek` bound. -#[async_trait::async_trait] -pub(super) trait ChildFs: Send + Sync { - /// Look up a child by name within the given parent directory. - async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result; - - /// List all children of a directory, returning full `INode` data for each. - async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError>; - - /// Open a file for reading. - async fn open(&mut self, ino: InodeAddr, flags: LibOpenFlags) -> Result; - - /// Read data from an open file. - async fn read( - &mut self, - ino: InodeAddr, - fh: FileHandle, - offset: u64, - size: u32, - ) -> Result; - - /// Release (close) a file handle. - async fn release(&mut self, ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError>; -} - -// Tests kept inline: these types live in the binary crate and are not -// re-exported through the `git_fs` lib, so integration tests in `tests/` -// cannot access them. -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn lookup_inode_not_found_converts_to_readdir_inode_not_found() { - let err: ReadDirError = LookupError::InodeNotFound.into(); - assert!(matches!(err, ReadDirError::InodeNotFound)); - } - - #[test] - fn lookup_remote_error_converts_to_readdir_remote_error() { - let api_err = MesaApiError::Response { - status: 500, - body: "test".to_owned(), - }; - let err: ReadDirError = LookupError::RemoteMesaError(api_err).into(); - assert!(matches!(err, ReadDirError::RemoteMesaError(_))); + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. 
} => std::io::Error::other(e), } } diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs deleted file mode 100644 index 91c35806..00000000 --- a/src/fs/mescloud/composite.rs +++ /dev/null @@ -1,456 +0,0 @@ -use std::collections::HashMap; -use std::ffi::OsStr; -use std::sync::atomic::{AtomicU64, Ordering}; - -use bytes::Bytes; -use git_fs::cache::async_backed::FutureBackedCache; -use git_fs::fs::dcache::DCache; -use git_fs::fs::{ - AsyncFsStats, FileHandle, INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags, -}; -use rustc_hash::FxHashMap; -use tracing::{instrument, trace}; - -use super::common::{ - ChildFs, FsDirEntry, GetAttrError, LookupError, OpenError, ReadDirError, ReadError, - ReleaseError, -}; - -/// Bidirectional inode mapping between outer (composite) and inner (child) address spaces. -/// -/// Convention: **outer = left, inner = right**. -pub(super) struct InodeBridge { - map: bimap::BiMap, -} - -impl InodeBridge { - pub fn new() -> Self { - Self { - map: bimap::BiMap::new(), - } - } - - pub fn insert(&mut self, outer: InodeAddr, inner: InodeAddr) { - self.map.insert(outer, inner); - } - - pub fn forward(&self, outer: InodeAddr) -> Option { - self.map.get_by_left(&outer).copied() - } - - #[expect(dead_code, reason = "will be needed by future callers")] - pub fn backward(&self, inner: InodeAddr) -> Option { - self.map.get_by_right(&inner).copied() - } - - /// Look up inner->outer, or allocate a new outer address if unmapped. 
- pub fn backward_or_insert( - &mut self, - inner: InodeAddr, - allocate: impl FnOnce() -> InodeAddr, - ) -> InodeAddr { - if let Some(&outer) = self.map.get_by_right(&inner) { - outer - } else { - let outer = allocate(); - self.map.insert(outer, inner); - outer - } - } - - pub fn remove_by_outer(&mut self, outer: InodeAddr) { - self.map.remove_by_left(&outer); - } - - #[expect(dead_code, reason = "will be needed by future callers")] - pub fn get_inner(&self, outer: InodeAddr) -> Option<&InodeAddr> { - self.map.get_by_left(&outer) - } -} - -pub(super) struct ChildSlot { - pub inner: Inner, - pub bridge: InodeBridge, -} - -/// Tracks an open file: which child slot owns it and the inner fh. -struct OpenFileEntry { - slot_idx: usize, - inner_ino: InodeAddr, - inner_fh: FileHandle, -} - -pub(super) struct CompositeFs { - pub(super) inode_table: FutureBackedCache, - pub(super) directory_cache: DCache, - next_ino: AtomicU64, - next_fh: AtomicU64, - refcounts: FxHashMap, - pub(super) readdir_buf: Vec, - open_files: HashMap, - pub(super) child_inodes: HashMap, - pub(super) inode_to_slot: HashMap, - pub(super) slots: Vec>, - fs_owner: (u32, u32), - block_size: u32, -} - -impl CompositeFs { - pub const ROOT_INO: InodeAddr = 1; - - pub fn new(fs_owner: (u32, u32), block_size: u32) -> Self { - let inode_table = FutureBackedCache::default(); - let now = std::time::SystemTime::now(); - let root = INode { - addr: Self::ROOT_INO, - permissions: InodePerms::from_bits_truncate(0o755), - uid: fs_owner.0, - gid: fs_owner.1, - create_time: now, - last_modified_at: now, - parent: None, - size: 0, - itype: INodeType::Directory, - }; - inode_table.insert_sync(Self::ROOT_INO, root); - - let mut refcounts = FxHashMap::default(); - refcounts.insert(Self::ROOT_INO, 1); - - Self { - inode_table, - directory_cache: DCache::new(), - next_ino: AtomicU64::new(Self::ROOT_INO + 1), - next_fh: AtomicU64::new(1), - refcounts, - readdir_buf: Vec::new(), - open_files: HashMap::new(), - child_inodes: 
HashMap::new(), - inode_to_slot: HashMap::new(), - slots: Vec::new(), - fs_owner, - block_size, - } - } - - pub fn allocate_inode(&self) -> InodeAddr { - self.next_ino.fetch_add(1, Ordering::Relaxed) - } - - pub fn fs_owner(&self) -> (u32, u32) { - self.fs_owner - } - - #[expect(dead_code, reason = "available for future use")] - pub fn block_size(&self) -> u32 { - self.block_size - } - - pub fn add_child(&mut self, inner: Inner, child_root_ino: InodeAddr) -> InodeAddr { - self.add_child_with_parent(inner, child_root_ino, Self::ROOT_INO) - } - - pub fn cache_inode(&self, inode: INode) { - self.inode_table.insert_sync(inode.addr, inode); - } - - /// Insert the inode into the table and initialise its refcount to zero. - /// - /// The caller is responsible for bumping the refcount via [`inc_rc`](Self::inc_rc). - pub fn cache_inode_and_init_rc(&mut self, inode: INode) { - let addr = inode.addr; - self.inode_table.insert_sync(addr, inode); - self.refcounts.entry(addr).or_insert(0); - } - - pub fn inc_rc(&mut self, addr: InodeAddr) -> Option { - let rc = self.refcounts.get_mut(&addr)?; - *rc += 1; - Some(*rc) - } - - pub fn slot_for_inode(&self, ino: InodeAddr) -> Option { - self.inode_to_slot.get(&ino).copied() - } - - /// Like [`add_child`](Self::add_child) but sets a custom parent inode - /// instead of always using `ROOT_INO`. 
- pub fn add_child_with_parent( - &mut self, - inner: Inner, - child_root_ino: InodeAddr, - parent_ino: InodeAddr, - ) -> InodeAddr { - let outer_ino = self.allocate_inode(); - let now = std::time::SystemTime::now(); - let inode = INode { - addr: outer_ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.fs_owner.0, - gid: self.fs_owner.1, - create_time: now, - last_modified_at: now, - parent: Some(parent_ino), - size: 0, - itype: INodeType::Directory, - }; - self.inode_table.insert_sync(outer_ino, inode); - - let mut bridge = InodeBridge::new(); - bridge.insert(outer_ino, child_root_ino); - - let idx = self.slots.len(); - self.slots.push(ChildSlot { inner, bridge }); - self.child_inodes.insert(outer_ino, idx); - self.inode_to_slot.insert(outer_ino, idx); - - outer_ino - } -} - -impl CompositeFs { - #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] - pub async fn delegated_lookup( - &mut self, - parent: InodeAddr, - name: &OsStr, - ) -> Result { - // Fast path: DCache hit + inode still in table - if let Some(dentry) = self.directory_cache.lookup(LoadedAddr(parent), name) - && let Some(inode) = self.inode_table.get(&dentry.ino.0).await - { - *self.refcounts.entry(inode.addr).or_insert(0) += 1; - return Ok(inode); - } - - // Slow path: delegate to child - let idx = self - .inode_to_slot - .get(&parent) - .copied() - .ok_or(LookupError::InodeNotFound)?; - let inner_parent = self.slots[idx] - .bridge - .forward(parent) - .ok_or(LookupError::InodeNotFound)?; - let inner_inode = self.slots[idx].inner.lookup(inner_parent, name).await?; - - let next_ino = &self.next_ino; - let outer_ino = self.slots[idx] - .bridge - .backward_or_insert(inner_inode.addr, || { - next_ino.fetch_add(1, Ordering::Relaxed) - }); - self.inode_to_slot.insert(outer_ino, idx); - - let remapped = INode { - addr: outer_ino, - ..inner_inode - }; - self.inode_table - .get_or_init(outer_ino, || async move { remapped }) - .await; - - let is_dir = 
matches!(inner_inode.itype, INodeType::Directory); - self.directory_cache - .insert( - LoadedAddr(parent), - name.to_os_string(), - LoadedAddr(outer_ino), - is_dir, - ) - .await; - - *self.refcounts.entry(outer_ino).or_insert(0) += 1; - let rc = self.refcounts[&outer_ino]; - trace!( - outer_ino, - inner_ino = inner_inode.addr, - rc, - "lookup: resolved via delegation" - ); - - Ok(remapped) - } - - #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] - pub async fn delegated_readdir( - &mut self, - ino: InodeAddr, - ) -> Result<&[FsDirEntry], ReadDirError> { - let idx = self - .inode_to_slot - .get(&ino) - .copied() - .ok_or(ReadDirError::InodeNotFound)?; - - if !self.directory_cache.is_populated(LoadedAddr(ino)) { - let inner_ino = self.slots[idx] - .bridge - .forward(ino) - .ok_or(ReadDirError::InodeNotFound)?; - let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; - - for (name, child_inode) in &inner_entries { - let next_ino = &self.next_ino; - let outer_child = self.slots[idx] - .bridge - .backward_or_insert(child_inode.addr, || { - next_ino.fetch_add(1, Ordering::Relaxed) - }); - self.inode_to_slot.insert(outer_child, idx); - - let remapped = INode { - addr: outer_child, - ..*child_inode - }; - self.inode_table - .get_or_init(outer_child, || async move { remapped }) - .await; - - let is_dir = matches!(child_inode.itype, INodeType::Directory); - self.directory_cache - .insert( - LoadedAddr(ino), - name.clone(), - LoadedAddr(outer_child), - is_dir, - ) - .await; - } - - self.directory_cache.mark_populated(LoadedAddr(ino)); - } - - let mut children = self.directory_cache.readdir(LoadedAddr(ino)).await; - children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); - - let mut entries = Vec::with_capacity(children.len()); - for (name, dvalue) in &children { - if let Some(inode) = self.inode_table.get(&dvalue.ino.0).await { - entries.push(FsDirEntry { - ino: inode.addr, - name: name.clone(), - }); - } - } - - self.readdir_buf = entries; - 
Ok(&self.readdir_buf) - } - - #[instrument(name = "CompositeFs::delegated_getattr", skip(self))] - pub async fn delegated_getattr(&self, ino: InodeAddr) -> Result { - self.inode_table - .get(&ino) - .await - .ok_or(GetAttrError::InodeNotFound) - } - - #[expect(dead_code, reason = "will be needed by future callers")] - #[must_use] - pub fn delegated_statfs(&self) -> AsyncFsStats { - AsyncFsStats { - block_size: self.block_size, - total_blocks: 0, - free_blocks: 0, - available_blocks: 0, - total_inodes: self.inode_table.len() as u64, - free_inodes: 0, - max_filename_length: 255, - } - } - - #[instrument(name = "CompositeFs::delegated_open", skip(self))] - pub async fn delegated_open( - &mut self, - ino: InodeAddr, - flags: OpenFlags, - ) -> Result { - let idx = self - .inode_to_slot - .get(&ino) - .copied() - .ok_or(OpenError::InodeNotFound)?; - let inner_ino = self.slots[idx] - .bridge - .forward(ino) - .ok_or(OpenError::InodeNotFound)?; - let inner_fh = self.slots[idx].inner.open(inner_ino, flags).await?; - - let outer_fh = self.next_fh.fetch_add(1, Ordering::Relaxed); - self.open_files.insert( - outer_fh, - OpenFileEntry { - slot_idx: idx, - inner_ino, - inner_fh, - }, - ); - - trace!(ino, outer_fh, inner_fh, "open: assigned fh"); - Ok(outer_fh) - } - - #[instrument(name = "CompositeFs::delegated_read", skip(self))] - pub async fn delegated_read( - &mut self, - fh: FileHandle, - offset: u64, - size: u32, - ) -> Result { - let entry = self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; - let slot_idx = entry.slot_idx; - let inner_ino = entry.inner_ino; - let inner_fh = entry.inner_fh; - self.slots[slot_idx] - .inner - .read(inner_ino, inner_fh, offset, size) - .await - } - - #[instrument(name = "CompositeFs::delegated_release", skip(self))] - pub async fn delegated_release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { - let entry = self - .open_files - .remove(&fh) - .ok_or(ReleaseError::FileNotOpen)?; - let result = self.slots[entry.slot_idx] - 
.inner - .release(entry.inner_ino, entry.inner_fh) - .await; - trace!(fh, "release: cleaned up fh mapping"); - result - } - - /// Returns `true` if the inode was evicted. - /// - /// The composite only manages its own refcounts and inode table. - /// Inner filesystem inodes are managed by the inner FS itself through - /// its own lifecycle; the composite does not propagate forget to children. - #[expect(dead_code, reason = "will be needed by future callers")] - #[must_use] - #[instrument(name = "CompositeFs::delegated_forget", skip(self))] - pub fn delegated_forget(&mut self, ino: InodeAddr, nlookups: u64) -> bool { - let slot_idx = self.inode_to_slot.get(&ino).copied(); - - if let Some(rc) = self.refcounts.get_mut(&ino) { - *rc = rc.saturating_sub(nlookups); - if *rc > 0 { - return false; - } - self.refcounts.remove(&ino); - } else { - return false; - } - - self.inode_table.remove_sync(&ino); - self.child_inodes.remove(&ino); - self.inode_to_slot.remove(&ino); - if let Some(idx) = slot_idx { - self.slots[idx].bridge.remove_by_outer(ino); - } - - true - } -} diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index a9e5e155..ab3745db 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -1,38 +1,15 @@ -use std::ffi::{OsStr, OsString}; -use std::future::Future; -use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::time::SystemTime; - -use bytes::Bytes; -use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; use mesa_dev::MesaClient; use opentelemetry::propagation::Injector; -use secrecy::ExposeSecret as _; -use tracing::{instrument, trace, warn}; use tracing_opentelemetry::OpenTelemetrySpanExt as _; -use crate::app_config::CacheConfig; - -pub use common::FsDirEntry; -use composite::CompositeFs; - -pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; - #[cfg(feature = "staging")] const MESA_API_BASE_URL: &str = "https://staging.depot.mesa.dev/api/v1"; 
#[cfg(not(feature = "staging"))] const MESA_API_BASE_URL: &str = "https://depot.mesa.dev/api/v1"; mod common; -mod composite; - -mod org; -pub use org::OrgConfig; -use org::OrgFs; - pub mod repo; -mod roots; +pub mod roots; struct HeaderInjector<'a>(&'a mut reqwest::header::HeaderMap); @@ -72,7 +49,7 @@ impl reqwest_middleware::Middleware for OtelPropagationMiddleware { } } -fn build_mesa_client(api_key: &str) -> MesaClient { +pub fn build_mesa_client(api_key: &str) -> MesaClient { let client = reqwest_middleware::ClientBuilder::new(reqwest::Client::new()) .with(OtelPropagationMiddleware) .build(); @@ -82,354 +59,3 @@ fn build_mesa_client(api_key: &str) -> MesaClient { .with_client(client) .build() } - -/// Classifies an inode by its role in the mesa hierarchy. -enum InodeRole { - /// The filesystem root (ino == 1). - Root, - /// An inode owned by some org. - OrgOwned, -} - -/// The top-level `MesaFS` filesystem. -/// -/// Composes multiple [`OrgFs`] instances, each with its own inode namespace, -/// delegating to [`CompositeFs`] for inode/fh translation at each boundary. -pub struct MesaFS { - composite: CompositeFs, -} - -impl MesaFS { - const ROOT_NODE_INO: InodeAddr = CompositeFs::::ROOT_INO; - const BLOCK_SIZE: u32 = 4096; - - /// Create a new `MesaFS` instance. - #[must_use] - pub fn new( - orgs: impl Iterator, - fs_owner: (u32, u32), - cache: &CacheConfig, - ) -> Self { - let mut composite = CompositeFs::new(fs_owner, Self::BLOCK_SIZE); - for org_conf in orgs { - let client = build_mesa_client(org_conf.api_key.expose_secret()); - let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); - composite.add_child(org, OrgFs::ROOT_INO); - } - Self { composite } - } - - /// Classify an inode by its role. 
- fn inode_role(&self, ino: InodeAddr) -> Option { - if ino == Self::ROOT_NODE_INO { - return Some(InodeRole::Root); - } - if self.composite.child_inodes.contains_key(&ino) { - return Some(InodeRole::OrgOwned); - } - if self.composite.slot_for_inode(ino).is_some() { - return Some(InodeRole::OrgOwned); - } - None - } - - /// Ensure a mesa-level inode exists for the org at `org_idx`. - /// Does NOT bump rc. - async fn ensure_org_inode(&mut self, org_idx: usize) -> (InodeAddr, INode) { - let existing_ino = self - .composite - .child_inodes - .iter() - .find(|&(_, &idx)| idx == org_idx) - .map(|(&ino, _)| ino); - - if let Some(existing_ino) = existing_ino { - if let Ok(inode) = self.composite.delegated_getattr(existing_ino).await { - trace!( - ino = existing_ino, - org_idx, "ensure_org_inode: reusing existing inode" - ); - return (existing_ino, inode); - } - warn!( - ino = existing_ino, - org_idx, "ensure_org_inode: evicted, rebuilding" - ); - let now = SystemTime::now(); - let inode = INode { - addr: existing_ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: Some(Self::ROOT_NODE_INO), - size: 0, - itype: INodeType::Directory, - }; - self.composite.cache_inode(inode); - self.composite.inode_to_slot.insert(existing_ino, org_idx); - self.composite.child_inodes.insert(existing_ino, org_idx); - return (existing_ino, inode); - } - - warn!( - org_idx, - "ensure_org_inode: no child_inodes entry for org slot" - ); - let org_name = self.composite.slots[org_idx].inner.name().to_owned(); - let ino = self.composite.allocate_inode(); - let now = SystemTime::now(); - let inode = INode { - addr: ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: Some(Self::ROOT_NODE_INO), - size: 0, - itype: INodeType::Directory, - }; 
- self.composite.cache_inode(inode); - self.composite.child_inodes.insert(ino, org_idx); - self.composite.inode_to_slot.insert(ino, org_idx); - trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); - (ino, inode) - } - - #[instrument(name = "MesaFS::lookup", skip(self))] - pub async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { - let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; - match role { - InodeRole::Root => { - let org_name = name.to_str().ok_or(LookupError::InodeNotFound)?; - let org_idx = self - .composite - .slots - .iter() - .position(|s| s.inner.name() == org_name) - .ok_or(LookupError::InodeNotFound)?; - - trace!(org = org_name, "lookup: matched org"); - let (ino, inode) = self.ensure_org_inode(org_idx).await; - self.composite - .inc_rc(ino) - .ok_or(LookupError::InodeNotFound)?; - Ok(inode) - } - InodeRole::OrgOwned => self.composite.delegated_lookup(parent, name).await, - } - } - - #[instrument(name = "MesaFS::getattr", skip(self))] - pub async fn getattr(&self, ino: InodeAddr) -> Result { - self.composite.delegated_getattr(ino).await - } - - #[instrument(name = "MesaFS::readdir", skip(self))] - pub async fn readdir(&mut self, ino: InodeAddr) -> Result<&[FsDirEntry], ReadDirError> { - let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; - match role { - InodeRole::Root => { - let org_info: Vec<(usize, String)> = self - .composite - .slots - .iter() - .enumerate() - .map(|(idx, s)| (idx, s.inner.name().to_owned())) - .collect(); - - let mut entries = Vec::with_capacity(org_info.len()); - for (org_idx, name) in &org_info { - let (entry_ino, _) = self.ensure_org_inode(*org_idx).await; - entries.push(FsDirEntry { - ino: entry_ino, - name: name.clone().into(), - }); - } - - trace!(entry_count = entries.len(), "readdir: listing orgs"); - self.composite.readdir_buf = entries; - Ok(&self.composite.readdir_buf) - } - InodeRole::OrgOwned => 
self.composite.delegated_readdir(ino).await, - } - } - - #[instrument(name = "MesaFS::open", skip(self))] - pub async fn open( - &mut self, - ino: InodeAddr, - flags: OpenFlags, - ) -> Result { - self.composite.delegated_open(ino, flags).await - } - - #[instrument(name = "MesaFS::read", skip(self))] - pub async fn read( - &mut self, - fh: FileHandle, - offset: u64, - size: u32, - ) -> Result { - self.composite.delegated_read(fh, offset, size).await - } - - #[instrument(name = "MesaFS::release", skip(self))] - pub async fn release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { - self.composite.delegated_release(fh).await - } -} - -/// A file reader that delegates reads to `MesaFS` through a shared mutex. -/// -/// Resources are released via [`FileReader::close`](git_fs::fs::async_fs::FileReader::close), -/// which is called by the FUSE adapter during `release`. Dropping without -/// calling `close()` emits a diagnostic warning. -pub struct MesaFsReader { - inner: Arc>, - fh: FileHandle, - closed: AtomicBool, -} - -impl git_fs::fs::async_fs::FileReader for MesaFsReader { - fn read( - &self, - offset: u64, - size: u32, - ) -> impl Future> + Send { - let inner = Arc::clone(&self.inner); - let fh = self.fh; - async move { - let mut guard = inner.lock().await; - guard - .read(fh, offset, size) - .await - .map_err(|e| std::io::Error::other(e.to_string())) - } - } - - fn close(&self) -> impl Future> + Send { - self.closed.store(true, Ordering::Relaxed); - let inner = Arc::clone(&self.inner); - let fh = self.fh; - async move { - let mut guard = inner.lock().await; - guard - .release(fh) - .await - .map_err(|e| std::io::Error::other(e.to_string())) - } - } -} - -impl Drop for MesaFsReader { - fn drop(&mut self) { - if !self.closed.load(Ordering::Relaxed) { - tracing::warn!(fh = self.fh, "MesaFsReader dropped without close()"); - } - } -} - -/// A [`FsDataProvider`](git_fs::fs::async_fs::FsDataProvider) that wraps -/// `MesaFS` behind a shared mutex. 
-#[derive(Clone)] -pub struct MesaFsProvider { - inner: Arc>, -} - -impl MesaFsProvider { - /// Create a new provider wrapping the given `MesaFS`. - pub fn new(mesa_fs: MesaFS) -> Self { - Self { - inner: Arc::new(tokio::sync::Mutex::new(mesa_fs)), - } - } -} - -fn lookup_error_to_io(e: LookupError) -> std::io::Error { - match e { - LookupError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), - LookupError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), - } -} - -fn readdir_error_to_io(e: ReadDirError) -> std::io::Error { - match e { - ReadDirError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), - ReadDirError::NotADirectory => std::io::Error::from_raw_os_error(libc::ENOTDIR), - ReadDirError::NotPermitted => std::io::Error::from_raw_os_error(libc::EPERM), - ReadDirError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), - } -} - -fn open_error_to_io(e: OpenError) -> std::io::Error { - match e { - OpenError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), - } -} - -impl git_fs::fs::async_fs::FsDataProvider for MesaFsProvider { - type Reader = MesaFsReader; - - fn lookup( - &self, - parent: INode, - name: &OsStr, - ) -> impl Future> + Send { - let inner = Arc::clone(&self.inner); - let name = name.to_os_string(); - async move { - let mut guard = inner.lock().await; - guard - .lookup(parent.addr, &name) - .await - .map_err(lookup_error_to_io) - } - } - - fn readdir( - &self, - parent: INode, - ) -> impl Future, std::io::Error>> + Send { - let inner = Arc::clone(&self.inner); - async move { - let mut guard = inner.lock().await; - let dir_entries: Vec<(OsString, InodeAddr)> = { - let entries = guard - .readdir(parent.addr) - .await - .map_err(readdir_error_to_io)?; - entries.iter().map(|e| (e.name.clone(), e.ino)).collect() - }; - let mut result = Vec::with_capacity(dir_entries.len()); - for (name, ino) in dir_entries { - if let Ok(inode) = guard.getattr(ino).await { - 
result.push((name, inode)); - } - } - Ok(result) - } - } - - fn open( - &self, - inode: INode, - flags: OpenFlags, - ) -> impl Future> + Send { - let inner = Arc::clone(&self.inner); - async move { - let mut guard = inner.lock().await; - let fh = guard - .open(inode.addr, flags) - .await - .map_err(open_error_to_io)?; - Ok(MesaFsReader { - inner: Arc::clone(&inner), - fh, - closed: AtomicBool::new(false), - }) - } - } -} diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs deleted file mode 100644 index feefaf8e..00000000 --- a/src/fs/mescloud/org.rs +++ /dev/null @@ -1,390 +0,0 @@ -use std::collections::HashMap; -use std::ffi::{OsStr, OsString}; -use std::time::SystemTime; - -use bytes::Bytes; -use futures::TryStreamExt as _; -use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; -use mesa_dev::MesaClient; -use secrecy::SecretString; -use tracing::{instrument, trace, warn}; - -use super::common::{ChildFs, MesaApiError}; -pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; -use super::composite::CompositeFs; -use super::repo::RepoFs; -use crate::app_config::CacheConfig; - -#[derive(Debug, Clone)] -pub struct OrgConfig { - pub name: String, - pub api_key: SecretString, -} - -/// Classifies an inode by its role in the org hierarchy. -enum InodeRole { - /// The org root directory. - OrgRoot, - /// A virtual owner directory (github only). - OwnerDir, - /// An inode owned by some repo (either a child-root or delegated). - RepoOwned, -} - -/// A filesystem rooted at a single organization. -/// -/// Composes multiple [`RepoFs`] instances, each with its own inode namespace, -/// delegating to [`CompositeFs`] for inode/fh translation at each boundary. -pub struct OrgFs { - name: String, - client: MesaClient, - composite: CompositeFs, - /// Maps org-level owner-dir inodes to owner name (github only). 
- owner_inodes: HashMap, - cache_config: CacheConfig, -} - -impl OrgFs { - pub(crate) const ROOT_INO: InodeAddr = CompositeFs::::ROOT_INO; - const BLOCK_SIZE: u32 = 4096; - - /// The name of the organization. - #[must_use] - pub(crate) fn name(&self) -> &str { - &self.name - } - - /// Whether this org uses the github two-level owner/repo hierarchy. - /// TODO(MES-674): Cleanup "special" casing for github. - fn is_github(&self) -> bool { - self.name == "github" - } - - /// Encode "owner/repo" to base64 for API calls. - /// TODO(MES-674): Cleanup "special" casing for github. - fn encode_github_repo_name(decoded: &str) -> String { - use base64::Engine as _; - base64::engine::general_purpose::STANDARD.encode(decoded) - } - - /// Ensure an inode exists for a virtual owner directory (github only). Does NOT bump rc. - /// TODO(MES-674): Cleanup "special" casing for github. - async fn ensure_owner_inode(&mut self, owner: &str) -> (InodeAddr, INode) { - // Check existing - let mut stale_ino = None; - for (&ino, existing_owner) in &self.owner_inodes { - if existing_owner == owner { - if let Ok(inode) = self.composite.delegated_getattr(ino).await { - return (ino, inode); - } - stale_ino = Some(ino); - break; - } - } - if let Some(ino) = stale_ino { - self.owner_inodes.remove(&ino); - } - - let ino = self.composite.allocate_inode(); - let now = SystemTime::now(); - let inode = INode { - addr: ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: Some(Self::ROOT_INO), - size: 0, - itype: INodeType::Directory, - }; - self.composite.cache_inode_and_init_rc(inode); - self.owner_inodes.insert(ino, owner.to_owned()); - (ino, inode) - } - - #[must_use] - pub fn new( - name: String, - client: MesaClient, - fs_owner: (u32, u32), - cache_config: CacheConfig, - ) -> Self { - Self { - name, - client, - composite: CompositeFs::new(fs_owner, Self::BLOCK_SIZE), 
- owner_inodes: HashMap::new(), - cache_config, - } - } - - /// Classify an inode by its role. - fn inode_role(&self, ino: InodeAddr) -> Option { - if ino == Self::ROOT_INO { - return Some(InodeRole::OrgRoot); - } - if self.owner_inodes.contains_key(&ino) { - return Some(InodeRole::OwnerDir); - } - if self.composite.child_inodes.contains_key(&ino) { - return Some(InodeRole::RepoOwned); - } - if self.composite.slot_for_inode(ino).is_some() { - return Some(InodeRole::RepoOwned); - } - None - } - - /// Ensure an inode + `RepoFs` exists for the given repo name. - /// Does NOT bump rc. - /// - /// - `repo_name`: name used for API calls / `RepoFs` (base64-encoded for github) - /// - `display_name`: name shown in filesystem ("linux" for github, same as `repo_name` otherwise) - /// - `parent_ino`: owner-dir inode for github, `ROOT_INO` otherwise - async fn ensure_repo_inode( - &mut self, - repo_name: &str, - display_name: &str, - default_branch: &str, - parent_ino: InodeAddr, - ) -> (InodeAddr, INode) { - // Check existing repos. - for (&ino, &idx) in &self.composite.child_inodes { - if self.composite.slots[idx].inner.repo_name() == repo_name { - if let Ok(inode) = self.composite.delegated_getattr(ino).await { - trace!(ino, repo = repo_name, "ensure_repo_inode: reusing"); - return (ino, inode); - } - warn!( - ino, - repo = repo_name, - "ensure_repo_inode: attr missing, rebuilding" - ); - return self.make_repo_dir_inode(ino); - } - } - - // Create new RepoFs and register as child. - let repo = RepoFs::new( - self.client.clone(), - self.name.clone(), - repo_name.to_owned(), - default_branch.to_owned(), - self.composite.fs_owner(), - self.cache_config.clone(), - ) - .await; - - let outer_ino = self - .composite - .add_child_with_parent(repo, RepoFs::ROOT_INO, parent_ino); - trace!( - ino = outer_ino, - repo = repo_name, - "ensure_repo_inode: allocated new inode" - ); - - // Register in directory cache so readdir sees it. 
- self.composite - .directory_cache - .insert( - git_fs::fs::LoadedAddr(parent_ino), - OsString::from(display_name), - git_fs::fs::LoadedAddr(outer_ino), - true, - ) - .await; - - let inode = self - .composite - .delegated_getattr(outer_ino) - .await - .unwrap_or_else(|_| { - let now = SystemTime::now(); - INode { - addr: outer_ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: Some(parent_ino), - size: 0, - itype: INodeType::Directory, - } - }); - (outer_ino, inode) - } - - /// Build a directory inode for `ino`, returning `(ino, inode)`. - fn make_repo_dir_inode(&self, ino: InodeAddr) -> (InodeAddr, INode) { - let now = SystemTime::now(); - let inode = INode { - addr: ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: None, - size: 0, - itype: INodeType::Directory, - }; - self.composite.cache_inode(inode); - (ino, inode) - } - - /// Fetch a repo by name via the API. 
- async fn wait_for_sync( - &self, - repo_name: &str, - ) -> Result { - self.client - .org(&self.name) - .repos() - .at(repo_name) - .get() - .await - .map_err(MesaApiError::from) - } -} - -#[async_trait::async_trait] -impl ChildFs for OrgFs { - #[instrument(name = "OrgFs::lookup", skip(self), fields(org = %self.name))] - async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { - let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; - match role { - InodeRole::OrgRoot => { - let name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; - - if self.is_github() { - trace!(owner = name_str, "lookup: resolving github owner dir"); - let (ino, inode) = self.ensure_owner_inode(name_str).await; - self.composite - .inc_rc(ino) - .ok_or(LookupError::InodeNotFound)?; - Ok(inode) - } else { - trace!(repo = name_str, "lookup: resolving repo"); - let repo = self.wait_for_sync(name_str).await?; - let (ino, inode) = self - .ensure_repo_inode(name_str, name_str, &repo.default_branch, Self::ROOT_INO) - .await; - let rc = self - .composite - .inc_rc(ino) - .ok_or(LookupError::InodeNotFound)?; - trace!(ino, repo = name_str, rc, "lookup: resolved repo inode"); - Ok(inode) - } - } - InodeRole::OwnerDir => { - let owner = self - .owner_inodes - .get(&parent) - .ok_or(LookupError::InodeNotFound)? 
- .clone(); - let repo_name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; - let full_decoded = format!("{owner}/{repo_name_str}"); - let encoded = Self::encode_github_repo_name(&full_decoded); - - trace!( - owner = %owner, repo = repo_name_str, encoded = %encoded, - "lookup: resolving github repo via owner dir" - ); - - let repo = self.wait_for_sync(&encoded).await?; - let (ino, inode) = self - .ensure_repo_inode(&encoded, repo_name_str, &repo.default_branch, parent) - .await; - self.composite - .inc_rc(ino) - .ok_or(LookupError::InodeNotFound)?; - Ok(inode) - } - InodeRole::RepoOwned => self.composite.delegated_lookup(parent, name).await, - } - } - - #[instrument(name = "OrgFs::readdir", skip(self), fields(org = %self.name))] - async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { - let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; - match role { - InodeRole::OrgRoot => { - if self.is_github() { - return Err(ReadDirError::NotPermitted); - } - - let repos: Vec = self - .client - .org(&self.name) - .repos() - .list(None) - .try_collect() - .await - .map_err(MesaApiError::from)?; - - let repo_infos: Vec<(String, String)> = repos - .into_iter() - .filter_map(|r| { - let name = r.name?; - let branch = r.default_branch.unwrap_or_else(|| "main".to_owned()); - Some((name, branch)) - }) - .collect(); - trace!(count = repo_infos.len(), "readdir: fetched repo list"); - - let mut entries = Vec::with_capacity(repo_infos.len()); - for (repo_name, default_branch) in &repo_infos { - let (_, inode) = self - .ensure_repo_inode(repo_name, repo_name, default_branch, Self::ROOT_INO) - .await; - entries.push((OsString::from(repo_name), inode)); - } - - Ok(entries) - } - InodeRole::OwnerDir if self.is_github() => Err(ReadDirError::NotPermitted), - InodeRole::OwnerDir => Err(ReadDirError::NotADirectory), - InodeRole::RepoOwned => { - let dir_entries: Vec<_> = self - .composite - .delegated_readdir(ino) - .await? 
- .iter() - .map(|e| (e.name.clone(), e.ino)) - .collect(); - let mut entries = Vec::with_capacity(dir_entries.len()); - for (name, child_ino) in dir_entries { - if let Some(inode) = self.composite.inode_table.get(&child_ino).await { - entries.push((name, inode)); - } - } - Ok(entries) - } - } - } - - #[instrument(name = "OrgFs::open", skip(self), fields(org = %self.name))] - async fn open(&mut self, ino: InodeAddr, flags: OpenFlags) -> Result { - self.composite.delegated_open(ino, flags).await - } - - #[instrument(name = "OrgFs::read", skip(self), fields(org = %self.name))] - async fn read( - &mut self, - _ino: InodeAddr, - fh: FileHandle, - offset: u64, - size: u32, - ) -> Result { - self.composite.delegated_read(fh, offset, size).await - } - - #[instrument(name = "OrgFs::release", skip(self), fields(org = %self.name))] - async fn release(&mut self, _ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError> { - self.composite.delegated_release(fh).await - } -} diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index acff3d04..f13ead88 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -2,13 +2,12 @@ //! //! This module directly accesses the mesa repo through the Rust SDK, on a per-repo basis. 
-use std::collections::HashMap; -use std::ffi::OsString; +use std::ffi::{OsStr, OsString}; use std::future::Future; +use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::SystemTime; -use std::{ffi::OsStr, path::PathBuf}; use base64::Engine as _; use bytes::Bytes; @@ -20,31 +19,12 @@ use tracing::warn; use git_fs::cache::fcache::FileCache; use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _}; use git_fs::fs::async_fs::{FileReader, FsDataProvider}; -use git_fs::fs::{ - INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags as AsyncOpenFlags, -}; +use git_fs::fs::{INode, INodeType, InodeAddr, InodePerms, OpenFlags as AsyncOpenFlags}; -use crate::app_config::CacheConfig; - -use super::common::MesaApiError; -pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; - -fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { - match &e { - MesaApiError::Response { status, .. } if *status == 404 => { - std::io::Error::from_raw_os_error(libc::ENOENT) - } - MesaApiError::Reqwest(_) - | MesaApiError::ReqwestMiddleware(_) - | MesaApiError::Serde(_) - | MesaApiError::SerdePath(_) - | MesaApiError::Io(_) - | MesaApiError::Response { .. } => std::io::Error::other(e), - } -} +use super::common::{MesaApiError, mesa_api_error_to_io}; #[derive(Clone)] -pub(super) struct MesRepoProvider { +pub struct MesRepoProvider { inner: Arc, } @@ -97,6 +77,10 @@ impl MesRepoProvider { } /// The name of the repository. 
+ #[expect( + dead_code, + reason = "useful diagnostic accessor retained for future use" + )] pub(super) fn repo_name(&self) -> &str { &self.inner.repo_name } @@ -294,7 +278,7 @@ impl FsDataProvider for MesRepoProvider { } } -pub(super) struct MesFileReader { +pub struct MesFileReader { client: MesaClient, org_name: String, repo_name: String, @@ -383,215 +367,3 @@ impl FileReader for MesFileReader { } } } - -mod repo_fs_inner { - #![allow(clippy::future_not_send, clippy::mem_forget)] - use git_fs::cache::async_backed::FutureBackedCache; - use git_fs::fs::async_fs::AsyncFs; - use git_fs::fs::{INode, InodeAddr}; - use ouroboros::self_referencing; - - use super::MesRepoProvider; - - #[self_referencing] - pub struct RepoFsInner { - pub(super) inode_table: FutureBackedCache, - #[borrows(inode_table)] - #[covariant] - pub(super) fs: AsyncFs<'this, MesRepoProvider>, - } - - impl RepoFsInner { - pub fn create( - inode_table: FutureBackedCache, - provider: MesRepoProvider, - ) -> Self { - RepoFsInnerBuilder { - inode_table, - fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), - } - .build() - } - } -} -use repo_fs_inner::RepoFsInner; - -/// A filesystem rooted at a single mesa repository. -/// -/// Wraps [`AsyncFs`] via ouroboros to co-locate the inode table -/// and the filesystem that borrows it. Implements [`Fs`] as a thin adapter. -pub struct RepoFs { - inner: RepoFsInner, - /// Reference counts for inodes held by the kernel. - refcounts: rustc_hash::FxHashMap, - /// Open file handles mapped to readers. - open_files: HashMap>, - /// Provider clone for accessing `repo_name` and `path_map` cleanup. - provider: MesRepoProvider, -} - -impl RepoFs { - pub(crate) const ROOT_INO: InodeAddr = 1; - - /// Create a new `RepoFs` for a specific org and repo. 
- pub async fn new( - client: MesaClient, - org_name: String, - repo_name: String, - ref_: String, - fs_owner: (u32, u32), - cache_config: CacheConfig, - ) -> Self { - let file_cache = match cache_config.max_size { - Some(max_size) if max_size.as_u64() > 0 => { - let cache_dir = cache_config.path.join(&org_name).join(&repo_name); - let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); - match FileCache::new(&cache_dir, max_bytes).await { - Ok(cache) => Some(Arc::new(cache)), - Err(e) => { - warn!(error = ?e, org = %org_name, repo = %repo_name, - "failed to create file cache, continuing without caching"); - None - } - } - } - _ => None, - }; - - let provider = - MesRepoProvider::new(client, org_name, repo_name, ref_, fs_owner, file_cache); - provider.seed_root_path(Self::ROOT_INO); - - let root = INode { - addr: Self::ROOT_INO, - permissions: InodePerms::from_bits_truncate(0o755), - uid: fs_owner.0, - gid: fs_owner.1, - create_time: SystemTime::now(), - last_modified_at: SystemTime::now(), - parent: None, - size: 0, - itype: INodeType::Directory, - }; - - let inode_table = git_fs::cache::async_backed::FutureBackedCache::default(); - inode_table.insert_sync(root.addr, root); - - let inner = RepoFsInner::create(inode_table, provider.clone()); - - let mut refcounts = rustc_hash::FxHashMap::default(); - refcounts.insert(Self::ROOT_INO, 1); - - Self { - inner, - refcounts, - open_files: HashMap::new(), - provider, - } - } - - /// The name of the repository this filesystem is rooted at. 
- pub(crate) fn repo_name(&self) -> &str { - self.provider.repo_name() - } -} - -#[expect( - clippy::wildcard_enum_match_arm, - reason = "mapping all ErrorKind variants is impractical; EIO is the sensible default" -)] -fn io_error_to_errno(e: &std::io::Error) -> i32 { - e.raw_os_error().unwrap_or_else(|| match e.kind() { - std::io::ErrorKind::NotFound => libc::ENOENT, - std::io::ErrorKind::PermissionDenied => libc::EACCES, - std::io::ErrorKind::AlreadyExists => libc::EEXIST, - _ => libc::EIO, - }) -} - -#[async_trait::async_trait] -impl super::common::ChildFs for RepoFs { - async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { - let tracked = self - .inner - .borrow_fs() - .lookup(LoadedAddr(parent), name) - .await - .map_err(|e| { - if io_error_to_errno(&e) == libc::ENOENT { - LookupError::InodeNotFound - } else { - LookupError::RemoteMesaError(MesaApiError::Io(e)) - } - })?; - *self.refcounts.entry(tracked.inode.addr).or_insert(0) += 1; - Ok(tracked.inode) - } - - async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { - let mut entries = Vec::new(); - self.inner - .borrow_fs() - .readdir(LoadedAddr(ino), 0, |de, _offset| { - entries.push((de.name.to_os_string(), de.inode)); - false - }) - .await - .map_err(|e| { - if io_error_to_errno(&e) == libc::ENOTDIR { - ReadDirError::NotADirectory - } else if io_error_to_errno(&e) == libc::ENOENT { - ReadDirError::InodeNotFound - } else { - ReadDirError::RemoteMesaError(MesaApiError::Io(e)) - } - })?; - Ok(entries) - } - - async fn open( - &mut self, - ino: InodeAddr, - flags: AsyncOpenFlags, - ) -> Result { - let open_file = self - .inner - .borrow_fs() - .open(LoadedAddr(ino), flags) - .await - .map_err(|_| OpenError::InodeNotFound)?; - self.open_files - .insert(open_file.fh, Arc::clone(&open_file.reader)); - Ok(open_file.fh) - } - - async fn read( - &mut self, - _ino: InodeAddr, - fh: git_fs::fs::FileHandle, - offset: u64, - size: u32, - ) -> Result { - let reader = 
self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; - reader.read(offset, size).await.map_err(|e| { - if io_error_to_errno(&e) == libc::EISDIR { - ReadError::NotAFile - } else if io_error_to_errno(&e) == libc::ENOENT { - ReadError::InodeNotFound - } else { - ReadError::RemoteMesaError(MesaApiError::Io(e)) - } - }) - } - - async fn release( - &mut self, - _ino: InodeAddr, - fh: git_fs::fs::FileHandle, - ) -> Result<(), ReleaseError> { - self.open_files - .remove(&fh) - .ok_or(ReleaseError::FileNotOpen)?; - Ok(()) - } -} diff --git a/src/fs/mescloud/roots.rs b/src/fs/mescloud/roots.rs index aafe0c4c..8893d379 100644 --- a/src/fs/mescloud/roots.rs +++ b/src/fs/mescloud/roots.rs @@ -3,10 +3,8 @@ //! Bridges the generic `CompositeFs` from `lib/fs/composite.rs` with //! Mesa/GitHub-specific org and repo resolution logic. //! -//! These types are not yet wired into the daemon entry point; they will be -//! connected in a follow-up change that replaces the old `MesaFS` + `OrgFs` -//! pipeline. -#![expect(dead_code, reason = "wired in the follow-up daemon change")] +//! These types are wired into the daemon entry point, replacing the old +//! `MesaFS` + `OrgFs` pipeline. use std::ffi::{OsStr, OsString}; use std::future::Future; @@ -23,26 +21,12 @@ use git_fs::fs::async_fs::{FileReader, FsDataProvider}; use git_fs::fs::composite::{ChildDescriptor, CompositeFs, CompositeReader, CompositeRoot}; use git_fs::fs::{INode, INodeType, InodeAddr, InodePerms, OpenFlags}; -use super::common::MesaApiError; +use super::common::{MesaApiError, mesa_api_error_to_io}; use super::repo::{MesFileReader, MesRepoProvider}; use crate::app_config::CacheConfig; const CHILD_ROOT_ADDR: InodeAddr = 1; -fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { - match &e { - MesaApiError::Response { status, .. 
} if *status == 404 => { - std::io::Error::from_raw_os_error(libc::ENOENT) - } - MesaApiError::Reqwest(_) - | MesaApiError::ReqwestMiddleware(_) - | MesaApiError::Serde(_) - | MesaApiError::SerdePath(_) - | MesaApiError::Io(_) - | MesaApiError::Response { .. } => std::io::Error::other(e), - } -} - /// Create a [`MesRepoProvider`] and its root [`INode`] for a given repo. async fn create_repo_provider( client: &MesaClient, @@ -111,7 +95,7 @@ fn check_not_found(e: MesaApiError) -> Result<(), std::io::Error> { } } -pub(super) struct StandardOrgRoot { +pub struct StandardOrgRoot { client: MesaClient, org_name: String, cache_config: CacheConfig, @@ -119,7 +103,7 @@ pub(super) struct StandardOrgRoot { } impl StandardOrgRoot { - pub(super) fn new( + pub fn new( client: MesaClient, org_name: String, cache_config: CacheConfig, @@ -222,7 +206,7 @@ impl CompositeRoot for StandardOrgRoot { } } -pub(super) struct GithubRepoRoot { +pub struct GithubRepoRoot { client: MesaClient, org_name: String, owner: String, @@ -286,7 +270,7 @@ impl CompositeRoot for GithubRepoRoot { } } -pub(super) struct GithubOrgRoot { +pub struct GithubOrgRoot { client: MesaClient, org_name: String, cache_config: CacheConfig, @@ -294,7 +278,7 @@ pub(super) struct GithubOrgRoot { } impl GithubOrgRoot { - pub(super) fn new( + pub fn new( client: MesaClient, org_name: String, cache_config: CacheConfig, @@ -347,7 +331,7 @@ impl CompositeRoot for GithubOrgRoot { } #[derive(Clone)] -pub(super) enum OrgChildDP { +pub enum OrgChildDP { Standard(CompositeFs), Github(CompositeFs), } @@ -407,7 +391,7 @@ impl FsDataProvider for OrgChildDP { } } -pub(super) enum OrgChildReader { +pub enum OrgChildReader { Standard(CompositeReader), Github(CompositeReader>), } @@ -441,12 +425,12 @@ impl FileReader for OrgChildReader { } } -pub(super) struct MesaRoot { +pub struct MesaRoot { orgs: Vec<(OsString, OrgChildDP)>, } impl MesaRoot { - pub(super) fn new(orgs: Vec<(OsString, OrgChildDP)>) -> Self { + pub fn new(orgs: 
Vec<(OsString, OrgChildDP)>) -> Self { Self { orgs } } } From 36d9fea53f29b38215e249c52f8bf0b01cf98f3c Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 16:20:10 -0800 Subject: [PATCH 24/41] bug fixes --- lib/cache/async_backed.rs | 72 ++++++++++++++---------- lib/fs/async_fs.rs | 79 +++++++++++++++++--------- lib/fs/composite.rs | 107 +++++++++++++++--------------------- lib/fs/dcache.rs | 76 ++++++++++++++++++++----- lib/fs/fuser.rs | 9 ++- src/fs/mescloud/repo.rs | 7 ++- src/fs/mescloud/roots.rs | 7 +++ tests/dcache_correctness.rs | 54 +++++++++++++++--- 8 files changed, 270 insertions(+), 141 deletions(-) diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index 8f15803b..273bcd39 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -7,6 +7,7 @@ //! Note that this cache does not support automatic eviction. use std::panic::AssertUnwindSafe; +use std::sync::atomic::{AtomicU64, Ordering}; use std::{fmt::Debug, future::Future, hash::Hash, pin::Pin}; use futures::FutureExt as _; @@ -17,10 +18,12 @@ type SharedFut = Shared> + Send>>>; /// Two-state slot: `InFlight` while a factory future is running, then promoted to `Ready` once /// the future completes. /// -/// The `InFlight` variant holds a `Shared<..., Output = Option>` where `None` signals that the -/// factory panicked (caught by `catch_unwind`). On `None`, callers remove the entry and retry. +/// The `InFlight` variant holds a generation counter and a `Shared<..., Output = Option>` +/// where `None` signals that the factory panicked (caught by `catch_unwind`). On `None`, callers +/// remove the entry only if the generation matches, avoiding destruction of a valid re-inserted +/// entry. enum Slot { - InFlight(SharedFut), + InFlight(u64, SharedFut), Ready(V), } @@ -30,6 +33,7 @@ enum Slot { /// invocation of the factory runs. All callers receive a clone of the result. 
pub struct FutureBackedCache { map: scc::HashMap>, + next_gen: AtomicU64, } impl Default for FutureBackedCache @@ -40,6 +44,7 @@ where fn default() -> Self { Self { map: scc::HashMap::default(), + next_gen: AtomicU64::new(0), } } } @@ -69,14 +74,14 @@ where .map .read_async(&key, |_, slot| match slot { Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(shared) => Err(shared.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), }) .await; match existing { Some(Ok(v)) => return v, - Some(Err(shared)) => { - if let Some(v) = self.await_shared(&key, shared).await { + Some(Err((generation, shared))) => { + if let Some(v) = self.await_shared(&key, generation, shared).await { return v; } // Factory panicked; entry removed. Fall through to re-insert below. @@ -85,20 +90,21 @@ where } // Slow path: use entry_async for atomic check-and-insert. - let shared = match self.map.entry_async(key.clone()).await { + let (generation, shared) = match self.map.entry_async(key.clone()).await { scc::hash_map::Entry::Occupied(occ) => match occ.get() { Slot::Ready(v) => return v.clone(), - Slot::InFlight(shared) => shared.clone(), + Slot::InFlight(g, shared) => (*g, shared.clone()), }, scc::hash_map::Entry::Vacant(vac) => { + let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); let shared = Self::make_shared(factory); let ret = shared.clone(); - vac.insert_entry(Slot::InFlight(shared)); - ret + vac.insert_entry(Slot::InFlight(generation, shared)); + (generation, ret) } }; - if let Some(v) = self.await_shared(&key, shared).await { + if let Some(v) = self.await_shared(&key, generation, shared).await { return v; } @@ -124,14 +130,14 @@ where .map .read_async(&key, |_, slot| match slot { Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(shared) => Err(shared.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), }) .await; match existing { Some(Ok(v)) => return Ok(v), - Some(Err(shared)) => { - if let Some(v) = 
self.await_shared(&key, shared).await { + Some(Err((generation, shared))) => { + if let Some(v) = self.await_shared(&key, generation, shared).await { return Ok(v); } // Factory panicked; entry was removed. Fall through to run our own factory. @@ -147,10 +153,13 @@ where match self.map.entry_async(key).await { scc::hash_map::Entry::Occupied(occ) => match occ.get() { Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(shared) => Ok(self - .await_shared(occ.key(), shared.clone()) - .await - .unwrap_or(val)), + Slot::InFlight(g, shared) => { + let generation = *g; + Ok(self + .await_shared(occ.key(), generation, shared.clone()) + .await + .unwrap_or(val)) + } }, scc::hash_map::Entry::Vacant(vac) => { vac.insert_entry(Slot::Ready(val.clone())); @@ -170,25 +179,30 @@ where .map .read_async(key, |_, slot| match slot { Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(shared) => Err(shared.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), }) .await; match existing { Some(Ok(v)) => Some(v), - Some(Err(shared)) => self.await_shared(key, shared).await, + Some(Err((generation, shared))) => self.await_shared(key, generation, shared).await, None => None, } } /// Await a `Shared` future, handle promotion to `Ready`, and handle panic recovery. /// + /// The `observed_gen` parameter is the generation of the `InFlight` slot that was read. + /// On panic recovery, only the entry with this exact generation is removed, preventing + /// destruction of a valid entry re-inserted by a recovered thread. + /// /// Returns `Some(v)` on success. Returns `None` if the factory panicked, after removing /// the poisoned entry from the map. 
- async fn await_shared(&self, key: &K, shared: SharedFut) -> Option { + async fn await_shared(&self, key: &K, observed_gen: u64, shared: SharedFut) -> Option { let mut guard = PromoteGuard { map: &self.map, key, + observed_gen, value: None, }; @@ -199,7 +213,7 @@ where self.map .update_async(key, |_, slot| { - if matches!(slot, Slot::InFlight(_)) { + if matches!(slot, Slot::InFlight(g, _) if *g == observed_gen) { *slot = Slot::Ready(v.clone()); } }) @@ -209,11 +223,11 @@ where Some(v) } else { // Factory panicked. Remove the poisoned InFlight entry so the next caller - // can retry. - drop( - self.map - .remove_if_sync(key, |slot| matches!(slot, Slot::InFlight(_))), - ); + // can retry — but only if the generation matches our observation. + drop(self.map.remove_if_sync( + key, + |slot| matches!(slot, Slot::InFlight(g, _) if *g == observed_gen), + )); None } } @@ -270,6 +284,7 @@ where { map: &'a scc::HashMap>, key: &'a K, + observed_gen: u64, value: Option, } @@ -280,8 +295,9 @@ where { fn drop(&mut self) { if let Some(v) = self.value.take() { + let generation = self.observed_gen; self.map.update_sync(self.key, |_, slot| { - if matches!(slot, Slot::InFlight(_)) { + if matches!(slot, Slot::InFlight(g, _) if *g == generation) { *slot = Slot::Ready(v); } }); diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 1f81a87e..a13a6617 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -64,6 +64,12 @@ pub trait FsDataProvider: Clone + Send + Sync + 'static { inode: INode, flags: OpenFlags, ) -> impl Future> + Send; + + /// Called when the kernel forgets an inode (refcount reaches zero). + /// + /// Implementations should clean up any internal mappings for the given + /// address (e.g. bridge maps, path maps). The default is a no-op. 
+ fn forget(&self, _addr: InodeAddr) {} } /// Zero-sized tag whose [`StatelessDrop`] implementation automatically evicts @@ -76,6 +82,15 @@ impl<'a> StatelessDrop<&'a FutureBackedCache, InodeAddr> for I } } +impl<'a, DP: FsDataProvider> StatelessDrop<(&'a FutureBackedCache, DP), InodeAddr> + for InodeForget +{ + fn delete(ctx: &(&'a FutureBackedCache, DP), key: &InodeAddr) { + ctx.0.remove_sync(key); + ctx.1.forget(*key); + } +} + /// A looked-up inode whose lifetime must be managed by the caller. /// /// Each `TrackedINode` returned by [`AsyncFs::lookup`] represents one @@ -283,14 +298,14 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { // Inode was evicted from the table — fall through to the slow path. let name_owned = name.to_os_string(); - let name_for_cache = name_owned.clone(); let lookup_key = (parent.0, name_owned.clone()); let dp = self.data_provider.clone(); let child = self .lookup_cache - .get_or_try_init(lookup_key, || async move { - dp.lookup(parent_ino, &name_owned).await + .get_or_try_init(lookup_key, || { + let name_for_dp = name_owned.clone(); + async move { dp.lookup(parent_ino, &name_for_dp).await } }) .await?; @@ -301,7 +316,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { self.directory_cache .insert( parent, - name_for_cache, + name_owned, LoadedAddr(child.addr), matches!(child.itype, INodeType::Directory), ) @@ -366,12 +381,6 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { /// returns `true` (indicating the caller's buffer is full), iteration /// stops early. /// - /// # Concurrency - /// - /// The `is_populated` check-then-populate is **not** atomic. If two - /// concurrent callers invoke `readdir` for the same parent, both may call - /// `dp.readdir()` and insert duplicate children. - /// /// TODO(MES-746): Implement `opendir` and `releasedir` to snapshot directory contents and /// avoid racing with `lookup`/`createfile`. 
pub async fn readdir( @@ -380,28 +389,48 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { offset: u64, mut filler: impl FnMut(DirEntry<'_>, u64) -> bool, ) -> Result<(), std::io::Error> { + use crate::fs::dcache::PopulateStatus; + let parent_inode = self.loaded_inode(parent).await?; if parent_inode.itype != INodeType::Directory { return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); } // Populate the directory cache on first readdir for this parent. - if !self.directory_cache.is_populated(parent) { - let children = self.data_provider.readdir(parent_inode).await?; - for (name, child_inode) in children { - self.inode_table - .get_or_init(child_inode.addr, || async move { child_inode }) - .await; - self.directory_cache - .insert( - parent, - name, - LoadedAddr(child_inode.addr), - child_inode.itype == INodeType::Directory, - ) - .await; + // Uses a three-state CAS gate to prevent duplicate dp.readdir() calls. + loop { + match self.directory_cache.try_claim_populate(parent) { + PopulateStatus::Claimed => { + match self.data_provider.readdir(parent_inode).await { + Ok(children) => { + for (name, child_inode) in children { + self.inode_table + .get_or_init(child_inode.addr, || async move { child_inode }) + .await; + self.directory_cache + .insert( + parent, + name, + LoadedAddr(child_inode.addr), + child_inode.itype == INodeType::Directory, + ) + .await; + } + self.directory_cache.finish_populate(parent); + } + Err(e) => { + self.directory_cache.abort_populate(parent); + return Err(e); + } + } + break; + } + PopulateStatus::InProgress => { + self.directory_cache.wait_populated(parent).await; + // Re-check: the populator may have aborted. 
+ } + PopulateStatus::Done => break, } - self.directory_cache.mark_populated(parent); } let mut children = self.directory_cache.readdir(parent).await; diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index bf063307..7d9748c6 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -128,7 +128,7 @@ impl FileReader for CompositeReader { struct ChildSlot { inner: Arc>, - bridge: ConcurrentBridge, + bridge: Arc, } struct CompositeFsInner { @@ -237,7 +237,7 @@ impl CompositeFs { table.insert_sync(desc.root_ino.addr, desc.root_ino); let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); - let bridge = ConcurrentBridge::new(); + let bridge = Arc::new(ConcurrentBridge::new()); bridge.insert(outer_ino, desc.root_ino.addr); drop(self.inner.slots.insert_sync( @@ -261,51 +261,29 @@ impl CompositeFs { where R::ChildDP: Clone, { - // Fast path: already registered by name. match self.inner.name_to_slot.entry_sync(desc.name.clone()) { - scc::hash_map::Entry::Occupied(occ) => { + scc::hash_map::Entry::Occupied(mut occ) => { let slot_idx = *occ.get(); - // Return existing outer address for this child's root inode. - if let Some(outer) = self + // Extract bridge Arc from the slot guard, then query outside. + let bridge = self .inner .slots - .read_sync(&slot_idx, |_, slot| { - slot.bridge.backward(desc.root_ino.addr) - }) - .flatten() - { + .read_sync(&slot_idx, |_, slot| Arc::clone(&slot.bridge)); + if let Some(outer) = bridge.and_then(|b| b.backward(desc.root_ino.addr)) { return outer; } - // Slot exists but bridge has no mapping — should not happen, - // but fall through to create a fresh slot below. - // (Remove stale name entry so the vacant path can re-insert.) - // - // Race window: between `drop(occ)` and the `remove_sync` below, - // another thread could read the stale entry and resolve to a - // broken slot. 
In the worst case two threads create separate - // slots for the same child — the last writer to `name_to_slot` - // wins and the other slot becomes orphaned. This is functionally - // harmless: the orphaned slot is never reached via name lookup - // and will not serve any future requests. - drop(occ); - self.inner.name_to_slot.remove_sync(&desc.name); + // Slot exists but bridge has no mapping — replace in-place + // while still holding the entry guard to prevent races. + let (outer_ino, new_slot_idx) = self.create_child_slot(desc); + *occ.get_mut() = new_slot_idx; + outer_ino } scc::hash_map::Entry::Vacant(vac) => { let (outer_ino, slot_idx) = self.create_child_slot(desc); vac.insert_entry(slot_idx); - return outer_ino; + outer_ino } } - - // Fallback: name was stale, create fresh. This path is rare. - let (outer_ino, slot_idx) = self.create_child_slot(desc); - drop( - self.inner - .name_to_slot - .insert_sync(desc.name.clone(), slot_idx), - ); - - outer_ino } } @@ -334,12 +312,16 @@ where .read_sync(&parent.addr, |_, &v| v) .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; - // Extract Arc and inner parent address under the guard. - let (child, inner_parent) = self + // Extract Arc, bridge, and inner parent address under the guard. + let (child, bridge, inner_parent) = self .inner .slots .read_sync(&slot_idx, |_, slot| { - (Arc::clone(&slot.inner), slot.bridge.forward(parent.addr)) + ( + Arc::clone(&slot.inner), + Arc::clone(&slot.bridge), + slot.bridge.forward(parent.addr), + ) }) .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; @@ -353,17 +335,10 @@ where .await?; let child_inode = tracked.inode; - // Translate inner address back to composite-level address. 
- let outer_ino = self - .inner - .slots - .read_sync(&slot_idx, |_, slot| { - let next_ino = &self.inner.next_ino; - slot.bridge.backward_or_insert(child_inode.addr, || { - next_ino.fetch_add(1, Ordering::Relaxed) - }) - }) - .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + // Translate inner address back to composite-level address (outside scc guard). + let outer_ino = bridge.backward_or_insert(child_inode.addr, || { + self.inner.next_ino.fetch_add(1, Ordering::Relaxed) + }); let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); @@ -390,11 +365,15 @@ where .read_sync(&parent.addr, |_, &v| v) .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; - let (child, inner_parent) = self + let (child, bridge, inner_parent) = self .inner .slots .read_sync(&slot_idx, |_, slot| { - (Arc::clone(&slot.inner), slot.bridge.forward(parent.addr)) + ( + Arc::clone(&slot.inner), + Arc::clone(&slot.bridge), + slot.bridge.forward(parent.addr), + ) }) .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; @@ -411,19 +390,12 @@ where }) .await?; - // Translate all inner addresses to composite-level addresses. + // Translate all inner addresses to composite-level addresses (outside scc guard). 
let mut entries = Vec::with_capacity(child_entries.len()); for (name, child_inode) in child_entries { - let outer_ino = self - .inner - .slots - .read_sync(&slot_idx, |_, slot| { - let next_ino = &self.inner.next_ino; - slot.bridge.backward_or_insert(child_inode.addr, || { - next_ino.fetch_add(1, Ordering::Relaxed) - }) - }) - .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + let outer_ino = bridge.backward_or_insert(child_inode.addr, || { + self.inner.next_ino.fetch_add(1, Ordering::Relaxed) + }); let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); entries.push(( @@ -462,4 +434,15 @@ where inner: open_file.reader, }) } + + fn forget(&self, addr: InodeAddr) { + if addr == Self::ROOT_INO { + return; + } + if let Some((_, slot_idx)) = self.inner.addr_to_slot.remove_sync(&addr) { + self.inner + .slots + .read_sync(&slot_idx, |_, slot| slot.bridge.remove_by_outer(addr)); + } + } } diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index 4870a401..aea5bb2c 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -1,6 +1,6 @@ use std::ffi::{OsStr, OsString}; use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicU8, Ordering}; use crate::fs::LoadedAddr; @@ -13,17 +13,32 @@ pub struct DValue { pub is_dir: bool, } +/// Population states for a directory. +const POPULATE_UNCLAIMED: u8 = 0; +const POPULATE_IN_PROGRESS: u8 = 1; +const POPULATE_DONE: u8 = 2; + +/// Result of attempting to claim a directory for population. +pub enum PopulateStatus { + /// This caller won the race and should populate the directory. + Claimed, + /// Another caller is currently populating; wait and re-check. + InProgress, + /// The directory is already fully populated. + Done, +} + /// Per-parent directory state holding child entries and a population flag. 
struct DirState { children: scc::HashMap, - populated: AtomicBool, + populated: AtomicU8, } impl DirState { fn new() -> Self { Self { children: scc::HashMap::new(), - populated: AtomicBool::new(false), + populated: AtomicU8::new(POPULATE_UNCLAIMED), } } } @@ -73,9 +88,7 @@ impl DCache { #[must_use] pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option { let state = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v))?; - state - .children - .read_sync(&name.to_os_string(), |_, v| v.clone()) + state.children.read_sync(name, |_, v| v.clone()) } /// Atomically inserts or overwrites a child entry in the cache. @@ -107,17 +120,50 @@ impl DCache { entries } - /// Returns `true` if the directory at `parent_ino` has been fully populated. - #[must_use] - pub fn is_populated(&self, parent_ino: LoadedAddr) -> bool { - self.dirs - .read_sync(&parent_ino, |_, v| v.populated.load(Ordering::Acquire)) - .unwrap_or(false) + /// Atomically try to claim a directory for population. + /// + /// Uses `compare_exchange` on the three-state flag: + /// - `UNCLAIMED → IN_PROGRESS`: returns `Claimed` (caller should populate) + /// - Already `IN_PROGRESS`: returns `InProgress` (caller should wait) + /// - Already `DONE`: returns `Done` (nothing to do) + pub fn try_claim_populate(&self, parent_ino: LoadedAddr) -> PopulateStatus { + let state = self.dir_state(parent_ino); + match state.populated.compare_exchange( + POPULATE_UNCLAIMED, + POPULATE_IN_PROGRESS, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => PopulateStatus::Claimed, + Err(POPULATE_IN_PROGRESS) => PopulateStatus::InProgress, + Err(_) => PopulateStatus::Done, + } } - /// Marks the directory at `parent_ino` as fully populated. - pub fn mark_populated(&self, parent_ino: LoadedAddr) { + /// Mark a directory as fully populated after successful population. 
+ pub fn finish_populate(&self, parent_ino: LoadedAddr) { let state = self.dir_state(parent_ino); - state.populated.store(true, Ordering::Release); + state.populated.store(POPULATE_DONE, Ordering::Release); + } + + /// Abort a population attempt, resetting back to unclaimed so another + /// caller can retry. + pub fn abort_populate(&self, parent_ino: LoadedAddr) { + let state = self.dir_state(parent_ino); + state.populated.store(POPULATE_UNCLAIMED, Ordering::Release); + } + + /// Wait until a directory is no longer in the `InProgress` state. + pub async fn wait_populated(&self, parent_ino: LoadedAddr) { + loop { + let current = self + .dirs + .read_sync(&parent_ino, |_, v| v.populated.load(Ordering::Acquire)) + .unwrap_or(POPULATE_UNCLAIMED); + if current != POPULATE_IN_PROGRESS { + return; + } + tokio::task::yield_now().await; + } } } diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 886a5f6f..7a9bed24 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -83,12 +83,16 @@ mod inner { /// /// Both `ward` and `fs` borrow from `table`. The ward manages inode /// refcounts; the fs serves lookup/readdir/open/read operations. + /// + /// The ward context is `(&table, DP)` so that [`InodeForget`] can both + /// remove the inode from the table and call `dp.forget()` to clean up + /// provider-internal maps (bridge mappings, path maps, etc.). 
#[self_referencing] pub(super) struct FuseBridgeInner { table: FutureBackedCache, #[borrows(table)] #[not_covariant] - ward: DropWard<&'this FutureBackedCache, InodeAddr, InodeForget>, + ward: DropWard<(&'this FutureBackedCache, DP), InodeAddr, InodeForget>, #[borrows(table)] #[covariant] fs: AsyncFs<'this, DP>, @@ -96,9 +100,10 @@ mod inner { impl FuseBridgeInner { pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { + let ward_provider = provider.clone(); FuseBridgeInnerBuilder { table, - ward_builder: |tbl| DropWard::new(tbl), + ward_builder: |tbl| DropWard::new((tbl, ward_provider)), fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), } .build() diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index f13ead88..f2041d10 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -71,8 +71,7 @@ impl MesRepoProvider { } /// Remove the path entry for an inode. Called during forget/cleanup. - #[expect(dead_code, reason = "will be needed when child forget is implemented")] - pub(super) fn remove_path(&self, addr: InodeAddr) { + fn remove_path(&self, addr: InodeAddr) { self.inner.path_map.remove_sync(&addr); } @@ -276,6 +275,10 @@ impl FsDataProvider for MesRepoProvider { }) } } + + fn forget(&self, addr: InodeAddr) { + self.remove_path(addr); + } } pub struct MesFileReader { diff --git a/src/fs/mescloud/roots.rs b/src/fs/mescloud/roots.rs index 8893d379..7c8701db 100644 --- a/src/fs/mescloud/roots.rs +++ b/src/fs/mescloud/roots.rs @@ -389,6 +389,13 @@ impl FsDataProvider for OrgChildDP { } } } + + fn forget(&self, addr: InodeAddr) { + match self { + Self::Standard(c) => c.forget(addr), + Self::Github(c) => c.forget(addr), + } + } } pub enum OrgChildReader { diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs index 59731d28..34dcf088 100644 --- a/tests/dcache_correctness.rs +++ b/tests/dcache_correctness.rs @@ -3,7 +3,7 @@ use std::ffi::{OsStr, OsString}; use git_fs::fs::LoadedAddr; -use 
git_fs::fs::dcache::DCache; +use git_fs::fs::dcache::{DCache, PopulateStatus}; #[tokio::test] async fn lookup_returns_none_for_missing_entry() { @@ -51,16 +51,53 @@ async fn readdir_empty_parent_returns_empty() { } #[tokio::test] -async fn is_populated_false_by_default() { +async fn try_claim_populate_unclaimed_returns_claimed() { let cache = DCache::new(); - assert!(!cache.is_populated(LoadedAddr(1))); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); } #[tokio::test] -async fn mark_populated_then_check() { +async fn finish_populate_then_claim_returns_done() { let cache = DCache::new(); - cache.mark_populated(LoadedAddr(1)); - assert!(cache.is_populated(LoadedAddr(1))); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); + cache.finish_populate(LoadedAddr(1)); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Done + )); +} + +#[tokio::test] +async fn double_claim_returns_in_progress() { + let cache = DCache::new(); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::InProgress + )); +} + +#[tokio::test] +async fn abort_populate_allows_reclaim() { + let cache = DCache::new(); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); + cache.abort_populate(LoadedAddr(1)); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); } #[tokio::test] @@ -70,7 +107,10 @@ async fn insert_does_not_mark_populated() { .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) .await; assert!( - !cache.is_populated(LoadedAddr(1)), + matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + ), "insert alone should not mark a directory as populated" ); } From b735ac89dc528a2115291437d2ed29f57b72095a Mon Sep 17 00:00:00 2001 From: Marko 
Vejnovic Date: Fri, 20 Feb 2026 16:52:20 -0800 Subject: [PATCH 25/41] more fixes --- lib/cache/async_backed.rs | 100 +++++++++++++++++++++++++++++++------- lib/fs/async_fs.rs | 74 ++++++++++++++++++++-------- lib/fs/bridge.rs | 36 +++++++++++--- lib/fs/composite.rs | 6 ++- lib/fs/dcache.rs | 20 +++++--- src/fs/mescloud/repo.rs | 49 ++++++++++--------- 6 files changed, 207 insertions(+), 78 deletions(-) diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index 273bcd39..f304ca6d 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -7,6 +7,7 @@ //! Note that this cache does not support automatic eviction. use std::panic::AssertUnwindSafe; +use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; use std::{fmt::Debug, future::Future, hash::Hash, pin::Pin}; @@ -116,16 +117,20 @@ where /// If the factory returns `Ok(v)`, the value is cached and returned. If it returns `Err(e)`, /// **nothing is cached** and the error is propagated to the caller. /// - /// Unlike `get_or_init`, concurrent callers are **not** deduplicated — each caller that - /// finds the key absent will invoke the factory independently. However, if a value was - /// previously cached (by either `get_or_init` or a successful `get_or_try_init`), it is - /// returned immediately without calling the factory. + /// Concurrent callers for the same key are deduplicated: only one factory invocation runs, + /// and joiners await its shared result. If the factory fails, the poisoned `InFlight` entry + /// is removed and joiners fall through to run their own factory (non-deduplicated retry). + /// + /// # Panics + /// + /// Panics if the factory panics (caught internally via `catch_unwind`). pub async fn get_or_try_init(&self, key: K, factory: F) -> Result where F: FnOnce() -> Fut, Fut: Future> + Send + 'static, + E: Send + 'static, { - // Fast path: value already cached or in-flight from an infallible init. + // Fast path: value already cached or in-flight. 
let existing = self .map .read_async(&key, |_, slot| match slot { @@ -140,30 +145,60 @@ where if let Some(v) = self.await_shared(&key, generation, shared).await { return Ok(v); } - // Factory panicked; entry was removed. Fall through to run our own factory. + // In-flight failed; fall through to slow path. } None => {} } - // Run the fallible factory (not deduplicated). - let val = factory().await?; + // Slow path: claim a slot or join an existing in-flight computation. + // The error side-channel lets the owner retrieve the `Err(e)` from the + // shared future (which only produces `Option`). + let error_cell: Arc>> = Arc::new(std::sync::Mutex::new(None)); - // Attempt to cache. If another caller raced us and already inserted, - // return the existing value and discard ours. - match self.map.entry_async(key).await { + match self.map.entry_async(key.clone()).await { scc::hash_map::Entry::Occupied(occ) => match occ.get() { Slot::Ready(v) => Ok(v.clone()), Slot::InFlight(g, shared) => { - let generation = *g; - Ok(self - .await_shared(occ.key(), generation, shared.clone()) - .await - .unwrap_or(val)) + let (generation, shared) = (*g, shared.clone()); + drop(occ); + if let Some(v) = self.await_shared(&key, generation, shared).await { + return Ok(v); + } + // In-flight failed. We still have `factory` — run it ourselves. + let val = factory().await?; + match self.map.entry_async(key).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(..) 
=> Ok(val), + }, + scc::hash_map::Entry::Vacant(vac) => { + vac.insert_entry(Slot::Ready(val.clone())); + Ok(val) + } + } } }, scc::hash_map::Entry::Vacant(vac) => { - vac.insert_entry(Slot::Ready(val.clone())); - Ok(val) + let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); + let shared = Self::make_shared_fallible(factory, Arc::clone(&error_cell)); + let ret = shared.clone(); + vac.insert_entry(Slot::InFlight(generation, shared)); + + if let Some(v) = self.await_shared(&key, generation, ret).await { + return Ok(v); + } + // Our factory returned `Err` — retrieve it from the side channel. + let captured = error_cell + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner) + .take(); + match captured { + Some(e) => Err(e), + None => panic!( + "FutureBackedCache: factory for key {key:?} resolved to None \ + but no error was captured (factory panicked)" + ), + } } } } @@ -244,6 +279,35 @@ where boxed.shared() } + /// Like [`make_shared`](Self::make_shared), but for fallible factories. + /// + /// On `Ok(v)`, the shared future resolves to `Some(v)`. On `Err(e)`, the + /// error is captured in `error_cell` and the future resolves to `None`. + fn make_shared_fallible( + factory: F, + error_cell: Arc>>, + ) -> SharedFut + where + F: FnOnce() -> Fut, + Fut: Future> + Send + 'static, + E: Send + 'static, + { + let fut = AssertUnwindSafe(factory()).catch_unwind(); + let boxed: Pin> + Send>> = Box::pin(async move { + match fut.await { + Ok(Ok(v)) => Some(v), + Ok(Err(e)) => { + *error_cell + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner) = Some(e); + None + } + Err(_panic) => None, + } + }); + boxed.shared() + } + /// Returns the number of entries in the cache (both `Ready` and `InFlight`). 
#[must_use] pub fn len(&self) -> usize { diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index a13a6617..839cb267 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -184,6 +184,39 @@ impl InodeLifecycle { } } +/// RAII guard that calls [`DCache::abort_populate`] on drop unless defused. +/// +/// Prevents the populate flag from getting stuck in `IN_PROGRESS` if the +/// populating future is cancelled (e.g. by a FUSE interrupt or `select!`). +struct PopulateGuard<'a> { + dcache: &'a DCache, + parent: LoadedAddr, + armed: bool, +} + +impl<'a> PopulateGuard<'a> { + fn new(dcache: &'a DCache, parent: LoadedAddr) -> Self { + Self { + dcache, + parent, + armed: true, + } + } + + /// Defuse the guard after a successful `finish_populate`. + fn defuse(&mut self) { + self.armed = false; + } +} + +impl Drop for PopulateGuard<'_> { + fn drop(&mut self) { + if self.armed { + self.dcache.abort_populate(self.parent); + } + } +} + /// An asynchronous filesystem cache mapping `InodeAddr` to `INode`. /// /// Uses two [`FutureBackedCache`] layers: @@ -401,28 +434,27 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { loop { match self.directory_cache.try_claim_populate(parent) { PopulateStatus::Claimed => { - match self.data_provider.readdir(parent_inode).await { - Ok(children) => { - for (name, child_inode) in children { - self.inode_table - .get_or_init(child_inode.addr, || async move { child_inode }) - .await; - self.directory_cache - .insert( - parent, - name, - LoadedAddr(child_inode.addr), - child_inode.itype == INodeType::Directory, - ) - .await; - } - self.directory_cache.finish_populate(parent); - } - Err(e) => { - self.directory_cache.abort_populate(parent); - return Err(e); - } + // RAII guard: if this future is cancelled between Claimed + // and finish_populate, automatically abort so other waiters + // can retry instead of hanging forever. 
+ let mut guard = PopulateGuard::new(&self.directory_cache, parent); + + let children = self.data_provider.readdir(parent_inode).await?; + for (name, child_inode) in children { + self.inode_table + .get_or_init(child_inode.addr, || async move { child_inode }) + .await; + self.directory_cache + .insert( + parent, + name, + LoadedAddr(child_inode.addr), + child_inode.itype == INodeType::Directory, + ) + .await; } + self.directory_cache.finish_populate(parent); + guard.defuse(); break; } PopulateStatus::InProgress => { diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs index 350d8750..37599388 100644 --- a/lib/fs/bridge.rs +++ b/lib/fs/bridge.rs @@ -1,20 +1,26 @@ -//! Lock-free bidirectional inode address mapping. +//! Bidirectional inode address mapping. //! //! [`ConcurrentBridge`] maps between "outer" (composite) and "inner" (child) -//! inode address spaces using two [`scc::HashMap`]s. +//! inode address spaces using two [`scc::HashMap`]s guarded by a coordination +//! lock for cross-map atomicity. + +use std::sync::Mutex; use crate::fs::InodeAddr; /// Bidirectional inode mapping between outer (composite) and inner (child) address spaces. /// -/// Uses two lock-free `scc::HashMap`s. Insertion order: forward map first, -/// then backward map, so any observer that discovers an outer addr via -/// `backward` can immediately resolve it via `forward`. +/// Uses two concurrent `scc::HashMap`s for lock-free reads. Mutations that +/// touch both maps are serialized by a `Mutex<()>` to prevent cross-map +/// inconsistencies (e.g. a concurrent `remove_by_outer` between the two +/// `insert_sync` calls in `insert` could leave orphaned entries). pub struct ConcurrentBridge { /// outer -> inner fwd: scc::HashMap, /// inner -> outer bwd: scc::HashMap, + /// Serializes mutations that touch both maps. 
+ mu: Mutex<()>, } impl ConcurrentBridge { @@ -24,13 +30,18 @@ impl ConcurrentBridge { Self { fwd: scc::HashMap::new(), bwd: scc::HashMap::new(), + mu: Mutex::new(()), } } /// Insert a mapping from outer to inner. /// - /// Inserts into the forward map first (see module docs for ordering rationale). + /// Serialized with other mutations via the coordination lock. pub fn insert(&self, outer: InodeAddr, inner: InodeAddr) { + let _guard = self + .mu + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let _ = self.fwd.insert_sync(outer, inner); let _ = self.bwd.insert_sync(inner, outer); } @@ -48,18 +59,23 @@ impl ConcurrentBridge { } /// Look up inner -> outer, or allocate a new outer address if unmapped. + /// + /// Serialized with other mutations via the coordination lock. #[must_use] pub fn backward_or_insert( &self, inner: InodeAddr, allocate: impl FnOnce() -> InodeAddr, ) -> InodeAddr { + let _guard = self + .mu + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); match self.bwd.entry_sync(inner) { scc::hash_map::Entry::Occupied(occ) => *occ.get(), scc::hash_map::Entry::Vacant(vac) => { let outer = allocate(); vac.insert_entry(outer); - // Populate forward map after backward is committed. let _ = self.fwd.insert_sync(outer, inner); outer } @@ -67,7 +83,13 @@ impl ConcurrentBridge { } /// Remove the mapping for the given outer address. + /// + /// Serialized with other mutations via the coordination lock. 
pub fn remove_by_outer(&self, outer: InodeAddr) { + let _guard = self + .mu + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); if let Some((_, inner)) = self.fwd.remove_sync(&outer) { self.bwd.remove_sync(&inner); } diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index 7d9748c6..abb65fc2 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -263,12 +263,12 @@ impl CompositeFs { { match self.inner.name_to_slot.entry_sync(desc.name.clone()) { scc::hash_map::Entry::Occupied(mut occ) => { - let slot_idx = *occ.get(); + let old_slot_idx = *occ.get(); // Extract bridge Arc from the slot guard, then query outside. let bridge = self .inner .slots - .read_sync(&slot_idx, |_, slot| Arc::clone(&slot.bridge)); + .read_sync(&old_slot_idx, |_, slot| Arc::clone(&slot.bridge)); if let Some(outer) = bridge.and_then(|b| b.backward(desc.root_ino.addr)) { return outer; } @@ -276,6 +276,8 @@ impl CompositeFs { // while still holding the entry guard to prevent races. let (outer_ino, new_slot_idx) = self.create_child_slot(desc); *occ.get_mut() = new_slot_idx; + // Remove the orphaned old slot to prevent unbounded growth. + self.inner.slots.remove_sync(&old_slot_idx); outer_ino } scc::hash_map::Entry::Vacant(vac) => { diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index aea5bb2c..d8778fb8 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -2,6 +2,8 @@ use std::ffi::{OsStr, OsString}; use std::sync::Arc; use std::sync::atomic::{AtomicU8, Ordering}; +use tokio::sync::Notify; + use crate::fs::LoadedAddr; /// Cached metadata for a directory entry. @@ -32,6 +34,8 @@ pub enum PopulateStatus { struct DirState { children: scc::HashMap, populated: AtomicU8, + /// Wakes waiters when `populated` transitions out of `IN_PROGRESS`. 
+ notify: Notify, } impl DirState { @@ -39,6 +43,7 @@ impl DirState { Self { children: scc::HashMap::new(), populated: AtomicU8::new(POPULATE_UNCLAIMED), + notify: Notify::new(), } } } @@ -46,7 +51,7 @@ impl DirState { /// In-memory directory entry cache with per-parent child maps. /// /// Each parent directory gets its own [`DirState`] containing a -/// [`scc::HashMap`] of child entries and an [`AtomicBool`] population flag. +/// [`scc::HashMap`] of child entries and an [`AtomicU8`] population flag. /// This makes `readdir` O(k) in the number of children rather than O(n) /// over the entire cache. pub struct DCache { @@ -144,6 +149,7 @@ impl DCache { pub fn finish_populate(&self, parent_ino: LoadedAddr) { let state = self.dir_state(parent_ino); state.populated.store(POPULATE_DONE, Ordering::Release); + state.notify.notify_waiters(); } /// Abort a population attempt, resetting back to unclaimed so another @@ -151,19 +157,21 @@ impl DCache { pub fn abort_populate(&self, parent_ino: LoadedAddr) { let state = self.dir_state(parent_ino); state.populated.store(POPULATE_UNCLAIMED, Ordering::Release); + state.notify.notify_waiters(); } /// Wait until a directory is no longer in the `InProgress` state. + /// + /// Uses [`Notify`] to sleep efficiently instead of spinning. 
pub async fn wait_populated(&self, parent_ino: LoadedAddr) { + let state = self.dir_state(parent_ino); loop { - let current = self - .dirs - .read_sync(&parent_ino, |_, v| v.populated.load(Ordering::Acquire)) - .unwrap_or(POPULATE_UNCLAIMED); + let notified = state.notify.notified(); + let current = state.populated.load(Ordering::Acquire); if current != POPULATE_IN_PROGRESS { return; } - tokio::task::yield_now().await; + notified.await; } } } diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index f2041d10..df0b2dbb 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -265,13 +265,15 @@ impl FsDataProvider for MesRepoProvider { .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; Ok(MesFileReader { - client: inner.client.clone(), - org_name: inner.org_name.clone(), - repo_name: inner.repo_name.clone(), - ref_: inner.ref_.clone(), - path, - file_cache: inner.file_cache.clone(), - inode_addr: inode.addr, + inner: Arc::new(MesFileReaderCtx { + client: inner.client.clone(), + org_name: inner.org_name.clone(), + repo_name: inner.repo_name.clone(), + ref_: inner.ref_.clone(), + path, + file_cache: inner.file_cache.clone(), + inode_addr: inode.addr, + }), }) } } @@ -282,6 +284,10 @@ impl FsDataProvider for MesRepoProvider { } pub struct MesFileReader { + inner: Arc, +} + +struct MesFileReaderCtx { client: MesaClient, org_name: String, repo_name: String, @@ -297,18 +303,12 @@ impl FileReader for MesFileReader { offset: u64, size: u32, ) -> impl Future> + Send { - let client = self.client.clone(); - let org_name = self.org_name.clone(); - let repo_name = self.repo_name.clone(); - let ref_ = self.ref_.clone(); - let path = self.path.clone(); - let file_cache = self.file_cache.clone(); - let inode_addr = self.inode_addr; + let ctx = Arc::clone(&self.inner); async move { // Try the file cache first. 
- if let Some(cache) = &file_cache - && let Some(data) = cache.get(&inode_addr).await + if let Some(cache) = &ctx.file_cache + && let Some(data) = cache.get(&ctx.inode_addr).await { let start = usize::try_from(offset) .unwrap_or(data.len()) @@ -318,7 +318,7 @@ impl FileReader for MesFileReader { } // Cache miss -- fetch from the Mesa API. - let path_str = path.to_str().ok_or_else(|| { + let path_str = ctx.path.to_str().ok_or_else(|| { std::io::Error::new( std::io::ErrorKind::InvalidData, "path contains non-UTF-8 characters", @@ -331,12 +331,13 @@ impl FileReader for MesFileReader { Some(path_str) }; - let content = client - .org(&org_name) + let content = ctx + .client + .org(&ctx.org_name) .repos() - .at(&repo_name) + .at(&ctx.repo_name) .content() - .get(Some(ref_.as_str()), api_path, None) + .get(Some(ctx.ref_.as_str()), api_path, None) .await .map_err(MesaApiError::from) .map_err(mesa_api_error_to_io)?; @@ -360,10 +361,10 @@ impl FileReader for MesFileReader { let result = Bytes::copy_from_slice(&decoded[start..end]); // Store the decoded content in the cache for future reads. 
- if let Some(cache) = &file_cache - && let Err(e) = cache.insert(&inode_addr, decoded).await + if let Some(cache) = &ctx.file_cache + && let Err(e) = cache.insert(&ctx.inode_addr, decoded).await { - warn!(error = ?e, inode_addr, "failed to cache file content"); + warn!(error = ?e, inode_addr = ctx.inode_addr, "failed to cache file content"); } Ok(result) From 7106d6053af6daf6a520191afa364aa37c62a3b9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 12:02:21 -0800 Subject: [PATCH 26/41] tests --- lib/cache/async_backed.rs | 95 +++++++++++++++---------------- lib/fs/async_fs.rs | 19 +++++-- tests/async_backed_correctness.rs | 77 +++++++++++++++++++++++++ tests/async_fs_correctness.rs | 42 ++++++++++++++ tests/common/async_fs_mocks.rs | 8 +++ 5 files changed, 186 insertions(+), 55 deletions(-) diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index f304ca6d..9c05ee0b 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -7,7 +7,6 @@ //! Note that this cache does not support automatic eviction. use std::panic::AssertUnwindSafe; -use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; use std::{fmt::Debug, future::Future, hash::Hash, pin::Pin}; @@ -119,7 +118,9 @@ where /// /// Concurrent callers for the same key are deduplicated: only one factory invocation runs, /// and joiners await its shared result. If the factory fails, the poisoned `InFlight` entry - /// is removed and joiners fall through to run their own factory (non-deduplicated retry). + /// is removed and joiners retry by re-entering the `entry_async` gate, so a single new + /// owner is elected. Joiners never receive the original error — the retrying owner invokes + /// its own factory independently and may produce a different error or succeed. /// /// # Panics /// @@ -151,53 +152,49 @@ where } // Slow path: claim a slot or join an existing in-flight computation. 
- // The error side-channel lets the owner retrieve the `Err(e)` from the - // shared future (which only produces `Option`). - let error_cell: Arc>> = Arc::new(std::sync::Mutex::new(None)); + // Wrapped in `Option` so the `FnOnce` factory can be consumed exactly + // once inside the loop (only in the `Vacant` branch, which always returns). + let mut factory = Some(factory); - match self.map.entry_async(key.clone()).await { - scc::hash_map::Entry::Occupied(occ) => match occ.get() { - Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(g, shared) => { - let (generation, shared) = (*g, shared.clone()); - drop(occ); - if let Some(v) = self.await_shared(&key, generation, shared).await { - return Ok(v); - } - // In-flight failed. We still have `factory` — run it ourselves. - let val = factory().await?; - match self.map.entry_async(key).await { - scc::hash_map::Entry::Occupied(occ) => match occ.get() { - Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(..) => Ok(val), - }, - scc::hash_map::Entry::Vacant(vac) => { - vac.insert_entry(Slot::Ready(val.clone())); - Ok(val) + loop { + match self.map.entry_async(key.clone()).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => return Ok(v.clone()), + Slot::InFlight(g, shared) => { + let (generation, shared) = (*g, shared.clone()); + drop(occ); + if let Some(v) = self.await_shared(&key, generation, shared).await { + return Ok(v); } + // In-flight failed. Loop back to `entry_async` so the + // next caller gets proper dedup instead of running + // factory directly. 
} - } - }, - scc::hash_map::Entry::Vacant(vac) => { - let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); - let shared = Self::make_shared_fallible(factory, Arc::clone(&error_cell)); - let ret = shared.clone(); - vac.insert_entry(Slot::InFlight(generation, shared)); + }, + scc::hash_map::Entry::Vacant(vac) => { + let f = factory.take().unwrap_or_else(|| { + unreachable!( + "FutureBackedCache: factory already consumed but \ + reached Vacant branch again for key {key:?}" + ) + }); + let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); + let (error_tx, mut error_rx) = tokio::sync::oneshot::channel(); + let shared = Self::make_shared_fallible(f, error_tx); + let ret = shared.clone(); + vac.insert_entry(Slot::InFlight(generation, shared)); - if let Some(v) = self.await_shared(&key, generation, ret).await { - return Ok(v); - } - // Our factory returned `Err` — retrieve it from the side channel. - let captured = error_cell - .lock() - .unwrap_or_else(std::sync::PoisonError::into_inner) - .take(); - match captured { - Some(e) => Err(e), - None => panic!( - "FutureBackedCache: factory for key {key:?} resolved to None \ - but no error was captured (factory panicked)" - ), + if let Some(v) = self.await_shared(&key, generation, ret).await { + return Ok(v); + } + // Our factory returned `Err` — retrieve it from the channel. + return match error_rx.try_recv().ok() { + Some(e) => Err(e), + None => panic!( + "FutureBackedCache: factory for key {key:?} resolved to None \ + but no error was captured (factory panicked)" + ), + }; } } } @@ -282,10 +279,10 @@ where /// Like [`make_shared`](Self::make_shared), but for fallible factories. /// /// On `Ok(v)`, the shared future resolves to `Some(v)`. On `Err(e)`, the - /// error is captured in `error_cell` and the future resolves to `None`. + /// error is sent through `error_tx` and the future resolves to `None`. 
fn make_shared_fallible( factory: F, - error_cell: Arc>>, + error_tx: tokio::sync::oneshot::Sender, ) -> SharedFut where F: FnOnce() -> Fut, @@ -297,9 +294,7 @@ where match fut.await { Ok(Ok(v)) => Some(v), Ok(Err(e)) => { - *error_cell - .lock() - .unwrap_or_else(std::sync::PoisonError::into_inner) = Some(e); + drop(error_tx.send(e)); None } Err(_panic) => None, diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 839cb267..061f974b 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -323,12 +323,21 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { "parent inode should be a directory" ); - if let Some(dentry) = self.directory_cache.lookup(parent, name) - && let Some(inode) = self.inode_table.get(&dentry.ino.0).await - { - return Ok(TrackedINode { inode }); + if let Some(dentry) = self.directory_cache.lookup(parent, name) { + if let Some(inode) = self.inode_table.get(&dentry.ino.0).await { + return Ok(TrackedINode { inode }); + } + // Inode was evicted (e.g. by forget). Evict the stale lookup_cache + // entry so the slow path calls dp.lookup() fresh. + // + // Note: a concurrent task may re-insert into lookup_cache between + // our inode_table miss and this remove_sync. This is benign — it + // causes at most one redundant dp.lookup() call because all + // downstream operations (get_or_try_init, get_or_init) are + // idempotent or deduplicated. + self.lookup_cache + .remove_sync(&(parent.0, name.to_os_string())); } - // Inode was evicted from the table — fall through to the slow path. 
let name_owned = name.to_os_string(); let lookup_key = (parent.0, name_owned.clone()); diff --git a/tests/async_backed_correctness.rs b/tests/async_backed_correctness.rs index 457ba948..097226aa 100644 --- a/tests/async_backed_correctness.rs +++ b/tests/async_backed_correctness.rs @@ -3,6 +3,8 @@ use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; +use tokio::sync::oneshot; + use git_fs::cache::async_backed::FutureBackedCache; #[tokio::test] @@ -97,3 +99,78 @@ async fn panic_in_factory_is_recovered() { "factory called twice" ); } + +/// With 3+ joiners the dedup property becomes observable: under the old +/// broken code each joiner would run its own factory after the owner fails +/// (4 total calls for 1 owner + 3 joiners). With the loop-based retry only +/// one joiner wins the `Vacant` race, so we expect exactly 2 calls +/// (A's fail + one winner's success). +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn try_init_retry_after_joined_failure_deduplicates() { + let cache = Arc::new(FutureBackedCache::::default()); + let call_count = Arc::new(AtomicUsize::new(0)); + + // Channel to control timing of Task A's factory. + let (release_tx, release_rx) = oneshot::channel::<()>(); + + // Task A: starts a failing InFlight, held until we release. + let cache_a = Arc::clone(&cache); + let count_a = Arc::clone(&call_count); + let task_a = tokio::spawn(async move { + let result: Result = cache_a + .get_or_try_init(1, || { + count_a.fetch_add(1, Ordering::Relaxed); + async move { + let _ = release_rx.await; + Err("task_a_fail".to_owned()) + } + }) + .await; + result + }); + + // Give Task A time to register the InFlight slot. + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + // Spawn 3 joiners that all join A's InFlight. After A fails, exactly + // one should win the Vacant race and run its factory; the others join + // the new InFlight. 
+ let mut joiner_handles = Vec::new(); + for _ in 0..3 { + let cache_j = Arc::clone(&cache); + let count_j = Arc::clone(&call_count); + joiner_handles.push(tokio::spawn(async move { + let result: Result = cache_j + .get_or_try_init(1, || { + count_j.fetch_add(1, Ordering::Relaxed); + async move { Ok("joiner_ok".to_owned()) } + }) + .await; + result + })); + } + + // Give joiners time to join the InFlight. + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + // Release A's factory → it fails. + release_tx.send(()).unwrap(); + + let result_a = task_a.await.unwrap(); + assert!(result_a.is_err(), "task A should fail"); + + for handle in joiner_handles { + let result = handle.await.unwrap(); + assert_eq!(result.unwrap(), "joiner_ok", "every joiner should succeed"); + } + + // Factory should have been called exactly 2 times: A's fail + one + // joiner winning the Vacant race. The other 2 joiners piggyback on + // the winner's InFlight via Shared, so their factories are never called. 
+ assert_eq!( + call_count.load(Ordering::Relaxed), + 2, + "factory should be called exactly twice (A's fail + one joiner's success), \ + not 4 (which would indicate each joiner ran its own factory)" + ); +} diff --git a/tests/async_fs_correctness.rs b/tests/async_fs_correctness.rs index 5fe27a28..fd6c2bdc 100644 --- a/tests/async_fs_correctness.rs +++ b/tests/async_fs_correctness.rs @@ -3,6 +3,7 @@ mod common; use std::ffi::{OsStr, OsString}; +use std::sync::Arc; use git_fs::cache::async_backed::FutureBackedCache; use git_fs::fs::async_fs::{AsyncFs, InodeLifecycle}; @@ -579,6 +580,47 @@ async fn readdir_provides_correct_next_offsets() { ); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_after_eviction_returns_fresh_inode() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_v1 = make_inode(10, INodeType::File, 42, Some(1)); + let child_v2 = make_inode(20, INodeType::File, 99, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "readme.md".into()), child_v1); + let dp = MockFsDataProvider::new(state); + let state_ref = Arc::clone(&dp.state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + // First lookup → addr=10 + let first = fs + .lookup(LoadedAddr(1), OsStr::new("readme.md")) + .await + .unwrap(); + assert_eq!(first.inode.addr, 10); + + // Simulate forget: remove the inode from the table. + table.remove_sync(&10); + + // Insert the refresh entry *after* the first lookup so dp.lookup() + // returns child_v2 on the next call (refresh_lookups is checked first). + drop( + state_ref + .refresh_lookups + .insert_sync((1, "readme.md".into()), child_v2), + ); + + // Second lookup should NOT return the stale addr=10. 
+ let second = fs + .lookup(LoadedAddr(1), OsStr::new("readme.md")) + .await + .unwrap(); + assert_ne!(second.inode.addr, 10, "should not return stale inode"); + assert_eq!(second.inode.addr, 20, "should return the fresh inode"); +} + // lookup-after-readdir integration test #[tokio::test(flavor = "multi_thread", worker_threads = 2)] diff --git a/tests/common/async_fs_mocks.rs b/tests/common/async_fs_mocks.rs index 5c132eec..4441544c 100644 --- a/tests/common/async_fs_mocks.rs +++ b/tests/common/async_fs_mocks.rs @@ -52,6 +52,10 @@ pub struct MockFsState { pub directories: HashMap>, /// `inode_addr -> file content bytes` pub file_contents: HashMap, + /// Mutable overrides for `lookups`. When populated, entries here take + /// precedence and are consumed on use (removed after the first hit). + /// Existing tests are unaffected because this defaults to empty. + pub refresh_lookups: scc::HashMap<(u64, OsString), INode>, } /// A clonable mock data provider for `AsyncFs` tests. @@ -73,6 +77,10 @@ impl FsDataProvider for MockFsDataProvider { async fn lookup(&self, parent: INode, name: &OsStr) -> Result { let key = (parent.addr, name.to_os_string()); + // Check mutable overrides first (consumed on use). 
+ if let Some((_, inode)) = self.state.refresh_lookups.remove_sync(&key) { + return Ok(inode); + } self.state .lookups .get(&key) From 61f5f3082067caf0d6955d3547b22e1af025f2d6 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 12:24:23 -0800 Subject: [PATCH 27/41] more docs --- lib/fs/async_fs.rs | 22 +++++++++++++++++----- lib/fs/composite.rs | 3 +++ src/fs/mescloud/repo.rs | 17 +++++++++++++++-- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 061f974b..7b5b8d2f 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -65,23 +65,35 @@ pub trait FsDataProvider: Clone + Send + Sync + 'static { flags: OpenFlags, ) -> impl Future> + Send; - /// Called when the kernel forgets an inode (refcount reaches zero). + /// Clean up provider-internal state for an evicted inode. /// - /// Implementations should clean up any internal mappings for the given - /// address (e.g. bridge maps, path maps). The default is a no-op. + /// The `DropWard`/`InodeForget` system automatically removes inodes from + /// the shared `inode_table` when the FUSE refcount reaches zero, but data + /// providers often maintain auxiliary structures (path maps, bridge maps) + /// that also need cleanup. This method is that extension point. + /// + /// Never called directly -- [`InodeForget::delete`] invokes it + /// automatically when the refcount drops to zero. fn forget(&self, _addr: InodeAddr) {} } -/// Zero-sized tag whose [`StatelessDrop`] implementation automatically evicts -/// an inode from the inode table when its reference count reaches zero. +/// Zero-sized cleanup tag for inode eviction. +/// +/// The [`StatelessDrop`] implementations on this type evict inodes from the +/// inode table and, when a data provider is present, delegate to +/// [`FsDataProvider::forget`] so the provider can clean up its own auxiliary +/// structures (path maps, bridge maps, etc.). 
pub struct InodeForget; +/// Evicts the inode from the table only. Used when no data provider is available. impl<'a> StatelessDrop<&'a FutureBackedCache, InodeAddr> for InodeForget { fn delete(inode_table: &&'a FutureBackedCache, addr: &InodeAddr) { inode_table.remove_sync(addr); } } +/// Evicts the inode from the table and delegates to [`FsDataProvider::forget`] +/// so the provider can clean up its own auxiliary state. impl<'a, DP: FsDataProvider> StatelessDrop<(&'a FutureBackedCache, DP), InodeAddr> for InodeForget { diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index abb65fc2..ff00c8d3 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -437,6 +437,9 @@ where }) } + /// Removes the composite-level address from `addr_to_slot` and the + /// child's bridge map. Called automatically by `InodeForget` when the + /// FUSE refcount drops to zero. The root inode is never forgotten. fn forget(&self, addr: InodeAddr) { if addr == Self::ROOT_INO { return; diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index df0b2dbb..aae85491 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -35,8 +35,19 @@ struct MesRepoProviderInner { ref_: String, fs_owner: (u32, u32), next_addr: AtomicU64, - /// Maps inode addresses to repo-relative paths (e.g., "src/main.rs"). - /// Root directory maps to an empty `PathBuf`. + /// Maps inode addresses to repo-relative paths (e.g. `"src/main.rs"`). + /// Root maps to an empty `PathBuf`. + /// + /// Exists alongside the [`DCache`](git_fs::fs::dcache::DCache) because + /// they serve different purposes: the dcache maps + /// `(parent_addr, child_name) -> child_addr` (single-hop name resolution), + /// while this map provides the full repo-relative path needed for Mesa API + /// calls. Reconstructing the full path from the dcache would require + /// walking parent pointers to the root on every API call; this map + /// materializes that walk as an O(1) lookup. 
+ /// + /// Entries are inserted during `lookup`/`readdir` and removed via + /// [`forget`](Self::remove_path) when the FUSE refcount reaches zero. path_map: scc::HashMap, file_cache: Option>>, } @@ -278,6 +289,8 @@ impl FsDataProvider for MesRepoProvider { } } + /// Evicts the inode's entry from [`path_map`](MesRepoProviderInner::path_map). + /// Called automatically by `InodeForget` when the FUSE refcount drops to zero. fn forget(&self, addr: InodeAddr) { self.remove_path(addr); } From e890c3da1ea99bb704679e03e3d2a16694b16311 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 13:48:08 -0800 Subject: [PATCH 28/41] thread-safety on forget --- lib/cache/async_backed.rs | 63 ++++++++++++++++++++++++++------------- lib/drop_ward.rs | 5 +++- lib/fs/bridge.rs | 17 +++++++++++ lib/fs/composite.rs | 24 ++++++++++++--- 4 files changed, 83 insertions(+), 26 deletions(-) diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index 9c05ee0b..d8989cf8 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -62,8 +62,10 @@ where /// /// # Panics /// - /// Panics if this caller joins an in-flight factory that itself panicked (i.e. the caller - /// lost the race to insert a fresh entry after the poisoned slot was removed). + /// Panics only if *this* caller's own factory panicked (i.e. this caller won the `Vacant` + /// slot and the factory it spawned panicked). Joiners who observe a panicked factory loop + /// back to `entry_async` so a new owner is elected, matching the retry semantics of + /// [`get_or_try_init`](Self::get_or_try_init). pub async fn get_or_init(&self, key: K, factory: F) -> V where F: FnOnce() -> Fut, @@ -84,31 +86,50 @@ where if let Some(v) = self.await_shared(&key, generation, shared).await { return v; } - // Factory panicked; entry removed. Fall through to re-insert below. + // Factory panicked; entry removed. Fall through to slow path. 
} None => {} } - // Slow path: use entry_async for atomic check-and-insert. - let (generation, shared) = match self.map.entry_async(key.clone()).await { - scc::hash_map::Entry::Occupied(occ) => match occ.get() { - Slot::Ready(v) => return v.clone(), - Slot::InFlight(g, shared) => (*g, shared.clone()), - }, - scc::hash_map::Entry::Vacant(vac) => { - let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); - let shared = Self::make_shared(factory); - let ret = shared.clone(); - vac.insert_entry(Slot::InFlight(generation, shared)); - (generation, ret) - } - }; + // Slow path: claim a slot or join an existing in-flight computation. + // Wrapped in `Option` so the `FnOnce` factory can be consumed exactly + // once inside the loop (only in the `Vacant` branch, which always returns). + let mut factory = Some(factory); - if let Some(v) = self.await_shared(&key, generation, shared).await { - return v; - } + loop { + match self.map.entry_async(key.clone()).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => return v.clone(), + Slot::InFlight(g, shared) => { + let (generation, shared) = (*g, shared.clone()); + drop(occ); + if let Some(v) = self.await_shared(&key, generation, shared).await { + return v; + } + // In-flight failed. Loop back to `entry_async` so the + // next caller gets proper dedup instead of running + // factory directly. 
+ } + }, + scc::hash_map::Entry::Vacant(vac) => { + let f = factory.take().unwrap_or_else(|| { + unreachable!( + "FutureBackedCache: factory already consumed but \ + reached Vacant branch again for key {key:?}" + ) + }); + let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); + let shared = Self::make_shared(f); + let ret = shared.clone(); + vac.insert_entry(Slot::InFlight(generation, shared)); - panic!("FutureBackedCache: joined an in-flight factory that panicked for key {key:?}"); + if let Some(v) = self.await_shared(&key, generation, ret).await { + return v; + } + panic!("FutureBackedCache: factory for key {key:?} panicked"); + } + } + } } /// Like [`get_or_init`](Self::get_or_init), but for fallible factories. diff --git a/lib/drop_ward.rs b/lib/drop_ward.rs index 4922e13c..848d1dfb 100644 --- a/lib/drop_ward.rs +++ b/lib/drop_ward.rs @@ -107,8 +107,11 @@ where let curr = *self.map.get(key)?; let new_count = curr.saturating_sub(by); if new_count == 0 { - self.map.remove(key); + // Delete before removing from the map: if `delete` panics the + // entry remains and a subsequent `dec` can retry cleanup. The + // reverse order would silently lose the entry. T::delete(&self.ctx, key); + self.map.remove(key); } else if let Some(slot) = self.map.get_mut(key) { *slot = new_count; } diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs index 37599388..5b5354d9 100644 --- a/lib/fs/bridge.rs +++ b/lib/fs/bridge.rs @@ -47,12 +47,29 @@ impl ConcurrentBridge { } /// Resolve outer -> inner. + /// + /// This read is **not** serialized with mutations. A concurrent [`insert`] + /// may have completed the forward entry but not yet the backward entry (or + /// vice versa for [`remove_by_outer`]). Callers must tolerate stale or + /// transiently-missing results. Use [`backward_or_insert`] when + /// cross-map consistency is required. 
+ /// + /// [`insert`]: Self::insert + /// [`remove_by_outer`]: Self::remove_by_outer + /// [`backward_or_insert`]: Self::backward_or_insert #[must_use] pub fn forward(&self, outer: InodeAddr) -> Option { self.fwd.read_sync(&outer, |_, &v| v) } /// Resolve inner -> outer. + /// + /// This read is **not** serialized with mutations. See [`forward`] for + /// the consistency caveats. Use [`backward_or_insert`] when cross-map + /// consistency is required. + /// + /// [`forward`]: Self::forward + /// [`backward_or_insert`]: Self::backward_or_insert #[must_use] pub fn backward(&self, inner: InodeAddr) -> Option { self.bwd.read_sync(&inner, |_, &v| v) diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index ff00c8d3..957836b1 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -138,6 +138,10 @@ struct CompositeFsInner { /// Maps a composite-level outer inode to its child slot index. addr_to_slot: scc::HashMap, /// Maps child name to slot index (for dedup on concurrent resolve). + /// + /// `register_child` uses `entry_sync` on this map for per-name + /// exclusion, serializing concurrent registrations of the same child + /// without a global lock. `forget` never touches this map. name_to_slot: scc::HashMap, /// Monotonically increasing slot counter. next_slot: AtomicU64, @@ -257,6 +261,13 @@ impl CompositeFs { /// If the child is already registered by name, the existing outer address /// is returned. Otherwise a new slot is created with a fresh inode table /// and bridge mapping. + /// + /// Uses `entry_sync` on `name_to_slot` for per-name exclusion: + /// concurrent registrations of the same child are serialized by the + /// `scc::HashMap` bucket lock, while different names proceed in + /// parallel. `forget` never touches `name_to_slot` and is fully + /// independent — outer inode addresses are monotonic and never reused, + /// so `forget` cannot corrupt a replacement slot. 
fn register_child(&self, desc: &ChildDescriptor) -> InodeAddr where R::ChildDP: Clone, @@ -264,7 +275,6 @@ impl CompositeFs { match self.inner.name_to_slot.entry_sync(desc.name.clone()) { scc::hash_map::Entry::Occupied(mut occ) => { let old_slot_idx = *occ.get(); - // Extract bridge Arc from the slot guard, then query outside. let bridge = self .inner .slots @@ -272,11 +282,9 @@ impl CompositeFs { if let Some(outer) = bridge.and_then(|b| b.backward(desc.root_ino.addr)) { return outer; } - // Slot exists but bridge has no mapping — replace in-place - // while still holding the entry guard to prevent races. + // Slot exists but bridge has no mapping — replace it. let (outer_ino, new_slot_idx) = self.create_child_slot(desc); *occ.get_mut() = new_slot_idx; - // Remove the orphaned old slot to prevent unbounded growth. self.inner.slots.remove_sync(&old_slot_idx); outer_ino } @@ -440,6 +448,14 @@ where /// Removes the composite-level address from `addr_to_slot` and the /// child's bridge map. Called automatically by `InodeForget` when the /// FUSE refcount drops to zero. The root inode is never forgotten. + /// + /// Lock-free with respect to [`register_child`](CompositeFs::register_child): + /// outer inode addresses are monotonically increasing and never reused, + /// so `forget(addr)` can only affect the slot that originally owned + /// `addr`. If a concurrent `register_child` has already replaced the + /// slot, `slots.read_sync` returns `None` and the bridge cleanup is + /// skipped — the old slot's `Arc` is dropped with its + /// `Arc` refcount. 
fn forget(&self, addr: InodeAddr) { if addr == Self::ROOT_INO { return; From 08d51c64686a7140188e0c3c09683590e2a83cbe Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 14:09:43 -0800 Subject: [PATCH 29/41] TOCTOU fixes --- lib/fs/async_fs.rs | 26 +++++++---- lib/fs/bridge.rs | 8 +++- lib/fs/composite.rs | 19 +++++--- lib/fs/fuser.rs | 31 ++++++++++--- lib/fs/mod.rs | 29 +++++++++++- tests/async_fs_correctness.rs | 78 ++++++++++++++++++++----------- tests/composite_fs_tests.rs | 54 +++++++++++++++------- tests/dcache_correctness.rs | 87 +++++++++++++++++++++++++---------- 8 files changed, 242 insertions(+), 90 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 7b5b8d2f..625f3f8c 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -222,6 +222,11 @@ impl<'a> PopulateGuard<'a> { } impl Drop for PopulateGuard<'_> { + /// Fires when the populating future is cancelled before [`defuse`](Self::defuse) + /// is called, resetting the dcache populate flag from `IN_PROGRESS` back to + /// `UNCLAIMED` so a subsequent `readdir` can retry. This is a normal + /// occurrence under FUSE interrupts or `tokio::select!` cancellation — + /// not an error. fn drop(&mut self) { if self.armed { self.dcache.abort_populate(self.parent); @@ -336,7 +341,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { ); if let Some(dentry) = self.directory_cache.lookup(parent, name) { - if let Some(inode) = self.inode_table.get(&dentry.ino.0).await { + if let Some(inode) = self.inode_table.get(&dentry.ino.addr()).await { return Ok(TrackedINode { inode }); } // Inode was evicted (e.g. by forget). Evict the stale lookup_cache @@ -348,11 +353,11 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { // downstream operations (get_or_try_init, get_or_init) are // idempotent or deduplicated. 
self.lookup_cache - .remove_sync(&(parent.0, name.to_os_string())); + .remove_sync(&(parent.addr(), name.to_os_string())); } let name_owned = name.to_os_string(); - let lookup_key = (parent.0, name_owned.clone()); + let lookup_key = (parent.addr(), name_owned.clone()); let dp = self.data_provider.clone(); let child = self @@ -371,7 +376,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { .insert( parent, name_owned, - LoadedAddr(child.addr), + LoadedAddr::new_unchecked(child.addr), matches!(child.itype, INodeType::Directory), ) .await; @@ -384,9 +389,9 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { /// If the inode is currently in-flight (being loaded by another caller), this awaits /// completion. Returns an error if the inode is not in the table at all. pub async fn loaded_inode(&self, addr: LoadedAddr) -> Result { - self.inode_table.get(&addr.0).await.ok_or_else(|| { + self.inode_table.get(&addr.addr()).await.ok_or_else(|| { tracing::error!( - inode = ?addr.0, + inode = ?addr.addr(), "inode not found in table — this is a programming bug" ); std::io::Error::from_raw_os_error(libc::ENOENT) @@ -469,7 +474,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { .insert( parent, name, - LoadedAddr(child_inode.addr), + LoadedAddr::new_unchecked(child_inode.addr), child_inode.itype == INodeType::Directory, ) .await; @@ -494,7 +499,12 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { reason = "offset fits in usize on supported 64-bit platforms" )] for (i, (name, dvalue)) in children.iter().enumerate().skip(offset as usize) { - let inode = self.loaded_inode(dvalue.ino).await?; + let Some(inode) = self.inode_table.get(&dvalue.ino.addr()).await else { + // Inode was evicted between readdir collection and iteration + // (e.g. by a concurrent forget). Skip the stale entry. 
+ tracing::debug!(addr = ?dvalue.ino.addr(), name = ?name, "inode evicted during readdir, skipping"); + continue; + }; let next_offset = (i + 1) as u64; if filler(DirEntry { name, inode }, next_offset) { break; diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs index 5b5354d9..b0366cfd 100644 --- a/lib/fs/bridge.rs +++ b/lib/fs/bridge.rs @@ -101,8 +101,11 @@ impl ConcurrentBridge { /// Remove the mapping for the given outer address. /// - /// Serialized with other mutations via the coordination lock. - pub fn remove_by_outer(&self, outer: InodeAddr) { + /// Returns `true` if the bridge is empty after the removal — the caller + /// can use this to garbage-collect the owning slot. The emptiness check + /// is performed under the coordination lock so there is no TOCTOU gap + /// with the removal itself. + pub fn remove_by_outer(&self, outer: InodeAddr) -> bool { let _guard = self .mu .lock() @@ -110,6 +113,7 @@ impl ConcurrentBridge { if let Some((_, inner)) = self.fwd.remove_sync(&outer) { self.bwd.remove_sync(&inner); } + self.fwd.is_empty() } } diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index 957836b1..3ba0f5f7 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -341,7 +341,7 @@ where // Await the lookup outside any scc guard. 
let tracked = child .get_fs() - .lookup(LoadedAddr(inner_parent), name) + .lookup(LoadedAddr::new_unchecked(inner_parent), name) .await?; let child_inode = tracked.inode; @@ -394,7 +394,7 @@ where let mut child_entries = Vec::new(); child .get_fs() - .readdir(LoadedAddr(inner_parent), 0, |de, _offset| { + .readdir(LoadedAddr::new_unchecked(inner_parent), 0, |de, _offset| { child_entries.push((de.name.to_os_string(), de.inode)); false }) @@ -437,8 +437,10 @@ where let inner_ino = inner_ino.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; - let open_file: OpenFile<<::ChildDP as FsDataProvider>::Reader> = - child.get_fs().open(LoadedAddr(inner_ino), flags).await?; + let open_file: OpenFile<<::ChildDP as FsDataProvider>::Reader> = child + .get_fs() + .open(LoadedAddr::new_unchecked(inner_ino), flags) + .await?; Ok(CompositeReader { inner: open_file.reader, @@ -461,9 +463,14 @@ where return; } if let Some((_, slot_idx)) = self.inner.addr_to_slot.remove_sync(&addr) { - self.inner + let bridge_empty = self + .inner .slots - .read_sync(&slot_idx, |_, slot| slot.bridge.remove_by_outer(addr)); + .read_sync(&slot_idx, |_, slot| slot.bridge.remove_by_outer(addr)) + .unwrap_or(false); + if bridge_empty { + self.inner.slots.remove_sync(&slot_idx); + } } } } diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 7a9bed24..15fa36f7 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -210,7 +210,11 @@ impl fuser::Filesystem for FuserAdapter { ) { self.runtime .block_on(async { - let tracked = self.inner.get_fs().lookup(LoadedAddr(parent), name).await?; + let tracked = self + .inner + .get_fs() + .lookup(LoadedAddr::new_unchecked(parent), name) + .await?; self.inner.ward_inc(tracked.inode.addr); Ok::<_, std::io::Error>(tracked.inode) }) @@ -230,7 +234,12 @@ impl fuser::Filesystem for FuserAdapter { reply: fuser::ReplyAttr, ) { self.runtime - .block_on(async { self.inner.get_fs().getattr(LoadedAddr(ino)).await }) + .block_on(async { + self.inner + .get_fs() + 
.getattr(LoadedAddr::new_unchecked(ino)) + .await + }) .fuse_reply(reply, |inode, reply| { let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); debug!(?attr, "replying..."); @@ -253,10 +262,14 @@ impl fuser::Filesystem for FuserAdapter { let mut entries = Vec::new(); self.inner .get_fs() - .readdir(LoadedAddr(ino), offset_u64, |de, _next_offset| { - entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); - false - }) + .readdir( + LoadedAddr::new_unchecked(ino), + offset_u64, + |de, _next_offset| { + entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); + false + }, + ) .await?; Ok::<_, std::io::Error>(entries) }) @@ -291,7 +304,11 @@ impl fuser::Filesystem for FuserAdapter { let flags = OpenFlags::from_bits_truncate(flags); self.runtime .block_on(async { - let open_file = self.inner.get_fs().open(LoadedAddr(ino), flags).await?; + let open_file = self + .inner + .get_fs() + .open(LoadedAddr::new_unchecked(ino), flags) + .await?; let fh = open_file.fh; self.open_files.insert(fh, Arc::clone(&open_file.reader)); Ok::<_, std::io::Error>(fh) diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs index ed93bd25..02ef8384 100644 --- a/lib/fs/mod.rs +++ b/lib/fs/mod.rs @@ -24,8 +24,35 @@ pub type InodeAddr = u64; /// /// This newtype wrapper distinguishes inode addresses that are known to exist /// in the [`async_fs::AsyncFs`] inode table from raw [`InodeAddr`] values. +/// +/// The inner field is private to prevent unchecked construction. Code within +/// the crate may use [`LoadedAddr::new_unchecked`] at trusted boundaries +/// (e.g. after inserting into the inode table, or at the FUSE adapter boundary +/// where the kernel provides addresses it previously received from us). #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct LoadedAddr(pub InodeAddr); +pub struct LoadedAddr(InodeAddr); + +impl LoadedAddr { + /// Construct a `LoadedAddr` without validating that the address exists in + /// the inode table. 
+ /// + /// # Safety contract (logical, not `unsafe`) + /// + /// The caller must ensure one of: + /// - The address was previously inserted into an inode table, **or** + /// - The address originates from the FUSE kernel (which only knows + /// addresses we previously returned to it). + #[must_use] + pub fn new_unchecked(addr: InodeAddr) -> Self { + Self(addr) + } + + /// Return the raw inode address. + #[must_use] + pub fn addr(self) -> InodeAddr { + self.0 + } +} /// Type representing a file handle. pub type FileHandle = u64; diff --git a/tests/async_fs_correctness.rs b/tests/async_fs_correctness.rs index fd6c2bdc..643e7b1e 100644 --- a/tests/async_fs_correctness.rs +++ b/tests/async_fs_correctness.rs @@ -172,7 +172,7 @@ async fn loaded_inode_returns_seeded_inode() { let fs = AsyncFs::new(dp, root, &table).await; - let inode = fs.loaded_inode(LoadedAddr(1)).await.unwrap(); + let inode = fs.loaded_inode(LoadedAddr::new_unchecked(1)).await.unwrap(); assert_eq!(inode.addr, 1); assert_eq!(inode.itype, INodeType::Directory); } @@ -185,7 +185,10 @@ async fn loaded_inode_returns_enoent_for_missing_addr() { let fs = AsyncFs::new(dp, root, &table).await; - let err = fs.loaded_inode(LoadedAddr(999)).await.unwrap_err(); + let err = fs + .loaded_inode(LoadedAddr::new_unchecked(999)) + .await + .unwrap_err(); assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); } @@ -197,7 +200,7 @@ async fn getattr_delegates_to_loaded_inode() { let fs = AsyncFs::new(dp, root, &table).await; - let inode = fs.getattr(LoadedAddr(1)).await.unwrap(); + let inode = fs.getattr(LoadedAddr::new_unchecked(1)).await.unwrap(); assert_eq!(inode.addr, 1); assert_eq!(inode.size, 4096); } @@ -215,7 +218,7 @@ async fn lookup_resolves_child_via_data_provider() { let fs = AsyncFs::new(dp, root, &table).await; let tracked = fs - .lookup(LoadedAddr(1), OsStr::new("readme.md")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("readme.md")) .await .unwrap(); @@ -236,7 +239,7 @@ async fn 
lookup_populates_inode_table() { let table = FutureBackedCache::default(); let fs = AsyncFs::new(dp, root, &table).await; - fs.lookup(LoadedAddr(1), OsStr::new("file.txt")) + fs.lookup(LoadedAddr::new_unchecked(1), OsStr::new("file.txt")) .await .unwrap(); @@ -262,11 +265,11 @@ async fn lookup_second_call_uses_cache() { let fs = AsyncFs::new(dp, root, &table).await; let first = fs - .lookup(LoadedAddr(1), OsStr::new("cached.txt")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("cached.txt")) .await .unwrap(); let second = fs - .lookup(LoadedAddr(1), OsStr::new("cached.txt")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("cached.txt")) .await .unwrap(); @@ -283,7 +286,7 @@ async fn lookup_propagates_provider_error() { let fs = AsyncFs::new(dp, root, &table).await; let err = fs - .lookup(LoadedAddr(1), OsStr::new("nonexistent")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("nonexistent")) .await .unwrap_err(); assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); @@ -306,7 +309,10 @@ async fn open_returns_file_handle_and_reader() { table.insert_sync(10, file); let fs = AsyncFs::new(dp, root, &table).await; - let open_file = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap(); + let open_file = fs + .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) + .await + .unwrap(); assert!(open_file.fh >= 1, "file handle should start at 1"); let data = open_file.read(0, 5).await.unwrap(); @@ -321,7 +327,10 @@ async fn open_returns_eisdir_for_directory() { let table = FutureBackedCache::default(); let fs = AsyncFs::new(dp, root, &table).await; - let err = fs.open(LoadedAddr(1), OpenFlags::RDONLY).await.unwrap_err(); + let err = fs + .open(LoadedAddr::new_unchecked(1), OpenFlags::RDONLY) + .await + .unwrap_err(); assert_eq!(err.raw_os_error(), Some(libc::EISDIR)); } @@ -334,7 +343,7 @@ async fn open_returns_enoent_for_missing_inode() { let fs = AsyncFs::new(dp, root, &table).await; let err = fs - .open(LoadedAddr(999), OpenFlags::RDONLY) + 
.open(LoadedAddr::new_unchecked(999), OpenFlags::RDONLY) .await .unwrap_err(); assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); @@ -351,8 +360,16 @@ async fn open_assigns_unique_file_handles() { table.insert_sync(10, file); let fs = AsyncFs::new(dp, root, &table).await; - let fh1 = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap().fh; - let fh2 = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap().fh; + let fh1 = fs + .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) + .await + .unwrap() + .fh; + let fh2 = fs + .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) + .await + .unwrap() + .fh; assert_ne!(fh1, fh2, "each open should produce a unique file handle"); } @@ -372,7 +389,10 @@ async fn open_file_read_with_offset() { table.insert_sync(10, file); let fs = AsyncFs::new(dp, root, &table).await; - let open_file = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap(); + let open_file = fs + .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) + .await + .unwrap(); let data = open_file.read(6, 5).await.unwrap(); assert_eq!(&data[..], b"world"); @@ -400,7 +420,7 @@ async fn readdir_lists_children_sorted_by_name() { let fs = AsyncFs::new(dp, root, &table).await; let mut entries: Vec<(OsString, u64)> = Vec::new(); - fs.readdir(LoadedAddr(1), 0, |entry, _offset| { + fs.readdir(LoadedAddr::new_unchecked(1), 0, |entry, _offset| { entries.push((entry.name.to_os_string(), entry.inode.addr)); false // don't stop }) @@ -436,11 +456,13 @@ async fn readdir_respects_offset() { let fs = AsyncFs::new(dp, root, &table).await; // First readdir to populate cache - fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) + .await + .unwrap(); // Second readdir starting at offset 2 (skip first two) let mut entries: Vec = Vec::new(); - fs.readdir(LoadedAddr(1), 2, |entry, _| { + fs.readdir(LoadedAddr::new_unchecked(1), 2, |entry, _| { entries.push(entry.name.to_os_string()); false }) @@ -472,7 
+494,7 @@ async fn readdir_stops_when_filler_returns_true() { let fs = AsyncFs::new(dp, root, &table).await; let mut count = 0; - fs.readdir(LoadedAddr(1), 0, |_, _| { + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| { count += 1; count >= 2 // stop after 2 entries }) @@ -494,7 +516,7 @@ async fn readdir_returns_enotdir_for_file() { let fs = AsyncFs::new(dp, root, &table).await; let err = fs - .readdir(LoadedAddr(10), 0, |_, _| false) + .readdir(LoadedAddr::new_unchecked(10), 0, |_, _| false) .await .unwrap_err(); assert_eq!(err.raw_os_error(), Some(libc::ENOTDIR)); @@ -514,7 +536,9 @@ async fn readdir_populates_inode_table_with_children() { let table = FutureBackedCache::default(); let fs = AsyncFs::new(dp, root, &table).await; - fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) + .await + .unwrap(); let cached = table.get(&10).await; assert_eq!( @@ -536,7 +560,7 @@ async fn readdir_empty_directory() { let fs = AsyncFs::new(dp, root, &table).await; let mut count = 0; - fs.readdir(LoadedAddr(1), 0, |_, _| { + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| { count += 1; false }) @@ -566,7 +590,7 @@ async fn readdir_provides_correct_next_offsets() { let fs = AsyncFs::new(dp, root, &table).await; let mut offsets: Vec = Vec::new(); - fs.readdir(LoadedAddr(1), 0, |_, next_offset| { + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, next_offset| { offsets.push(next_offset); false }) @@ -596,7 +620,7 @@ async fn lookup_after_eviction_returns_fresh_inode() { // First lookup → addr=10 let first = fs - .lookup(LoadedAddr(1), OsStr::new("readme.md")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("readme.md")) .await .unwrap(); assert_eq!(first.inode.addr, 10); @@ -614,7 +638,7 @@ async fn lookup_after_eviction_returns_fresh_inode() { // Second lookup should NOT return the stale addr=10. 
let second = fs - .lookup(LoadedAddr(1), OsStr::new("readme.md")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("readme.md")) .await .unwrap(); assert_ne!(second.inode.addr, 10, "should not return stale inode"); @@ -640,11 +664,13 @@ async fn lookup_after_readdir_uses_directory_cache() { let fs = AsyncFs::new(dp, root, &table).await; // readdir populates the directory cache. - fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) + .await + .unwrap(); // lookup should hit the directory cache fast path. let tracked = fs - .lookup(LoadedAddr(1), OsStr::new("file.txt")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("file.txt")) .await .unwrap(); assert_eq!(tracked.inode.addr, 10); diff --git a/tests/composite_fs_tests.rs b/tests/composite_fs_tests.rs index d6470a6a..d68dd6ea 100644 --- a/tests/composite_fs_tests.rs +++ b/tests/composite_fs_tests.rs @@ -58,7 +58,7 @@ async fn composite_root_lookup_resolves_child() { let afs = AsyncFs::new_preseeded(composite, &table); let tracked = afs - .lookup(LoadedAddr(1), OsStr::new("repo-a")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo-a")) .await .unwrap(); @@ -96,7 +96,7 @@ async fn composite_root_readdir_lists_children() { let afs = AsyncFs::new_preseeded(composite, &table); let mut entries = Vec::new(); - afs.readdir(LoadedAddr(1), 0, |de, _offset| { + afs.readdir(LoadedAddr::new_unchecked(1), 0, |de, _offset| { entries.push(de.name.to_os_string()); false }) @@ -132,14 +132,17 @@ async fn composite_delegated_lookup_reaches_child() { // First, lookup the child at root level. let child_dir = afs - .lookup(LoadedAddr(1), OsStr::new("my-repo")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("my-repo")) .await .unwrap(); let child_addr = child_dir.inode.addr; // Then, lookup a file inside the child. 
let file = afs - .lookup(LoadedAddr(child_addr), OsStr::new("readme.md")) + .lookup( + LoadedAddr::new_unchecked(child_addr), + OsStr::new("readme.md"), + ) .await .unwrap(); @@ -148,7 +151,7 @@ async fn composite_delegated_lookup_reaches_child() { // Also lookup a subdirectory inside the child. let subdir = afs - .lookup(LoadedAddr(child_addr), OsStr::new("src")) + .lookup(LoadedAddr::new_unchecked(child_addr), OsStr::new("src")) .await .unwrap(); @@ -171,16 +174,22 @@ async fn composite_open_and_read_through_child() { let afs = AsyncFs::new_preseeded(composite, &table); // Navigate to the file. - let child_dir = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + let child_dir = afs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo")) + .await + .unwrap(); let file_tracked = afs - .lookup(LoadedAddr(child_dir.inode.addr), OsStr::new("hello.txt")) + .lookup( + LoadedAddr::new_unchecked(child_dir.inode.addr), + OsStr::new("hello.txt"), + ) .await .unwrap(); let file_addr = file_tracked.inode.addr; // Open and read. let open_file = afs - .open(LoadedAddr(file_addr), OpenFlags::empty()) + .open(LoadedAddr::new_unchecked(file_addr), OpenFlags::empty()) .await .unwrap(); let data = open_file.read(0, 1024).await.unwrap(); @@ -208,7 +217,7 @@ async fn composite_lookup_unknown_child_returns_enoent() { let afs = AsyncFs::new_preseeded(composite, &table); let err = afs - .lookup(LoadedAddr(1), OsStr::new("nonexistent")) + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("nonexistent")) .await .unwrap_err(); @@ -242,14 +251,21 @@ async fn composite_readdir_delegated_lists_child_contents() { let afs = AsyncFs::new_preseeded(composite, &table); // Navigate into the child. - let child_dir = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + let child_dir = afs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo")) + .await + .unwrap(); // Readdir inside the child. 
let mut entries = Vec::new(); - afs.readdir(LoadedAddr(child_dir.inode.addr), 0, |de, _offset| { - entries.push((de.name.to_os_string(), de.inode.itype)); - false - }) + afs.readdir( + LoadedAddr::new_unchecked(child_dir.inode.addr), + 0, + |de, _offset| { + entries.push((de.name.to_os_string(), de.inode.itype)); + false + }, + ) .await .unwrap(); @@ -275,8 +291,14 @@ async fn composite_repeated_lookup_returns_same_addr() { table.insert_sync(1, root_inode); let afs = AsyncFs::new_preseeded(composite, &table); - let first = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); - let second = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + let first = afs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo")) + .await + .unwrap(); + let second = afs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo")) + .await + .unwrap(); assert_eq!( first.inode.addr, second.inode.addr, diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs index 34dcf088..83074517 100644 --- a/tests/dcache_correctness.rs +++ b/tests/dcache_correctness.rs @@ -8,19 +8,28 @@ use git_fs::fs::dcache::{DCache, PopulateStatus}; #[tokio::test] async fn lookup_returns_none_for_missing_entry() { let cache = DCache::new(); - assert!(cache.lookup(LoadedAddr(1), OsStr::new("foo")).is_none()); + assert!( + cache + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("foo")) + .is_none() + ); } #[tokio::test] async fn insert_then_lookup() { let cache = DCache::new(); cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .insert( + LoadedAddr::new_unchecked(1), + OsString::from("foo"), + LoadedAddr::new_unchecked(10), + false, + ) .await; - let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + let dv = cache.lookup(LoadedAddr::new_unchecked(1), OsStr::new("foo")); assert!(dv.is_some(), "entry should be present after insert"); let dv = dv.expect("checked above"); - assert_eq!(dv.ino, LoadedAddr(10)); + assert_eq!(dv.ino, 
LoadedAddr::new_unchecked(10)); assert!(!dv.is_dir); } @@ -28,15 +37,30 @@ async fn insert_then_lookup() { async fn readdir_returns_only_children_of_parent() { let cache = DCache::new(); cache - .insert(LoadedAddr(1), OsString::from("a"), LoadedAddr(10), false) + .insert( + LoadedAddr::new_unchecked(1), + OsString::from("a"), + LoadedAddr::new_unchecked(10), + false, + ) .await; cache - .insert(LoadedAddr(1), OsString::from("b"), LoadedAddr(11), true) + .insert( + LoadedAddr::new_unchecked(1), + OsString::from("b"), + LoadedAddr::new_unchecked(11), + true, + ) .await; cache - .insert(LoadedAddr(2), OsString::from("c"), LoadedAddr(12), false) + .insert( + LoadedAddr::new_unchecked(2), + OsString::from("c"), + LoadedAddr::new_unchecked(12), + false, + ) .await; - let children = cache.readdir(LoadedAddr(1)).await; + let children = cache.readdir(LoadedAddr::new_unchecked(1)).await; assert_eq!(children.len(), 2); let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); assert!(names.contains(&OsString::from("a"))); @@ -46,7 +70,7 @@ async fn readdir_returns_only_children_of_parent() { #[tokio::test] async fn readdir_empty_parent_returns_empty() { let cache = DCache::new(); - let children = cache.readdir(LoadedAddr(1)).await; + let children = cache.readdir(LoadedAddr::new_unchecked(1)).await; assert!(children.is_empty()); } @@ -54,7 +78,7 @@ async fn readdir_empty_parent_returns_empty() { async fn try_claim_populate_unclaimed_returns_claimed() { let cache = DCache::new(); assert!(matches!( - cache.try_claim_populate(LoadedAddr(1)), + cache.try_claim_populate(LoadedAddr::new_unchecked(1)), PopulateStatus::Claimed )); } @@ -63,12 +87,12 @@ async fn try_claim_populate_unclaimed_returns_claimed() { async fn finish_populate_then_claim_returns_done() { let cache = DCache::new(); assert!(matches!( - cache.try_claim_populate(LoadedAddr(1)), + cache.try_claim_populate(LoadedAddr::new_unchecked(1)), PopulateStatus::Claimed )); - cache.finish_populate(LoadedAddr(1)); 
+ cache.finish_populate(LoadedAddr::new_unchecked(1)); assert!(matches!( - cache.try_claim_populate(LoadedAddr(1)), + cache.try_claim_populate(LoadedAddr::new_unchecked(1)), PopulateStatus::Done )); } @@ -77,11 +101,11 @@ async fn finish_populate_then_claim_returns_done() { async fn double_claim_returns_in_progress() { let cache = DCache::new(); assert!(matches!( - cache.try_claim_populate(LoadedAddr(1)), + cache.try_claim_populate(LoadedAddr::new_unchecked(1)), PopulateStatus::Claimed )); assert!(matches!( - cache.try_claim_populate(LoadedAddr(1)), + cache.try_claim_populate(LoadedAddr::new_unchecked(1)), PopulateStatus::InProgress )); } @@ -90,12 +114,12 @@ async fn double_claim_returns_in_progress() { async fn abort_populate_allows_reclaim() { let cache = DCache::new(); assert!(matches!( - cache.try_claim_populate(LoadedAddr(1)), + cache.try_claim_populate(LoadedAddr::new_unchecked(1)), PopulateStatus::Claimed )); - cache.abort_populate(LoadedAddr(1)); + cache.abort_populate(LoadedAddr::new_unchecked(1)); assert!(matches!( - cache.try_claim_populate(LoadedAddr(1)), + cache.try_claim_populate(LoadedAddr::new_unchecked(1)), PopulateStatus::Claimed )); } @@ -104,11 +128,16 @@ async fn abort_populate_allows_reclaim() { async fn insert_does_not_mark_populated() { let cache = DCache::new(); cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .insert( + LoadedAddr::new_unchecked(1), + OsString::from("foo"), + LoadedAddr::new_unchecked(10), + false, + ) .await; assert!( matches!( - cache.try_claim_populate(LoadedAddr(1)), + cache.try_claim_populate(LoadedAddr::new_unchecked(1)), PopulateStatus::Claimed ), "insert alone should not mark a directory as populated" @@ -119,14 +148,24 @@ async fn insert_does_not_mark_populated() { async fn upsert_overwrites_existing_entry() { let cache = DCache::new(); cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .insert( + LoadedAddr::new_unchecked(1), + OsString::from("foo"), + 
LoadedAddr::new_unchecked(10), + false, + ) .await; cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(20), true) + .insert( + LoadedAddr::new_unchecked(1), + OsString::from("foo"), + LoadedAddr::new_unchecked(20), + true, + ) .await; - let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + let dv = cache.lookup(LoadedAddr::new_unchecked(1), OsStr::new("foo")); assert!(dv.is_some(), "entry should still be present after upsert"); let dv = dv.expect("checked above"); - assert_eq!(dv.ino, LoadedAddr(20)); + assert_eq!(dv.ino, LoadedAddr::new_unchecked(20)); assert!(dv.is_dir); } From 9b30b5503bfa5b859980cd889aee8d98ae06598b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 15:05:03 -0800 Subject: [PATCH 30/41] fix: atomicize forget slot GC and clean up name_to_slot --- lib/fs/bridge.rs | 11 +++++++++ lib/fs/composite.rs | 41 +++++++++++++++++++++---------- tests/composite_fs_tests.rs | 48 ++++++++++++++++++++++++++++++++++++- 3 files changed, 86 insertions(+), 14 deletions(-) diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs index b0366cfd..49acf9ee 100644 --- a/lib/fs/bridge.rs +++ b/lib/fs/bridge.rs @@ -115,6 +115,17 @@ impl ConcurrentBridge { } self.fwd.is_empty() } + + /// Returns `true` if the bridge contains no mappings. + /// + /// Reads are not serialized with mutations. The result is a + /// snapshot that may be immediately stale. Use under the + /// coordination lock or an external guard when consistency + /// with mutations is required. + #[must_use] + pub fn is_empty(&self) -> bool { + self.fwd.is_empty() + } } impl Default for ConcurrentBridge { diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index 3ba0f5f7..ea7349dc 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -141,7 +141,8 @@ struct CompositeFsInner { /// /// `register_child` uses `entry_sync` on this map for per-name /// exclusion, serializing concurrent registrations of the same child - /// without a global lock. 
`forget` never touches this map. + /// without a global lock. `forget` cleans up entries when a slot's + /// bridge becomes empty. name_to_slot: scc::HashMap, /// Monotonically increasing slot counter. next_slot: AtomicU64, @@ -265,8 +266,9 @@ impl CompositeFs { /// Uses `entry_sync` on `name_to_slot` for per-name exclusion: /// concurrent registrations of the same child are serialized by the /// `scc::HashMap` bucket lock, while different names proceed in - /// parallel. `forget` never touches `name_to_slot` and is fully - /// independent — outer inode addresses are monotonic and never reused, + /// parallel. `forget` may remove entries from `name_to_slot` when a + /// slot's bridge becomes empty, but this is safe — outer inode addresses + /// are monotonic and never reused, /// so `forget` cannot corrupt a replacement slot. fn register_child(&self, desc: &ChildDescriptor) -> InodeAddr where @@ -448,28 +450,41 @@ where } /// Removes the composite-level address from `addr_to_slot` and the - /// child's bridge map. Called automatically by `InodeForget` when the - /// FUSE refcount drops to zero. The root inode is never forgotten. + /// child's bridge map. When the bridge becomes empty, the slot and its + /// `name_to_slot` entry are garbage-collected. /// - /// Lock-free with respect to [`register_child`](CompositeFs::register_child): - /// outer inode addresses are monotonically increasing and never reused, - /// so `forget(addr)` can only affect the slot that originally owned - /// `addr`. If a concurrent `register_child` has already replaced the - /// slot, `slots.read_sync` returns `None` and the bridge cleanup is - /// skipped — the old slot's `Arc` is dropped with its - /// `Arc` refcount. + /// The slot removal uses `remove_if_sync` with a re-check of + /// `bridge.is_empty()`, preventing a concurrent `backward_or_insert` + /// from inserting a new mapping between the bridge emptiness check + /// and the slot removal. 
+ /// + /// The root inode is never forgotten. fn forget(&self, addr: InodeAddr) { if addr == Self::ROOT_INO { return; } if let Some((_, slot_idx)) = self.inner.addr_to_slot.remove_sync(&addr) { + // Remove the outer->inner mapping from the bridge. The bridge's + // internal mutex serializes this with `backward_or_insert`. let bridge_empty = self .inner .slots .read_sync(&slot_idx, |_, slot| slot.bridge.remove_by_outer(addr)) .unwrap_or(false); if bridge_empty { - self.inner.slots.remove_sync(&slot_idx); + // Bridge is empty — atomically remove the slot only if no one + // has re-populated the bridge between our check and this removal. + // `remove_if_sync` holds the scc bucket lock during evaluation. + let removed = self + .inner + .slots + .remove_if_sync(&slot_idx, |slot| slot.bridge.is_empty()); + if removed.is_some() { + // Clean up name_to_slot to prevent dead slot indices. + self.inner + .name_to_slot + .retain_sync(|_, &mut idx| idx != slot_idx); + } } } } diff --git a/tests/composite_fs_tests.rs b/tests/composite_fs_tests.rs index d68dd6ea..ce110acb 100644 --- a/tests/composite_fs_tests.rs +++ b/tests/composite_fs_tests.rs @@ -8,7 +8,7 @@ use std::ffi::{OsStr, OsString}; use bytes::Bytes; use git_fs::cache::async_backed::FutureBackedCache; -use git_fs::fs::async_fs::AsyncFs; +use git_fs::fs::async_fs::{AsyncFs, FsDataProvider as _}; use git_fs::fs::composite::CompositeFs; use git_fs::fs::{INode, INodeType, LoadedAddr, OpenFlags}; @@ -305,3 +305,49 @@ async fn composite_repeated_lookup_returns_same_addr() { "repeated lookups for the same child should return the same composite address" ); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_forget_cleans_up_slot_and_name_mapping() { + // Setup: one child "repo" with a file. 
+ let (provider, root_ino) = make_child_provider(100, &[("file.txt", 101, INodeType::File, 42)]); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite.clone(), &table); + + // Look up the child and a file inside it. + let child_dir = afs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo")) + .await + .unwrap(); + let child_addr = child_dir.inode.addr; + + let file = afs + .lookup( + LoadedAddr::new_unchecked(child_addr), + OsStr::new("file.txt"), + ) + .await + .unwrap(); + let file_addr = file.inode.addr; + + // Forget the file, then the child directory. + composite.forget(file_addr); + composite.forget(child_addr); + + // Re-lookup the child — should succeed with a fresh slot. + let re_resolved = afs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo")) + .await + .unwrap(); + + assert_eq!(re_resolved.inode.itype, INodeType::Directory); + // The new address may differ from the original (fresh slot allocated). 
+} From 2dccf32d3213c7a9d766a1a874fb55a2b8e336e2 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 15:39:58 -0800 Subject: [PATCH 31/41] perf: use BTreeMap in DCache to eliminate readdir re-sort --- lib/fs/async_fs.rs | 31 ++++------ lib/fs/dcache.rs | 52 ++++++++-------- tests/dcache_correctness.rs | 118 ++++++++++++++++++------------------ 3 files changed, 101 insertions(+), 100 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 625f3f8c..78adfacc 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -372,14 +372,12 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { .get_or_init(child.addr, || async move { child }) .await; - self.directory_cache - .insert( - parent, - name_owned, - LoadedAddr::new_unchecked(child.addr), - matches!(child.itype, INodeType::Directory), - ) - .await; + self.directory_cache.insert( + parent, + name_owned, + LoadedAddr::new_unchecked(child.addr), + matches!(child.itype, INodeType::Directory), + ); Ok(TrackedINode { inode: child }) } @@ -470,14 +468,12 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { self.inode_table .get_or_init(child_inode.addr, || async move { child_inode }) .await; - self.directory_cache - .insert( - parent, - name, - LoadedAddr::new_unchecked(child_inode.addr), - child_inode.itype == INodeType::Directory, - ) - .await; + self.directory_cache.insert( + parent, + name, + LoadedAddr::new_unchecked(child_inode.addr), + child_inode.itype == INodeType::Directory, + ); } self.directory_cache.finish_populate(parent); guard.defuse(); @@ -491,8 +487,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { } } - let mut children = self.directory_cache.readdir(parent).await; - children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + let children = self.directory_cache.readdir(parent); #[expect( clippy::cast_possible_truncation, diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index d8778fb8..247070e9 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -1,6 +1,7 @@ +use 
std::collections::BTreeMap; use std::ffi::{OsStr, OsString}; -use std::sync::Arc; use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::{Arc, RwLock}; use tokio::sync::Notify; @@ -32,7 +33,7 @@ pub enum PopulateStatus { /// Per-parent directory state holding child entries and a population flag. struct DirState { - children: scc::HashMap, + children: RwLock>, populated: AtomicU8, /// Wakes waiters when `populated` transitions out of `IN_PROGRESS`. notify: Notify, @@ -41,7 +42,7 @@ struct DirState { impl DirState { fn new() -> Self { Self { - children: scc::HashMap::new(), + children: RwLock::new(BTreeMap::new()), populated: AtomicU8::new(POPULATE_UNCLAIMED), notify: Notify::new(), } @@ -51,9 +52,9 @@ impl DirState { /// In-memory directory entry cache with per-parent child maps. /// /// Each parent directory gets its own [`DirState`] containing a -/// [`scc::HashMap`] of child entries and an [`AtomicU8`] population flag. -/// This makes `readdir` O(k) in the number of children rather than O(n) -/// over the entire cache. +/// [`BTreeMap`] of child entries (kept in sorted order) and an [`AtomicU8`] +/// population flag. This makes `readdir` O(k) in the number of children +/// with zero sorting overhead. pub struct DCache { dirs: scc::HashMap>, } @@ -93,36 +94,39 @@ impl DCache { #[must_use] pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option { let state = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v))?; - state.children.read_sync(name, |_, v| v.clone()) + let children = state + .children + .read() + .unwrap_or_else(std::sync::PoisonError::into_inner); + children.get(name).cloned() } /// Atomically inserts or overwrites a child entry in the cache. 
- pub async fn insert( - &self, - parent_ino: LoadedAddr, - name: OsString, - ino: LoadedAddr, - is_dir: bool, - ) { + pub fn insert(&self, parent_ino: LoadedAddr, name: OsString, ino: LoadedAddr, is_dir: bool) { let state = self.dir_state(parent_ino); let value = DValue { ino, is_dir }; - state.children.upsert_async(name, value).await; + let mut children = state + .children + .write() + .unwrap_or_else(std::sync::PoisonError::into_inner); + children.insert(name, value); } /// Returns all cached children of `parent_ino` as `(name, value)` pairs. - pub async fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> { + /// + /// Entries are returned in name-sorted order (guaranteed by `BTreeMap`). + pub fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> { let Some(state) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) else { return Vec::new(); }; - let mut entries = Vec::new(); - state + let children = state .children - .iter_async(|k, v| { - entries.push((k.clone(), v.clone())); - true - }) - .await; - entries + .read() + .unwrap_or_else(std::sync::PoisonError::into_inner); + children + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect() } /// Atomically try to claim a directory for population. 
diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs index 83074517..c2273076 100644 --- a/tests/dcache_correctness.rs +++ b/tests/dcache_correctness.rs @@ -18,14 +18,12 @@ async fn lookup_returns_none_for_missing_entry() { #[tokio::test] async fn insert_then_lookup() { let cache = DCache::new(); - cache - .insert( - LoadedAddr::new_unchecked(1), - OsString::from("foo"), - LoadedAddr::new_unchecked(10), - false, - ) - .await; + cache.insert( + LoadedAddr::new_unchecked(1), + OsString::from("foo"), + LoadedAddr::new_unchecked(10), + false, + ); let dv = cache.lookup(LoadedAddr::new_unchecked(1), OsStr::new("foo")); assert!(dv.is_some(), "entry should be present after insert"); let dv = dv.expect("checked above"); @@ -36,31 +34,25 @@ async fn insert_then_lookup() { #[tokio::test] async fn readdir_returns_only_children_of_parent() { let cache = DCache::new(); - cache - .insert( - LoadedAddr::new_unchecked(1), - OsString::from("a"), - LoadedAddr::new_unchecked(10), - false, - ) - .await; - cache - .insert( - LoadedAddr::new_unchecked(1), - OsString::from("b"), - LoadedAddr::new_unchecked(11), - true, - ) - .await; - cache - .insert( - LoadedAddr::new_unchecked(2), - OsString::from("c"), - LoadedAddr::new_unchecked(12), - false, - ) - .await; - let children = cache.readdir(LoadedAddr::new_unchecked(1)).await; + cache.insert( + LoadedAddr::new_unchecked(1), + OsString::from("a"), + LoadedAddr::new_unchecked(10), + false, + ); + cache.insert( + LoadedAddr::new_unchecked(1), + OsString::from("b"), + LoadedAddr::new_unchecked(11), + true, + ); + cache.insert( + LoadedAddr::new_unchecked(2), + OsString::from("c"), + LoadedAddr::new_unchecked(12), + false, + ); + let children = cache.readdir(LoadedAddr::new_unchecked(1)); assert_eq!(children.len(), 2); let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); assert!(names.contains(&OsString::from("a"))); @@ -70,7 +62,7 @@ async fn readdir_returns_only_children_of_parent() { #[tokio::test] async 
fn readdir_empty_parent_returns_empty() { let cache = DCache::new(); - let children = cache.readdir(LoadedAddr::new_unchecked(1)).await; + let children = cache.readdir(LoadedAddr::new_unchecked(1)); assert!(children.is_empty()); } @@ -127,14 +119,12 @@ async fn abort_populate_allows_reclaim() { #[tokio::test] async fn insert_does_not_mark_populated() { let cache = DCache::new(); - cache - .insert( - LoadedAddr::new_unchecked(1), - OsString::from("foo"), - LoadedAddr::new_unchecked(10), - false, - ) - .await; + cache.insert( + LoadedAddr::new_unchecked(1), + OsString::from("foo"), + LoadedAddr::new_unchecked(10), + false, + ); assert!( matches!( cache.try_claim_populate(LoadedAddr::new_unchecked(1)), @@ -147,25 +137,37 @@ async fn insert_does_not_mark_populated() { #[tokio::test] async fn upsert_overwrites_existing_entry() { let cache = DCache::new(); - cache - .insert( - LoadedAddr::new_unchecked(1), - OsString::from("foo"), - LoadedAddr::new_unchecked(10), - false, - ) - .await; - cache - .insert( - LoadedAddr::new_unchecked(1), - OsString::from("foo"), - LoadedAddr::new_unchecked(20), - true, - ) - .await; + cache.insert( + LoadedAddr::new_unchecked(1), + OsString::from("foo"), + LoadedAddr::new_unchecked(10), + false, + ); + cache.insert( + LoadedAddr::new_unchecked(1), + OsString::from("foo"), + LoadedAddr::new_unchecked(20), + true, + ); let dv = cache.lookup(LoadedAddr::new_unchecked(1), OsStr::new("foo")); assert!(dv.is_some(), "entry should still be present after upsert"); let dv = dv.expect("checked above"); assert_eq!(dv.ino, LoadedAddr::new_unchecked(20)); assert!(dv.is_dir); } + +#[tokio::test] +async fn readdir_returns_entries_in_sorted_order() { + let cache = DCache::new(); + for name in ["zebra", "apple", "mango"] { + cache.insert( + LoadedAddr::new_unchecked(1), + OsString::from(name), + LoadedAddr::new_unchecked(10), + false, + ); + } + let children = cache.readdir(LoadedAddr::new_unchecked(1)); + let names: Vec<_> = children.iter().map(|(n, _)| 
n.to_str().unwrap()).collect(); + assert_eq!(names, ["apple", "mango", "zebra"]); +} From 2dd4d39ae47a0b9918511bee84dec42bd9cd9168 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 15:47:36 -0800 Subject: [PATCH 32/41] refactor: rename TrackedINode to ResolvedINode --- lib/fs/async_fs.rs | 16 ++++++++-------- lib/fs/mod.rs | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 78adfacc..51236ce8 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -103,13 +103,13 @@ impl<'a, DP: FsDataProvider> StatelessDrop<(&'a FutureBackedCache AsyncFs<'tbl, DP> { &self, parent: LoadedAddr, name: &OsStr, - ) -> Result { + ) -> Result { let parent_ino = self.loaded_inode(parent).await?; debug_assert!( matches!(parent_ino.itype, INodeType::Directory), @@ -342,7 +342,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { if let Some(dentry) = self.directory_cache.lookup(parent, name) { if let Some(inode) = self.inode_table.get(&dentry.ino.addr()).await { - return Ok(TrackedINode { inode }); + return Ok(ResolvedINode { inode }); } // Inode was evicted (e.g. by forget). Evict the stale lookup_cache // entry so the slow path calls dp.lookup() fresh. @@ -379,7 +379,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { matches!(child.itype, INodeType::Directory), ); - Ok(TrackedINode { inode: child }) + Ok(ResolvedINode { inode: child }) } /// Retrieve an inode that is expected to already be loaded. diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs index 02ef8384..2ecf4f3a 100644 --- a/lib/fs/mod.rs +++ b/lib/fs/mod.rs @@ -10,7 +10,7 @@ pub mod dcache; /// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. 
pub mod fuser; -pub use async_fs::{InodeForget, InodeLifecycle, OpenFile, TrackedINode}; +pub use async_fs::{InodeForget, InodeLifecycle, OpenFile, ResolvedINode}; use std::ffi::OsStr; use std::time::SystemTime; From 754eeca3bca76e4b644cb5da1f8893d4bd4e0199 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 15:49:19 -0800 Subject: [PATCH 33/41] perf: use Arc for lookup cache key to reduce allocations --- lib/fs/async_fs.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 51236ce8..3aedca26 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -248,7 +248,7 @@ pub struct AsyncFs<'tbl, DP: FsDataProvider> { /// Deduplicating lookup cache keyed by `(parent_addr, child_name)`. The factory is /// `dp.lookup()`, so the data provider is only called on a true cache miss. - lookup_cache: FutureBackedCache<(InodeAddr, OsString), INode>, + lookup_cache: FutureBackedCache<(InodeAddr, Arc), INode>, /// Directory entry cache, mapping `(parent, name)` to child inode address. directory_cache: DCache, @@ -346,24 +346,18 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { } // Inode was evicted (e.g. by forget). Evict the stale lookup_cache // entry so the slow path calls dp.lookup() fresh. - // - // Note: a concurrent task may re-insert into lookup_cache between - // our inode_table miss and this remove_sync. This is benign — it - // causes at most one redundant dp.lookup() call because all - // downstream operations (get_or_try_init, get_or_init) are - // idempotent or deduplicated. 
self.lookup_cache - .remove_sync(&(parent.addr(), name.to_os_string())); + .remove_sync(&(parent.addr(), Arc::from(name))); } - let name_owned = name.to_os_string(); - let lookup_key = (parent.addr(), name_owned.clone()); + let name_arc: Arc = Arc::from(name); + let lookup_key = (parent.addr(), Arc::clone(&name_arc)); let dp = self.data_provider.clone(); let child = self .lookup_cache .get_or_try_init(lookup_key, || { - let name_for_dp = name_owned.clone(); + let name_for_dp = Arc::clone(&name_arc); async move { dp.lookup(parent_ino, &name_for_dp).await } }) .await?; @@ -374,7 +368,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { self.directory_cache.insert( parent, - name_owned, + name_arc.as_ref().to_os_string(), LoadedAddr::new_unchecked(child.addr), matches!(child.itype, INodeType::Directory), ); From 3b93bc4dec3e191db1fbcf6bb2a6f836614434e0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 15:50:45 -0800 Subject: [PATCH 34/41] fix: hide LoadedAddr::new_unchecked from public API docs --- lib/fs/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs index 2ecf4f3a..b33b3ca7 100644 --- a/lib/fs/mod.rs +++ b/lib/fs/mod.rs @@ -42,6 +42,7 @@ impl LoadedAddr { /// - The address was previously inserted into an inode table, **or** /// - The address originates from the FUSE kernel (which only knows /// addresses we previously returned to it). 
+ #[doc(hidden)] #[must_use] pub fn new_unchecked(addr: InodeAddr) -> Self { Self(addr) From d9169afc17f5e695e83f95956666579dc76ba1a8 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 15:51:53 -0800 Subject: [PATCH 35/41] docs: document get_or_try_init dedup limitation and cache invalidation strategy --- lib/cache/async_backed.rs | 9 +++++++++ lib/fs/async_fs.rs | 5 +++++ lib/fs/mod.rs | 20 ++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index d8989cf8..9f3a7f94 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -143,6 +143,15 @@ where /// owner is elected. Joiners never receive the original error — the retrying owner invokes /// its own factory independently and may produce a different error or succeed. /// + /// # Deduplication of failures + /// + /// When the factory returns `Err`, the poisoned entry is removed and the + /// next caller becomes a new owner with its own factory invocation. This + /// means failures are **not deduplicated**: under transient errors, N + /// concurrent callers may each independently invoke their factory rather + /// than coalescing on the first error. This is intentional — callers + /// may have different retry or error-handling semantics. + /// /// # Panics /// /// Panics if the factory panics (caught internally via `catch_unwind`). diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 3aedca26..37c4b3d6 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -350,6 +350,11 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { .remove_sync(&(parent.addr(), Arc::from(name))); } + // Note: get_or_try_init deduplicates successful lookups but NOT + // failures. Under transient API errors, concurrent lookups for + // the same (parent, name) may each independently call dp.lookup(). 
+ // This is acceptable: the cost of a redundant API call on error is + // low compared to the complexity of error-channel deduplication. let name_arc: Arc = Arc::from(name); let lookup_key = (parent.addr(), Arc::clone(&name_arc)); let dp = self.data_provider.clone(); diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs index b33b3ca7..52f9510e 100644 --- a/lib/fs/mod.rs +++ b/lib/fs/mod.rs @@ -1,4 +1,24 @@ //! Useful filesystem generalizations. +//! +//! # Cache invalidation +//! +//! The current implementation caches directory listings and inode data +//! indefinitely once populated. Staleness is mitigated only by a short +//! FUSE entry/attr TTL (currently 1 second in `FuserAdapter`). +//! +//! The intended long-term strategy is to use FUSE kernel notifications +//! (`notify_inval_inode` / `notify_inval_entry`) to proactively invalidate +//! specific entries when the backing data changes. This would allow a +//! much higher TTL while still reflecting changes promptly. The key +//! changes needed: +//! +//! 1. `DCache` needs a `remove` or `invalidate` method to reset a +//! parent's `PopulateStatus` back to `UNCLAIMED`. +//! 2. `FuserAdapter` needs access to the `fuser::Session` handle to +//! send `notify_inval_entry` notifications. +//! 3. Data providers need a way to signal when their backing data changes +//! (e.g. webhook, polling, or subscription). + /// Async filesystem cache with concurrent inode management. pub mod async_fs; /// Lock-free bidirectional inode address mapping. 
From 4d19859d3b6621116391cd221130a038b536cce9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 18:27:58 -0800 Subject: [PATCH 36/41] slightly more cleanup --- lib/fs/async_fs.rs | 23 +++++++++++++++++------ lib/fs/bridge.rs | 19 +++++++++---------- lib/fs/composite.rs | 21 ++++++++++----------- lib/fs/dcache.rs | 17 +++++++++-------- tests/bridge_tests.rs | 6 +++--- tests/dcache_correctness.rs | 16 ++++++++++++---- 6 files changed, 60 insertions(+), 42 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 37c4b3d6..1899e136 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -486,20 +486,31 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { } } - let children = self.directory_cache.readdir(parent); - #[expect( clippy::cast_possible_truncation, reason = "offset fits in usize on supported 64-bit platforms" )] - for (i, (name, dvalue)) in children.iter().enumerate().skip(offset as usize) { - let Some(inode) = self.inode_table.get(&dvalue.ino.addr()).await else { + let skip = offset as usize; + + // Collect only entries at or past `offset`, avoiding clones for + // entries that will be skipped during paginated readdir. + let mut entries: Vec<(OsString, LoadedAddr)> = Vec::new(); + let mut idx = 0usize; + self.directory_cache.readdir(parent, |name, dvalue| { + if idx >= skip { + entries.push((name.to_os_string(), dvalue.ino)); + } + idx += 1; + }); + + for (i, (name, child_addr)) in entries.iter().enumerate() { + let Some(inode) = self.inode_table.get(&child_addr.addr()).await else { // Inode was evicted between readdir collection and iteration // (e.g. by a concurrent forget). Skip the stale entry. 
- tracing::debug!(addr = ?dvalue.ino.addr(), name = ?name, "inode evicted during readdir, skipping"); + tracing::debug!(addr = ?child_addr.addr(), name = ?name, "inode evicted during readdir, skipping"); continue; }; - let next_offset = (i + 1) as u64; + let next_offset = (skip + i + 1) as u64; if filler(DirEntry { name, inode }, next_offset) { break; } diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs index 49acf9ee..6e4ef942 100644 --- a/lib/fs/bridge.rs +++ b/lib/fs/bridge.rs @@ -75,15 +75,15 @@ impl ConcurrentBridge { self.bwd.read_sync(&inner, |_, &v| v) } - /// Look up inner -> outer, or allocate a new outer address if unmapped. + /// Look up inner -> outer, or insert `fallback` as the new outer address. + /// + /// `fallback` is a pre-allocated address provided by the caller. If the + /// inner address already has a mapping, `fallback` is unused (the caller + /// accepts that the monotonic address counter may skip values). /// /// Serialized with other mutations via the coordination lock. #[must_use] - pub fn backward_or_insert( - &self, - inner: InodeAddr, - allocate: impl FnOnce() -> InodeAddr, - ) -> InodeAddr { + pub fn backward_or_insert(&self, inner: InodeAddr, fallback: InodeAddr) -> InodeAddr { let _guard = self .mu .lock() @@ -91,10 +91,9 @@ impl ConcurrentBridge { match self.bwd.entry_sync(inner) { scc::hash_map::Entry::Occupied(occ) => *occ.get(), scc::hash_map::Entry::Vacant(vac) => { - let outer = allocate(); - vac.insert_entry(outer); - let _ = self.fwd.insert_sync(outer, inner); - outer + vac.insert_entry(fallback); + let _ = self.fwd.insert_sync(fallback, inner); + fallback } } } diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index ea7349dc..b564924d 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -129,6 +129,9 @@ impl FileReader for CompositeReader { struct ChildSlot { inner: Arc>, bridge: Arc, + /// The name under which this child was registered in `name_to_slot`. 
+ /// Stored here so `forget` can do O(1) removal instead of a linear scan. + name: OsString, } struct CompositeFsInner { @@ -250,6 +253,7 @@ impl CompositeFs { ChildSlot { inner: child_inner, bridge, + name: desc.name.clone(), }, )); let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); @@ -348,9 +352,8 @@ where let child_inode = tracked.inode; // Translate inner address back to composite-level address (outside scc guard). - let outer_ino = bridge.backward_or_insert(child_inode.addr, || { - self.inner.next_ino.fetch_add(1, Ordering::Relaxed) - }); + let fallback = self.allocate_ino(); + let outer_ino = bridge.backward_or_insert(child_inode.addr, fallback); let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); @@ -405,9 +408,8 @@ where // Translate all inner addresses to composite-level addresses (outside scc guard). let mut entries = Vec::with_capacity(child_entries.len()); for (name, child_inode) in child_entries { - let outer_ino = bridge.backward_or_insert(child_inode.addr, || { - self.inner.next_ino.fetch_add(1, Ordering::Relaxed) - }); + let fallback = self.allocate_ino(); + let outer_ino = bridge.backward_or_insert(child_inode.addr, fallback); let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); entries.push(( @@ -479,11 +481,8 @@ where .inner .slots .remove_if_sync(&slot_idx, |slot| slot.bridge.is_empty()); - if removed.is_some() { - // Clean up name_to_slot to prevent dead slot indices. - self.inner - .name_to_slot - .retain_sync(|_, &mut idx| idx != slot_idx); + if let Some((_, slot)) = removed { + self.inner.name_to_slot.remove_sync(&slot.name); } } } diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index 247070e9..abd412d2 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -112,21 +112,22 @@ impl DCache { children.insert(name, value); } - /// Returns all cached children of `parent_ino` as `(name, value)` pairs. + /// Iterate all cached children of `parent_ino` in name-sorted order. 
/// - /// Entries are returned in name-sorted order (guaranteed by `BTreeMap`). - pub fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> { + /// Calls `f` for each `(name, value)` pair while holding the read lock. + /// Callers decide what to collect, avoiding unnecessary allocations for + /// entries that will be skipped (e.g. by offset-based pagination). + pub fn readdir(&self, parent_ino: LoadedAddr, mut f: impl FnMut(&OsStr, &DValue)) { let Some(state) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) else { - return Vec::new(); + return; }; let children = state .children .read() .unwrap_or_else(std::sync::PoisonError::into_inner); - children - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect() + for (name, value) in children.iter() { + f(name, value); + } } /// Atomically try to claim a directory for population. diff --git a/tests/bridge_tests.rs b/tests/bridge_tests.rs index b0598e4d..d8389273 100644 --- a/tests/bridge_tests.rs +++ b/tests/bridge_tests.rs @@ -26,15 +26,15 @@ fn forward_missing_returns_none() { fn backward_or_insert_existing_returns_cached() { let bridge = ConcurrentBridge::new(); bridge.insert(10, 100); - let outer = bridge.backward_or_insert(100, || 999); + let outer = bridge.backward_or_insert(100, 999); assert_eq!(outer, 10, "should return existing outer addr"); } #[test] fn backward_or_insert_new_allocates() { let bridge = ConcurrentBridge::new(); - let outer = bridge.backward_or_insert(200, || 50); - assert_eq!(outer, 50, "should use allocator"); + let outer = bridge.backward_or_insert(200, 50); + assert_eq!(outer, 50, "should use fallback address"); assert_eq!(bridge.forward(50), Some(200)); assert_eq!(bridge.backward(200), Some(50)); } diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs index c2273076..f99d797b 100644 --- a/tests/dcache_correctness.rs +++ b/tests/dcache_correctness.rs @@ -52,7 +52,10 @@ async fn readdir_returns_only_children_of_parent() { 
LoadedAddr::new_unchecked(12), false, ); - let children = cache.readdir(LoadedAddr::new_unchecked(1)); + let mut children = Vec::new(); + cache.readdir(LoadedAddr::new_unchecked(1), |name, dvalue| { + children.push((name.to_os_string(), dvalue.clone())); + }); assert_eq!(children.len(), 2); let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); assert!(names.contains(&OsString::from("a"))); @@ -62,7 +65,10 @@ async fn readdir_returns_only_children_of_parent() { #[tokio::test] async fn readdir_empty_parent_returns_empty() { let cache = DCache::new(); - let children = cache.readdir(LoadedAddr::new_unchecked(1)); + let mut children = Vec::new(); + cache.readdir(LoadedAddr::new_unchecked(1), |name, dvalue| { + children.push((name.to_os_string(), dvalue.clone())); + }); assert!(children.is_empty()); } @@ -167,7 +173,9 @@ async fn readdir_returns_entries_in_sorted_order() { false, ); } - let children = cache.readdir(LoadedAddr::new_unchecked(1)); - let names: Vec<_> = children.iter().map(|(n, _)| n.to_str().unwrap()).collect(); + let mut names = Vec::new(); + cache.readdir(LoadedAddr::new_unchecked(1), |name, _| { + names.push(name.to_str().unwrap().to_owned()); + }); assert_eq!(names, ["apple", "mango", "zebra"]); } From f274a5a7782d19f792829131818046c27e8b1bab Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 19:26:59 -0800 Subject: [PATCH 37/41] refactor: replace ouroboros with Arc in AsyncFs, InodeLifecycle, FuseBridgeInner, ChildInner Remove the ouroboros dependency entirely. All self-referencing structs now use Arc for shared ownership, which is simpler and enables spawning background tasks that reference the inode table. 
--- Cargo.lock | 74 +------------------- Cargo.toml | 1 - lib/cache/async_backed.rs | 3 +- lib/fs/async_fs.rs | 86 ++++++++++------------- lib/fs/composite.rs | 48 ++++--------- lib/fs/fuser.rs | 72 +++++++------------- tests/async_fs_correctness.rs | 124 +++++++++++++++++----------------- tests/composite_fs_tests.rs | 33 ++++----- 8 files changed, 156 insertions(+), 285 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1050f46b..d4cf1499 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,12 +11,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "aliasable" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -234,7 +228,7 @@ version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn", @@ -764,7 +758,6 @@ dependencies = [ "opentelemetry", "opentelemetry-otlp", "opentelemetry_sdk", - "ouroboros", "rand", "reqwest", "reqwest-middleware", @@ -846,12 +839,6 @@ dependencies = [ "hashbrown 0.16.1", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -1510,30 +1497,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "ouroboros" -version = "0.18.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" -dependencies = [ - "aliasable", - "ouroboros_macro", - "static_assertions", -] - -[[package]] -name = 
"ouroboros_macro" -version = "0.18.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "proc-macro2-diagnostics", - "quote", - "syn", -] - [[package]] name = "page_size" version = "0.6.0" @@ -1660,19 +1623,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "proc-macro2-diagnostics" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "version_check", - "yansi", -] - [[package]] name = "prost" version = "0.13.5" @@ -2362,12 +2312,6 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "strsim" version = "0.11.1" @@ -2921,12 +2865,6 @@ dependencies = [ "rustversion", ] -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "vt100" version = "0.16.2" @@ -3371,7 +3309,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck 0.5.0", + "heck", "wit-parser", ] @@ -3382,7 +3320,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck 0.5.0", + "heck", "indexmap 2.13.0", "prettyplease", "syn", @@ -3449,12 
+3387,6 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" -[[package]] -name = "yansi" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" - [[package]] name = "yoke" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index dcf7b555..d837f7fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,7 +49,6 @@ tracing-indicatif = "0.3.14" opentelemetry = { version = "0.29" } opentelemetry_sdk = { version = "0.29", features = ["rt-tokio"] } opentelemetry-otlp = { version = "0.29", default-features = false, features = ["http-proto", "trace", "reqwest-blocking-client"] } -ouroboros = "0.18" tracing-opentelemetry = { version = "0.30" } hashlink = "0.11.0" diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index 9f3a7f94..6ec95d75 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -347,8 +347,7 @@ where /// Synchronously insert a value, overwriting any existing entry. /// - /// Suitable for seeding the cache before async operations begin (e.g. - /// inside an ouroboros builder where async is unavailable). + /// Suitable for seeding the cache before async operations begin. pub fn insert_sync(&self, key: K, value: V) { drop(self.map.insert_sync(key, Slot::Ready(value))); } diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 1899e136..32c5251d 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -86,18 +86,18 @@ pub trait FsDataProvider: Clone + Send + Sync + 'static { pub struct InodeForget; /// Evicts the inode from the table only. Used when no data provider is available. 
-impl<'a> StatelessDrop<&'a FutureBackedCache, InodeAddr> for InodeForget { - fn delete(inode_table: &&'a FutureBackedCache, addr: &InodeAddr) { +impl StatelessDrop>, InodeAddr> for InodeForget { + fn delete(inode_table: &Arc>, addr: &InodeAddr) { inode_table.remove_sync(addr); } } /// Evicts the inode from the table and delegates to [`FsDataProvider::forget`] /// so the provider can clean up its own auxiliary state. -impl<'a, DP: FsDataProvider> StatelessDrop<(&'a FutureBackedCache, DP), InodeAddr> +impl StatelessDrop<(Arc>, DP), InodeAddr> for InodeForget { - fn delete(ctx: &(&'a FutureBackedCache, DP), key: &InodeAddr) { + fn delete(ctx: &(Arc>, DP), key: &InodeAddr) { ctx.0.remove_sync(key); ctx.1.forget(*key); } @@ -134,44 +134,29 @@ impl OpenFile { } } -mod inode_lifecycle_impl { - #![allow(clippy::future_not_send, clippy::mem_forget)] - use ouroboros::self_referencing; - - use crate::cache::async_backed::FutureBackedCache; - use crate::drop_ward::DropWard; - use crate::fs::InodeAddr; - - use super::{INode, InodeForget}; - - /// Co-located inode table and reference-count ward. - /// - /// The ward borrows the table directly (no `Arc`) via `ouroboros`. - /// When `dec` reaches zero for a key, [`InodeForget::delete`] synchronously - /// removes that inode from the table. - #[self_referencing] - pub struct InodeLifecycle { - pub(super) table: FutureBackedCache, - #[borrows(table)] - #[not_covariant] - pub(super) ward: - DropWard<&'this FutureBackedCache, InodeAddr, InodeForget>, - } - - impl InodeLifecycle { - /// Create a new lifecycle managing the given inode table. - pub fn from_table(table: FutureBackedCache) -> Self { - Self::new(table, |tbl| DropWard::new(tbl)) - } - } +/// Co-located inode table and reference-count ward. +/// +/// When `dec` reaches zero for a key, [`InodeForget::delete`] synchronously +/// removes that inode from the table. 
+pub struct InodeLifecycle { + table: Arc>, + ward: crate::drop_ward::DropWard< + Arc>, + InodeAddr, + InodeForget, + >, } -pub use inode_lifecycle_impl::InodeLifecycle; - impl InodeLifecycle { + /// Create a new lifecycle managing the given inode table. + pub fn from_table(table: Arc>) -> Self { + let ward = crate::drop_ward::DropWard::new(Arc::clone(&table)); + Self { table, ward } + } + /// Increment the reference count for an inode address. pub fn inc(&mut self, addr: InodeAddr) -> usize { - self.with_ward_mut(|ward| ward.inc(addr)) + self.ward.inc(addr) } /// Decrement the reference count for an inode address. @@ -179,20 +164,20 @@ impl InodeLifecycle { /// When the count reaches zero, the inode is automatically evicted /// from the table via [`InodeForget::delete`]. pub fn dec(&mut self, addr: &InodeAddr) -> Option { - self.with_ward_mut(|ward| ward.dec(addr)) + self.ward.dec(addr) } /// Decrement the reference count by `count`. /// /// When the count reaches zero, the inode is automatically evicted. pub fn dec_count(&mut self, addr: &InodeAddr, count: usize) -> Option { - self.with_ward_mut(|ward| ward.dec_count(addr, count)) + self.ward.dec_count(addr, count) } /// Read-only access to the underlying inode table. #[must_use] pub fn table(&self) -> &FutureBackedCache { - self.borrow_table() + &self.table } } @@ -242,16 +227,16 @@ impl Drop for PopulateGuard<'_> { /// called on a true cache miss (not already cached or in-flight). /// /// The [`DCache`] sits in front as a synchronous fast path mapping `(parent, name)` to child addr. -pub struct AsyncFs<'tbl, DP: FsDataProvider> { +pub struct AsyncFs { /// Canonical addr -> `INode` map. Used by `loaded_inode()` to retrieve inodes by address. - inode_table: &'tbl FutureBackedCache, + inode_table: Arc>, /// Deduplicating lookup cache keyed by `(parent_addr, child_name)`. The factory is /// `dp.lookup()`, so the data provider is only called on a true cache miss. 
lookup_cache: FutureBackedCache<(InodeAddr, Arc), INode>, /// Directory entry cache, mapping `(parent, name)` to child inode address. - directory_cache: DCache, + directory_cache: Arc, /// The data provider used to fetch inode data on cache misses. data_provider: DP, @@ -260,12 +245,12 @@ pub struct AsyncFs<'tbl, DP: FsDataProvider> { next_fh: AtomicU64, } -impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { +impl AsyncFs { /// Create a new `AsyncFs`, seeding the root inode into the table. pub async fn new( data_provider: DP, root: INode, - inode_table: &'tbl FutureBackedCache, + inode_table: Arc>, ) -> Self { inode_table .get_or_init(root.addr, || async move { root }) @@ -274,7 +259,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { Self { inode_table, lookup_cache: FutureBackedCache::default(), - directory_cache: DCache::new(), + directory_cache: Arc::new(DCache::new()), data_provider, next_fh: AtomicU64::new(1), } @@ -282,18 +267,17 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { /// Create a new `AsyncFs`, assuming the root inode is already in the table. /// - /// This synchronous constructor is needed for ouroboros builders where - /// async is unavailable. The caller must ensure the root inode has already - /// been inserted into `inode_table` (e.g. via [`FutureBackedCache::insert_sync`]). + /// The caller must ensure the root inode has already been inserted into + /// `inode_table` (e.g. via [`FutureBackedCache::insert_sync`]). 
#[must_use] pub fn new_preseeded( data_provider: DP, - inode_table: &'tbl FutureBackedCache, + inode_table: Arc>, ) -> Self { Self { inode_table, lookup_cache: FutureBackedCache::default(), - directory_cache: DCache::new(), + directory_cache: Arc::new(DCache::new()), data_provider, next_fh: AtomicU64::new(1), } diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index b564924d..92361344 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -14,7 +14,7 @@ use std::sync::atomic::{AtomicU64, Ordering}; use bytes::Bytes; use crate::cache::async_backed::FutureBackedCache; -use crate::fs::async_fs::{FileReader, FsDataProvider, OpenFile}; +use crate::fs::async_fs::{AsyncFs, FileReader, FsDataProvider, OpenFile}; use crate::fs::bridge::ConcurrentBridge; use crate::fs::{INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags}; @@ -53,45 +53,25 @@ pub trait CompositeRoot: Send + Sync + 'static { ) -> impl Future>, std::io::Error>> + Send; } -mod child_inner_impl { - #![allow(clippy::future_not_send, clippy::mem_forget)] - - use ouroboros::self_referencing; - - use crate::cache::async_backed::FutureBackedCache; - use crate::fs::async_fs::{AsyncFs, FsDataProvider}; - use crate::fs::{INode, InodeAddr}; +/// Co-locates an inode table and [`AsyncFs`]. +pub struct ChildInner { + #[expect(dead_code)] + table: Arc>, + fs: AsyncFs, +} - /// Self-referential struct co-locating an inode table and [`AsyncFs`]. - /// - /// The `AsyncFs` borrows from the table directly, avoiding an extra - /// indirection. This mirrors the [`FuseBridgeInner`](super::super::fuser) - /// pattern. 
- #[self_referencing] - pub struct ChildInner { - pub(super) table: FutureBackedCache, - #[borrows(table)] - #[covariant] - pub(super) fs: AsyncFs<'this, DP>, +impl ChildInner { + pub(crate) fn create(table: FutureBackedCache, provider: DP) -> Self { + let table = Arc::new(table); + let fs = AsyncFs::new_preseeded(provider, Arc::clone(&table)); + Self { table, fs } } - impl ChildInner { - pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { - ChildInnerBuilder { - table, - fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), - } - .build() - } - - pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> { - self.borrow_fs() - } + pub(crate) fn get_fs(&self) -> &AsyncFs { + &self.fs } } -pub use child_inner_impl::ChildInner; - /// Wraps a child's reader so that the composite layer can expose it as its own /// [`FileReader`]. pub struct CompositeReader { diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 15fa36f7..06c27d4d 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -69,62 +69,38 @@ impl FuseResultExt for Result { } } -mod inner { - #![allow(clippy::future_not_send, clippy::mem_forget)] - - use ouroboros::self_referencing; - - use crate::cache::async_backed::FutureBackedCache; - use crate::drop_ward::DropWard; - use crate::fs::async_fs::{AsyncFs, FsDataProvider, InodeForget}; - use crate::fs::{INode, InodeAddr}; +type FuseWard = crate::drop_ward::DropWard< + (Arc>, DP), + InodeAddr, + super::async_fs::InodeForget, +>; + +struct FuseBridgeInner { + ward: FuseWard, + fs: super::async_fs::AsyncFs, +} - /// Self-referential struct holding the inode table, refcount ward, and `AsyncFs`. - /// - /// Both `ward` and `fs` borrow from `table`. The ward manages inode - /// refcounts; the fs serves lookup/readdir/open/read operations. - /// - /// The ward context is `(&table, DP)` so that [`InodeForget`] can both - /// remove the inode from the table and call `dp.forget()` to clean up - /// provider-internal maps (bridge mappings, path maps, etc.). 
- #[self_referencing] - pub(super) struct FuseBridgeInner { - table: FutureBackedCache, - #[borrows(table)] - #[not_covariant] - ward: DropWard<(&'this FutureBackedCache, DP), InodeAddr, InodeForget>, - #[borrows(table)] - #[covariant] - fs: AsyncFs<'this, DP>, +impl FuseBridgeInner { + fn create(table: FutureBackedCache, provider: DP) -> Self { + let table = Arc::new(table); + let ward = crate::drop_ward::DropWard::new((Arc::clone(&table), provider.clone())); + let fs = super::async_fs::AsyncFs::new_preseeded(provider, table); + Self { ward, fs } } - impl FuseBridgeInner { - pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { - let ward_provider = provider.clone(); - FuseBridgeInnerBuilder { - table, - ward_builder: |tbl| DropWard::new((tbl, ward_provider)), - fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), - } - .build() - } - - pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> { - self.borrow_fs() - } + fn get_fs(&self) -> &super::async_fs::AsyncFs { + &self.fs + } - pub(super) fn ward_inc(&mut self, addr: InodeAddr) -> usize { - self.with_ward_mut(|ward| ward.inc(addr)) - } + fn ward_inc(&mut self, addr: InodeAddr) -> usize { + self.ward.inc(addr) + } - pub(super) fn ward_dec_count(&mut self, addr: InodeAddr, count: usize) -> Option { - self.with_ward_mut(|ward| ward.dec_count(&addr, count)) - } + fn ward_dec_count(&mut self, addr: InodeAddr, count: usize) -> Option { + self.ward.dec_count(&addr, count) } } -use inner::FuseBridgeInner; - /// Convert an `INode` to the fuser-specific `FileAttr`. 
fn inode_to_fuser_attr(inode: &INode, block_size: u32) -> fuser::FileAttr { fuser::FileAttr { diff --git a/tests/async_fs_correctness.rs b/tests/async_fs_correctness.rs index 643e7b1e..5ce202c6 100644 --- a/tests/async_fs_correctness.rs +++ b/tests/async_fs_correctness.rs @@ -13,11 +13,11 @@ use common::async_fs_mocks::{MockFsDataProvider, MockFsState, make_inode}; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn lifecycle_inc_returns_count_after_increment() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let inode = make_inode(100, INodeType::File, 0, Some(1)); table.insert_sync(100, inode); - let mut lifecycle = InodeLifecycle::from_table(table); + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); assert_eq!(lifecycle.inc(100), 1, "first inc should return 1"); assert_eq!(lifecycle.inc(100), 2, "second inc should return 2"); @@ -26,11 +26,11 @@ async fn lifecycle_inc_returns_count_after_increment() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn lifecycle_dec_returns_remaining_count() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let inode = make_inode(100, INodeType::File, 0, Some(1)); table.insert_sync(100, inode); - let mut lifecycle = InodeLifecycle::from_table(table); + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); lifecycle.inc(100); lifecycle.inc(100); @@ -40,8 +40,8 @@ async fn lifecycle_dec_returns_remaining_count() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn lifecycle_dec_unknown_addr_returns_none() { - let table: FutureBackedCache = FutureBackedCache::default(); - let mut lifecycle = InodeLifecycle::from_table(table); + let table: Arc> = Arc::new(FutureBackedCache::default()); + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); assert_eq!( lifecycle.dec(&999), @@ -52,11 +52,11 @@ async fn 
lifecycle_dec_unknown_addr_returns_none() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn lifecycle_dec_to_zero_evicts_from_table() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let inode = make_inode(100, INodeType::File, 0, Some(1)); table.insert_sync(100, inode); - let mut lifecycle = InodeLifecycle::from_table(table); + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); lifecycle.inc(100); assert_eq!(lifecycle.dec(&100), Some(0)); @@ -69,11 +69,11 @@ async fn lifecycle_dec_to_zero_evicts_from_table() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn lifecycle_dec_count_decrements_by_n() { - let table: FutureBackedCache = FutureBackedCache::default(); + let table: Arc> = Arc::new(FutureBackedCache::default()); let inode = make_inode(100, INodeType::File, 0, Some(1)); table.insert_sync(100, inode); - let mut lifecycle = InodeLifecycle::from_table(table); + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); lifecycle.inc(100); lifecycle.inc(100); lifecycle.inc(100); // count = 3 @@ -87,11 +87,11 @@ async fn lifecycle_dec_count_decrements_by_n() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn lifecycle_dec_count_to_zero_evicts() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let inode = make_inode(100, INodeType::File, 0, Some(1)); table.insert_sync(100, inode); - let mut lifecycle = InodeLifecycle::from_table(table); + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); lifecycle.inc(100); lifecycle.inc(100); // count = 2 @@ -104,11 +104,11 @@ async fn lifecycle_dec_count_to_zero_evicts() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn lifecycle_table_returns_underlying_cache() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let inode = make_inode(42, 
INodeType::Directory, 0, None); table.insert_sync(42, inode); - let lifecycle = InodeLifecycle::from_table(table); + let lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); let fetched = lifecycle.table().get(&42).await; assert_eq!( @@ -120,11 +120,11 @@ async fn lifecycle_table_returns_underlying_cache() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn new_seeds_root_inode_into_table() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let root = make_inode(1, INodeType::Directory, 0, None); let dp = MockFsDataProvider::new(MockFsState::default()); - let fs = AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; assert_eq!(fs.inode_count(), 1, "root should be the only inode"); let fetched = table.get(&1).await; @@ -137,10 +137,10 @@ async fn new_seeds_root_inode_into_table() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn new_preseeded_does_not_insert_root() { - let table: FutureBackedCache = FutureBackedCache::default(); + let table: Arc> = Arc::new(FutureBackedCache::default()); let dp = MockFsDataProvider::new(MockFsState::default()); - let fs = AsyncFs::new_preseeded(dp, &table); + let fs = AsyncFs::new_preseeded(dp, Arc::clone(&table)); assert_eq!( fs.inode_count(), @@ -151,11 +151,11 @@ async fn new_preseeded_does_not_insert_root() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn statfs_reports_inode_count() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let root = make_inode(1, INodeType::Directory, 0, None); let dp = MockFsDataProvider::new(MockFsState::default()); - let fs = AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let stats = fs.statfs(); assert_eq!(stats.block_size, 4096); @@ -166,11 +166,11 @@ async fn statfs_reports_inode_count() { #[tokio::test(flavor = "multi_thread", 
worker_threads = 2)] async fn loaded_inode_returns_seeded_inode() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let root = make_inode(1, INodeType::Directory, 0, None); let dp = MockFsDataProvider::new(MockFsState::default()); - let fs = AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let inode = fs.loaded_inode(LoadedAddr::new_unchecked(1)).await.unwrap(); assert_eq!(inode.addr, 1); @@ -179,11 +179,11 @@ async fn loaded_inode_returns_seeded_inode() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn loaded_inode_returns_enoent_for_missing_addr() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let root = make_inode(1, INodeType::Directory, 0, None); let dp = MockFsDataProvider::new(MockFsState::default()); - let fs = AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let err = fs .loaded_inode(LoadedAddr::new_unchecked(999)) @@ -194,11 +194,11 @@ async fn loaded_inode_returns_enoent_for_missing_addr() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn getattr_delegates_to_loaded_inode() { - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); let root = make_inode(1, INodeType::Directory, 4096, None); let dp = MockFsDataProvider::new(MockFsState::default()); - let fs = AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let inode = fs.getattr(LoadedAddr::new_unchecked(1)).await.unwrap(); assert_eq!(inode.addr, 1); @@ -214,8 +214,8 @@ async fn lookup_resolves_child_via_data_provider() { state.lookups.insert((1, "readme.md".into()), child); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = 
AsyncFs::new(dp, root, Arc::clone(&table)).await; let tracked = fs .lookup(LoadedAddr::new_unchecked(1), OsStr::new("readme.md")) @@ -236,8 +236,8 @@ async fn lookup_populates_inode_table() { state.lookups.insert((1, "file.txt".into()), child); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; fs.lookup(LoadedAddr::new_unchecked(1), OsStr::new("file.txt")) .await @@ -261,8 +261,8 @@ async fn lookup_second_call_uses_cache() { state.lookups.insert((1, "cached.txt".into()), child); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let first = fs .lookup(LoadedAddr::new_unchecked(1), OsStr::new("cached.txt")) @@ -282,8 +282,8 @@ async fn lookup_propagates_provider_error() { // No lookups configured — provider will return ENOENT. 
let dp = MockFsDataProvider::new(MockFsState::default()); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let err = fs .lookup(LoadedAddr::new_unchecked(1), OsStr::new("nonexistent")) @@ -305,9 +305,9 @@ async fn open_returns_file_handle_and_reader() { .insert(10, bytes::Bytes::from_static(b"hello")); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(10, file); - let fs = AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let open_file = fs .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) @@ -324,8 +324,8 @@ async fn open_returns_eisdir_for_directory() { let root = make_inode(1, INodeType::Directory, 0, None); let dp = MockFsDataProvider::new(MockFsState::default()); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let err = fs .open(LoadedAddr::new_unchecked(1), OpenFlags::RDONLY) @@ -339,8 +339,8 @@ async fn open_returns_enoent_for_missing_inode() { let root = make_inode(1, INodeType::Directory, 0, None); let dp = MockFsDataProvider::new(MockFsState::default()); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let err = fs .open(LoadedAddr::new_unchecked(999), OpenFlags::RDONLY) @@ -356,9 +356,9 @@ async fn open_assigns_unique_file_handles() { let dp = MockFsDataProvider::new(MockFsState::default()); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(10, file); - let fs = 
AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let fh1 = fs .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) @@ -385,9 +385,9 @@ async fn open_file_read_with_offset() { .insert(10, bytes::Bytes::from_static(b"hello world")); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(10, file); - let fs = AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let open_file = fs .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) @@ -416,8 +416,8 @@ async fn readdir_lists_children_sorted_by_name() { ); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let mut entries: Vec<(OsString, u64)> = Vec::new(); fs.readdir(LoadedAddr::new_unchecked(1), 0, |entry, _offset| { @@ -452,8 +452,8 @@ async fn readdir_respects_offset() { ); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; // First readdir to populate cache fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) @@ -490,8 +490,8 @@ async fn readdir_stops_when_filler_returns_true() { ); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let mut count = 0; fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| { @@ -511,9 +511,9 @@ async fn readdir_returns_enotdir_for_file() { let dp = MockFsDataProvider::new(MockFsState::default()); - 
let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(10, file); - let fs = AsyncFs::new(dp, root, &table).await; + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let err = fs .readdir(LoadedAddr::new_unchecked(10), 0, |_, _| false) @@ -533,8 +533,8 @@ async fn readdir_populates_inode_table_with_children() { .insert(1, vec![(OsString::from("child.txt"), child)]); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) .await @@ -556,8 +556,8 @@ async fn readdir_empty_directory() { state.directories.insert(1, vec![]); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let mut count = 0; fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| { @@ -586,8 +586,8 @@ async fn readdir_provides_correct_next_offsets() { ); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; let mut offsets: Vec = Vec::new(); fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, next_offset| { @@ -615,8 +615,8 @@ async fn lookup_after_eviction_returns_fresh_inode() { let dp = MockFsDataProvider::new(state); let state_ref = Arc::clone(&dp.state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; // First lookup → addr=10 let first = fs @@ 
-660,8 +660,8 @@ async fn lookup_after_readdir_uses_directory_cache() { .insert(1, vec![(OsString::from("file.txt"), child)]); let dp = MockFsDataProvider::new(state); - let table = FutureBackedCache::default(); - let fs = AsyncFs::new(dp, root, &table).await; + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; // readdir populates the directory cache. fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) diff --git a/tests/composite_fs_tests.rs b/tests/composite_fs_tests.rs index ce110acb..1c263425 100644 --- a/tests/composite_fs_tests.rs +++ b/tests/composite_fs_tests.rs @@ -4,6 +4,7 @@ mod common; use std::collections::HashMap; use std::ffi::{OsStr, OsString}; +use std::sync::Arc; use bytes::Bytes; @@ -53,9 +54,9 @@ async fn composite_root_lookup_resolves_child() { let composite = CompositeFs::new(mock_root, (1000, 1000)); let root_inode = composite.make_root_inode(); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(1, root_inode); - let afs = AsyncFs::new_preseeded(composite, &table); + let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table)); let tracked = afs .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo-a")) @@ -91,9 +92,9 @@ async fn composite_root_readdir_lists_children() { let composite = CompositeFs::new(mock_root, (1000, 1000)); let root_inode = composite.make_root_inode(); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(1, root_inode); - let afs = AsyncFs::new_preseeded(composite, &table); + let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table)); let mut entries = Vec::new(); afs.readdir(LoadedAddr::new_unchecked(1), 0, |de, _offset| { @@ -126,9 +127,9 @@ async fn composite_delegated_lookup_reaches_child() { let composite = CompositeFs::new(mock_root, (1000, 1000)); let root_inode = composite.make_root_inode(); - let table = 
FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(1, root_inode); - let afs = AsyncFs::new_preseeded(composite, &table); + let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table)); // First, lookup the child at root level. let child_dir = afs @@ -169,9 +170,9 @@ async fn composite_open_and_read_through_child() { let composite = CompositeFs::new(mock_root, (1000, 1000)); let root_inode = composite.make_root_inode(); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(1, root_inode); - let afs = AsyncFs::new_preseeded(composite, &table); + let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table)); // Navigate to the file. let child_dir = afs @@ -212,9 +213,9 @@ async fn composite_lookup_unknown_child_returns_enoent() { let composite = CompositeFs::new(mock_root, (1000, 1000)); let root_inode = composite.make_root_inode(); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(1, root_inode); - let afs = AsyncFs::new_preseeded(composite, &table); + let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table)); let err = afs .lookup(LoadedAddr::new_unchecked(1), OsStr::new("nonexistent")) @@ -246,9 +247,9 @@ async fn composite_readdir_delegated_lists_child_contents() { let composite = CompositeFs::new(mock_root, (1000, 1000)); let root_inode = composite.make_root_inode(); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(1, root_inode); - let afs = AsyncFs::new_preseeded(composite, &table); + let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table)); // Navigate into the child. 
let child_dir = afs @@ -287,9 +288,9 @@ async fn composite_repeated_lookup_returns_same_addr() { let composite = CompositeFs::new(mock_root, (1000, 1000)); let root_inode = composite.make_root_inode(); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(1, root_inode); - let afs = AsyncFs::new_preseeded(composite, &table); + let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table)); let first = afs .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo")) @@ -318,9 +319,9 @@ async fn composite_forget_cleans_up_slot_and_name_mapping() { let composite = CompositeFs::new(mock_root, (1000, 1000)); let root_inode = composite.make_root_inode(); - let table = FutureBackedCache::default(); + let table = Arc::new(FutureBackedCache::default()); table.insert_sync(1, root_inode); - let afs = AsyncFs::new_preseeded(composite.clone(), &table); + let afs = AsyncFs::new_preseeded(composite.clone(), Arc::clone(&table)); // Look up the child and a file inside it. let child_dir = afs From 12fac6d3b6f91a25ce0be1b2b1303afce754ae69 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 19:28:57 -0800 Subject: [PATCH 38/41] feat: add DCache::child_dir_addrs for prefetch discovery --- lib/fs/dcache.rs | 20 ++++++++++++++++++ tests/dcache_correctness.rs | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index abd412d2..7f22258a 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -130,6 +130,26 @@ impl DCache { } } + /// Returns the [`LoadedAddr`] of every child that is itself a directory. + /// + /// Used by the prefetch logic to discover which subdirectories to + /// background-populate after a `readdir` completes. 
+ #[must_use] + pub fn child_dir_addrs(&self, parent_ino: LoadedAddr) -> Vec { + let Some(state) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) else { + return Vec::new(); + }; + let children = state + .children + .read() + .unwrap_or_else(std::sync::PoisonError::into_inner); + children + .values() + .filter(|dv| dv.is_dir) + .map(|dv| dv.ino) + .collect() + } + /// Atomically try to claim a directory for population. /// /// Uses `compare_exchange` on the three-state flag: diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs index f99d797b..7043bd9b 100644 --- a/tests/dcache_correctness.rs +++ b/tests/dcache_correctness.rs @@ -179,3 +179,45 @@ async fn readdir_returns_entries_in_sorted_order() { }); assert_eq!(names, ["apple", "mango", "zebra"]); } + +#[tokio::test] +async fn child_dir_addrs_returns_only_directories() { + let cache = DCache::new(); + let parent = LoadedAddr::new_unchecked(1); + cache.insert( + parent, + OsString::from("file.txt"), + LoadedAddr::new_unchecked(10), + false, + ); + cache.insert( + parent, + OsString::from("subdir"), + LoadedAddr::new_unchecked(11), + true, + ); + cache.insert( + parent, + OsString::from("another_file"), + LoadedAddr::new_unchecked(12), + false, + ); + cache.insert( + parent, + OsString::from("another_dir"), + LoadedAddr::new_unchecked(13), + true, + ); + + let dirs = cache.child_dir_addrs(parent); + assert_eq!(dirs.len(), 2); + assert!(dirs.contains(&LoadedAddr::new_unchecked(11))); + assert!(dirs.contains(&LoadedAddr::new_unchecked(13))); +} + +#[tokio::test] +async fn child_dir_addrs_returns_empty_for_unknown_parent() { + let cache = DCache::new(); + let dirs = cache.child_dir_addrs(LoadedAddr::new_unchecked(999)); + assert!(dirs.is_empty()); +} From e16c0a24e6cccc77fe6d6c83c61ba3ef65d2fe28 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 19:31:26 -0800 Subject: [PATCH 39/41] feat: prefetch child directories after readdir populates parent After readdir populates a 
directory via the Claimed CAS path, spawn background tokio tasks to prefetch each child directory. This makes subsequent navigation into subdirectories instant since the dcache and inode table are already populated. The prefetch uses the same CAS gate (try_claim_populate) so duplicate work is impossible, and errors are silently ignored since prefetch is best-effort. --- lib/fs/async_fs.rs | 55 ++++++++++++++++++++ tests/async_fs_correctness.rs | 95 ++++++++++++++++++++++++++++++++++ tests/common/async_fs_mocks.rs | 5 ++ 3 files changed, 155 insertions(+) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 32c5251d..434239a9 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -219,6 +219,49 @@ impl Drop for PopulateGuard<'_> { } } +/// Background-populate a single child directory into the caches. +/// +/// Uses the same CAS gate as `readdir` so duplicate work is impossible. +/// Errors are silently ignored — prefetch is best-effort. +async fn prefetch_dir( + dir_addr: LoadedAddr, + directory_cache: Arc, + inode_table: Arc>, + data_provider: DP, +) { + use crate::fs::dcache::PopulateStatus; + + match directory_cache.try_claim_populate(dir_addr) { + PopulateStatus::Claimed => {} + PopulateStatus::InProgress | PopulateStatus::Done => return, + } + + let mut guard = PopulateGuard::new(&directory_cache, dir_addr); + + let Some(dir_inode) = inode_table.get(&dir_addr.addr()).await else { + return; + }; + + let Ok(children) = data_provider.readdir(dir_inode).await else { + return; + }; + + for (name, child_inode) in children { + let is_dir = child_inode.itype == INodeType::Directory; + inode_table + .get_or_init(child_inode.addr, || async move { child_inode }) + .await; + directory_cache.insert( + dir_addr, + name, + LoadedAddr::new_unchecked(child_inode.addr), + is_dir, + ); + } + directory_cache.finish_populate(dir_addr); + guard.defuse(); +} + /// An asynchronous filesystem cache mapping `InodeAddr` to `INode`. 
/// /// Uses two [`FutureBackedCache`] layers: @@ -283,6 +326,17 @@ impl AsyncFs { } } + /// Spawn background tasks to prefetch each child directory of `parent`. + fn spawn_prefetch_children(&self, parent: LoadedAddr) { + let child_dirs = self.directory_cache.child_dir_addrs(parent); + for child_addr in child_dirs { + let dcache = Arc::clone(&self.directory_cache); + let table = Arc::clone(&self.inode_table); + let dp = self.data_provider.clone(); + tokio::spawn(prefetch_dir(child_addr, dcache, table, dp)); + } + } + /// Get the total number of inodes currently stored in the inode table. #[must_use] pub fn inode_count(&self) -> usize { @@ -460,6 +514,7 @@ impl AsyncFs { } self.directory_cache.finish_populate(parent); guard.defuse(); + self.spawn_prefetch_children(parent); break; } PopulateStatus::InProgress => { diff --git a/tests/async_fs_correctness.rs b/tests/async_fs_correctness.rs index 5ce202c6..e3087ceb 100644 --- a/tests/async_fs_correctness.rs +++ b/tests/async_fs_correctness.rs @@ -675,3 +675,98 @@ async fn lookup_after_readdir_uses_directory_cache() { .unwrap(); assert_eq!(tracked.inode.addr, 10); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_prefetches_child_directories() { + use std::sync::atomic::Ordering; + + let root = make_inode(1, INodeType::Directory, 0, None); + let child_dir = make_inode(10, INodeType::Directory, 0, Some(1)); + let child_file = make_inode(11, INodeType::File, 100, Some(1)); + let grandchild = make_inode(20, INodeType::File, 50, Some(10)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("subdir"), child_dir), + (OsString::from("file.txt"), child_file), + ], + ); + state + .directories + .insert(10, vec![(OsString::from("grandchild.txt"), grandchild)]); + let dp = MockFsDataProvider::new(state); + let readdir_count = Arc::clone(&dp.state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, 
Arc::clone(&table)).await; + + // readdir on root should trigger prefetch of child_dir (addr=10) + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) + .await + .unwrap(); + + // Wait for prefetch to complete (mock is instant, just need task to run) + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + + // dp.readdir should have been called twice: once for root, once for child_dir prefetch + assert_eq!( + readdir_count.readdir_count.load(Ordering::Relaxed), + 2, + "prefetch should have called readdir on the child directory" + ); + + // Now readdir on child_dir should NOT call dp.readdir again (served from cache) + let mut entries = Vec::new(); + fs.readdir(LoadedAddr::new_unchecked(10), 0, |entry, _| { + entries.push(entry.name.to_os_string()); + false + }) + .await + .unwrap(); + + assert_eq!(entries, vec![OsString::from("grandchild.txt")]); + assert_eq!( + readdir_count.readdir_count.load(Ordering::Relaxed), + 2, + "cached readdir should not call dp.readdir again" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn prefetch_failure_does_not_affect_parent_readdir() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_dir = make_inode(10, INodeType::Directory, 0, Some(1)); + + let mut state = MockFsState::default(); + state + .directories + .insert(1, vec![(OsString::from("bad_dir"), child_dir)]); + // Don't configure readdir for addr=10 — mock will return ENOENT + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + // Parent readdir should succeed even though child prefetch will fail + let mut entries = Vec::new(); + fs.readdir(LoadedAddr::new_unchecked(1), 0, |entry, _| { + entries.push(entry.name.to_os_string()); + false + }) + .await + .unwrap(); + + assert_eq!(entries, vec![OsString::from("bad_dir")]); + + // Wait for prefetch to attempt and fail + 
tokio::time::sleep(std::time::Duration::from_millis(100)).await; + + // Direct readdir on child should still work (CAS reset to UNCLAIMED by PopulateGuard) + let err = fs + .readdir(LoadedAddr::new_unchecked(10), 0, |_, _| false) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); +} diff --git a/tests/common/async_fs_mocks.rs b/tests/common/async_fs_mocks.rs index 4441544c..b95f6ebe 100644 --- a/tests/common/async_fs_mocks.rs +++ b/tests/common/async_fs_mocks.rs @@ -56,6 +56,8 @@ pub struct MockFsState { /// precedence and are consumed on use (removed after the first hit). /// Existing tests are unaffected because this defaults to empty. pub refresh_lookups: scc::HashMap<(u64, OsString), INode>, + /// Counts how many times `readdir` has been called on this provider. + pub readdir_count: std::sync::atomic::AtomicU64, } /// A clonable mock data provider for `AsyncFs` tests. @@ -89,6 +91,9 @@ impl FsDataProvider for MockFsDataProvider { } async fn readdir(&self, parent: INode) -> Result, std::io::Error> { + self.state + .readdir_count + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); self.state .directories .get(&parent.addr) From eb394925eca6d7e243835cc13a0678f497f43d5a Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 20:07:21 -0800 Subject: [PATCH 40/41] concurrency edge cases --- lib/fs/composite.rs | 65 +++++++++++++++++++++++++++++++-------------- lib/fs/dcache.rs | 11 +++++++- 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index 92361344..c0aedfed 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -431,9 +431,18 @@ where }) } - /// Removes the composite-level address from `addr_to_slot` and the - /// child's bridge map. When the bridge becomes empty, the slot and its - /// `name_to_slot` entry are garbage-collected. + /// Removes the composite-level address from the child's bridge map and + /// then from `addr_to_slot`. 
When the bridge becomes empty, the slot + /// and its `name_to_slot` entry are garbage-collected. + /// + /// **Ordering invariant:** the bridge mapping is removed *before* + /// `addr_to_slot` so that a concurrent [`lookup`](Self::lookup) + /// calling `backward_or_insert` will allocate a *fresh* outer address + /// (since the old inner→outer entry is already gone from the bridge) + /// rather than returning the about-to-be-forgotten address. Because + /// the fresh address differs from the forgotten one, the subsequent + /// `addr_to_slot.remove_sync` here cannot destroy the concurrent + /// lookup's mapping. /// /// The slot removal uses `remove_if_sync` with a re-check of /// `bridge.is_empty()`, preventing a concurrent `backward_or_insert` @@ -445,25 +454,41 @@ where if addr == Self::ROOT_INO { return; } - if let Some((_, slot_idx)) = self.inner.addr_to_slot.remove_sync(&addr) { - // Remove the outer->inner mapping from the bridge. The bridge's - // internal mutex serializes this with `backward_or_insert`. - let bridge_empty = self + let Some(slot_idx) = self.inner.addr_to_slot.read_sync(&addr, |_, &v| v) else { + return; + }; + // Remove from the bridge FIRST. The bridge's internal mutex + // serializes this with `backward_or_insert`, ensuring that any + // concurrent lookup that arrives after this point will allocate a + // fresh outer address rather than reusing the forgotten `addr`. + let bridge_empty = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| slot.bridge.remove_by_outer(addr)) + .unwrap_or(false); + // Now safe to remove from addr_to_slot — concurrent lookups that + // raced with us either: + // (a) ran backward_or_insert BEFORE our bridge removal and got + // `addr` back (same key we are removing — acceptable, see + // below), or + // (b) ran AFTER and got a fresh fallback address (different key, + // unaffected by this removal). 
+ // + // Case (a) is a FUSE protocol-level race: the kernel sent + // `forget` for this address while a lookup resolved to the same + // inner entity. In practice, this should not occur because + // `forget` fires only when nlookup reaches zero. + self.inner.addr_to_slot.remove_sync(&addr); + if bridge_empty { + // Bridge is empty — atomically remove the slot only if no one + // has re-populated the bridge between our check and this removal. + // `remove_if_sync` holds the scc bucket lock during evaluation. + let removed = self .inner .slots - .read_sync(&slot_idx, |_, slot| slot.bridge.remove_by_outer(addr)) - .unwrap_or(false); - if bridge_empty { - // Bridge is empty — atomically remove the slot only if no one - // has re-populated the bridge between our check and this removal. - // `remove_if_sync` holds the scc bucket lock during evaluation. - let removed = self - .inner - .slots - .remove_if_sync(&slot_idx, |slot| slot.bridge.is_empty()); - if let Some((_, slot)) = removed { - self.inner.name_to_slot.remove_sync(&slot.name); - } + .remove_if_sync(&slot_idx, |slot| slot.bridge.is_empty()); + if let Some((_, slot)) = removed { + self.inner.name_to_slot.remove_sync(&slot.name); } } } diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index 7f22258a..82f73b66 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -188,14 +188,23 @@ impl DCache { /// Wait until a directory is no longer in the `InProgress` state. /// /// Uses [`Notify`] to sleep efficiently instead of spinning. + /// + /// The `Notified` future is pinned and `enable()`d before checking the + /// flag so that the waiter is registered with the `Notify` *before* the + /// state check. Without this, a `notify_waiters()` firing between + /// `notified()` and the first poll would be lost (since + /// `notify_waiters` does not store a permit), causing a permanent hang. 
pub async fn wait_populated(&self, parent_ino: LoadedAddr) { let state = self.dir_state(parent_ino); loop { - let notified = state.notify.notified(); + let mut notified = std::pin::pin!(state.notify.notified()); + notified.as_mut().enable(); let current = state.populated.load(Ordering::Acquire); if current != POPULATE_IN_PROGRESS { return; } + // SAFETY(cancel): re-entering the loop re-creates the Notified + // future, so spurious wakeups just re-check the flag. notified.await; } } From defd96350c58ab260d26a4424247188ee2562344 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 21 Feb 2026 20:39:02 -0800 Subject: [PATCH 41/41] fix: race in forget slot removal and unbounded prefetch spawning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two concurrency fixes identified during review: 1. CompositeFs::forget could prematurely remove a slot when a concurrent backward_or_insert was mid-insert. The remove_if_sync predicate called bridge.is_empty() without the coordination mutex, so it could observe fwd as empty while backward_or_insert had inserted into bwd but not yet fwd. Added ConcurrentBridge::is_empty_locked() that acquires the mutex, and use it in the predicate. No deadlock risk — lock ordering is always slots bucket lock → bridge mutex. 2. spawn_prefetch_children spawned one tokio::spawn per child directory with no bound, creating a thundering herd on the API backend for directories with many subdirs. Added a per-AsyncFs semaphore capping concurrent prefetch tasks at 8. 
--- lib/fs/async_fs.rs | 24 +++++++++++++++++++++++- lib/fs/bridge.rs | 20 +++++++++++++++++--- lib/fs/composite.rs | 12 +++++++----- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 434239a9..1c069f4d 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; use bytes::Bytes; +use tokio::sync::Semaphore; use crate::cache::async_backed::FutureBackedCache; use crate::drop_ward::StatelessDrop; @@ -262,6 +263,14 @@ async fn prefetch_dir( guard.defuse(); } +/// Maximum number of concurrent prefetch tasks spawned per [`AsyncFs`] instance. +/// +/// Prevents thundering-herd API calls when a parent directory contains many +/// subdirectories (e.g. `node_modules`). Each `readdir` that discovers child +/// directories spawns at most this many concurrent prefetch tasks; additional +/// children wait for a permit. +const MAX_PREFETCH_CONCURRENCY: usize = 8; + /// An asynchronous filesystem cache mapping `InodeAddr` to `INode`. /// /// Uses two [`FutureBackedCache`] layers: @@ -286,6 +295,9 @@ pub struct AsyncFs { /// Monotonically increasing file handle counter. Starts at 1 (0 is reserved). next_fh: AtomicU64, + + /// Bounds the number of concurrent background prefetch tasks. + prefetch_semaphore: Arc, } impl AsyncFs { @@ -305,6 +317,7 @@ impl AsyncFs { directory_cache: Arc::new(DCache::new()), data_provider, next_fh: AtomicU64::new(1), + prefetch_semaphore: Arc::new(Semaphore::new(MAX_PREFETCH_CONCURRENCY)), } } @@ -323,17 +336,26 @@ impl AsyncFs { directory_cache: Arc::new(DCache::new()), data_provider, next_fh: AtomicU64::new(1), + prefetch_semaphore: Arc::new(Semaphore::new(MAX_PREFETCH_CONCURRENCY)), } } /// Spawn background tasks to prefetch each child directory of `parent`. 
+ /// + /// Concurrency is bounded by [`MAX_PREFETCH_CONCURRENCY`] via a shared + /// semaphore, preventing thundering-herd API calls when a parent + /// directory contains many subdirectories. fn spawn_prefetch_children(&self, parent: LoadedAddr) { let child_dirs = self.directory_cache.child_dir_addrs(parent); for child_addr in child_dirs { + let sem = Arc::clone(&self.prefetch_semaphore); let dcache = Arc::clone(&self.directory_cache); let table = Arc::clone(&self.inode_table); let dp = self.data_provider.clone(); - tokio::spawn(prefetch_dir(child_addr, dcache, table, dp)); + tokio::spawn(async move { + let _permit = sem.acquire().await; + prefetch_dir(child_addr, dcache, table, dp).await; + }); } } diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs index 6e4ef942..c6edda8a 100644 --- a/lib/fs/bridge.rs +++ b/lib/fs/bridge.rs @@ -118,13 +118,27 @@ impl ConcurrentBridge { /// Returns `true` if the bridge contains no mappings. /// /// Reads are not serialized with mutations. The result is a - /// snapshot that may be immediately stale. Use under the - /// coordination lock or an external guard when consistency - /// with mutations is required. + /// snapshot that may be immediately stale. Use [`is_empty_locked`](Self::is_empty_locked) + /// when consistency with concurrent mutations is required. #[must_use] pub fn is_empty(&self) -> bool { self.fwd.is_empty() } + + /// Returns `true` if the bridge contains no mappings, serialized with + /// mutations via the coordination lock. + /// + /// Use this instead of [`is_empty`](Self::is_empty) when the result + /// must be consistent with a concurrent [`backward_or_insert`](Self::backward_or_insert) + /// that may be mid-insert. 
+ #[must_use] + pub fn is_empty_locked(&self) -> bool { + let _guard = self + .mu + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + self.fwd.is_empty() + } } impl Default for ConcurrentBridge { diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index c0aedfed..e4245e5c 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -445,9 +445,9 @@ where /// lookup's mapping. /// /// The slot removal uses `remove_if_sync` with a re-check of - /// `bridge.is_empty()`, preventing a concurrent `backward_or_insert` - /// from inserting a new mapping between the bridge emptiness check - /// and the slot removal. + /// `bridge.is_empty_locked()`, which acquires the bridge's + /// coordination mutex to serialize with a concurrent + /// `backward_or_insert` that may be mid-insert. /// /// The root inode is never forgotten. fn forget(&self, addr: InodeAddr) { @@ -482,11 +482,13 @@ where if bridge_empty { // Bridge is empty — atomically remove the slot only if no one // has re-populated the bridge between our check and this removal. - // `remove_if_sync` holds the scc bucket lock during evaluation. + // `remove_if_sync` holds the scc bucket lock during evaluation, + // and `is_empty_locked` acquires the bridge's coordination mutex + // to serialize with any concurrent `backward_or_insert`. let removed = self .inner .slots - .remove_if_sync(&slot_idx, |slot| slot.bridge.is_empty()); + .remove_if_sync(&slot_idx, |slot| slot.bridge.is_empty_locked()); if let Some((_, slot)) = removed { self.inner.name_to_slot.remove_sync(&slot.name); }