diff --git a/CLAUDE.md b/CLAUDE.md index 9ba3f68b..653c07a6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -43,6 +43,11 @@ cargo fmt --all && cargo clippy --all-targets --all-features -- -D warnings && c - Channels: `tokio::sync::mpsc` for multi-producer, `tokio::sync::oneshot` for request-response - Never block the async runtime — offload blocking work with `tokio::task::spawn_blocking` +## Testing + +- Avoid writing tests in-line in the same file as production code; use separate `tests/` directory + for tests. + ## Dependencies - Check for existing deps with `cargo tree` before adding new crates diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs new file mode 100644 index 00000000..6ec95d75 --- /dev/null +++ b/lib/cache/async_backed.rs @@ -0,0 +1,394 @@ +//! Concurrent deduplication cache for async computations. +//! +//! Given a key and an async factory, ensures the factory runs at most once per key. Subsequent +//! callers for the same key await the already-in-flight computation via a [`Shared`] future, +//! avoiding the race conditions inherent in `Notify`-based signalling. +//! +//! Note that this cache does not support automatic eviction. + +use std::panic::AssertUnwindSafe; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::{fmt::Debug, future::Future, hash::Hash, pin::Pin}; + +use futures::FutureExt as _; +use futures::future::Shared; + +type SharedFut = Shared> + Send>>>; + +/// Two-state slot: `InFlight` while a factory future is running, then promoted to `Ready` once +/// the future completes. +/// +/// The `InFlight` variant holds a generation counter and a `Shared<..., Output = Option>` +/// where `None` signals that the factory panicked (caught by `catch_unwind`). On `None`, callers +/// remove the entry only if the generation matches, avoiding destruction of a valid re-inserted +/// entry. +enum Slot { + InFlight(u64, SharedFut), + Ready(V), +} + +/// Deduplicating async cache. 
+/// +/// If [`get_or_init`](Self::get_or_init) is called concurrently for the same key, only one +/// invocation of the factory runs. All callers receive a clone of the result. +pub struct FutureBackedCache { + map: scc::HashMap>, + next_gen: AtomicU64, +} + +impl Default for FutureBackedCache +where + K: Eq + Hash, + V: Clone + Send + 'static, +{ + fn default() -> Self { + Self { + map: scc::HashMap::default(), + next_gen: AtomicU64::new(0), + } + } +} + +impl FutureBackedCache +where + K: Eq + Hash + Debug + Clone + Send + Sync + 'static, + V: Clone + Send + Sync + 'static, +{ + /// Get the cached value for `key`, or initialize it by running `factory`. + /// + /// If another caller is already computing the value for this key, this awaits the in-flight + /// computation instead of spawning a duplicate. If the factory panics, the entry is removed + /// and the next caller retries with a fresh factory invocation. + /// + /// # Panics + /// + /// Panics only if *this* caller's own factory panicked (i.e. this caller won the `Vacant` + /// slot and the factory it spawned panicked). Joiners who observe a panicked factory loop + /// back to `entry_async` so a new owner is elected, matching the retry semantics of + /// [`get_or_try_init`](Self::get_or_try_init). + pub async fn get_or_init(&self, key: K, factory: F) -> V + where + F: FnOnce() -> Fut, + Fut: Future + Send + 'static, + { + // Fast path: value already cached. + let existing = self + .map + .read_async(&key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), + }) + .await; + + match existing { + Some(Ok(v)) => return v, + Some(Err((generation, shared))) => { + if let Some(v) = self.await_shared(&key, generation, shared).await { + return v; + } + // Factory panicked; entry removed. Fall through to slow path. + } + None => {} + } + + // Slow path: claim a slot or join an existing in-flight computation. 
+ // Wrapped in `Option` so the `FnOnce` factory can be consumed exactly + // once inside the loop (only in the `Vacant` branch, which always returns). + let mut factory = Some(factory); + + loop { + match self.map.entry_async(key.clone()).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => return v.clone(), + Slot::InFlight(g, shared) => { + let (generation, shared) = (*g, shared.clone()); + drop(occ); + if let Some(v) = self.await_shared(&key, generation, shared).await { + return v; + } + // In-flight failed. Loop back to `entry_async` so the + // next caller gets proper dedup instead of running + // factory directly. + } + }, + scc::hash_map::Entry::Vacant(vac) => { + let f = factory.take().unwrap_or_else(|| { + unreachable!( + "FutureBackedCache: factory already consumed but \ + reached Vacant branch again for key {key:?}" + ) + }); + let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); + let shared = Self::make_shared(f); + let ret = shared.clone(); + vac.insert_entry(Slot::InFlight(generation, shared)); + + if let Some(v) = self.await_shared(&key, generation, ret).await { + return v; + } + panic!("FutureBackedCache: factory for key {key:?} panicked"); + } + } + } + } + + /// Like [`get_or_init`](Self::get_or_init), but for fallible factories. + /// + /// If the factory returns `Ok(v)`, the value is cached and returned. If it returns `Err(e)`, + /// **nothing is cached** and the error is propagated to the caller. + /// + /// Concurrent callers for the same key are deduplicated: only one factory invocation runs, + /// and joiners await its shared result. If the factory fails, the poisoned `InFlight` entry + /// is removed and joiners retry by re-entering the `entry_async` gate, so a single new + /// owner is elected. Joiners never receive the original error — the retrying owner invokes + /// its own factory independently and may produce a different error or succeed. 
+ /// + /// # Deduplication of failures + /// + /// When the factory returns `Err`, the poisoned entry is removed and the + /// next caller becomes a new owner with its own factory invocation. This + /// means failures are **not deduplicated**: under transient errors, N + /// concurrent callers may each independently invoke their factory rather + /// than coalescing on the first error. This is intentional — callers + /// may have different retry or error-handling semantics. + /// + /// # Panics + /// + /// Panics if the factory panics (caught internally via `catch_unwind`). + pub async fn get_or_try_init(&self, key: K, factory: F) -> Result + where + F: FnOnce() -> Fut, + Fut: Future> + Send + 'static, + E: Send + 'static, + { + // Fast path: value already cached or in-flight. + let existing = self + .map + .read_async(&key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), + }) + .await; + + match existing { + Some(Ok(v)) => return Ok(v), + Some(Err((generation, shared))) => { + if let Some(v) = self.await_shared(&key, generation, shared).await { + return Ok(v); + } + // In-flight failed; fall through to slow path. + } + None => {} + } + + // Slow path: claim a slot or join an existing in-flight computation. + // Wrapped in `Option` so the `FnOnce` factory can be consumed exactly + // once inside the loop (only in the `Vacant` branch, which always returns). + let mut factory = Some(factory); + + loop { + match self.map.entry_async(key.clone()).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => return Ok(v.clone()), + Slot::InFlight(g, shared) => { + let (generation, shared) = (*g, shared.clone()); + drop(occ); + if let Some(v) = self.await_shared(&key, generation, shared).await { + return Ok(v); + } + // In-flight failed. Loop back to `entry_async` so the + // next caller gets proper dedup instead of running + // factory directly. 
+ } + }, + scc::hash_map::Entry::Vacant(vac) => { + let f = factory.take().unwrap_or_else(|| { + unreachable!( + "FutureBackedCache: factory already consumed but \ + reached Vacant branch again for key {key:?}" + ) + }); + let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); + let (error_tx, mut error_rx) = tokio::sync::oneshot::channel(); + let shared = Self::make_shared_fallible(f, error_tx); + let ret = shared.clone(); + vac.insert_entry(Slot::InFlight(generation, shared)); + + if let Some(v) = self.await_shared(&key, generation, ret).await { + return Ok(v); + } + // Our factory returned `Err` — retrieve it from the channel. + return match error_rx.try_recv().ok() { + Some(e) => Err(e), + None => panic!( + "FutureBackedCache: factory for key {key:?} resolved to None \ + but no error was captured (factory panicked)" + ), + }; + } + } + } + } + + /// Get the cached value for `key` if it exists. + /// + /// - If the value is `Ready`, returns `Some(v)` immediately. + /// - If the value is `InFlight`, awaits the in-flight computation and returns `Some(v)`. + /// - If the key is absent, returns `None`. + /// - If the in-flight factory panicked, returns `None` (and removes the poisoned entry). + pub async fn get(&self, key: &K) -> Option { + let existing = self + .map + .read_async(key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), + }) + .await; + + match existing { + Some(Ok(v)) => Some(v), + Some(Err((generation, shared))) => self.await_shared(key, generation, shared).await, + None => None, + } + } + + /// Await a `Shared` future, handle promotion to `Ready`, and handle panic recovery. + /// + /// The `observed_gen` parameter is the generation of the `InFlight` slot that was read. + /// On panic recovery, only the entry with this exact generation is removed, preventing + /// destruction of a valid entry re-inserted by a recovered thread. 
+ /// + /// Returns `Some(v)` on success. Returns `None` if the factory panicked, after removing + /// the poisoned entry from the map. + async fn await_shared(&self, key: &K, observed_gen: u64, shared: SharedFut) -> Option { + let mut guard = PromoteGuard { + map: &self.map, + key, + observed_gen, + value: None, + }; + + let result = shared.await; + + if let Some(v) = result { + guard.value = Some(v.clone()); + + self.map + .update_async(key, |_, slot| { + if matches!(slot, Slot::InFlight(g, _) if *g == observed_gen) { + *slot = Slot::Ready(v.clone()); + } + }) + .await; + + guard.value = None; + Some(v) + } else { + // Factory panicked. Remove the poisoned InFlight entry so the next caller + // can retry — but only if the generation matches our observation. + drop(self.map.remove_if_sync( + key, + |slot| matches!(slot, Slot::InFlight(g, _) if *g == observed_gen), + )); + None + } + } + + /// Wrap a factory future in `catch_unwind`, producing a `Shared` with `Output = Option`. + fn make_shared(factory: F) -> SharedFut + where + F: FnOnce() -> Fut, + Fut: Future + Send + 'static, + { + let fut = AssertUnwindSafe(factory()).catch_unwind(); + let boxed: Pin> + Send>> = + Box::pin(async move { fut.await.ok() }); + boxed.shared() + } + + /// Like [`make_shared`](Self::make_shared), but for fallible factories. + /// + /// On `Ok(v)`, the shared future resolves to `Some(v)`. On `Err(e)`, the + /// error is sent through `error_tx` and the future resolves to `None`. 
+ fn make_shared_fallible( + factory: F, + error_tx: tokio::sync::oneshot::Sender, + ) -> SharedFut + where + F: FnOnce() -> Fut, + Fut: Future> + Send + 'static, + E: Send + 'static, + { + let fut = AssertUnwindSafe(factory()).catch_unwind(); + let boxed: Pin> + Send>> = Box::pin(async move { + match fut.await { + Ok(Ok(v)) => Some(v), + Ok(Err(e)) => { + drop(error_tx.send(e)); + None + } + Err(_panic) => None, + } + }); + boxed.shared() + } + + /// Returns the number of entries in the cache (both `Ready` and `InFlight`). + #[must_use] + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns `true` if the cache contains no entries. + #[must_use] + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// Synchronously insert a value, overwriting any existing entry. + /// + /// Suitable for seeding the cache before async operations begin. + pub fn insert_sync(&self, key: K, value: V) { + drop(self.map.insert_sync(key, Slot::Ready(value))); + } + + /// Synchronously remove the entry for `key`, returning `true` if it was present. + /// + /// Suitable for use in contexts where async is not available (e.g. inside + /// [`StatelessDrop::delete`](crate::drop_ward::StatelessDrop::delete)). + pub fn remove_sync(&self, key: &K) -> bool { + self.map.remove_sync(key).is_some() + } +} + +/// Drop guard that synchronously promotes an `InFlight` entry to `Ready` if the caller +/// is cancelled between `shared.await` completing and the async promotion running. +/// +/// Set `value = None` to defuse after successful promotion. 
+struct PromoteGuard<'a, K, V>
+where
+    K: Eq + Hash,
+    V: Clone + Send + Sync + 'static,
+{
+    /// Map holding the slot that may need promoting.
+    map: &'a scc::HashMap<K, Slot<V>>,
+    /// Key of the slot this guard watches.
+    key: &'a K,
+    /// Generation of the `InFlight` slot the caller observed; a slot carrying any other
+    /// generation is left untouched.
+    observed_gen: u64,
+    /// Value to publish on drop. `None` means the guard is defused and drop does nothing.
+    value: Option<V>,
+}
+
+impl<K, V> Drop for PromoteGuard<'_, K, V>
+where
+    K: Eq + Hash,
+    V: Clone + Send + Sync + 'static,
+{
+    /// Promote the watched slot to `Ready` if a value is still pending.
+    ///
+    /// Uses the synchronous map API because `drop` cannot await.
+    fn drop(&mut self) {
+        if let Some(v) = self.value.take() {
+            let generation = self.observed_gen;
+            self.map.update_sync(self.key, |_, slot| {
+                // Only overwrite the exact in-flight slot that was observed; a slot with a
+                // different generation (or one that is already `Ready`) is left alone.
+                if matches!(slot, Slot::InFlight(g, _) if *g == generation) {
+                    *slot = Slot::Ready(v);
+                }
+            });
+        }
+    }
+}
diff --git a/lib/cache/mod.rs b/lib/cache/mod.rs
index e0c1c97f..5c48ee22 100644
--- a/lib/cache/mod.rs
+++ b/lib/cache/mod.rs
@@ -1,3 +1,5 @@
+/// Async-backed cache implementation.
+pub mod async_backed;
 /// Cache eviction policies.
 pub mod eviction;
 /// File-backed cache implementation.
diff --git a/lib/drop_ward.rs b/lib/drop_ward.rs
new file mode 100644
index 00000000..848d1dfb
--- /dev/null
+++ b/lib/drop_ward.rs
@@ -0,0 +1,136 @@
+//! Automatic, type-directed cleanup driven by reference counting.
+//!
+//! [`DropWard`] tracks how many live references exist for a given key and invokes a cleanup
+//! callback when a key's count reaches zero. The cleanup logic is selected at the type level
+//! through a zero-sized "tag" type that implements [`StatelessDrop`], keeping the ward itself
+//! generic over *what* it manages without storing per-key values.
+//!
+//! This is designed for resources whose lifecycle is bound to an external context (e.g. GPU device
+//! handles, connection pools, graphics pipelines) where Rust's built-in `Drop` cannot be used
+//! because cleanup requires access to that context.
+//!
+//! # Design rationale
+//!
+//! The tag type `T` is constrained to be zero-sized. It exists only to carry the [`StatelessDrop`]
+//! implementation at the type level — no `T` value is ever constructed or stored. This means a
+//! single `DropWard` instance adds no per-key overhead beyond the key and its `usize` count.
+//!
+//! 
# Example
+//!
+//! ```ignore
+//! struct GpuTextureDrop;
+//!
+//! impl StatelessDrop<wgpu::Device, TextureId> for GpuTextureDrop {
+//!     fn delete(device: &wgpu::Device, _key: &TextureId) {
+//!         // e.g. flush a deferred-destruction queue
+//!         device.poll(wgpu::Maintain::Wait);
+//!     }
+//! }
+//!
+//! let mut ward: DropWard<wgpu::Device, TextureId, GpuTextureDrop> = DropWard::new(device);
+//!
+//! ward.inc(texture_id);  // → 1
+//! ward.inc(texture_id);  // → 2
+//! ward.dec(&texture_id); // → Some(1)
+//! ward.dec(&texture_id); // → Some(0), calls GpuTextureDrop::delete(&device, &texture_id)
+//! ```
+
+use std::marker::PhantomData;
+
+use rustc_hash::FxHashMap;
+
+/// Type-level hook for cleanup that requires an external context.
+///
+/// Implement this on a zero-sized tag type. The tag is never instantiated — it only selects which
+/// `delete` implementation a [`DropWard`] will call.
+///
+/// # Type parameters
+///
+/// - `Ctx` — the context type the cleanup needs (borrowed, never owned, by `delete`).
+/// - `K` — the key type identifying the resource being released.
+pub trait StatelessDrop<Ctx, K> {
+    /// Called once each time a key's reference count reaches zero.
+    ///
+    /// A key that is inserted again after being released, and then released again, triggers
+    /// another call.
+    ///
+    /// `ctx` is the shared context owned by the [`DropWard`]. `key` is the key whose count just
+    /// reached zero. This callback fires synchronously inside [`DropWard::dec`] and
+    /// [`DropWard::dec_count`]; avoid blocking or panicking if the ward is used on a hot path.
+    fn delete(ctx: &Ctx, key: &K);
+}
+
+/// A reference-counted key set that triggers [`StatelessDrop::delete`] on the associated context
+/// when any key's count drops to zero.
+///
+/// # Type parameters
+///
+/// - `Ctx` — shared context passed to `T::delete` (e.g. a device handle).
+/// - `K` — the key type being reference-counted.
+/// - `T` — a **zero-sized** tag type carrying the cleanup logic.
+///   The ward declares a const assertion that `size_of::<T>() == 0`.
+///
+/// # Concurrency
+///
+/// Not thread-safe. All access requires `&mut self`. Wrap in a `Mutex` or similar if shared across
+/// threads.
+///
+#[derive(Debug, Clone)]
+pub struct DropWard<Ctx, K, T> {
+    /// Live reference count per tracked key. An entry is removed when its count reaches zero,
+    /// so a stored count is never `0`.
+    map: FxHashMap<K, usize>,
+    /// Context handed to `T::delete` whenever a key is released.
+    ctx: Ctx,
+    /// Ties the tag type `T` to the ward without storing a `T`.
+    _marker: PhantomData<T>,
+}
+
+impl<Ctx, K, T> DropWard<Ctx, K, T>
+where
+    K: Eq + std::hash::Hash,
+    T: StatelessDrop<Ctx, K>,
+{
+    /// Compile-time guard: `T` must be zero-sized.
+    ///
+    /// An associated const of a generic impl is only evaluated for instantiations that
+    /// reference it, so [`new`](Self::new) names it explicitly to make the assertion fire.
+    const _ASSERT_ZST: () = assert!(size_of::<T>() == 0, "T must be zero-sized");
+
+    /// Create a new ward that will pass `ctx` to `T::delete` on cleanup.
+    ///
+    /// Instantiating this function with a non-zero-sized `T` is a compile error.
+    pub fn new(ctx: Ctx) -> Self {
+        // Naming the const forces its evaluation for this `T`; an associated const that is
+        // never referenced is never evaluated, so the assertion would otherwise be inert.
+        let () = Self::_ASSERT_ZST;
+        Self {
+            map: FxHashMap::default(),
+            ctx,
+            _marker: PhantomData,
+        }
+    }
+
+    /// Increment the reference count for `key`, inserting it with a count
+    /// of 1 if it does not exist.
+    ///
+    /// Returns the count **after** incrementing.
+    pub fn inc(&mut self, key: K) -> usize {
+        *self
+            .map
+            .entry(key)
+            .and_modify(|count| *count += 1)
+            .or_insert(1)
+    }
+
+    /// Shared implementation of [`dec`](Self::dec) and [`dec_count`](Self::dec_count).
+    ///
+    /// Subtracts `by` from the count for `key`, saturating at zero. When the result is zero,
+    /// `T::delete` is called and the key is then removed; otherwise the new count is stored.
+    ///
+    /// Returns the count after decrementing, or `None` if `key` is not tracked.
+    fn dec_by(&mut self, key: &K, by: usize) -> Option<usize> {
+        // A single lookup serves both the read and the write-back.
+        let slot = self.map.get_mut(key)?;
+        let new_count = slot.saturating_sub(by);
+        if new_count == 0 {
+            // Delete before removing from the map: if `delete` panics the
+            // entry remains and a subsequent `dec` can retry cleanup. The
+            // reverse order would silently lose the entry.
+            T::delete(&self.ctx, key);
+            self.map.remove(key);
+        } else {
+            *slot = new_count;
+        }
+        Some(new_count)
+    }
+
+    /// Decrement the reference count for `key`.
+    ///
+    /// If the count reaches zero, `T::delete` is called synchronously with the
+    /// ward's context and the key is then removed. Returns `Some(0)` in
+    /// this case — the key will no longer be tracked.
+    ///
+    /// Returns `None` if `key` was not present (no-op).
+    pub fn dec(&mut self, key: &K) -> Option<usize> {
+        self.dec_by(key, 1)
+    }
+
+    /// Decrement the reference count for `key` by `count`, saturating at zero.
+    ///
+    /// Behaves like [`dec`](Self::dec) otherwise: reaching zero calls `T::delete` and removes
+    /// the key, and the count after decrementing is returned.
+    ///
+    /// Returns `None` if `key` was not present (no-op).
+    pub fn dec_count(&mut self, key: &K, count: usize) -> Option<usize> {
+        self.dec_by(key, count)
+    }
+}
diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs
new file mode 100644
index 00000000..1c069f4d
--- /dev/null
+++ b/lib/fs/async_fs.rs
@@ -0,0 +1,582 @@
+//! 
Async `INode` Table which supports concurrent access and modification. + +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; + +use bytes::Bytes; +use tokio::sync::Semaphore; + +use crate::cache::async_backed::FutureBackedCache; +use crate::drop_ward::StatelessDrop; +use crate::fs::{ + AsyncFsStats, DirEntry, FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags, + dcache::DCache, +}; + +/// A reader for an open file, returned by [`FsDataProvider::open`]. +/// +/// Implementors provide the actual data for read operations. The FUSE +/// adapter calls [`close`](Self::close) to release resources explicitly. +pub trait FileReader: Send + Sync + 'static { + /// Read up to `size` bytes starting at byte `offset`. + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send; + + /// Release any resources held by this reader. + /// + /// Called explicitly by the FUSE adapter during `release`. Implementations + /// that hold inner file handles should release them here. The default + /// implementation is a no-op. + fn close(&self) -> impl Future> + Send { + async { Ok(()) } + } +} + +/// A data provider for [`AsyncFs`] that fetches inode data on cache misses. +pub trait FsDataProvider: Clone + Send + Sync + 'static { + /// The reader type returned by [`open`](Self::open). + type Reader: FileReader; + + /// Look up a child inode by name within the given parent directory. + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send; + + /// List all children of a directory. + /// + /// Called by [`AsyncFs::readdir`] on a cache miss. The returned + /// children are inserted into the directory cache and inode table + /// so subsequent reads are served from cache. + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send; + + /// Open a file and return a reader for subsequent read calls. 
+ fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send; + + /// Clean up provider-internal state for an evicted inode. + /// + /// The `DropWard`/`InodeForget` system automatically removes inodes from + /// the shared `inode_table` when the FUSE refcount reaches zero, but data + /// providers often maintain auxiliary structures (path maps, bridge maps) + /// that also need cleanup. This method is that extension point. + /// + /// Never called directly -- [`InodeForget::delete`] invokes it + /// automatically when the refcount drops to zero. + fn forget(&self, _addr: InodeAddr) {} +} + +/// Zero-sized cleanup tag for inode eviction. +/// +/// The [`StatelessDrop`] implementations on this type evict inodes from the +/// inode table and, when a data provider is present, delegate to +/// [`FsDataProvider::forget`] so the provider can clean up its own auxiliary +/// structures (path maps, bridge maps, etc.). +pub struct InodeForget; + +/// Evicts the inode from the table only. Used when no data provider is available. +impl StatelessDrop>, InodeAddr> for InodeForget { + fn delete(inode_table: &Arc>, addr: &InodeAddr) { + inode_table.remove_sync(addr); + } +} + +/// Evicts the inode from the table and delegates to [`FsDataProvider::forget`] +/// so the provider can clean up its own auxiliary state. +impl StatelessDrop<(Arc>, DP), InodeAddr> + for InodeForget +{ + fn delete(ctx: &(Arc>, DP), key: &InodeAddr) { + ctx.0.remove_sync(key); + ctx.1.forget(*key); + } +} + +/// A looked-up inode returned by [`AsyncFs::lookup`]. +/// +/// Each `ResolvedINode` returned by lookup represents one reference that +/// the FUSE kernel holds. The caller must balance it by decrementing the +/// [`InodeLifecycle`] ward when the kernel sends `forget`. +#[derive(Debug, Clone, Copy)] +pub struct ResolvedINode { + /// The resolved inode data. + pub inode: INode, +} + +/// An open file that provides read access. +/// +/// Returned by [`AsyncFs::open`]. 
The caller owns this handle and uses +/// [`read`](Self::read) to fetch data. Dropping the handle releases +/// the underlying reader when the last `Arc` clone is gone. +#[derive(Debug, Clone)] +pub struct OpenFile { + /// The raw file handle number, suitable for returning to the FUSE kernel. + pub fh: FileHandle, + /// The reader backing this open file. + pub reader: Arc, +} + +impl OpenFile { + /// Read up to `size` bytes starting at byte `offset`. + pub async fn read(&self, offset: u64, size: u32) -> Result { + self.reader.read(offset, size).await + } +} + +/// Co-located inode table and reference-count ward. +/// +/// When `dec` reaches zero for a key, [`InodeForget::delete`] synchronously +/// removes that inode from the table. +pub struct InodeLifecycle { + table: Arc>, + ward: crate::drop_ward::DropWard< + Arc>, + InodeAddr, + InodeForget, + >, +} + +impl InodeLifecycle { + /// Create a new lifecycle managing the given inode table. + pub fn from_table(table: Arc>) -> Self { + let ward = crate::drop_ward::DropWard::new(Arc::clone(&table)); + Self { table, ward } + } + + /// Increment the reference count for an inode address. + pub fn inc(&mut self, addr: InodeAddr) -> usize { + self.ward.inc(addr) + } + + /// Decrement the reference count for an inode address. + /// + /// When the count reaches zero, the inode is automatically evicted + /// from the table via [`InodeForget::delete`]. + pub fn dec(&mut self, addr: &InodeAddr) -> Option { + self.ward.dec(addr) + } + + /// Decrement the reference count by `count`. + /// + /// When the count reaches zero, the inode is automatically evicted. + pub fn dec_count(&mut self, addr: &InodeAddr, count: usize) -> Option { + self.ward.dec_count(addr, count) + } + + /// Read-only access to the underlying inode table. + #[must_use] + pub fn table(&self) -> &FutureBackedCache { + &self.table + } +} + +/// RAII guard that calls [`DCache::abort_populate`] on drop unless defused. 
+///
+/// Without it, a populating future that is cancelled (e.g. by a FUSE interrupt or
+/// `select!`) would leave the populate flag stuck in `IN_PROGRESS`.
+struct PopulateGuard<'a> {
+    /// Directory cache whose populate claim this guard protects.
+    dcache: &'a DCache,
+    /// Directory whose populate claim is aborted on drop.
+    parent: LoadedAddr,
+    /// `true` until [`defuse`](Self::defuse) is called; only an armed guard aborts.
+    armed: bool,
+}
+
+impl<'a> PopulateGuard<'a> {
+    /// Build an armed guard for `parent`'s populate claim on `dcache`.
+    fn new(dcache: &'a DCache, parent: LoadedAddr) -> Self {
+        let armed = true;
+        Self { dcache, parent, armed }
+    }
+
+    /// Disarm the guard; call this once `finish_populate` has succeeded.
+    fn defuse(&mut self) {
+        self.armed = false;
+    }
+}
+
+impl Drop for PopulateGuard<'_> {
+    /// Abort the populate claim unless the guard was defused.
+    ///
+    /// This runs when the populating future is cancelled before
+    /// [`defuse`](Self::defuse), resetting the dcache populate flag from
+    /// `IN_PROGRESS` back to `UNCLAIMED` so a later `readdir` can retry. Under FUSE
+    /// interrupts or `tokio::select!` cancellation this is expected, not an error.
+    fn drop(&mut self) {
+        if !self.armed {
+            return;
+        }
+        self.dcache.abort_populate(self.parent);
+    }
+}
+
+/// Background-populate a single child directory into the caches.
+///
+/// Uses the same CAS gate as `readdir` so duplicate work is impossible.
+/// Errors are silently ignored — prefetch is best-effort.
+async fn prefetch_dir( + dir_addr: LoadedAddr, + directory_cache: Arc, + inode_table: Arc>, + data_provider: DP, +) { + use crate::fs::dcache::PopulateStatus; + + match directory_cache.try_claim_populate(dir_addr) { + PopulateStatus::Claimed => {} + PopulateStatus::InProgress | PopulateStatus::Done => return, + } + + let mut guard = PopulateGuard::new(&directory_cache, dir_addr); + + let Some(dir_inode) = inode_table.get(&dir_addr.addr()).await else { + return; + }; + + let Ok(children) = data_provider.readdir(dir_inode).await else { + return; + }; + + for (name, child_inode) in children { + let is_dir = child_inode.itype == INodeType::Directory; + inode_table + .get_or_init(child_inode.addr, || async move { child_inode }) + .await; + directory_cache.insert( + dir_addr, + name, + LoadedAddr::new_unchecked(child_inode.addr), + is_dir, + ); + } + directory_cache.finish_populate(dir_addr); + guard.defuse(); +} + +/// Maximum number of concurrent prefetch tasks spawned per [`AsyncFs`] instance. +/// +/// Prevents thundering-herd API calls when a parent directory contains many +/// subdirectories (e.g. `node_modules`). Each `readdir` that discovers child +/// directories spawns at most this many concurrent prefetch tasks; additional +/// children wait for a permit. +const MAX_PREFETCH_CONCURRENCY: usize = 8; + +/// An asynchronous filesystem cache mapping `InodeAddr` to `INode`. +/// +/// Uses two [`FutureBackedCache`] layers: +/// - `inode_table` stores resolved inodes by address, used by [`loaded_inode`](Self::loaded_inode). +/// - `lookup_cache` stores lookup results by `(parent_addr, name)`, ensuring `dp.lookup()` is only +/// called on a true cache miss (not already cached or in-flight). +/// +/// The [`DCache`] sits in front as a synchronous fast path mapping `(parent, name)` to child addr. +pub struct AsyncFs { + /// Canonical addr -> `INode` map. Used by `loaded_inode()` to retrieve inodes by address. 
+ inode_table: Arc>, + + /// Deduplicating lookup cache keyed by `(parent_addr, child_name)`. The factory is + /// `dp.lookup()`, so the data provider is only called on a true cache miss. + lookup_cache: FutureBackedCache<(InodeAddr, Arc), INode>, + + /// Directory entry cache, mapping `(parent, name)` to child inode address. + directory_cache: Arc, + + /// The data provider used to fetch inode data on cache misses. + data_provider: DP, + + /// Monotonically increasing file handle counter. Starts at 1 (0 is reserved). + next_fh: AtomicU64, + + /// Bounds the number of concurrent background prefetch tasks. + prefetch_semaphore: Arc, +} + +impl AsyncFs { + /// Create a new `AsyncFs`, seeding the root inode into the table. + pub async fn new( + data_provider: DP, + root: INode, + inode_table: Arc>, + ) -> Self { + inode_table + .get_or_init(root.addr, || async move { root }) + .await; + + Self { + inode_table, + lookup_cache: FutureBackedCache::default(), + directory_cache: Arc::new(DCache::new()), + data_provider, + next_fh: AtomicU64::new(1), + prefetch_semaphore: Arc::new(Semaphore::new(MAX_PREFETCH_CONCURRENCY)), + } + } + + /// Create a new `AsyncFs`, assuming the root inode is already in the table. + /// + /// The caller must ensure the root inode has already been inserted into + /// `inode_table` (e.g. via [`FutureBackedCache::insert_sync`]). + #[must_use] + pub fn new_preseeded( + data_provider: DP, + inode_table: Arc>, + ) -> Self { + Self { + inode_table, + lookup_cache: FutureBackedCache::default(), + directory_cache: Arc::new(DCache::new()), + data_provider, + next_fh: AtomicU64::new(1), + prefetch_semaphore: Arc::new(Semaphore::new(MAX_PREFETCH_CONCURRENCY)), + } + } + + /// Spawn background tasks to prefetch each child directory of `parent`. + /// + /// Concurrency is bounded by [`MAX_PREFETCH_CONCURRENCY`] via a shared + /// semaphore, preventing thundering-herd API calls when a parent + /// directory contains many subdirectories. 
+ fn spawn_prefetch_children(&self, parent: LoadedAddr) { + let child_dirs = self.directory_cache.child_dir_addrs(parent); + for child_addr in child_dirs { + let sem = Arc::clone(&self.prefetch_semaphore); + let dcache = Arc::clone(&self.directory_cache); + let table = Arc::clone(&self.inode_table); + let dp = self.data_provider.clone(); + tokio::spawn(async move { + let _permit = sem.acquire().await; + prefetch_dir(child_addr, dcache, table, dp).await; + }); + } + } + + /// Get the total number of inodes currently stored in the inode table. + #[must_use] + pub fn inode_count(&self) -> usize { + self.inode_table.len() + } + + /// Return filesystem statistics. + /// + /// Reports the current inode count from the cache. Block-related + /// fields default to values appropriate for a virtual read-only + /// filesystem (4 KiB blocks, no free space). + #[must_use] + pub fn statfs(&self) -> AsyncFsStats { + AsyncFsStats { + block_size: 4096, + total_blocks: 0, + free_blocks: 0, + available_blocks: 0, + total_inodes: self.inode_count() as u64, + free_inodes: 0, + max_filename_length: 255, + } + } + + /// Asynchronously look up an inode by name within a parent directory. + /// + /// Resolution order: + /// 1. Directory cache (synchronous fast path) + /// 2. Lookup cache (`get_or_try_init` — calls `dp.lookup()` only on a true miss) + /// 3. On success, populates inode table and directory cache + pub async fn lookup( + &self, + parent: LoadedAddr, + name: &OsStr, + ) -> Result { + let parent_ino = self.loaded_inode(parent).await?; + debug_assert!( + matches!(parent_ino.itype, INodeType::Directory), + "parent inode should be a directory" + ); + + if let Some(dentry) = self.directory_cache.lookup(parent, name) { + if let Some(inode) = self.inode_table.get(&dentry.ino.addr()).await { + return Ok(ResolvedINode { inode }); + } + // Inode was evicted (e.g. by forget). Evict the stale lookup_cache + // entry so the slow path calls dp.lookup() fresh. 
+ self.lookup_cache + .remove_sync(&(parent.addr(), Arc::from(name))); + } + + // Note: get_or_try_init deduplicates successful lookups but NOT + // failures. Under transient API errors, concurrent lookups for + // the same (parent, name) may each independently call dp.lookup(). + // This is acceptable: the cost of a redundant API call on error is + // low compared to the complexity of error-channel deduplication. + let name_arc: Arc = Arc::from(name); + let lookup_key = (parent.addr(), Arc::clone(&name_arc)); + let dp = self.data_provider.clone(); + + let child = self + .lookup_cache + .get_or_try_init(lookup_key, || { + let name_for_dp = Arc::clone(&name_arc); + async move { dp.lookup(parent_ino, &name_for_dp).await } + }) + .await?; + + self.inode_table + .get_or_init(child.addr, || async move { child }) + .await; + + self.directory_cache.insert( + parent, + name_arc.as_ref().to_os_string(), + LoadedAddr::new_unchecked(child.addr), + matches!(child.itype, INodeType::Directory), + ); + + Ok(ResolvedINode { inode: child }) + } + + /// Retrieve an inode that is expected to already be loaded. + /// + /// If the inode is currently in-flight (being loaded by another caller), this awaits + /// completion. Returns an error if the inode is not in the table at all. + pub async fn loaded_inode(&self, addr: LoadedAddr) -> Result { + self.inode_table.get(&addr.addr()).await.ok_or_else(|| { + tracing::error!( + inode = ?addr.addr(), + "inode not found in table — this is a programming bug" + ); + std::io::Error::from_raw_os_error(libc::ENOENT) + }) + } + + /// Return the attributes of the inode at `addr`. + /// + /// This is the getattr entry point for the filesystem. Returns the + /// cached [`INode`] directly — callers at the FUSE boundary are + /// responsible for converting to `fuser::FileAttr`. + pub async fn getattr(&self, addr: LoadedAddr) -> Result { + self.loaded_inode(addr).await + } + + /// Open a file for reading. 
+    ///
+    /// Validates the inode is not a directory, delegates to the data provider
+    /// to create a [`FileReader`], and returns an [`OpenFile`] that the caller
+    /// owns. Reads go through [`OpenFile::read`].
+    // NOTE(review): the return type below looks truncated in this text (a
+    // type-parameter list appears to be missing); confirm against the repo.
+    pub async fn open(
+        &self,
+        addr: LoadedAddr,
+        flags: OpenFlags,
+    ) -> Result, std::io::Error> {
+        let inode = self.loaded_inode(addr).await?;
+        if inode.itype == INodeType::Directory {
+            return Err(std::io::Error::from_raw_os_error(libc::EISDIR));
+        }
+        let reader = self.data_provider.open(inode, flags).await?;
+        // File handles come from a shared counter incremented once per open.
+        let fh = self.next_fh.fetch_add(1, Ordering::Relaxed);
+        Ok(OpenFile {
+            fh,
+            reader: Arc::new(reader),
+        })
+    }
+
+    /// Iterate directory entries for `parent`, starting from `offset`.
+    ///
+    /// On the first call for a given parent, fetches the directory listing
+    /// from the data provider and populates the directory cache and inode
+    /// table. Subsequent calls serve entries directly from cache.
+    ///
+    /// Entries are yielded in name-sorted order. For each entry, `filler` is
+    /// called with the [`DirEntry`] and the next offset value. If `filler`
+    /// returns `true` (indicating the caller's buffer is full), iteration
+    /// stops early.
+    ///
+    /// TODO(MES-746): Implement `opendir` and `releasedir` to snapshot directory contents and
+    /// avoid racing with `lookup`/`createfile`.
+    pub async fn readdir(
+        &self,
+        parent: LoadedAddr,
+        offset: u64,
+        mut filler: impl FnMut(DirEntry<'_>, u64) -> bool,
+    ) -> Result<(), std::io::Error> {
+        use crate::fs::dcache::PopulateStatus;
+
+        let parent_inode = self.loaded_inode(parent).await?;
+        if parent_inode.itype != INodeType::Directory {
+            return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR));
+        }
+
+        // Populate the directory cache on first readdir for this parent.
+        // Uses a three-state CAS gate to prevent duplicate dp.readdir() calls.
+        loop {
+            match self.directory_cache.try_claim_populate(parent) {
+                PopulateStatus::Claimed => {
+                    // RAII guard: if this future is cancelled between Claimed
+                    // and finish_populate, automatically abort so other waiters
+                    // can retry instead of hanging forever.
+                    let mut guard = PopulateGuard::new(&self.directory_cache, parent);
+
+                    // An error here returns early via `?` with the guard still armed.
+                    let children = self.data_provider.readdir(parent_inode).await?;
+                    for (name, child_inode) in children {
+                        self.inode_table
+                            .get_or_init(child_inode.addr, || async move { child_inode })
+                            .await;
+                        self.directory_cache.insert(
+                            parent,
+                            name,
+                            LoadedAddr::new_unchecked(child_inode.addr),
+                            child_inode.itype == INodeType::Directory,
+                        );
+                    }
+                    self.directory_cache.finish_populate(parent);
+                    guard.defuse();
+                    self.spawn_prefetch_children(parent);
+                    break;
+                }
+                PopulateStatus::InProgress => {
+                    self.directory_cache.wait_populated(parent).await;
+                    // Re-check: the populator may have aborted.
+                }
+                PopulateStatus::Done => break,
+            }
+        }
+
+        #[expect(
+            clippy::cast_possible_truncation,
+            reason = "offset fits in usize on supported 64-bit platforms"
+        )]
+        let skip = offset as usize;
+
+        // Collect only entries at or past `offset`, avoiding clones for
+        // entries that will be skipped during paginated readdir.
+        let mut entries: Vec<(OsString, LoadedAddr)> = Vec::new();
+        let mut idx = 0usize;
+        self.directory_cache.readdir(parent, |name, dvalue| {
+            if idx >= skip {
+                entries.push((name.to_os_string(), dvalue.ino));
+            }
+            idx += 1;
+        });
+
+        for (i, (name, child_addr)) in entries.iter().enumerate() {
+            let Some(inode) = self.inode_table.get(&child_addr.addr()).await else {
+                // Inode was evicted between readdir collection and iteration
+                // (e.g. by a concurrent forget). Skip the stale entry.
+                tracing::debug!(addr = ?child_addr.addr(), name = ?name, "inode evicted during readdir, skipping");
+                continue;
+            };
+            // `i` counts skipped (evicted) entries too, so offsets stay aligned
+            // with positions in the cached listing.
+            let next_offset = (skip + i + 1) as u64;
+            if filler(DirEntry { name, inode }, next_offset) {
+                break;
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs
new file mode 100644
index 00000000..c6edda8a
--- /dev/null
+++ b/lib/fs/bridge.rs
@@ -0,0 +1,148 @@
+//! Bidirectional inode address mapping.
+//!
+//! [`ConcurrentBridge`] maps between "outer" (composite) and "inner" (child)
+//! inode address spaces using two [`scc::HashMap`]s guarded by a coordination
+//! lock for cross-map atomicity.
+
+use std::sync::Mutex;
+
+use crate::fs::InodeAddr;
+
+/// Bidirectional inode mapping between outer (composite) and inner (child) address spaces.
+///
+/// Uses two concurrent `scc::HashMap`s for lock-free reads. Mutations that
+/// touch both maps are serialized by a `Mutex<()>` to prevent cross-map
+/// inconsistencies (e.g. a concurrent `remove_by_outer` between the two
+/// `insert_sync` calls in `insert` could leave orphaned entries).
+pub struct ConcurrentBridge {
+    /// outer -> inner
+    fwd: scc::HashMap,
+    /// inner -> outer
+    bwd: scc::HashMap,
+    /// Serializes mutations that touch both maps.
+    mu: Mutex<()>,
+}
+
+impl ConcurrentBridge {
+    /// Creates an empty bridge.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            fwd: scc::HashMap::new(),
+            bwd: scc::HashMap::new(),
+            mu: Mutex::new(()),
+        }
+    }
+
+    /// Insert a mapping from outer to inner.
+    ///
+    /// Serialized with other mutations via the coordination lock.
+    pub fn insert(&self, outer: InodeAddr, inner: InodeAddr) {
+        // A poisoned lock is recovered rather than propagated as a panic.
+        let _guard = self
+            .mu
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        // NOTE(review): both insert results are discarded; presumably an
+        // already-present key leaves the old value in place — confirm.
+        let _ = self.fwd.insert_sync(outer, inner);
+        let _ = self.bwd.insert_sync(inner, outer);
+    }
+
+    /// Resolve outer -> inner.
+    ///
+    /// This read is **not** serialized with mutations.
A concurrent [`insert`] + /// may have completed the forward entry but not yet the backward entry (or + /// vice versa for [`remove_by_outer`]). Callers must tolerate stale or + /// transiently-missing results. Use [`backward_or_insert`] when + /// cross-map consistency is required. + /// + /// [`insert`]: Self::insert + /// [`remove_by_outer`]: Self::remove_by_outer + /// [`backward_or_insert`]: Self::backward_or_insert + #[must_use] + pub fn forward(&self, outer: InodeAddr) -> Option { + self.fwd.read_sync(&outer, |_, &v| v) + } + + /// Resolve inner -> outer. + /// + /// This read is **not** serialized with mutations. See [`forward`] for + /// the consistency caveats. Use [`backward_or_insert`] when cross-map + /// consistency is required. + /// + /// [`forward`]: Self::forward + /// [`backward_or_insert`]: Self::backward_or_insert + #[must_use] + pub fn backward(&self, inner: InodeAddr) -> Option { + self.bwd.read_sync(&inner, |_, &v| v) + } + + /// Look up inner -> outer, or insert `fallback` as the new outer address. + /// + /// `fallback` is a pre-allocated address provided by the caller. If the + /// inner address already has a mapping, `fallback` is unused (the caller + /// accepts that the monotonic address counter may skip values). + /// + /// Serialized with other mutations via the coordination lock. + #[must_use] + pub fn backward_or_insert(&self, inner: InodeAddr, fallback: InodeAddr) -> InodeAddr { + let _guard = self + .mu + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + match self.bwd.entry_sync(inner) { + scc::hash_map::Entry::Occupied(occ) => *occ.get(), + scc::hash_map::Entry::Vacant(vac) => { + vac.insert_entry(fallback); + let _ = self.fwd.insert_sync(fallback, inner); + fallback + } + } + } + + /// Remove the mapping for the given outer address. + /// + /// Returns `true` if the bridge is empty after the removal — the caller + /// can use this to garbage-collect the owning slot. 
+    /// The emptiness check
+    /// is performed under the coordination lock so there is no TOCTOU gap
+    /// with the removal itself.
+    pub fn remove_by_outer(&self, outer: InodeAddr) -> bool {
+        let _guard = self
+            .mu
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        // The forward entry yields the inner address needed to drop the
+        // matching backward entry; an unknown `outer` changes nothing.
+        if let Some((_, inner)) = self.fwd.remove_sync(&outer) {
+            self.bwd.remove_sync(&inner);
+        }
+        self.fwd.is_empty()
+    }
+
+    /// Returns `true` if the bridge contains no mappings.
+    ///
+    /// Reads are not serialized with mutations. The result is a
+    /// snapshot that may be immediately stale. Use [`is_empty_locked`](Self::is_empty_locked)
+    /// when consistency with concurrent mutations is required.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        // Emptiness is judged from the forward map only.
+        self.fwd.is_empty()
+    }
+
+    /// Returns `true` if the bridge contains no mappings, serialized with
+    /// mutations via the coordination lock.
+    ///
+    /// Use this instead of [`is_empty`](Self::is_empty) when the result
+    /// must be consistent with a concurrent [`backward_or_insert`](Self::backward_or_insert)
+    /// that may be mid-insert.
+    #[must_use]
+    pub fn is_empty_locked(&self) -> bool {
+        let _guard = self
+            .mu
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        self.fwd.is_empty()
+    }
+}
+
+impl Default for ConcurrentBridge {
+    fn default() -> Self {
+        Self::new()
+    }
+}
diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs
new file mode 100644
index 00000000..e4245e5c
--- /dev/null
+++ b/lib/fs/composite.rs
@@ -0,0 +1,497 @@
+//! Generic composite filesystem types.
+//!
+//! A composite filesystem presents multiple child filesystems under a single
+//! virtual root directory. The [`CompositeRoot`] trait describes how children
+//! are discovered, [`ChildInner`] co-locates an inode table with an
+//! [`AsyncFs`](super::async_fs::AsyncFs), and [`CompositeReader`] wraps a
+//! child reader so the composite layer can expose it through [`FileReader`].
+
+use std::ffi::{OsStr, OsString};
+use std::future::Future;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+use bytes::Bytes;
+
+use crate::cache::async_backed::FutureBackedCache;
+use crate::fs::async_fs::{AsyncFs, FileReader, FsDataProvider, OpenFile};
+use crate::fs::bridge::ConcurrentBridge;
+use crate::fs::{INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags};
+
+/// Descriptor for a child filesystem returned by [`CompositeRoot`].
+// NOTE(review): `DP` is used below but no type-parameter list is visible on
+// the struct in this text; it appears to have been lost — confirm.
+pub struct ChildDescriptor {
+    /// The name this child is listed as in the composite root directory.
+    pub name: OsString,
+    /// The data provider for this child.
+    pub provider: DP,
+    /// The root inode of the child filesystem.
+    pub root_ino: INode,
+}
+
+/// Describes the children that a composite filesystem exposes at its root.
+///
+/// Implementors define domain-specific child resolution: what children exist,
+/// and what [`FsDataProvider`] backs each child.
+pub trait CompositeRoot: Send + Sync + 'static {
+    /// The data provider type for child filesystems.
+    type ChildDP: FsDataProvider;
+
+    /// Resolve a child by name, returning its data provider and root inode.
+    ///
+    /// Called on lookup at the composite root. Returns `None` if the name
+    /// does not correspond to a known child.
+    // NOTE(review): the `Future` output type below looks truncated in this
+    // text; callers treat it as an optional `ChildDescriptor` — confirm.
+    fn resolve_child(
+        &self,
+        name: &OsStr,
+    ) -> impl Future>, std::io::Error>> + Send;
+
+    /// List all children at the composite root.
+    ///
+    /// Called on readdir at the composite root.
+    fn list_children(
+        &self,
+    ) -> impl Future>, std::io::Error>> + Send;
+}
+
+/// Co-locates an inode table and [`AsyncFs`].
+pub struct ChildInner {
+    // Kept alongside `fs`, which receives its own clone of the `Arc`; the
+    // field itself is never read (hence `dead_code`).
+    #[expect(dead_code)]
+    table: Arc>,
+    fs: AsyncFs,
+}
+
+impl ChildInner {
+    /// Wrap `table` in an `Arc` and build the child `AsyncFs` on top of it.
+    pub(crate) fn create(table: FutureBackedCache, provider: DP) -> Self {
+        let table = Arc::new(table);
+        let fs = AsyncFs::new_preseeded(provider, Arc::clone(&table));
+        Self { table, fs }
+    }
+
+    /// Borrow the child's filesystem.
+    pub(crate) fn get_fs(&self) -> &AsyncFs {
+        &self.fs
+    }
+}
+
+/// Wraps a child's reader so that the composite layer can expose it as its own
+/// [`FileReader`].
+pub struct CompositeReader {
+    inner: Arc,
+}
+
+impl CompositeReader {
+    /// Create a new `CompositeReader` wrapping the given reader.
+    #[must_use]
+    pub fn new(inner: Arc) -> Self {
+        Self { inner }
+    }
+}
+
+impl std::fmt::Debug for CompositeReader {
+    // The wrapped reader is intentionally omitted from the debug output.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("CompositeReader").finish_non_exhaustive()
+    }
+}
+
+impl FileReader for CompositeReader {
+    // Both methods forward directly to the wrapped reader.
+    fn read(
+        &self,
+        offset: u64,
+        size: u32,
+    ) -> impl Future> + Send {
+        self.inner.read(offset, size)
+    }
+
+    fn close(&self) -> impl Future> + Send {
+        self.inner.close()
+    }
+}
+
+struct ChildSlot {
+    inner: Arc>,
+    bridge: Arc,
+    /// The name under which this child was registered in `name_to_slot`.
+    /// Stored here so `forget` can do O(1) removal instead of a linear scan.
+    name: OsString,
+}
+
+struct CompositeFsInner {
+    root: R,
+    /// Child slots, indexed by slot number.
+    slots: scc::HashMap>,
+    /// Maps a composite-level outer inode to its child slot index.
+    addr_to_slot: scc::HashMap,
+    /// Maps child name to slot index (for dedup on concurrent resolve).
+    ///
+    /// `register_child` uses `entry_sync` on this map for per-name
+    /// exclusion, serializing concurrent registrations of the same child
+    /// without a global lock. `forget` cleans up entries when a slot's
+    /// bridge becomes empty.
+    name_to_slot: scc::HashMap,
+    /// Monotonically increasing slot counter.
+    next_slot: AtomicU64,
+    /// Monotonically increasing inode counter. Starts at 2 (1 = root).
+    next_ino: AtomicU64,
+    /// The filesystem owner uid/gid.
+    fs_owner: (u32, u32),
+}
+
+/// A generic composite filesystem that routes to child `AsyncFs` instances.
+///
+/// Implements [`FsDataProvider`] so it can be used inside another `AsyncFs`.
+/// Clone is cheap (shared `Arc`).
+pub struct CompositeFs {
+    inner: Arc>,
+}
+
+impl Clone for CompositeFs {
+    fn clone(&self) -> Self {
+        Self {
+            inner: Arc::clone(&self.inner),
+        }
+    }
+}
+
+impl CompositeFs {
+    /// Root inode address for this composite level.
+    pub const ROOT_INO: InodeAddr = 1;
+
+    /// Create a new composite filesystem.
+    #[must_use]
+    pub fn new(root: R, fs_owner: (u32, u32)) -> Self {
+        Self {
+            inner: Arc::new(CompositeFsInner {
+                root,
+                slots: scc::HashMap::new(),
+                addr_to_slot: scc::HashMap::new(),
+                name_to_slot: scc::HashMap::new(),
+                next_slot: AtomicU64::new(0),
+                next_ino: AtomicU64::new(2), // 1 = root
+                fs_owner,
+            }),
+        }
+    }
+
+    /// Build the root inode for this composite filesystem.
+    #[must_use]
+    pub fn make_root_inode(&self) -> INode {
+        // Timestamps are taken at call time, so each call yields fresh ones.
+        let now = std::time::SystemTime::now();
+        INode {
+            addr: Self::ROOT_INO,
+            permissions: InodePerms::from_bits_truncate(0o755),
+            uid: self.inner.fs_owner.0,
+            gid: self.inner.fs_owner.1,
+            create_time: now,
+            last_modified_at: now,
+            parent: None,
+            size: 0,
+            itype: INodeType::Directory,
+        }
+    }
+
+    // Hands out the next composite-level inode address from the shared counter.
+    fn allocate_ino(&self) -> InodeAddr {
+        self.inner.next_ino.fetch_add(1, Ordering::Relaxed)
+    }
+
+    // Synthesizes a directory inode for a child root, parented at `ROOT_INO`.
+    fn make_child_dir_inode(&self, addr: InodeAddr) -> INode {
+        let now = std::time::SystemTime::now();
+        INode {
+            addr,
+            permissions: InodePerms::from_bits_truncate(0o755),
+            uid: self.inner.fs_owner.0,
+            gid: self.inner.fs_owner.1,
+            create_time: now,
+            last_modified_at: now,
+            parent: Some(Self::ROOT_INO),
+            size: 0,
+            itype: INodeType::Directory,
+        }
+    }
+
+    /// Allocate a new child slot with a fresh inode table and bridge mapping.
+    ///
+    /// Returns `(outer_ino, slot_idx)` for the newly created slot.
+    fn create_child_slot(&self, desc: &ChildDescriptor) -> (InodeAddr, usize)
+    where
+        R::ChildDP: Clone,
+    {
+        let outer_ino = self.allocate_ino();
+        #[expect(
+            clippy::cast_possible_truncation,
+            reason = "slot index fits in usize on 64-bit"
+        )]
+        let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize;
+
+        // The child's inode table is pre-seeded with the child's root inode.
+        let table = FutureBackedCache::default();
+        table.insert_sync(desc.root_ino.addr, desc.root_ino);
+        let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone()));
+
+        // Map the fresh composite-level address to the child's root address.
+        let bridge = Arc::new(ConcurrentBridge::new());
+        bridge.insert(outer_ino, desc.root_ino.addr);
+
+        // The slot is published before `addr_to_slot` points at it.
+        drop(self.inner.slots.insert_sync(
+            slot_idx,
+            ChildSlot {
+                inner: child_inner,
+                bridge,
+                name: desc.name.clone(),
+            },
+        ));
+        let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx);
+
+        (outer_ino, slot_idx)
+    }
+
+    /// Register a child, returning the composite-level outer inode address.
+    ///
+    /// If the child is already registered by name, the existing outer address
+    /// is returned. Otherwise a new slot is created with a fresh inode table
+    /// and bridge mapping.
+    ///
+    /// Uses `entry_sync` on `name_to_slot` for per-name exclusion:
+    /// concurrent registrations of the same child are serialized by the
+    /// `scc::HashMap` bucket lock, while different names proceed in
+    /// parallel. `forget` may remove entries from `name_to_slot` when a
+    /// slot's bridge becomes empty, but this is safe — outer inode addresses
+    /// are monotonic and never reused,
+    /// so `forget` cannot corrupt a replacement slot.
+    fn register_child(&self, desc: &ChildDescriptor) -> InodeAddr
+    where
+        R::ChildDP: Clone,
+    {
+        match self.inner.name_to_slot.entry_sync(desc.name.clone()) {
+            scc::hash_map::Entry::Occupied(mut occ) => {
+                let old_slot_idx = *occ.get();
+                let bridge = self
+                    .inner
+                    .slots
+                    .read_sync(&old_slot_idx, |_, slot| Arc::clone(&slot.bridge));
+                if let Some(outer) = bridge.and_then(|b| b.backward(desc.root_ino.addr)) {
+                    return outer;
+                }
+                // Slot exists but bridge has no mapping — replace it.
+                // (This branch is also taken when the old slot is gone.)
+                let (outer_ino, new_slot_idx) = self.create_child_slot(desc);
+                *occ.get_mut() = new_slot_idx;
+                self.inner.slots.remove_sync(&old_slot_idx);
+                outer_ino
+            }
+            scc::hash_map::Entry::Vacant(vac) => {
+                let (outer_ino, slot_idx) = self.create_child_slot(desc);
+                vac.insert_entry(slot_idx);
+                outer_ino
+            }
+        }
+    }
+}
+
+// NOTE(review): several type-parameter lists in this impl header and the
+// signatures below look truncated in this text; confirm against the repo.
+impl FsDataProvider for CompositeFs
+where
+    R::ChildDP: Clone,
+    <::ChildDP as FsDataProvider>::Reader: 'static,
+{
+    type Reader = CompositeReader<<::ChildDP as FsDataProvider>::Reader>;
+
+    // Root-level lookups resolve and register a child; any other parent is
+    // routed to the child filesystem that owns it and the resulting inner
+    // address is translated back to a composite-level address.
+    async fn lookup(&self, parent: INode, name: &OsStr) -> Result {
+        if parent.addr == Self::ROOT_INO {
+            let desc = self
+                .inner
+                .root
+                .resolve_child(name)
+                .await?
+                .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+            let outer_ino = self.register_child(&desc);
+            Ok(self.make_child_dir_inode(outer_ino))
+        } else {
+            let slot_idx = self
+                .inner
+                .addr_to_slot
+                .read_sync(&parent.addr, |_, &v| v)
+                .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+            // Extract Arc, bridge, and inner parent address under the guard.
+            let (child, bridge, inner_parent) = self
+                .inner
+                .slots
+                .read_sync(&slot_idx, |_, slot| {
+                    (
+                        Arc::clone(&slot.inner),
+                        Arc::clone(&slot.bridge),
+                        slot.bridge.forward(parent.addr),
+                    )
+                })
+                .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+            let inner_parent =
+                inner_parent.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+            // Await the lookup outside any scc guard.
+            let tracked = child
+                .get_fs()
+                .lookup(LoadedAddr::new_unchecked(inner_parent), name)
+                .await?;
+            let child_inode = tracked.inode;
+
+            // Translate inner address back to composite-level address (outside scc guard).
+            // A fallback address is allocated up front and goes unused when
+            // the inner address is already mapped.
+            let fallback = self.allocate_ino();
+            let outer_ino = bridge.backward_or_insert(child_inode.addr, fallback);
+
+            let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx);
+
+            Ok(INode {
+                addr: outer_ino,
+                ..child_inode
+            })
+        }
+    }
+
+    // Root listing registers every child reported by the root; other parents
+    // are listed through the owning child filesystem from offset 0.
+    async fn readdir(&self, parent: INode) -> Result, std::io::Error> {
+        if parent.addr == Self::ROOT_INO {
+            let children = self.inner.root.list_children().await?;
+            let mut entries = Vec::with_capacity(children.len());
+            for desc in &children {
+                let outer_ino = self.register_child(desc);
+                entries.push((desc.name.clone(), self.make_child_dir_inode(outer_ino)));
+            }
+            Ok(entries)
+        } else {
+            let slot_idx = self
+                .inner
+                .addr_to_slot
+                .read_sync(&parent.addr, |_, &v| v)
+                .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+            let (child, bridge, inner_parent) = self
+                .inner
+                .slots
+                .read_sync(&slot_idx, |_, slot| {
+                    (
+                        Arc::clone(&slot.inner),
+                        Arc::clone(&slot.bridge),
+                        slot.bridge.forward(parent.addr),
+                    )
+                })
+                .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+            let inner_parent =
+                inner_parent.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+            // Collect child entries outside the guard.
+            let mut child_entries = Vec::new();
+            child
+                .get_fs()
+                .readdir(LoadedAddr::new_unchecked(inner_parent), 0, |de, _offset| {
+                    child_entries.push((de.name.to_os_string(), de.inode));
+                    // Returning `false` keeps the child iterating to the end.
+                    false
+                })
+                .await?;
+
+            // Translate all inner addresses to composite-level addresses (outside scc guard).
+            let mut entries = Vec::with_capacity(child_entries.len());
+            for (name, child_inode) in child_entries {
+                let fallback = self.allocate_ino();
+                let outer_ino = bridge.backward_or_insert(child_inode.addr, fallback);
+
+                let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx);
+                entries.push((
+                    name,
+                    INode {
+                        addr: outer_ino,
+                        ..child_inode
+                    },
+                ));
+            }
+            Ok(entries)
+        }
+    }
+
+    // Resolves the owning slot, translates to the inner address, opens the
+    // file in the child and re-wraps the child's reader; the child's file
+    // handle number is not propagated.
+    async fn open(&self, inode: INode, flags: OpenFlags) -> Result {
+        let slot_idx = self
+            .inner
+            .addr_to_slot
+            .read_sync(&inode.addr, |_, &v| v)
+            .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+        let (child, inner_ino) = self
+            .inner
+            .slots
+            .read_sync(&slot_idx, |_, slot| {
+                (Arc::clone(&slot.inner), slot.bridge.forward(inode.addr))
+            })
+            .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+        let inner_ino = inner_ino.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?;
+
+        let open_file: OpenFile<<::ChildDP as FsDataProvider>::Reader> = child
+            .get_fs()
+            .open(LoadedAddr::new_unchecked(inner_ino), flags)
+            .await?;
+
+        Ok(CompositeReader {
+            inner: open_file.reader,
+        })
+    }
+
+    /// Removes the composite-level address from the child's bridge map and
+    /// then from `addr_to_slot`. When the bridge becomes empty, the slot
+    /// and its `name_to_slot` entry are garbage-collected.
+    ///
+    /// **Ordering invariant:** the bridge mapping is removed *before*
+    /// `addr_to_slot` so that a concurrent [`lookup`](Self::lookup)
+    /// calling `backward_or_insert` will allocate a *fresh* outer address
+    /// (since the old inner→outer entry is already gone from the bridge)
+    /// rather than returning the about-to-be-forgotten address. Because
+    /// the fresh address differs from the forgotten one, the subsequent
+    /// `addr_to_slot.remove_sync` here cannot destroy the concurrent
+    /// lookup's mapping.
+    ///
+    /// The slot removal uses `remove_if_sync` with a re-check of
+    /// `bridge.is_empty_locked()`, which acquires the bridge's
+    /// coordination mutex to serialize with a concurrent
+    /// `backward_or_insert` that may be mid-insert.
+    ///
+    /// The `name_to_slot` entry is dropped only while it still refers to the
+    /// slot that was just removed, so a name that `register_child` has
+    /// already re-pointed at a replacement slot is left untouched.
+    ///
+    /// The root inode is never forgotten.
+    fn forget(&self, addr: InodeAddr) {
+        if addr == Self::ROOT_INO {
+            return;
+        }
+        let Some(slot_idx) = self.inner.addr_to_slot.read_sync(&addr, |_, &v| v) else {
+            return;
+        };
+        // Remove from the bridge FIRST. The bridge's internal mutex
+        // serializes this with `backward_or_insert`, ensuring that any
+        // concurrent lookup that arrives after this point will allocate a
+        // fresh outer address rather than reusing the forgotten `addr`.
+        let bridge_empty = self
+            .inner
+            .slots
+            .read_sync(&slot_idx, |_, slot| slot.bridge.remove_by_outer(addr))
+            .unwrap_or(false);
+        // Now safe to remove from addr_to_slot — concurrent lookups that
+        // raced with us either:
+        //   (a) ran backward_or_insert BEFORE our bridge removal and got
+        //       `addr` back (same key we are removing — acceptable, see
+        //       below), or
+        //   (b) ran AFTER and got a fresh fallback address (different key,
+        //       unaffected by this removal).
+        //
+        // Case (a) is a FUSE protocol-level race: the kernel sent
+        // `forget` for this address while a lookup resolved to the same
+        // inner entity. In practice, this should not occur because
+        // `forget` fires only when nlookup reaches zero.
+        self.inner.addr_to_slot.remove_sync(&addr);
+        if bridge_empty {
+            // Bridge is empty — atomically remove the slot only if no one
+            // has re-populated the bridge between our check and this removal.
+            // `remove_if_sync` holds the scc bucket lock during evaluation,
+            // and `is_empty_locked` acquires the bridge's coordination mutex
+            // to serialize with any concurrent `backward_or_insert`.
+            let removed = self
+                .inner
+                .slots
+                .remove_if_sync(&slot_idx, |slot| slot.bridge.is_empty_locked());
+            if let Some((_, slot)) = removed {
+                // Between the slot removal above and this point a concurrent
+                // `register_child` can observe the missing slot and re-point
+                // this name at a replacement. Removing by name alone would
+                // delete that new mapping, so only remove the entry while it
+                // still refers to the slot we just dropped.
+                self.inner
+                    .name_to_slot
+                    .remove_if_sync(&slot.name, |idx| *idx == slot_idx);
+            }
+        }
+    }
+}
diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs
new file mode 100644
index 00000000..82f73b66
--- /dev/null
+++ b/lib/fs/dcache.rs
@@ -0,0 +1,211 @@
+use std::collections::BTreeMap;
+use std::ffi::{OsStr, OsString};
+use std::sync::atomic::{AtomicU8, Ordering};
+use std::sync::{Arc, RwLock};
+
+use tokio::sync::Notify;
+
+use crate::fs::LoadedAddr;
+
+/// Cached metadata for a directory entry.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DValue {
+    /// Inode address of this entry.
+    pub ino: LoadedAddr,
+    /// Whether this entry is itself a directory.
+    pub is_dir: bool,
+}
+
+/// Population states for a directory.
+const POPULATE_UNCLAIMED: u8 = 0;
+const POPULATE_IN_PROGRESS: u8 = 1;
+const POPULATE_DONE: u8 = 2;
+
+/// Result of attempting to claim a directory for population.
+pub enum PopulateStatus {
+    /// This caller won the race and should populate the directory.
+    Claimed,
+    /// Another caller is currently populating; wait and re-check.
+    InProgress,
+    /// The directory is already fully populated.
+    Done,
+}
+
+/// Per-parent directory state holding child entries and a population flag.
+struct DirState {
+    /// Child entries keyed by name, kept in sorted order.
+    children: RwLock<BTreeMap<OsString, DValue>>,
+    /// One of the `POPULATE_*` constants.
+    populated: AtomicU8,
+    /// Wakes waiters when `populated` transitions out of `IN_PROGRESS`.
+    notify: Notify,
+}
+
+impl DirState {
+    /// Creates an empty, unclaimed directory state.
+    fn new() -> Self {
+        Self {
+            children: RwLock::new(BTreeMap::new()),
+            populated: AtomicU8::new(POPULATE_UNCLAIMED),
+            notify: Notify::new(),
+        }
+    }
+}
+
+/// In-memory directory entry cache with per-parent child maps.
+///
+/// Each parent directory gets its own [`DirState`] containing a
+/// [`BTreeMap`] of child entries (kept in sorted order) and an [`AtomicU8`]
+/// population flag. This makes `readdir` O(k) in the number of children
+/// with zero sorting overhead.
+pub struct DCache {
+    /// Per-parent directory state, keyed by the parent's address.
+    dirs: scc::HashMap<LoadedAddr, Arc<DirState>>,
+}
+
+impl Default for DCache {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DCache {
+    /// Creates an empty directory cache.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            dirs: scc::HashMap::new(),
+        }
+    }
+
+    /// Returns the [`DirState`] for `parent_ino`, creating one if absent.
+    fn dir_state(&self, parent_ino: LoadedAddr) -> Arc<DirState> {
+        // Fast path: a plain read avoids taking the entry for existing parents.
+        if let Some(entry) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) {
+            return entry;
+        }
+        let state = Arc::new(DirState::new());
+        // Another caller may have inserted since the read above; in that
+        // case its state wins and the fresh one is dropped.
+        match self.dirs.entry_sync(parent_ino) {
+            scc::hash_map::Entry::Occupied(occ) => Arc::clone(occ.get()),
+            scc::hash_map::Entry::Vacant(vac) => {
+                let cloned = Arc::clone(&state);
+                vac.insert_entry(state);
+                cloned
+            }
+        }
+    }
+
+    /// Looks up a single child entry by parent inode and name.
+    ///
+    /// Returns `None` when the parent has no state yet or the name is not
+    /// cached. Never creates state for an unknown parent.
+    #[must_use]
+    pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option<DValue> {
+        let state = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v))?;
+        let children = state
+            .children
+            .read()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        children.get(name).cloned()
+    }
+
+    /// Atomically inserts or overwrites a child entry in the cache.
+    pub fn insert(&self, parent_ino: LoadedAddr, name: OsString, ino: LoadedAddr, is_dir: bool) {
+        let state = self.dir_state(parent_ino);
+        let value = DValue { ino, is_dir };
+        let mut children = state
+            .children
+            .write()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        children.insert(name, value);
+    }
+
+    /// Iterate all cached children of `parent_ino` in name-sorted order.
+    ///
+    /// Calls `f` for each `(name, value)` pair while holding the read lock.
+    /// Callers decide what to collect, avoiding unnecessary allocations for
+    /// entries that will be skipped (e.g. by offset-based pagination).
+    pub fn readdir(&self, parent_ino: LoadedAddr, mut f: impl FnMut(&OsStr, &DValue)) {
+        // An unknown parent yields no entries and creates no state.
+        let Some(state) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) else {
+            return;
+        };
+        let children = state
+            .children
+            .read()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        for (name, value) in children.iter() {
+            f(name, value);
+        }
+    }
+
+    /// Returns the [`LoadedAddr`] of every child that is itself a directory.
+    ///
+    /// Used by the prefetch logic to discover which subdirectories to
+    /// background-populate after a `readdir` completes.
+    #[must_use]
+    pub fn child_dir_addrs(&self, parent_ino: LoadedAddr) -> Vec<LoadedAddr> {
+        let Some(state) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) else {
+            return Vec::new();
+        };
+        let children = state
+            .children
+            .read()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        children
+            .values()
+            .filter(|dv| dv.is_dir)
+            .map(|dv| dv.ino)
+            .collect()
+    }
+
+    /// Atomically try to claim a directory for population.
+    ///
+    /// Uses `compare_exchange` on the three-state flag:
+    /// - `UNCLAIMED → IN_PROGRESS`: returns `Claimed` (caller should populate)
+    /// - Already `IN_PROGRESS`: returns `InProgress` (caller should wait)
+    /// - Already `DONE`: returns `Done` (nothing to do)
+    pub fn try_claim_populate(&self, parent_ino: LoadedAddr) -> PopulateStatus {
+        let state = self.dir_state(parent_ino);
+        match state.populated.compare_exchange(
+            POPULATE_UNCLAIMED,
+            POPULATE_IN_PROGRESS,
+            Ordering::AcqRel,
+            Ordering::Acquire,
+        ) {
+            Ok(_) => PopulateStatus::Claimed,
+            Err(POPULATE_IN_PROGRESS) => PopulateStatus::InProgress,
+            // Any other observed value is reported as `Done`.
+            Err(_) => PopulateStatus::Done,
+        }
+    }
+
+    /// Mark a directory as fully populated after successful population.
+    pub fn finish_populate(&self, parent_ino: LoadedAddr) {
+        let state = self.dir_state(parent_ino);
+        // The flag is published before waiters are woken so that a woken
+        // waiter re-reading it sees `DONE`.
+        state.populated.store(POPULATE_DONE, Ordering::Release);
+        state.notify.notify_waiters();
+    }
+
+    /// Abort a population attempt, resetting back to unclaimed so another
+    /// caller can retry.
+    pub fn abort_populate(&self, parent_ino: LoadedAddr) {
+        let state = self.dir_state(parent_ino);
+        state.populated.store(POPULATE_UNCLAIMED, Ordering::Release);
+        state.notify.notify_waiters();
+    }
+
+    /// Wait until a directory is no longer in the `InProgress` state.
+    ///
+    /// Uses [`Notify`] to sleep efficiently instead of spinning.
+    ///
+    /// The `Notified` future is pinned and `enable()`d before checking the
+    /// flag so that the waiter is registered with the `Notify` *before* the
+    /// state check. Without this, a `notify_waiters()` firing between
+    /// `notified()` and the first poll would be lost (since
+    /// `notify_waiters` does not store a permit), causing a permanent hang.
+    pub async fn wait_populated(&self, parent_ino: LoadedAddr) {
+        let state = self.dir_state(parent_ino);
+        loop {
+            let mut notified = std::pin::pin!(state.notify.notified());
+            notified.as_mut().enable();
+            // Returns for both `DONE` and `UNCLAIMED`; the caller re-checks.
+            let current = state.populated.load(Ordering::Acquire);
+            if current != POPULATE_IN_PROGRESS {
+                return;
+            }
+            // SAFETY(cancel): re-entering the loop re-creates the Notified
+            // future, so spurious wakeups just re-check the flag.
+            notified.await;
+        }
+    }
+}
diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs
new file mode 100644
index 00000000..06c27d4d
--- /dev/null
+++ b/lib/fs/fuser.rs
@@ -0,0 +1,377 @@
+//! FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`AsyncFs`](super::async_fs::AsyncFs).
+
+use std::collections::HashMap;
+use std::ffi::OsStr;
+use std::sync::Arc;
+
+use super::async_fs::{FileReader as _, FsDataProvider};
+use super::{FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags};
+use crate::cache::async_backed::FutureBackedCache;
+use tracing::{debug, error, instrument};
+
+/// Map an I/O error onto the errno value sent back in a FUSE reply.
+///
+/// An error that carries a raw OS code is passed through unchanged;
+/// otherwise the [`std::io::ErrorKind`] is translated, with `EIO` as the
+/// catch-all.
+#[expect(
+    clippy::wildcard_enum_match_arm,
+    reason = "ErrorKind is non_exhaustive; EIO is the safe default"
+)]
+fn io_to_errno(e: &std::io::Error) -> i32 {
+    if let Some(raw) = e.raw_os_error() {
+        return raw;
+    }
+    match e.kind() {
+        std::io::ErrorKind::NotFound => libc::ENOENT,
+        std::io::ErrorKind::PermissionDenied => libc::EACCES,
+        std::io::ErrorKind::AlreadyExists => libc::EEXIST,
+        _ => libc::EIO,
+    }
+}
+
+/// Common surface of the fuser reply types: every one can answer with an errno.
+trait FuseReply {
+    fn error(self, errno: i32);
+}
+
+// Generates a `FuseReply` impl for each listed fuser reply type.
+macro_rules! impl_fuse_reply {
+    ($($reply:ty),* $(,)?) => {
+        $(impl FuseReply for $reply {
+            fn error(self, errno: i32) {
+                // Calls the inherent fuser method (not this trait method).
+                self.error(errno);
+            }
+        })*
+    };
+}
+
+// ReplyEmpty and ReplyStatfs are excluded: release and statfs
+// do not follow the block_on -> fuse_reply pattern.
+impl_fuse_reply!(
+    fuser::ReplyEntry,
+    fuser::ReplyAttr,
+    fuser::ReplyDirectory,
+    fuser::ReplyOpen,
+    fuser::ReplyData,
+);
+
+/// Extension trait on `Result` for FUSE reply handling.
+///
+/// Centralizes the error-logging + errno-reply path so each FUSE callback
+/// only has to express its success path.
+trait FuseResultExt { + fn fuse_reply(self, reply: R, on_ok: impl FnOnce(T, R)); +} + +impl FuseResultExt for Result { + fn fuse_reply(self, reply: R, on_ok: impl FnOnce(T, R)) { + match self { + Ok(val) => on_ok(val, reply), + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(io_to_errno(&e)); + } + } + } +} + +type FuseWard = crate::drop_ward::DropWard< + (Arc>, DP), + InodeAddr, + super::async_fs::InodeForget, +>; + +struct FuseBridgeInner { + ward: FuseWard, + fs: super::async_fs::AsyncFs, +} + +impl FuseBridgeInner { + fn create(table: FutureBackedCache, provider: DP) -> Self { + let table = Arc::new(table); + let ward = crate::drop_ward::DropWard::new((Arc::clone(&table), provider.clone())); + let fs = super::async_fs::AsyncFs::new_preseeded(provider, table); + Self { ward, fs } + } + + fn get_fs(&self) -> &super::async_fs::AsyncFs { + &self.fs + } + + fn ward_inc(&mut self, addr: InodeAddr) -> usize { + self.ward.inc(addr) + } + + fn ward_dec_count(&mut self, addr: InodeAddr, count: usize) -> Option { + self.ward.dec_count(&addr, count) + } +} + +/// Convert an `INode` to the fuser-specific `FileAttr`. 
+fn inode_to_fuser_attr(inode: &INode, block_size: u32) -> fuser::FileAttr { + fuser::FileAttr { + ino: inode.addr, + size: inode.size, + blocks: inode.size.div_ceil(512), + atime: inode.last_modified_at, + mtime: inode.last_modified_at, + ctime: inode.last_modified_at, + crtime: inode.create_time, + kind: inode_type_to_fuser(inode.itype), + perm: inode.permissions.bits(), + nlink: 1, + uid: inode.uid, + gid: inode.gid, + rdev: 0, + blksize: block_size, + flags: 0, + } +} + +#[expect( + clippy::wildcard_enum_match_arm, + reason = "INodeType is non_exhaustive; File is the safe default" +)] +fn inode_type_to_fuser(itype: INodeType) -> fuser::FileType { + match itype { + INodeType::Directory => fuser::FileType::Directory, + INodeType::Symlink => fuser::FileType::Symlink, + _ => fuser::FileType::RegularFile, + } +} + +const BLOCK_SIZE: u32 = 4096; + +/// Bridges a generic [`FsDataProvider`] to the [`fuser::Filesystem`] trait. +/// +/// Owns a self-referential inode table + ward + [`AsyncFs`](super::async_fs::AsyncFs), +/// plus an open-file map and a tokio runtime handle for blocking on async ops. +pub struct FuserAdapter { + inner: FuseBridgeInner, + open_files: HashMap>, + runtime: tokio::runtime::Handle, +} + +impl FuserAdapter { + // TODO(markovejnovic): This low TTL is really not ideal. It slows us down a lot, since the + // kernel has to ask us for every single lookup all the time. + // + // I think a better implementation is to implement + // + // notify_inval_inode(ino, offset, len) + // notify_inval_entry(parent_ino, name) + // + // These two functions can be used to invalidate specific entries in the kernel cache when we + // know they have changed. This would allow us to set a much higher TTL here. + const SHAMEFUL_TTL: std::time::Duration = std::time::Duration::from_secs(1); + + /// Create a new adapter from a pre-seeded inode table and data provider. + /// + /// The `table` must already have the root inode inserted. 
+ pub fn new( + table: FutureBackedCache, + provider: DP, + runtime: tokio::runtime::Handle, + ) -> Self { + Self { + inner: FuseBridgeInner::create(table, provider), + open_files: HashMap::new(), + runtime, + } + } +} + +impl fuser::Filesystem for FuserAdapter { + #[instrument(name = "FuserAdapter::lookup", skip(self, _req, reply))] + fn lookup( + &mut self, + _req: &fuser::Request<'_>, + parent: u64, + name: &OsStr, + reply: fuser::ReplyEntry, + ) { + self.runtime + .block_on(async { + let tracked = self + .inner + .get_fs() + .lookup(LoadedAddr::new_unchecked(parent), name) + .await?; + self.inner.ward_inc(tracked.inode.addr); + Ok::<_, std::io::Error>(tracked.inode) + }) + .fuse_reply(reply, |inode, reply| { + let f_attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); + debug!(?f_attr, "replying..."); + reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); + }); + } + + #[instrument(name = "FuserAdapter::getattr", skip(self, _req, _fh, reply))] + fn getattr( + &mut self, + _req: &fuser::Request<'_>, + ino: u64, + _fh: Option, + reply: fuser::ReplyAttr, + ) { + self.runtime + .block_on(async { + self.inner + .get_fs() + .getattr(LoadedAddr::new_unchecked(ino)) + .await + }) + .fuse_reply(reply, |inode, reply| { + let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); + debug!(?attr, "replying..."); + reply.attr(&Self::SHAMEFUL_TTL, &attr); + }); + } + + #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] + fn readdir( + &mut self, + _req: &fuser::Request<'_>, + ino: u64, + _fh: u64, + offset: i64, + reply: fuser::ReplyDirectory, + ) { + let offset_u64 = offset.cast_unsigned(); + self.runtime + .block_on(async { + let mut entries = Vec::new(); + self.inner + .get_fs() + .readdir( + LoadedAddr::new_unchecked(ino), + offset_u64, + |de, _next_offset| { + entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); + false + }, + ) + .await?; + Ok::<_, std::io::Error>(entries) + }) + .fuse_reply(reply, |entries, mut reply| { + for (i, 
(entry_ino, entry_name, entry_itype)) in entries.iter().enumerate() { + let kind = inode_type_to_fuser(*entry_itype); + #[expect( + clippy::cast_possible_truncation, + reason = "offset fits in usize on supported 64-bit platforms" + )] + let abs_idx = offset_u64 as usize + i + 1; + let Ok(idx): Result = abs_idx.try_into() else { + error!("Directory entry index {} too large for fuser", abs_idx); + reply.error(libc::EIO); + return; + }; + + debug!(?entry_name, ino = entry_ino, "adding entry to reply..."); + if reply.add(*entry_ino, idx, kind, entry_name) { + debug!("buffer full for now, stopping readdir"); + break; + } + } + + debug!("finalizing reply..."); + reply.ok(); + }); + } + + #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] + fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { + let flags = OpenFlags::from_bits_truncate(flags); + self.runtime + .block_on(async { + let open_file = self + .inner + .get_fs() + .open(LoadedAddr::new_unchecked(ino), flags) + .await?; + let fh = open_file.fh; + self.open_files.insert(fh, Arc::clone(&open_file.reader)); + Ok::<_, std::io::Error>(fh) + }) + .fuse_reply(reply, |fh, reply| { + debug!(handle = fh, "replying..."); + reply.opened(fh, 0); + }); + } + + #[instrument( + name = "FuserAdapter::read", + skip(self, _req, _ino, fh, offset, size, _flags, _lock_owner, reply) + )] + fn read( + &mut self, + _req: &fuser::Request<'_>, + _ino: u64, + fh: u64, + offset: i64, + size: u32, + _flags: i32, + _lock_owner: Option, + reply: fuser::ReplyData, + ) { + self.runtime + .block_on(async { + let reader = self + .open_files + .get(&fh) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EBADF))?; + reader.read(offset.cast_unsigned(), size).await + }) + .fuse_reply(reply, |data, reply| { + debug!(read_bytes = data.len(), "replying..."); + reply.data(&data); + }); + } + + #[instrument( + name = "FuserAdapter::release", + skip(self, _req, _ino, fh, _flags, 
_lock_owner, _flush, reply) + )] + fn release( + &mut self, + _req: &fuser::Request<'_>, + _ino: u64, + fh: u64, + _flags: i32, + _lock_owner: Option, + _flush: bool, + reply: fuser::ReplyEmpty, + ) { + if let Some(reader) = self.open_files.remove(&fh) { + if let Err(e) = self.runtime.block_on(reader.close()) { + debug!(error = %e, "reader close reported error"); + } + debug!("replying ok"); + reply.ok(); + } else { + debug!("file handle not open, replying error"); + reply.error(libc::EBADF); + } + } + + #[expect( + clippy::cast_possible_truncation, + reason = "nlookups fits in usize on supported 64-bit platforms" + )] + #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] + fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { + self.inner.ward_dec_count(ino, nlookup as usize); + } + + #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] + fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { + let stats = self.inner.get_fs().statfs(); + debug!(?stats, "replying..."); + reply.statfs( + stats.total_blocks, + stats.free_blocks, + stats.available_blocks, + stats.total_inodes, + stats.free_inodes, + stats.block_size, + stats.max_filename_length, + 0, + ); + } +} diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs new file mode 100644 index 00000000..52f9510e --- /dev/null +++ b/lib/fs/mod.rs @@ -0,0 +1,240 @@ +//! Useful filesystem generalizations. +//! +//! # Cache invalidation +//! +//! The current implementation caches directory listings and inode data +//! indefinitely once populated. Staleness is mitigated only by a short +//! FUSE entry/attr TTL (currently 1 second in `FuserAdapter`). +//! +//! The intended long-term strategy is to use FUSE kernel notifications +//! (`notify_inval_inode` / `notify_inval_entry`) to proactively invalidate +//! specific entries when the backing data changes. This would allow a +//! much higher TTL while still reflecting changes promptly. 
The key +//! changes needed: +//! +//! 1. `DCache` needs a `remove` or `invalidate` method to reset a +//! parent's `PopulateStatus` back to `UNCLAIMED`. +//! 2. `FuserAdapter` needs access to the `fuser::Session` handle to +//! send `notify_inval_entry` notifications. +//! 3. Data providers need a way to signal when their backing data changes +//! (e.g. webhook, polling, or subscription). + +/// Async filesystem cache with concurrent inode management. +pub mod async_fs; +/// Lock-free bidirectional inode address mapping. +pub mod bridge; +/// Generic composite filesystem types. +pub mod composite; +/// Directory entry cache for fast parent-child lookups. +pub mod dcache; +/// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. +pub mod fuser; + +pub use async_fs::{InodeForget, InodeLifecycle, OpenFile, ResolvedINode}; + +use std::ffi::OsStr; +use std::time::SystemTime; + +use bitflags::bitflags; + +/// Type representing an inode identifier. +pub type InodeAddr = u64; + +/// Represents an inode address that has been loaded into the inode table. +/// +/// This newtype wrapper distinguishes inode addresses that are known to exist +/// in the [`async_fs::AsyncFs`] inode table from raw [`InodeAddr`] values. +/// +/// The inner field is private to prevent unchecked construction. Code within +/// the crate may use [`LoadedAddr::new_unchecked`] at trusted boundaries +/// (e.g. after inserting into the inode table, or at the FUSE adapter boundary +/// where the kernel provides addresses it previously received from us). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct LoadedAddr(InodeAddr); + +impl LoadedAddr { + /// Construct a `LoadedAddr` without validating that the address exists in + /// the inode table. 
+ /// + /// # Safety contract (logical, not `unsafe`) + /// + /// The caller must ensure one of: + /// - The address was previously inserted into an inode table, **or** + /// - The address originates from the FUSE kernel (which only knows + /// addresses we previously returned to it). + #[doc(hidden)] + #[must_use] + pub fn new_unchecked(addr: InodeAddr) -> Self { + Self(addr) + } + + /// Return the raw inode address. + #[must_use] + pub fn addr(self) -> InodeAddr { + self.0 + } +} + +/// Type representing a file handle. +pub type FileHandle = u64; + +bitflags! { + /// Permission bits for an inode, similar to Unix file permissions. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct InodePerms: u16 { + /// Other: execute permission. + const OTHER_EXECUTE = 1 << 0; + /// Other: write permission. + const OTHER_WRITE = 1 << 1; + /// Other: read permission. + const OTHER_READ = 1 << 2; + + /// Group: execute permission. + const GROUP_EXECUTE = 1 << 3; + /// Group: write permission. + const GROUP_WRITE = 1 << 4; + /// Group: read permission. + const GROUP_READ = 1 << 5; + + /// Owner: execute permission. + const OWNER_EXECUTE = 1 << 6; + /// Owner: write permission. + const OWNER_WRITE = 1 << 7; + /// Owner: read permission. + const OWNER_READ = 1 << 8; + + /// Sticky bit. + const STICKY = 1 << 9; + /// Set-group-ID bit. + const SETGID = 1 << 10; + /// Set-user-ID bit. + const SETUID = 1 << 11; + + /// Other: read, write, and execute. + const OTHER_RWX = Self::OTHER_READ.bits() + | Self::OTHER_WRITE.bits() + | Self::OTHER_EXECUTE.bits(); + /// Group: read, write, and execute. + const GROUP_RWX = Self::GROUP_READ.bits() + | Self::GROUP_WRITE.bits() + | Self::GROUP_EXECUTE.bits(); + /// Owner: read, write, and execute. + const OWNER_RWX = Self::OWNER_READ.bits() + | Self::OWNER_WRITE.bits() + | Self::OWNER_EXECUTE.bits(); + } +} + +bitflags! { + /// Flags for opening a file, similar to Unix open(2) flags. 
+ #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct OpenFlags: i32 { + /// Open for reading only. + const RDONLY = libc::O_RDONLY; + /// Open for writing only. + const WRONLY = libc::O_WRONLY; + /// Open for reading and writing. + const RDWR = libc::O_RDWR; + + /// Append on each write. + const APPEND = libc::O_APPEND; + /// Truncate to zero length. + const TRUNC = libc::O_TRUNC; + /// Create file if it does not exist. + const CREAT = libc::O_CREAT; + /// Error if file already exists (with `CREAT`). + const EXCL = libc::O_EXCL; + + /// Non-blocking mode. + const NONBLOCK = libc::O_NONBLOCK; + /// Synchronous writes. + const SYNC = libc::O_SYNC; + /// Synchronous data integrity writes. + const DSYNC = libc::O_DSYNC; + /// Do not follow symlinks. + const NOFOLLOW = libc::O_NOFOLLOW; + /// Set close-on-exec. + const CLOEXEC = libc::O_CLOEXEC; + /// Fail if not a directory. + const DIRECTORY = libc::O_DIRECTORY; + + /// Do not update access time (Linux only). + #[cfg(target_os = "linux")] + const NOATIME = libc::O_NOATIME; + } +} + +/// The type of an inode entry in the filesystem. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum INodeType { + /// A regular file. + File, + /// A directory. + Directory, + /// A symbolic link. + Symlink, +} + +/// Representation of an inode. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct INode { + /// The address of this inode, which serves as its unique identifier. + pub addr: InodeAddr, + /// The permissions associated with this inode, represented as a bitfield. + pub permissions: InodePerms, + /// The user ID of the owner of this inode. + pub uid: u32, + /// The group ID of the owner of this inode. + pub gid: u32, + /// The time this inode was created at. + pub create_time: SystemTime, + /// The time this inode was last modified at. + pub last_modified_at: SystemTime, + /// The parent inode address, if any. This is `None` for the root inode. 
+ pub parent: Option, + /// The size of the file represented by this inode, in bytes. + pub size: u64, + /// Additional information about the type of this inode (e.g., file vs directory). + pub itype: INodeType, +} + +impl INode { + /// Check if this inode is the root inode (i.e., has no parent). + #[must_use] + pub fn is_root(&self) -> bool { + self.parent.is_none() + } +} + +/// A directory entry yielded by [`async_fs::AsyncFs::readdir`]. +/// +/// Borrows the entry name from the directory cache's iteration buffer. +#[derive(Debug, Clone, Copy)] +pub struct DirEntry<'a> { + /// The name of this entry within its parent directory. + pub name: &'a OsStr, + /// The full inode data for this entry. + pub inode: INode, +} + +/// Filesystem statistics returned by [`async_fs::AsyncFs::statfs`]. +/// +/// Block-related sizes are in units of `block_size` bytes. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct AsyncFsStats { + /// Filesystem block size (bytes). + pub block_size: u32, + /// Total number of data blocks. + pub total_blocks: u64, + /// Number of free blocks. + pub free_blocks: u64, + /// Number of blocks available to unprivileged users. + pub available_blocks: u64, + /// Total number of file nodes (inodes). + pub total_inodes: u64, + /// Number of free file nodes. + pub free_inodes: u64, + /// Maximum filename length (bytes). + pub max_filename_length: u32, +} diff --git a/lib/lib.rs b/lib/lib.rs index f7388bd5..40b1e8f2 100644 --- a/lib/lib.rs +++ b/lib/lib.rs @@ -2,4 +2,7 @@ /// Caching primitives for git-fs. pub mod cache; +pub mod drop_ward; +/// Filesystem abstractions and caching layers. 
+pub mod fs; pub mod io; diff --git a/src/daemon.rs b/src/daemon.rs index dac2d052..102e476b 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -1,7 +1,6 @@ use tokio::select; use crate::app_config; -use crate::fs::mescloud::{MesaFS, OrgConfig}; use tracing::{debug, error, info}; mod managed_fuse { @@ -14,9 +13,12 @@ mod managed_fuse { use nix::errno::Errno; - use super::{MesaFS, OrgConfig, app_config, debug, error}; - use crate::fs::fuser::FuserAdapter; + use git_fs::cache::async_backed::FutureBackedCache; + + use super::{app_config, debug, error}; use fuser::BackgroundSession; + use git_fs::fs::fuser::FuserAdapter; + use secrecy::ExposeSecret as _; pub struct FuseCoreScope { _session: BackgroundSession, @@ -36,15 +38,44 @@ mod managed_fuse { config: app_config::Config, handle: tokio::runtime::Handle, ) -> Result { - let orgs = config - .organizations - .iter() - .map(|(org_name, org)| OrgConfig { - name: org_name.clone(), - api_key: org.api_key.clone(), - }); - let mesa_fs = MesaFS::new(orgs, (config.uid, config.gid), &config.cache); - let fuse_adapter = FuserAdapter::new(mesa_fs, handle); + let fs_owner = (config.uid, config.gid); + + let mut org_children = Vec::new(); + for (org_name, org_conf) in &config.organizations { + let client = + crate::fs::mescloud::build_mesa_client(org_conf.api_key.expose_secret()); + let dp = if org_name == "github" { + let github_org_root = crate::fs::mescloud::roots::GithubOrgRoot::new( + client, + org_name.clone(), + config.cache.clone(), + fs_owner, + ); + crate::fs::mescloud::roots::OrgChildDP::Github( + git_fs::fs::composite::CompositeFs::new(github_org_root, fs_owner), + ) + } else { + let standard_org_root = crate::fs::mescloud::roots::StandardOrgRoot::new( + client, + org_name.clone(), + config.cache.clone(), + fs_owner, + ); + crate::fs::mescloud::roots::OrgChildDP::Standard( + git_fs::fs::composite::CompositeFs::new(standard_org_root, fs_owner), + ) + }; + org_children.push((std::ffi::OsString::from(org_name), dp)); + } + 
+ let mesa_root = crate::fs::mescloud::roots::MesaRoot::new(org_children); + let composite = git_fs::fs::composite::CompositeFs::new(mesa_root, fs_owner); + + let table = FutureBackedCache::default(); + let root_inode = composite.make_root_inode(); + table.insert_sync(1, root_inode); + + let fuse_adapter = FuserAdapter::new(table, composite, handle); let mount_opts = [ fuser::MountOption::FSName("git-fs".to_owned()), fuser::MountOption::RO, diff --git a/src/fs/fuser.rs b/src/fs/fuser.rs deleted file mode 100644 index 86ddabb6..00000000 --- a/src/fs/fuser.rs +++ /dev/null @@ -1,351 +0,0 @@ -use std::ffi::OsStr; - -use crate::fs::r#trait::{CommonFileAttr, DirEntryType, FileAttr, Fs, LockOwner, OpenFlags}; -use tracing::{debug, error, instrument}; - -impl From for fuser::FileAttr { - fn from(val: FileAttr) -> Self { - fn common_to_fuser(common: CommonFileAttr) -> fuser::FileAttr { - fuser::FileAttr { - ino: common.ino, - size: 0, - blocks: 0, - atime: common.atime, - mtime: common.mtime, - ctime: common.ctime, - crtime: common.crtime, - kind: fuser::FileType::RegularFile, - perm: common.perm.bits(), - nlink: common.nlink, - uid: common.uid, - gid: common.gid, - rdev: 0, - blksize: common.blksize, - flags: 0, - } - } - - match val { - FileAttr::RegularFile { - common, - size, - blocks, - } => { - let mut attr = common_to_fuser(common); - attr.size = size; - attr.blocks = blocks; - attr.kind = fuser::FileType::RegularFile; - attr - } - FileAttr::Directory { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::Directory; - attr - } - FileAttr::Symlink { common, size } => { - let mut attr = common_to_fuser(common); - attr.size = size; - attr.kind = fuser::FileType::Symlink; - attr - } - FileAttr::CharDevice { common, rdev } => { - let mut attr = common_to_fuser(common); - debug_assert!(u32::try_from(rdev).is_ok(), "rdev value {rdev} too large"); - attr.rdev = rdev - .try_into() - .map_err(|_| { - error!("rdev value {rdev} too large for 
fuser::FileAttr"); - }) - .unwrap_or(0); - attr.kind = fuser::FileType::CharDevice; - attr - } - FileAttr::BlockDevice { common, rdev } => { - let mut attr = common_to_fuser(common); - debug_assert!(u32::try_from(rdev).is_ok(), "rdev value {rdev} too large"); - attr.rdev = rdev - .try_into() - .map_err(|_| { - error!("rdev value {rdev} too large for fuser::FileAttr"); - }) - .unwrap_or(0); - attr.kind = fuser::FileType::BlockDevice; - attr - } - FileAttr::NamedPipe { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::NamedPipe; - attr - } - FileAttr::Socket { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::Socket; - attr - } - } - } -} - -impl From for fuser::FileType { - fn from(val: DirEntryType) -> Self { - match val { - DirEntryType::RegularFile => Self::RegularFile, - DirEntryType::Directory => Self::Directory, - DirEntryType::Symlink => Self::Symlink, - DirEntryType::CharDevice => Self::CharDevice, - DirEntryType::BlockDevice => Self::BlockDevice, - DirEntryType::NamedPipe => Self::NamedPipe, - DirEntryType::Socket => Self::Socket, - } - } -} - -impl From for OpenFlags { - fn from(val: i32) -> Self { - Self::from_bits_truncate(val) - } -} - -pub struct FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - fs: F, - runtime: tokio::runtime::Handle, -} - -impl FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - // TODO(markovejnovic): This low TTL is really not ideal. It slows us down a lot, since the - // kernel has to ask us for every single lookup all the time. 
- // - // I think a better implementation is to implement - // - // notify_inval_inode(ino, offset, len) - // notify_inval_entry(parent_ino, name) - // - // These two functions can be used to invalidate specific entries in the kernel cache when we - // know they have changed. This would allow us to set a much higher TTL here. - const SHAMEFUL_TTL: std::time::Duration = std::time::Duration::from_secs(1); - - pub fn new(fs: F, runtime: tokio::runtime::Handle) -> Self { - Self { fs, runtime } - } -} - -impl fuser::Filesystem for FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - #[instrument(name = "FuserAdapter::lookup", skip(self, _req, reply))] - fn lookup( - &mut self, - _req: &fuser::Request<'_>, - parent: u64, - name: &OsStr, - reply: fuser::ReplyEntry, - ) { - match self.runtime.block_on(self.fs.lookup(parent, name)) { - Ok(attr) => { - // TODO(markovejnovic): Passing generation = 0 here is a recipe for disaster. - // Someone with A LOT of files will likely see inode reuse which will lead to a - // disaster. 
- let f_attr: fuser::FileAttr = attr.into(); - debug!(?f_attr, "replying..."); - reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::getattr", skip(self, _req, fh, reply))] - fn getattr( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: Option, - reply: fuser::ReplyAttr, - ) { - match self.runtime.block_on(self.fs.getattr(ino, fh)) { - Ok(attr) => { - debug!(?attr, "replying..."); - reply.attr(&Self::SHAMEFUL_TTL, &attr.into()); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] - fn readdir( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - _fh: u64, - offset: i64, - mut reply: fuser::ReplyDirectory, - ) { - let entries = match self.runtime.block_on(self.fs.readdir(ino)) { - Ok(entries) => entries, - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - return; - } - }; - - #[expect( - clippy::cast_possible_truncation, - reason = "fuser offset is i64 but always non-negative" - )] - for (i, entry) in entries - .iter() - .enumerate() - .skip(offset.cast_unsigned() as usize) - { - let kind: fuser::FileType = entry.kind.into(); - let Ok(idx): Result = (i + 1).try_into() else { - error!("Directory entry index {} too large for fuser", i + 1); - reply.error(libc::EIO); - return; - }; - - debug!(?entry, "adding entry to reply..."); - if reply.add(entry.ino, idx, kind, &entry.name) { - debug!("buffer full for now, stopping readdir"); - break; - } - } - - debug!("finalizing reply..."); - reply.ok(); - } - - #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] - fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { - match self.runtime.block_on(self.fs.open(ino, flags.into())) { - Ok(open_file) => { - debug!(handle = 
open_file.handle, "replying..."); - reply.opened(open_file.handle, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument( - name = "FuserAdapter::read", - skip(self, _req, fh, offset, size, flags, lock_owner, reply) - )] - fn read( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: u64, - offset: i64, - size: u32, - flags: i32, - lock_owner: Option, - reply: fuser::ReplyData, - ) { - let flags: OpenFlags = flags.into(); - let lock_owner = lock_owner.map(LockOwner); - match self.runtime.block_on(self.fs.read( - ino, - fh, - offset.cast_unsigned(), - size, - flags, - lock_owner, - )) { - Ok(data) => { - debug!(read_bytes = data.len(), "replying..."); - reply.data(&data); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::release", skip(self, _req, _lock_owner, reply))] - fn release( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: u64, - flags: i32, - _lock_owner: Option, - flush: bool, - reply: fuser::ReplyEmpty, - ) { - match self - .runtime - .block_on(self.fs.release(ino, fh, flags.into(), flush)) - { - Ok(()) => { - debug!("replying ok"); - reply.ok(); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] - fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { - self.runtime.block_on(self.fs.forget(ino, nlookup)); - } - - #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] - fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { - self.runtime.block_on(async { - match self.fs.statfs().await { - Ok(statvfs) => { - debug!(?statvfs, "replying..."); - reply.statfs( - statvfs.total_blocks, - statvfs.free_blocks, - statvfs.available_blocks, - statvfs.total_inodes, - statvfs.free_inodes, - statvfs.block_size, - 
statvfs.max_filename_length, - 0, - ); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.raw_os_error().unwrap_or(libc::EIO)); - } - } - }); - } -} diff --git a/src/fs/icache/async_cache.rs b/src/fs/icache/async_cache.rs deleted file mode 100644 index 84003da3..00000000 --- a/src/fs/icache/async_cache.rs +++ /dev/null @@ -1,1410 +0,0 @@ -//! Async inode cache with InFlight/Available state machine. - -use std::future::Future; - -use scc::HashMap as ConcurrentHashMap; -use tokio::sync::watch; - -use tracing::{instrument, trace, warn}; - -use crate::fs::r#trait::Inode; - -use super::IcbLike; - -/// State of an entry in the async inode cache. -pub enum IcbState { - /// Entry is being loaded; waiters clone the receiver and `.changed().await`. - /// - /// The channel carries `()` rather than the resolved value because the map - /// is the single source of truth: ICBs are mutated in-place (rc, attrs) so - /// a snapshot in the channel would immediately go stale. Sender-drop also - /// gives us implicit, leak-proof signalling on both success and error paths. - InFlight(watch::Receiver<()>), - /// Entry is ready for use. - Available(I), -} - -impl IcbState { - /// Consume `self`, returning the inner value if `Available`, or `None` if `InFlight`. - fn into_available(self) -> Option { - match self { - Self::Available(inner) => Some(inner), - Self::InFlight(_) => None, - } - } -} - -/// Trait for resolving an inode to its control block. -/// -/// Implementations act as a "promise" that an ICB will eventually be produced -/// for a given inode. The cache calls `resolve` when it needs to populate a -/// missing entry. -pub trait IcbResolver: Send + Sync { - /// The inode control block type this resolver produces. - type Icb: IcbLike + Send + Sync; - /// Error type returned when resolution fails. - type Error: Send; - - /// Resolve an inode to a fully-populated control block. 
- /// - /// - `stub`: `Some(icb)` if upgrading an existing stub entry, `None` if creating - /// from scratch. The stub typically has `parent` and `path` set but `attr` missing. - /// - `cache`: reference to the cache, useful for walking parent chains to build paths. - fn resolve( - &self, - ino: Inode, - stub: Option, - cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized; -} - -/// Async, concurrency-safe inode cache. -/// -/// All methods take `&self` — internal synchronization is provided by -/// `scc::HashMap` (sharded lock-free map). -pub struct AsyncICache { - resolver: R, - inode_table: ConcurrentHashMap>, -} - -impl AsyncICache { - /// Create a new cache with a root ICB at `root_ino` (rc = 1). - pub fn new(resolver: R, root_ino: Inode, root_path: impl Into) -> Self { - let table = ConcurrentHashMap::new(); - // insert_sync is infallible for a fresh map - drop(table.insert_sync( - root_ino, - IcbState::Available(R::Icb::new_root(root_path.into())), - )); - Self { - resolver, - inode_table: table, - } - } - - /// Number of entries (`InFlight` + `Available`) in the table. - pub fn inode_count(&self) -> usize { - self.inode_table.len() - } - - /// Wait until `ino` is `Available`. - /// Returns `true` if the entry exists and is Available, - /// `false` if the entry does not exist. - #[instrument(name = "AsyncICache::wait_for_available", skip(self))] - async fn wait_for_available(&self, ino: Inode) -> bool { - loop { - let rx = self - .inode_table - .read_async(&ino, |_, s| match s { - IcbState::InFlight(rx) => Some(rx.clone()), - IcbState::Available(_) => None, - }) - .await; - - match rx { - None => return false, // key missing - Some(None) => return true, // Available - Some(Some(mut rx)) => { - // Wait for the resolver to complete (or fail/drop sender). - // changed() returns Err(RecvError) when sender is dropped, - // which is fine — it means resolution finished. 
- let _ = rx.changed().await; - // Loop back — the entry might be InFlight again if another - // resolution cycle started between our wakeup and re-read. - } - } - } - } - - /// Check whether `ino` has an entry in the table (either `InFlight` or `Available`). - /// - /// This is a non-blocking, synchronous check. It does **not** wait for - /// `InFlight` entries to resolve. - pub fn contains(&self, ino: Inode) -> bool { - self.inode_table.contains_sync(&ino) - } - - /// Read an ICB via closure. **Awaits** if `InFlight`. - /// Returns `None` if `ino` doesn't exist. - #[instrument(name = "AsyncICache::get_icb", skip(self, f))] - // `Sync` is required because `f` is held across `.await` points in the - // loop body; for the resulting future to be `Send`, the captured closure - // must be `Sync` (clippy::future_not_send). - pub async fn get_icb( - &self, - ino: Inode, - f: impl Fn(&R::Icb) -> T + Send + Sync, - ) -> Option { - loop { - if !self.wait_for_available(ino).await { - return None; - } - let result = self - .inode_table - .read_async(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .await; - match result { - Some(Some(val)) => return Some(val), - Some(None) => {} // was InFlight, retry - None => return None, // key missing - } - } - } - - /// Mutate an ICB via closure. **Awaits** if `InFlight`. - /// Returns `None` if `ino` doesn't exist. 
- #[instrument(name = "AsyncICache::get_icb_mut", skip(self, f))] - pub async fn get_icb_mut( - &self, - ino: Inode, - mut f: impl FnMut(&mut R::Icb) -> T + Send, - ) -> Option { - loop { - if !self.wait_for_available(ino).await { - return None; - } - let result = self - .inode_table - .update_async(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .await; - match result { - Some(Some(val)) => return Some(val), - Some(None) => {} // was InFlight, retry - None => return None, // key missing - } - } - } - - /// Insert an ICB directly as `Available`. If the entry is currently - /// `InFlight`, waits for resolution before overwriting. - #[instrument(name = "AsyncICache::insert_icb", skip(self, icb))] - pub async fn insert_icb(&self, ino: Inode, icb: R::Icb) { - use scc::hash_map::Entry; - let mut icb = Some(icb); - loop { - match self.inode_table.entry_async(ino).await { - Entry::Vacant(vac) => { - let val = icb - .take() - .unwrap_or_else(|| unreachable!("icb consumed more than once")); - vac.insert_entry(IcbState::Available(val)); - return; - } - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - IcbState::Available(_) => { - let val = icb - .take() - .unwrap_or_else(|| unreachable!("icb consumed more than once")); - *occ.get_mut() = IcbState::Available(val); - return; - } - }, - } - } - } - - /// Get-or-insert pattern. If `ino` exists (awaits `InFlight`), runs `then` - /// on it. If absent, calls `factory` to create, inserts, then runs `then`. - /// - /// Both `factory` and `then` are `FnOnce` — wrapped in `Option` internally - /// to satisfy the borrow checker across the await-loop. 
- #[instrument(name = "AsyncICache::entry_or_insert_icb", skip(self, factory, then))] - pub async fn entry_or_insert_icb( - &self, - ino: Inode, - factory: impl FnOnce() -> R::Icb, - then: impl FnOnce(&mut R::Icb) -> T, - ) -> T { - use scc::hash_map::Entry; - let mut factory = Some(factory); - let mut then_fn = Some(then); - - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - return t(icb); - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); // release shard lock before awaiting - let _ = rx.changed().await; - } - }, - Entry::Vacant(vac) => { - let f = factory - .take() - .unwrap_or_else(|| unreachable!("factory consumed more than once")); - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - let mut icb = f(); - let result = t(&mut icb); - vac.insert_entry(IcbState::Available(icb)); - return result; - } - } - } - } - - /// Write an ICB back to the table only if the entry still exists. - /// - /// If the entry was evicted (vacant) during resolution, the result is - /// silently dropped — this prevents resurrecting entries that a concurrent - /// `forget` has already removed. - async fn write_back_if_present(&self, ino: Inode, icb: R::Icb) { - use scc::hash_map::Entry; - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => { - *occ.get_mut() = IcbState::Available(icb); - } - Entry::Vacant(_) => { - tracing::debug!( - ino, - "resolved inode was evicted during resolution, dropping result" - ); - } - } - } - - /// Look up `ino`. If `Available` and fully resolved, run `then` and return - /// `Ok(T)`. If `Available` but `needs_resolve()` is true (stub), extract - /// the stub, resolve it, cache the result, then run `then`. 
If absent, call - /// the resolver to fetch the ICB, cache it, then run `then`. If another task - /// is already resolving this inode (`InFlight`), wait for it. - /// - /// Returns `Err(R::Error)` if resolution fails. On error the `InFlight` - /// entry is removed so subsequent calls can retry. - #[instrument(name = "AsyncICache::get_or_resolve", skip(self, then))] - pub async fn get_or_resolve( - &self, - ino: Inode, - then: impl FnOnce(&R::Icb) -> T, - ) -> Result { - use scc::hash_map::Entry; - - let mut then_fn = Some(then); - - // Fast path: Available and fully resolved - { - let hit = self - .inode_table - .read_async(&ino, |_, s| match s { - IcbState::Available(icb) if !icb.needs_resolve() => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - Some(t(icb)) - } - IcbState::InFlight(_) | IcbState::Available(_) => None, - }) - .await; - if let Some(Some(r)) = hit { - return Ok(r); - } - } - - // Slow path: missing, InFlight, or stub needing resolution - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) if !icb.needs_resolve() => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - return Ok(t(icb)); - } - IcbState::Available(_) => { - // Stub needing resolution — extract stub, replace with InFlight - let (tx, rx) = watch::channel(()); - let old = std::mem::replace(occ.get_mut(), IcbState::InFlight(rx)); - let stub = old.into_available().unwrap_or_else(|| { - unreachable!("matched Available arm, replaced value must be Available") - }); - let fallback = stub.clone(); - drop(occ); // release shard lock before awaiting - - match self.resolver.resolve(ino, Some(stub), self).await { - Ok(icb) => { - let t = then_fn.take().unwrap_or_else(|| { - unreachable!("then_fn consumed more than once") - }); - let result = t(&icb); - self.write_back_if_present(ino, icb).await; - drop(tx); - 
return Ok(result); - } - Err(e) => { - if fallback.rc() > 0 { - self.write_back_if_present(ino, fallback).await; - } else { - self.inode_table.remove_async(&ino).await; - } - drop(tx); - return Err(e); - } - } - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - }, - Entry::Vacant(vac) => { - let (tx, rx) = watch::channel(()); - vac.insert_entry(IcbState::InFlight(rx)); - - match self.resolver.resolve(ino, None, self).await { - Ok(icb) => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - let result = t(&icb); - self.write_back_if_present(ino, icb).await; - drop(tx); - return Ok(result); - } - Err(e) => { - self.inode_table.remove_async(&ino).await; - drop(tx); - return Err(e); - } - } - } - } - } - } - - /// Increment rc. **Awaits** `InFlight`. - /// - /// Returns `None` if the inode does not exist or was evicted concurrently. - /// This can happen when a concurrent `forget` removes the entry between the - /// caller's insert/cache and this `inc_rc` call, or when a concurrent - /// `get_or_resolve` swaps the entry to `InFlight` and the entry is then - /// evicted on resolution failure. Callers in FUSE `lookup` paths should - /// treat `None` as a lookup failure to avoid ref-count leaks (the kernel - /// would hold a reference the cache no longer tracks). - #[instrument(name = "AsyncICache::inc_rc", skip(self))] - pub async fn inc_rc(&self, ino: Inode) -> Option { - loop { - if !self.wait_for_available(ino).await { - warn!(ino, "inc_rc: inode not in table"); - return None; - } - let result = self - .inode_table - .update_async(&ino, |_, state| match state { - IcbState::Available(icb) => { - *icb.rc_mut() += 1; - Some(icb.rc()) - } - IcbState::InFlight(_) => None, - }) - .await - .flatten(); - - match result { - Some(rc) => return Some(rc), - None => { - // Entry was concurrently replaced with InFlight or evicted. 
- if !self.contains(ino) { - warn!(ino, "inc_rc: inode evicted concurrently"); - return None; - } - // Entry exists but became InFlight — retry. - } - } - } - } - - /// Decrement rc by `nlookups`. If rc drops to zero, evicts and returns - /// the ICB. **Awaits** `InFlight` entries. - #[instrument(name = "AsyncICache::forget", skip(self))] - pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { - use scc::hash_map::Entry; - - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) => { - if icb.rc() <= nlookups { - trace!(ino, "evicting inode"); - let (_, state) = occ.remove_entry(); - return state.into_available(); - } - *icb.rc_mut() -= nlookups; - trace!(ino, new_rc = icb.rc(), "decremented rc"); - return None; - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - }, - Entry::Vacant(_) => { - warn!(ino, "forget on unknown inode"); - return None; - } - } - } - } - - /// Synchronous mutable access to an `Available` entry. - /// Does **not** wait for `InFlight`. Intended for initialization. - pub fn get_icb_mut_sync(&self, ino: Inode, f: impl FnOnce(&mut R::Icb) -> T) -> Option { - self.inode_table - .update_sync(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .flatten() - } - - /// Iterate over all `Available` entries (skips `InFlight`). - /// Async-safe iteration using `iter_async` to avoid contention on single-threaded runtimes. 
- pub async fn for_each(&self, mut f: impl FnMut(&Inode, &R::Icb)) { - self.inode_table - .iter_async(|ino, state| { - if let IcbState::Available(icb) = state { - f(ino, icb); - } - true // continue iteration - }) - .await; - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::collections::HashMap as StdHashMap; - use std::path::PathBuf; - use std::sync::atomic::Ordering; - use std::sync::{Arc, Mutex}; - - #[derive(Debug, Clone, PartialEq)] - struct TestIcb { - rc: u64, - path: PathBuf, - resolved: bool, - } - - impl IcbLike for TestIcb { - fn new_root(path: PathBuf) -> Self { - Self { - rc: 1, - path, - resolved: true, - } - } - fn rc(&self) -> u64 { - self.rc - } - fn rc_mut(&mut self) -> &mut u64 { - &mut self.rc - } - fn needs_resolve(&self) -> bool { - !self.resolved - } - } - - struct TestResolver { - responses: Mutex>>, - } - - impl TestResolver { - fn new() -> Self { - Self { - responses: Mutex::new(StdHashMap::new()), - } - } - - fn add(&self, ino: Inode, icb: TestIcb) { - self.responses - .lock() - .expect("test mutex") - .insert(ino, Ok(icb)); - } - - fn add_err(&self, ino: Inode, err: impl Into) { - self.responses - .lock() - .expect("test mutex") - .insert(ino, Err(err.into())); - } - } - - impl IcbResolver for TestResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - let result = self - .responses - .lock() - .expect("test mutex") - .remove(&ino) - .unwrap_or_else(|| Err(format!("no response for inode {ino}"))); - async move { result } - } - } - - fn test_cache() -> AsyncICache { - AsyncICache::new(TestResolver::new(), 1, "/root") - } - - fn test_cache_with(resolver: TestResolver) -> AsyncICache { - AsyncICache::new(resolver, 1, "/root") - } - - #[tokio::test] - async fn contains_returns_true_for_root() { - let cache = test_cache(); - assert!(cache.contains(1), "root should exist"); - } - - #[tokio::test] - async fn 
contains_returns_false_for_missing() { - let cache = test_cache(); - assert!(!cache.contains(999), "missing inode should not exist"); - } - - #[tokio::test] - async fn contains_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/test".into(), - resolved: true, - }, - ); - let cache = Arc::new(test_cache_with(resolver)); - - // Trigger resolve in background - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); - - handle - .await - .expect("task panicked") - .expect("resolve failed"); - assert!(cache.contains(42), "should be true after resolve"); - } - - #[tokio::test] - async fn new_creates_root_entry() { - let cache = test_cache(); - assert_eq!(cache.inode_count(), 1, "should have exactly 1 entry"); - } - - #[tokio::test] - async fn get_icb_returns_value() { - let cache = test_cache(); - let path = cache.get_icb(1, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/root"))); - } - - #[tokio::test] - async fn get_icb_returns_none_for_missing() { - let cache = test_cache(); - let result = cache.get_icb(999, IcbLike::rc).await; - assert_eq!(result, None, "missing inode should return None"); - } - - #[tokio::test] - async fn get_icb_mut_modifies_value() { - let cache = test_cache(); - cache - .get_icb_mut(1, |icb| { - *icb.rc_mut() += 10; - }) - .await; - let rc = cache.get_icb(1, IcbLike::rc).await; - assert_eq!(rc, Some(11), "root starts at rc=1, +10 = 11"); - } - - #[tokio::test] - async fn get_icb_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/loaded".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - // Resolve inode 42 - cache - .get_or_resolve(42, |_| ()) - .await - .expect("resolve failed"); - - let path = cache.get_icb(42, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/loaded"))); - 
} - - #[tokio::test] - async fn insert_icb_adds_entry() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/foo".into(), - resolved: true, - }, - ) - .await; - assert!(cache.contains(42), "inserted entry should exist"); - assert_eq!(cache.inode_count(), 2, "root + inserted = 2"); - } - - #[tokio::test] - async fn insert_icb_does_not_clobber_inflight() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - // Spawn insert_icb in background — should wait for InFlight to resolve - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { - cache2 - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/inserted".into(), - resolved: true, - }, - ) - .await; - }); - - // Give insert_icb time to start waiting - tokio::task::yield_now().await; - - // Complete the InFlight from the resolver side (write directly) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }), - ) - .await; - drop(tx); // signal watchers - - handle.await.expect("task panicked"); - - // After insert_icb completes, it should have overwritten the resolved value - let path = cache.get_icb(42, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/inserted"))); - } - - #[tokio::test] - async fn entry_or_insert_creates_new() { - let cache = test_cache(); - let rc = cache - .entry_or_insert_icb( - 42, - || TestIcb { - rc: 0, - path: "/new".into(), - resolved: true, - }, - |icb| { - *icb.rc_mut() += 1; - icb.rc() - }, - ) - .await; - assert_eq!(rc, 1, "factory creates rc=0, then +1 = 1"); - } - - #[tokio::test] - async fn entry_or_insert_returns_existing() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/existing".into(), - resolved: true, - }, - ) - .await; - - let rc = cache - .entry_or_insert_icb( - 42, - || 
panic!("factory should not be called"), - |icb| icb.rc(), - ) - .await; - assert_eq!(rc, 5, "existing entry rc should be 5"); - } - - #[tokio::test] - async fn entry_or_insert_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = Arc::new(test_cache_with(resolver)); - - // Start resolve in background - let cache2 = Arc::clone(&cache); - let resolve_handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); - - // Wait for resolve to finish - resolve_handle - .await - .expect("task panicked") - .expect("resolve failed"); - - // Now entry_or_insert should find the existing entry - let rc = cache - .entry_or_insert_icb( - 42, - || panic!("factory should not be called"), - |icb| icb.rc(), - ) - .await; - assert_eq!(rc, 1, "should find the resolved entry"); - } - - #[tokio::test] - async fn inc_rc_increments() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }, - ) - .await; - let new_rc = cache.inc_rc(42).await; - assert_eq!(new_rc, Some(2), "rc 1 + 1 = 2"); - } - - #[tokio::test] - async fn forget_decrements_rc() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/a".into(), - resolved: true, - }, - ) - .await; - - let evicted = cache.forget(42, 2).await; - assert!(evicted.is_none(), "rc 5 - 2 = 3, should not evict"); - - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(3), "rc should be 3 after forget(2)"); - } - - #[tokio::test] - async fn forget_evicts_when_rc_drops_to_zero() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 3, - path: "/a".into(), - resolved: true, - }, - ) - .await; - - let evicted = cache.forget(42, 3).await; - assert!(evicted.is_some(), "rc 3 - 3 = 0, should evict"); - assert!(!cache.contains(42), "evicted entry should be gone"); - 
assert_eq!(cache.inode_count(), 1, "only root remains"); - } - - #[tokio::test] - async fn forget_unknown_inode_returns_none() { - let cache = test_cache(); - let evicted = cache.forget(999, 1).await; - assert!(evicted.is_none(), "unknown inode should return None"); - } - - #[tokio::test] - async fn for_each_iterates_available_entries() { - let cache = test_cache(); - cache - .insert_icb( - 2, - TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }, - ) - .await; - cache - .insert_icb( - 3, - TestIcb { - rc: 1, - path: "/b".into(), - resolved: true, - }, - ) - .await; - - let mut seen = std::collections::HashSet::new(); - cache - .for_each(|ino, _icb| { - seen.insert(*ino); - }) - .await; - assert_eq!(seen.len(), 3, "should see all 3 entries"); - assert!(seen.contains(&1), "should contain root"); - assert!(seen.contains(&2), "should contain inode 2"); - assert!(seen.contains(&3), "should contain inode 3"); - } - - #[tokio::test] - async fn for_each_skips_inflight() { - let cache = test_cache(); - // Directly insert an InFlight entry for testing iteration - let (_tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let mut count = 0; - cache - .for_each(|_, _| { - count += 1; - }) - .await; - assert_eq!(count, 1, "only root, not the InFlight entry"); - } - - #[tokio::test] - async fn wait_does_not_miss_signal_on_immediate_complete() { - let cache = Arc::new(test_cache()); - - // Insert InFlight manually, then immediately complete before anyone waits - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - // Complete before any waiter (simulate resolver by writing directly) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/fast".into(), - resolved: true, - }), - ) - .await; - drop(tx); - - assert!(cache.contains(42), "entry should exist in table"); - } - - // -- get_or_resolve tests -- - - 
#[tokio::test] - async fn get_or_resolve_returns_existing() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/existing".into(), - resolved: true, - }, - ) - .await; - - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/existing"))); - } - - #[tokio::test] - async fn get_or_resolve_resolves_missing() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/resolved"))); - // Should now be cached - assert!(cache.contains(42)); - } - - #[tokio::test] - async fn get_or_resolve_propagates_error() { - let resolver = TestResolver::new(); - resolver.add_err(42, "network error"); - let cache = test_cache_with(resolver); - - let result: Result = - cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(result, Err("network error".to_owned())); - // Entry should be cleaned up on error - assert!(!cache.contains(42)); - } - - struct CountingResolver { - count: Arc, - } - - impl IcbResolver for CountingResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - self.count.fetch_add(1, Ordering::SeqCst); - async { - tokio::task::yield_now().await; - Ok(TestIcb { - rc: 1, - path: "/coalesced".into(), - resolved: true, - }) - } - } - } - - #[tokio::test] - async fn get_or_resolve_coalesces_concurrent_requests() { - use std::sync::atomic::AtomicUsize; - - let resolve_count = Arc::new(AtomicUsize::new(0)); - - let cache = Arc::new(AsyncICache::new( - CountingResolver { - count: Arc::clone(&resolve_count), - }, - 1, - "/root", - )); - - let mut handles = Vec::new(); - for _ in 0..5 { - let c = Arc::clone(&cache); - 
handles.push(tokio::spawn(async move { - c.get_or_resolve(42, |icb| icb.path.clone()).await - })); - } - - for h in handles { - assert_eq!( - h.await.expect("task panicked"), - Ok(PathBuf::from("/coalesced")), - ); - } - - // Resolver should only have been called ONCE (not 5 times) - assert_eq!( - resolve_count.load(Ordering::SeqCst), - 1, - "should coalesce to 1 resolve call" - ); - } - - #[test] - fn icb_state_into_available_returns_inner() { - let state = IcbState::Available(TestIcb { - rc: 1, - path: "/test".into(), - resolved: true, - }); - assert!(state.into_available().is_some()); - } - - #[test] - fn icb_state_into_available_returns_none_for_inflight() { - let (_tx, rx) = watch::channel(()); - let state: IcbState = IcbState::InFlight(rx); - assert!(state.into_available().is_none()); - } - - #[tokio::test] - async fn get_or_resolve_resolves_stub_entry() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - // Insert unresolved stub - cache - .insert_icb( - 42, - TestIcb { - rc: 0, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // get_or_resolve should trigger resolution because needs_resolve() == true - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/resolved"))); - } - - #[tokio::test] - async fn forget_handles_inflight_entry() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.forget(42, 1).await }); - - // Give forget time to start waiting - tokio::task::yield_now().await; - - // Simulate resolver completing (write directly to inode_table) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 3, - path: "/inflight".into(), - 
resolved: true, - }), - ) - .await; - drop(tx); - - let evicted = handle.await.expect("task panicked"); - assert!(evicted.is_none(), "rc=3 - 1 = 2, should not evict"); - - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(2), "rc should be 2 after forget(1) on rc=3"); - } - - #[tokio::test] - async fn get_or_resolve_error_preserves_stub_with_nonzero_rc() { - let resolver = TestResolver::new(); - resolver.add_err(42, "resolve failed"); - let cache = test_cache_with(resolver); - - // Insert a stub with rc=2 (simulates a looked-up entry needing resolution) - cache - .insert_icb( - 42, - TestIcb { - rc: 2, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // get_or_resolve should fail - let result: Result = - cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert!(result.is_err(), "should propagate resolver error"); - - // The stub should be preserved since rc > 0 - assert!(cache.contains(42), "entry with rc=2 should survive error"); - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(2), "rc should be preserved"); - } - - #[tokio::test] - async fn inc_rc_missing_inode_returns_none() { - let cache = test_cache(); - assert_eq!(cache.inc_rc(999).await, None); - } - - #[tokio::test] - async fn inc_rc_waits_for_inflight() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); - - // Simulate resolver completing by writing directly to inode_table - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }), - ) - .await; - drop(tx); - - let result = handle - .await - .unwrap_or_else(|e| panic!("task panicked: {e}")); - assert_eq!( - result, - Some(2), - "waited for Available, then incremented 1 -> 2" - ); - } - - #[tokio::test] - async fn 
inc_rc_returns_none_after_concurrent_eviction() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); - - // Evict instead of completing - cache.inode_table.remove_async(&42).await; - drop(tx); - - let result = handle - .await - .unwrap_or_else(|e| panic!("task panicked: {e}")); - assert_eq!(result, None, "evicted entry should return None"); - } - - /// Resolver that pauses mid-resolution via a `Notify`, allowing the test - /// to interleave a `forget` while the resolve future is suspended. - struct SlowResolver { - /// Signalled by the resolver once it has started (so the test knows - /// resolution is in progress). - started: Arc, - /// The resolver waits on this before returning (the test signals it - /// after calling `forget`). - proceed: Arc, - } - - impl IcbResolver for SlowResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - let started = Arc::clone(&self.started); - let proceed = Arc::clone(&self.proceed); - async move { - started.notify_one(); - proceed.notified().await; - Ok(TestIcb { - rc: 1, - path: "/slow-resolved".into(), - resolved: true, - }) - } - } - } - - /// Regression test: `get_icb` must survive the entry cycling back to - /// `InFlight` between when `wait_for_available` returns and when - /// `read_async` runs. The loop in `get_icb` should retry and eventually - /// return the final resolved value. - #[tokio::test] - async fn wait_for_available_retries_on_re_inflight() { - let cache = Arc::new(test_cache()); - let ino: Inode = 42; - - // Phase 1: insert an InFlight entry. 
- let (tx1, rx1) = watch::channel(()); - cache - .inode_table - .upsert_async(ino, IcbState::InFlight(rx1)) - .await; - - // Spawn get_icb — it will wait for InFlight to resolve. - let cache_get = Arc::clone(&cache); - let get_handle = - tokio::spawn(async move { cache_get.get_icb(ino, |icb| icb.path.clone()).await }); - - // Give get_icb time to start waiting on the watch channel. - tokio::task::yield_now().await; - - // Phase 1 complete: transition to Available briefly, then immediately - // back to InFlight (simulates get_or_resolve finding a stub and - // re-entering InFlight for a second resolution). - let (tx2, rx2) = watch::channel(()); - cache - .inode_table - .upsert_async(ino, IcbState::InFlight(rx2)) - .await; - // Signal phase-1 watchers so get_icb wakes up; it will re-read the - // entry and find InFlight again, then loop back to wait. - drop(tx1); - - // Give get_icb time to re-enter the wait loop. - tokio::task::yield_now().await; - - // Phase 2 complete: write the final resolved value. - cache - .inode_table - .upsert_async( - ino, - IcbState::Available(TestIcb { - rc: 1, - path: "/fully-resolved".into(), - resolved: true, - }), - ) - .await; - drop(tx2); - - // get_icb should return the final resolved value (not None). - let result = get_handle.await.expect("get_icb task panicked"); - assert_eq!( - result, - Some(PathBuf::from("/fully-resolved")), - "get_icb must survive re-InFlight and return the final resolved value" - ); - } - - /// Regression test: an entry evicted by `forget` during an in-progress - /// `get_or_resolve` must NOT be resurrected when resolution completes. 
- #[tokio::test] - async fn get_or_resolve_does_not_resurrect_evicted_entry() { - let started = Arc::new(tokio::sync::Notify::new()); - let proceed = Arc::new(tokio::sync::Notify::new()); - - let cache = Arc::new(AsyncICache::new( - SlowResolver { - started: Arc::clone(&started), - proceed: Arc::clone(&proceed), - }, - 1, - "/root", - )); - - let ino: Inode = 42; - - // Insert a stub with rc=1 (simulates a looked-up, unresolved entry). - cache - .insert_icb( - ino, - TestIcb { - rc: 1, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // Spawn get_or_resolve which will trigger slow resolution. - let cache2 = Arc::clone(&cache); - let resolve_handle = - tokio::spawn(async move { cache2.get_or_resolve(ino, |icb| icb.path.clone()).await }); - - // Wait until the resolver has started (entry is now InFlight). - started.notified().await; - - // Evict the entry while resolution is in progress. - // forget waits for InFlight, so we need to complete resolution for - // forget to proceed. Instead, remove the InFlight entry directly to - // simulate a concurrent eviction (e.g., by another path that already - // removed the entry). - cache.inode_table.remove_async(&ino).await; - - // Let the resolver finish. - proceed.notify_one(); - - // Wait for get_or_resolve to complete. - drop(resolve_handle.await.expect("task panicked")); - - // The entry must NOT have been resurrected by write_back_if_present. - assert!( - !cache.contains(ino), - "evicted entry must not be resurrected after resolution completes" - ); - } -} diff --git a/src/fs/icache/bridge.rs b/src/fs/icache/bridge.rs deleted file mode 100644 index e674a564..00000000 --- a/src/fs/icache/bridge.rs +++ /dev/null @@ -1,138 +0,0 @@ -use crate::fs::r#trait::{FileAttr, FileHandle, Inode}; - -/// Bidirectional bridge for both inodes and file handles between two Fs layers. -/// -/// Convention: **left = outer (caller), right = inner (callee)**. -/// `forward(left)` → right, `backward(right)` → left. 
-pub struct HashMapBridge { - inode_map: bimap::BiMap, - fh_map: bimap::BiMap, -} - -impl HashMapBridge { - pub fn new() -> Self { - Self { - inode_map: bimap::BiMap::new(), - fh_map: bimap::BiMap::new(), - } - } - - // ── Inode methods ──────────────────────────────────────────────────── - - pub fn insert_inode(&mut self, left: Inode, right: Inode) { - self.inode_map.insert(left, right); - } - - /// Look up right→left, or allocate a new left inode if unmapped. - pub fn backward_or_insert_inode( - &mut self, - right: Inode, - allocate: impl FnOnce() -> Inode, - ) -> Inode { - if let Some(&left) = self.inode_map.get_by_right(&right) { - left - } else { - let left = allocate(); - self.inode_map.insert(left, right); - left - } - } - - /// Look up left→right, or allocate a new right inode if unmapped. - pub fn forward_or_insert_inode( - &mut self, - left: Inode, - allocate: impl FnOnce() -> Inode, - ) -> Inode { - if let Some(&right) = self.inode_map.get_by_left(&left) { - right - } else { - let right = allocate(); - self.inode_map.insert(left, right); - right - } - } - - /// Remove an inode mapping by its left (outer) key. - pub fn remove_inode_by_left(&mut self, left: Inode) { - self.inode_map.remove_by_left(&left); - } - - /// Look up left→right directly. - pub fn inode_map_get_by_left(&self, left: Inode) -> Option<&Inode> { - self.inode_map.get_by_left(&left) - } - - /// Rewrite the `ino` field in a [`FileAttr`] from right (inner) to left (outer) namespace. 
- pub fn attr_backward(&self, attr: FileAttr) -> FileAttr { - let backward = |ino: Inode| -> Inode { - if let Some(&left) = self.inode_map.get_by_right(&ino) { - left - } else { - tracing::warn!( - inner_ino = ino, - "attr_backward: no bridge mapping, using raw inner inode" - ); - ino - } - }; - rewrite_attr_ino(attr, backward) - } - - // ── File handle methods ────────────────────────────────────────────── - - pub fn insert_fh(&mut self, left: FileHandle, right: FileHandle) { - self.fh_map.insert(left, right); - } - - pub fn fh_forward(&self, left: FileHandle) -> Option { - self.fh_map.get_by_left(&left).copied() - } - - /// Remove a file handle mapping by its left (outer) key. - pub fn remove_fh_by_left(&mut self, left: FileHandle) { - self.fh_map.remove_by_left(&left); - } -} - -/// Rewrite the `ino` field in a [`FileAttr`] using the given translation function. -fn rewrite_attr_ino(attr: FileAttr, translate: impl Fn(Inode) -> Inode) -> FileAttr { - match attr { - FileAttr::RegularFile { - mut common, - size, - blocks, - } => { - common.ino = translate(common.ino); - FileAttr::RegularFile { - common, - size, - blocks, - } - } - FileAttr::Directory { mut common } => { - common.ino = translate(common.ino); - FileAttr::Directory { common } - } - FileAttr::Symlink { mut common, size } => { - common.ino = translate(common.ino); - FileAttr::Symlink { common, size } - } - FileAttr::CharDevice { mut common, rdev } => { - common.ino = translate(common.ino); - FileAttr::CharDevice { common, rdev } - } - FileAttr::BlockDevice { mut common, rdev } => { - common.ino = translate(common.ino); - FileAttr::BlockDevice { common, rdev } - } - FileAttr::NamedPipe { mut common } => { - common.ino = translate(common.ino); - FileAttr::NamedPipe { common } - } - FileAttr::Socket { mut common } => { - common.ino = translate(common.ino); - FileAttr::Socket { common } - } - } -} diff --git a/src/fs/icache/file_table.rs b/src/fs/icache/file_table.rs deleted file mode 100644 index 
332a6ffb..00000000 --- a/src/fs/icache/file_table.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::sync::atomic::{AtomicU64, Ordering}; - -use crate::fs::r#trait::FileHandle; - -/// Monotonically increasing file handle allocator. -#[must_use] -pub struct FileTable { - next_fh: AtomicU64, -} - -impl FileTable { - pub fn new() -> Self { - Self { - next_fh: AtomicU64::new(1), - } - } - - #[must_use] - pub fn allocate(&self) -> FileHandle { - self.next_fh.fetch_add(1, Ordering::Relaxed) - } -} diff --git a/src/fs/icache/inode_factory.rs b/src/fs/icache/inode_factory.rs deleted file mode 100644 index 1a603388..00000000 --- a/src/fs/icache/inode_factory.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::fs::r#trait::Inode; -use std::sync::atomic::{AtomicU64, Ordering}; - -/// Monotonically increasing inode allocator. -pub struct InodeFactory { - next_inode: AtomicU64, -} - -impl InodeFactory { - pub fn new(start: Inode) -> Self { - Self { - next_inode: AtomicU64::new(start), - } - } - - pub fn allocate(&self) -> Inode { - self.next_inode.fetch_add(1, Ordering::Relaxed) - } -} diff --git a/src/fs/icache/mod.rs b/src/fs/icache/mod.rs deleted file mode 100644 index 2ccd80bd..00000000 --- a/src/fs/icache/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Generic directory cache and inode management primitives. - -pub mod async_cache; -pub mod bridge; -mod file_table; -mod inode_factory; - -pub use async_cache::AsyncICache; -pub use async_cache::IcbResolver; -pub use file_table::FileTable; -pub use inode_factory::InodeFactory; - -/// Common interface for inode control block types usable with `ICache`. -pub trait IcbLike: Clone { - /// Create an ICB with rc=1, the given path, and no children. - fn new_root(path: std::path::PathBuf) -> Self; - fn rc(&self) -> u64; - fn rc_mut(&mut self) -> &mut u64; - /// Returns true if this entry needs resolution (e.g., attr not yet fetched). 
- fn needs_resolve(&self) -> bool; -} diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index 340b5887..cf57e392 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -1,12 +1,6 @@ -//! Shared types and helpers used by both `MesaFS` and `RepoFs`. - use mesa_dev::low_level::apis; use thiserror::Error; -use crate::fs::r#trait::{FileAttr, Inode}; - -pub(super) use super::icache::InodeControlBlock; - /// A concrete error type that preserves the structure of `mesa_dev::low_level::apis::Error` /// without the generic parameter. #[derive(Debug, Error)] @@ -46,162 +40,16 @@ impl From> for MesaAp } } -#[derive(Debug, Error)] -pub enum LookupError { - #[error("inode not found")] - InodeNotFound, - - #[error("file does not exist")] - FileDoesNotExist, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), -} - -impl From for i32 { - fn from(e: LookupError) -> Self { - match e { - LookupError::InodeNotFound | LookupError::FileDoesNotExist => libc::ENOENT, - LookupError::RemoteMesaError(_) => libc::EIO, - } - } -} - -#[derive(Debug, Error)] -pub enum GetAttrError { - #[error("inode not found")] - InodeNotFound, -} - -impl From for i32 { - fn from(e: GetAttrError) -> Self { - match e { - GetAttrError::InodeNotFound => libc::ENOENT, - } - } -} - -#[derive(Debug, Error)] -pub enum OpenError { - #[error("inode not found")] - InodeNotFound, -} - -impl From for i32 { - fn from(e: OpenError) -> Self { - match e { - OpenError::InodeNotFound => libc::ENOENT, +pub(super) fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { + match &e { + MesaApiError::Response { status, .. 
} if *status == 404 => { + std::io::Error::from_raw_os_error(libc::ENOENT) } - } -} - -#[derive(Debug, Error)] -pub enum ReadError { - #[error("file not open")] - FileNotOpen, - - #[error("inode not found")] - InodeNotFound, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), - - #[error("content is not a file")] - NotAFile, - - #[error("base64 decode error: {0}")] - Base64Decode(#[from] base64::DecodeError), -} - -impl From for i32 { - fn from(e: ReadError) -> Self { - match e { - ReadError::FileNotOpen => libc::EBADF, - ReadError::InodeNotFound => libc::ENOENT, - ReadError::RemoteMesaError(_) | ReadError::Base64Decode(_) => libc::EIO, - ReadError::NotAFile => libc::EISDIR, - } - } -} - -#[derive(Debug, Error)] -pub enum ReadDirError { - #[error("inode not found")] - InodeNotFound, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), - - #[error("inode is not a directory")] - NotADirectory, - - #[error("operation not permitted")] - NotPermitted, -} - -impl From for ReadDirError { - fn from(e: LookupError) -> Self { - match e { - LookupError::RemoteMesaError(api) => Self::RemoteMesaError(api), - LookupError::InodeNotFound | LookupError::FileDoesNotExist => Self::InodeNotFound, - } - } -} - -impl From for i32 { - fn from(e: ReadDirError) -> Self { - match e { - ReadDirError::InodeNotFound => libc::ENOENT, - ReadDirError::RemoteMesaError(_) => libc::EIO, - ReadDirError::NotADirectory => libc::ENOTDIR, - ReadDirError::NotPermitted => libc::EPERM, - } - } -} - -#[derive(Debug, Error)] -pub enum ReleaseError { - #[error("file not open")] - FileNotOpen, -} - -impl From for i32 { - fn from(e: ReleaseError) -> Self { - match e { - ReleaseError::FileNotOpen => libc::EBADF, - } - } -} - -/// Allows a parent compositor to peek at cached attrs from a child filesystem. 
-#[async_trait::async_trait] -pub(super) trait InodeCachePeek { - async fn peek_attr(&self, ino: Inode) -> Option; -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn lookup_inode_not_found_converts_to_readdir_inode_not_found() { - let err: ReadDirError = LookupError::InodeNotFound.into(); - assert!(matches!(err, ReadDirError::InodeNotFound)); - } - - #[test] - fn lookup_file_does_not_exist_converts_to_readdir_inode_not_found() { - let err: ReadDirError = LookupError::FileDoesNotExist.into(); - assert!(matches!(err, ReadDirError::InodeNotFound)); - } - - #[test] - fn lookup_remote_error_converts_to_readdir_remote_error() { - let api_err = MesaApiError::Response { - status: 500, - body: "test".to_owned(), - }; - let err: ReadDirError = LookupError::RemoteMesaError(api_err).into(); - assert!(matches!(err, ReadDirError::RemoteMesaError(_))); + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. } => std::io::Error::other(e), } } diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs deleted file mode 100644 index 6dbac250..00000000 --- a/src/fs/mescloud/composite.rs +++ /dev/null @@ -1,308 +0,0 @@ -use std::collections::HashMap; -use std::ffi::OsStr; - -use bytes::Bytes; -use tracing::{instrument, trace, warn}; - -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, OpenFlags, -}; - -use super::common::{ - GetAttrError, InodeCachePeek, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::icache::{InodeControlBlock, MescloudICache}; - -/// A child filesystem slot: inner filesystem + bidirectional inode/fh bridge. 
-pub(super) struct ChildSlot { - pub inner: Inner, - pub bridge: HashMapBridge, -} - -/// Layered filesystem that presents multiple child filesystems under a single -/// inode namespace. -/// -/// `MesaCloud`'s filesystem is a hierarchy of compositions: -/// -/// ```text -/// MesaFS (CompositeFs<_, OrgFs>) -/// └─ OrgFs (CompositeFs<_, RepoFs>) -/// └─ RepoFs (leaf — backed by git) -/// ``` -/// -/// Each child filesystem numbers its inodes starting from 1, so the composite -/// maintains a bidirectional inode/file-handle bridge per child (see -/// [`ChildSlot`]) to translate between the outer namespace visible to FUSE and -/// each child's internal namespace. -pub(super) struct CompositeFs -where - R: IcbResolver, -{ - pub icache: MescloudICache, - pub file_table: FileTable, - pub readdir_buf: Vec, - /// Maps outer inode to index into `slots` for child-root inodes. - pub child_inodes: HashMap, - /// Maps every translated outer inode to its owning slot index. - pub inode_to_slot: HashMap, - pub slots: Vec>, -} - -impl CompositeFs -where - R: IcbResolver, - Inner: Fs< - LookupError = LookupError, - GetAttrError = GetAttrError, - OpenError = OpenError, - ReadError = ReadError, - ReaddirError = ReadDirError, - ReleaseError = ReleaseError, - > + InodeCachePeek - + Send - + Sync, -{ - /// Look up which child slot owns an inode via direct map. - #[instrument(name = "CompositeFs::slot_for_inode", skip(self))] - pub fn slot_for_inode(&self, ino: Inode) -> Option { - self.inode_to_slot.get(&ino).copied() - } - - /// Allocate an outer file handle and map it through the bridge. - #[must_use] - pub fn alloc_fh(&mut self, slot_idx: usize, inner_fh: FileHandle) -> FileHandle { - let fh = self.file_table.allocate(); - self.slots[slot_idx].bridge.insert_fh(fh, inner_fh); - fh - } - - /// Translate an inner inode to an outer inode, allocating if needed. - /// Also inserts a stub ICB into the outer icache when the inode is new. 
- #[instrument(name = "CompositeFs::translate_inner_ino", skip(self, name))] - pub async fn translate_inner_ino( - &mut self, - slot_idx: usize, - inner_ino: Inode, - parent_outer_ino: Inode, - name: &OsStr, - ) -> Inode { - let outer_ino = self.slots[slot_idx] - .bridge - .backward_or_insert_inode(inner_ino, || self.icache.allocate_inode()); - self.inode_to_slot.insert(outer_ino, slot_idx); - self.icache - .entry_or_insert_icb( - outer_ino, - || InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent_outer_ino), - attr: None, - children: None, - }, - |_| {}, - ) - .await; - outer_ino - } - - /// Get cached file attributes for an inode. - #[instrument(name = "CompositeFs::delegated_getattr", skip(self))] - pub async fn delegated_getattr(&self, ino: Inode) -> Result { - self.icache.get_attr(ino).await.ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) - } - - /// Find slot, forward inode, delegate to inner, allocate outer file handle. - #[instrument(name = "CompositeFs::delegated_open", skip(self))] - pub async fn delegated_open( - &mut self, - ino: Inode, - flags: OpenFlags, - ) -> Result { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "open on inode not belonging to any child"); - OpenError::InodeNotFound - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("open: ino should be mapped")); - let inner_open = self.slots[idx].inner.open(inner_ino, flags).await?; - let outer_fh = self.alloc_fh(idx, inner_open.handle); - trace!( - ino, - outer_fh, - inner_fh = inner_open.handle, - "open: assigned file handle" - ); - Ok(OpenFile { - handle: outer_fh, - options: inner_open.options, - }) - } - - /// Find slot, forward inode and file handle, delegate read to inner. 
- #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] - #[instrument(name = "CompositeFs::delegated_read", skip(self))] - pub async fn delegated_read( - &mut self, - ino: Inode, - fh: FileHandle, - offset: u64, - size: u32, - flags: OpenFlags, - lock_owner: Option, - ) -> Result { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "read on inode not belonging to any child"); - ReadError::InodeNotFound - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("read: ino should be mapped")); - let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "read: no fh mapping found"); - ReadError::FileNotOpen - })?; - self.slots[idx] - .inner - .read(inner_ino, inner_fh, offset, size, flags, lock_owner) - .await - } - - /// Find slot, forward inode and file handle, delegate release to inner, - /// then clean up the file handle mapping. - #[instrument(name = "CompositeFs::delegated_release", skip(self))] - pub async fn delegated_release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "release on inode not belonging to any child"); - ReleaseError::FileNotOpen - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("release: ino should be mapped")); - let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "release: no fh mapping found"); - ReleaseError::FileNotOpen - })?; - let result = self.slots[idx] - .inner - .release(inner_ino, inner_fh, flags, flush) - .await; - self.slots[idx].bridge.remove_fh_by_left(fh); - trace!(ino, fh, "release: cleaned up fh mapping"); - result - } - - /// Propagate forget to the inner filesystem, evict from icache, and clean - /// up bridge mappings. Returns `true` if the inode was evicted. 
- /// - /// Child-root inodes (those in `child_inodes`) do NOT propagate forget to - /// the inner filesystem: the inner root's `rc=1` is an initialization - /// invariant unrelated to outer FUSE lookup counts. Propagating would - /// evict the inner root, breaking all subsequent operations on that child. - #[must_use] - #[instrument(name = "CompositeFs::delegated_forget", skip(self))] - pub async fn delegated_forget(&mut self, ino: Inode, nlookups: u64) -> bool { - let slot_idx = self.slot_for_inode(ino); - let is_child_root = self.child_inodes.contains_key(&ino); - if !is_child_root - && let Some(idx) = slot_idx - && let Some(&inner_ino) = self.slots[idx].bridge.inode_map_get_by_left(ino) - { - self.slots[idx].inner.forget(inner_ino, nlookups).await; - } - if self.icache.forget(ino, nlookups).await.is_some() { - self.child_inodes.remove(&ino); - self.inode_to_slot.remove(&ino); - if let Some(idx) = slot_idx { - self.slots[idx].bridge.remove_inode_by_left(ino); - } - true - } else { - false - } - } - - /// Return filesystem statistics from the icache. - #[must_use] - pub fn delegated_statfs(&self) -> FilesystemStats { - self.icache.statfs() - } - - /// Delegation branch for lookup when the parent is owned by a child slot. 
- #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] - pub async fn delegated_lookup( - &mut self, - parent: Inode, - name: &OsStr, - ) -> Result { - let idx = self - .slot_for_inode(parent) - .ok_or(LookupError::InodeNotFound)?; - let inner_parent = self.slots[idx] - .bridge - .forward_or_insert_inode(parent, || unreachable!("lookup: parent should be mapped")); - let inner_attr = self.slots[idx].inner.lookup(inner_parent, name).await?; - let inner_ino = inner_attr.common().ino; - let outer_ino = self.translate_inner_ino(idx, inner_ino, parent, name).await; - let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); - self.icache.cache_attr(outer_ino, outer_attr).await; - // None means the entry was concurrently evicted; fail the lookup so - // the kernel doesn't hold a ref the cache no longer tracks. - let rc = self - .icache - .inc_rc(outer_ino) - .await - .ok_or(LookupError::InodeNotFound)?; - trace!(outer_ino, inner_ino, rc, "lookup: resolved via delegation"); - Ok(outer_attr) - } - - /// Delegation branch for readdir when the inode is owned by a child slot. 
- #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] - pub async fn delegated_readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - let idx = self - .slot_for_inode(ino) - .ok_or(ReadDirError::InodeNotFound)?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("readdir: ino should be mapped")); - let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; - let inner_entries: Vec = inner_entries.to_vec(); - let evicted = self.icache.evict_zero_rc_children(ino).await; - for evicted_ino in evicted { - if let Some(slot) = self.inode_to_slot.remove(&evicted_ino) { - self.slots[slot].bridge.remove_inode_by_left(evicted_ino); - } - self.child_inodes.remove(&evicted_ino); - } - let mut outer_entries = Vec::with_capacity(inner_entries.len()); - for entry in &inner_entries { - let outer_child_ino = self - .translate_inner_ino(idx, entry.ino, ino, &entry.name) - .await; - if let Some(inner_attr) = self.slots[idx].inner.peek_attr(entry.ino).await { - let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); - self.icache.cache_attr(outer_child_ino, outer_attr).await; - } - outer_entries.push(DirEntry { - ino: outer_child_ino, - name: entry.name.clone(), - kind: entry.kind, - }); - } - self.readdir_buf = outer_entries; - Ok(&self.readdir_buf) - } -} diff --git a/src/fs/mescloud/icache.rs b/src/fs/mescloud/icache.rs deleted file mode 100644 index 15f1f5d7..00000000 --- a/src/fs/mescloud/icache.rs +++ /dev/null @@ -1,437 +0,0 @@ -//! Mescloud-specific inode control block, helpers, and directory cache wrapper. - -use std::ffi::OsStr; -use std::time::SystemTime; - -use crate::fs::icache::{AsyncICache, IcbLike, IcbResolver, InodeFactory}; -use crate::fs::r#trait::{ - CommonFileAttr, DirEntryType, FileAttr, FilesystemStats, Inode, Permissions, -}; - -/// Inode control block for mescloud filesystem layers. 
-#[derive(Clone)] -pub struct InodeControlBlock { - pub parent: Option, - pub rc: u64, - pub path: std::path::PathBuf, - /// Cached file attributes from the last lookup. - pub attr: Option, - /// Cached directory children from the resolver (directories only). - pub children: Option>, -} - -impl IcbLike for InodeControlBlock { - fn new_root(path: std::path::PathBuf) -> Self { - Self { - rc: 1, - parent: None, - path, - attr: None, - children: None, - } - } - - fn rc(&self) -> u64 { - self.rc - } - - fn rc_mut(&mut self) -> &mut u64 { - &mut self.rc - } - - fn needs_resolve(&self) -> bool { - match self.attr { - None => true, - Some(FileAttr::Directory { .. }) => self.children.is_none(), - Some(_) => false, - } - } -} - -/// Calculate the number of blocks needed for a given size. -pub fn blocks_of_size(block_size: u32, size: u64) -> u64 { - size.div_ceil(u64::from(block_size)) -} - -/// Free function -- usable by both `MescloudICache` and resolvers. -pub fn make_common_file_attr( - ino: Inode, - perm: u16, - atime: SystemTime, - mtime: SystemTime, - fs_owner: (u32, u32), - block_size: u32, -) -> CommonFileAttr { - CommonFileAttr { - ino, - atime, - mtime, - ctime: SystemTime::UNIX_EPOCH, - crtime: SystemTime::UNIX_EPOCH, - perm: Permissions::from_bits_truncate(perm), - nlink: 1, - uid: fs_owner.0, - gid: fs_owner.1, - blksize: block_size, - } -} - -/// Mescloud-specific directory cache wrapper over `AsyncICache`. -pub struct MescloudICache> { - inner: AsyncICache, - inode_factory: InodeFactory, - fs_owner: (u32, u32), - block_size: u32, -} - -impl> MescloudICache { - /// Create a new `MescloudICache`. Initializes root ICB (rc=1), caches root dir attr. 
- pub fn new(resolver: R, root_ino: Inode, fs_owner: (u32, u32), block_size: u32) -> Self { - let cache = Self { - inner: AsyncICache::new(resolver, root_ino, "/"), - inode_factory: InodeFactory::new(root_ino + 1), - fs_owner, - block_size, - }; - - // Set root directory attr synchronously during initialization - let now = SystemTime::now(); - let root_attr = FileAttr::Directory { - common: make_common_file_attr(root_ino, 0o755, now, now, fs_owner, block_size), - }; - cache.inner.get_icb_mut_sync(root_ino, |icb| { - icb.attr = Some(root_attr); - }); - - cache - } - - // -- Delegated from AsyncICache (async) -- - - pub fn contains(&self, ino: Inode) -> bool { - self.inner.contains(ino) - } - - pub async fn get_icb( - &self, - ino: Inode, - // `Sync` required: see comment on `AsyncICache::get_icb`. - f: impl Fn(&InodeControlBlock) -> T + Send + Sync, - ) -> Option { - self.inner.get_icb(ino, f).await - } - - pub async fn insert_icb(&self, ino: Inode, icb: InodeControlBlock) { - self.inner.insert_icb(ino, icb).await; - } - - pub async fn entry_or_insert_icb( - &self, - ino: Inode, - factory: impl FnOnce() -> InodeControlBlock, - then: impl FnOnce(&mut InodeControlBlock) -> T, - ) -> T { - self.inner.entry_or_insert_icb(ino, factory, then).await - } - - pub async fn inc_rc(&self, ino: Inode) -> Option { - self.inner.inc_rc(ino).await - } - - pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { - self.inner.forget(ino, nlookups).await - } - - pub async fn get_or_resolve( - &self, - ino: Inode, - then: impl FnOnce(&InodeControlBlock) -> T, - ) -> Result { - self.inner.get_or_resolve(ino, then).await - } - - // -- Domain-specific -- - - /// Allocate a new inode number. 
- pub fn allocate_inode(&self) -> Inode { - self.inode_factory.allocate() - } - - pub async fn get_attr(&self, ino: Inode) -> Option { - self.inner.get_icb(ino, |icb| icb.attr).await.flatten() - } - - pub async fn cache_attr(&self, ino: Inode, attr: FileAttr) { - self.inner - .get_icb_mut(ino, |icb| { - icb.attr = Some(attr); - }) - .await; - } - - pub fn fs_owner(&self) -> (u32, u32) { - self.fs_owner - } - - pub fn block_size(&self) -> u32 { - self.block_size - } - - pub fn statfs(&self) -> FilesystemStats { - FilesystemStats { - block_size: self.block_size, - fragment_size: u64::from(self.block_size), - total_blocks: 0, - free_blocks: 0, - available_blocks: 0, - total_inodes: self.inner.inode_count() as u64, - free_inodes: 0, - available_inodes: 0, - filesystem_id: 0, - mount_flags: 0, - max_filename_length: 255, - } - } - - /// Evict all `Available` children of `parent` that have `rc == 0`. - /// Returns the list of evicted inode numbers so callers can clean up - /// associated state (e.g., bridge mappings, slot tracking). - pub async fn evict_zero_rc_children(&self, parent: Inode) -> Vec { - let mut to_evict = Vec::new(); - self.inner - .for_each(|&ino, icb| { - if icb.rc == 0 && icb.parent == Some(parent) { - to_evict.push(ino); - } - }) - .await; - let mut evicted = Vec::new(); - for ino in to_evict { - if self.inner.forget(ino, 0).await.is_some() { - evicted.push(ino); - } - } - evicted - } - - /// Find an existing child by (parent, name) or allocate a new inode. - /// If new, inserts a stub ICB (parent+path set, attr=None, children=None, rc=0). - /// Does NOT bump rc. Returns the inode number. - /// - /// # Safety invariant - /// - /// The `for_each` scan and `insert_icb` are **not** atomic. If two callers - /// race with the same `(parent, name)`, both may allocate distinct inodes - /// for the same logical child. This is currently safe because all callers - /// go through `&mut self` on the owning `Fs` implementation. 
- pub async fn ensure_child_ino(&self, parent: Inode, name: &OsStr) -> Inode { - // Search for existing child by parent + name - let mut existing_ino = None; - self.inner - .for_each(|&ino, icb| { - if icb.parent == Some(parent) && icb.path.as_os_str() == name { - existing_ino = Some(ino); - } - }) - .await; - - if let Some(ino) = existing_ino { - return ino; - } - - // Allocate new inode and insert stub - let ino = self.inode_factory.allocate(); - self.inner - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent), - attr: None, - children: None, - }, - ) - .await; - ino - } -} - -#[cfg(test)] -mod tests { - use std::future::Future; - - use super::*; - use crate::fs::icache::async_cache::AsyncICache; - use crate::fs::r#trait::DirEntryType; - - fn dummy_dir_attr(ino: Inode) -> FileAttr { - let now = SystemTime::now(); - FileAttr::Directory { - common: make_common_file_attr(ino, 0o755, now, now, (0, 0), 4096), - } - } - - fn dummy_file_attr(ino: Inode) -> FileAttr { - let now = SystemTime::now(); - FileAttr::RegularFile { - common: make_common_file_attr(ino, 0o644, now, now, (0, 0), 4096), - size: 100, - blocks: 1, - } - } - - #[test] - fn needs_resolve_stub_returns_true() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 0, - path: "stub".into(), - attr: None, - children: None, - }; - assert!(icb.needs_resolve()); - } - - #[test] - fn needs_resolve_file_with_attr_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "file.txt".into(), - attr: Some(dummy_file_attr(2)), - children: None, - }; - assert!(!icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_without_children_returns_true() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "dir".into(), - attr: Some(dummy_dir_attr(3)), - children: None, - }; - assert!(icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_with_children_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - 
rc: 1, - path: "dir".into(), - attr: Some(dummy_dir_attr(3)), - children: Some(vec![("README.md".to_owned(), DirEntryType::RegularFile)]), - }; - assert!(!icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_with_empty_children_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "empty-dir".into(), - attr: Some(dummy_dir_attr(4)), - children: Some(vec![]), - }; - assert!(!icb.needs_resolve()); - } - - struct NoOpResolver; - - impl IcbResolver for NoOpResolver { - type Icb = InodeControlBlock; - type Error = std::convert::Infallible; - - #[expect( - clippy::manual_async_fn, - reason = "must match IcbResolver trait signature" - )] - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - async { unreachable!("NoOpResolver should not be called") } - } - } - - fn test_mescloud_cache() -> MescloudICache { - MescloudICache::new(NoOpResolver, 1, (0, 0), 4096) - } - - #[tokio::test] - async fn evict_zero_rc_children_removes_stubs() { - let cache = test_mescloud_cache(); - - // Insert stubs as children of root (ino=1) with rc=0 - cache - .insert_icb( - 10, - InodeControlBlock { - rc: 0, - path: "child_a".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - cache - .insert_icb( - 11, - InodeControlBlock { - rc: 0, - path: "child_b".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - - // Insert a child with rc > 0 — should survive - cache - .insert_icb( - 12, - InodeControlBlock { - rc: 1, - path: "active".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - - // Insert a stub under a different parent — should survive - cache - .insert_icb( - 20, - InodeControlBlock { - rc: 0, - path: "other".into(), - parent: Some(12), - attr: None, - children: None, - }, - ) - .await; - - let evicted = cache.evict_zero_rc_children(1).await; - assert_eq!(evicted.len(), 2, "should evict 2 zero-rc children of root"); - 
- assert!(!cache.contains(10), "child_a should be evicted"); - assert!(!cache.contains(11), "child_b should be evicted"); - assert!(cache.contains(12), "active child should survive"); - assert!( - cache.contains(20), - "child of different parent should survive" - ); - } -} diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index 1a3cce80..ab3745db 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -1,44 +1,15 @@ -use std::collections::HashMap; -use std::ffi::OsStr; -use std::future::Future; -use std::time::SystemTime; - -use bytes::Bytes; use mesa_dev::MesaClient; use opentelemetry::propagation::Injector; -use secrecy::ExposeSecret as _; -use tracing::{Instrument as _, instrument, trace, warn}; use tracing_opentelemetry::OpenTelemetrySpanExt as _; -use crate::app_config::CacheConfig; -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, - OpenFlags, -}; - -use composite::{ChildSlot, CompositeFs}; - #[cfg(feature = "staging")] const MESA_API_BASE_URL: &str = "https://staging.depot.mesa.dev/api/v1"; #[cfg(not(feature = "staging"))] const MESA_API_BASE_URL: &str = "https://depot.mesa.dev/api/v1"; mod common; -mod composite; -use common::InodeControlBlock; -pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; - -use icache as mescloud_icache; -use icache::MescloudICache; - -mod org; -pub use org::OrgConfig; -use org::OrgFs; - -pub mod icache; pub mod repo; +pub mod roots; struct HeaderInjector<'a>(&'a mut reqwest::header::HeaderMap); @@ -78,7 +49,7 @@ impl reqwest_middleware::Middleware for OtelPropagationMiddleware { } } -fn build_mesa_client(api_key: &str) -> MesaClient { +pub fn build_mesa_client(api_key: &str) -> MesaClient { let client = reqwest_middleware::ClientBuilder::new(reqwest::Client::new()) 
.with(OtelPropagationMiddleware) .build(); @@ -88,339 +59,3 @@ fn build_mesa_client(api_key: &str) -> MesaClient { .with_client(client) .build() } - -struct MesaResolver { - fs_owner: (u32, u32), - block_size: u32, -} - -impl IcbResolver for MesaResolver { - type Icb = InodeControlBlock; - type Error = std::convert::Infallible; - - fn resolve( - &self, - ino: Inode, - stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let fs_owner = self.fs_owner; - let block_size = self.block_size; - async move { - let stub = stub.unwrap_or_else(|| InodeControlBlock { - parent: None, - path: "/".into(), - rc: 0, - attr: None, - children: None, - }); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }; - Ok(InodeControlBlock { - attr: Some(attr), - children: Some(vec![]), - ..stub - }) - } - .instrument(tracing::info_span!("MesaResolver::resolve", ino)) - } -} - -/// Classifies an inode by its role in the mesa hierarchy. -enum InodeRole { - /// The filesystem root (ino == 1). - Root, - /// An inode owned by some org. - OrgOwned, -} - -/// The top-level `MesaFS` filesystem. -/// -/// Composes multiple [`OrgFs`] instances, each with its own inode namespace, -/// delegating to [`CompositeFs`] for inode/fh translation at each boundary. -pub struct MesaFS { - composite: CompositeFs, -} - -impl MesaFS { - const ROOT_NODE_INO: Inode = 1; - const BLOCK_SIZE: u32 = 4096; - - /// Create a new `MesaFS` instance. 
- #[must_use] - pub fn new( - orgs: impl Iterator, - fs_owner: (u32, u32), - cache: &CacheConfig, - ) -> Self { - let resolver = MesaResolver { - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - Self { - composite: CompositeFs { - icache: MescloudICache::new( - resolver, - Self::ROOT_NODE_INO, - fs_owner, - Self::BLOCK_SIZE, - ), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - child_inodes: HashMap::new(), - inode_to_slot: HashMap::new(), - slots: orgs - .map(|org_conf| { - let client = build_mesa_client(org_conf.api_key.expose_secret()); - let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); - ChildSlot { - inner: org, - bridge: HashMapBridge::new(), - } - }) - .collect(), - }, - } - } - - /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> Option { - if ino == Self::ROOT_NODE_INO { - return Some(InodeRole::Root); - } - if self.composite.child_inodes.contains_key(&ino) { - return Some(InodeRole::OrgOwned); - } - if self.composite.slot_for_inode(ino).is_some() { - return Some(InodeRole::OrgOwned); - } - None - } - - /// Ensure a mesa-level inode exists for the org at `org_idx`. - /// Seeds the bridge with (`mesa_org_ino`, `OrgFs::ROOT_INO`). - /// Does NOT bump rc. - async fn ensure_org_inode(&mut self, org_idx: usize) -> (Inode, FileAttr) { - // Check if an inode already exists. - let existing_ino = self - .composite - .child_inodes - .iter() - .find(|&(_, &idx)| idx == org_idx) - .map(|(&ino, _)| ino); - - if let Some(existing_ino) = existing_ino { - if let Some(attr) = self.composite.icache.get_attr(existing_ino).await { - let rc = self - .composite - .icache - .get_icb(existing_ino, |icb| icb.rc) - .await - .unwrap_or(0); - trace!( - ino = existing_ino, - org_idx, rc, "ensure_org_inode: reusing existing inode" - ); - return (existing_ino, attr); - } - if self.composite.icache.contains(existing_ino) { - // ICB exists but attr missing — rebuild and cache. 
- warn!( - ino = existing_ino, - org_idx, "ensure_org_inode: attr missing, rebuilding" - ); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - existing_ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(existing_ino, attr).await; - return (existing_ino, attr); - } - // ICB was evicted — clean up stale tracking entries. - warn!( - ino = existing_ino, - org_idx, "ensure_org_inode: ICB evicted, cleaning up stale entry" - ); - self.composite.child_inodes.remove(&existing_ino); - self.composite.inode_to_slot.remove(&existing_ino); - } - - // Allocate new. - let org_name = self.composite.slots[org_idx].inner.name().to_owned(); - let ino = self.composite.icache.allocate_inode(); - trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); - - let now = SystemTime::now(); - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: org_name.as_str().into(), - parent: Some(Self::ROOT_NODE_INO), - attr: None, - children: None, - }, - ) - .await; - - self.composite.child_inodes.insert(ino, org_idx); - self.composite.inode_to_slot.insert(ino, org_idx); - - // Reset bridge (may have stale mappings from a previous eviction cycle) - // and seed: mesa org-root <-> OrgFs::ROOT_INO. 
- self.composite.slots[org_idx].bridge = HashMapBridge::new(); - self.composite.slots[org_idx] - .bridge - .insert_inode(ino, OrgFs::ROOT_INO); - - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) - } -} - -#[async_trait::async_trait] -impl Fs for MesaFS { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - - #[instrument(name = "MesaFS::lookup", skip(self))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { - let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; - match role { - InodeRole::Root => { - let org_name = name.to_str().ok_or(LookupError::InodeNotFound)?; - let org_idx = self - .composite - .slots - .iter() - .position(|s| s.inner.name() == org_name) - .ok_or(LookupError::InodeNotFound)?; - - trace!(org = org_name, "lookup: matched org"); - let (ino, attr) = self.ensure_org_inode(org_idx).await; - let rc = self - .composite - .icache - .inc_rc(ino) - .await - .ok_or(LookupError::InodeNotFound)?; - trace!(ino, org = org_name, rc, "lookup: resolved org inode"); - Ok(attr) - } - InodeRole::OrgOwned => self.composite.delegated_lookup(parent, name).await, - } - } - - #[instrument(name = "MesaFS::getattr", skip(self))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.composite.delegated_getattr(ino).await - } - - #[instrument(name = "MesaFS::readdir", skip(self))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; - match role { - InodeRole::Root => { - let org_info: Vec<(usize, String)> = self - .composite - .slots - .iter() - 
.enumerate() - .map(|(idx, s)| (idx, s.inner.name().to_owned())) - .collect(); - - let mut entries = Vec::with_capacity(org_info.len()); - for (org_idx, name) in &org_info { - let (org_ino, _) = self.ensure_org_inode(*org_idx).await; - entries.push(DirEntry { - ino: org_ino, - name: name.clone().into(), - kind: DirEntryType::Directory, - }); - } - - trace!(entry_count = entries.len(), "readdir: listing orgs"); - self.composite.readdir_buf = entries; - Ok(&self.composite.readdir_buf) - } - InodeRole::OrgOwned => self.composite.delegated_readdir(ino).await, - } - } - - #[instrument(name = "MesaFS::open", skip(self))] - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { - self.composite.delegated_open(ino, flags).await - } - - #[instrument(name = "MesaFS::read", skip(self))] - async fn read( - &mut self, - ino: Inode, - fh: FileHandle, - offset: u64, - size: u32, - flags: OpenFlags, - lock_owner: Option, - ) -> Result { - self.composite - .delegated_read(ino, fh, offset, size, flags, lock_owner) - .await - } - - #[instrument(name = "MesaFS::release", skip(self))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - self.composite - .delegated_release(ino, fh, flags, flush) - .await - } - - #[instrument(name = "MesaFS::forget", skip(self))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - // MesaFS has no extra state to clean up on eviction (unlike OrgFs::owner_inodes). 
- let _ = self.composite.delegated_forget(ino, nlookups).await; - } - - async fn statfs(&mut self) -> Result { - Ok(self.composite.delegated_statfs()) - } -} diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs deleted file mode 100644 index 1f3b8b5f..00000000 --- a/src/fs/mescloud/org.rs +++ /dev/null @@ -1,597 +0,0 @@ -use std::collections::HashMap; -use std::ffi::OsStr; -use std::future::Future; -use std::time::SystemTime; - -use bytes::Bytes; -use futures::TryStreamExt as _; -use mesa_dev::MesaClient; -use secrecy::SecretString; -use tracing::{Instrument as _, instrument, trace, warn}; - -pub use super::common::{ - GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::common::{InodeControlBlock, MesaApiError}; -use super::composite::{ChildSlot, CompositeFs}; -use super::icache as mescloud_icache; -use super::icache::MescloudICache; -use super::repo::RepoFs; -use crate::app_config::CacheConfig; -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, - OpenFlags, -}; - -pub(super) struct OrgResolver { - fs_owner: (u32, u32), - block_size: u32, -} - -impl IcbResolver for OrgResolver { - type Icb = InodeControlBlock; - type Error = LookupError; - - fn resolve( - &self, - ino: Inode, - stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let fs_owner = self.fs_owner; - let block_size = self.block_size; - async move { - let stub = stub.unwrap_or_else(|| InodeControlBlock { - parent: None, - path: "/".into(), - rc: 0, - attr: None, - children: None, - }); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }; - Ok(InodeControlBlock { - attr: Some(attr), - children: Some(vec![]), - ..stub - }) 
- } - .instrument(tracing::info_span!("OrgResolver::resolve", ino)) - } -} - -#[derive(Debug, Clone)] -pub struct OrgConfig { - pub name: String, - pub api_key: SecretString, -} - -/// Classifies an inode by its role in the org hierarchy. -enum InodeRole { - /// The org root directory. - OrgRoot, - /// A virtual owner directory (github only). - OwnerDir, - /// An inode owned by some repo. - RepoOwned, -} - -/// A filesystem rooted at a single organization. -/// -/// Composes multiple [`RepoFs`] instances, each with its own inode namespace, -/// delegating to [`CompositeFs`] for inode/fh translation at each boundary. -pub struct OrgFs { - name: String, - client: MesaClient, - composite: CompositeFs, - /// Maps org-level owner-dir inodes to owner name (github only). - owner_inodes: HashMap, - cache_config: CacheConfig, -} - -impl OrgFs { - pub(crate) const ROOT_INO: Inode = 1; - const BLOCK_SIZE: u32 = 4096; - - /// The name of the organization. - #[must_use] - pub(crate) fn name(&self) -> &str { - &self.name - } - - /// Whether this org uses the github two-level owner/repo hierarchy. - /// TODO(MES-674): Cleanup "special" casing for github. - fn is_github(&self) -> bool { - self.name == "github" - } - - /// Encode "owner/repo" to base64 for API calls. - /// TODO(MES-674): Cleanup "special" casing for github. - fn encode_github_repo_name(decoded: &str) -> String { - use base64::Engine as _; - base64::engine::general_purpose::STANDARD.encode(decoded) - } - - /// Ensure an inode exists for a virtual owner directory (github only). Does NOT bump rc. - /// TODO(MES-674): Cleanup "special" casing for github. 
- async fn ensure_owner_inode(&mut self, owner: &str) -> (Inode, FileAttr) { - // Check existing - let mut stale_ino = None; - for (&ino, existing_owner) in &self.owner_inodes { - if existing_owner == owner { - if let Some(attr) = self.composite.icache.get_attr(ino).await { - return (ino, attr); - } - if self.composite.icache.contains(ino) { - // ICB exists but attr missing — rebuild and cache - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - return (ino, attr); - } - // ICB was evicted — mark for cleanup - stale_ino = Some(ino); - break; - } - } - if let Some(ino) = stale_ino { - self.owner_inodes.remove(&ino); - } - - // Allocate new - let ino = self.composite.icache.allocate_inode(); - let now = SystemTime::now(); - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: owner.into(), - parent: Some(Self::ROOT_INO), - attr: None, - children: None, - }, - ) - .await; - self.owner_inodes.insert(ino, owner.to_owned()); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) - } - - #[must_use] - pub fn new( - name: String, - client: MesaClient, - fs_owner: (u32, u32), - cache_config: CacheConfig, - ) -> Self { - let resolver = OrgResolver { - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - Self { - name, - client, - composite: CompositeFs { - icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - child_inodes: HashMap::new(), - inode_to_slot: HashMap::new(), - slots: Vec::new(), - }, - owner_inodes: 
HashMap::new(), - cache_config, - } - } - - /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> Option { - if ino == Self::ROOT_INO { - return Some(InodeRole::OrgRoot); - } - if self.owner_inodes.contains_key(&ino) { - return Some(InodeRole::OwnerDir); - } - if self.composite.child_inodes.contains_key(&ino) { - return Some(InodeRole::RepoOwned); - } - if self.composite.slot_for_inode(ino).is_some() { - return Some(InodeRole::RepoOwned); - } - None - } - - /// Ensure an inode + `RepoFs` exists for the given repo name. - /// Does NOT bump rc. - /// - /// - `repo_name`: name used for API calls / `RepoFs` (base64-encoded for github) - /// - `display_name`: name shown in filesystem ("linux" for github, same as `repo_name` otherwise) - /// - `parent_ino`: owner-dir inode for github, `ROOT_INO` otherwise - async fn ensure_repo_inode( - &mut self, - repo_name: &str, - display_name: &str, - default_branch: &str, - parent_ino: Inode, - ) -> (Inode, FileAttr) { - // Check existing repos. - for (&ino, &idx) in &self.composite.child_inodes { - if self.composite.slots[idx].inner.repo_name() == repo_name { - if let Some(attr) = self.composite.icache.get_attr(ino).await { - let rc = self - .composite - .icache - .get_icb(ino, |icb| icb.rc) - .await - .unwrap_or(0); - trace!(ino, repo = repo_name, rc, "ensure_repo_inode: reusing"); - return (ino, attr); - } - warn!( - ino, - repo = repo_name, - "ensure_repo_inode: attr missing, rebuilding" - ); - return self.make_repo_dir_attr(ino).await; - } - } - - // Check for orphaned slot (slot exists but not in child_inodes). - if let Some(idx) = self - .composite - .slots - .iter() - .position(|s| s.inner.repo_name() == repo_name) - { - return self.register_repo_slot(idx, display_name, parent_ino).await; - } - - // Allocate truly new slot. 
- let ino = self.composite.icache.allocate_inode(); - trace!( - ino, - repo = repo_name, - "ensure_repo_inode: allocated new inode" - ); - - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - attr: None, - children: None, - }, - ) - .await; - - let repo = RepoFs::new( - self.client.clone(), - self.name.clone(), - repo_name.to_owned(), - default_branch.to_owned(), - self.composite.icache.fs_owner(), - // TODO(markovejnovic): Unnecessary clone. Refactoring for clearer ownership semantics - // would be ideal. - self.cache_config.clone(), - ) - .await; - - let mut bridge = HashMapBridge::new(); - bridge.insert_inode(ino, RepoFs::ROOT_INO); - - let idx = self.composite.slots.len(); - self.composite.slots.push(ChildSlot { - inner: repo, - bridge, - }); - self.composite.child_inodes.insert(ino, idx); - self.composite.inode_to_slot.insert(ino, idx); - - self.make_repo_dir_attr(ino).await - } - - /// Allocate a new inode, register it in an existing (orphaned) slot, and - /// return `(ino, attr)`. 
- async fn register_repo_slot( - &mut self, - idx: usize, - display_name: &str, - parent_ino: Inode, - ) -> (Inode, FileAttr) { - let ino = self.composite.icache.allocate_inode(); - trace!(ino, idx, "register_repo_slot: reusing orphaned slot"); - - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - attr: None, - children: None, - }, - ) - .await; - - warn!( - ino, - idx, - "register_repo_slot: resetting bridge for orphaned slot; \ - inner filesystem will not receive forget for stale inode mappings" - ); - self.composite.slots[idx].bridge = HashMapBridge::new(); - self.composite.slots[idx] - .bridge - .insert_inode(ino, RepoFs::ROOT_INO); - self.composite.child_inodes.insert(ino, idx); - self.composite.inode_to_slot.insert(ino, idx); - - self.make_repo_dir_attr(ino).await - } - - /// Build and cache a directory attr for `ino`, returning `(ino, attr)`. - async fn make_repo_dir_attr(&self, ino: Inode) -> (Inode, FileAttr) { - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) - } - - /// Fetch a repo by name via the API. 
- async fn wait_for_sync( - &self, - repo_name: &str, - ) -> Result { - self.client - .org(&self.name) - .repos() - .at(repo_name) - .get() - .await - .map_err(MesaApiError::from) - } -} - -#[async_trait::async_trait] -impl super::common::InodeCachePeek for OrgFs { - async fn peek_attr(&self, ino: Inode) -> Option { - self.composite.icache.get_attr(ino).await - } -} - -#[async_trait::async_trait] -impl Fs for OrgFs { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - - #[instrument(name = "OrgFs::lookup", skip(self), fields(org = %self.name))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { - let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; - match role { - InodeRole::OrgRoot => { - // TODO(MES-674): Cleanup "special" casing for github. - let name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; - - if self.is_github() { - // name is an owner like "torvalds" — create lazily, no API validation. - trace!(owner = name_str, "lookup: resolving github owner dir"); - let (ino, attr) = self.ensure_owner_inode(name_str).await; - self.composite - .icache - .inc_rc(ino) - .await - .ok_or(LookupError::InodeNotFound)?; - Ok(attr) - } else { - // Children of org root are repos. - trace!(repo = name_str, "lookup: resolving repo"); - - // Validate repo exists via API. - let repo = self.wait_for_sync(name_str).await?; - - let (ino, attr) = self - .ensure_repo_inode(name_str, name_str, &repo.default_branch, Self::ROOT_INO) - .await; - let rc = self - .composite - .icache - .inc_rc(ino) - .await - .ok_or(LookupError::InodeNotFound)?; - trace!(ino, repo = name_str, rc, "lookup: resolved repo inode"); - Ok(attr) - } - } - InodeRole::OwnerDir => { - // TODO(MES-674): Cleanup "special" casing for github. - // Parent is an owner dir, name is a repo like "linux". 
- let owner = self - .owner_inodes - .get(&parent) - .ok_or(LookupError::InodeNotFound)? - .clone(); - let repo_name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; - let full_decoded = format!("{owner}/{repo_name_str}"); - let encoded = Self::encode_github_repo_name(&full_decoded); - - trace!( - owner = %owner, - repo = repo_name_str, - encoded = %encoded, - "lookup: resolving github repo via owner dir" - ); - - // Validate via API (uses encoded name). - let repo = self.wait_for_sync(&encoded).await?; - - let (ino, attr) = self - .ensure_repo_inode(&encoded, repo_name_str, &repo.default_branch, parent) - .await; - self.composite - .icache - .inc_rc(ino) - .await - .ok_or(LookupError::InodeNotFound)?; - Ok(attr) - } - InodeRole::RepoOwned => self.composite.delegated_lookup(parent, name).await, - } - } - - #[instrument(name = "OrgFs::getattr", skip(self), fields(org = %self.name))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.composite.delegated_getattr(ino).await - } - - #[instrument(name = "OrgFs::readdir", skip(self), fields(org = %self.name))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; - match role { - InodeRole::OrgRoot => { - // TODO(MES-674): Cleanup "special" casing for github. - if self.is_github() { - return Err(ReadDirError::NotPermitted); - } - - // List repos via API. 
- let repos: Vec = self - .client - .org(&self.name) - .repos() - .list(None) - .try_collect() - .await - .map_err(MesaApiError::from)?; - - let repo_infos: Vec<(String, String)> = repos - .into_iter() - .filter_map(|r| { - let name = r.name?; - let branch = r.default_branch.unwrap_or_else(|| "main".to_owned()); - Some((name, branch)) - }) - .collect(); - trace!(count = repo_infos.len(), "readdir: fetched repo list"); - - let mut entries = Vec::with_capacity(repo_infos.len()); - for (repo_name, default_branch) in &repo_infos { - let (repo_ino, _) = self - .ensure_repo_inode(repo_name, repo_name, default_branch, Self::ROOT_INO) - .await; - entries.push(DirEntry { - ino: repo_ino, - name: repo_name.clone().into(), - kind: DirEntryType::Directory, - }); - } - - self.composite.readdir_buf = entries; - Ok(&self.composite.readdir_buf) - } - InodeRole::OwnerDir if self.is_github() => { - // TODO(MES-674): Cleanup "special" casing for github. - Err(ReadDirError::NotPermitted) - } - InodeRole::OwnerDir => Err(ReadDirError::NotADirectory), - InodeRole::RepoOwned => self.composite.delegated_readdir(ino).await, - } - } - - #[instrument(name = "OrgFs::open", skip(self), fields(org = %self.name))] - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { - self.composite.delegated_open(ino, flags).await - } - - #[instrument(name = "OrgFs::read", skip(self), fields(org = %self.name))] - async fn read( - &mut self, - ino: Inode, - fh: FileHandle, - offset: u64, - size: u32, - flags: OpenFlags, - lock_owner: Option, - ) -> Result { - self.composite - .delegated_read(ino, fh, offset, size, flags, lock_owner) - .await - } - - #[instrument(name = "OrgFs::release", skip(self), fields(org = %self.name))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - self.composite - .delegated_release(ino, fh, flags, flush) - .await - } - - #[instrument(name = "OrgFs::forget", skip(self), fields(org = 
%self.name))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - let evicted = self.composite.delegated_forget(ino, nlookups).await; - if evicted { - self.owner_inodes.remove(&ino); - } - } - - async fn statfs(&mut self) -> Result { - Ok(self.composite.delegated_statfs()) - } -} diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index 11b334a7..aae85491 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -2,543 +2,385 @@ //! //! This module directly accesses the mesa repo through the Rust SDK, on a per-repo basis. +use std::ffi::{OsStr, OsString}; use std::future::Future; -use std::{collections::HashMap, ffi::OsStr, path::PathBuf, time::SystemTime}; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::SystemTime; use base64::Engine as _; use bytes::Bytes; use mesa_dev::MesaClient; use mesa_dev::low_level::content::{Content, DirEntry as MesaDirEntry}; use num_traits::cast::ToPrimitive as _; -use tracing::{Instrument as _, instrument, trace, warn}; +use tracing::warn; use git_fs::cache::fcache::FileCache; use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _}; +use git_fs::fs::async_fs::{FileReader, FsDataProvider}; +use git_fs::fs::{INode, INodeType, InodeAddr, InodePerms, OpenFlags as AsyncOpenFlags}; -use crate::app_config::CacheConfig; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FileOpenOptions, FilesystemStats, Fs, Inode, - LockOwner, OpenFile, OpenFlags, -}; - -use super::common::MesaApiError; -pub use super::common::{ - GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::icache as mescloud_icache; -use super::icache::{InodeControlBlock, MescloudICache}; - -pub(super) struct RepoResolver { +use super::common::{MesaApiError, mesa_api_error_to_io}; + +#[derive(Clone)] +pub struct MesRepoProvider { + inner: Arc, +} 
+ +struct MesRepoProviderInner { client: MesaClient, org_name: String, repo_name: String, ref_: String, fs_owner: (u32, u32), - block_size: u32, + next_addr: AtomicU64, + /// Maps inode addresses to repo-relative paths (e.g. `"src/main.rs"`). + /// Root maps to an empty `PathBuf`. + /// + /// Exists alongside the [`DCache`](git_fs::fs::dcache::DCache) because + /// they serve different purposes: the dcache maps + /// `(parent_addr, child_name) -> child_addr` (single-hop name resolution), + /// while this map provides the full repo-relative path needed for Mesa API + /// calls. Reconstructing the full path from the dcache would require + /// walking parent pointers to the root on every API call; this map + /// materializes that walk as an O(1) lookup. + /// + /// Entries are inserted during `lookup`/`readdir` and removed via + /// [`forget`](MesRepoProvider::remove_path) when the FUSE refcount reaches zero. + path_map: scc::HashMap, + file_cache: Option>>, } -impl IcbResolver for RepoResolver { - type Icb = InodeControlBlock; - type Error = LookupError; +impl MesRepoProvider { + pub(super) fn new( + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + fs_owner: (u32, u32), + file_cache: Option>>, + ) -> Self { + Self { + inner: Arc::new(MesRepoProviderInner { + client, + org_name, + repo_name, + ref_, + fs_owner, + next_addr: AtomicU64::new(2), // 1 is reserved for root + path_map: scc::HashMap::new(), + file_cache, + }), + } + } - fn resolve( - &self, - ino: Inode, - stub: Option, - cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let client = self.client.clone(); - let org_name = self.org_name.clone(); - let repo_name = self.repo_name.clone(); - let ref_ = self.ref_.clone(); - let fs_owner = self.fs_owner; - let block_size = self.block_size; + /// Store the path for the root inode address.
+ pub(super) fn seed_root_path(&self, root_addr: InodeAddr) { + // Root maps to empty PathBuf (no path prefix for API calls) + drop(self.inner.path_map.insert_sync(root_addr, PathBuf::new())); + } - async move { - let stub = stub.ok_or(LookupError::InodeNotFound)?; - let file_path = build_repo_path(stub.parent, &stub.path, cache, RepoFs::ROOT_INO).await; + /// Remove the path entry for an inode. Called during forget/cleanup. + fn remove_path(&self, addr: InodeAddr) { + self.inner.path_map.remove_sync(&addr); + } - // Non-root inodes must have a resolvable path. - if stub.parent.is_some() && file_path.is_none() { - return Err(LookupError::InodeNotFound); - } + /// The name of the repository. + #[expect( + dead_code, + reason = "useful diagnostic accessor retained for future use" + )] + pub(super) fn repo_name(&self) -> &str { + &self.inner.repo_name + } +} - let content = client - .org(&org_name) +impl FsDataProvider for MesRepoProvider { + type Reader = MesFileReader; + + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let name = name.to_os_string(); + async move { + let parent_path = inner + .path_map + .get_async(&parent.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let child_path = parent_path.join(&name); + let child_path_str = child_path.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })?; + + let content = inner + .client + .org(&inner.org_name) .repos() - .at(&repo_name) + .at(&inner.repo_name) .content() - .get(Some(ref_.as_str()), file_path.as_deref(), Some(1u64)) + .get(Some(inner.ref_.as_str()), Some(child_path_str), Some(1u64)) .await - .map_err(MesaApiError::from)?; + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; let now = SystemTime::now(); - let attr = match &content { - Content::File(f) => { - let size = 
f.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: mescloud_icache::make_common_file_attr( - ino, 0o644, now, now, fs_owner, block_size, - ), - size, - blocks: mescloud_icache::blocks_of_size(block_size, size), - } - } - Content::Symlink(s) => { - let size = s.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: mescloud_icache::make_common_file_attr( - ino, 0o644, now, now, fs_owner, block_size, - ), - size, - blocks: mescloud_icache::blocks_of_size(block_size, size), - } - } - Content::Dir(_) => FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }, + let (uid, gid) = inner.fs_owner; + + let (itype, size) = match &content { + Content::File(f) => (INodeType::File, f.size.to_u64().unwrap_or(0)), + Content::Symlink(s) => (INodeType::File, s.size.to_u64().unwrap_or(0)), + Content::Dir(_) => (INodeType::Directory, 0), }; - let children = match content { - Content::Dir(d) => Some( - d.entries - .into_iter() - .filter_map(|e| { - let (name, kind) = match e { - MesaDirEntry::File(f) => (f.name?, DirEntryType::RegularFile), - // TODO(MES-712): return DirEntryType::Symlink once readlink is wired up. 
- MesaDirEntry::Symlink(s) => (s.name?, DirEntryType::RegularFile), - MesaDirEntry::Dir(d) => (d.name?, DirEntryType::Directory), - }; - Some((name, kind)) - }) - .collect(), - ), - Content::File(_) | Content::Symlink(_) => None, + let perms = if itype == INodeType::Directory { + InodePerms::from_bits_truncate(0o755) + } else { + InodePerms::from_bits_truncate(0o644) }; - Ok(InodeControlBlock { - parent: stub.parent, - path: stub.path, - rc: stub.rc, - attr: Some(attr), - children, + let addr = inner.next_addr.fetch_add(1, Ordering::Relaxed); + drop(inner.path_map.insert_async(addr, child_path).await); + + Ok(INode { + addr, + permissions: perms, + uid, + gid, + create_time: now, + last_modified_at: now, + parent: Some(parent.addr), + size, + itype, }) } - .instrument(tracing::info_span!("RepoResolver::resolve", ino)) } -} -/// Walk the parent chain in the cache to build the repo-relative path. -/// Returns `None` for the root inode (maps to `path=None` in the mesa content API). -async fn build_repo_path( - parent: Option, - name: &std::path::Path, - cache: &AsyncICache, - root_ino: Inode, -) -> Option { - /// Maximum parent-chain depth before bailing out. Prevents infinite loops - /// if a bug creates a cycle in the parent pointers. - const MAX_DEPTH: usize = 1024; - - let parent = parent?; - if parent == root_ino { - return name.to_str().map(String::from); - } + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let inner = Arc::clone(&self.inner); + async move { + let parent_path = inner + .path_map + .get_async(&parent.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let api_path = if parent_path.as_os_str().is_empty() { + None + } else { + Some( + parent_path + .to_str() + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })? 
+ .to_owned(), + ) + }; - let mut components = vec![name.to_path_buf()]; - let mut current = parent; - for _ in 0..MAX_DEPTH { - if current == root_ino { - break; - } - let (path, next_parent) = cache - .get_icb(current, |icb| (icb.path.clone(), icb.parent)) - .await?; - components.push(path); - current = next_parent?; - } - if current != root_ino { - tracing::warn!("build_repo_path: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle"); - return None; - } - components.reverse(); - let joined: PathBuf = components.iter().collect(); - joined.to_str().map(String::from) -} + let content = inner + .client + .org(&inner.org_name) + .repos() + .at(&inner.repo_name) + .content() + .get(Some(inner.ref_.as_str()), api_path.as_deref(), Some(1u64)) + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; -/// A filesystem rooted at a single mesa repository. -/// -/// Implements [`Fs`] for navigating files and directories within one repo. -/// Does not handle organizations or multi-repo hierarchy — that is [`super::MesaFS`]'s job. 
-pub struct RepoFs { - client: MesaClient, - org_name: String, - repo_name: String, - ref_: String, + let dir = match content { + Content::Dir(d) => d, + Content::File(_) | Content::Symlink(_) => { + return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); + } + }; - icache: MescloudICache, - file_table: FileTable, - readdir_buf: Vec, - open_files: HashMap, - file_cache: Option>, -} + let now = SystemTime::now(); + let (uid, gid) = inner.fs_owner; + let mut entries = Vec::with_capacity(dir.entries.len()); + + for entry in dir.entries { + let (name, itype, size) = match entry { + MesaDirEntry::File(f) => { + let Some(name) = f.name else { continue }; + (name, INodeType::File, f.size.to_u64().unwrap_or(0)) + } + MesaDirEntry::Symlink(s) => { + let Some(name) = s.name else { continue }; + (name, INodeType::File, s.size.to_u64().unwrap_or(0)) + } + MesaDirEntry::Dir(d) => { + let Some(name) = d.name else { continue }; + (name, INodeType::Directory, 0) + } + }; -impl RepoFs { - pub(crate) const ROOT_INO: Inode = 1; - const BLOCK_SIZE: u32 = 4096; + let perms = if itype == INodeType::Directory { + InodePerms::from_bits_truncate(0o755) + } else { + InodePerms::from_bits_truncate(0o644) + }; - /// Create a new `RepoFs` for a specific org and repo. 
- pub async fn new( - client: MesaClient, - org_name: String, - repo_name: String, - ref_: String, - fs_owner: (u32, u32), - cache_config: CacheConfig, - ) -> Self { - let resolver = RepoResolver { - client: client.clone(), - org_name: org_name.clone(), - repo_name: repo_name.clone(), - ref_: ref_.clone(), - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - - let file_cache = match cache_config.max_size { - Some(max_size) if max_size.as_u64() > 0 => { - let cache_dir = cache_config.path.join(&org_name).join(&repo_name); - let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); - match FileCache::new(&cache_dir, max_bytes).await { - Ok(cache) => Some(cache), - Err(e) => { - warn!(error = ?e, org = %org_name, repo = %repo_name, - "failed to create file cache, continuing without caching",); - None - } - } + let addr = inner.next_addr.fetch_add(1, Ordering::Relaxed); + let child_path = parent_path.join(&name); + drop(inner.path_map.insert_async(addr, child_path).await); + + let inode = INode { + addr, + permissions: perms, + uid, + gid, + create_time: now, + last_modified_at: now, + parent: Some(parent.addr), + size, + itype, + }; + + entries.push((OsString::from(name), inode)); } - _ => None, - }; - Self { - client, - org_name, - repo_name, - ref_, - icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - open_files: HashMap::new(), - file_cache, + Ok(entries) } } - /// The name of the repository this filesystem is rooted at. - pub(crate) fn repo_name(&self) -> &str { - &self.repo_name - } - - /// Build the repo-relative path for an inode by walking up the parent chain. - /// - /// Returns `None` for the root inode (the repo top-level maps to `path=None` in the - /// mesa content API). - async fn path_of_inode(&self, ino: Inode) -> Option { - /// Maximum parent-chain depth before bailing out. 
- const MAX_DEPTH: usize = 1024; - - if ino == Self::ROOT_INO { - return None; - } - - let mut components = Vec::new(); - let mut current = ino; - for _ in 0..MAX_DEPTH { - if current == Self::ROOT_INO { - break; - } - let (path, parent) = self - .icache - .get_icb(current, |icb| (icb.path.clone(), icb.parent)) - .await?; - components.push(path); - current = parent?; - } - if current != Self::ROOT_INO { - tracing::warn!( - ino, - "path_of_inode: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle" - ); - return None; + fn open( + &self, + inode: INode, + _flags: AsyncOpenFlags, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + async move { + let path = inner + .path_map + .get_async(&inode.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + Ok(MesFileReader { + inner: Arc::new(MesFileReaderCtx { + client: inner.client.clone(), + org_name: inner.org_name.clone(), + repo_name: inner.repo_name.clone(), + ref_: inner.ref_.clone(), + path, + file_cache: inner.file_cache.clone(), + inode_addr: inode.addr, + }), + }) } - components.reverse(); - let joined: PathBuf = components.iter().collect(); - joined.to_str().map(String::from) } -} -#[async_trait::async_trait] -impl super::common::InodeCachePeek for RepoFs { - async fn peek_attr(&self, ino: Inode) -> Option { - self.icache.get_attr(ino).await + /// Evicts the inode's entry from [`path_map`](MesRepoProviderInner::path_map). + /// Called automatically by `InodeForget` when the FUSE refcount drops to zero. 
+ fn forget(&self, addr: InodeAddr) { + self.remove_path(addr); } } -#[async_trait::async_trait] -impl Fs for RepoFs { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - - #[instrument(name = "RepoFs::lookup", skip(self), fields(repo = %self.repo_name))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { - debug_assert!( - self.icache.contains(parent), - "lookup: parent inode {parent} not in inode table" - ); - - let ino = self.icache.ensure_child_ino(parent, name).await; - let attr = self - .icache - .get_or_resolve(ino, |icb| icb.attr) - .await? - .ok_or(LookupError::InodeNotFound)?; - - let rc = self - .icache - .inc_rc(ino) - .await - .ok_or(LookupError::InodeNotFound)?; - trace!(ino, ?name, rc, "resolved inode"); - Ok(attr) - } - - #[instrument(name = "RepoFs::getattr", skip(self), fields(repo = %self.repo_name))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.icache.get_attr(ino).await.ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) - } - - #[instrument(name = "RepoFs::readdir", skip(self), fields(repo = %self.repo_name))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - debug_assert!( - self.icache.contains(ino), - "readdir: inode {ino} not in inode table" - ); - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::Directory { .. }) | None - ), - "readdir: inode {ino} has non-directory cached attr" - ); - - let children = self - .icache - .get_or_resolve(ino, |icb| icb.children.clone()) - .await? 
- .ok_or(ReadDirError::NotADirectory)?; - - trace!( - ino, - count = children.len(), - "readdir: resolved directory listing from icache" - ); - - self.icache.evict_zero_rc_children(ino).await; - - let mut entries = Vec::with_capacity(children.len()); - for (name, kind) in &children { - let child_ino = self.icache.ensure_child_ino(ino, OsStr::new(name)).await; - // Only cache directory attrs in readdir. File attrs are left as - // None so that lookup triggers the resolver to fetch the real file - // size. Caching placeholder file attrs (size=0) would poison - // needs_resolve(), preventing resolution on subsequent lookups. - if *kind == DirEntryType::Directory { - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - child_ino, - 0o755, - now, - now, - self.icache.fs_owner(), - self.icache.block_size(), - ), - }; - self.icache.cache_attr(child_ino, attr).await; - } - entries.push(DirEntry { - ino: child_ino, - name: name.clone().into(), - kind: *kind, - }); - } - - self.readdir_buf = entries; - Ok(&self.readdir_buf) - } +pub struct MesFileReader { + inner: Arc, +} - #[instrument(name = "RepoFs::open", skip(self), fields(repo = %self.repo_name))] - async fn open(&mut self, ino: Inode, _flags: OpenFlags) -> Result { - if !self.icache.contains(ino) { - warn!(ino, "open on unknown inode"); - return Err(OpenError::InodeNotFound); - } - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::RegularFile { .. 
}) | None - ), - "open: inode {ino} has non-file cached attr" - ); - let fh = self.file_table.allocate(); - self.open_files.insert(fh, ino); - trace!(ino, fh, "assigned file handle"); - Ok(OpenFile { - handle: fh, - options: FileOpenOptions::empty(), - }) - } +struct MesFileReaderCtx { + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + path: PathBuf, + file_cache: Option>>, + inode_addr: InodeAddr, +} - #[instrument(name = "RepoFs::read", skip(self), fields(repo = %self.repo_name))] - async fn read( - &mut self, - ino: Inode, - fh: FileHandle, +impl FileReader for MesFileReader { + fn read( + &self, offset: u64, size: u32, - _flags: OpenFlags, - _lock_owner: Option, - ) -> Result { - let &file_ino = self.open_files.get(&fh).ok_or_else(|| { - warn!(fh, "read on unknown file handle"); - ReadError::FileNotOpen - })?; - debug_assert!( - file_ino == ino, - "read: file handle {fh} maps to inode {file_ino}, but caller passed inode {ino}" - ); - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::RegularFile { .. }) | None - ), - "read: inode {ino} has non-file cached attr" - ); - - // Try the file cache first. - if let Some(cache) = &self.file_cache - && let Some(data) = cache.get(&ino).await - { - let start = usize::try_from(offset) - .unwrap_or(data.len()) - .min(data.len()); - let end = start.saturating_add(size as usize).min(data.len()); - trace!( - ino, - fh, - cached = true, - decoded_len = data.len(), - start, - end, - "read content" - ); - return Ok(Bytes::copy_from_slice(&data[start..end])); - } - - // Cache miss — fetch from the Mesa API. 
- let file_path = self.path_of_inode(ino).await; + ) -> impl Future> + Send { + let ctx = Arc::clone(&self.inner); - if ino != Self::ROOT_INO && file_path.is_none() { - warn!(ino, "read: path_of_inode returned None for non-root inode"); - return Err(ReadError::InodeNotFound); - } - - let content = self - .client - .org(&self.org_name) - .repos() - .at(&self.repo_name) - .content() - .get(Some(self.ref_.as_str()), file_path.as_deref(), None) - .await - .map_err(MesaApiError::from)?; - - let encoded_content = match content { - Content::File(f) => f.content.unwrap_or_default(), - // TODO(MES-712): return ReadError::NotAFile once symlinks are surfaced as - // DirEntryType::Symlink, and implement readlink to return the link target. - Content::Symlink(s) => s.content.unwrap_or_default(), - Content::Dir(_) => return Err(ReadError::NotAFile), - }; - - let decoded = base64::engine::general_purpose::STANDARD.decode(&encoded_content)?; - - let start = usize::try_from(offset) - .unwrap_or(decoded.len()) - .min(decoded.len()); - let end = start.saturating_add(size as usize).min(decoded.len()); - let result = Bytes::copy_from_slice(&decoded[start..end]); - trace!(ino, fh, cached = false, path = ?file_path, decoded_len = decoded.len(), start, end, "read content"); - - // Store the decoded content in the cache for future reads. - if let Some(cache) = &self.file_cache - && let Err(e) = cache.insert(&ino, decoded).await - { - warn!(error = ?e, ino, "failed to cache file content"); - } + async move { + // Try the file cache first. + if let Some(cache) = &ctx.file_cache + && let Some(data) = cache.get(&ctx.inode_addr).await + { + let start = usize::try_from(offset) + .unwrap_or(data.len()) + .min(data.len()); + let end = start.saturating_add(size as usize).min(data.len()); + return Ok(Bytes::copy_from_slice(&data[start..end])); + } - Ok(result) - } + // Cache miss -- fetch from the Mesa API. 
+ let path_str = ctx.path.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })?; + + let api_path = if path_str.is_empty() { + None + } else { + Some(path_str) + }; - #[instrument(name = "RepoFs::release", skip(self), fields(repo = %self.repo_name))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - _flags: OpenFlags, - _flush: bool, - ) -> Result<(), ReleaseError> { - let released_ino = self.open_files.remove(&fh).ok_or_else(|| { - warn!(fh, "release on unknown file handle"); - ReleaseError::FileNotOpen - })?; - debug_assert!( - released_ino == ino, - "release: file handle {fh} mapped to inode {released_ino}, but caller passed inode {ino}" - ); - trace!(ino = released_ino, fh, "closed file handle"); - Ok(()) - } + let content = ctx + .client + .org(&ctx.org_name) + .repos() + .at(&ctx.repo_name) + .content() + .get(Some(ctx.ref_.as_str()), api_path, None) + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let encoded_content = match content { + Content::File(f) => f.content.unwrap_or_default(), + Content::Symlink(s) => s.content.unwrap_or_default(), + Content::Dir(_) => { + return Err(std::io::Error::from_raw_os_error(libc::EISDIR)); + } + }; - #[instrument(name = "RepoFs::forget", skip(self), fields(repo = %self.repo_name))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - debug_assert!( - self.icache.contains(ino), - "forget: inode {ino} not in inode table" - ); + let decoded = base64::engine::general_purpose::STANDARD + .decode(&encoded_content) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - self.icache.forget(ino, nlookups).await; - } + let start = usize::try_from(offset) + .unwrap_or(decoded.len()) + .min(decoded.len()); + let end = start.saturating_add(size as usize).min(decoded.len()); + let result = Bytes::copy_from_slice(&decoded[start..end]); + + // Store the decoded content in the cache for 
future reads. + if let Some(cache) = &ctx.file_cache + && let Err(e) = cache.insert(&ctx.inode_addr, decoded).await + { + warn!(error = ?e, inode_addr = ctx.inode_addr, "failed to cache file content"); + } - async fn statfs(&mut self) -> Result { - Ok(self.icache.statfs()) + Ok(result) + } } } diff --git a/src/fs/mescloud/roots.rs b/src/fs/mescloud/roots.rs new file mode 100644 index 00000000..7c8701db --- /dev/null +++ b/src/fs/mescloud/roots.rs @@ -0,0 +1,474 @@ +//! Domain-specific [`CompositeRoot`] implementations and the [`OrgChildDP`] enum. +//! +//! Bridges the generic `CompositeFs` from `lib/fs/composite.rs` with +//! Mesa/GitHub-specific org and repo resolution logic. +//! +//! These types are wired into the daemon entry point, replacing the old +//! `MesaFS` + `OrgFs` pipeline. + +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; +use std::time::SystemTime; + +use base64::Engine as _; +use futures::TryStreamExt as _; +use mesa_dev::MesaClient; +use tracing::warn; + +use git_fs::cache::fcache::FileCache; +use git_fs::fs::async_fs::{FileReader, FsDataProvider}; +use git_fs::fs::composite::{ChildDescriptor, CompositeFs, CompositeReader, CompositeRoot}; +use git_fs::fs::{INode, INodeType, InodeAddr, InodePerms, OpenFlags}; + +use super::common::{MesaApiError, mesa_api_error_to_io}; +use super::repo::{MesFileReader, MesRepoProvider}; +use crate::app_config::CacheConfig; + +const CHILD_ROOT_ADDR: InodeAddr = 1; + +/// Create a [`MesRepoProvider`] and its root [`INode`] for a given repo. 
+async fn create_repo_provider( + client: &MesaClient, + org_name: &str, + repo_name: &str, + ref_: &str, + fs_owner: (u32, u32), + cache_config: &CacheConfig, +) -> (MesRepoProvider, INode) { + let file_cache = match cache_config.max_size { + Some(max_size) if max_size.as_u64() > 0 => { + let cache_dir = cache_config.path.join(org_name).join(repo_name); + let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); + match FileCache::new(&cache_dir, max_bytes).await { + Ok(cache) => Some(Arc::new(cache)), + Err(e) => { + warn!(error = ?e, org = %org_name, repo = %repo_name, + "failed to create file cache, continuing without caching"); + None + } + } + } + _ => None, + }; + + let provider = MesRepoProvider::new( + client.clone(), + org_name.to_owned(), + repo_name.to_owned(), + ref_.to_owned(), + fs_owner, + file_cache, + ); + + provider.seed_root_path(CHILD_ROOT_ADDR); + + let now = SystemTime::now(); + let root_ino = INode { + addr: CHILD_ROOT_ADDR, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + + (provider, root_ino) +} + +/// Returns `Ok(())` if the error is a 404; otherwise returns the IO error. +/// +/// Callers use this to treat 404 as "not found" (return `Ok(None)`) while +/// propagating all other API errors. +fn check_not_found(e: MesaApiError) -> Result<(), std::io::Error> { + match &e { + MesaApiError::Response { status, .. } if *status == 404 => Ok(()), + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. 
} => Err(mesa_api_error_to_io(e)), + } +} + +pub struct StandardOrgRoot { + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl StandardOrgRoot { + pub fn new( + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), + ) -> Self { + Self { + client, + org_name, + cache_config, + fs_owner, + } + } +} + +impl CompositeRoot for StandardOrgRoot { + type ChildDP = MesRepoProvider; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let name_str = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "repo name contains non-UTF-8 characters", + ) + })?; + + let repo = match self + .client + .org(&self.org_name) + .repos() + .at(name_str) + .get() + .await + .map_err(MesaApiError::from) + { + Ok(repo) => repo, + Err(e) => { + check_not_found(e)?; + return Ok(None); + } + }; + + // Single-repo GET returns `default_branch: String` (non-optional), + // unlike the list endpoint which returns `Option`. 
+ let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + name_str, + &repo.default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + let repos: Vec = self + .client + .org(&self.org_name) + .repos() + .list(None) + .try_collect() + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let mut children = Vec::with_capacity(repos.len()); + for repo in repos { + let Some(repo_name) = repo.name else { + continue; + }; + let default_branch = repo.default_branch.unwrap_or_else(|| "main".to_owned()); + + let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + &repo_name, + &default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + children.push(ChildDescriptor { + name: OsString::from(repo_name), + provider, + root_ino, + }); + } + + Ok(children) + } +} + +pub struct GithubRepoRoot { + client: MesaClient, + org_name: String, + owner: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl CompositeRoot for GithubRepoRoot { + type ChildDP = MesRepoProvider; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let repo_name = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "repo name contains non-UTF-8 characters", + ) + })?; + + let full_decoded = format!("{}/{}", self.owner, repo_name); + let encoded = base64::engine::general_purpose::STANDARD.encode(&full_decoded); + + let repo = match self + .client + .org(&self.org_name) + .repos() + .at(&encoded) + .get() + .await + .map_err(MesaApiError::from) + { + Ok(repo) => repo, + Err(e) => { + check_not_found(e)?; + return Ok(None); + } + }; + + // Single-repo GET returns `default_branch: String` (non-optional). 
+ let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + &encoded, + &repo.default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Err(std::io::Error::from_raw_os_error(libc::EPERM)) + } +} + +pub struct GithubOrgRoot { + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl GithubOrgRoot { + pub fn new( + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), + ) -> Self { + Self { + client, + org_name, + cache_config, + fs_owner, + } + } +} + +impl CompositeRoot for GithubOrgRoot { + type ChildDP = CompositeFs; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let owner = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "owner name contains non-UTF-8 characters", + ) + })?; + + let repo_root = GithubRepoRoot { + client: self.client.clone(), + org_name: self.org_name.clone(), + owner: owner.to_owned(), + cache_config: self.cache_config.clone(), + fs_owner: self.fs_owner, + }; + + let composite = CompositeFs::new(repo_root, self.fs_owner); + let root_ino = composite.make_root_inode(); + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider: composite, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Err(std::io::Error::from_raw_os_error(libc::EPERM)) + } +} + +#[derive(Clone)] +pub enum OrgChildDP { + Standard(CompositeFs), + Github(CompositeFs), +} + +impl OrgChildDP { + fn make_root_inode(&self) -> INode { + match self { + Self::Standard(c) => c.make_root_inode(), + Self::Github(c) => c.make_root_inode(), + } + } +} + +impl FsDataProvider for OrgChildDP { + type Reader = OrgChildReader; + + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) 
-> impl Future> + Send { + let this = self.clone(); + let name = name.to_os_string(); + async move { + match this { + Self::Standard(c) => c.lookup(parent, &name).await, + Self::Github(c) => c.lookup(parent, &name).await, + } + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let this = self.clone(); + async move { + match this { + Self::Standard(c) => c.readdir(parent).await, + Self::Github(c) => c.readdir(parent).await, + } + } + } + + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send { + let this = self.clone(); + async move { + match this { + Self::Standard(c) => c.open(inode, flags).await.map(OrgChildReader::Standard), + Self::Github(c) => c.open(inode, flags).await.map(OrgChildReader::Github), + } + } + } + + fn forget(&self, addr: InodeAddr) { + match self { + Self::Standard(c) => c.forget(addr), + Self::Github(c) => c.forget(addr), + } + } +} + +pub enum OrgChildReader { + Standard(CompositeReader), + Github(CompositeReader>), +} + +impl std::fmt::Debug for OrgChildReader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Standard(_) => f.debug_tuple("Standard").finish(), + Self::Github(_) => f.debug_tuple("Github").finish(), + } + } +} + +impl FileReader for OrgChildReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { + match self { + Self::Standard(r) => futures::future::Either::Left(r.read(offset, size)), + Self::Github(r) => futures::future::Either::Right(r.read(offset, size)), + } + } + + fn close(&self) -> impl Future> + Send { + match self { + Self::Standard(r) => futures::future::Either::Left(r.close()), + Self::Github(r) => futures::future::Either::Right(r.close()), + } + } +} + +pub struct MesaRoot { + orgs: Vec<(OsString, OrgChildDP)>, +} + +impl MesaRoot { + pub fn new(orgs: Vec<(OsString, OrgChildDP)>) -> Self { + Self { orgs } + } +} + +impl CompositeRoot for MesaRoot { + type ChildDP = 
OrgChildDP; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let found = self.orgs.iter().find(|(n, _)| n == name); + match found { + Some((_, dp)) => Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider: dp.clone(), + root_ino: dp.make_root_inode(), + })), + None => Ok(None), + } + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Ok(self + .orgs + .iter() + .map(|(name, dp)| ChildDescriptor { + name: name.clone(), + provider: dp.clone(), + root_ino: dp.make_root_inode(), + }) + .collect()) + } +} diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 003e1b04..a696e56f 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,4 +1 @@ -pub mod fuser; -pub mod icache; pub mod mescloud; -pub mod r#trait; diff --git a/src/fs/trait.rs b/src/fs/trait.rs deleted file mode 100644 index f4d98529..00000000 --- a/src/fs/trait.rs +++ /dev/null @@ -1,375 +0,0 @@ -//! Generic trait for implementing filesystems. -//! -//! Note that this is a slightly cleaner interface than directly using fuser. The whole point of -//! this is to abstract away fuser-specific details. -use async_trait::async_trait; -use std::{ - ffi::{OsStr, OsString}, - time::{Duration, SystemTime}, -}; -use tracing::error; - -use bitflags::bitflags; -use bytes::Bytes; - -/// Type representing an inode. -pub type Inode = u64; - -pub type FileHandle = u64; - -/// An opaque lock owner identifier provided by the kernel. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct LockOwner(pub u64); - -bitflags! 
{ - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct Permissions: u16 { - // Other - const OTHER_EXECUTE = 1 << 0; - const OTHER_WRITE = 1 << 1; - const OTHER_READ = 1 << 2; - - // Group - const GROUP_EXECUTE = 1 << 3; - const GROUP_WRITE = 1 << 4; - const GROUP_READ = 1 << 5; - - // Owner - const OWNER_EXECUTE = 1 << 6; - const OWNER_WRITE = 1 << 7; - const OWNER_READ = 1 << 8; - - // Special bits - const STICKY = 1 << 9; - const SETGID = 1 << 10; - const SETUID = 1 << 11; - - const OTHER_RWX = Self::OTHER_READ.bits() - | Self::OTHER_WRITE.bits() - | Self::OTHER_EXECUTE.bits(); - const GROUP_RWX = Self::GROUP_READ.bits() - | Self::GROUP_WRITE.bits() - | Self::GROUP_EXECUTE.bits(); - const OWNER_RWX = Self::OWNER_READ.bits() - | Self::OWNER_WRITE.bits() - | Self::OWNER_EXECUTE.bits(); - } -} - -bitflags! { - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct OpenFlags: i32 { - // Access modes (mutually exclusive) - const RDONLY = libc::O_RDONLY; - const WRONLY = libc::O_WRONLY; - const RDWR = libc::O_RDWR; - - // Creation/status flags - const APPEND = libc::O_APPEND; - const TRUNC = libc::O_TRUNC; - const CREAT = libc::O_CREAT; - const EXCL = libc::O_EXCL; - - // Behavior flags - const NONBLOCK = libc::O_NONBLOCK; - const SYNC = libc::O_SYNC; - const DSYNC = libc::O_DSYNC; - const NOFOLLOW = libc::O_NOFOLLOW; - const CLOEXEC = libc::O_CLOEXEC; - const DIRECTORY = libc::O_DIRECTORY; - - #[cfg(target_os = "linux")] - const NOATIME = libc::O_NOATIME; - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct CommonFileAttr { - pub ino: Inode, - pub atime: SystemTime, - pub mtime: SystemTime, - pub ctime: SystemTime, - pub crtime: SystemTime, - pub perm: Permissions, - pub nlink: u32, - pub uid: u32, - pub gid: u32, - pub blksize: u32, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum FileAttr { - RegularFile { - common: CommonFileAttr, - size: u64, - blocks: u64, - }, - Directory { - common: CommonFileAttr, - 
}, - Symlink { - common: CommonFileAttr, - size: u64, - }, - CharDevice { - common: CommonFileAttr, - rdev: u64, - }, - BlockDevice { - common: CommonFileAttr, - rdev: u64, - }, - NamedPipe { - common: CommonFileAttr, - }, - Socket { - common: CommonFileAttr, - }, -} - -impl FileAttr { - pub fn common(&self) -> &CommonFileAttr { - match self { - Self::RegularFile { common, .. } - | Self::Directory { common } - | Self::Symlink { common, .. } - | Self::CharDevice { common, .. } - | Self::BlockDevice { common, .. } - | Self::NamedPipe { common } - | Self::Socket { common } => common, - } - } -} - -bitflags! { - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub (crate) struct FileOpenOptions: u32 { - const DIRECT_IO = 1 << 0; - const KEEP_CACHE = 1 << 1; - const NONSEEKABLE = 1 << 2; - const STREAM = 1 << 4; - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct OpenFile { - pub handle: FileHandle, - pub options: FileOpenOptions, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum DirEntryType { - RegularFile, - Directory, - Symlink, - CharDevice, - BlockDevice, - NamedPipe, - Socket, -} - -impl TryFrom for FileAttr { - type Error = (); - - #[expect( - clippy::cast_possible_truncation, - reason = "metadata mode/nlink/blksize narrowing is intentional" - )] - #[expect( - clippy::cast_sign_loss, - reason = "nsecs from MetadataExt is always in [0, 999_999_999]" - )] - fn try_from(meta: std::fs::Metadata) -> Result { - use std::os::unix::fs::FileTypeExt as _; - use std::os::unix::fs::MetadataExt as _; - - fn to_systime(secs: i64, nsecs: i64) -> SystemTime { - if secs >= 0 { - std::time::UNIX_EPOCH + Duration::new(secs.cast_unsigned(), nsecs as u32) - } else { - // nsecs is always in [0, 999_999_999] from MetadataExt. - // For negative secs, subtract whole seconds then add back nsecs. 
- std::time::UNIX_EPOCH - Duration::from_secs((-secs).cast_unsigned()) - + Duration::from_nanos(nsecs.cast_unsigned()) - } - } - - let common_attr = CommonFileAttr { - ino: meta.ino(), - atime: to_systime(meta.atime(), meta.atime_nsec()), - mtime: to_systime(meta.mtime(), meta.mtime_nsec()), - ctime: to_systime(meta.ctime(), meta.ctime_nsec()), - crtime: to_systime(0, 0), // Not available in std::fs::Metadata - perm: Permissions::from_bits_truncate(meta.mode() as u16), - nlink: meta.nlink() as u32, - uid: meta.uid(), - gid: meta.gid(), - blksize: meta.blksize() as u32, - }; - - let ft = meta.file_type(); - if ft.is_file() { - Ok(Self::RegularFile { - common: common_attr, - size: meta.len(), - blocks: meta.blocks(), - }) - } else if ft.is_dir() { - Ok(Self::Directory { - common: common_attr, - }) - } else if ft.is_symlink() { - Ok(Self::Symlink { - common: common_attr, - size: meta.len(), - }) - } else if ft.is_char_device() { - Ok(Self::CharDevice { - common: common_attr, - rdev: meta.rdev(), - }) - } else if ft.is_block_device() { - Ok(Self::BlockDevice { - common: common_attr, - rdev: meta.rdev(), - }) - } else if ft.is_fifo() { - Ok(Self::NamedPipe { - common: common_attr, - }) - } else if ft.is_socket() { - Ok(Self::Socket { - common: common_attr, - }) - } else { - debug_assert!( - false, - "Unknown file type encountered in FileAttr conversion" - ); - Err(()) - } - } -} - -impl From for DirEntryType { - fn from(attr: FileAttr) -> Self { - match attr { - FileAttr::RegularFile { .. } => Self::RegularFile, - FileAttr::Directory { .. } => Self::Directory, - FileAttr::Symlink { .. } => Self::Symlink, - FileAttr::CharDevice { .. } => Self::CharDevice, - FileAttr::BlockDevice { .. } => Self::BlockDevice, - FileAttr::NamedPipe { .. } => Self::NamedPipe, - FileAttr::Socket { .. 
} => Self::Socket, - } - } -} - -impl TryFrom for DirEntryType { - type Error = (); - - fn try_from(ft: std::fs::FileType) -> Result { - use std::os::unix::fs::FileTypeExt as _; - - if ft.is_file() { - Ok(Self::RegularFile) - } else if ft.is_dir() { - Ok(Self::Directory) - } else if ft.is_symlink() { - Ok(Self::Symlink) - } else if ft.is_char_device() { - Ok(Self::CharDevice) - } else if ft.is_block_device() { - Ok(Self::BlockDevice) - } else if ft.is_fifo() { - Ok(Self::NamedPipe) - } else if ft.is_socket() { - Ok(Self::Socket) - } else { - debug_assert!( - false, - "Unknown file type encountered in DirEntryType conversion" - ); - error!(ft = ?ft, "Unknown file type encountered in DirEntryType conversion"); - Err(()) - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DirEntry { - pub ino: Inode, - // TODO(markovejnovic): This OsString is hella expensive - pub name: OsString, - pub kind: DirEntryType, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct FilesystemStats { - pub block_size: u32, - pub fragment_size: u64, - pub total_blocks: u64, - pub free_blocks: u64, - pub available_blocks: u64, - pub total_inodes: u64, - pub free_inodes: u64, - pub available_inodes: u64, - pub filesystem_id: u64, - pub mount_flags: u32, - pub max_filename_length: u32, -} - -#[async_trait] -pub trait Fs { - type LookupError: std::error::Error; - type GetAttrError: std::error::Error; - type OpenError: std::error::Error; - type ReadError: std::error::Error; - type ReaddirError: std::error::Error; - type ReleaseError: std::error::Error; - - /// For each lookup call made by the kernel, it expects the icache to be updated with the - /// returned `FileAttr`. - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result; - - /// Can be called in two contexts -- the file is not open (in which case `fh` is `None`), - /// or the file is open (in which case `fh` is `Some`). 
- async fn getattr( - &mut self, - ino: Inode, - fh: Option, - ) -> Result; - - /// Read the contents of a directory. - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], Self::ReaddirError>; - - /// Open a file for reading. - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result; - - /// Read data from an open file. - #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] - async fn read( - &mut self, - ino: Inode, - fh: FileHandle, - offset: u64, - size: u32, - flags: OpenFlags, - lock_owner: Option, - ) -> Result; - - /// Called when the kernel closes a file handle. - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), Self::ReleaseError>; - - /// Called when the kernel is done with an inode. - async fn forget(&mut self, ino: Inode, nlookups: u64); - - /// Get filesystem statistics. - async fn statfs(&mut self) -> Result; -} diff --git a/tests/async_backed_correctness.rs b/tests/async_backed_correctness.rs new file mode 100644 index 00000000..097226aa --- /dev/null +++ b/tests/async_backed_correctness.rs @@ -0,0 +1,176 @@ +#![allow(clippy::unwrap_used, missing_docs)] + +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use tokio::sync::oneshot; + +use git_fs::cache::async_backed::FutureBackedCache; + +#[tokio::test] +async fn try_init_ok_caches_value() { + let cache = FutureBackedCache::::default(); + let result: Result = cache + .get_or_try_init(1, || async { Ok("hello".to_owned()) }) + .await; + assert_eq!(result.unwrap(), "hello", "should return Ok value"); + + // Value should now be cached (get returns it without factory) + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "hello", "value should be in cache"); +} + +#[tokio::test] +async fn try_init_err_does_not_cache() { + let cache = FutureBackedCache::::default(); + let result: Result = cache.get_or_try_init(1, || async { Err("boom") }).await; + 
assert_eq!(result.unwrap_err(), "boom", "should return the error"); + + // Cache should be empty — error was not stored + assert!(cache.is_empty(), "cache should have no entries after error"); + assert!(cache.get(&1).await.is_none(), "key should not exist"); +} + +#[tokio::test] +async fn try_init_err_then_retry_ok() { + let cache = FutureBackedCache::::default(); + + // First call: factory fails + let r1: Result = cache.get_or_try_init(1, || async { Err("fail") }).await; + assert!(r1.is_err(), "first call should fail"); + + // Second call: factory succeeds + let r2: Result = cache + .get_or_try_init(1, || async { Ok("recovered".to_owned()) }) + .await; + assert_eq!(r2.unwrap(), "recovered", "retry should succeed"); + + // Value should now be cached + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "recovered"); +} + +#[tokio::test] +async fn try_init_returns_value_cached_by_init() { + let cache = FutureBackedCache::::default(); + + // Populate via infallible get_or_init + cache + .get_or_init(1, || async { "from_init".to_owned() }) + .await; + + // get_or_try_init should return the cached value without running factory + let result: Result = cache + .get_or_try_init(1, || async { panic!("factory should not run") }) + .await; + assert_eq!(result.unwrap(), "from_init"); +} + +#[tokio::test] +async fn panic_in_factory_is_recovered() { + let cache = Arc::new(FutureBackedCache::::default()); + let call_count = Arc::new(AtomicUsize::new(0)); + + // Spawn a task whose factory panics. tokio::spawn catches the panic. + let cache2 = Arc::clone(&cache); + let call_count2 = Arc::clone(&call_count); + let handle = tokio::spawn(async move { + cache2 + .get_or_init(1, || { + call_count2.fetch_add(1, Ordering::Relaxed); + async { panic!("boom") } + }) + .await + }); + // The spawned task panics internally; JoinHandle returns Err. + assert!(handle.await.is_err(), "task should have panicked"); + + // The key should NOT be permanently bricked. 
A new caller should succeed. + let v = cache + .get_or_init(1, || { + call_count.fetch_add(1, Ordering::Relaxed); + async { "recovered".to_owned() } + }) + .await; + assert_eq!(v, "recovered", "should recover after panic"); + assert_eq!( + call_count.load(Ordering::Relaxed), + 2, + "factory called twice" + ); +} + +/// With 3+ joiners the dedup property becomes observable: under the old +/// broken code each joiner would run its own factory after the owner fails +/// (4 total calls for 1 owner + 3 joiners). With the loop-based retry only +/// one joiner wins the `Vacant` race, so we expect exactly 2 calls +/// (A's fail + one winner's success). +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn try_init_retry_after_joined_failure_deduplicates() { + let cache = Arc::new(FutureBackedCache::::default()); + let call_count = Arc::new(AtomicUsize::new(0)); + + // Channel to control timing of Task A's factory. + let (release_tx, release_rx) = oneshot::channel::<()>(); + + // Task A: starts a failing InFlight, held until we release. + let cache_a = Arc::clone(&cache); + let count_a = Arc::clone(&call_count); + let task_a = tokio::spawn(async move { + let result: Result = cache_a + .get_or_try_init(1, || { + count_a.fetch_add(1, Ordering::Relaxed); + async move { + let _ = release_rx.await; + Err("task_a_fail".to_owned()) + } + }) + .await; + result + }); + + // Give Task A time to register the InFlight slot. + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + // Spawn 3 joiners that all join A's InFlight. After A fails, exactly + // one should win the Vacant race and run its factory; the others join + // the new InFlight. 
+ let mut joiner_handles = Vec::new(); + for _ in 0..3 { + let cache_j = Arc::clone(&cache); + let count_j = Arc::clone(&call_count); + joiner_handles.push(tokio::spawn(async move { + let result: Result = cache_j + .get_or_try_init(1, || { + count_j.fetch_add(1, Ordering::Relaxed); + async move { Ok("joiner_ok".to_owned()) } + }) + .await; + result + })); + } + + // Give joiners time to join the InFlight. + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + // Release A's factory → it fails. + release_tx.send(()).unwrap(); + + let result_a = task_a.await.unwrap(); + assert!(result_a.is_err(), "task A should fail"); + + for handle in joiner_handles { + let result = handle.await.unwrap(); + assert_eq!(result.unwrap(), "joiner_ok", "every joiner should succeed"); + } + + // Factory should have been called exactly 2 times: A's fail + one + // joiner winning the Vacant race. The other 2 joiners piggyback on + // the winner's InFlight via Shared, so their factories are never called. 
+ assert_eq!( + call_count.load(Ordering::Relaxed), + 2, + "factory should be called exactly twice (A's fail + one joiner's success), \ + not 4 (which would indicate each joiner ran its own factory)" + ); +} diff --git a/tests/async_fs_correctness.rs b/tests/async_fs_correctness.rs new file mode 100644 index 00000000..e3087ceb --- /dev/null +++ b/tests/async_fs_correctness.rs @@ -0,0 +1,772 @@ +#![allow(clippy::unwrap_used, clippy::expect_used, missing_docs)] + +mod common; + +use std::ffi::{OsStr, OsString}; +use std::sync::Arc; + +use git_fs::cache::async_backed::FutureBackedCache; +use git_fs::fs::async_fs::{AsyncFs, InodeLifecycle}; +use git_fs::fs::{INode, INodeType, LoadedAddr, OpenFlags}; + +use common::async_fs_mocks::{MockFsDataProvider, MockFsState, make_inode}; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_inc_returns_count_after_increment() { + let table = Arc::new(FutureBackedCache::default()); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); + + assert_eq!(lifecycle.inc(100), 1, "first inc should return 1"); + assert_eq!(lifecycle.inc(100), 2, "second inc should return 2"); + assert_eq!(lifecycle.inc(100), 3, "third inc should return 3"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_returns_remaining_count() { + let table = Arc::new(FutureBackedCache::default()); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); + lifecycle.inc(100); + lifecycle.inc(100); + + assert_eq!(lifecycle.dec(&100), Some(1), "dec from 2 should give 1"); + assert_eq!(lifecycle.dec(&100), Some(0), "dec from 1 should give 0"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_unknown_addr_returns_none() { + let table: Arc> = 
Arc::new(FutureBackedCache::default()); + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); + + assert_eq!( + lifecycle.dec(&999), + None, + "dec on unknown key should return None" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_to_zero_evicts_from_table() { + let table = Arc::new(FutureBackedCache::default()); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); + lifecycle.inc(100); + + assert_eq!(lifecycle.dec(&100), Some(0)); + // The inode should have been evicted from the table. + assert!( + lifecycle.table().get(&100).await.is_none(), + "inode should be evicted after refcount hits zero" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_count_decrements_by_n() { + let table: Arc> = Arc::new(FutureBackedCache::default()); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); + lifecycle.inc(100); + lifecycle.inc(100); + lifecycle.inc(100); // count = 3 + + assert_eq!( + lifecycle.dec_count(&100, 2), + Some(1), + "dec_count(3, 2) should give 1" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_count_to_zero_evicts() { + let table = Arc::new(FutureBackedCache::default()); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); + lifecycle.inc(100); + lifecycle.inc(100); // count = 2 + + assert_eq!(lifecycle.dec_count(&100, 2), Some(0)); + assert!( + lifecycle.table().get(&100).await.is_none(), + "inode should be evicted after dec_count to zero" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_table_returns_underlying_cache() { + let table = 
Arc::new(FutureBackedCache::default()); + let inode = make_inode(42, INodeType::Directory, 0, None); + table.insert_sync(42, inode); + + let lifecycle = InodeLifecycle::from_table(Arc::clone(&table)); + + let fetched = lifecycle.table().get(&42).await; + assert_eq!( + fetched.map(|n| n.addr), + Some(42), + "table() should expose the underlying cache" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn new_seeds_root_inode_into_table() { + let table = Arc::new(FutureBackedCache::default()); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + assert_eq!(fs.inode_count(), 1, "root should be the only inode"); + let fetched = table.get(&1).await; + assert_eq!( + fetched.map(|n| n.addr), + Some(1), + "root inode should be in the table" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn new_preseeded_does_not_insert_root() { + let table: Arc> = Arc::new(FutureBackedCache::default()); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new_preseeded(dp, Arc::clone(&table)); + + assert_eq!( + fs.inode_count(), + 0, + "preseeded constructor should not insert anything" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn statfs_reports_inode_count() { + let table = Arc::new(FutureBackedCache::default()); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + let stats = fs.statfs(); + + assert_eq!(stats.block_size, 4096); + assert_eq!(stats.total_inodes, 1, "should reflect the root inode"); + assert_eq!(stats.free_blocks, 0); + assert_eq!(stats.max_filename_length, 255); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn loaded_inode_returns_seeded_inode() { + let table = 
Arc::new(FutureBackedCache::default()); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let inode = fs.loaded_inode(LoadedAddr::new_unchecked(1)).await.unwrap(); + assert_eq!(inode.addr, 1); + assert_eq!(inode.itype, INodeType::Directory); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn loaded_inode_returns_enoent_for_missing_addr() { + let table = Arc::new(FutureBackedCache::default()); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let err = fs + .loaded_inode(LoadedAddr::new_unchecked(999)) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn getattr_delegates_to_loaded_inode() { + let table = Arc::new(FutureBackedCache::default()); + let root = make_inode(1, INodeType::Directory, 4096, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let inode = fs.getattr(LoadedAddr::new_unchecked(1)).await.unwrap(); + assert_eq!(inode.addr, 1); + assert_eq!(inode.size, 4096); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_resolves_child_via_data_provider() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "readme.md".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let tracked = fs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("readme.md")) + .await + .unwrap(); + + 
assert_eq!(tracked.inode.addr, 10); + assert_eq!(tracked.inode.size, 42); + assert_eq!(tracked.inode.itype, INodeType::File); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_populates_inode_table() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 100, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "file.txt".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + fs.lookup(LoadedAddr::new_unchecked(1), OsStr::new("file.txt")) + .await + .unwrap(); + + // The child should now be in the inode table. + let cached = table.get(&10).await; + assert_eq!( + cached.map(|n| n.addr), + Some(10), + "child inode should be cached in the table after lookup" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_second_call_uses_cache() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 100, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "cached.txt".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let first = fs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("cached.txt")) + .await + .unwrap(); + let second = fs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("cached.txt")) + .await + .unwrap(); + + assert_eq!(first.inode.addr, second.inode.addr); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_propagates_provider_error() { + let root = make_inode(1, INodeType::Directory, 0, None); + // No lookups configured — provider will return ENOENT. 
+ let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let err = fs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("nonexistent")) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); +} + +// open and OpenFile::read tests + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_file_handle_and_reader() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 5, Some(1)); + + let mut state = MockFsState::default(); + state + .file_contents + .insert(10, bytes::Bytes::from_static(b"hello")); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let open_file = fs + .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) + .await + .unwrap(); + + assert!(open_file.fh >= 1, "file handle should start at 1"); + let data = open_file.read(0, 5).await.unwrap(); + assert_eq!(&data[..], b"hello"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_eisdir_for_directory() { + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let err = fs + .open(LoadedAddr::new_unchecked(1), OpenFlags::RDONLY) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::EISDIR)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_enoent_for_missing_inode() { + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, 
root, Arc::clone(&table)).await; + + let err = fs + .open(LoadedAddr::new_unchecked(999), OpenFlags::RDONLY) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_assigns_unique_file_handles() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 0, Some(1)); + + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = Arc::new(FutureBackedCache::default()); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let fh1 = fs + .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) + .await + .unwrap() + .fh; + let fh2 = fs + .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) + .await + .unwrap() + .fh; + + assert_ne!(fh1, fh2, "each open should produce a unique file handle"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_file_read_with_offset() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 11, Some(1)); + + let mut state = MockFsState::default(); + state + .file_contents + .insert(10, bytes::Bytes::from_static(b"hello world")); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let open_file = fs + .open(LoadedAddr::new_unchecked(10), OpenFlags::RDONLY) + .await + .unwrap(); + + let data = open_file.read(6, 5).await.unwrap(); + assert_eq!(&data[..], b"world"); +} + +// readdir tests + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_lists_children_sorted_by_name() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_b = make_inode(10, INodeType::File, 10, Some(1)); + let child_a = make_inode(11, INodeType::File, 20, Some(1)); + + let mut state = 
MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("b.txt"), child_b), + (OsString::from("a.txt"), child_a), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let mut entries: Vec<(OsString, u64)> = Vec::new(); + fs.readdir(LoadedAddr::new_unchecked(1), 0, |entry, _offset| { + entries.push((entry.name.to_os_string(), entry.inode.addr)); + false // don't stop + }) + .await + .unwrap(); + + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].0, "a.txt", "entries should be sorted by name"); + assert_eq!(entries[0].1, 11); + assert_eq!(entries[1].0, "b.txt"); + assert_eq!(entries[1].1, 10); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_respects_offset() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 10, Some(1)); + let child_b = make_inode(11, INodeType::File, 20, Some(1)); + let child_c = make_inode(12, INodeType::File, 30, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + (OsString::from("b"), child_b), + (OsString::from("c"), child_c), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + // First readdir to populate cache + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) + .await + .unwrap(); + + // Second readdir starting at offset 2 (skip first two) + let mut entries: Vec = Vec::new(); + fs.readdir(LoadedAddr::new_unchecked(1), 2, |entry, _| { + entries.push(entry.name.to_os_string()); + false + }) + .await + .unwrap(); + + assert_eq!(entries, vec![OsString::from("c")]); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_stops_when_filler_returns_true() { + let root = 
make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 10, Some(1)); + let child_b = make_inode(11, INodeType::File, 20, Some(1)); + let child_c = make_inode(12, INodeType::File, 30, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + (OsString::from("b"), child_b), + (OsString::from("c"), child_c), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let mut count = 0; + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| { + count += 1; + count >= 2 // stop after 2 entries + }) + .await + .unwrap(); + + assert_eq!(count, 2, "filler should have been called exactly twice"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_returns_enotdir_for_file() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 100, Some(1)); + + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = Arc::new(FutureBackedCache::default()); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let err = fs + .readdir(LoadedAddr::new_unchecked(10), 0, |_, _| false) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOTDIR)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_populates_inode_table_with_children() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + state + .directories + .insert(1, vec![(OsString::from("child.txt"), child)]); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, 
_| false) + .await + .unwrap(); + + let cached = table.get(&10).await; + assert_eq!( + cached.map(|n| n.addr), + Some(10), + "readdir should populate children into the inode table" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_empty_directory() { + let root = make_inode(1, INodeType::Directory, 0, None); + + let mut state = MockFsState::default(); + state.directories.insert(1, vec![]); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let mut count = 0; + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| { + count += 1; + false + }) + .await + .unwrap(); + + assert_eq!(count, 0, "empty directory should yield no entries"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_provides_correct_next_offsets() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 0, Some(1)); + let child_b = make_inode(11, INodeType::File, 0, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + (OsString::from("b"), child_b), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + let mut offsets: Vec = Vec::new(); + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, next_offset| { + offsets.push(next_offset); + false + }) + .await + .unwrap(); + + assert_eq!( + offsets, + vec![1, 2], + "offsets should be 1-indexed and sequential" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_after_eviction_returns_fresh_inode() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_v1 = make_inode(10, INodeType::File, 42, Some(1)); + let child_v2 = make_inode(20, INodeType::File, 99, Some(1)); + + let mut state = 
MockFsState::default(); + state.lookups.insert((1, "readme.md".into()), child_v1); + let dp = MockFsDataProvider::new(state); + let state_ref = Arc::clone(&dp.state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + // First lookup → addr=10 + let first = fs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("readme.md")) + .await + .unwrap(); + assert_eq!(first.inode.addr, 10); + + // Simulate forget: remove the inode from the table. + table.remove_sync(&10); + + // Insert the refresh entry *after* the first lookup so dp.lookup() + // returns child_v2 on the next call (refresh_lookups is checked first). + drop( + state_ref + .refresh_lookups + .insert_sync((1, "readme.md".into()), child_v2), + ); + + // Second lookup should NOT return the stale addr=10. + let second = fs + .lookup(LoadedAddr::new_unchecked(1), OsStr::new("readme.md")) + .await + .unwrap(); + assert_ne!(second.inode.addr, 10, "should not return stale inode"); + assert_eq!(second.inode.addr, 20, "should return the fresh inode"); +} + +// lookup-after-readdir integration test + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_after_readdir_uses_directory_cache() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + // Only configure readdir — no lookup entry. If the directory cache + // fast path is broken, the lookup will fail with ENOENT. + state + .directories + .insert(1, vec![(OsString::from("file.txt"), child)]); + let dp = MockFsDataProvider::new(state); + + let table = Arc::new(FutureBackedCache::default()); + let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await; + + // readdir populates the directory cache. + fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false) + .await + .unwrap(); + + // lookup should hit the directory cache fast path. 
+    let tracked = fs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("file.txt"))
+        .await
+        .unwrap();
+    assert_eq!(tracked.inode.addr, 10);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn readdir_prefetches_child_directories() {
+    use std::sync::atomic::Ordering;
+
+    let root = make_inode(1, INodeType::Directory, 0, None);
+    let child_dir = make_inode(10, INodeType::Directory, 0, Some(1));
+    let child_file = make_inode(11, INodeType::File, 100, Some(1));
+    let grandchild = make_inode(20, INodeType::File, 50, Some(10));
+
+    let mut state = MockFsState::default();
+    state.directories.insert(
+        1,
+        vec![
+            (OsString::from("subdir"), child_dir),
+            (OsString::from("file.txt"), child_file),
+        ],
+    );
+    state
+        .directories
+        .insert(10, vec![(OsString::from("grandchild.txt"), grandchild)]);
+    let dp = MockFsDataProvider::new(state);
+    let provider_state = Arc::clone(&dp.state);
+
+    let table = Arc::new(FutureBackedCache::default());
+    let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await;
+
+    // readdir on root should trigger prefetch of child_dir (addr=10)
+    fs.readdir(LoadedAddr::new_unchecked(1), 0, |_, _| false)
+        .await
+        .unwrap();
+
+    // Wait for prefetch to complete (mock is instant, just need task to run)
+    tokio::time::sleep(std::time::Duration::from_millis(100)).await;
+
+    // dp.readdir should have been called twice: once for root, once for child_dir prefetch
+    assert_eq!(
+        provider_state.readdir_count.load(Ordering::Relaxed),
+        2,
+        "prefetch should have called readdir on the child directory"
+    );
+
+    // Now readdir on child_dir should NOT call dp.readdir again (served from cache)
+    let mut entries = Vec::new();
+    fs.readdir(LoadedAddr::new_unchecked(10), 0, |entry, _| {
+        entries.push(entry.name.to_os_string());
+        false
+    })
+    .await
+    .unwrap();
+
+    assert_eq!(entries, vec![OsString::from("grandchild.txt")]);
+    assert_eq!(
+        provider_state.readdir_count.load(Ordering::Relaxed),
+        2,
+        "cached readdir should not call dp.readdir again"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn prefetch_failure_does_not_affect_parent_readdir() {
+    let root = make_inode(1, INodeType::Directory, 0, None);
+    let child_dir = make_inode(10, INodeType::Directory, 0, Some(1));
+
+    let mut state = MockFsState::default();
+    state
+        .directories
+        .insert(1, vec![(OsString::from("bad_dir"), child_dir)]);
+    // Don't configure readdir for addr=10 — mock will return ENOENT
+    let dp = MockFsDataProvider::new(state);
+
+    let table = Arc::new(FutureBackedCache::default());
+    let fs = AsyncFs::new(dp, root, Arc::clone(&table)).await;
+
+    // Parent readdir should succeed even though child prefetch will fail
+    let mut entries = Vec::new();
+    fs.readdir(LoadedAddr::new_unchecked(1), 0, |entry, _| {
+        entries.push(entry.name.to_os_string());
+        false
+    })
+    .await
+    .unwrap();
+
+    assert_eq!(entries, vec![OsString::from("bad_dir")]);
+
+    // Wait for prefetch to attempt and fail
+    tokio::time::sleep(std::time::Duration::from_millis(100)).await;
+
+    // Direct readdir on child is not blocked; it reports ENOENT (claim reset by PopulateGuard)
+    let err = fs
+        .readdir(LoadedAddr::new_unchecked(10), 0, |_, _| false)
+        .await
+        .unwrap_err();
+    assert_eq!(err.raw_os_error(), Some(libc::ENOENT));
+}
diff --git a/tests/bridge_tests.rs b/tests/bridge_tests.rs
new file mode 100644
index 00000000..d8389273
--- /dev/null
+++ b/tests/bridge_tests.rs
@@ -0,0 +1,49 @@
+#![allow(clippy::unwrap_used, missing_docs)]
+
+use git_fs::fs::bridge::ConcurrentBridge;
+
+#[test]
+fn insert_then_forward_returns_inner() {
+    let bridge = ConcurrentBridge::new();
+    bridge.insert(10, 100);
+    assert_eq!(bridge.forward(10), Some(100));
+}
+
+#[test]
+fn insert_then_backward_returns_outer() {
+    let bridge = ConcurrentBridge::new();
+    bridge.insert(10, 100);
+    assert_eq!(bridge.backward(100), Some(10));
+}
+
+#[test]
+fn forward_missing_returns_none() {
+    let bridge = ConcurrentBridge::new();
+    assert_eq!(bridge.forward(42), None);
+}
+
+#[test]
+fn backward_or_insert_existing_returns_cached() {
+    let bridge = ConcurrentBridge::new();
+    bridge.insert(10, 100);
+    let outer = bridge.backward_or_insert(100, 999);
+    assert_eq!(outer, 10, "should return existing outer addr");
+}
+
+#[test]
+fn backward_or_insert_new_allocates() {
+    let bridge = ConcurrentBridge::new();
+    let outer = bridge.backward_or_insert(200, 50);
+    assert_eq!(outer, 50, "should use fallback address");
+    assert_eq!(bridge.forward(50), Some(200));
+    assert_eq!(bridge.backward(200), Some(50));
+}
+
+#[test]
+fn remove_by_outer_clears_both_directions() {
+    let bridge = ConcurrentBridge::new();
+    bridge.insert(10, 100);
+    bridge.remove_by_outer(10);
+    assert_eq!(bridge.forward(10), None);
+    assert_eq!(bridge.backward(100), None);
+}
diff --git a/tests/common/async_fs_mocks.rs b/tests/common/async_fs_mocks.rs
new file mode 100644
index 00000000..b95f6ebe
--- /dev/null
+++ b/tests/common/async_fs_mocks.rs
@@ -0,0 +1,117 @@
+#![allow(missing_docs, clippy::unwrap_used)]
+
+use std::collections::HashMap;
+use std::ffi::{OsStr, OsString};
+use std::sync::Arc;
+use std::time::SystemTime;
+
+use bytes::Bytes;
+
+use git_fs::fs::async_fs::{FileReader, FsDataProvider};
+use git_fs::fs::{INode, INodeType, InodePerms, OpenFlags};
+
+/// Builds an `INode` with fixed permissions, owner and timestamps; the caller supplies the rest.
+pub fn make_inode(addr: u64, itype: INodeType, size: u64, parent: Option<u64>) -> INode {
+    INode {
+        addr,
+        permissions: InodePerms::OWNER_RWX | InodePerms::GROUP_READ | InodePerms::OTHER_READ,
+        uid: 1000,
+        gid: 1000,
+        create_time: SystemTime::UNIX_EPOCH,
+        last_modified_at: SystemTime::UNIX_EPOCH,
+        parent,
+        size,
+        itype,
+    }
+}
+
+/// A mock `FileReader` that serves reads out of a fixed in-memory byte buffer.
+#[derive(Debug, Clone)] +pub struct MockFileReader { + pub data: Bytes, +} + +impl FileReader for MockFileReader { + #[expect( + clippy::cast_possible_truncation, + reason = "test mock — offsets stay small" + )] + async fn read(&self, offset: u64, size: u32) -> Result { + let start = (offset as usize).min(self.data.len()); + let end = (start + size as usize).min(self.data.len()); + Ok(self.data.slice(start..end)) + } +} + +/// Shared state backing `MockFsDataProvider`. +#[derive(Debug, Default)] +pub struct MockFsState { + /// `(parent_addr, child_name) -> child_inode` + pub lookups: HashMap<(u64, OsString), INode>, + /// `parent_addr -> vec of (child_name, child_inode)` + pub directories: HashMap>, + /// `inode_addr -> file content bytes` + pub file_contents: HashMap, + /// Mutable overrides for `lookups`. When populated, entries here take + /// precedence and are consumed on use (removed after the first hit). + /// Existing tests are unaffected because this defaults to empty. + pub refresh_lookups: scc::HashMap<(u64, OsString), INode>, + /// Counts how many times `readdir` has been called on this provider. + pub readdir_count: std::sync::atomic::AtomicU64, +} + +/// A clonable mock data provider for `AsyncFs` tests. +#[derive(Debug, Clone)] +pub struct MockFsDataProvider { + pub state: Arc, +} + +impl MockFsDataProvider { + pub fn new(state: MockFsState) -> Self { + Self { + state: Arc::new(state), + } + } +} + +impl FsDataProvider for MockFsDataProvider { + type Reader = MockFileReader; + + async fn lookup(&self, parent: INode, name: &OsStr) -> Result { + let key = (parent.addr, name.to_os_string()); + // Check mutable overrides first (consumed on use). 
+ if let Some((_, inode)) = self.state.refresh_lookups.remove_sync(&key) { + return Ok(inode); + } + self.state + .lookups + .get(&key) + .copied() + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT)) + } + + async fn readdir(&self, parent: INode) -> Result, std::io::Error> { + self.state + .readdir_count + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + self.state + .directories + .get(&parent.addr) + .cloned() + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT)) + } + + async fn open( + &self, + inode: INode, + _flags: OpenFlags, + ) -> Result { + let data = self + .state + .file_contents + .get(&inode.addr) + .cloned() + .unwrap_or_default(); + Ok(MockFileReader { data }) + } +} diff --git a/tests/common/composite_mocks.rs b/tests/common/composite_mocks.rs new file mode 100644 index 00000000..413621d3 --- /dev/null +++ b/tests/common/composite_mocks.rs @@ -0,0 +1,55 @@ +#![allow(missing_docs, clippy::unwrap_used)] + +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::sync::Arc; + +use git_fs::fs::INode; +use git_fs::fs::composite::{ChildDescriptor, CompositeRoot}; + +use super::async_fs_mocks::MockFsDataProvider; + +/// A mock `CompositeRoot` that resolves children from a fixed map. 
+pub struct MockRoot {
+    pub children: Arc<HashMap<OsString, (MockFsDataProvider, INode)>>,
+}
+
+impl MockRoot {
+    pub fn new(children: HashMap<OsString, (MockFsDataProvider, INode)>) -> Self {
+        Self {
+            children: Arc::new(children),
+        }
+    }
+}
+
+impl CompositeRoot for MockRoot {
+    type ChildDP = MockFsDataProvider;
+
+    async fn resolve_child(
+        &self,
+        name: &OsStr,
+    ) -> Result<Option<ChildDescriptor<MockFsDataProvider>>, std::io::Error> {
+        Ok(self
+            .children
+            .get(name)
+            .map(|(provider, root_ino)| ChildDescriptor {
+                name: name.to_os_string(),
+                provider: provider.clone(),
+                root_ino: *root_ino,
+            }))
+    }
+
+    async fn list_children(
+        &self,
+    ) -> Result<Vec<ChildDescriptor<MockFsDataProvider>>, std::io::Error> {
+        Ok(self
+            .children
+            .iter()
+            .map(|(name, (provider, root_ino))| ChildDescriptor {
+                name: name.clone(),
+                provider: provider.clone(),
+                root_ino: *root_ino,
+            })
+            .collect())
+    }
+}
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
index 101f9295..96aedec1 100644
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@@ -1,4 +1,7 @@
-#![allow(missing_docs, clippy::unwrap_used)]
+#![allow(dead_code, missing_docs, clippy::unwrap_used)]
+
+pub mod async_fs_mocks;
+pub mod composite_mocks;
 
 use std::sync::{Arc, Mutex};
 use std::time::Duration;
diff --git a/tests/composite_fs_tests.rs b/tests/composite_fs_tests.rs
new file mode 100644
index 00000000..1c263425
--- /dev/null
+++ b/tests/composite_fs_tests.rs
@@ -0,0 +1,354 @@
+#![allow(clippy::unwrap_used, clippy::expect_used, missing_docs)]
+
+mod common;
+
+use std::collections::HashMap;
+use std::ffi::{OsStr, OsString};
+use std::sync::Arc;
+
+use bytes::Bytes;
+
+use git_fs::cache::async_backed::FutureBackedCache;
+use git_fs::fs::async_fs::{AsyncFs, FsDataProvider as _};
+use git_fs::fs::composite::CompositeFs;
+use git_fs::fs::{INode, INodeType, LoadedAddr, OpenFlags};
+
+use common::async_fs_mocks::{MockFsDataProvider, MockFsState, make_inode};
+use common::composite_mocks::MockRoot;
+
+/// Build a child data provider with a root directory and a set of children.
+///
+/// Each child is `(name, addr, itype, size)`. Files get auto-generated content
+/// of the form `"content of {name}"`.
+fn make_child_provider(
+    root_addr: u64,
+    children: &[(&str, u64, INodeType, u64)],
+) -> (MockFsDataProvider, INode) {
+    let root = make_inode(root_addr, INodeType::Directory, 0, None);
+    let mut state = MockFsState::default();
+    let mut dir_entries = Vec::new();
+    for (name, addr, itype, size) in children {
+        let child = make_inode(*addr, *itype, *size, Some(root_addr));
+        state
+            .lookups
+            .insert((root_addr, OsString::from(name)), child);
+        dir_entries.push((OsString::from(name), child));
+        if *itype == INodeType::File {
+            state
+                .file_contents
+                .insert(*addr, Bytes::from(format!("content of {name}")));
+        }
+    }
+    state.directories.insert(root_addr, dir_entries);
+    (MockFsDataProvider::new(state), root)
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn composite_root_lookup_resolves_child() {
+    let (provider, root_ino) = make_child_provider(100, &[("file.txt", 101, INodeType::File, 42)]);
+
+    let mut children = HashMap::new();
+    children.insert(OsString::from("repo-a"), (provider, root_ino));
+
+    let mock_root = MockRoot::new(children);
+    let composite = CompositeFs::new(mock_root, (1000, 1000));
+    let root_inode = composite.make_root_inode();
+
+    let table = Arc::new(FutureBackedCache::default());
+    table.insert_sync(1, root_inode);
+    let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table));
+
+    let tracked = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo-a"))
+        .await
+        .unwrap();
+
+    assert_eq!(
+        tracked.inode.itype,
+        INodeType::Directory,
+        "child should appear as a directory at composite level"
+    );
+    assert_ne!(
+        tracked.inode.addr, 1,
+        "child should have a composite-level address different from root"
+    );
+    assert_eq!(
+        tracked.inode.parent,
+        Some(1),
+        "child directory should have the composite root as parent"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn composite_root_readdir_lists_children() {
+    let (prov_a, root_a) = make_child_provider(100, &[]);
+    let (prov_b, root_b) = make_child_provider(200, &[]);
+
+    let mut children = HashMap::new();
+    children.insert(OsString::from("alpha"), (prov_a, root_a));
+    children.insert(OsString::from("beta"), (prov_b, root_b));
+
+    let mock_root = MockRoot::new(children);
+    let composite = CompositeFs::new(mock_root, (1000, 1000));
+    let root_inode = composite.make_root_inode();
+
+    let table = Arc::new(FutureBackedCache::default());
+    table.insert_sync(1, root_inode);
+    let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table));
+
+    let mut entries = Vec::new();
+    afs.readdir(LoadedAddr::new_unchecked(1), 0, |de, _offset| {
+        entries.push(de.name.to_os_string());
+        false
+    })
+    .await
+    .unwrap();
+
+    entries.sort();
+    assert_eq!(entries.len(), 2, "should list both children");
+    assert_eq!(entries[0], "alpha");
+    assert_eq!(entries[1], "beta");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn composite_delegated_lookup_reaches_child() {
+    let (provider, root_ino) = make_child_provider(
+        100,
+        &[
+            ("readme.md", 101, INodeType::File, 256),
+            ("src", 102, INodeType::Directory, 0),
+        ],
+    );
+
+    let mut children = HashMap::new();
+    children.insert(OsString::from("my-repo"), (provider, root_ino));
+
+    let mock_root = MockRoot::new(children);
+    let composite = CompositeFs::new(mock_root, (1000, 1000));
+    let root_inode = composite.make_root_inode();
+
+    let table = Arc::new(FutureBackedCache::default());
+    table.insert_sync(1, root_inode);
+    let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table));
+
+    // First, look up the child at root level.
+    let child_dir = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("my-repo"))
+        .await
+        .unwrap();
+    let child_addr = child_dir.inode.addr;
+
+    // Then, look up a file inside the child.
+    let file = afs
+        .lookup(
+            LoadedAddr::new_unchecked(child_addr),
+            OsStr::new("readme.md"),
+        )
+        .await
+        .unwrap();
+
+    assert_eq!(file.inode.itype, INodeType::File);
+    assert_eq!(file.inode.size, 256);
+
+    // Also look up a subdirectory inside the child.
+    let subdir = afs
+        .lookup(LoadedAddr::new_unchecked(child_addr), OsStr::new("src"))
+        .await
+        .unwrap();
+
+    assert_eq!(subdir.inode.itype, INodeType::Directory);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn composite_open_and_read_through_child() {
+    let (provider, root_ino) = make_child_provider(100, &[("hello.txt", 101, INodeType::File, 20)]);
+
+    let mut children = HashMap::new();
+    children.insert(OsString::from("repo"), (provider, root_ino));
+
+    let mock_root = MockRoot::new(children);
+    let composite = CompositeFs::new(mock_root, (1000, 1000));
+    let root_inode = composite.make_root_inode();
+
+    let table = Arc::new(FutureBackedCache::default());
+    table.insert_sync(1, root_inode);
+    let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table));
+
+    // Navigate to the file.
+    let child_dir = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo"))
+        .await
+        .unwrap();
+    let file_tracked = afs
+        .lookup(
+            LoadedAddr::new_unchecked(child_dir.inode.addr),
+            OsStr::new("hello.txt"),
+        )
+        .await
+        .unwrap();
+    let file_addr = file_tracked.inode.addr;
+
+    // Open and read.
+    let open_file = afs
+        .open(LoadedAddr::new_unchecked(file_addr), OpenFlags::empty())
+        .await
+        .unwrap();
+    let data = open_file.read(0, 1024).await.unwrap();
+
+    assert_eq!(
+        data,
+        Bytes::from("content of hello.txt"),
+        "should read the file content through the composite layer"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn composite_lookup_unknown_child_returns_enoent() {
+    let (provider, root_ino) = make_child_provider(100, &[]);
+
+    let mut children = HashMap::new();
+    children.insert(OsString::from("existing"), (provider, root_ino));
+
+    let mock_root = MockRoot::new(children);
+    let composite = CompositeFs::new(mock_root, (1000, 1000));
+    let root_inode = composite.make_root_inode();
+
+    let table = Arc::new(FutureBackedCache::default());
+    table.insert_sync(1, root_inode);
+    let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table));
+
+    let err = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("nonexistent"))
+        .await
+        .unwrap_err();
+
+    assert_eq!(
+        err.raw_os_error(),
+        Some(libc::ENOENT),
+        "looking up a nonexistent child at root should return ENOENT"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn composite_readdir_delegated_lists_child_contents() {
+    let (provider, root_ino) = make_child_provider(
+        100,
+        &[
+            ("a.rs", 101, INodeType::File, 10),
+            ("b.rs", 102, INodeType::File, 20),
+            ("lib", 103, INodeType::Directory, 0),
+        ],
+    );
+
+    let mut children = HashMap::new();
+    children.insert(OsString::from("repo"), (provider, root_ino));
+
+    let mock_root = MockRoot::new(children);
+    let composite = CompositeFs::new(mock_root, (1000, 1000));
+    let root_inode = composite.make_root_inode();
+
+    let table = Arc::new(FutureBackedCache::default());
+    table.insert_sync(1, root_inode);
+    let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table));
+
+    // Navigate into the child.
+    let child_dir = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo"))
+        .await
+        .unwrap();
+
+    // Readdir inside the child.
+    let mut entries = Vec::new();
+    afs.readdir(
+        LoadedAddr::new_unchecked(child_dir.inode.addr),
+        0,
+        |de, _offset| {
+            entries.push((de.name.to_os_string(), de.inode.itype));
+            false
+        },
+    )
+    .await
+    .unwrap();
+
+    entries.sort_by(|(a, _), (b, _)| a.cmp(b));
+    assert_eq!(entries.len(), 3);
+    assert_eq!(entries[0], (OsString::from("a.rs"), INodeType::File));
+    assert_eq!(entries[1], (OsString::from("b.rs"), INodeType::File));
+    assert_eq!(entries[2], (OsString::from("lib"), INodeType::Directory));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn composite_repeated_lookup_returns_same_addr() {
+    let (provider, root_ino) = make_child_provider(100, &[]);
+
+    let mut children = HashMap::new();
+    children.insert(OsString::from("repo"), (provider, root_ino));
+
+    let mock_root = MockRoot::new(children);
+    let composite = CompositeFs::new(mock_root, (1000, 1000));
+    let root_inode = composite.make_root_inode();
+
+    let table = Arc::new(FutureBackedCache::default());
+    table.insert_sync(1, root_inode);
+    let afs = AsyncFs::new_preseeded(composite, Arc::clone(&table));
+
+    let first = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo"))
+        .await
+        .unwrap();
+    let second = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo"))
+        .await
+        .unwrap();
+
+    assert_eq!(
+        first.inode.addr, second.inode.addr,
+        "repeated lookups for the same child should return the same composite address"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn composite_forget_cleans_up_slot_and_name_mapping() {
+    // Setup: one child "repo" with a file.
+    let (provider, root_ino) = make_child_provider(100, &[("file.txt", 101, INodeType::File, 42)]);
+
+    let mut children = HashMap::new();
+    children.insert(OsString::from("repo"), (provider, root_ino));
+
+    let mock_root = MockRoot::new(children);
+    let composite = CompositeFs::new(mock_root, (1000, 1000));
+    let root_inode = composite.make_root_inode();
+
+    let table = Arc::new(FutureBackedCache::default());
+    table.insert_sync(1, root_inode);
+    let afs = AsyncFs::new_preseeded(composite.clone(), Arc::clone(&table));
+
+    // Look up the child and a file inside it.
+    let child_dir = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo"))
+        .await
+        .unwrap();
+    let child_addr = child_dir.inode.addr;
+
+    let file = afs
+        .lookup(
+            LoadedAddr::new_unchecked(child_addr),
+            OsStr::new("file.txt"),
+        )
+        .await
+        .unwrap();
+    let file_addr = file.inode.addr;
+
+    // Forget the file, then the child directory.
+    composite.forget(file_addr);
+    composite.forget(child_addr);
+
+    // Re-lookup the child — should succeed with a fresh slot.
+    let re_resolved = afs
+        .lookup(LoadedAddr::new_unchecked(1), OsStr::new("repo"))
+        .await
+        .unwrap();
+
+    assert_eq!(re_resolved.inode.itype, INodeType::Directory);
+    // The new address may differ from the original (fresh slot allocated).
+}
diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs
new file mode 100644
index 00000000..7043bd9b
--- /dev/null
+++ b/tests/dcache_correctness.rs
@@ -0,0 +1,223 @@
+#![allow(clippy::unwrap_used, missing_docs)]
+
+use std::ffi::{OsStr, OsString};
+
+use git_fs::fs::LoadedAddr;
+use git_fs::fs::dcache::{DCache, PopulateStatus};
+
+#[tokio::test]
+async fn lookup_returns_none_for_missing_entry() {
+    let cache = DCache::new();
+    assert!(
+        cache
+            .lookup(LoadedAddr::new_unchecked(1), OsStr::new("foo"))
+            .is_none()
+    );
+}
+
+#[tokio::test]
+async fn insert_then_lookup() {
+    let cache = DCache::new();
+    cache.insert(
+        LoadedAddr::new_unchecked(1),
+        OsString::from("foo"),
+        LoadedAddr::new_unchecked(10),
+        false,
+    );
+    let dv = cache.lookup(LoadedAddr::new_unchecked(1), OsStr::new("foo"));
+    assert!(dv.is_some(), "entry should be present after insert");
+    let dv = dv.expect("checked above");
+    assert_eq!(dv.ino, LoadedAddr::new_unchecked(10));
+    assert!(!dv.is_dir);
+}
+
+#[tokio::test]
+async fn readdir_returns_only_children_of_parent() {
+    let cache = DCache::new();
+    cache.insert(
+        LoadedAddr::new_unchecked(1),
+        OsString::from("a"),
+        LoadedAddr::new_unchecked(10),
+        false,
+    );
+    cache.insert(
+        LoadedAddr::new_unchecked(1),
+        OsString::from("b"),
+        LoadedAddr::new_unchecked(11),
+        true,
+    );
+    cache.insert(
+        LoadedAddr::new_unchecked(2),
+        OsString::from("c"),
+        LoadedAddr::new_unchecked(12),
+        false,
+    );
+    let mut children = Vec::new();
+    cache.readdir(LoadedAddr::new_unchecked(1), |name, dvalue| {
+        children.push((name.to_os_string(), dvalue.clone()));
+    });
+    assert_eq!(children.len(), 2);
+    let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect();
+    assert!(names.contains(&OsString::from("a")));
+    assert!(names.contains(&OsString::from("b")));
+}
+
+#[tokio::test]
+async fn readdir_empty_parent_returns_empty() {
+    let cache = DCache::new();
+    let mut children = Vec::new();
+    cache.readdir(LoadedAddr::new_unchecked(1), |name, dvalue| {
+        children.push((name.to_os_string(), dvalue.clone()));
+    });
+    assert!(children.is_empty());
+}
+
+#[tokio::test]
+async fn try_claim_populate_unclaimed_returns_claimed() {
+    let cache = DCache::new();
+    assert!(matches!(
+        cache.try_claim_populate(LoadedAddr::new_unchecked(1)),
+        PopulateStatus::Claimed
+    ));
+}
+
+#[tokio::test]
+async fn finish_populate_then_claim_returns_done() {
+    let cache = DCache::new();
+    assert!(matches!(
+        cache.try_claim_populate(LoadedAddr::new_unchecked(1)),
+        PopulateStatus::Claimed
+    ));
+    cache.finish_populate(LoadedAddr::new_unchecked(1));
+    assert!(matches!(
+        cache.try_claim_populate(LoadedAddr::new_unchecked(1)),
+        PopulateStatus::Done
+    ));
+}
+
+#[tokio::test]
+async fn double_claim_returns_in_progress() {
+    let cache = DCache::new();
+    assert!(matches!(
+        cache.try_claim_populate(LoadedAddr::new_unchecked(1)),
+        PopulateStatus::Claimed
+    ));
+    assert!(matches!(
+        cache.try_claim_populate(LoadedAddr::new_unchecked(1)),
+        PopulateStatus::InProgress
+    ));
+}
+
+#[tokio::test]
+async fn abort_populate_allows_reclaim() {
+    let cache = DCache::new();
+    assert!(matches!(
+        cache.try_claim_populate(LoadedAddr::new_unchecked(1)),
+        PopulateStatus::Claimed
+    ));
+    cache.abort_populate(LoadedAddr::new_unchecked(1));
+    assert!(matches!(
+        cache.try_claim_populate(LoadedAddr::new_unchecked(1)),
+        PopulateStatus::Claimed
+    ));
+}
+
+#[tokio::test]
+async fn insert_does_not_mark_populated() {
+    let cache = DCache::new();
+    cache.insert(
+        LoadedAddr::new_unchecked(1),
+        OsString::from("foo"),
+        LoadedAddr::new_unchecked(10),
+        false,
+    );
+    assert!(
+        matches!(
+            cache.try_claim_populate(LoadedAddr::new_unchecked(1)),
+            PopulateStatus::Claimed
+        ),
+        "insert alone should not mark a directory as populated"
+    );
+}
+
+#[tokio::test]
+async fn upsert_overwrites_existing_entry() {
+    let cache = DCache::new();
+    cache.insert(
+        LoadedAddr::new_unchecked(1),
+        OsString::from("foo"),
+        LoadedAddr::new_unchecked(10),
+        false,
+    );
+    cache.insert(
+        LoadedAddr::new_unchecked(1),
+        OsString::from("foo"),
+        LoadedAddr::new_unchecked(20),
+        true,
+    );
+    let dv = cache.lookup(LoadedAddr::new_unchecked(1), OsStr::new("foo"));
+    assert!(dv.is_some(), "entry should still be present after upsert");
+    let dv = dv.expect("checked above");
+    assert_eq!(dv.ino, LoadedAddr::new_unchecked(20));
+    assert!(dv.is_dir);
+}
+
+#[tokio::test]
+async fn readdir_returns_entries_in_sorted_order() {
+    let cache = DCache::new();
+    for name in ["zebra", "apple", "mango"] {
+        cache.insert(
+            LoadedAddr::new_unchecked(1),
+            OsString::from(name),
+            LoadedAddr::new_unchecked(10),
+            false,
+        );
+    }
+    let mut names = Vec::new();
+    cache.readdir(LoadedAddr::new_unchecked(1), |name, _| {
+        names.push(name.to_str().unwrap().to_owned());
+    });
+    assert_eq!(names, ["apple", "mango", "zebra"]);
+}
+
+#[tokio::test]
+async fn child_dir_addrs_returns_only_directories() {
+    let cache = DCache::new();
+    let parent = LoadedAddr::new_unchecked(1);
+    cache.insert(
+        parent,
+        OsString::from("file.txt"),
+        LoadedAddr::new_unchecked(10),
+        false,
+    );
+    cache.insert(
+        parent,
+        OsString::from("subdir"),
+        LoadedAddr::new_unchecked(11),
+        true,
+    );
+    cache.insert(
+        parent,
+        OsString::from("another_file"),
+        LoadedAddr::new_unchecked(12),
+        false,
+    );
+    cache.insert(
+        parent,
+        OsString::from("another_dir"),
+        LoadedAddr::new_unchecked(13),
+        true,
+    );
+
+    let dirs = cache.child_dir_addrs(parent);
+    assert_eq!(dirs.len(), 2);
+    assert!(dirs.contains(&LoadedAddr::new_unchecked(11)));
+    assert!(dirs.contains(&LoadedAddr::new_unchecked(13)));
+}
+
+#[tokio::test]
+async fn child_dir_addrs_returns_empty_for_unknown_parent() {
+    let cache = DCache::new();
+    let dirs = cache.child_dir_addrs(LoadedAddr::new_unchecked(999));
+    assert!(dirs.is_empty());
+}