From 1fa982e7b4fc00b09bc4141c9bf04ddf7eb4ebe1 Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Sun, 1 Mar 2026 20:55:05 +0400 Subject: [PATCH 01/13] feat: added replcator crate with basic protocol impl --- Cargo.lock | 19 +++ Cargo.toml | 2 + magicblock-replicator/Cargo.toml | 23 ++++ magicblock-replicator/src/connection.rs | 70 ++++++++++++ magicblock-replicator/src/error.rs | 14 +++ magicblock-replicator/src/lib.rs | 12 ++ magicblock-replicator/src/proto.rs | 146 ++++++++++++++++++++++++ magicblock-replicator/src/tcp.rs | 24 ++++ 8 files changed, 310 insertions(+) create mode 100644 magicblock-replicator/Cargo.toml create mode 100644 magicblock-replicator/src/connection.rs create mode 100644 magicblock-replicator/src/error.rs create mode 100644 magicblock-replicator/src/lib.rs create mode 100644 magicblock-replicator/src/proto.rs create mode 100644 magicblock-replicator/src/tcp.rs diff --git a/Cargo.lock b/Cargo.lock index 9d7115c11..c96a6c211 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3463,6 +3463,25 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "magicblock-replicator" +version = "0.8.0" +dependencies = [ + "bincode", + "bytes", + "futures", + "serde", + "solana-hash", + "solana-keypair", + "solana-pubkey", + "solana-signature", + "solana-signer", + "solana-transaction", + "thiserror 1.0.69", + "tokio", + "tokio-util", +] + [[package]] name = "magicblock-rpc-client" version = "0.8.3" diff --git a/Cargo.toml b/Cargo.toml index 833da8581..c58ec006a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ members = [ "magicblock-ledger", "magicblock-metrics", "magicblock-processor", + "magicblock-replicator", "magicblock-rpc-client", "magicblock-table-mania", "magicblock-task-scheduler", @@ -54,6 +55,7 @@ assert_matches = "1.5.0" async-trait = "0.1.77" base64 = "0.21.7" bincode = "1.3.3" +bytes = "1.0" borsh = { version = "1.5.1", features = ["derive", "unstable__schema"] } bs58 = "0.5.1" byteorder = "1.5.0" diff --git 
a/magicblock-replicator/Cargo.toml b/magicblock-replicator/Cargo.toml new file mode 100644 index 000000000..94b3d8ba8 --- /dev/null +++ b/magicblock-replicator/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "magicblock-replicator" +version.workspace = true +authors.workspace = true +repository.workspace = true +homepage.workspace = true +license.workspace = true +edition.workspace = true + +[dependencies] +bincode = { workspace = true } +bytes = { workspace = true } +futures = { workspace = true } +thiserror = { workspace = true } +tokio = { workspace = true, features = ["net"] } +tokio-util = { workspace = true, features = ["codec"] } +serde = { workspace = true, features = ["derive"] } +solana-hash = { workspace = true, features = ["serde"] } +solana-keypair = { workspace = true } +solana-pubkey = { workspace = true, features = ["serde"] } +solana-signature = { workspace = true, features = ["serde"] } +solana-signer = { workspace = true } +solana-transaction = { workspace = true, features = ["serde"] } diff --git a/magicblock-replicator/src/connection.rs b/magicblock-replicator/src/connection.rs new file mode 100644 index 000000000..8df76bdcf --- /dev/null +++ b/magicblock-replicator/src/connection.rs @@ -0,0 +1,70 @@ +//! Codec and stream types for length-prefixed bincode framing. + +use bytes::{BufMut, BytesMut}; +use futures::{SinkExt, StreamExt}; +use tokio::io::{AsyncRead, AsyncWrite}; +use tokio_util::codec::{FramedRead, FramedWrite, LengthDelimitedCodec}; + +use crate::{ + error::{Error, Result}, + proto::Message, +}; + +/// Encodes `Message` with 4-byte LE length prefix. 
+pub struct MessageEncoder; + +pub(crate) type InputStream = FramedRead; +pub(crate) type OutputStream = FramedWrite; + +impl tokio_util::codec::Encoder for MessageEncoder { + type Error = Error; + + fn encode(&mut self, msg: Message, dst: &mut BytesMut) -> Result<()> { + let start = dst.len(); + dst.put_u32_le(0); + bincode::serialize_into(dst.writer(), &msg)?; + let len = (dst.len() - start - 4) as u32; + dst[start..start + 4].copy_from_slice(&len.to_le_bytes()); + Ok(()) + } +} + +/// Receives messages from an async stream (max frame: 64KB). +pub struct Receiver { + inner: InputStream, +} + +impl Receiver { + pub fn new(io: IO) -> Self { + let inner = LengthDelimitedCodec::builder() + .little_endian() + .max_frame_length(64 * 1024) + .length_field_type::() + .new_read(io); + Self { inner } + } + + pub async fn recv(&mut self) -> Result { + let frame = + self.inner.next().await.ok_or(Error::ConnectionClosed)??; + bincode::deserialize(&frame).map_err(Into::into) + } +} + +/// Sends messages to an async stream. +pub struct Sender { + inner: OutputStream, +} + +impl Sender { + pub fn new(io: IO) -> Self { + Self { + inner: FramedWrite::new(io, MessageEncoder), + } + } + + pub async fn send(&mut self, msg: Message) -> Result<()> { + self.inner.send(msg).await?; + Ok(()) + } +} diff --git a/magicblock-replicator/src/error.rs b/magicblock-replicator/src/error.rs new file mode 100644 index 000000000..9e66f70bb --- /dev/null +++ b/magicblock-replicator/src/error.rs @@ -0,0 +1,14 @@ +//! Error types for the replication protocol. + +/// Replication operation errors. 
+#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("connection closed")] + ConnectionClosed, + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + #[error("serialization error: {0}")] + SerDe(#[from] bincode::Error), +} + +pub type Result = std::result::Result; diff --git a/magicblock-replicator/src/lib.rs b/magicblock-replicator/src/lib.rs new file mode 100644 index 000000000..72eb84f4c --- /dev/null +++ b/magicblock-replicator/src/lib.rs @@ -0,0 +1,12 @@ +//! State replication protocol for streaming transactions from primary to standby nodes. +//! +//! Messages are length-prefixed (4B LE) + bincode payload. + +pub mod connection; +pub mod error; +pub mod proto; +pub mod tcp; + +pub use connection::{Receiver, Sender}; +pub use error::{Error, Result}; +pub use proto::{Message, PROTOCOL_VERSION}; diff --git a/magicblock-replicator/src/proto.rs b/magicblock-replicator/src/proto.rs new file mode 100644 index 000000000..1becc33ce --- /dev/null +++ b/magicblock-replicator/src/proto.rs @@ -0,0 +1,146 @@ +//! Protocol message types for replication. +//! +//! Wire format: 4-byte LE length prefix + bincode payload. +//! Bincode encodes enum variant index as implicit type tag. + +use serde::{Deserialize, Serialize}; +use solana_hash::Hash; +use solana_keypair::Keypair; +use solana_pubkey::Pubkey; +use solana_signature::Signature; +use solana_signer::Signer; +use solana_transaction::versioned::VersionedTransaction; + +use crate::error::Result; + +pub type Slot = u64; +pub type TxIndex = u32; + +pub const PROTOCOL_VERSION: u32 = 1; + +/// Top-level replication message. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub enum Message { + HandshakeReq(HandshakeRequest), + HandshakeResp(HandshakeResponse), + Transaction(Transaction), + Block(Block), + SuperBlock(SuperBlock), + Failover(FailoverSignal), +} + +/// Client -> Server: initiate replication session. +/// Authenticated via Ed25519 signature over `start_slot`. 
+#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct HandshakeRequest { + pub version: u32, + pub start_slot: Slot, + pub identity: Pubkey, + signature: Signature, +} + +/// Server -> Client: accept or reject session. +/// Signed over `slot` (success) or error message (failure). +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct HandshakeResponse { + pub result: std::result::Result, + pub identity: Pubkey, + signature: Signature, +} + +/// Slot boundary marker with blockhash. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct Block { + pub slot: Slot, + pub hash: Hash, +} + +/// Transaction with slot and ordinal position. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct Transaction { + pub slot: Slot, + pub index: TxIndex, + /// Bincode-encoded `VersionedTransaction`. + pub payload: Vec, +} + +/// Periodic checkpoint for state verification. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct SuperBlock { + pub blocks: u64, + pub transactions: u64, + pub checksum: u64, +} + +/// Primary -> Standby: signal controlled failover. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct FailoverSignal { + pub slot: Slot, + signature: Signature, +} + +impl HandshakeRequest { + pub fn new(start_slot: Slot, keypair: &Keypair) -> Self { + Self { + version: PROTOCOL_VERSION, + start_slot, + identity: keypair.pubkey(), + signature: keypair.sign_message(&start_slot.to_le_bytes()), + } + } + + /// Verifies signature matches claimed identity. 
+ pub fn verify(&self) -> bool { + self.signature + .verify(self.identity.as_array(), &self.start_slot.to_le_bytes()) + } +} + +impl HandshakeResponse { + pub fn new(result: Result, keypair: &Keypair) -> Self { + let result = result.map_err(|e| e.to_string()); + let signature = match &result { + Ok(slot) => keypair.sign_message(&slot.to_le_bytes()), + Err(err) => keypair.sign_message(err.as_bytes()), + }; + Self { + result, + identity: keypair.pubkey(), + signature, + } + } + + /// Verifies signature matches server identity. + pub fn verify(&self) -> bool { + match &self.result { + Ok(slot) => self + .signature + .verify(self.identity.as_array(), &slot.to_le_bytes()), + Err(err) => self + .signature + .verify(self.identity.as_array(), err.as_bytes()), + } + } +} + +impl Transaction { + /// Deserializes the inner transaction. + pub fn decode(&self) -> bincode::Result { + bincode::deserialize(&self.payload) + } +} + +impl FailoverSignal { + pub fn new(slot: Slot, keypair: &Keypair) -> Self { + Self { + slot, + signature: keypair.sign_message(&slot.to_le_bytes()), + } + } + + /// Verifies signal against expected identity. + pub fn verify(&self, identity: Pubkey) -> bool { + self.signature + .verify(identity.as_array(), &self.slot.to_le_bytes()) + } +} diff --git a/magicblock-replicator/src/tcp.rs b/magicblock-replicator/src/tcp.rs new file mode 100644 index 000000000..10b3d9a6a --- /dev/null +++ b/magicblock-replicator/src/tcp.rs @@ -0,0 +1,24 @@ +//! TCP transport utilities. + +use std::{io, net::SocketAddr}; + +use tokio::net::{ + tcp::{OwnedReadHalf, OwnedWriteHalf}, + TcpStream, +}; + +use crate::connection::{Receiver, Sender}; + +pub type TcpReceiver = Receiver; +pub type TcpSender = Sender; + +/// Connects to a primary at `addr`, returning (sender, receiver). +pub async fn connect(addr: SocketAddr) -> io::Result<(TcpSender, TcpReceiver)> { + TcpStream::connect(addr).await.map(split) +} + +/// Splits a TCP stream into sender and receiver halves. 
+pub fn split(stream: TcpStream) -> (TcpSender, TcpReceiver) { + let (rx, tx) = stream.into_split(); + (Sender::new(tx), Receiver::new(rx)) +} From 197db9580744a0a9b460fd590dca3ee2cd78ef5e Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Sun, 1 Mar 2026 21:18:07 +0400 Subject: [PATCH 02/13] fix: update Cargo.lock --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index c96a6c211..14dd46043 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3465,7 +3465,7 @@ dependencies = [ [[package]] name = "magicblock-replicator" -version = "0.8.0" +version = "0.8.1" dependencies = [ "bincode", "bytes", From 564e2e89cf9743f0c063c0d426071c72869fc789 Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Mon, 2 Mar 2026 14:34:26 +0400 Subject: [PATCH 03/13] test: add unit tests + address comments --- magicblock-replicator/Cargo.toml | 2 +- magicblock-replicator/src/connection.rs | 14 +- magicblock-replicator/src/lib.rs | 3 + magicblock-replicator/src/tests.rs | 309 ++++++++++++++++++++++++ 4 files changed, 323 insertions(+), 5 deletions(-) create mode 100644 magicblock-replicator/src/tests.rs diff --git a/magicblock-replicator/Cargo.toml b/magicblock-replicator/Cargo.toml index 94b3d8ba8..acd341e53 100644 --- a/magicblock-replicator/Cargo.toml +++ b/magicblock-replicator/Cargo.toml @@ -12,7 +12,7 @@ bincode = { workspace = true } bytes = { workspace = true } futures = { workspace = true } thiserror = { workspace = true } -tokio = { workspace = true, features = ["net"] } +tokio = { workspace = true, features = ["net", "rt", "macros"] } tokio-util = { workspace = true, features = ["codec"] } serde = { workspace = true, features = ["derive"] } solana-hash = { workspace = true, features = ["serde"] } diff --git a/magicblock-replicator/src/connection.rs b/magicblock-replicator/src/connection.rs index 8df76bdcf..87ba7a422 100644 --- a/magicblock-replicator/src/connection.rs +++ b/magicblock-replicator/src/connection.rs @@ -16,6 +16,8 @@ 
pub struct MessageEncoder; pub(crate) type InputStream = FramedRead; pub(crate) type OutputStream = FramedWrite; +const MAX_FRAME_SIZE: usize = 64 * 1024 * 1024; + impl tokio_util::codec::Encoder for MessageEncoder { type Error = Error; @@ -23,13 +25,17 @@ impl tokio_util::codec::Encoder for MessageEncoder { let start = dst.len(); dst.put_u32_le(0); bincode::serialize_into(dst.writer(), &msg)?; - let len = (dst.len() - start - 4) as u32; - dst[start..start + 4].copy_from_slice(&len.to_le_bytes()); + let len = dst.len() - start - 4; + if len > MAX_FRAME_SIZE { + dst.truncate(start); + return Err(Box::new(bincode::ErrorKind::SizeLimit))?; + } + dst[start..start + 4].copy_from_slice(&(len as u32).to_le_bytes()); Ok(()) } } -/// Receives messages from an async stream (max frame: 64KB). +/// Receives messages from an async stream (max frame: 64MB). pub struct Receiver { inner: InputStream, } @@ -38,7 +44,7 @@ impl Receiver { pub fn new(io: IO) -> Self { let inner = LengthDelimitedCodec::builder() .little_endian() - .max_frame_length(64 * 1024) + .max_frame_length(MAX_FRAME_SIZE) .length_field_type::() .new_read(io); Self { inner } diff --git a/magicblock-replicator/src/lib.rs b/magicblock-replicator/src/lib.rs index 72eb84f4c..4918f9eee 100644 --- a/magicblock-replicator/src/lib.rs +++ b/magicblock-replicator/src/lib.rs @@ -7,6 +7,9 @@ pub mod error; pub mod proto; pub mod tcp; +#[cfg(test)] +mod tests; + pub use connection::{Receiver, Sender}; pub use error::{Error, Result}; pub use proto::{Message, PROTOCOL_VERSION}; diff --git a/magicblock-replicator/src/tests.rs b/magicblock-replicator/src/tests.rs new file mode 100644 index 000000000..8f9d654a9 --- /dev/null +++ b/magicblock-replicator/src/tests.rs @@ -0,0 +1,309 @@ +//! Tests suite for replication protocol. 
+ +use solana_keypair::Keypair; +use solana_pubkey::Pubkey; +use solana_signer::Signer; +use tokio::net::{TcpListener, TcpStream}; + +use crate::{ + proto::{ + Block, FailoverSignal, HandshakeRequest, HandshakeResponse, Message, + SuperBlock, Transaction, + }, + tcp::split, +}; + +// ============================================================================= +// Wire Format Tests - catch serialization/protocol changes +// ============================================================================= + +#[test] +fn variant_order_stability() { + // Bincode encodes enum discriminant as variant index. + // Reordering enum variants silently breaks wire compatibility. + let cases: [(Message, u32); 6] = [ + ( + Message::HandshakeReq(HandshakeRequest::new(0, &Keypair::new())), + 0, + ), + ( + Message::HandshakeResp(HandshakeResponse::new( + Ok(0), + &Keypair::new(), + )), + 1, + ), + ( + Message::Transaction(Transaction { + slot: 0, + index: 0, + payload: vec![], + }), + 2, + ), + ( + Message::Block(Block { + slot: 0, + hash: solana_hash::Hash::default(), + }), + 3, + ), + ( + Message::SuperBlock(SuperBlock { + blocks: 0, + transactions: 0, + checksum: 0, + }), + 4, + ), + ( + Message::Failover(FailoverSignal::new(0, &Keypair::new())), + 5, + ), + ]; + + for (msg, expected_idx) in cases { + let encoded = bincode::serialize(&msg).unwrap(); + let actual_idx = u32::from_le_bytes([ + encoded[0], encoded[1], encoded[2], encoded[3], + ]); + assert_eq!( + actual_idx, expected_idx, + "variant index changed - this breaks wire compatibility!" + ); + } +} + +#[test] +fn signed_message_roundtrip() { + // Signed messages (handshake, failover) have complex serialization. + // Unsigned messages are trivial and covered by variant_order_stability. 
+ let kp = Keypair::new(); + + let cases = vec![ + Message::HandshakeReq(HandshakeRequest::new(12345, &kp)), + Message::HandshakeResp(HandshakeResponse::new(Ok(99999), &kp)), + Message::HandshakeResp(HandshakeResponse::new( + Err(crate::error::Error::ConnectionClosed), + &kp, + )), + Message::Failover(FailoverSignal::new(77777, &kp)), + Message::Transaction(Transaction { + slot: 54321, + index: 42, + payload: (0..255).collect(), + }), + ]; + + for msg in cases { + let encoded = bincode::serialize(&msg).unwrap(); + let decoded: Message = bincode::deserialize(&encoded).unwrap(); + assert_eq!(bincode::serialize(&decoded).unwrap(), encoded); + } +} + +// ============================================================================= +// Signature Verification Tests - catch crypto/auth bugs +// ============================================================================= + +#[test] +fn handshake_tampering_detected() { + let kp = Keypair::new(); + let req = HandshakeRequest::new(12345, &kp); + + // Valid signature works + assert!(req.verify()); + + // Tampered identity fails + let mut tampered = req.clone(); + tampered.identity = Pubkey::new_unique(); + assert!(!tampered.verify(), "tampered identity should fail"); + + // Tampered slot fails (slot is at offset 4 after version u32) + let mut bytes = bincode::serialize(&req).unwrap(); + bytes[4..12].copy_from_slice(&99999u64.to_le_bytes()); + let decoded: HandshakeRequest = bincode::deserialize(&bytes).unwrap(); + assert!(!decoded.verify(), "tampered slot should fail"); +} + +#[test] +fn handshake_response_signing() { + // Success and error paths sign different data - both must verify. 
+ let kp = Keypair::new(); + + let success = HandshakeResponse::new(Ok(5000), &kp); + assert!(success.verify(), "success response should verify"); + + let error = + HandshakeResponse::new(Err(crate::error::Error::ConnectionClosed), &kp); + assert!(error.verify(), "error response should verify"); +} + +#[test] +fn failover_signal_verification() { + let kp = Keypair::new(); + let signal = FailoverSignal::new(99999, &kp); + + // Correct identity verifies + assert!(signal.verify(kp.pubkey())); + + // Wrong identity fails + assert!( + !signal.verify(Pubkey::new_unique()), + "wrong identity should fail" + ); +} + +// ============================================================================= +// TCP Transport Tests - catch framing/connection bugs +// ============================================================================= + +#[tokio::test] +async fn bidirectional_handshake() { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let client = TcpStream::connect(addr).await.unwrap(); + let (server, _) = listener.accept().await.unwrap(); + + let (mut client_tx, mut client_rx) = split(client); + let (mut server_tx, mut server_rx) = split(server); + + // Client -> Server: handshake request + let kp = Keypair::new(); + client_tx + .send(Message::HandshakeReq(HandshakeRequest::new(1000, &kp))) + .await + .unwrap(); + + let req = match server_rx.recv().await.unwrap() { + Message::HandshakeReq(r) => r, + _ => panic!("expected HandshakeReq"), + }; + assert!(req.verify()); + assert_eq!(req.start_slot, 1000); + + // Server -> Client: handshake response + server_tx + .send(Message::HandshakeResp(HandshakeResponse::new( + Ok(1000), + &Keypair::new(), + ))) + .await + .unwrap(); + + let resp = match client_rx.recv().await.unwrap() { + Message::HandshakeResp(r) => r, + _ => panic!("expected HandshakeResp"), + }; + assert!(resp.verify()); + assert_eq!(resp.result, Ok(1000u64)); +} + +#[tokio::test] +async fn 
message_ordering_over_stream() { + // Tests that TCP framing preserves message boundaries and order. + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let client = TcpStream::connect(addr).await.unwrap(); + let (server, _) = listener.accept().await.unwrap(); + + let (mut tx, _) = split(client); + let (_, mut rx) = split(server); + + // Send mixed message types + for i in 0..10 { + tx.send(Message::Block(Block { + slot: i, + hash: solana_hash::Hash::new_unique(), + })) + .await + .unwrap(); + } + + // Verify order is preserved + for expected in 0..10 { + match rx.recv().await.unwrap() { + Message::Block(b) => assert_eq!(b.slot, expected), + _ => panic!("expected Block"), + } + } +} + +#[tokio::test] +async fn large_payload() { + // Tests frame handling for messages larger than TCP buffer. + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let client = TcpStream::connect(addr).await.unwrap(); + let (server, _) = listener.accept().await.unwrap(); + + let (mut tx, _) = split(client); + let (_, mut rx) = split(server); + + let payload = vec![0xAB; 1024 * 1024]; // 1MB + tx.send(Message::Transaction(Transaction { + slot: 0, + index: 0, + payload: payload.clone(), + })) + .await + .unwrap(); + + match rx.recv().await.unwrap() { + Message::Transaction(t) => { + assert_eq!(t.payload, payload); + } + _ => panic!("expected Transaction"), + } +} + +#[tokio::test] +async fn all_message_types_over_wire() { + // Tests encoder→TCP→decoder path for all 6 message types. 
+ let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let client = TcpStream::connect(addr).await.unwrap(); + let (server, _) = listener.accept().await.unwrap(); + + let (mut tx, _) = split(client); + let (_, mut rx) = split(server); + + let kp = Keypair::new(); + let messages: Vec = vec![ + Message::HandshakeReq(HandshakeRequest::new(12345, &kp)), + Message::HandshakeResp(HandshakeResponse::new(Ok(67890), &kp)), + Message::Transaction(Transaction { + slot: 100, + index: 5, + payload: vec![0xDE, 0xAD, 0xBE, 0xEF], + }), + Message::Block(Block { + slot: 200, + hash: solana_hash::Hash::new_unique(), + }), + Message::SuperBlock(SuperBlock { + blocks: 1000, + transactions: 50000, + checksum: 0xCAFEBABE, + }), + Message::Failover(FailoverSignal::new(99999, &kp)), + ]; + + for msg in &messages { + tx.send(msg.clone()).await.unwrap(); + } + + for expected in &messages { + let received = rx.recv().await.unwrap(); + // Compare serialized form to catch any encoding differences + assert_eq!( + bincode::serialize(&received).unwrap(), + bincode::serialize(expected).unwrap(), + "wire roundtrip mismatch" + ); + } +} From ef986a898b117a51125d28b73b04e8d978629414 Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Tue, 3 Mar 2026 16:05:52 +0400 Subject: [PATCH 04/13] refactor: cleanup the connection module --- Cargo.lock | 1 + magicblock-replicator/Cargo.toml | 1 + magicblock-replicator/src/connection.rs | 42 ++++++++++++++++++++----- magicblock-replicator/src/lib.rs | 1 - magicblock-replicator/src/tests.rs | 12 +++---- 5 files changed, 43 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 14dd46043..07403c30b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3479,6 +3479,7 @@ dependencies = [ "solana-transaction", "thiserror 1.0.69", "tokio", + "tokio-stream", "tokio-util", ] diff --git a/magicblock-replicator/Cargo.toml b/magicblock-replicator/Cargo.toml index acd341e53..62ceb4e1e 100644 --- 
a/magicblock-replicator/Cargo.toml +++ b/magicblock-replicator/Cargo.toml @@ -13,6 +13,7 @@ bytes = { workspace = true } futures = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true, features = ["net", "rt", "macros"] } +tokio-stream = { workspace = true } tokio-util = { workspace = true, features = ["codec"] } serde = { workspace = true, features = ["derive"] } solana-hash = { workspace = true, features = ["serde"] } diff --git a/magicblock-replicator/src/connection.rs b/magicblock-replicator/src/connection.rs index 87ba7a422..a6651c0df 100644 --- a/magicblock-replicator/src/connection.rs +++ b/magicblock-replicator/src/connection.rs @@ -1,9 +1,16 @@ //! Codec and stream types for length-prefixed bincode framing. +use std::{ + pin::Pin, + task::{Context, Poll}, +}; + use bytes::{BufMut, BytesMut}; -use futures::{SinkExt, StreamExt}; +use futures::{SinkExt, Stream, StreamExt}; use tokio::io::{AsyncRead, AsyncWrite}; -use tokio_util::codec::{FramedRead, FramedWrite, LengthDelimitedCodec}; +use tokio_util::codec::{ + Encoder, FramedRead, FramedWrite, LengthDelimitedCodec, +}; use crate::{ error::{Error, Result}, @@ -11,20 +18,20 @@ use crate::{ }; /// Encodes `Message` with 4-byte LE length prefix. 
-pub struct MessageEncoder; +pub(crate) struct MessageEncoder; pub(crate) type InputStream = FramedRead; pub(crate) type OutputStream = FramedWrite; const MAX_FRAME_SIZE: usize = 64 * 1024 * 1024; -impl tokio_util::codec::Encoder for MessageEncoder { +impl Encoder<&Message> for MessageEncoder { type Error = Error; - fn encode(&mut self, msg: Message, dst: &mut BytesMut) -> Result<()> { + fn encode(&mut self, msg: &Message, dst: &mut BytesMut) -> Result<()> { let start = dst.len(); dst.put_u32_le(0); - bincode::serialize_into(dst.writer(), &msg)?; + bincode::serialize_into(dst.writer(), msg)?; let len = dst.len() - start - 4; if len > MAX_FRAME_SIZE { dst.truncate(start); @@ -57,6 +64,27 @@ impl Receiver { } } +impl Stream for Receiver { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + match self.inner.poll_next_unpin(cx) { + Poll::Ready(Some(Ok(item))) => { + let result = bincode::deserialize(&item).map_err(Into::into); + Poll::Ready(Some(result)) + } + Poll::Ready(Some(Err(err))) => Poll::Ready(Some(Err(err.into()))), + Poll::Ready(None) => { + Poll::Ready(Some(Err(Error::ConnectionClosed))) + } + Poll::Pending => Poll::Pending, + } + } +} + /// Sends messages to an async stream. 
pub struct Sender { inner: OutputStream, @@ -69,7 +97,7 @@ impl Sender { } } - pub async fn send(&mut self, msg: Message) -> Result<()> { + pub async fn send(&mut self, msg: &Message) -> Result<()> { self.inner.send(msg).await?; Ok(()) } diff --git a/magicblock-replicator/src/lib.rs b/magicblock-replicator/src/lib.rs index 4918f9eee..c4ee0513e 100644 --- a/magicblock-replicator/src/lib.rs +++ b/magicblock-replicator/src/lib.rs @@ -10,6 +10,5 @@ pub mod tcp; #[cfg(test)] mod tests; -pub use connection::{Receiver, Sender}; pub use error::{Error, Result}; pub use proto::{Message, PROTOCOL_VERSION}; diff --git a/magicblock-replicator/src/tests.rs b/magicblock-replicator/src/tests.rs index 8f9d654a9..e9a770e3c 100644 --- a/magicblock-replicator/src/tests.rs +++ b/magicblock-replicator/src/tests.rs @@ -37,7 +37,7 @@ fn variant_order_stability() { Message::Transaction(Transaction { slot: 0, index: 0, - payload: vec![], + payload: vec![].into(), }), 2, ), @@ -172,7 +172,7 @@ async fn bidirectional_handshake() { // Client -> Server: handshake request let kp = Keypair::new(); client_tx - .send(Message::HandshakeReq(HandshakeRequest::new(1000, &kp))) + .send(&Message::HandshakeReq(HandshakeRequest::new(1000, &kp))) .await .unwrap(); @@ -185,7 +185,7 @@ async fn bidirectional_handshake() { // Server -> Client: handshake response server_tx - .send(Message::HandshakeResp(HandshakeResponse::new( + .send(&Message::HandshakeResp(HandshakeResponse::new( Ok(1000), &Keypair::new(), ))) @@ -214,7 +214,7 @@ async fn message_ordering_over_stream() { // Send mixed message types for i in 0..10 { - tx.send(Message::Block(Block { + tx.send(&Message::Block(Block { slot: i, hash: solana_hash::Hash::new_unique(), })) @@ -244,7 +244,7 @@ async fn large_payload() { let (_, mut rx) = split(server); let payload = vec![0xAB; 1024 * 1024]; // 1MB - tx.send(Message::Transaction(Transaction { + tx.send(&Message::Transaction(Transaction { slot: 0, index: 0, payload: payload.clone(), @@ -294,7 +294,7 @@ 
async fn all_message_types_over_wire() { ]; for msg in &messages { - tx.send(msg.clone()).await.unwrap(); + tx.send(msg).await.unwrap(); } for expected in &messages { From 03d08c1cd59ac6b6803ec5f66bb5f345038f059a Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Wed, 4 Mar 2026 15:59:44 +0400 Subject: [PATCH 05/13] chore: fix Cargo.lock --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 07403c30b..dea924a17 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3465,7 +3465,7 @@ dependencies = [ [[package]] name = "magicblock-replicator" -version = "0.8.1" +version = "0.8.2" dependencies = [ "bincode", "bytes", From 4c2043799a8e714f298f4c6c8b7527387f85bef0 Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Wed, 4 Mar 2026 16:18:47 +0400 Subject: [PATCH 06/13] fix: clippy --- magicblock-replicator/src/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicblock-replicator/src/tests.rs b/magicblock-replicator/src/tests.rs index e9a770e3c..ea7499cd2 100644 --- a/magicblock-replicator/src/tests.rs +++ b/magicblock-replicator/src/tests.rs @@ -37,7 +37,7 @@ fn variant_order_stability() { Message::Transaction(Transaction { slot: 0, index: 0, - payload: vec![].into(), + payload: vec![], }), 2, ), From b75dff32651bd835fa18ceec5da1284cb9db9ddd Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Sun, 8 Mar 2026 22:40:55 +0400 Subject: [PATCH 07/13] refactor: switch the protocol from TCP to NATS --- Cargo.lock | 273 +++++++++++++++++++++++- Cargo.toml | 1 + magicblock-replicator/Cargo.toml | 5 +- magicblock-replicator/src/connection.rs | 105 +-------- magicblock-replicator/src/error.rs | 13 ++ magicblock-replicator/src/lib.rs | 2 +- magicblock-replicator/src/nats.rs | 114 ++++++++++ magicblock-replicator/src/proto.rs | 68 +----- magicblock-replicator/src/tcp.rs | 24 --- magicblock-replicator/src/tests.rs | 223 +------------------ 10 files changed, 404 insertions(+), 424 deletions(-) create mode 
100644 magicblock-replicator/src/nats.rs delete mode 100644 magicblock-replicator/src/tcp.rs diff --git a/Cargo.lock b/Cargo.lock index dea924a17..2a9442115 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -356,6 +356,43 @@ dependencies = [ "tokio", ] +[[package]] +name = "async-nats" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df5af9ebfb0a14481d3eaf6101e6391261e4f30d25b26a7635ade8a39482ded0" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-util", + "memchr", + "nkeys", + "nuid", + "once_cell", + "pin-project", + "portable-atomic", + "rand 0.8.5", + "regex", + "ring", + "rustls-native-certs 0.7.3", + "rustls-pki-types", + "rustls-webpki 0.102.8", + "serde", + "serde_json", + "serde_nanos", + "serde_repr", + "thiserror 1.0.69", + "time", + "tokio", + "tokio-rustls 0.26.4", + "tokio-stream", + "tokio-util", + "tokio-websockets", + "tracing", + "tryhard", + "url", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -538,6 +575,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bincode" version = "1.3.3" @@ -811,6 +854,9 @@ name = "bytes" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +dependencies = [ + "serde", +] [[package]] name = "bzip2-sys" @@ -1105,6 +1151,12 @@ dependencies = [ "sha2-const-stable", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ 
-1357,6 +1409,17 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.5" @@ -1456,7 +1519,16 @@ version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91cff35c70bba8a626e3185d8cd48cc11b5437e1a5bcd15b9b5fa3c64b6dfee7" dependencies = [ - "signature", + "signature 1.6.4", +] + +[[package]] +name = "ed25519" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" +dependencies = [ + "signature 2.2.0", ] [[package]] @@ -1466,13 +1538,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c762bae6dcaf24c4c84667b8579785430908723d5c889f469d76a41d59cc7a9d" dependencies = [ "curve25519-dalek 3.2.0", - "ed25519", + "ed25519 1.5.3", "rand 0.7.3", "serde", "sha2 0.9.9", "zeroize", ] +[[package]] +name = "ed25519-dalek" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e796c081cee67dc755e1a36a0a172b897fab85fc3f6bc48307991f64e4eca9" +dependencies = [ + "curve25519-dalek 4.1.3", + "ed25519 2.2.3", + "sha2 0.10.9", + "signature 2.2.0", + "subtle", +] + [[package]] name = "ed25519-dalek-bip32" version = "0.2.0" @@ -1480,7 +1565,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d2be62a4061b872c8c0873ee4fc6f101ce7b889d039f019c5fa2af471a59908" dependencies = [ "derivation-path", - "ed25519-dalek", + "ed25519-dalek 1.0.1", "hmac 0.12.1", "sha2 0.10.9", ] @@ -3467,6 +3552,7 @@ dependencies = [ name = "magicblock-replicator" 
version = "0.8.2" dependencies = [ + "async-nats", "bincode", "bytes", "futures", @@ -3479,8 +3565,8 @@ dependencies = [ "solana-transaction", "thiserror 1.0.69", "tokio", - "tokio-stream", - "tokio-util", + "tracing", + "url", ] [[package]] @@ -3509,7 +3595,7 @@ dependencies = [ name = "magicblock-table-mania" version = "0.8.3" dependencies = [ - "ed25519-dalek", + "ed25519-dalek 1.0.1", "magicblock-metrics", "magicblock-rpc-client", "rand 0.9.2", @@ -3787,6 +3873,21 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nkeys" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879011babc47a1c7fdf5a935ae3cfe94f34645ca0cac1c7f6424b36fc743d1bf" +dependencies = [ + "data-encoding", + "ed25519 2.2.3", + "ed25519-dalek 2.2.0", + "getrandom 0.2.16", + "log", + "rand 0.8.5", + "signatory", +] + [[package]] name = "nom" version = "7.1.3" @@ -3806,6 +3907,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "nuid" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc895af95856f929163a0aa20c26a78d26bfdc839f51b9d5aa7a5b79e52b7e83" +dependencies = [ + "rand 0.8.5", +] + [[package]] name = "num" version = "0.2.1" @@ -4080,6 +4190,15 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -4184,6 +4303,16 @@ dependencies = [ "pinocchio-pubkey", ] +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -5091,6 +5220,19 @@ dependencies = [ "zeroize", ] +[[package]] +name 
= "rustls-native-certs" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe", + "rustls-pemfile 2.2.0", + "rustls-pki-types", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.8.2" @@ -5140,6 +5282,16 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.8" @@ -5337,6 +5489,26 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_nanos" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a93142f0367a4cc53ae0fead1bcda39e85beccfad3dcd717656cacab94b12985" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -5510,12 +5682,34 @@ dependencies = [ "libc", ] +[[package]] +name = "signatory" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e303f8205714074f6068773f0e29527e0453937fe837c9717d066635b65f31" +dependencies = [ + "pkcs8", + "rand_core 0.6.4", + "signature 2.2.0", + "zeroize", +] + [[package]] name = "signature" version = "1.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +[[package]] +name = "signature" +version = "2.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest 0.10.7", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.8" @@ -6029,7 +6223,7 @@ checksum = "a1feafa1691ea3ae588f99056f4bdd1293212c7ece28243d7da257c443e84753" dependencies = [ "bytemuck", "bytemuck_derive", - "ed25519-dalek", + "ed25519-dalek 1.0.1", "solana-feature-set", "solana-instruction", "solana-precompile-error", @@ -6316,7 +6510,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dbb7042c2e0c561afa07242b2099d55c57bd1b1da3b6476932197d84e15e3e4" dependencies = [ "bs58", - "ed25519-dalek", + "ed25519-dalek 1.0.1", "ed25519-dalek-bip32", "rand 0.7.3", "solana-derivation-path", @@ -7234,7 +7428,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47d251c8f3dc015f320b4161daac7f108156c837428e5a8cc61136d25beb11d6" dependencies = [ "bs58", - "ed25519-dalek", + "ed25519-dalek 1.0.1", "rand 0.8.5", "serde", "serde-big-array", @@ -7843,6 +8037,16 @@ dependencies = [ "lock_api", ] +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "spl-associated-token-account" version = "6.0.0" @@ -8662,6 +8866,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-websockets" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-sink", + "http 1.4.0", + "httparse", + "rand 0.8.5", + "ring", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", + "webpki-roots 0.26.11", +] + [[package]] name = "toml" version = "0.5.11" @@ 
-8764,7 +8989,7 @@ dependencies = [ "percent-encoding", "pin-project", "prost 0.13.5", - "rustls-native-certs", + "rustls-native-certs 0.8.2", "rustls-pemfile 2.2.0", "socket2 0.5.10", "tokio", @@ -8989,6 +9214,16 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tryhard" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fe58ebd5edd976e0fe0f8a14d2a04b7c81ef153ea9a54eebc42e67c2c23b4e5" +dependencies = [ + "pin-project-lite", + "tokio", +] + [[package]] name = "tungstenite" version = "0.20.1" @@ -9307,6 +9542,24 @@ version = "0.25.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.6", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "4.4.2" diff --git a/Cargo.toml b/Cargo.toml index c58ec006a..d2a960216 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ agave-geyser-plugin-interface = { version = "2.2" } anyhow = "1.0.86" arc-swap = { version = "1.7" } assert_matches = "1.5.0" +async-nats = "0.46" async-trait = "0.1.77" base64 = "0.21.7" bincode = "1.3.3" diff --git a/magicblock-replicator/Cargo.toml b/magicblock-replicator/Cargo.toml index 62ceb4e1e..c9fa5f6ea 100644 --- a/magicblock-replicator/Cargo.toml +++ b/magicblock-replicator/Cargo.toml @@ -8,13 +8,12 @@ license.workspace = true edition.workspace = true 
[dependencies] +async-nats = { workspace = true } bincode = { workspace = true } bytes = { workspace = true } futures = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true, features = ["net", "rt", "macros"] } -tokio-stream = { workspace = true } -tokio-util = { workspace = true, features = ["codec"] } serde = { workspace = true, features = ["derive"] } solana-hash = { workspace = true, features = ["serde"] } solana-keypair = { workspace = true } @@ -22,3 +21,5 @@ solana-pubkey = { workspace = true, features = ["serde"] } solana-signature = { workspace = true, features = ["serde"] } solana-signer = { workspace = true } solana-transaction = { workspace = true, features = ["serde"] } +tracing = { workspace = true } +url = { workspace = true } diff --git a/magicblock-replicator/src/connection.rs b/magicblock-replicator/src/connection.rs index a6651c0df..62fdd5c77 100644 --- a/magicblock-replicator/src/connection.rs +++ b/magicblock-replicator/src/connection.rs @@ -1,104 +1 @@ -//! Codec and stream types for length-prefixed bincode framing. - -use std::{ - pin::Pin, - task::{Context, Poll}, -}; - -use bytes::{BufMut, BytesMut}; -use futures::{SinkExt, Stream, StreamExt}; -use tokio::io::{AsyncRead, AsyncWrite}; -use tokio_util::codec::{ - Encoder, FramedRead, FramedWrite, LengthDelimitedCodec, -}; - -use crate::{ - error::{Error, Result}, - proto::Message, -}; - -/// Encodes `Message` with 4-byte LE length prefix. 
-pub(crate) struct MessageEncoder; - -pub(crate) type InputStream = FramedRead; -pub(crate) type OutputStream = FramedWrite; - -const MAX_FRAME_SIZE: usize = 64 * 1024 * 1024; - -impl Encoder<&Message> for MessageEncoder { - type Error = Error; - - fn encode(&mut self, msg: &Message, dst: &mut BytesMut) -> Result<()> { - let start = dst.len(); - dst.put_u32_le(0); - bincode::serialize_into(dst.writer(), msg)?; - let len = dst.len() - start - 4; - if len > MAX_FRAME_SIZE { - dst.truncate(start); - return Err(Box::new(bincode::ErrorKind::SizeLimit))?; - } - dst[start..start + 4].copy_from_slice(&(len as u32).to_le_bytes()); - Ok(()) - } -} - -/// Receives messages from an async stream (max frame: 64MB). -pub struct Receiver { - inner: InputStream, -} - -impl Receiver { - pub fn new(io: IO) -> Self { - let inner = LengthDelimitedCodec::builder() - .little_endian() - .max_frame_length(MAX_FRAME_SIZE) - .length_field_type::() - .new_read(io); - Self { inner } - } - - pub async fn recv(&mut self) -> Result { - let frame = - self.inner.next().await.ok_or(Error::ConnectionClosed)??; - bincode::deserialize(&frame).map_err(Into::into) - } -} - -impl Stream for Receiver { - type Item = Result; - - fn poll_next( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - match self.inner.poll_next_unpin(cx) { - Poll::Ready(Some(Ok(item))) => { - let result = bincode::deserialize(&item).map_err(Into::into); - Poll::Ready(Some(result)) - } - Poll::Ready(Some(Err(err))) => Poll::Ready(Some(Err(err.into()))), - Poll::Ready(None) => { - Poll::Ready(Some(Err(Error::ConnectionClosed))) - } - Poll::Pending => Poll::Pending, - } - } -} - -/// Sends messages to an async stream. 
-pub struct Sender { - inner: OutputStream, -} - -impl Sender { - pub fn new(io: IO) -> Self { - Self { - inner: FramedWrite::new(io, MessageEncoder), - } - } - - pub async fn send(&mut self, msg: &Message) -> Result<()> { - self.inner.send(msg).await?; - Ok(()) - } -} +pub struct IngressStream {} diff --git a/magicblock-replicator/src/error.rs b/magicblock-replicator/src/error.rs index 9e66f70bb..66304d9c1 100644 --- a/magicblock-replicator/src/error.rs +++ b/magicblock-replicator/src/error.rs @@ -1,8 +1,12 @@ //! Error types for the replication protocol. +use std::fmt::{Debug, Display}; + /// Replication operation errors. #[derive(thiserror::Error, Debug)] pub enum Error { + #[error("message broker error: {0}")] + Nats(async_nats::Error), #[error("connection closed")] ConnectionClosed, #[error("IO error: {0}")] @@ -11,4 +15,13 @@ pub enum Error { SerDe(#[from] bincode::Error), } +impl From> for Error +where + K: Display + Debug + Clone + PartialEq + Sync + Send + 'static, +{ + fn from(value: async_nats::error::Error) -> Self { + Self::Nats(value.into()) + } +} + pub type Result = std::result::Result; diff --git a/magicblock-replicator/src/lib.rs b/magicblock-replicator/src/lib.rs index c4ee0513e..b7af9a887 100644 --- a/magicblock-replicator/src/lib.rs +++ b/magicblock-replicator/src/lib.rs @@ -4,8 +4,8 @@ pub mod connection; pub mod error; +pub mod nats; pub mod proto; -pub mod tcp; #[cfg(test)] mod tests; diff --git a/magicblock-replicator/src/nats.rs b/magicblock-replicator/src/nats.rs new file mode 100644 index 000000000..a5e1e00b0 --- /dev/null +++ b/magicblock-replicator/src/nats.rs @@ -0,0 +1,114 @@ +use std::time::Duration; + +use crate::Result; +use async_nats::{ + jetstream::{ + self, + consumer::{pull, AckPolicy, PullConsumer}, + object_store, + stream::{self, Compression}, + Context, + }, + ConnectOptions, Event, ServerAddr, +}; +use tracing::{debug, info, warn}; +use url::Url; + +struct Consumer { + inner: PullConsumer, + jetstream: Context, + id: 
String, +} + +struct Producer { + jetstream: Context, + id: String, +} + +impl Consumer { + pub async fn new(id: String, url: Url) -> Result { + let jetstream = setup_jetstream_client(url).await?; + let stream = jetstream.get_stream("EVENTS").await?; + let config = pull::Config { + durable_name: Some(id.clone()), + ack_policy: AckPolicy::All, + ack_wait: Duration::from_secs(30), + max_ack_pending: 512, + ..Default::default() + }; + let inner = stream.get_or_create_consumer(&id, config).await?; + Ok(Self { + inner, + jetstream, + id, + }) + } +} + +impl Producer { + pub async fn new(id: String, url: Url) -> Result { + let jetstream = setup_jetstream_client(url).await?; + Ok(Self { jetstream, id }) + } +} + +async fn setup_jetstream_client(url: Url) -> Result { + let addr = ServerAddr::from_url(url)?; + // Configure connection options + let jetstream = ConnectOptions::new() + .max_reconnects(None) // Infinite reconnect attempts + .reconnect_delay_callback(|attempts| { + // Exponential backoff for reconnects + Duration::from_millis((attempts * 100).min(5000) as u64) + }) + .event_callback(|event| async move { + match event { + Event::Disconnected => warn!("⚠️ NATS Disconnected!"), + Event::Connected => info!("✅ NATS Connected!"), + Event::ClientError(err) => { + warn!("❌ NATS Client Error: {}", err) + } + _ => debug!("ℹ️ NATS Event: {:?}", event), + } + }) + .connect(addr) + .await + .map(jetstream::new)?; + initialize_jetstream(&jetstream).await?; + + Ok(jetstream) +} + +async fn initialize_jetstream(jetstream: &Context) -> Result<()> { + let config = stream::Config { + name: "EVENTS".into(), + max_bytes: 1024 * 1024 * 1024 * 256, + max_messages: 16 * 1024 * 60 * 60 * 24, + max_messages_per_subject: -1, + subjects: vec![ + "event.transaction".into(), + "event.block".into(), + "event.superblock".into(), + ], + max_consumers: -1, + max_age: Duration::from_secs(60 * 60 * 24), + duplicate_window: Duration::from_secs(30), + description: Some("Magicblock validator 
events".into()), + compression: Some(Compression::S2), + ..Default::default() + }; + let info = jetstream.create_or_update_stream(config).await?; + info!( + "NATS stream existence is confirmed: {} created at: {}, subjects: {:?}, messages: {}", + info.config.name, info.created, info.config.subjects, info.state.messages + ); + let config = object_store::Config { + bucket: "snapshots.accountsdb".into(), + description: Some("Magicblock accountsdb snapshot".into()), + max_bytes: 512 * 1024 * 1024 * 1024, + compression: false, + ..Default::default() + }; + jetstream.create_object_store(config).await?; + Ok(()) +} diff --git a/magicblock-replicator/src/proto.rs b/magicblock-replicator/src/proto.rs index 1becc33ce..a8456a3c7 100644 --- a/magicblock-replicator/src/proto.rs +++ b/magicblock-replicator/src/proto.rs @@ -11,8 +11,6 @@ use solana_signature::Signature; use solana_signer::Signer; use solana_transaction::versioned::VersionedTransaction; -use crate::error::Result; - pub type Slot = u64; pub type TxIndex = u32; @@ -21,38 +19,18 @@ pub const PROTOCOL_VERSION: u32 = 1; /// Top-level replication message. #[derive(Deserialize, Serialize, Clone, Debug)] pub enum Message { - HandshakeReq(HandshakeRequest), - HandshakeResp(HandshakeResponse), Transaction(Transaction), Block(Block), SuperBlock(SuperBlock), Failover(FailoverSignal), } -/// Client -> Server: initiate replication session. -/// Authenticated via Ed25519 signature over `start_slot`. -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct HandshakeRequest { - pub version: u32, - pub start_slot: Slot, - pub identity: Pubkey, - signature: Signature, -} - -/// Server -> Client: accept or reject session. -/// Signed over `slot` (success) or error message (failure). -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct HandshakeResponse { - pub result: std::result::Result, - pub identity: Pubkey, - signature: Signature, -} - /// Slot boundary marker with blockhash. 
#[derive(Deserialize, Serialize, Clone, Debug)] pub struct Block { pub slot: Slot, pub hash: Hash, + pub timestamp: i64, } /// Transaction with slot and ordinal position. @@ -79,50 +57,6 @@ pub struct FailoverSignal { signature: Signature, } -impl HandshakeRequest { - pub fn new(start_slot: Slot, keypair: &Keypair) -> Self { - Self { - version: PROTOCOL_VERSION, - start_slot, - identity: keypair.pubkey(), - signature: keypair.sign_message(&start_slot.to_le_bytes()), - } - } - - /// Verifies signature matches claimed identity. - pub fn verify(&self) -> bool { - self.signature - .verify(self.identity.as_array(), &self.start_slot.to_le_bytes()) - } -} - -impl HandshakeResponse { - pub fn new(result: Result, keypair: &Keypair) -> Self { - let result = result.map_err(|e| e.to_string()); - let signature = match &result { - Ok(slot) => keypair.sign_message(&slot.to_le_bytes()), - Err(err) => keypair.sign_message(err.as_bytes()), - }; - Self { - result, - identity: keypair.pubkey(), - signature, - } - } - - /// Verifies signature matches server identity. - pub fn verify(&self) -> bool { - match &self.result { - Ok(slot) => self - .signature - .verify(self.identity.as_array(), &slot.to_le_bytes()), - Err(err) => self - .signature - .verify(self.identity.as_array(), err.as_bytes()), - } - } -} - impl Transaction { /// Deserializes the inner transaction. pub fn decode(&self) -> bincode::Result { diff --git a/magicblock-replicator/src/tcp.rs b/magicblock-replicator/src/tcp.rs deleted file mode 100644 index 10b3d9a6a..000000000 --- a/magicblock-replicator/src/tcp.rs +++ /dev/null @@ -1,24 +0,0 @@ -//! TCP transport utilities. - -use std::{io, net::SocketAddr}; - -use tokio::net::{ - tcp::{OwnedReadHalf, OwnedWriteHalf}, - TcpStream, -}; - -use crate::connection::{Receiver, Sender}; - -pub type TcpReceiver = Receiver; -pub type TcpSender = Sender; - -/// Connects to a primary at `addr`, returning (sender, receiver). 
-pub async fn connect(addr: SocketAddr) -> io::Result<(TcpSender, TcpReceiver)> { - TcpStream::connect(addr).await.map(split) -} - -/// Splits a TCP stream into sender and receiver halves. -pub fn split(stream: TcpStream) -> (TcpSender, TcpReceiver) { - let (rx, tx) = stream.into_split(); - (Sender::new(tx), Receiver::new(rx)) -} diff --git a/magicblock-replicator/src/tests.rs b/magicblock-replicator/src/tests.rs index ea7499cd2..1ec3661a2 100644 --- a/magicblock-replicator/src/tests.rs +++ b/magicblock-replicator/src/tests.rs @@ -5,13 +5,7 @@ use solana_pubkey::Pubkey; use solana_signer::Signer; use tokio::net::{TcpListener, TcpStream}; -use crate::{ - proto::{ - Block, FailoverSignal, HandshakeRequest, HandshakeResponse, Message, - SuperBlock, Transaction, - }, - tcp::split, -}; +use crate::proto::{Block, FailoverSignal, Message, SuperBlock, Transaction}; // ============================================================================= // Wire Format Tests - catch serialization/protocol changes @@ -21,32 +15,22 @@ use crate::{ fn variant_order_stability() { // Bincode encodes enum discriminant as variant index. // Reordering enum variants silently breaks wire compatibility. 
- let cases: [(Message, u32); 6] = [ - ( - Message::HandshakeReq(HandshakeRequest::new(0, &Keypair::new())), - 0, - ), - ( - Message::HandshakeResp(HandshakeResponse::new( - Ok(0), - &Keypair::new(), - )), - 1, - ), + let cases: [(Message, u32); 4] = [ ( Message::Transaction(Transaction { slot: 0, index: 0, payload: vec![], }), - 2, + 0, ), ( Message::Block(Block { slot: 0, hash: solana_hash::Hash::default(), + timestamp: 42, }), - 3, + 1, ), ( Message::SuperBlock(SuperBlock { @@ -54,11 +38,11 @@ fn variant_order_stability() { transactions: 0, checksum: 0, }), - 4, + 2, ), ( Message::Failover(FailoverSignal::new(0, &Keypair::new())), - 5, + 3, ), ]; @@ -81,12 +65,6 @@ fn signed_message_roundtrip() { let kp = Keypair::new(); let cases = vec![ - Message::HandshakeReq(HandshakeRequest::new(12345, &kp)), - Message::HandshakeResp(HandshakeResponse::new(Ok(99999), &kp)), - Message::HandshakeResp(HandshakeResponse::new( - Err(crate::error::Error::ConnectionClosed), - &kp, - )), Message::Failover(FailoverSignal::new(77777, &kp)), Message::Transaction(Transaction { slot: 54321, @@ -106,39 +84,6 @@ fn signed_message_roundtrip() { // Signature Verification Tests - catch crypto/auth bugs // ============================================================================= -#[test] -fn handshake_tampering_detected() { - let kp = Keypair::new(); - let req = HandshakeRequest::new(12345, &kp); - - // Valid signature works - assert!(req.verify()); - - // Tampered identity fails - let mut tampered = req.clone(); - tampered.identity = Pubkey::new_unique(); - assert!(!tampered.verify(), "tampered identity should fail"); - - // Tampered slot fails (slot is at offset 4 after version u32) - let mut bytes = bincode::serialize(&req).unwrap(); - bytes[4..12].copy_from_slice(&99999u64.to_le_bytes()); - let decoded: HandshakeRequest = bincode::deserialize(&bytes).unwrap(); - assert!(!decoded.verify(), "tampered slot should fail"); -} - -#[test] -fn handshake_response_signing() { - // Success and 
error paths sign different data - both must verify. - let kp = Keypair::new(); - - let success = HandshakeResponse::new(Ok(5000), &kp); - assert!(success.verify(), "success response should verify"); - - let error = - HandshakeResponse::new(Err(crate::error::Error::ConnectionClosed), &kp); - assert!(error.verify(), "error response should verify"); -} - #[test] fn failover_signal_verification() { let kp = Keypair::new(); @@ -153,157 +98,3 @@ fn failover_signal_verification() { "wrong identity should fail" ); } - -// ============================================================================= -// TCP Transport Tests - catch framing/connection bugs -// ============================================================================= - -#[tokio::test] -async fn bidirectional_handshake() { - let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); - let addr = listener.local_addr().unwrap(); - - let client = TcpStream::connect(addr).await.unwrap(); - let (server, _) = listener.accept().await.unwrap(); - - let (mut client_tx, mut client_rx) = split(client); - let (mut server_tx, mut server_rx) = split(server); - - // Client -> Server: handshake request - let kp = Keypair::new(); - client_tx - .send(&Message::HandshakeReq(HandshakeRequest::new(1000, &kp))) - .await - .unwrap(); - - let req = match server_rx.recv().await.unwrap() { - Message::HandshakeReq(r) => r, - _ => panic!("expected HandshakeReq"), - }; - assert!(req.verify()); - assert_eq!(req.start_slot, 1000); - - // Server -> Client: handshake response - server_tx - .send(&Message::HandshakeResp(HandshakeResponse::new( - Ok(1000), - &Keypair::new(), - ))) - .await - .unwrap(); - - let resp = match client_rx.recv().await.unwrap() { - Message::HandshakeResp(r) => r, - _ => panic!("expected HandshakeResp"), - }; - assert!(resp.verify()); - assert_eq!(resp.result, Ok(1000u64)); -} - -#[tokio::test] -async fn message_ordering_over_stream() { - // Tests that TCP framing preserves message boundaries and order. 
- let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); - let addr = listener.local_addr().unwrap(); - - let client = TcpStream::connect(addr).await.unwrap(); - let (server, _) = listener.accept().await.unwrap(); - - let (mut tx, _) = split(client); - let (_, mut rx) = split(server); - - // Send mixed message types - for i in 0..10 { - tx.send(&Message::Block(Block { - slot: i, - hash: solana_hash::Hash::new_unique(), - })) - .await - .unwrap(); - } - - // Verify order is preserved - for expected in 0..10 { - match rx.recv().await.unwrap() { - Message::Block(b) => assert_eq!(b.slot, expected), - _ => panic!("expected Block"), - } - } -} - -#[tokio::test] -async fn large_payload() { - // Tests frame handling for messages larger than TCP buffer. - let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); - let addr = listener.local_addr().unwrap(); - - let client = TcpStream::connect(addr).await.unwrap(); - let (server, _) = listener.accept().await.unwrap(); - - let (mut tx, _) = split(client); - let (_, mut rx) = split(server); - - let payload = vec![0xAB; 1024 * 1024]; // 1MB - tx.send(&Message::Transaction(Transaction { - slot: 0, - index: 0, - payload: payload.clone(), - })) - .await - .unwrap(); - - match rx.recv().await.unwrap() { - Message::Transaction(t) => { - assert_eq!(t.payload, payload); - } - _ => panic!("expected Transaction"), - } -} - -#[tokio::test] -async fn all_message_types_over_wire() { - // Tests encoder→TCP→decoder path for all 6 message types. 
- let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); - let addr = listener.local_addr().unwrap(); - - let client = TcpStream::connect(addr).await.unwrap(); - let (server, _) = listener.accept().await.unwrap(); - - let (mut tx, _) = split(client); - let (_, mut rx) = split(server); - - let kp = Keypair::new(); - let messages: Vec = vec![ - Message::HandshakeReq(HandshakeRequest::new(12345, &kp)), - Message::HandshakeResp(HandshakeResponse::new(Ok(67890), &kp)), - Message::Transaction(Transaction { - slot: 100, - index: 5, - payload: vec![0xDE, 0xAD, 0xBE, 0xEF], - }), - Message::Block(Block { - slot: 200, - hash: solana_hash::Hash::new_unique(), - }), - Message::SuperBlock(SuperBlock { - blocks: 1000, - transactions: 50000, - checksum: 0xCAFEBABE, - }), - Message::Failover(FailoverSignal::new(99999, &kp)), - ]; - - for msg in &messages { - tx.send(msg).await.unwrap(); - } - - for expected in &messages { - let received = rx.recv().await.unwrap(); - // Compare serialized form to catch any encoding differences - assert_eq!( - bincode::serialize(&received).unwrap(), - bincode::serialize(expected).unwrap(), - "wire roundtrip mismatch" - ); - } -} From 4ba1c16b4af393a7f81852ed80943eaf343f1454 Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Tue, 10 Mar 2026 12:57:59 +0400 Subject: [PATCH 08/13] feat: add snapshot upload/retrieval to the nats module --- magicblock-replicator/src/error.rs | 2 + magicblock-replicator/src/nats.rs | 148 +++++++++++++++++++++++++---- 2 files changed, 134 insertions(+), 16 deletions(-) diff --git a/magicblock-replicator/src/error.rs b/magicblock-replicator/src/error.rs index 66304d9c1..196cabb40 100644 --- a/magicblock-replicator/src/error.rs +++ b/magicblock-replicator/src/error.rs @@ -13,6 +13,8 @@ pub enum Error { Io(#[from] std::io::Error), #[error("serialization error: {0}")] SerDe(#[from] bincode::Error), + #[error("internal replication error: {0}")] + Internal(&'static str), } impl From> for Error diff --git 
a/magicblock-replicator/src/nats.rs b/magicblock-replicator/src/nats.rs index a5e1e00b0..216c38c7a 100644 --- a/magicblock-replicator/src/nats.rs +++ b/magicblock-replicator/src/nats.rs @@ -1,45 +1,69 @@ -use std::time::Duration; +use std::{collections::HashMap, time::Duration}; -use crate::Result; +use crate::{proto::Slot, Error, Result}; use async_nats::{ jetstream::{ - self, - consumer::{pull, AckPolicy, PullConsumer}, - object_store, + consumer::{pull, AckPolicy, DeliverPolicy, PullConsumer}, + kv::{self, CreateErrorKind, Store}, + object_store::{self, ObjectMetadata}, stream::{self, Compression}, - Context, + Context, ContextBuilder, }, - ConnectOptions, Event, ServerAddr, + ConnectOptions, Event, ServerAddr, Subject, }; +use bytes::Bytes; +use tokio::{fs::File, io::AsyncReadExt}; use tracing::{debug, info, warn}; use url::Url; +pub struct AccountsDbSnapshotObject { + pub blob: Vec, + pub slot: Slot, + pub seqno: u64, +} + struct Consumer { inner: PullConsumer, jetstream: Context, + msgcount: usize, id: String, } struct Producer { jetstream: Context, - id: String, + store: Store, + id: Bytes, } impl Consumer { - pub async fn new(id: String, url: Url) -> Result { - let jetstream = setup_jetstream_client(url).await?; + pub async fn new( + id: String, + jetstream: Context, + seqno: Option, + ) -> Result { let stream = jetstream.get_stream("EVENTS").await?; + + let deliver_policy = if let Some(seqno) = seqno { + stream.delete_consumer(&id).await?; + DeliverPolicy::ByStartSequence { + start_sequence: seqno, + } + } else { + DeliverPolicy::All + }; let config = pull::Config { durable_name: Some(id.clone()), ack_policy: AckPolicy::All, ack_wait: Duration::from_secs(30), max_ack_pending: 512, + deliver_policy, ..Default::default() }; let inner = stream.get_or_create_consumer(&id, config).await?; Ok(Self { inner, jetstream, + msgcount: 0, id, }) } @@ -47,15 +71,95 @@ impl Consumer { impl Producer { pub async fn new(id: String, url: Url) -> Result { + let id = 
id.into_bytes().into(); let jetstream = setup_jetstream_client(url).await?; - Ok(Self { jetstream, id }) + let store = jetstream.get_key_value("PRODUCER").await?; + Ok(Self { + jetstream, + store, + id, + }) + } + + pub async fn publish( + &mut self, + payload: Bytes, + subject: Subject, + ) -> Result<()> { + self.jetstream.publish(subject, payload).await?; + Ok(()) + } + + pub async fn upload_snapshot( + &mut self, + slot: Slot, + mut snapshot: File, + ) -> Result<()> { + let store = self.jetstream.get_object_store("SNAPSHOTS").await?; + let mut stream = self.jetstream.get_stream("EVENTS").await?; + let seqno = stream.info().await?.state.last_sequence; + let metadata = { + let mut map = HashMap::with_capacity(2); + map.insert("slot".into(), slot.to_string()); + map.insert("seqno".into(), seqno.to_string()); + map + }; + let meta = ObjectMetadata { + name: "".into(), + metadata, + ..Default::default() + }; + store.put(meta, &mut snapshot).await?; + Ok(()) } + + pub async fn acquire(&self) -> Result { + let result = self.store.create("lock", self.id.clone()).await; + match result { + Ok(_) => Ok(true), + Err(e) if matches!(e.kind(), CreateErrorKind::AlreadyExists) => { + Ok(false) + } + Err(e) => Err(e.into()), + } + } + + pub async fn update(&self) -> Result<()> { + self.store.put("lock", self.id.clone()).await?; + Ok(()) + } +} + +pub async fn retrieve_snapshot( + jetstream: &Context, +) -> Result { + let store = jetstream.get_object_store("SNAPSHOTS").await?; + let mut object = store.get("accountsdb").await?; + let info = object.info(); + let slot = info + .metadata + .get("slot") + .and_then(|s| s.parse::().ok()) + .ok_or(Error::Internal( + "malformed snapshot object, no slot metadata found", + ))?; + let seqno = info + .metadata + .get("seqno") + .and_then(|s| s.parse::().ok()) + .ok_or(Error::Internal( + "malformed snapshot object, no seqno metadata found", + ))?; + + let mut blob = Vec::with_capacity(info.size); + object.read_to_end(&mut blob).await?; + 
Ok(AccountsDbSnapshotObject { blob, slot, seqno }) } -async fn setup_jetstream_client(url: Url) -> Result { +pub async fn setup_jetstream_client(url: Url) -> Result { let addr = ServerAddr::from_url(url)?; // Configure connection options - let jetstream = ConnectOptions::new() + let client = ConnectOptions::new() .max_reconnects(None) // Infinite reconnect attempts .reconnect_delay_callback(|attempts| { // Exponential backoff for reconnects @@ -72,8 +176,13 @@ async fn setup_jetstream_client(url: Url) -> Result { } }) .connect(addr) - .await - .map(jetstream::new)?; + .await?; + let jetstream = ContextBuilder::new() + .timeout(Duration::from_secs(2)) + .max_ack_inflight(2048) + .backpressure_on_inflight(true) + .build(client); + initialize_jetstream(&jetstream).await?; Ok(jetstream) @@ -103,12 +212,19 @@ async fn initialize_jetstream(jetstream: &Context) -> Result<()> { info.config.name, info.created, info.config.subjects, info.state.messages ); let config = object_store::Config { - bucket: "snapshots.accountsdb".into(), + bucket: "SNAPSHOTS".into(), description: Some("Magicblock accountsdb snapshot".into()), max_bytes: 512 * 1024 * 1024 * 1024, compression: false, ..Default::default() }; jetstream.create_object_store(config).await?; + let config = kv::Config { + bucket: "PRODUCER".into(), + description: "Magicblock event producer state".into(), + max_age: Duration::from_secs(5), + ..Default::default() + }; + jetstream.create_key_value(config).await?; Ok(()) } From 942d10322b63013f3c73b4e753b4782fde9a52ac Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Tue, 10 Mar 2026 23:19:28 +0400 Subject: [PATCH 09/13] feat: added scaffolding for replication service --- Cargo.lock | 10 +- magicblock-api/src/magic_validator.rs | 9 +- magicblock-core/src/link/transactions.rs | 11 + magicblock-processor/src/scheduler/mod.rs | 4 +- magicblock-processor/src/scheduler/state.rs | 15 +- magicblock-replicator/Cargo.toml | 8 +- magicblock-replicator/src/connection.rs | 1 - 
magicblock-replicator/src/error.rs | 22 +- magicblock-replicator/src/lib.rs | 18 +- magicblock-replicator/src/nats.rs | 571 +++++++++++++------- magicblock-replicator/src/proto.rs | 92 ++-- magicblock-replicator/src/service.rs | 470 ++++++++++++++++ magicblock-replicator/src/tests.rs | 59 +- test-kit/src/lib.rs | 10 +- 14 files changed, 985 insertions(+), 315 deletions(-) delete mode 100644 magicblock-replicator/src/connection.rs create mode 100644 magicblock-replicator/src/service.rs diff --git a/Cargo.lock b/Cargo.lock index 2a9442115..b79eaf770 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3550,18 +3550,16 @@ dependencies = [ [[package]] name = "magicblock-replicator" -version = "0.8.2" +version = "0.8.3" dependencies = [ "async-nats", "bincode", "bytes", - "futures", + "magicblock-accounts-db", + "magicblock-core", + "magicblock-ledger", "serde", "solana-hash", - "solana-keypair", - "solana-pubkey", - "solana-signature", - "solana-signer", "solana-transaction", "thiserror 1.0.69", "tokio", diff --git a/magicblock-api/src/magic_validator.rs b/magicblock-api/src/magic_validator.rs index 22f2a6312..e47169edc 100644 --- a/magicblock-api/src/magic_validator.rs +++ b/magicblock-api/src/magic_validator.rs @@ -42,7 +42,9 @@ use magicblock_config::{ }; use magicblock_core::{ link::{ - blocks::BlockUpdateTx, link, transactions::TransactionSchedulerHandle, + blocks::BlockUpdateTx, + link, + transactions::{SchedulerMode, TransactionSchedulerHandle}, }, Slot, }; @@ -55,10 +57,7 @@ use magicblock_metrics::{metrics::TRANSACTION_COUNT, MetricsService}; use magicblock_processor::{ build_svm_env, loader::load_upgradeable_programs, - scheduler::{ - state::{SchedulerMode, TransactionSchedulerState}, - TransactionScheduler, - }, + scheduler::{state::TransactionSchedulerState, TransactionScheduler}, }; use magicblock_program::{ init_magic_sys, diff --git a/magicblock-core/src/link/transactions.rs b/magicblock-core/src/link/transactions.rs index 899555e29..2964a227e 100644 --- 
a/magicblock-core/src/link/transactions.rs +++ b/magicblock-core/src/link/transactions.rs @@ -321,3 +321,14 @@ impl TransactionSchedulerHandle { rx.await.map_err(|_| TransactionError::ClusterMaintenance) } } + +/// Scheduler execution mode (used in mode switching). +/// +/// Send via channel to transition the scheduler between modes. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum SchedulerMode { + /// Accept client transactions with concurrent execution. + Primary, + /// Replay transactions with strict ordering. + Replica, +} diff --git a/magicblock-processor/src/scheduler/mod.rs b/magicblock-processor/src/scheduler/mod.rs index d3c180f8d..69cd80d5d 100644 --- a/magicblock-processor/src/scheduler/mod.rs +++ b/magicblock-processor/src/scheduler/mod.rs @@ -17,7 +17,7 @@ use locks::{ExecutorId, MAX_SVM_EXECUTORS}; use magicblock_accounts_db::{traits::AccountsBank, AccountsDb}; use magicblock_core::{ link::transactions::{ - ProcessableTransaction, TransactionProcessingMode, + ProcessableTransaction, SchedulerMode, TransactionProcessingMode, TransactionToProcessRx, }, Slot, @@ -27,7 +27,7 @@ use solana_account::{from_account, to_account}; use solana_program::slot_hashes::SlotHashes; use solana_program_runtime::loaded_programs::ProgramCache; use solana_sdk_ids::sysvar::{clock, slot_hashes}; -use state::{SchedulerMode, TransactionSchedulerState}; +use state::TransactionSchedulerState; use tokio::{ runtime::Builder, sync::mpsc::{channel, Receiver, Sender}, diff --git a/magicblock-processor/src/scheduler/state.rs b/magicblock-processor/src/scheduler/state.rs index d8acb5537..03b9a01af 100644 --- a/magicblock-processor/src/scheduler/state.rs +++ b/magicblock-processor/src/scheduler/state.rs @@ -10,7 +10,8 @@ use magicblock_accounts_db::{traits::AccountsBank, AccountsDb}; use magicblock_core::link::{ accounts::AccountUpdateTx, transactions::{ - ScheduledTasksTx, TransactionStatusTx, TransactionToProcessRx, + ScheduledTasksTx, SchedulerMode, TransactionStatusTx, + 
TransactionToProcessRx, }, }; use magicblock_ledger::Ledger; @@ -145,15 +146,3 @@ impl TransactionSchedulerState { } } } - -/// Scheduler execution mode command. -/// -/// Send via channel to transition the scheduler between modes. -/// See [`CoordinationMode`](super::coordinator::CoordinationMode) for internal state. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum SchedulerMode { - /// Accept client transactions with concurrent execution. - Primary, - /// Replay transactions with strict ordering. - Replica, -} diff --git a/magicblock-replicator/Cargo.toml b/magicblock-replicator/Cargo.toml index c9fa5f6ea..7f63e5f17 100644 --- a/magicblock-replicator/Cargo.toml +++ b/magicblock-replicator/Cargo.toml @@ -11,15 +11,13 @@ edition.workspace = true async-nats = { workspace = true } bincode = { workspace = true } bytes = { workspace = true } -futures = { workspace = true } +magicblock-accounts-db = { workspace = true } +magicblock-core = { workspace = true } +magicblock-ledger = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true, features = ["net", "rt", "macros"] } serde = { workspace = true, features = ["derive"] } solana-hash = { workspace = true, features = ["serde"] } -solana-keypair = { workspace = true } -solana-pubkey = { workspace = true, features = ["serde"] } -solana-signature = { workspace = true, features = ["serde"] } -solana-signer = { workspace = true } solana-transaction = { workspace = true, features = ["serde"] } tracing = { workspace = true } url = { workspace = true } diff --git a/magicblock-replicator/src/connection.rs b/magicblock-replicator/src/connection.rs deleted file mode 100644 index 62fdd5c77..000000000 --- a/magicblock-replicator/src/connection.rs +++ /dev/null @@ -1 +0,0 @@ -pub struct IngressStream {} diff --git a/magicblock-replicator/src/error.rs b/magicblock-replicator/src/error.rs index 196cabb40..1c3d0b1e5 100644 --- a/magicblock-replicator/src/error.rs +++ b/magicblock-replicator/src/error.rs 
@@ -2,21 +2,38 @@ use std::fmt::{Debug, Display}; +use magicblock_ledger::errors::LedgerError; + /// Replication operation errors. #[derive(thiserror::Error, Debug)] pub enum Error { + /// NATS message broker error. #[error("message broker error: {0}")] Nats(async_nats::Error), + + /// Connection was closed unexpectedly. #[error("connection closed")] ConnectionClosed, - #[error("IO error: {0}")] + + /// I/O operation failed. + #[error("I/O error: {0}")] Io(#[from] std::io::Error), + + /// Serialization or deserialization failed. #[error("serialization error: {0}")] SerDe(#[from] bincode::Error), - #[error("internal replication error: {0}")] + + /// Ledger access error. + #[error("ledger access error: {0}")] + Ledger(#[from] LedgerError), + + /// Internal protocol violation or malformed data. + #[error("internal error: {0}")] Internal(&'static str), } +// async_nats::Error is actually async_nats::error::Error where K is the error kind. +// We need this generic impl to convert all variants. impl From> for Error where K: Display + Debug + Clone + PartialEq + Sync + Send + 'static, @@ -26,4 +43,5 @@ where } } +/// Convenience alias for `Result`. pub type Result = std::result::Result; diff --git a/magicblock-replicator/src/lib.rs b/magicblock-replicator/src/lib.rs index b7af9a887..13d654c92 100644 --- a/magicblock-replicator/src/lib.rs +++ b/magicblock-replicator/src/lib.rs @@ -1,14 +1,24 @@ -//! State replication protocol for streaming transactions from primary to standby nodes. +//! State replication protocol for streaming validator events via NATS JetStream. //! -//! Messages are length-prefixed (4B LE) + bincode payload. +//! # Architecture +//! +//! The replicator enables primary-standby state replication using NATS JetStream: +//! +//! - **Producer**: Primary node publishes transactions, blocks, and superblocks +//! - **Consumer**: Standby nodes consume events to maintain synchronized state +//! 
- **Snapshots**: Periodic AccountsDb snapshots enable fast standby recovery +//! +//! # Wire Format +//! +//! Messages are serialized with bincode (4-byte discriminator + payload). -pub mod connection; pub mod error; pub mod nats; pub mod proto; +pub mod service; #[cfg(test)] mod tests; pub use error::{Error, Result}; -pub use proto::{Message, PROTOCOL_VERSION}; +pub use proto::Message; diff --git a/magicblock-replicator/src/nats.rs b/magicblock-replicator/src/nats.rs index 216c38c7a..78d39fef4 100644 --- a/magicblock-replicator/src/nats.rs +++ b/magicblock-replicator/src/nats.rs @@ -1,230 +1,423 @@ -use std::{collections::HashMap, time::Duration}; - -use crate::{proto::Slot, Error, Result}; -use async_nats::{ - jetstream::{ - consumer::{pull, AckPolicy, DeliverPolicy, PullConsumer}, - kv::{self, CreateErrorKind, Store}, - object_store::{self, ObjectMetadata}, - stream::{self, Compression}, - Context, ContextBuilder, - }, - ConnectOptions, Event, ServerAddr, Subject, +//! NATS JetStream client for event replication. +//! +//! # Components +//! +//! - [`Broker`]: Connection manager with stream/bucket initialization +//! - [`Producer`]: Event publisher with distributed leader lock +//! - [`Consumer`]: Event subscriber for standby replay +//! 
- [`Snapshot`]: AccountsDb snapshot with positioning metadata + +use std::collections::HashMap; +use std::time::Duration; + +use async_nats::jetstream::{ + consumer::{pull, AckPolicy, DeliverPolicy, PullConsumer}, + kv::{self, CreateErrorKind, Store, UpdateErrorKind}, + object_store::{self, GetErrorKind, ObjectMetadata}, + stream::{self, Compression}, + Context, ContextBuilder, }; +use async_nats::{ConnectOptions, Event, ServerAddr, Subject}; use bytes::Bytes; -use tokio::{fs::File, io::AsyncReadExt}; -use tracing::{debug, info, warn}; +use magicblock_core::Slot; +use tokio::fs::File; +use tokio::io::AsyncReadExt; +use tracing::{debug, error, info, instrument, warn}; use url::Url; -pub struct AccountsDbSnapshotObject { - pub blob: Vec, - pub slot: Slot, - pub seqno: u64, -} +use crate::{Error, Result}; -struct Consumer { - inner: PullConsumer, - jetstream: Context, - msgcount: usize, - id: String, -} +// ============================================================================= +// Configuration +// ============================================================================= -struct Producer { - jetstream: Context, - store: Store, - id: Bytes, +mod cfg { + use std::time::Duration; + + // Resource names + pub const STREAM: &str = "EVENTS"; + pub const SNAPSHOTS: &str = "SNAPSHOTS"; + pub const PRODUCER_LOCK: &str = "PRODUCER"; + pub const LOCK_KEY: &str = "lock"; + pub const SNAPSHOT_NAME: &str = "accountsdb"; + + // Metadata keys + pub const META_SLOT: &str = "slot"; + pub const META_SEQNO: &str = "seqno"; + + // Size limits + pub const STREAM_BYTES: i64 = 256 * 1024 * 1024 * 1024; // 256 GB + pub const SNAPSHOT_BYTES: i64 = 512 * 1024 * 1024 * 1024; // 512 GB + + // Timeouts + pub const TTL_STREAM: Duration = Duration::from_secs(24 * 60 * 60); + pub const TTL_LOCK: Duration = Duration::from_secs(5); + pub const ACK_WAIT: Duration = Duration::from_secs(30); + pub const API_TIMEOUT: Duration = Duration::from_secs(2); + pub const DUP_WINDOW: Duration = 
Duration::from_secs(30); + + // Reconnect backoff + pub const RECONNECT_BASE_MS: u64 = 100; + pub const RECONNECT_MAX_MS: u64 = 5000; + + // Backpressure + pub const MAX_ACK_PENDING: i64 = 512; + pub const MAX_ACK_INFLIGHT: usize = 2048; } -impl Consumer { - pub async fn new( - id: String, - jetstream: Context, - seqno: Option, - ) -> Result { - let stream = jetstream.get_stream("EVENTS").await?; +// ============================================================================= +// Subjects +// ============================================================================= - let deliver_policy = if let Some(seqno) = seqno { - stream.delete_consumer(&id).await?; - DeliverPolicy::ByStartSequence { - start_sequence: seqno, - } - } else { - DeliverPolicy::All - }; - let config = pull::Config { - durable_name: Some(id.clone()), - ack_policy: AckPolicy::All, - ack_wait: Duration::from_secs(30), - max_ack_pending: 512, - deliver_policy, - ..Default::default() - }; - let inner = stream.get_or_create_consumer(&id, config).await?; - Ok(Self { - inner, - jetstream, - msgcount: 0, - id, - }) +/// NATS subjects for event types. +pub struct Subjects; + +impl Subjects { + /// Subject for transaction events. + pub fn transaction() -> Subject { + Subject::from_static("event.transaction") + } + + /// Subject for block boundary events. + pub fn block() -> Subject { + Subject::from_static("event.block") + } + + /// Subject for superblock checkpoint events. 
+ pub fn superblock() -> Subject { + Subject::from_static("event.superblock") } } -impl Producer { - pub async fn new(id: String, url: Url) -> Result { - let id = id.into_bytes().into(); - let jetstream = setup_jetstream_client(url).await?; - let store = jetstream.get_key_value("PRODUCER").await?; - Ok(Self { - jetstream, - store, - id, - }) +// ============================================================================= +// Broker +// ============================================================================= + +/// NATS JetStream connection with initialized streams and buckets. +/// +/// The broker handles: +/// - Event stream (`EVENTS`) for transaction/block/superblock messages +/// - Object store (`SNAPSHOTS`) for AccountsDb snapshots +/// - KV bucket (`PRODUCER`) for leader election +pub struct Broker(Context); + +impl Broker { + /// Connects to NATS and initializes all JetStream resources. + /// + /// Resources are created idempotently - safe to call multiple times. + pub async fn connect(url: Url) -> Result { + let addr = ServerAddr::from_url(url)?; + + let client = ConnectOptions::new() + .max_reconnects(None) + .reconnect_delay_callback(|attempts| { + let ms = (attempts as u64 * cfg::RECONNECT_BASE_MS) + .min(cfg::RECONNECT_MAX_MS); + Duration::from_millis(ms) + }) + .event_callback(|event| async move { + match event { + Event::Disconnected => warn!("NATS disconnected"), + Event::Connected => info!("NATS connected"), + Event::ClientError(e) => warn!(%e, "NATS client error"), + other => debug!(?other, "NATS event"), + } + }) + .connect(addr) + .await?; + + let js = ContextBuilder::new() + .timeout(cfg::API_TIMEOUT) + .max_ack_inflight(cfg::MAX_ACK_INFLIGHT) + .backpressure_on_inflight(true) + .build(client); + + let broker = Self(js); + broker.init_resources().await?; + Ok(broker) + } + + /// Initializes streams, object stores, and KV buckets. 
+ async fn init_resources(&self) -> Result<()> { + let info = self + .0 + .create_or_update_stream(stream::Config { + name: cfg::STREAM.into(), + max_bytes: cfg::STREAM_BYTES, + subjects: vec![ + "event.transaction".into(), + "event.block".into(), + "event.superblock".into(), + ], + max_age: cfg::TTL_STREAM, + duplicate_window: cfg::DUP_WINDOW, + description: Some("Magicblock validator events".into()), + compression: Some(Compression::S2), + ..Default::default() + }) + .await?; + + info!(stream = %info.config.name, messages = info.state.messages, "JetStream initialized"); + + self.0 + .create_object_store(object_store::Config { + bucket: cfg::SNAPSHOTS.into(), + description: Some("AccountsDb snapshots".into()), + max_bytes: cfg::SNAPSHOT_BYTES, + ..Default::default() + }) + .await?; + + self.0 + .create_key_value(kv::Config { + bucket: cfg::PRODUCER_LOCK.into(), + description: "Producer leader election".into(), + max_age: cfg::TTL_LOCK, + ..Default::default() + }) + .await?; + + Ok(()) } + /// Publishes a serialized message to the stream. pub async fn publish( - &mut self, - payload: Bytes, + &self, subject: Subject, + payload: Bytes, ) -> Result<()> { - self.jetstream.publish(subject, payload).await?; + self.0.publish(subject, payload).await?; Ok(()) } - pub async fn upload_snapshot( - &mut self, - slot: Slot, - mut snapshot: File, - ) -> Result<()> { - let store = self.jetstream.get_object_store("SNAPSHOTS").await?; - let mut stream = self.jetstream.get_stream("EVENTS").await?; - let seqno = stream.info().await?.state.last_sequence; - let metadata = { - let mut map = HashMap::with_capacity(2); - map.insert("slot".into(), slot.to_string()); - map.insert("seqno".into(), seqno.to_string()); - map + /// Retrieves the latest snapshot, if one exists. + /// + /// Returns `None` if no snapshot has been uploaded yet. 
+ pub async fn get_snapshot(&self) -> Result> { + let store = self.0.get_object_store(cfg::SNAPSHOTS).await?; + + let mut object = match store.get(cfg::SNAPSHOT_NAME).await { + Ok(obj) => obj, + Err(e) if e.kind() == GetErrorKind::NotFound => return Ok(None), + Err(e) => return Err(e.into()), }; + + let info = object.info(); + let meta = SnapshotMeta::parse(info)?; + + let mut data = Vec::with_capacity(info.size); + object.read_to_end(&mut data).await?; + + Ok(Some(Snapshot { + data, + slot: meta.slot, + seqno: meta.seqno, + })) + } + + /// Uploads a snapshot in the background. + /// + /// The snapshot is tagged with the current stream sequence number, + /// allowing standbys to resume replay from the correct position. + #[instrument(skip(self, file))] + pub async fn put_snapshot(&self, slot: Slot, mut file: File) -> Result<()> { + let store = self.0.get_object_store(cfg::SNAPSHOTS).await?; + let mut stream = self.0.get_stream(cfg::STREAM).await?; + let seqno = stream.info().await?.state.last_sequence; + let meta = ObjectMetadata { - name: "".into(), - metadata, + name: cfg::SNAPSHOT_NAME.into(), + metadata: SnapshotMeta { slot, seqno }.into_headers(), ..Default::default() }; - store.put(meta, &mut snapshot).await?; + + // Background upload to avoid blocking. + tokio::spawn(async move { + if let Err(e) = store.put(meta, &mut file).await { + error!(%e, "snapshot upload failed"); + } + }); + Ok(()) } - pub async fn acquire(&self) -> Result { - let result = self.store.create("lock", self.id.clone()).await; - match result { - Ok(_) => Ok(true), - Err(e) if matches!(e.kind(), CreateErrorKind::AlreadyExists) => { - Ok(false) - } - Err(e) => Err(e.into()), - } + /// Creates a consumer for receiving replicated events. 
+ pub async fn create_consumer( + &self, + id: &str, + start_seq: Option, + ) -> Result { + Consumer::new(id, &self.0, start_seq).await } - pub async fn update(&self) -> Result<()> { - self.store.put("lock", self.id.clone()).await?; - Ok(()) + /// Creates a producer for publishing events. + pub async fn create_producer(&self, id: &str) -> Result { + Producer::new(id, &self.0).await } } -pub async fn retrieve_snapshot( - jetstream: &Context, -) -> Result { - let store = jetstream.get_object_store("SNAPSHOTS").await?; - let mut object = store.get("accountsdb").await?; - let info = object.info(); - let slot = info - .metadata - .get("slot") - .and_then(|s| s.parse::().ok()) - .ok_or(Error::Internal( - "malformed snapshot object, no slot metadata found", - ))?; - let seqno = info - .metadata - .get("seqno") - .and_then(|s| s.parse::().ok()) - .ok_or(Error::Internal( - "malformed snapshot object, no seqno metadata found", - ))?; - - let mut blob = Vec::with_capacity(info.size); - object.read_to_end(&mut blob).await?; - Ok(AccountsDbSnapshotObject { blob, slot, seqno }) +// ============================================================================= +// Snapshot +// ============================================================================= + +/// AccountsDb snapshot with positioning metadata. +#[derive(Debug)] +pub struct Snapshot { + /// Raw snapshot bytes. + pub data: Vec, + /// Slot at which the snapshot was taken. + pub slot: Slot, + /// Stream sequence for replay start position. 
+ pub seqno: u64, } -pub async fn setup_jetstream_client(url: Url) -> Result { - let addr = ServerAddr::from_url(url)?; - // Configure connection options - let client = ConnectOptions::new() - .max_reconnects(None) // Infinite reconnect attempts - .reconnect_delay_callback(|attempts| { - // Exponential backoff for reconnects - Duration::from_millis((attempts * 100).min(5000) as u64) - }) - .event_callback(|event| async move { - match event { - Event::Disconnected => warn!("⚠️ NATS Disconnected!"), - Event::Connected => info!("✅ NATS Connected!"), - Event::ClientError(err) => { - warn!("❌ NATS Client Error: {}", err) +/// Metadata stored with each snapshot object. +struct SnapshotMeta { + slot: Slot, + seqno: u64, +} + +impl SnapshotMeta { + /// Parses metadata from object info headers. + fn parse(info: &object_store::ObjectInfo) -> Result { + let slot = info + .metadata + .get(cfg::META_SLOT) + .and_then(|v| v.parse().ok()) + .ok_or_else(|| { + Error::Internal("missing 'slot' in snapshot metadata") + })?; + + let seqno = info + .metadata + .get(cfg::META_SEQNO) + .and_then(|v| v.parse().ok()) + .ok_or_else(|| { + Error::Internal("missing 'seqno' in snapshot metadata") + })?; + + Ok(Self { slot, seqno }) + } + + /// Converts to HashMap for ObjectMetadata. + fn into_headers(self) -> HashMap { + HashMap::from([ + (cfg::META_SLOT.into(), self.slot.to_string()), + (cfg::META_SEQNO.into(), self.seqno.to_string()), + ]) + } +} + +// ============================================================================= +// Consumer +// ============================================================================= + +/// Pull-based consumer for receiving replicated events. +/// +/// Supports resuming from a specific sequence number for catch-up replay +/// after recovering from a snapshot. 
+pub struct Consumer { + #[allow(dead_code)] + inner: PullConsumer, +} + +impl Consumer { + async fn new( + id: &str, + js: &Context, + start_seq: Option, + ) -> Result { + let stream = js.get_stream(cfg::STREAM).await?; + + let deliver_policy = match start_seq { + Some(seq) => { + // Delete and recreate to change start position. + stream.delete_consumer(id).await.ok(); + DeliverPolicy::ByStartSequence { + start_sequence: seq, } - _ => debug!("ℹ️ NATS Event: {:?}", event), } - }) - .connect(addr) - .await?; - let jetstream = ContextBuilder::new() - .timeout(Duration::from_secs(2)) - .max_ack_inflight(2048) - .backpressure_on_inflight(true) - .build(client); + None => DeliverPolicy::All, + }; - initialize_jetstream(&jetstream).await?; + let inner = stream + .get_or_create_consumer( + id, + pull::Config { + durable_name: Some(id.into()), + ack_policy: AckPolicy::All, + ack_wait: cfg::ACK_WAIT, + max_ack_pending: cfg::MAX_ACK_PENDING, + deliver_policy, + ..Default::default() + }, + ) + .await?; - Ok(jetstream) + Ok(Self { inner }) + } } -async fn initialize_jetstream(jetstream: &Context) -> Result<()> { - let config = stream::Config { - name: "EVENTS".into(), - max_bytes: 1024 * 1024 * 1024 * 256, - max_messages: 16 * 1024 * 60 * 60 * 24, - max_messages_per_subject: -1, - subjects: vec![ - "event.transaction".into(), - "event.block".into(), - "event.superblock".into(), - ], - max_consumers: -1, - max_age: Duration::from_secs(60 * 60 * 24), - duplicate_window: Duration::from_secs(30), - description: Some("Magicblock validator events".into()), - compression: Some(Compression::S2), - ..Default::default() - }; - let info = jetstream.create_or_update_stream(config).await?; - info!( - "NATS stream existence is confirmed: {} created at: {}, subjects: {:?}, messages: {}", - info.config.name, info.created, info.config.subjects, info.state.messages - ); - let config = object_store::Config { - bucket: "SNAPSHOTS".into(), - description: Some("Magicblock accountsdb 
snapshot".into()), - max_bytes: 512 * 1024 * 1024 * 1024, - compression: false, - ..Default::default() - }; - jetstream.create_object_store(config).await?; - let config = kv::Config { - bucket: "PRODUCER".into(), - description: "Magicblock event producer state".into(), - max_age: Duration::from_secs(5), - ..Default::default() - }; - jetstream.create_key_value(config).await?; - Ok(()) +// ============================================================================= +// Producer +// ============================================================================= + +/// Event producer with distributed lock for leader election. +/// +/// Only one producer can hold the lock at a time, ensuring exactly one +/// primary publishes events. The lock has a TTL and must be refreshed +/// periodically to maintain leadership. +pub struct Producer { + /// KV store for the lock. + lock: Store, + /// Producer identity (node ID). + id: Bytes, + /// Current lock revision for CAS updates. + revision: u64, +} + +impl Producer { + async fn new(id: &str, js: &Context) -> Result { + Ok(Self { + lock: js.get_key_value(cfg::PRODUCER_LOCK).await?, + id: id.to_owned().into_bytes().into(), + revision: 0, + }) + } + + /// Attempts to acquire the leader lock. + /// + /// Returns `true` if this producer became the leader. + /// Returns `false` if another producer already holds the lock. + pub async fn acquire(&mut self) -> Result { + match self.lock.create(cfg::LOCK_KEY, self.id.clone()).await { + Ok(rev) => { + self.revision = rev; + Ok(true) + } + Err(e) if e.kind() == CreateErrorKind::AlreadyExists => Ok(false), + Err(e) => Err(e.into()), + } + } + + /// Refreshes the leader lock to prevent expiration. + /// + /// Returns `false` if we lost the lock (another producer took over). + /// This typically indicates a network partition or slow refresh. 
+ pub async fn refresh(&mut self) -> Result { + match self + .lock + .update(cfg::LOCK_KEY, self.id.clone(), self.revision) + .await + { + Ok(rev) => { + self.revision = rev; + Ok(true) + } + Err(e) if e.kind() == UpdateErrorKind::WrongLastRevision => { + Ok(false) + } + Err(e) => Err(e.into()), + } + } } diff --git a/magicblock-replicator/src/proto.rs b/magicblock-replicator/src/proto.rs index a8456a3c7..c098676cd 100644 --- a/magicblock-replicator/src/proto.rs +++ b/magicblock-replicator/src/proto.rs @@ -1,80 +1,88 @@ //! Protocol message types for replication. //! -//! Wire format: 4-byte LE length prefix + bincode payload. -//! Bincode encodes enum variant index as implicit type tag. +//! # Wire Format +//! +//! The enum variant index serves as an implicit type tag. +use async_nats::Subject; +use magicblock_core::Slot; use serde::{Deserialize, Serialize}; use solana_hash::Hash; -use solana_keypair::Keypair; -use solana_pubkey::Pubkey; -use solana_signature::Signature; -use solana_signer::Signer; use solana_transaction::versioned::VersionedTransaction; -pub type Slot = u64; -pub type TxIndex = u32; +use crate::nats::Subjects; -pub const PROTOCOL_VERSION: u32 = 1; +/// Ordinal position of a transaction within a slot. +pub type TxIndex = u32; -/// Top-level replication message. +/// Top-level replication message envelope. +/// +/// Variant order is part of the wire format - reordering breaks compatibility. #[derive(Deserialize, Serialize, Clone, Debug)] pub enum Message { + /// Transaction executed at a specific slot position. Transaction(Transaction), + /// Slot boundary with blockhash for confirmation. Block(Block), + /// Periodic checkpoint for state verification. SuperBlock(SuperBlock), - Failover(FailoverSignal), } -/// Slot boundary marker with blockhash. 
-#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct Block { - pub slot: Slot, - pub hash: Hash, - pub timestamp: i64, +impl Message { + pub(crate) fn subject(&self) -> Subject { + match self { + Self::Transaction(_) => Subjects::transaction(), + Self::Block(_) => Subjects::block(), + Self::SuperBlock(_) => Subjects::superblock(), + } + } + + pub(crate) fn slot_and_index(&self) -> (Slot, TxIndex) { + match self { + Self::Transaction(txn) => (txn.slot, txn.index), + Self::Block(block) => (block.slot, 0), + Self::SuperBlock(superblock) => (superblock.slot, 0), + } + } } -/// Transaction with slot and ordinal position. +/// Transaction with slot context for ordered replay. #[derive(Deserialize, Serialize, Clone, Debug)] pub struct Transaction { + /// Slot where the transaction was executed. pub slot: Slot, + /// Ordinal position within the slot. pub index: TxIndex, /// Bincode-encoded `VersionedTransaction`. pub payload: Vec, } -/// Periodic checkpoint for state verification. +/// Slot boundary marker with blockhash. #[derive(Deserialize, Serialize, Clone, Debug)] -pub struct SuperBlock { - pub blocks: u64, - pub transactions: u64, - pub checksum: u64, +pub struct Block { + /// Slot number. + pub slot: Slot, + /// Blockhash for this slot. + pub hash: Hash, + /// Unix timestamp (seconds). + pub timestamp: i64, } -/// Primary -> Standby: signal controlled failover. +/// Periodic checkpoint for state verification and catch-up. #[derive(Deserialize, Serialize, Clone, Debug)] -pub struct FailoverSignal { +pub struct SuperBlock { pub slot: Slot, - signature: Signature, + /// Total blocks processed. + pub blocks: u64, + /// Total transactions processed. + pub transactions: u64, + /// Rolling checksum for verification. + pub checksum: u64, } impl Transaction { - /// Deserializes the inner transaction. + /// Deserializes the inner `VersionedTransaction`. 
pub fn decode(&self) -> bincode::Result { bincode::deserialize(&self.payload) } } - -impl FailoverSignal { - pub fn new(slot: Slot, keypair: &Keypair) -> Self { - Self { - slot, - signature: keypair.sign_message(&slot.to_le_bytes()), - } - } - - /// Verifies signal against expected identity. - pub fn verify(&self, identity: Pubkey) -> bool { - self.signature - .verify(identity.as_array(), &self.slot.to_le_bytes()) - } -} diff --git a/magicblock-replicator/src/service.rs b/magicblock-replicator/src/service.rs new file mode 100644 index 000000000..021e8ee21 --- /dev/null +++ b/magicblock-replicator/src/service.rs @@ -0,0 +1,470 @@ +//! Replication service for primary-standby state synchronization. +//! +//! # Architecture +//! +//! ```text +//! ┌─────────────┐ +//! │ Service │ +//! └──────┬──────┘ +//! ┌────────────┴────────────┐ +//! ▼ ▼ +//! ┌─────────┐ ┌─────────┐ +//! │ Primary │ │ Standby │ +//! └────┬────┘ └────┬────┘ +//! │ │ +//! ┌─────────┴─────────┐ ┌─────────┴─────────┐ +//! │ Publish events │ │ Consume events │ +//! │ Upload snapshots │ │ Apply to state │ +//! │ Refresh lock │ │ Verify checksums │ +//! └───────────────────┘ └───────────────────┘ +//! ``` +//! +//! # Role Transitions +//! +//! - **Primary → Standby**: Lock lost (refresh failed) +//! - **Standby → Primary**: Leader lock available (current primary crashed) + +use std::sync::Arc; +use std::time::Duration; + +use magicblock_accounts_db::AccountsDb; +use magicblock_core::link::transactions::{SchedulerMode, TransactionSchedulerHandle}; +use magicblock_core::Slot; +use magicblock_ledger::Ledger; +use tokio::sync::mpsc::Receiver; +use tokio::time::interval; +use tracing::{info, warn}; + +use crate::nats::{Broker, Consumer, Producer, Snapshot}; +use crate::proto::{Block, SuperBlock, TxIndex}; +use crate::{Message, Result}; + +/// Re-export for crate users. 
+pub use crate::nats::Snapshot as AccountsDbSnapshot; + +// ============================================================================= +// Configuration +// ============================================================================= + +/// Interval between leader lock refreshes. +const LOCK_REFRESH_INTERVAL: Duration = Duration::from_secs(1); + +/// Maximum time without seeing leader activity before attempting takeover. +const LEADER_TIMEOUT: Duration = Duration::from_secs(10); + +/// Interval between snapshot uploads. +const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(60); + +/// Retry delay for consumer creation. +const CONSUMER_RETRY_DELAY: Duration = Duration::from_secs(1); + +// ============================================================================= +// Context +// ============================================================================= + +/// Shared state accessible from both primary and standby roles. +/// +/// Contains all references needed for replication operations: +/// - State stores (accountsdb, ledger) +/// - Communication (broker, scheduler) +/// - Position tracking (slot, index) +pub struct Context { + /// Unique node identifier used for leader election. + pub id: String, + /// NATS broker for publishing/consuming events and snapshots. + pub broker: Broker, + /// Channel to switch scheduler between primary/replica mode. + pub mode_tx: tokio::sync::mpsc::Sender, + /// Accounts database for snapshot creation and state application. + pub accountsdb: Arc, + /// Ledger for transaction and block storage. + pub ledger: Arc, + /// Transaction scheduler for executing replayed transactions. + pub scheduler: TransactionSchedulerHandle, + /// Current slot position in the replicated stream. + pub slot: Slot, + /// Current transaction index within the slot. + pub index: TxIndex, +} + +impl Context { + /// Creates context, initializing position from ledger. 
+ pub async fn new( + id: String, + broker: Broker, + mode_tx: tokio::sync::mpsc::Sender, + accountsdb: Arc, + ledger: Arc, + scheduler: TransactionSchedulerHandle, + ) -> Result { + let (slot, index) = ledger + .get_latest_transaction_position()? + .unwrap_or_default(); + + info!(%id, slot, index, "created replication context"); + + Ok(Self { + id, + broker, + mode_tx, + accountsdb, + ledger, + scheduler, + slot, + index, + }) + } + + /// Updates position after processing a message. + pub fn update_position(&mut self, slot: Slot, index: TxIndex) { + self.slot = slot; + self.index = index; + } + + // ========================================================================= + // Standby Operations + // ========================================================================= + + /// Writes a block to the ledger. + pub async fn write_block(&self, _block: &Block) -> Result<()> { + // TODO: Implement ledger block writing. + // self.ledger.write_block(block)?; + Ok(()) + } + + /// Verifies superblock checksum against local accountsdb. + /// + /// Returns `true` if state matches, `false` if divergence detected. + pub fn verify_checksum(&self, _superblock: &SuperBlock) -> Result { + // TODO: Implement checksum verification. + // let local_hash = self.accountsdb.compute_hash(); + // Ok(local_hash == superblock.checksum) + Ok(true) + } + + /// Applies a snapshot to restore accountsdb state. + pub async fn apply_snapshot(&self, _snapshot: &Snapshot) -> Result<()> { + // TODO: Implement snapshot application. + // self.accountsdb.restore(&snapshot.data)?; + // Update position to snapshot's seqno for correct replay start. + Ok(()) + } + + /// Switches scheduler to replica mode for transaction replay. 
+ pub async fn enter_replica_mode(&self) { + let _ = self.mode_tx.send(SchedulerMode::Replica).await; + } + + // ========================================================================= + // Primary Operations + // ========================================================================= + + /// Uploads current accountsdb snapshot to the broker. + pub async fn upload_snapshot(&self) -> Result<()> { + // TODO: Get snapshot file from accountsdb and upload. + // let file = self.accountsdb.snapshot_file().await?; + // self.broker.put_snapshot(self.slot, file).await + Ok(()) + } + + /// Switches scheduler to primary mode. + pub async fn enter_primary_mode(&self) { + let _ = self.mode_tx.send(SchedulerMode::Primary).await; + } +} + +// ============================================================================= +// Service +// ============================================================================= + +/// Replication service that runs as either primary or standby. +/// +/// Automatically selects role based on leader lock availability. +/// Transitions between roles as conditions change. +pub enum Service { + /// Primary role: publishes events, holds leader lock. + Primary(Primary), + /// Standby role: consumes events from stream. + Standby(Standby), +} + +impl Service { + /// Creates a new replication service. + /// + /// Attempts to acquire the leader lock first. If successful, runs as + /// primary; otherwise, falls back to standby mode. + pub async fn new( + id: String, + broker: Broker, + mode_tx: tokio::sync::mpsc::Sender, + accountsdb: Arc, + ledger: Arc, + scheduler: TransactionSchedulerHandle, + rx: Receiver, + ) -> Result { + let ctx = Context::new(id, broker, mode_tx, accountsdb, ledger, scheduler).await?; + + // Attempt to acquire leader lock. + let mut producer = ctx.broker.create_producer(&ctx.id).await?; + if producer.acquire().await? 
{ + ctx.enter_primary_mode().await; + return Ok(Self::Primary(Primary { ctx, producer, rx })); + } + + // Fall back to standby. + let standby = ctx.into_standby().await?; + Ok(Self::Standby(standby)) + } + + /// Runs the service in its assigned role. + pub async fn run(self) { + match self { + Service::Primary(p) => p.run().await, + Service::Standby(s) => s.run().await, + } + } +} + +// ============================================================================= +// Primary +// ============================================================================= + +/// Primary node: publishes events and holds the leader lock. +/// +/// Responsibilities: +/// - Forward incoming validator events to the stream +/// - Periodically upload accountsdb snapshots +/// - Maintain leadership via lock refresh +pub struct Primary { + ctx: Context, + producer: Producer, + rx: Receiver, +} + +impl Primary { + /// Main loop: publish events, upload snapshots, maintain lock. + async fn run(mut self) { + let mut lock_tick = interval(LOCK_REFRESH_INTERVAL); + let mut snapshot_tick = interval(SNAPSHOT_INTERVAL); + + loop { + tokio::select! { + // Forward incoming messages to the stream. + Some(msg) = self.rx.recv() => { + self.publish(msg).await; + } + + // Periodically refresh the leader lock. + _ = lock_tick.tick() => { + if !self.refresh_lock().await { + info!("lost leadership, demoting to standby"); + return; + } + } + + // Periodically upload snapshots. + _ = snapshot_tick.tick() => { + self.upload_snapshot().await; + } + } + } + } + + /// Publishes a message to the stream. + async fn publish(&mut self, msg: Message) { + let Ok(payload) = bincode::serialize(&msg) else { return }; + let subject = msg.subject(); + let (slot, index) = msg.slot_and_index(); + + if self.ctx.broker.publish(subject, payload.into()).await.is_ok() { + self.ctx.update_position(slot, index); + } + } + + /// Refreshes the leader lock. Returns `false` if lost. 
+ async fn refresh_lock(&mut self) -> bool { + match self.producer.refresh().await { + Ok(held) => held, + Err(e) => { + warn!(%e, "failed to refresh leader lock"); + false + } + } + } + + /// Uploads a snapshot of current state. + async fn upload_snapshot(&self) { + if let Err(e) = self.ctx.upload_snapshot().await { + warn!(%e, "failed to upload snapshot"); + } + } +} + +// ============================================================================= +// Standby +// ============================================================================= + +/// Standby node: consumes events and applies them to local state. +/// +/// Responsibilities: +/// - Consume events from the stream +/// - Apply transactions via scheduler +/// - Write blocks to ledger +/// - Verify superblock checksums +/// - Watch for leader failure (heartbeat timeout) +/// - Attempt takeover when leader fails +pub struct Standby { + ctx: Context, + #[allow(dead_code)] + consumer: Consumer, + /// Tracks last time we saw activity from the leader. + last_leader_activity: tokio::time::Instant, +} + +impl Standby { + /// Main loop: consume events, apply state, watch for leader failure. + async fn run(mut self) { + let mut leader_timeout_check = interval(Duration::from_secs(1)); + + loop { + tokio::select! { + // TODO: Consume messages from the stream. + // This will be implemented when Consumer has a receive method. + // For now, we just watch for leader timeout. + + _ = leader_timeout_check.tick() => { + if self.last_leader_activity.elapsed() > LEADER_TIMEOUT + && self.attempt_takeover().await + { + return; + } + } + } + } + } + + /// Updates leader activity timestamp (called when receiving events). + fn record_leader_activity(&mut self) { + self.last_leader_activity = tokio::time::Instant::now(); + } + + /// Attempts to acquire leadership. + /// + /// Returns `true` if successful and we should exit (caller should restart). 
+ async fn attempt_takeover(&mut self) -> bool { + info!("leader timeout, attempting takeover"); + + let mut producer = match self.ctx.broker.create_producer(&self.ctx.id).await { + Ok(p) => p, + Err(e) => { + warn!(%e, "failed to create producer for takeover"); + return false; + } + }; + + match producer.acquire().await { + Ok(true) => { + info!("successfully acquired leadership"); + self.ctx.enter_primary_mode().await; + // Exit this run loop; caller should restart as Primary. + true + } + Ok(false) => { + info!("another node acquired leadership first"); + self.record_leader_activity(); + false + } + Err(e) => { + warn!(%e, "failed to acquire leadership"); + false + } + } + } + + // ========================================================================= + // Event Processing (scaffolding for future implementation) + // ========================================================================= + + /// Processes an incoming transaction message. + #[allow(dead_code)] + async fn process_transaction(&mut self, _slot: Slot, _index: TxIndex, _payload: &[u8]) -> Result<()> { + // TODO: + // 1. Deserialize transaction + // 2. Submit to scheduler for execution + // 3. Update position + self.record_leader_activity(); + Ok(()) + } + + /// Processes an incoming block message. + #[allow(dead_code)] + async fn process_block(&mut self, block: &Block) -> Result<()> { + // TODO: + // 1. Write block to ledger + // 2. Update slot position + self.ctx.write_block(block).await?; + self.record_leader_activity(); + Ok(()) + } + + /// Processes an incoming superblock message. + #[allow(dead_code)] + async fn process_superblock(&mut self, superblock: &SuperBlock) -> Result<()> { + // TODO: + // 1. Verify checksum against local state + // 2. If mismatch, may need to request snapshot + if !self.ctx.verify_checksum(superblock)? 
{ + warn!( + slot = superblock.slot, + "superblock checksum mismatch - state divergence detected" + ); + // TODO: Request snapshot or enter recovery mode + } + self.record_leader_activity(); + Ok(()) + } +} + +// ============================================================================= +// Context -> Role Transitions +// ============================================================================= + +impl Context { + /// Transitions to standby role by creating a consumer. + pub async fn into_standby(self) -> Result { + let consumer = self.create_consumer_with_retry(None).await?; + self.enter_replica_mode().await; + + Ok(Standby { + consumer, + last_leader_activity: tokio::time::Instant::now(), + ctx: self, + }) + } + + /// Creates a consumer with retry on failure. + async fn create_consumer_with_retry(&self, start_seq: Option) -> Result { + loop { + match self.broker.create_consumer(&self.id, start_seq).await { + Ok(c) => return Ok(c), + Err(e) => { + warn!(%e, "failed to create consumer; retrying"); + tokio::time::sleep(CONSUMER_RETRY_DELAY).await; + } + } + } + } + + /// Attempts to recover from a snapshot before starting consumer. + #[allow(dead_code)] + pub async fn recover_from_snapshot(&self) -> Result> { + let Some(snapshot) = self.broker.get_snapshot().await? else { + info!("no snapshot available for recovery"); + return Ok(None); + }; + + info!(slot = snapshot.slot, seqno = snapshot.seqno, "retrieved snapshot"); + self.apply_snapshot(&snapshot).await?; + Ok(Some(snapshot)) + } +} diff --git a/magicblock-replicator/src/tests.rs b/magicblock-replicator/src/tests.rs index 1ec3661a2..051860c3a 100644 --- a/magicblock-replicator/src/tests.rs +++ b/magicblock-replicator/src/tests.rs @@ -1,11 +1,8 @@ -//! Tests suite for replication protocol. +//! Tests for the replication protocol. 
-use solana_keypair::Keypair; -use solana_pubkey::Pubkey; -use solana_signer::Signer; -use tokio::net::{TcpListener, TcpStream}; +use solana_hash::Hash; -use crate::proto::{Block, FailoverSignal, Message, SuperBlock, Transaction}; +use crate::proto::{Block, Message, SuperBlock, Transaction}; // ============================================================================= // Wire Format Tests - catch serialization/protocol changes @@ -15,7 +12,7 @@ use crate::proto::{Block, FailoverSignal, Message, SuperBlock, Transaction}; fn variant_order_stability() { // Bincode encodes enum discriminant as variant index. // Reordering enum variants silently breaks wire compatibility. - let cases: [(Message, u32); 4] = [ + let cases: [(Message, u32); 3] = [ ( Message::Transaction(Transaction { slot: 0, @@ -27,30 +24,25 @@ fn variant_order_stability() { ( Message::Block(Block { slot: 0, - hash: solana_hash::Hash::default(), + hash: Hash::default(), timestamp: 42, }), 1, ), ( Message::SuperBlock(SuperBlock { + slot: 0, blocks: 0, transactions: 0, checksum: 0, }), 2, ), - ( - Message::Failover(FailoverSignal::new(0, &Keypair::new())), - 3, - ), ]; for (msg, expected_idx) in cases { let encoded = bincode::serialize(&msg).unwrap(); - let actual_idx = u32::from_le_bytes([ - encoded[0], encoded[1], encoded[2], encoded[3], - ]); + let actual_idx = u32::from_le_bytes([encoded[0], encoded[1], encoded[2], encoded[3]]); assert_eq!( actual_idx, expected_idx, "variant index changed - this breaks wire compatibility!" @@ -59,18 +51,24 @@ fn variant_order_stability() { } #[test] -fn signed_message_roundtrip() { - // Signed messages (handshake, failover) have complex serialization. - // Unsigned messages are trivial and covered by variant_order_stability. 
- let kp = Keypair::new(); - +fn message_roundtrip() { let cases = vec![ - Message::Failover(FailoverSignal::new(77777, &kp)), Message::Transaction(Transaction { slot: 54321, index: 42, payload: (0..255).collect(), }), + Message::Block(Block { + slot: 12345, + hash: Hash::new_unique(), + timestamp: 1700000000, + }), + Message::SuperBlock(SuperBlock { + slot: 99999, + blocks: 1000, + transactions: 50000, + checksum: 0xDEADBEEF, + }), ]; for msg in cases { @@ -79,22 +77,3 @@ fn signed_message_roundtrip() { assert_eq!(bincode::serialize(&decoded).unwrap(), encoded); } } - -// ============================================================================= -// Signature Verification Tests - catch crypto/auth bugs -// ============================================================================= - -#[test] -fn failover_signal_verification() { - let kp = Keypair::new(); - let signal = FailoverSignal::new(99999, &kp); - - // Correct identity verifies - assert!(signal.verify(kp.pubkey())); - - // Wrong identity fails - assert!( - !signal.verify(Pubkey::new_unique()), - "wrong identity should fail" - ); -} diff --git a/test-kit/src/lib.rs b/test-kit/src/lib.rs index c0adc2b61..eaedfa801 100644 --- a/test-kit/src/lib.rs +++ b/test-kit/src/lib.rs @@ -14,8 +14,9 @@ use magicblock_core::{ blocks::{BlockMeta, BlockUpdate, BlockUpdateTx}, link, transactions::{ - ReplayPosition, SanitizeableTransaction, TransactionResult, - TransactionSchedulerHandle, TransactionSimulationResult, + ReplayPosition, SanitizeableTransaction, SchedulerMode, + TransactionResult, TransactionSchedulerHandle, + TransactionSimulationResult, }, DispatchEndpoints, }, @@ -25,10 +26,7 @@ use magicblock_ledger::Ledger; use magicblock_processor::{ build_svm_env, loader::load_upgradeable_programs, - scheduler::{ - state::{SchedulerMode, TransactionSchedulerState}, - TransactionScheduler, - }, + scheduler::{state::TransactionSchedulerState, TransactionScheduler}, }; use solana_account::AccountSharedData; pub use 
solana_instruction::*; From 71a9ef0539346bf75cc068572bf199f6bad73759 Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Wed, 11 Mar 2026 21:23:40 +0400 Subject: [PATCH 10/13] feat: complete overhaul of replication protocol --- magicblock-accounts-db/src/lib.rs | 4 + magicblock-core/src/link/transactions.rs | 4 +- magicblock-replicator/Cargo.toml | 15 +- magicblock-replicator/src/error.rs | 13 +- magicblock-replicator/src/lib.rs | 1 + magicblock-replicator/src/nats.rs | 171 ++++--- magicblock-replicator/src/proto.rs | 11 +- magicblock-replicator/src/service.rs | 587 +++++++++++------------ magicblock-replicator/src/tests.rs | 6 +- magicblock-replicator/src/watcher.rs | 202 ++++++++ 10 files changed, 624 insertions(+), 390 deletions(-) create mode 100644 magicblock-replicator/src/watcher.rs diff --git a/magicblock-accounts-db/src/lib.rs b/magicblock-accounts-db/src/lib.rs index 86c75d9cd..d2b6013ab 100644 --- a/magicblock-accounts-db/src/lib.rs +++ b/magicblock-accounts-db/src/lib.rs @@ -434,6 +434,10 @@ impl AccountsDb { pub fn lock_database(&self) -> RwLockWriteGuard<'_, ()> { self.write_lock.write() } + + pub fn database_directory(&self) -> &Path { + self.snapshot_manager.database_path() + } } impl AccountsBank for AccountsDb { diff --git a/magicblock-core/src/link/transactions.rs b/magicblock-core/src/link/transactions.rs index 2964a227e..65750b7da 100644 --- a/magicblock-core/src/link/transactions.rs +++ b/magicblock-core/src/link/transactions.rs @@ -287,11 +287,11 @@ impl TransactionSchedulerHandle { txn: impl SanitizeableTransaction, ) -> TransactionResult { let mode = TransactionProcessingMode::Replay(position); - let transaction = txn.sanitize(true)?; + let (transaction, encoded) = txn.sanitize_with_encoded(true)?; let txn = ProcessableTransaction { transaction, mode, - encoded: None, + encoded, }; self.0 .send(txn) diff --git a/magicblock-replicator/Cargo.toml b/magicblock-replicator/Cargo.toml index 7f63e5f17..25e48e15e 100644 --- 
a/magicblock-replicator/Cargo.toml +++ b/magicblock-replicator/Cargo.toml @@ -11,13 +11,26 @@ edition.workspace = true async-nats = { workspace = true } bincode = { workspace = true } bytes = { workspace = true } +futures = { workspace = true } magicblock-accounts-db = { workspace = true } magicblock-core = { workspace = true } magicblock-ledger = { workspace = true } +notify = { version = "8.0", features = ["macos_kqueue"] } thiserror = { workspace = true } -tokio = { workspace = true, features = ["net", "rt", "macros"] } +tokio = { workspace = true, features = [ + "net", + "rt", + "macros", + "sync", + "io-util", + "fs", +] } serde = { workspace = true, features = ["derive"] } solana-hash = { workspace = true, features = ["serde"] } solana-transaction = { workspace = true, features = ["serde"] } +solana-transaction-error = { workspace = true } tracing = { workspace = true } url = { workspace = true } + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/magicblock-replicator/src/error.rs b/magicblock-replicator/src/error.rs index 1c3d0b1e5..1b1091a20 100644 --- a/magicblock-replicator/src/error.rs +++ b/magicblock-replicator/src/error.rs @@ -3,6 +3,7 @@ use std::fmt::{Debug, Display}; use magicblock_ledger::errors::LedgerError; +use solana_transaction_error::TransactionError; /// Replication operation errors. #[derive(thiserror::Error, Debug)] @@ -11,10 +12,6 @@ pub enum Error { #[error("message broker error: {0}")] Nats(async_nats::Error), - /// Connection was closed unexpectedly. - #[error("connection closed")] - ConnectionClosed, - /// I/O operation failed. #[error("I/O error: {0}")] Io(#[from] std::io::Error), @@ -27,9 +24,17 @@ pub enum Error { #[error("ledger access error: {0}")] Ledger(#[from] LedgerError), + /// Transaction execution error. + #[error("transaction execution error: {0}")] + Transaction(#[from] TransactionError), + /// Internal protocol violation or malformed data. 
#[error("internal error: {0}")] Internal(&'static str), + + /// File system watcher error. + #[error("watcher error: {0}")] + Watcher(#[from] notify::Error), } // async_nats::Error is actually async_nats::error::Error where K is the error kind. diff --git a/magicblock-replicator/src/lib.rs b/magicblock-replicator/src/lib.rs index 13d654c92..f0b7620a8 100644 --- a/magicblock-replicator/src/lib.rs +++ b/magicblock-replicator/src/lib.rs @@ -16,6 +16,7 @@ pub mod error; pub mod nats; pub mod proto; pub mod service; +pub mod watcher; #[cfg(test)] mod tests; diff --git a/magicblock-replicator/src/nats.rs b/magicblock-replicator/src/nats.rs index 78d39fef4..218150ab4 100644 --- a/magicblock-replicator/src/nats.rs +++ b/magicblock-replicator/src/nats.rs @@ -7,21 +7,24 @@ //! - [`Consumer`]: Event subscriber for standby replay //! - [`Snapshot`]: AccountsDb snapshot with positioning metadata -use std::collections::HashMap; -use std::time::Duration; - -use async_nats::jetstream::{ - consumer::{pull, AckPolicy, DeliverPolicy, PullConsumer}, - kv::{self, CreateErrorKind, Store, UpdateErrorKind}, - object_store::{self, GetErrorKind, ObjectMetadata}, - stream::{self, Compression}, - Context, ContextBuilder, +use std::{collections::HashMap, time::Duration}; + +use async_nats::{ + jetstream::{ + consumer::{ + pull::{Config as PullConfig, Stream as MessageStream}, + AckPolicy, DeliverPolicy, PullConsumer, + }, + kv::{self, CreateErrorKind, Store, UpdateErrorKind}, + object_store::{self, GetErrorKind, ObjectMetadata}, + stream::{self, Compression}, + Context, ContextBuilder, + }, + ConnectOptions, Event, ServerAddr, Subject, }; -use async_nats::{ConnectOptions, Event, ServerAddr, Subject}; use bytes::Bytes; use magicblock_core::Slot; -use tokio::fs::File; -use tokio::io::AsyncReadExt; +use tokio::{fs::File, io::AsyncReadExt}; use tracing::{debug, error, info, instrument, warn}; use url::Url; @@ -31,23 +34,22 @@ use crate::{Error, Result}; // Configuration // 
============================================================================= +/// Resource names and configuration constants. mod cfg { use std::time::Duration; - // Resource names pub const STREAM: &str = "EVENTS"; pub const SNAPSHOTS: &str = "SNAPSHOTS"; pub const PRODUCER_LOCK: &str = "PRODUCER"; pub const LOCK_KEY: &str = "lock"; pub const SNAPSHOT_NAME: &str = "accountsdb"; - // Metadata keys pub const META_SLOT: &str = "slot"; pub const META_SEQNO: &str = "seqno"; - // Size limits - pub const STREAM_BYTES: i64 = 256 * 1024 * 1024 * 1024; // 256 GB - pub const SNAPSHOT_BYTES: i64 = 512 * 1024 * 1024 * 1024; // 512 GB + // Size limits (256 GB stream, 512 GB snapshots) + pub const STREAM_BYTES: i64 = 256 * 1024 * 1024 * 1024; + pub const SNAPSHOT_BYTES: i64 = 512 * 1024 * 1024 * 1024; // Timeouts pub const TTL_STREAM: Duration = Duration::from_secs(24 * 60 * 60); @@ -56,13 +58,14 @@ mod cfg { pub const API_TIMEOUT: Duration = Duration::from_secs(2); pub const DUP_WINDOW: Duration = Duration::from_secs(30); - // Reconnect backoff + // Reconnect backoff (exponential: 100ms base, 5s max) pub const RECONNECT_BASE_MS: u64 = 100; pub const RECONNECT_MAX_MS: u64 = 5000; // Backpressure pub const MAX_ACK_PENDING: i64 = 512; pub const MAX_ACK_INFLIGHT: usize = 2048; + pub const BATCH_SIZE: usize = 512; } // ============================================================================= @@ -70,22 +73,38 @@ mod cfg { // ============================================================================= /// NATS subjects for event types. +/// +/// Provides both string constants for stream configuration and typed subjects +/// for publishing. pub struct Subjects; impl Subjects { - /// Subject for transaction events. + pub const TRANSACTION: &'static str = "event.transaction"; + pub const BLOCK: &'static str = "event.block"; + pub const SUPERBLOCK: &'static str = "event.superblock"; + + /// All subjects for stream configuration. 
+ pub const fn all() -> [&'static str; 3] { + [Self::TRANSACTION, Self::BLOCK, Self::SUPERBLOCK] + } + + const fn from(s: &'static str) -> Subject { + Subject::from_static(s) + } + + /// Typed subject for transaction events. pub fn transaction() -> Subject { - Subject::from_static("event.transaction") + Self::from(Self::TRANSACTION) } - /// Subject for block boundary events. + /// Typed subject for block events. pub fn block() -> Subject { - Subject::from_static("event.block") + Self::from(Self::BLOCK) } - /// Subject for superblock checkpoint events. + /// Typed subject for superblock events. pub fn superblock() -> Subject { - Subject::from_static("event.superblock") + Self::from(Self::SUPERBLOCK) } } @@ -94,12 +113,10 @@ impl Subjects { // ============================================================================= /// NATS JetStream connection with initialized streams and buckets. -/// -/// The broker handles: -/// - Event stream (`EVENTS`) for transaction/block/superblock messages -/// - Object store (`SNAPSHOTS`) for AccountsDb snapshots -/// - KV bucket (`PRODUCER`) for leader election -pub struct Broker(Context); +pub struct Broker { + ctx: Context, + seqno: u64, +} impl Broker { /// Connects to NATS and initializes all JetStream resources. @@ -126,13 +143,13 @@ impl Broker { .connect(addr) .await?; - let js = ContextBuilder::new() + let ctx = ContextBuilder::new() .timeout(cfg::API_TIMEOUT) .max_ack_inflight(cfg::MAX_ACK_INFLIGHT) .backpressure_on_inflight(true) .build(client); - let broker = Self(js); + let broker = Self { ctx, seqno: 0 }; broker.init_resources().await?; Ok(broker) } @@ -140,15 +157,11 @@ impl Broker { /// Initializes streams, object stores, and KV buckets. 
async fn init_resources(&self) -> Result<()> { let info = self - .0 + .ctx .create_or_update_stream(stream::Config { name: cfg::STREAM.into(), max_bytes: cfg::STREAM_BYTES, - subjects: vec![ - "event.transaction".into(), - "event.block".into(), - "event.superblock".into(), - ], + subjects: Subjects::all().into_iter().map(Into::into).collect(), max_age: cfg::TTL_STREAM, duplicate_window: cfg::DUP_WINDOW, description: Some("Magicblock validator events".into()), @@ -159,7 +172,7 @@ impl Broker { info!(stream = %info.config.name, messages = info.state.messages, "JetStream initialized"); - self.0 + self.ctx .create_object_store(object_store::Config { bucket: cfg::SNAPSHOTS.into(), description: Some("AccountsDb snapshots".into()), @@ -168,7 +181,7 @@ impl Broker { }) .await?; - self.0 + self.ctx .create_key_value(kv::Config { bucket: cfg::PRODUCER_LOCK.into(), description: "Producer leader election".into(), @@ -181,20 +194,24 @@ impl Broker { } /// Publishes a serialized message to the stream. + /// + /// If `ack` is true, waits for server acknowledgment and updates internal seqno. pub async fn publish( - &self, + &mut self, subject: Subject, payload: Bytes, + ack: bool, ) -> Result<()> { - self.0.publish(subject, payload).await?; + let f = self.ctx.publish(subject, payload).await?; + if ack { + self.seqno = f.await?.sequence; + } Ok(()) } /// Retrieves the latest snapshot, if one exists. - /// - /// Returns `None` if no snapshot has been uploaded yet. pub async fn get_snapshot(&self) -> Result> { - let store = self.0.get_object_store(cfg::SNAPSHOTS).await?; + let store = self.ctx.get_object_store(cfg::SNAPSHOTS).await?; let mut object = match store.get(cfg::SNAPSHOT_NAME).await { Ok(obj) => obj, @@ -221,9 +238,9 @@ impl Broker { /// allowing standbys to resume replay from the correct position. 
#[instrument(skip(self, file))] pub async fn put_snapshot(&self, slot: Slot, mut file: File) -> Result<()> { - let store = self.0.get_object_store(cfg::SNAPSHOTS).await?; - let mut stream = self.0.get_stream(cfg::STREAM).await?; - let seqno = stream.info().await?.state.last_sequence; + let store = self.ctx.get_object_store(cfg::SNAPSHOTS).await?; + // Next seqno (snapshot captures state after last published message) + let seqno = self.seqno + 1; let meta = ObjectMetadata { name: cfg::SNAPSHOT_NAME.into(), @@ -231,7 +248,7 @@ impl Broker { ..Default::default() }; - // Background upload to avoid blocking. + // Background upload to avoid blocking tokio::spawn(async move { if let Err(e) = store.put(meta, &mut file).await { error!(%e, "snapshot upload failed"); @@ -247,12 +264,12 @@ impl Broker { id: &str, start_seq: Option, ) -> Result { - Consumer::new(id, &self.0, start_seq).await + Consumer::new(id, &self.ctx, start_seq).await } /// Creates a producer for publishing events. pub async fn create_producer(&self, id: &str) -> Result { - Producer::new(id, &self.0).await + Producer::new(id, &self.ctx).await } } @@ -278,28 +295,21 @@ struct SnapshotMeta { } impl SnapshotMeta { - /// Parses metadata from object info headers. + /// Parses required metadata fields from object info. 
fn parse(info: &object_store::ObjectInfo) -> Result { - let slot = info - .metadata - .get(cfg::META_SLOT) - .and_then(|v| v.parse().ok()) - .ok_or_else(|| { - Error::Internal("missing 'slot' in snapshot metadata") - })?; - - let seqno = info - .metadata - .get(cfg::META_SEQNO) - .and_then(|v| v.parse().ok()) - .ok_or_else(|| { - Error::Internal("missing 'seqno' in snapshot metadata") - })?; + let get_parsed = + |key: &str| info.metadata.get(key).and_then(|v| v.parse().ok()); + + let slot = get_parsed(cfg::META_SLOT).ok_or_else(|| { + Error::Internal("missing 'slot' in snapshot metadata") + })?; + let seqno = get_parsed(cfg::META_SEQNO).ok_or_else(|| { + Error::Internal("missing 'seqno' in snapshot metadata") + })?; Ok(Self { slot, seqno }) } - /// Converts to HashMap for ObjectMetadata. fn into_headers(self) -> HashMap { HashMap::from([ (cfg::META_SLOT.into(), self.slot.to_string()), @@ -317,7 +327,6 @@ impl SnapshotMeta { /// Supports resuming from a specific sequence number for catch-up replay /// after recovering from a snapshot. pub struct Consumer { - #[allow(dead_code)] inner: PullConsumer, } @@ -331,7 +340,7 @@ impl Consumer { let deliver_policy = match start_seq { Some(seq) => { - // Delete and recreate to change start position. + // Delete and recreate to change start position stream.delete_consumer(id).await.ok(); DeliverPolicy::ByStartSequence { start_sequence: seq, @@ -343,7 +352,7 @@ impl Consumer { let inner = stream .get_or_create_consumer( id, - pull::Config { + PullConfig { durable_name: Some(id.into()), ack_policy: AckPolicy::All, ack_wait: cfg::ACK_WAIT, @@ -356,6 +365,19 @@ impl Consumer { Ok(Self { inner }) } + + /// Returns a stream of messages from the consumer. + /// + /// Use this in a `tokio::select!` loop to process messages as they arrive. + /// Messages are fetched in batches for efficiency. 
+ pub async fn messages(&self) -> Result { + self.inner + .stream() + .max_messages_per_batch(cfg::BATCH_SIZE) + .messages() + .await + .map_err(Into::into) + } } // ============================================================================= @@ -368,18 +390,15 @@ impl Consumer { /// primary publishes events. The lock has a TTL and must be refreshed /// periodically to maintain leadership. pub struct Producer { - /// KV store for the lock. - lock: Store, - /// Producer identity (node ID). + lock: Box, id: Bytes, - /// Current lock revision for CAS updates. revision: u64, } impl Producer { async fn new(id: &str, js: &Context) -> Result { Ok(Self { - lock: js.get_key_value(cfg::PRODUCER_LOCK).await?, + lock: Box::new(js.get_key_value(cfg::PRODUCER_LOCK).await?), id: id.to_owned().into_bytes().into(), revision: 0, }) diff --git a/magicblock-replicator/src/proto.rs b/magicblock-replicator/src/proto.rs index c098676cd..1f8d2151e 100644 --- a/magicblock-replicator/src/proto.rs +++ b/magicblock-replicator/src/proto.rs @@ -15,6 +15,11 @@ use crate::nats::Subjects; /// Ordinal position of a transaction within a slot. pub type TxIndex = u32; +/// Sentinel index for block boundary markers. +pub const BLOCK_INDEX: TxIndex = TxIndex::MAX - 1; +/// Sentinel index for superblock checkpoint markers. +pub const SUPERBLOCK_INDEX: TxIndex = TxIndex::MAX; + /// Top-level replication message envelope. /// /// Variant order is part of the wire format - reordering breaks compatibility. 
@@ -40,8 +45,8 @@ impl Message { pub(crate) fn slot_and_index(&self) -> (Slot, TxIndex) { match self { Self::Transaction(txn) => (txn.slot, txn.index), - Self::Block(block) => (block.slot, 0), - Self::SuperBlock(superblock) => (superblock.slot, 0), + Self::Block(block) => (block.slot, BLOCK_INDEX), + Self::SuperBlock(superblock) => (superblock.slot, SUPERBLOCK_INDEX), } } } @@ -72,8 +77,6 @@ pub struct Block { #[derive(Deserialize, Serialize, Clone, Debug)] pub struct SuperBlock { pub slot: Slot, - /// Total blocks processed. - pub blocks: u64, /// Total transactions processed. pub transactions: u64, /// Rolling checksum for verification. diff --git a/magicblock-replicator/src/service.rs b/magicblock-replicator/src/service.rs index 021e8ee21..502fbcd6f 100644 --- a/magicblock-replicator/src/service.rs +++ b/magicblock-replicator/src/service.rs @@ -1,98 +1,94 @@ -//! Replication service for primary-standby state synchronization. +//! Primary-standby state synchronization via NATS JetStream. //! //! # Architecture //! //! ```text -//! ┌─────────────┐ -//! │ Service │ -//! └──────┬──────┘ -//! ┌────────────┴────────────┐ -//! ▼ ▼ -//! ┌─────────┐ ┌─────────┐ -//! │ Primary │ │ Standby │ -//! └────┬────┘ └────┬────┘ -//! │ │ -//! ┌─────────┴─────────┐ ┌─────────┴─────────┐ -//! │ Publish events │ │ Consume events │ -//! │ Upload snapshots │ │ Apply to state │ -//! │ Refresh lock │ │ Verify checksums │ -//! └───────────────────┘ └───────────────────┘ +//! ┌─────────────┐ +//! │ Service │ +//! └──────┬──────┘ +//! ┌─────────┴─────────┐ +//! ▼ ▼ +//! ┌─────────┐ ┌─────────┐ +//! │ Primary │ ←────→│ Standby │ +//! └────┬────┘ └────┬────┘ +//! │ │ +//! ┌───┴───┐ ┌───┴───┐ +//! │Publish│ │Consume│ +//! │Upload │ │Apply │ +//! │Refresh│ │Verify │ +//! └───────┘ └───────┘ //! ``` -//! -//! # Role Transitions -//! -//! - **Primary → Standby**: Lock lost (refresh failed) -//! 
- **Standby → Primary**: Leader lock available (current primary crashed) -use std::sync::Arc; -use std::time::Duration; +use std::{ + sync::Arc, + thread::JoinHandle, + time::{Duration, Instant}, +}; +use async_nats::Message as NatsMessage; +use futures::StreamExt; use magicblock_accounts_db::AccountsDb; -use magicblock_core::link::transactions::{SchedulerMode, TransactionSchedulerHandle}; -use magicblock_core::Slot; +use magicblock_core::{ + link::transactions::{ + ReplayPosition, SchedulerMode, TransactionSchedulerHandle, WithEncoded, + }, + Slot, +}; use magicblock_ledger::Ledger; -use tokio::sync::mpsc::Receiver; -use tokio::time::interval; -use tracing::{info, warn}; +use solana_transaction::versioned::VersionedTransaction; +use tokio::{ + fs::File, + runtime::Builder, + sync::mpsc::{Receiver, Sender}, + time::interval, +}; +use tracing::{error, info, warn}; -use crate::nats::{Broker, Consumer, Producer, Snapshot}; -use crate::proto::{Block, SuperBlock, TxIndex}; -use crate::{Message, Result}; - -/// Re-export for crate users. pub use crate::nats::Snapshot as AccountsDbSnapshot; +use crate::{ + nats::{Broker, Consumer, Producer}, + proto::{Block, SuperBlock, TxIndex}, + watcher::SnapshotWatcher, + Message, Result, +}; // ============================================================================= -// Configuration +// Constants // ============================================================================= -/// Interval between leader lock refreshes. const LOCK_REFRESH_INTERVAL: Duration = Duration::from_secs(1); - -/// Maximum time without seeing leader activity before attempting takeover. const LEADER_TIMEOUT: Duration = Duration::from_secs(10); - -/// Interval between snapshot uploads. -const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(60); - -/// Retry delay for consumer creation. 
const CONSUMER_RETRY_DELAY: Duration = Duration::from_secs(1); // ============================================================================= // Context // ============================================================================= -/// Shared state accessible from both primary and standby roles. -/// -/// Contains all references needed for replication operations: -/// - State stores (accountsdb, ledger) -/// - Communication (broker, scheduler) -/// - Position tracking (slot, index) +/// Shared state for both roles. pub struct Context { - /// Unique node identifier used for leader election. + /// Node identifier for leader election. pub id: String, - /// NATS broker for publishing/consuming events and snapshots. + /// NATS broker. pub broker: Broker, - /// Channel to switch scheduler between primary/replica mode. - pub mode_tx: tokio::sync::mpsc::Sender, - /// Accounts database for snapshot creation and state application. + /// Scheduler mode channel. + pub mode_tx: Sender, + /// Accounts database. pub accountsdb: Arc, - /// Ledger for transaction and block storage. + /// Transaction ledger. pub ledger: Arc, - /// Transaction scheduler for executing replayed transactions. + /// Transaction scheduler. pub scheduler: TransactionSchedulerHandle, - /// Current slot position in the replicated stream. + /// Current position. pub slot: Slot, - /// Current transaction index within the slot. pub index: TxIndex, } impl Context { - /// Creates context, initializing position from ledger. + /// Creates context from ledger state. pub async fn new( id: String, broker: Broker, - mode_tx: tokio::sync::mpsc::Sender, + mode_tx: Sender, accountsdb: Arc, ledger: Arc, scheduler: TransactionSchedulerHandle, @@ -101,8 +97,7 @@ impl Context { .get_latest_transaction_position()? 
.unwrap_or_default(); - info!(%id, slot, index, "created replication context"); - + info!(%id, slot, index, "context initialized"); Ok(Self { id, broker, @@ -115,61 +110,87 @@ impl Context { }) } - /// Updates position after processing a message. - pub fn update_position(&mut self, slot: Slot, index: TxIndex) { + /// Updates position. + fn advance(&mut self, slot: Slot, index: TxIndex) { self.slot = slot; self.index = index; } - // ========================================================================= - // Standby Operations - // ========================================================================= - - /// Writes a block to the ledger. - pub async fn write_block(&self, _block: &Block) -> Result<()> { - // TODO: Implement ledger block writing. - // self.ledger.write_block(block)?; + /// Writes block to ledger. + async fn write_block(&self, block: &Block) -> Result<()> { + self.ledger + .write_block(block.slot, block.timestamp, block.hash)?; Ok(()) } - /// Verifies superblock checksum against local accountsdb. - /// - /// Returns `true` if state matches, `false` if divergence detected. - pub fn verify_checksum(&self, _superblock: &SuperBlock) -> Result { - // TODO: Implement checksum verification. - // let local_hash = self.accountsdb.compute_hash(); - // Ok(local_hash == superblock.checksum) - Ok(true) + /// Verifies superblock checksum. + fn verify_checksum(&self, sb: &SuperBlock) -> Result<()> { + let _lock = self.accountsdb.lock_database(); + // SAFETY: Lock acquired above ensures no concurrent modifications + // during checksum computation. + let checksum = unsafe { self.accountsdb.checksum() }; + if checksum == sb.checksum { + Ok(()) + } else { + Err(crate::Error::Internal("accountsdb state mismatch")) + } } - /// Applies a snapshot to restore accountsdb state. - pub async fn apply_snapshot(&self, _snapshot: &Snapshot) -> Result<()> { - // TODO: Implement snapshot application. 
- // self.accountsdb.restore(&snapshot.data)?; - // Update position to snapshot's seqno for correct replay start. - Ok(()) + /// Creates a snapshot watcher for the database directory. + fn create_snapshot_watcher(&self) -> Result { + SnapshotWatcher::new(self.accountsdb.database_directory()) } - /// Switches scheduler to replica mode for transaction replay. - pub async fn enter_replica_mode(&self) { + /// Attempts to acquire producer lock for primary role. + async fn try_acquire_producer(&self) -> Option { + let mut producer = self.broker.create_producer(&self.id).await.ok()?; + producer.acquire().await.ok()?.then_some(producer) + } + + /// Switches to replica mode. + async fn enter_replica_mode(&self) { let _ = self.mode_tx.send(SchedulerMode::Replica).await; } - // ========================================================================= - // Primary Operations - // ========================================================================= + /// Switches to primary mode. + async fn enter_primary_mode(&self) { + let _ = self.mode_tx.send(SchedulerMode::Primary).await; + } - /// Uploads current accountsdb snapshot to the broker. - pub async fn upload_snapshot(&self) -> Result<()> { - // TODO: Get snapshot file from accountsdb and upload. - // let file = self.accountsdb.snapshot_file().await?; - // self.broker.put_snapshot(self.slot, file).await - Ok(()) + /// Uploads snapshot. + async fn upload_snapshot(&self, file: File, slot: Slot) -> Result<()> { + self.broker.put_snapshot(slot, file).await } - /// Switches scheduler to primary mode. - pub async fn enter_primary_mode(&self) { - let _ = self.mode_tx.send(SchedulerMode::Primary).await; + /// Creates consumer with retry. 
+ async fn create_consumer( + &self, + start_seq: Option, + ) -> Result { + loop { + match self.broker.create_consumer(&self.id, start_seq).await { + Ok(c) => return Ok(c), + Err(e) => { + warn!(%e, "consumer creation failed, retrying"); + tokio::time::sleep(CONSUMER_RETRY_DELAY).await; + } + } + } + } + + /// Transitions to standby. + async fn into_standby( + self, + messages: Receiver, + ) -> Result { + let consumer = Box::new(self.create_consumer(None).await?); + self.enter_replica_mode().await; + Ok(Standby { + ctx: self, + consumer, + messages, + last_activity: Instant::now(), + }) } } @@ -177,125 +198,140 @@ impl Context { // Service // ============================================================================= -/// Replication service that runs as either primary or standby. -/// -/// Automatically selects role based on leader lock availability. -/// Transitions between roles as conditions change. +/// Replication service with automatic role transitions. pub enum Service { - /// Primary role: publishes events, holds leader lock. Primary(Primary), - /// Standby role: consumes events from stream. Standby(Standby), } impl Service { - /// Creates a new replication service. - /// - /// Attempts to acquire the leader lock first. If successful, runs as - /// primary; otherwise, falls back to standby mode. + /// Creates service, attempting primary role first. pub async fn new( id: String, broker: Broker, - mode_tx: tokio::sync::mpsc::Sender, + mode_tx: Sender, accountsdb: Arc, ledger: Arc, scheduler: TransactionSchedulerHandle, - rx: Receiver, + messages: Receiver, ) -> Result { - let ctx = Context::new(id, broker, mode_tx, accountsdb, ledger, scheduler).await?; + let ctx = + Context::new(id, broker, mode_tx, accountsdb, ledger, scheduler) + .await?; - // Attempt to acquire leader lock. - let mut producer = ctx.broker.create_producer(&ctx.id).await?; - if producer.acquire().await? 
{ - ctx.enter_primary_mode().await; - return Ok(Self::Primary(Primary { ctx, producer, rx })); - } + // Try to become primary. + let Some(producer) = ctx.try_acquire_producer().await else { + return Ok(Self::Standby(ctx.into_standby(messages).await?)); + }; - // Fall back to standby. - let standby = ctx.into_standby().await?; - Ok(Self::Standby(standby)) + ctx.enter_primary_mode().await; + let snapshots = ctx.create_snapshot_watcher()?; + Ok(Self::Primary(Primary { + ctx, + producer, + messages, + snapshots, + })) } - /// Runs the service in its assigned role. + /// Runs service with automatic role transitions. pub async fn run(self) { - match self { - Service::Primary(p) => p.run().await, - Service::Standby(s) => s.run().await, + let mut state = self; + loop { + state = match state { + Service::Primary(p) => match p.run().await { + Some(s) => Service::Standby(s), + None => return, + }, + Service::Standby(s) => match s.run().await { + Some(p) => Service::Primary(p), + None => return, + }, + }; } } + + /// Spawns the service in a dedicated OS thread with a single-threaded runtime. + /// + /// Returns a `JoinHandle` that can be used to wait for the service to complete. + pub fn spawn(self) -> JoinHandle<()> { + std::thread::spawn(move || { + let runtime = Builder::new_current_thread() + .thread_name("replication-service") + .build() + .expect("Failed to build replication service runtime"); + + runtime.block_on(tokio::task::unconstrained(self.run())); + }) + } } // ============================================================================= // Primary // ============================================================================= -/// Primary node: publishes events and holds the leader lock. -/// -/// Responsibilities: -/// - Forward incoming validator events to the stream -/// - Periodically upload accountsdb snapshots -/// - Maintain leadership via lock refresh +/// Primary node: publishes events and holds leader lock. 
pub struct Primary { ctx: Context, producer: Producer, - rx: Receiver, + messages: Receiver, + snapshots: SnapshotWatcher, } impl Primary { - /// Main loop: publish events, upload snapshots, maintain lock. - async fn run(mut self) { + /// Runs until leadership lost, returns standby on demotion. + async fn run(mut self) -> Option { let mut lock_tick = interval(LOCK_REFRESH_INTERVAL); - let mut snapshot_tick = interval(SNAPSHOT_INTERVAL); loop { tokio::select! { - // Forward incoming messages to the stream. - Some(msg) = self.rx.recv() => { + Some(msg) = self.messages.recv() => { self.publish(msg).await; } - // Periodically refresh the leader lock. _ = lock_tick.tick() => { - if !self.refresh_lock().await { - info!("lost leadership, demoting to standby"); - return; + let held = match self.producer.refresh().await { + Ok(h) => h, + Err(e) => { + warn!(%e, "lock refresh failed"); + false + } + }; + if !held { + info!("lost leadership, demoting"); + return self.ctx.into_standby(self.messages).await + .inspect_err(|e| error!(%e, "demotion failed")) + .ok(); } } - // Periodically upload snapshots. - _ = snapshot_tick.tick() => { - self.upload_snapshot().await; + Some((file, slot)) = self.snapshots.recv() => { + if let Err(e) = self.ctx.upload_snapshot(file, slot).await { + warn!(%e, "snapshot upload failed"); + } } } } } - /// Publishes a message to the stream. async fn publish(&mut self, msg: Message) { - let Ok(payload) = bincode::serialize(&msg) else { return }; - let subject = msg.subject(); - let (slot, index) = msg.slot_and_index(); - - if self.ctx.broker.publish(subject, payload.into()).await.is_ok() { - self.ctx.update_position(slot, index); - } - } - - /// Refreshes the leader lock. Returns `false` if lost. 
- async fn refresh_lock(&mut self) -> bool { - match self.producer.refresh().await { - Ok(held) => held, + let payload = match bincode::serialize(&msg) { + Ok(p) => p, Err(e) => { - warn!(%e, "failed to refresh leader lock"); - false + warn!(%e, "serialization failed"); + return; } - } - } - - /// Uploads a snapshot of current state. - async fn upload_snapshot(&self) { - if let Err(e) = self.ctx.upload_snapshot().await { - warn!(%e, "failed to upload snapshot"); + }; + let subject = msg.subject(); + let (slot, index) = msg.slot_and_index(); + let ack = matches!(msg, Message::SuperBlock(_)); + + if let Err(e) = + self.ctx.broker.publish(subject, payload.into(), ack).await + { + warn!(%e, slot, index, "publish failed"); + } else { + self.ctx.advance(slot, index); } } } @@ -304,167 +340,118 @@ impl Primary { // Standby // ============================================================================= -/// Standby node: consumes events and applies them to local state. -/// -/// Responsibilities: -/// - Consume events from the stream -/// - Apply transactions via scheduler -/// - Write blocks to ledger -/// - Verify superblock checksums -/// - Watch for leader failure (heartbeat timeout) -/// - Attempt takeover when leader fails +/// Standby node: consumes events and watches for leader failure. pub struct Standby { ctx: Context, - #[allow(dead_code)] - consumer: Consumer, - /// Tracks last time we saw activity from the leader. - last_leader_activity: tokio::time::Instant, + consumer: Box, + messages: Receiver, + last_activity: Instant, } impl Standby { - /// Main loop: consume events, apply state, watch for leader failure. - async fn run(mut self) { - let mut leader_timeout_check = interval(Duration::from_secs(1)); + /// Runs until leadership acquired, returns primary on promotion. 
+ async fn run(mut self) -> Option { + let mut timeout_check = interval(Duration::from_secs(1)); + let Ok(mut stream) = self.consumer.messages().await else { + error!("failed to get message stream"); + return None; + }; loop { tokio::select! { - // TODO: Consume messages from the stream. - // This will be implemented when Consumer has a receive method. - // For now, we just watch for leader timeout. - - _ = leader_timeout_check.tick() => { - if self.last_leader_activity.elapsed() > LEADER_TIMEOUT - && self.attempt_takeover().await - { - return; + Some(result) = stream.next() => { + match result { + Ok(msg) => { + self.process(&msg).await; + self.last_activity = Instant::now(); + } + Err(e) => warn!(%e, "stream error"), + } + } + + _ = timeout_check.tick(), if self.last_activity.elapsed() > LEADER_TIMEOUT => { + if let Some(producer) = self.try_acquire_lock().await { + info!("acquired leadership, promoting"); + self.ctx.enter_primary_mode().await; + let snapshots = match self.ctx.create_snapshot_watcher() { + Ok(s) => s, + Err(e) => { error!(%e, "FATAL: snapshot watcher failed"); return None } + }; + return Some(Primary { ctx: self.ctx, producer, messages: self.messages, snapshots }); } } } } } - /// Updates leader activity timestamp (called when receiving events). - fn record_leader_activity(&mut self) { - self.last_leader_activity = tokio::time::Instant::now(); - } - - /// Attempts to acquire leadership. - /// - /// Returns `true` if successful and we should exit (caller should restart). 
- async fn attempt_takeover(&mut self) -> bool { - info!("leader timeout, attempting takeover"); - - let mut producer = match self.ctx.broker.create_producer(&self.ctx.id).await { - Ok(p) => p, + async fn process(&mut self, msg: &NatsMessage) { + let message = match bincode::deserialize::(&msg.payload) { + Ok(m) => m, Err(e) => { - warn!(%e, "failed to create producer for takeover"); - return false; + warn!(%e, "deserialization failed"); + return; } }; + let (slot, index) = message.slot_and_index(); - match producer.acquire().await { - Ok(true) => { - info!("successfully acquired leadership"); - self.ctx.enter_primary_mode().await; - // Exit this run loop; caller should restart as Primary. - true - } - Ok(false) => { - info!("another node acquired leadership first"); - self.record_leader_activity(); - false - } - Err(e) => { - warn!(%e, "failed to acquire leadership"); - false - } + // Skip duplicates. + let obsolete = self.ctx.slot == slot && self.ctx.index >= index; + if self.ctx.slot > slot || obsolete { + return; } - } - - // ========================================================================= - // Event Processing (scaffolding for future implementation) - // ========================================================================= - - /// Processes an incoming transaction message. - #[allow(dead_code)] - async fn process_transaction(&mut self, _slot: Slot, _index: TxIndex, _payload: &[u8]) -> Result<()> { - // TODO: - // 1. Deserialize transaction - // 2. Submit to scheduler for execution - // 3. Update position - self.record_leader_activity(); - Ok(()) - } - /// Processes an incoming block message. - #[allow(dead_code)] - async fn process_block(&mut self, block: &Block) -> Result<()> { - // TODO: - // 1. Write block to ledger - // 2. 
Update slot position - self.ctx.write_block(block).await?; - self.record_leader_activity(); - Ok(()) - } + let result = match message { + Message::Transaction(tx) => { + self.replay_tx(tx.slot, tx.index, tx.payload).await + } + Message::Block(block) => self.ctx.write_block(&block).await, + Message::SuperBlock(sb) => { + self.ctx.verify_checksum(&sb).inspect_err(|error| + error!(slot, %error, "accountsdb state has diverged") + ) + } + }; - /// Processes an incoming superblock message. - #[allow(dead_code)] - async fn process_superblock(&mut self, superblock: &SuperBlock) -> Result<()> { - // TODO: - // 1. Verify checksum against local state - // 2. If mismatch, may need to request snapshot - if !self.ctx.verify_checksum(superblock)? { - warn!( - slot = superblock.slot, - "superblock checksum mismatch - state divergence detected" - ); - // TODO: Request snapshot or enter recovery mode + if let Err(error) = result { + warn!(slot, index, %error, "message precessing error"); + return; } - self.record_leader_activity(); - Ok(()) + self.ctx.advance(slot, index); } -} - -// ============================================================================= -// Context -> Role Transitions -// ============================================================================= - -impl Context { - /// Transitions to standby role by creating a consumer. - pub async fn into_standby(self) -> Result { - let consumer = self.create_consumer_with_retry(None).await?; - self.enter_replica_mode().await; - Ok(Standby { - consumer, - last_leader_activity: tokio::time::Instant::now(), - ctx: self, - }) + async fn replay_tx( + &self, + slot: Slot, + index: TxIndex, + encoded: Vec, + ) -> Result<()> { + let pos = ReplayPosition { + slot, + index, + persist: true, + }; + let txn: VersionedTransaction = bincode::deserialize(&encoded)?; + let txn = WithEncoded { txn, encoded }; + self.ctx.scheduler.replay(pos, txn).await?; + Ok(()) } - /// Creates a consumer with retry on failure. 
- async fn create_consumer_with_retry(&self, start_seq: Option) -> Result { - loop { - match self.broker.create_consumer(&self.id, start_seq).await { - Ok(c) => return Ok(c), - Err(e) => { - warn!(%e, "failed to create consumer; retrying"); - tokio::time::sleep(CONSUMER_RETRY_DELAY).await; - } + async fn try_acquire_lock(&mut self) -> Option { + let Ok(mut producer) = + self.ctx.broker.create_producer(&self.ctx.id).await + else { + return None; + }; + match producer.acquire().await { + Ok(true) => Some(producer), + Ok(false) => { + self.last_activity = Instant::now(); + None + } + Err(e) => { + warn!(%e, "lock acquisition failed"); + None } } } - - /// Attempts to recover from a snapshot before starting consumer. - #[allow(dead_code)] - pub async fn recover_from_snapshot(&self) -> Result> { - let Some(snapshot) = self.broker.get_snapshot().await? else { - info!("no snapshot available for recovery"); - return Ok(None); - }; - - info!(slot = snapshot.slot, seqno = snapshot.seqno, "retrieved snapshot"); - self.apply_snapshot(&snapshot).await?; - Ok(Some(snapshot)) - } } diff --git a/magicblock-replicator/src/tests.rs b/magicblock-replicator/src/tests.rs index 051860c3a..dbe48b8d5 100644 --- a/magicblock-replicator/src/tests.rs +++ b/magicblock-replicator/src/tests.rs @@ -32,7 +32,6 @@ fn variant_order_stability() { ( Message::SuperBlock(SuperBlock { slot: 0, - blocks: 0, transactions: 0, checksum: 0, }), @@ -42,7 +41,9 @@ fn variant_order_stability() { for (msg, expected_idx) in cases { let encoded = bincode::serialize(&msg).unwrap(); - let actual_idx = u32::from_le_bytes([encoded[0], encoded[1], encoded[2], encoded[3]]); + let actual_idx = u32::from_le_bytes([ + encoded[0], encoded[1], encoded[2], encoded[3], + ]); assert_eq!( actual_idx, expected_idx, "variant index changed - this breaks wire compatibility!" 
@@ -65,7 +66,6 @@ fn message_roundtrip() { }), Message::SuperBlock(SuperBlock { slot: 99999, - blocks: 1000, transactions: 50000, checksum: 0xDEADBEEF, }), diff --git a/magicblock-replicator/src/watcher.rs b/magicblock-replicator/src/watcher.rs new file mode 100644 index 000000000..8d5db582a --- /dev/null +++ b/magicblock-replicator/src/watcher.rs @@ -0,0 +1,202 @@ +//! Directory watcher for AccountsDb snapshot archives. +//! +//! Monitors a directory for new `.tar.gz` snapshot files and yields them +//! as open [`tokio::fs::File`] handles via a channel for tokio::select compatibility. + +use std::path::{Path, PathBuf}; + +use notify::{Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; +use tokio::{fs::File, sync::mpsc}; +use tracing::{error, info}; + +use crate::Result; + +const SNAPSHOT_EXTENSION: &str = "tar.gz"; +const SNAPSHOT_PREFIX: &str = "snapshot-"; + +/// Extracts the slot number from a snapshot filename. +/// +/// Expected format: `snapshot-{slot:0>12}.tar.gz` +/// Example: `snapshot-000000000001.tar.gz` -> `Some(1)` +pub fn parse_slot(path: &Path) -> Option<u64> { + path.file_name()? + .to_str()? + .strip_prefix(SNAPSHOT_PREFIX)? + .strip_suffix(&format!(".{SNAPSHOT_EXTENSION}"))? + .parse() + .ok() +} + +/// Watcher for snapshot archive files in a directory. +/// +/// Uses `notify` for filesystem events and yields open file handles +/// via an mpsc channel compatible with `tokio::select!`. +pub struct SnapshotWatcher { + _watcher: RecommendedWatcher, + rx: mpsc::Receiver<PathBuf>, +} + +impl SnapshotWatcher { + /// Creates a new watcher monitoring the given directory. + /// + /// The watcher detects newly created `.tar.gz` files and opens them + /// for reading when [`Self::recv`] is called. + /// + /// # Errors + /// + /// Returns an error if the watcher cannot be initialized or the + /// directory cannot be accessed. 
+ pub fn new(dir: &Path) -> Result<Self> { + let (tx, rx) = mpsc::channel(32); + + let tx_clone = tx.clone(); + + let mut watcher = + notify::recommended_watcher(move |res: notify::Result<Event>| { + match res { + Ok(event) => { + if let Some(path) = Self::process_event(&event) { + if let Err(e) = tx_clone.blocking_send(path) { + error!("Failed to send snapshot event: {}", e); + } + } + } + Err(e) => { + error!("Watch error: {}", e); + } + } + })?; + + watcher.watch(dir, RecursiveMode::NonRecursive)?; + info!(dir = %dir.display(), "Snapshot watcher started"); + + Ok(Self { + _watcher: watcher, + rx, + }) + } + + /// Process a filesystem event and extract snapshot path if relevant. + fn process_event(event: &Event) -> Option<PathBuf> { + if !matches!(event.kind, EventKind::Create(_)) { + return None; + } + + for path in &event.paths { + if Self::is_snapshot_file(path) { + info!(path = %path.display(), "Detected new snapshot"); + return Some(path.clone()); + } + } + + None + } + + /// Check if a path is a snapshot archive file. + fn is_snapshot_file(path: &std::path::Path) -> bool { + path.is_file() + && path + .file_name() + .and_then(|n| n.to_str()) + .is_some_and(|n| n.ends_with(&format!(".{SNAPSHOT_EXTENSION}"))) + } + + /// Receive the next detected snapshot as an open file handle and slot. + /// + /// Opens the file for reading before returning. This method is + /// `tokio::select!` compatible. Returns `None` when the watcher + /// has been dropped. 
+ pub async fn recv(&mut self) -> Option<(File, u64)> { + loop { + let path = self.rx.recv().await?; + let Some(slot) = parse_slot(&path) else { + continue; + }; + let Ok(file) = File::open(&path).await else { + continue; + }; + break Some((file, slot)); + } + } +} + +#[cfg(test)] +mod tests { + use std::io::Write; + + use tempfile::TempDir; + use tokio::io::AsyncReadExt; + + use super::*; + + #[tokio::test] + async fn test_watcher_detects_new_snapshot() { + let temp_dir = TempDir::new().unwrap(); + let mut watcher = SnapshotWatcher::new(temp_dir.path()).unwrap(); + + let test_data = b"test archive contents"; + let snapshot_path = + temp_dir.path().join("snapshot-000000000001.tar.gz"); + std::fs::File::create(&snapshot_path) + .unwrap() + .write_all(test_data) + .unwrap(); + + let (mut file, slot) = tokio::time::timeout( + std::time::Duration::from_secs(2), + watcher.recv(), + ) + .await + .expect("Timeout waiting for snapshot") + .expect("Channel closed"); + + assert_eq!(slot, 1); + let mut contents = Vec::new(); + file.read_to_end(&mut contents).await.unwrap(); + assert_eq!(contents, test_data); + } + + #[tokio::test] + async fn test_watcher_ignores_non_snapshots() { + let temp_dir = TempDir::new().unwrap(); + let mut watcher = SnapshotWatcher::new(temp_dir.path()).unwrap(); + + let other_path = temp_dir.path().join("other.txt"); + std::fs::File::create(&other_path).unwrap(); + + let test_data = b"test archive"; + let snapshot_path = + temp_dir.path().join("snapshot-000000000002.tar.gz"); + std::fs::File::create(&snapshot_path) + .unwrap() + .write_all(test_data) + .unwrap(); + + let (mut file, slot) = tokio::time::timeout( + std::time::Duration::from_secs(2), + watcher.recv(), + ) + .await + .expect("Timeout waiting for snapshot") + .expect("Channel closed"); + + assert_eq!(slot, 2); + let mut contents = Vec::new(); + file.read_to_end(&mut contents).await.unwrap(); + assert_eq!(contents, test_data); + } + + #[test] + fn test_parse_slot() { + assert_eq!( + 
parse_slot(Path::new("snapshot-000000000001.tar.gz")), + Some(1) + ); + assert_eq!( + parse_slot(Path::new("/some/path/snapshot-000000000123.tar.gz")), + Some(123) + ); + assert_eq!(parse_slot(Path::new("other.txt")), None); + assert_eq!(parse_slot(Path::new("snapshot-invalid.tar.gz")), None); + } +} From bc06af391aee00df0cf2b29ba91b951fbdcb122e Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Thu, 12 Mar 2026 18:41:55 +0400 Subject: [PATCH 11/13] refactor: replicator service --- .gitignore | 1 + Cargo.lock | 99 ++++ magicblock-replicator/src/lib.rs | 2 +- magicblock-replicator/src/nats.rs | 442 ----------------- magicblock-replicator/src/nats/broker.rs | 184 +++++++ magicblock-replicator/src/nats/consumer.rs | 81 ++++ .../src/nats/lock_watcher.rs | 60 +++ magicblock-replicator/src/nats/mod.rs | 100 ++++ magicblock-replicator/src/nats/producer.rs | 71 +++ magicblock-replicator/src/nats/snapshot.rs | 50 ++ magicblock-replicator/src/proto.rs | 21 +- magicblock-replicator/src/service.rs | 457 ------------------ magicblock-replicator/src/service/context.rs | 160 ++++++ magicblock-replicator/src/service/mod.rs | 115 +++++ magicblock-replicator/src/service/primary.rs | 93 ++++ magicblock-replicator/src/service/standby.rs | 139 ++++++ magicblock-replicator/src/tests.rs | 132 +++-- magicblock-replicator/src/watcher.rs | 85 +--- 18 files changed, 1228 insertions(+), 1064 deletions(-) delete mode 100644 magicblock-replicator/src/nats.rs create mode 100644 magicblock-replicator/src/nats/broker.rs create mode 100644 magicblock-replicator/src/nats/consumer.rs create mode 100644 magicblock-replicator/src/nats/lock_watcher.rs create mode 100644 magicblock-replicator/src/nats/mod.rs create mode 100644 magicblock-replicator/src/nats/producer.rs create mode 100644 magicblock-replicator/src/nats/snapshot.rs delete mode 100644 magicblock-replicator/src/service.rs create mode 100644 magicblock-replicator/src/service/context.rs create mode 100644 
magicblock-replicator/src/service/mod.rs create mode 100644 magicblock-replicator/src/service/primary.rs create mode 100644 magicblock-replicator/src/service/standby.rs diff --git a/.gitignore b/.gitignore index 377396019..72f2b55d0 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,4 @@ magicblock-test-storage/ # AI related **/CLAUDE.md +config.json diff --git a/Cargo.lock b/Cargo.lock index b79eaf770..543ec7f85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1835,6 +1835,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + [[package]] name = "funty" version = "2.0.0" @@ -2633,6 +2642,26 @@ version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" +[[package]] +name = "inotify" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd5b3eaf1a28b758ac0faa5a4254e8ab2705605496f1b1f3fbbc3988ad73d199" +dependencies = [ + "bitflags 2.10.0", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + [[package]] name = "inout" version = "0.1.4" @@ -2777,6 +2806,26 @@ dependencies = [ "serde_json", ] +[[package]] +name = "kqueue" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac30106d7dce88daf4a3fcb4879ea939476d5074a9b7ddd0fb97fa4bed5596a" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = 
"1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -3555,12 +3604,16 @@ dependencies = [ "async-nats", "bincode", "bytes", + "futures", "magicblock-accounts-db", "magicblock-core", "magicblock-ledger", + "notify", "serde", "solana-hash", "solana-transaction", + "solana-transaction-error", + "tempfile", "thiserror 1.0.69", "tokio", "tracing", @@ -3896,6 +3949,33 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "notify" +version = "8.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3" +dependencies = [ + "bitflags 2.10.0", + "fsevent-sys", + "inotify", + "kqueue", + "libc", + "log", + "mio", + "notify-types", + "walkdir", + "windows-sys 0.60.2", +] + +[[package]] +name = "notify-types" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42b8cfee0e339a0337359f3c88165702ac6e600dc01c0cc9579a92d62b08477a" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -5313,6 +5393,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scc" version = "2.4.0" @@ -9417,6 +9506,16 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +[[package]] +name = "walkdir" +version = "2.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" diff --git a/magicblock-replicator/src/lib.rs b/magicblock-replicator/src/lib.rs index f0b7620a8..9a616b463 100644 --- a/magicblock-replicator/src/lib.rs +++ b/magicblock-replicator/src/lib.rs @@ -22,4 +22,4 @@ pub mod watcher; mod tests; pub use error::{Error, Result}; -pub use proto::Message; +pub use proto::{Message, TransactionIndex}; diff --git a/magicblock-replicator/src/nats.rs b/magicblock-replicator/src/nats.rs deleted file mode 100644 index 218150ab4..000000000 --- a/magicblock-replicator/src/nats.rs +++ /dev/null @@ -1,442 +0,0 @@ -//! NATS JetStream client for event replication. -//! -//! # Components -//! -//! - [`Broker`]: Connection manager with stream/bucket initialization -//! - [`Producer`]: Event publisher with distributed leader lock -//! - [`Consumer`]: Event subscriber for standby replay -//! - [`Snapshot`]: AccountsDb snapshot with positioning metadata - -use std::{collections::HashMap, time::Duration}; - -use async_nats::{ - jetstream::{ - consumer::{ - pull::{Config as PullConfig, Stream as MessageStream}, - AckPolicy, DeliverPolicy, PullConsumer, - }, - kv::{self, CreateErrorKind, Store, UpdateErrorKind}, - object_store::{self, GetErrorKind, ObjectMetadata}, - stream::{self, Compression}, - Context, ContextBuilder, - }, - ConnectOptions, Event, ServerAddr, Subject, -}; -use bytes::Bytes; -use magicblock_core::Slot; -use tokio::{fs::File, io::AsyncReadExt}; -use tracing::{debug, error, info, instrument, warn}; -use url::Url; - -use crate::{Error, Result}; - -// ============================================================================= -// Configuration -// ============================================================================= - -/// Resource names and configuration constants. 
-mod cfg { - use std::time::Duration; - - pub const STREAM: &str = "EVENTS"; - pub const SNAPSHOTS: &str = "SNAPSHOTS"; - pub const PRODUCER_LOCK: &str = "PRODUCER"; - pub const LOCK_KEY: &str = "lock"; - pub const SNAPSHOT_NAME: &str = "accountsdb"; - - pub const META_SLOT: &str = "slot"; - pub const META_SEQNO: &str = "seqno"; - - // Size limits (256 GB stream, 512 GB snapshots) - pub const STREAM_BYTES: i64 = 256 * 1024 * 1024 * 1024; - pub const SNAPSHOT_BYTES: i64 = 512 * 1024 * 1024 * 1024; - - // Timeouts - pub const TTL_STREAM: Duration = Duration::from_secs(24 * 60 * 60); - pub const TTL_LOCK: Duration = Duration::from_secs(5); - pub const ACK_WAIT: Duration = Duration::from_secs(30); - pub const API_TIMEOUT: Duration = Duration::from_secs(2); - pub const DUP_WINDOW: Duration = Duration::from_secs(30); - - // Reconnect backoff (exponential: 100ms base, 5s max) - pub const RECONNECT_BASE_MS: u64 = 100; - pub const RECONNECT_MAX_MS: u64 = 5000; - - // Backpressure - pub const MAX_ACK_PENDING: i64 = 512; - pub const MAX_ACK_INFLIGHT: usize = 2048; - pub const BATCH_SIZE: usize = 512; -} - -// ============================================================================= -// Subjects -// ============================================================================= - -/// NATS subjects for event types. -/// -/// Provides both string constants for stream configuration and typed subjects -/// for publishing. -pub struct Subjects; - -impl Subjects { - pub const TRANSACTION: &'static str = "event.transaction"; - pub const BLOCK: &'static str = "event.block"; - pub const SUPERBLOCK: &'static str = "event.superblock"; - - /// All subjects for stream configuration. - pub const fn all() -> [&'static str; 3] { - [Self::TRANSACTION, Self::BLOCK, Self::SUPERBLOCK] - } - - const fn from(s: &'static str) -> Subject { - Subject::from_static(s) - } - - /// Typed subject for transaction events. 
- pub fn transaction() -> Subject { - Self::from(Self::TRANSACTION) - } - - /// Typed subject for block events. - pub fn block() -> Subject { - Self::from(Self::BLOCK) - } - - /// Typed subject for superblock events. - pub fn superblock() -> Subject { - Self::from(Self::SUPERBLOCK) - } -} - -// ============================================================================= -// Broker -// ============================================================================= - -/// NATS JetStream connection with initialized streams and buckets. -pub struct Broker { - ctx: Context, - seqno: u64, -} - -impl Broker { - /// Connects to NATS and initializes all JetStream resources. - /// - /// Resources are created idempotently - safe to call multiple times. - pub async fn connect(url: Url) -> Result { - let addr = ServerAddr::from_url(url)?; - - let client = ConnectOptions::new() - .max_reconnects(None) - .reconnect_delay_callback(|attempts| { - let ms = (attempts as u64 * cfg::RECONNECT_BASE_MS) - .min(cfg::RECONNECT_MAX_MS); - Duration::from_millis(ms) - }) - .event_callback(|event| async move { - match event { - Event::Disconnected => warn!("NATS disconnected"), - Event::Connected => info!("NATS connected"), - Event::ClientError(e) => warn!(%e, "NATS client error"), - other => debug!(?other, "NATS event"), - } - }) - .connect(addr) - .await?; - - let ctx = ContextBuilder::new() - .timeout(cfg::API_TIMEOUT) - .max_ack_inflight(cfg::MAX_ACK_INFLIGHT) - .backpressure_on_inflight(true) - .build(client); - - let broker = Self { ctx, seqno: 0 }; - broker.init_resources().await?; - Ok(broker) - } - - /// Initializes streams, object stores, and KV buckets. 
- async fn init_resources(&self) -> Result<()> { - let info = self - .ctx - .create_or_update_stream(stream::Config { - name: cfg::STREAM.into(), - max_bytes: cfg::STREAM_BYTES, - subjects: Subjects::all().into_iter().map(Into::into).collect(), - max_age: cfg::TTL_STREAM, - duplicate_window: cfg::DUP_WINDOW, - description: Some("Magicblock validator events".into()), - compression: Some(Compression::S2), - ..Default::default() - }) - .await?; - - info!(stream = %info.config.name, messages = info.state.messages, "JetStream initialized"); - - self.ctx - .create_object_store(object_store::Config { - bucket: cfg::SNAPSHOTS.into(), - description: Some("AccountsDb snapshots".into()), - max_bytes: cfg::SNAPSHOT_BYTES, - ..Default::default() - }) - .await?; - - self.ctx - .create_key_value(kv::Config { - bucket: cfg::PRODUCER_LOCK.into(), - description: "Producer leader election".into(), - max_age: cfg::TTL_LOCK, - ..Default::default() - }) - .await?; - - Ok(()) - } - - /// Publishes a serialized message to the stream. - /// - /// If `ack` is true, waits for server acknowledgment and updates internal seqno. - pub async fn publish( - &mut self, - subject: Subject, - payload: Bytes, - ack: bool, - ) -> Result<()> { - let f = self.ctx.publish(subject, payload).await?; - if ack { - self.seqno = f.await?.sequence; - } - Ok(()) - } - - /// Retrieves the latest snapshot, if one exists. - pub async fn get_snapshot(&self) -> Result> { - let store = self.ctx.get_object_store(cfg::SNAPSHOTS).await?; - - let mut object = match store.get(cfg::SNAPSHOT_NAME).await { - Ok(obj) => obj, - Err(e) if e.kind() == GetErrorKind::NotFound => return Ok(None), - Err(e) => return Err(e.into()), - }; - - let info = object.info(); - let meta = SnapshotMeta::parse(info)?; - - let mut data = Vec::with_capacity(info.size); - object.read_to_end(&mut data).await?; - - Ok(Some(Snapshot { - data, - slot: meta.slot, - seqno: meta.seqno, - })) - } - - /// Uploads a snapshot in the background. 
- /// - /// The snapshot is tagged with the current stream sequence number, - /// allowing standbys to resume replay from the correct position. - #[instrument(skip(self, file))] - pub async fn put_snapshot(&self, slot: Slot, mut file: File) -> Result<()> { - let store = self.ctx.get_object_store(cfg::SNAPSHOTS).await?; - // Next seqno (snapshot captures state after last published message) - let seqno = self.seqno + 1; - - let meta = ObjectMetadata { - name: cfg::SNAPSHOT_NAME.into(), - metadata: SnapshotMeta { slot, seqno }.into_headers(), - ..Default::default() - }; - - // Background upload to avoid blocking - tokio::spawn(async move { - if let Err(e) = store.put(meta, &mut file).await { - error!(%e, "snapshot upload failed"); - } - }); - - Ok(()) - } - - /// Creates a consumer for receiving replicated events. - pub async fn create_consumer( - &self, - id: &str, - start_seq: Option, - ) -> Result { - Consumer::new(id, &self.ctx, start_seq).await - } - - /// Creates a producer for publishing events. - pub async fn create_producer(&self, id: &str) -> Result { - Producer::new(id, &self.ctx).await - } -} - -// ============================================================================= -// Snapshot -// ============================================================================= - -/// AccountsDb snapshot with positioning metadata. -#[derive(Debug)] -pub struct Snapshot { - /// Raw snapshot bytes. - pub data: Vec, - /// Slot at which the snapshot was taken. - pub slot: Slot, - /// Stream sequence for replay start position. - pub seqno: u64, -} - -/// Metadata stored with each snapshot object. -struct SnapshotMeta { - slot: Slot, - seqno: u64, -} - -impl SnapshotMeta { - /// Parses required metadata fields from object info. 
- fn parse(info: &object_store::ObjectInfo) -> Result { - let get_parsed = - |key: &str| info.metadata.get(key).and_then(|v| v.parse().ok()); - - let slot = get_parsed(cfg::META_SLOT).ok_or_else(|| { - Error::Internal("missing 'slot' in snapshot metadata") - })?; - let seqno = get_parsed(cfg::META_SEQNO).ok_or_else(|| { - Error::Internal("missing 'seqno' in snapshot metadata") - })?; - - Ok(Self { slot, seqno }) - } - - fn into_headers(self) -> HashMap { - HashMap::from([ - (cfg::META_SLOT.into(), self.slot.to_string()), - (cfg::META_SEQNO.into(), self.seqno.to_string()), - ]) - } -} - -// ============================================================================= -// Consumer -// ============================================================================= - -/// Pull-based consumer for receiving replicated events. -/// -/// Supports resuming from a specific sequence number for catch-up replay -/// after recovering from a snapshot. -pub struct Consumer { - inner: PullConsumer, -} - -impl Consumer { - async fn new( - id: &str, - js: &Context, - start_seq: Option, - ) -> Result { - let stream = js.get_stream(cfg::STREAM).await?; - - let deliver_policy = match start_seq { - Some(seq) => { - // Delete and recreate to change start position - stream.delete_consumer(id).await.ok(); - DeliverPolicy::ByStartSequence { - start_sequence: seq, - } - } - None => DeliverPolicy::All, - }; - - let inner = stream - .get_or_create_consumer( - id, - PullConfig { - durable_name: Some(id.into()), - ack_policy: AckPolicy::All, - ack_wait: cfg::ACK_WAIT, - max_ack_pending: cfg::MAX_ACK_PENDING, - deliver_policy, - ..Default::default() - }, - ) - .await?; - - Ok(Self { inner }) - } - - /// Returns a stream of messages from the consumer. - /// - /// Use this in a `tokio::select!` loop to process messages as they arrive. - /// Messages are fetched in batches for efficiency. 
- pub async fn messages(&self) -> Result { - self.inner - .stream() - .max_messages_per_batch(cfg::BATCH_SIZE) - .messages() - .await - .map_err(Into::into) - } -} - -// ============================================================================= -// Producer -// ============================================================================= - -/// Event producer with distributed lock for leader election. -/// -/// Only one producer can hold the lock at a time, ensuring exactly one -/// primary publishes events. The lock has a TTL and must be refreshed -/// periodically to maintain leadership. -pub struct Producer { - lock: Box, - id: Bytes, - revision: u64, -} - -impl Producer { - async fn new(id: &str, js: &Context) -> Result { - Ok(Self { - lock: Box::new(js.get_key_value(cfg::PRODUCER_LOCK).await?), - id: id.to_owned().into_bytes().into(), - revision: 0, - }) - } - - /// Attempts to acquire the leader lock. - /// - /// Returns `true` if this producer became the leader. - /// Returns `false` if another producer already holds the lock. - pub async fn acquire(&mut self) -> Result { - match self.lock.create(cfg::LOCK_KEY, self.id.clone()).await { - Ok(rev) => { - self.revision = rev; - Ok(true) - } - Err(e) if e.kind() == CreateErrorKind::AlreadyExists => Ok(false), - Err(e) => Err(e.into()), - } - } - - /// Refreshes the leader lock to prevent expiration. - /// - /// Returns `false` if we lost the lock (another producer took over). - /// This typically indicates a network partition or slow refresh. 
- pub async fn refresh(&mut self) -> Result { - match self - .lock - .update(cfg::LOCK_KEY, self.id.clone(), self.revision) - .await - { - Ok(rev) => { - self.revision = rev; - Ok(true) - } - Err(e) if e.kind() == UpdateErrorKind::WrongLastRevision => { - Ok(false) - } - Err(e) => Err(e.into()), - } - } -} diff --git a/magicblock-replicator/src/nats/broker.rs b/magicblock-replicator/src/nats/broker.rs new file mode 100644 index 000000000..4ed7c9107 --- /dev/null +++ b/magicblock-replicator/src/nats/broker.rs @@ -0,0 +1,184 @@ +//! NATS JetStream connection with initialized streams and buckets. + +use std::time::Duration; + +use async_nats::{ + jetstream::{ + kv, + object_store::{self, GetErrorKind, ObjectMetadata}, + stream::{self, Compression}, + Context, ContextBuilder, + }, + ConnectOptions, Event, ServerAddr, Subject, +}; +use bytes::Bytes; +use magicblock_core::Slot; +use tokio::{fs::File, io::AsyncReadExt}; +use tracing::{debug, error, info, instrument, warn}; +use url::Url; + +use super::{ + cfg, snapshot::SnapshotMeta, Consumer, Producer, Snapshot, Subjects, +}; +use crate::Result; + +/// NATS JetStream connection with initialized streams and buckets. +pub struct Broker { + pub(crate) ctx: Context, + pub(crate) sequence: u64, +} + +impl Broker { + /// Connects to NATS and initializes all JetStream resources. + /// + /// Resources are created idempotently - safe to call multiple times. 
+ pub async fn connect(url: Url) -> Result { + let addr = ServerAddr::from_url(url)?; + + let client = ConnectOptions::new() + .max_reconnects(None) + .reconnect_delay_callback(|attempts| { + let ms = (attempts as u64 * cfg::RECONNECT_BASE_MS) + .min(cfg::RECONNECT_MAX_MS); + Duration::from_millis(ms) + }) + .event_callback(|event| async move { + match event { + Event::Disconnected => warn!("NATS disconnected"), + Event::Connected => info!("NATS connected"), + Event::ClientError(e) => warn!(%e, "NATS client error"), + other => debug!(?other, "NATS event"), + } + }) + .connect(addr) + .await?; + + let ctx = ContextBuilder::new() + .timeout(cfg::API_TIMEOUT) + .max_ack_inflight(cfg::MAX_ACK_INFLIGHT) + .backpressure_on_inflight(true) + .build(client); + + let broker = Self { ctx, sequence: 0 }; + broker.init_resources().await?; + Ok(broker) + } + + /// Initializes streams, object stores, and KV buckets. + async fn init_resources(&self) -> Result<()> { + let info = self + .ctx + .create_or_update_stream(stream::Config { + name: cfg::STREAM.into(), + max_bytes: cfg::STREAM_BYTES, + subjects: Subjects::all().into_iter().map(Into::into).collect(), + max_age: cfg::TTL_STREAM, + duplicate_window: cfg::DUP_WINDOW, + description: Some("Magicblock validator events".into()), + compression: Some(Compression::S2), + ..Default::default() + }) + .await?; + + info!(stream = %info.config.name, messages = info.state.messages, "JetStream initialized"); + + self.ctx + .create_object_store(object_store::Config { + bucket: cfg::SNAPSHOTS.into(), + description: Some("AccountsDb snapshots".into()), + max_bytes: cfg::SNAPSHOT_BYTES, + ..Default::default() + }) + .await?; + + self.ctx + .create_key_value(kv::Config { + bucket: cfg::PRODUCER_LOCK.into(), + description: "Producer leader election".into(), + max_age: cfg::TTL_LOCK, + ..Default::default() + }) + .await?; + + Ok(()) + } + + /// Publishes a serialized message to the stream. 
+ /// + /// If `ack` is true, waits for server acknowledgment and updates internal sequence. + pub async fn publish( + &mut self, + subject: Subject, + payload: Bytes, + ack: bool, + ) -> Result<()> { + let f = self.ctx.publish(subject, payload).await?; + if ack { + self.sequence = f.await?.sequence; + } + Ok(()) + } + + /// Retrieves the latest snapshot, if one exists. + pub async fn get_snapshot(&self) -> Result> { + let store = self.ctx.get_object_store(cfg::SNAPSHOTS).await?; + + let mut object = match store.get(cfg::SNAPSHOT_NAME).await { + Ok(obj) => obj, + Err(e) if e.kind() == GetErrorKind::NotFound => return Ok(None), + Err(e) => return Err(e.into()), + }; + + let info = object.info(); + let meta = SnapshotMeta::parse(info)?; + + let mut data = Vec::with_capacity(info.size); + object.read_to_end(&mut data).await?; + + Ok(Some(Snapshot { + data, + slot: meta.slot, + sequence: meta.sequence, + })) + } + + /// Uploads a snapshot in the background. + /// + /// The snapshot is tagged with the current stream sequence number, + /// allowing standbys to resume replay from the correct position. + #[instrument(skip(self, file))] + pub async fn put_snapshot(&self, slot: Slot, mut file: File) -> Result<()> { + let store = self.ctx.get_object_store(cfg::SNAPSHOTS).await?; + // Next sequence (snapshot captures state after last published message) + let sequence = self.sequence + 1; + + let meta = ObjectMetadata { + name: cfg::SNAPSHOT_NAME.into(), + metadata: SnapshotMeta { slot, sequence }.into_headers(), + ..Default::default() + }; + + // Background upload to avoid blocking + tokio::spawn(async move { + if let Err(error) = store.put(meta, &mut file).await { + error!(%error, "snapshot upload failed"); + } + }); + + Ok(()) + } + + /// Creates a consumer for receiving replicated events. 
+ pub async fn create_consumer( + &self, + id: &str, + start_seq: Option<u64>, + ) -> Result<Consumer> { + Consumer::new(id, &self.ctx, start_seq).await + } + + /// Creates a producer for publishing events. + pub async fn create_producer(&self, id: &str) -> Result<Producer> { + Producer::new(id, &self.ctx).await + } +} diff --git a/magicblock-replicator/src/nats/consumer.rs b/magicblock-replicator/src/nats/consumer.rs new file mode 100644 index 000000000..7d666ce55 --- /dev/null +++ b/magicblock-replicator/src/nats/consumer.rs @@ -0,0 +1,81 @@ +//! Pull-based consumer for receiving replicated events. + +use async_nats::jetstream::{ + consumer::{ + pull::{Config as PullConfig, Stream as MessageStream}, + AckPolicy, DeliverPolicy, PullConsumer, + }, + Context, +}; +use tracing::warn; + +use super::cfg; +use crate::Result; + +/// Pull-based consumer for receiving replicated events. +/// +/// Supports resuming from a specific sequence number for catch-up replay +/// after recovering from a snapshot. +pub struct Consumer { + inner: PullConsumer, +} + +impl Consumer { + pub(crate) async fn new( + id: &str, + js: &Context, + start_seq: Option<u64>, + ) -> Result<Self> { + let stream = js.get_stream(cfg::STREAM).await?; + + let deliver_policy = match start_seq { + Some(seq) => { + // Delete and recreate to change start position + if let Err(error) = stream.delete_consumer(id).await { + warn!(%error, "error removing consumer"); + } + DeliverPolicy::ByStartSequence { + start_sequence: seq, + } + } + None => DeliverPolicy::All, + }; + + let inner = stream + .get_or_create_consumer( + id, + PullConfig { + durable_name: Some(id.into()), + ack_policy: AckPolicy::All, + ack_wait: cfg::ACK_WAIT, + max_ack_pending: cfg::MAX_ACK_PENDING, + deliver_policy, + ..Default::default() + }, + ) + .await?; + + Ok(Self { inner }) + } + + /// Returns a stream of messages from the consumer. + /// + /// Use this in a `tokio::select!` loop to process messages as they arrive. + /// Messages are fetched in batches for efficiency. 
+ pub async fn messages(&self) -> MessageStream { + loop { + let result = self + .inner + .stream() + .max_messages_per_batch(cfg::BATCH_SIZE) + .messages() + .await; + match result { + Ok(s) => break s, + Err(error) => { + warn!(%error, "failed to create message stream") + } + } + } + } +} diff --git a/magicblock-replicator/src/nats/lock_watcher.rs b/magicblock-replicator/src/nats/lock_watcher.rs new file mode 100644 index 000000000..303a84926 --- /dev/null +++ b/magicblock-replicator/src/nats/lock_watcher.rs @@ -0,0 +1,60 @@ +//! Lock watcher for detecting leader expiration. + +use async_nats::jetstream::kv::{Operation, Watch}; +use futures::StreamExt; +use tracing::warn; + +use super::cfg; +use crate::nats::Broker; + +/// Watches the leader lock for expiration/deletion. +/// +/// Used by standby nodes to detect when the primary's lock expires, +/// enabling faster takeover than waiting for the activity timeout. +pub struct LockWatcher { + watch: Box, +} + +impl LockWatcher { + /// Creates a new lock watcher. + pub(crate) async fn new(broker: &Broker) -> Self { + let watch = loop { + let store = match broker.ctx.get_key_value(cfg::PRODUCER_LOCK).await + { + Ok(s) => s, + Err(error) => { + tracing::error!(%error, "failed to obtain lock object"); + continue; + } + }; + match store.watch(cfg::LOCK_KEY).await { + Ok(w) => break Box::new(w), + Err(error) => { + tracing::error!(%error, "failed to create lock watcher"); + continue; + } + } + }; + Self { watch } + } + + /// Waits for the lock to be deleted or expire. + /// + /// Returns when the lock key is deleted or purged (TTL expiry). + /// This signals that a takeover attempt should be made. 
+ pub async fn wait_for_expiry(&mut self) { + while let Some(result) = self.watch.next().await { + let operation = match result { + Ok(entry) => entry.operation, + Err(e) => { + warn!(%e, "lock watch error"); + continue; + } + }; + if matches!(operation, Operation::Delete | Operation::Purge) { + return; + } + } + warn!("lock watch stream ended unexpectedly"); + } +} diff --git a/magicblock-replicator/src/nats/mod.rs b/magicblock-replicator/src/nats/mod.rs new file mode 100644 index 000000000..70cc0f511 --- /dev/null +++ b/magicblock-replicator/src/nats/mod.rs @@ -0,0 +1,100 @@ +//! NATS JetStream client for event replication. +//! +//! # Components +//! +//! - [`Broker`]: Connection manager with stream/bucket initialization +//! - [`Producer`]: Event publisher with distributed leader lock +//! - [`Consumer`]: Event subscriber for standby replay +//! - [`Snapshot`]: AccountsDb snapshot with positioning metadata +//! - [`LockWatcher`]: Watcher for leader lock expiration + +mod broker; +mod consumer; +mod lock_watcher; +mod producer; +mod snapshot; + +use async_nats::Subject; +pub use broker::Broker; +pub use consumer::Consumer; +pub use lock_watcher::LockWatcher; +pub use producer::Producer; +pub use snapshot::Snapshot; + +// ============================================================================= +// Configuration +// ============================================================================= + +/// Resource names and configuration constants. 
+mod cfg { + use std::time::Duration; + + pub const STREAM: &str = "EVENTS"; + pub const SNAPSHOTS: &str = "SNAPSHOTS"; + pub const PRODUCER_LOCK: &str = "PRODUCER"; + pub const LOCK_KEY: &str = "lock"; + pub const SNAPSHOT_NAME: &str = "accountsdb"; + + pub const META_SLOT: &str = "slot"; + pub const META_SEQUENCE: &str = "sequence"; + + // Size limits (256 GB stream, 512 GB snapshots) + pub const STREAM_BYTES: i64 = 256 * 1024 * 1024 * 1024; + pub const SNAPSHOT_BYTES: i64 = 512 * 1024 * 1024 * 1024; + + // Timeouts + pub const TTL_STREAM: Duration = Duration::from_secs(24 * 60 * 60); + pub const TTL_LOCK: Duration = Duration::from_secs(5); + pub const ACK_WAIT: Duration = Duration::from_secs(30); + pub const API_TIMEOUT: Duration = Duration::from_secs(2); + pub const DUP_WINDOW: Duration = Duration::from_secs(30); + + // Reconnect backoff (exponential: 100ms base, 5s max) + pub const RECONNECT_BASE_MS: u64 = 100; + pub const RECONNECT_MAX_MS: u64 = 5000; + + // Backpressure + pub const MAX_ACK_PENDING: i64 = 512; + pub const MAX_ACK_INFLIGHT: usize = 2048; + pub const BATCH_SIZE: usize = 512; +} + +// ============================================================================= +// Subjects +// ============================================================================= + +/// NATS subjects for event types. +/// +/// Provides both string constants for stream configuration and typed subjects +/// for publishing. +pub struct Subjects; + +impl Subjects { + pub const TRANSACTION: &'static str = "event.transaction"; + pub const BLOCK: &'static str = "event.block"; + pub const SUPERBLOCK: &'static str = "event.superblock"; + + /// All subjects for stream configuration. + pub const fn all() -> [&'static str; 3] { + [Self::TRANSACTION, Self::BLOCK, Self::SUPERBLOCK] + } + + const fn from(s: &'static str) -> Subject { + Subject::from_static(s) + } + + /// Typed subject for transaction events. 
+ pub fn transaction() -> Subject { + Self::from(Self::TRANSACTION) + } + + /// Typed subject for block events. + pub fn block() -> Subject { + Self::from(Self::BLOCK) + } + + /// Typed subject for superblock events. + pub fn superblock() -> Subject { + Self::from(Self::SUPERBLOCK) + } +} diff --git a/magicblock-replicator/src/nats/producer.rs b/magicblock-replicator/src/nats/producer.rs new file mode 100644 index 000000000..e2ab5137d --- /dev/null +++ b/magicblock-replicator/src/nats/producer.rs @@ -0,0 +1,71 @@ +//! Event producer with distributed lock for leader election. + +use async_nats::jetstream::{ + kv::{CreateErrorKind, Store, UpdateErrorKind}, + Context, +}; +use bytes::Bytes; +use tracing::warn; + +use super::cfg; +use crate::Result; + +/// Event producer with distributed lock for leader election. +/// +/// Only one producer can hold the lock at a time, ensuring exactly one +/// primary publishes events. The lock has a TTL and must be refreshed +/// periodically to maintain leadership. +pub struct Producer { + lock: Box, + id: Bytes, + revision: u64, +} + +impl Producer { + pub(crate) async fn new(id: &str, js: &Context) -> Result { + Ok(Self { + lock: Box::new(js.get_key_value(cfg::PRODUCER_LOCK).await?), + id: id.to_owned().into_bytes().into(), + revision: 0, + }) + } + + /// Attempts to acquire the leader lock. + /// + /// Returns `true` if this producer became the leader. + /// Returns `false` if another producer already holds the lock. + pub async fn acquire(&mut self) -> Result { + match self.lock.create(cfg::LOCK_KEY, self.id.clone()).await { + Ok(rev) => { + self.revision = rev; + Ok(true) + } + Err(e) if e.kind() == CreateErrorKind::AlreadyExists => Ok(false), + Err(e) => Err(e.into()), + } + } + + /// Refreshes the leader lock to prevent expiration. + /// + /// Returns `false` if we lost the lock (another producer took over). + /// This typically indicates a network partition or slow refresh. 
+ pub async fn refresh(&mut self) -> Result { + match self + .lock + .update(cfg::LOCK_KEY, self.id.clone(), self.revision) + .await + { + Ok(rev) => { + self.revision = rev; + Ok(true) + } + Err(e) if e.kind() == UpdateErrorKind::WrongLastRevision => { + Ok(false) + } + Err(e) => { + warn!(%e, "lock refresh failed"); + Err(e.into()) + } + } + } +} diff --git a/magicblock-replicator/src/nats/snapshot.rs b/magicblock-replicator/src/nats/snapshot.rs new file mode 100644 index 000000000..4cde43591 --- /dev/null +++ b/magicblock-replicator/src/nats/snapshot.rs @@ -0,0 +1,50 @@ +//! AccountsDb snapshot with positioning metadata. + +use std::collections::HashMap; + +use async_nats::jetstream::object_store; +use magicblock_core::Slot; + +use super::cfg; +use crate::Result; + +/// AccountsDb snapshot with positioning metadata. +#[derive(Debug)] +pub struct Snapshot { + /// Raw snapshot bytes. + pub data: Vec, + /// Slot at which the snapshot was taken. + pub slot: Slot, + /// Stream sequence for replay start position. + pub sequence: u64, +} + +/// Metadata stored with each snapshot object. +pub(crate) struct SnapshotMeta { + pub(crate) slot: Slot, + pub(crate) sequence: u64, +} + +impl SnapshotMeta { + /// Parses required metadata fields from object info. 
+ pub(crate) fn parse(info: &object_store::ObjectInfo) -> Result { + let get_parsed = + |key: &str| info.metadata.get(key).and_then(|v| v.parse().ok()); + + let slot = get_parsed(cfg::META_SLOT).ok_or_else(|| { + crate::Error::Internal("missing 'slot' in snapshot metadata") + })?; + let sequence = get_parsed(cfg::META_SEQUENCE).ok_or_else(|| { + crate::Error::Internal("missing 'sequence' in snapshot metadata") + })?; + + Ok(Self { slot, sequence }) + } + + pub(crate) fn into_headers(self) -> HashMap { + HashMap::from([ + (cfg::META_SLOT.into(), self.slot.to_string()), + (cfg::META_SEQUENCE.into(), self.sequence.to_string()), + ]) + } +} diff --git a/magicblock-replicator/src/proto.rs b/magicblock-replicator/src/proto.rs index 1f8d2151e..92a113eab 100644 --- a/magicblock-replicator/src/proto.rs +++ b/magicblock-replicator/src/proto.rs @@ -13,12 +13,15 @@ use solana_transaction::versioned::VersionedTransaction; use crate::nats::Subjects; /// Ordinal position of a transaction within a slot. -pub type TxIndex = u32; +pub type TransactionIndex = u32; -/// Sentinel index for block boundary markers. -pub const BLOCK_INDEX: TxIndex = TxIndex::MAX - 1; -/// Sentinel index for superblock checkpoint markers. -pub const SUPERBLOCK_INDEX: TxIndex = TxIndex::MAX; +/// Index for block boundary markers (TransactionIndex::MAX - 1). +/// Used to identify Block messages in slot/index comparisons. +pub const BLOCK_INDEX: TransactionIndex = TransactionIndex::MAX - 1; + +/// Index for superblock checkpoint markers (TransactionIndex::MAX). +/// Used to identify SuperBlock messages in slot/index comparisons. +pub const SUPERBLOCK_INDEX: TransactionIndex = TransactionIndex::MAX; /// Top-level replication message envelope. 
/// @@ -42,9 +45,9 @@ impl Message { } } - pub(crate) fn slot_and_index(&self) -> (Slot, TxIndex) { + pub(crate) fn slot_and_index(&self) -> (Slot, TransactionIndex) { match self { - Self::Transaction(txn) => (txn.slot, txn.index), + Self::Transaction(tx) => (tx.slot, tx.index), Self::Block(block) => (block.slot, BLOCK_INDEX), Self::SuperBlock(superblock) => (superblock.slot, SUPERBLOCK_INDEX), } @@ -57,7 +60,7 @@ pub struct Transaction { /// Slot where the transaction was executed. pub slot: Slot, /// Ordinal position within the slot. - pub index: TxIndex, + pub index: TransactionIndex, /// Bincode-encoded `VersionedTransaction`. pub payload: Vec, } @@ -77,8 +80,6 @@ pub struct Block { #[derive(Deserialize, Serialize, Clone, Debug)] pub struct SuperBlock { pub slot: Slot, - /// Total transactions processed. - pub transactions: u64, /// Rolling checksum for verification. pub checksum: u64, } diff --git a/magicblock-replicator/src/service.rs b/magicblock-replicator/src/service.rs deleted file mode 100644 index 502fbcd6f..000000000 --- a/magicblock-replicator/src/service.rs +++ /dev/null @@ -1,457 +0,0 @@ -//! Primary-standby state synchronization via NATS JetStream. -//! -//! # Architecture -//! -//! ```text -//! ┌─────────────┐ -//! │ Service │ -//! └──────┬──────┘ -//! ┌─────────┴─────────┐ -//! ▼ ▼ -//! ┌─────────┐ ┌─────────┐ -//! │ Primary │ ←────→│ Standby │ -//! └────┬────┘ └────┬────┘ -//! │ │ -//! ┌───┴───┐ ┌───┴───┐ -//! │Publish│ │Consume│ -//! │Upload │ │Apply │ -//! │Refresh│ │Verify │ -//! └───────┘ └───────┘ -//! 
``` - -use std::{ - sync::Arc, - thread::JoinHandle, - time::{Duration, Instant}, -}; - -use async_nats::Message as NatsMessage; -use futures::StreamExt; -use magicblock_accounts_db::AccountsDb; -use magicblock_core::{ - link::transactions::{ - ReplayPosition, SchedulerMode, TransactionSchedulerHandle, WithEncoded, - }, - Slot, -}; -use magicblock_ledger::Ledger; -use solana_transaction::versioned::VersionedTransaction; -use tokio::{ - fs::File, - runtime::Builder, - sync::mpsc::{Receiver, Sender}, - time::interval, -}; -use tracing::{error, info, warn}; - -pub use crate::nats::Snapshot as AccountsDbSnapshot; -use crate::{ - nats::{Broker, Consumer, Producer}, - proto::{Block, SuperBlock, TxIndex}, - watcher::SnapshotWatcher, - Message, Result, -}; - -// ============================================================================= -// Constants -// ============================================================================= - -const LOCK_REFRESH_INTERVAL: Duration = Duration::from_secs(1); -const LEADER_TIMEOUT: Duration = Duration::from_secs(10); -const CONSUMER_RETRY_DELAY: Duration = Duration::from_secs(1); - -// ============================================================================= -// Context -// ============================================================================= - -/// Shared state for both roles. -pub struct Context { - /// Node identifier for leader election. - pub id: String, - /// NATS broker. - pub broker: Broker, - /// Scheduler mode channel. - pub mode_tx: Sender, - /// Accounts database. - pub accountsdb: Arc, - /// Transaction ledger. - pub ledger: Arc, - /// Transaction scheduler. - pub scheduler: TransactionSchedulerHandle, - /// Current position. - pub slot: Slot, - pub index: TxIndex, -} - -impl Context { - /// Creates context from ledger state. 
- pub async fn new( - id: String, - broker: Broker, - mode_tx: Sender, - accountsdb: Arc, - ledger: Arc, - scheduler: TransactionSchedulerHandle, - ) -> Result { - let (slot, index) = ledger - .get_latest_transaction_position()? - .unwrap_or_default(); - - info!(%id, slot, index, "context initialized"); - Ok(Self { - id, - broker, - mode_tx, - accountsdb, - ledger, - scheduler, - slot, - index, - }) - } - - /// Updates position. - fn advance(&mut self, slot: Slot, index: TxIndex) { - self.slot = slot; - self.index = index; - } - - /// Writes block to ledger. - async fn write_block(&self, block: &Block) -> Result<()> { - self.ledger - .write_block(block.slot, block.timestamp, block.hash)?; - Ok(()) - } - - /// Verifies superblock checksum. - fn verify_checksum(&self, sb: &SuperBlock) -> Result<()> { - let _lock = self.accountsdb.lock_database(); - // SAFETY: Lock acquired above ensures no concurrent modifications - // during checksum computation. - let checksum = unsafe { self.accountsdb.checksum() }; - if checksum == sb.checksum { - Ok(()) - } else { - Err(crate::Error::Internal("accountsdb state mismatch")) - } - } - - /// Creates a snapshot watcher for the database directory. - fn create_snapshot_watcher(&self) -> Result { - SnapshotWatcher::new(self.accountsdb.database_directory()) - } - - /// Attempts to acquire producer lock for primary role. - async fn try_acquire_producer(&self) -> Option { - let mut producer = self.broker.create_producer(&self.id).await.ok()?; - producer.acquire().await.ok()?.then_some(producer) - } - - /// Switches to replica mode. - async fn enter_replica_mode(&self) { - let _ = self.mode_tx.send(SchedulerMode::Replica).await; - } - - /// Switches to primary mode. - async fn enter_primary_mode(&self) { - let _ = self.mode_tx.send(SchedulerMode::Primary).await; - } - - /// Uploads snapshot. 
- async fn upload_snapshot(&self, file: File, slot: Slot) -> Result<()> { - self.broker.put_snapshot(slot, file).await - } - - /// Creates consumer with retry. - async fn create_consumer( - &self, - start_seq: Option, - ) -> Result { - loop { - match self.broker.create_consumer(&self.id, start_seq).await { - Ok(c) => return Ok(c), - Err(e) => { - warn!(%e, "consumer creation failed, retrying"); - tokio::time::sleep(CONSUMER_RETRY_DELAY).await; - } - } - } - } - - /// Transitions to standby. - async fn into_standby( - self, - messages: Receiver, - ) -> Result { - let consumer = Box::new(self.create_consumer(None).await?); - self.enter_replica_mode().await; - Ok(Standby { - ctx: self, - consumer, - messages, - last_activity: Instant::now(), - }) - } -} - -// ============================================================================= -// Service -// ============================================================================= - -/// Replication service with automatic role transitions. -pub enum Service { - Primary(Primary), - Standby(Standby), -} - -impl Service { - /// Creates service, attempting primary role first. - pub async fn new( - id: String, - broker: Broker, - mode_tx: Sender, - accountsdb: Arc, - ledger: Arc, - scheduler: TransactionSchedulerHandle, - messages: Receiver, - ) -> Result { - let ctx = - Context::new(id, broker, mode_tx, accountsdb, ledger, scheduler) - .await?; - - // Try to become primary. - let Some(producer) = ctx.try_acquire_producer().await else { - return Ok(Self::Standby(ctx.into_standby(messages).await?)); - }; - - ctx.enter_primary_mode().await; - let snapshots = ctx.create_snapshot_watcher()?; - Ok(Self::Primary(Primary { - ctx, - producer, - messages, - snapshots, - })) - } - - /// Runs service with automatic role transitions. 
- pub async fn run(self) { - let mut state = self; - loop { - state = match state { - Service::Primary(p) => match p.run().await { - Some(s) => Service::Standby(s), - None => return, - }, - Service::Standby(s) => match s.run().await { - Some(p) => Service::Primary(p), - None => return, - }, - }; - } - } - - /// Spawns the service in a dedicated OS thread with a single-threaded runtime. - /// - /// Returns a `JoinHandle` that can be used to wait for the service to complete. - pub fn spawn(self) -> JoinHandle<()> { - std::thread::spawn(move || { - let runtime = Builder::new_current_thread() - .thread_name("replication-service") - .build() - .expect("Failed to build replication service runtime"); - - runtime.block_on(tokio::task::unconstrained(self.run())); - }) - } -} - -// ============================================================================= -// Primary -// ============================================================================= - -/// Primary node: publishes events and holds leader lock. -pub struct Primary { - ctx: Context, - producer: Producer, - messages: Receiver, - snapshots: SnapshotWatcher, -} - -impl Primary { - /// Runs until leadership lost, returns standby on demotion. - async fn run(mut self) -> Option { - let mut lock_tick = interval(LOCK_REFRESH_INTERVAL); - - loop { - tokio::select! 
{ - Some(msg) = self.messages.recv() => { - self.publish(msg).await; - } - - _ = lock_tick.tick() => { - let held = match self.producer.refresh().await { - Ok(h) => h, - Err(e) => { - warn!(%e, "lock refresh failed"); - false - } - }; - if !held { - info!("lost leadership, demoting"); - return self.ctx.into_standby(self.messages).await - .inspect_err(|e| error!(%e, "demotion failed")) - .ok(); - } - } - - Some((file, slot)) = self.snapshots.recv() => { - if let Err(e) = self.ctx.upload_snapshot(file, slot).await { - warn!(%e, "snapshot upload failed"); - } - } - } - } - } - - async fn publish(&mut self, msg: Message) { - let payload = match bincode::serialize(&msg) { - Ok(p) => p, - Err(e) => { - warn!(%e, "serialization failed"); - return; - } - }; - let subject = msg.subject(); - let (slot, index) = msg.slot_and_index(); - let ack = matches!(msg, Message::SuperBlock(_)); - - if let Err(e) = - self.ctx.broker.publish(subject, payload.into(), ack).await - { - warn!(%e, slot, index, "publish failed"); - } else { - self.ctx.advance(slot, index); - } - } -} - -// ============================================================================= -// Standby -// ============================================================================= - -/// Standby node: consumes events and watches for leader failure. -pub struct Standby { - ctx: Context, - consumer: Box, - messages: Receiver, - last_activity: Instant, -} - -impl Standby { - /// Runs until leadership acquired, returns primary on promotion. - async fn run(mut self) -> Option { - let mut timeout_check = interval(Duration::from_secs(1)); - let Ok(mut stream) = self.consumer.messages().await else { - error!("failed to get message stream"); - return None; - }; - - loop { - tokio::select! 
{ - Some(result) = stream.next() => { - match result { - Ok(msg) => { - self.process(&msg).await; - self.last_activity = Instant::now(); - } - Err(e) => warn!(%e, "stream error"), - } - } - - _ = timeout_check.tick(), if self.last_activity.elapsed() > LEADER_TIMEOUT => { - if let Some(producer) = self.try_acquire_lock().await { - info!("acquired leadership, promoting"); - self.ctx.enter_primary_mode().await; - let snapshots = match self.ctx.create_snapshot_watcher() { - Ok(s) => s, - Err(e) => { error!(%e, "FATAL: snapshot watcher failed"); return None } - }; - return Some(Primary { ctx: self.ctx, producer, messages: self.messages, snapshots }); - } - } - } - } - } - - async fn process(&mut self, msg: &NatsMessage) { - let message = match bincode::deserialize::(&msg.payload) { - Ok(m) => m, - Err(e) => { - warn!(%e, "deserialization failed"); - return; - } - }; - let (slot, index) = message.slot_and_index(); - - // Skip duplicates. - let obsolete = self.ctx.slot == slot && self.ctx.index >= index; - if self.ctx.slot > slot || obsolete { - return; - } - - let result = match message { - Message::Transaction(tx) => { - self.replay_tx(tx.slot, tx.index, tx.payload).await - } - Message::Block(block) => self.ctx.write_block(&block).await, - Message::SuperBlock(sb) => { - self.ctx.verify_checksum(&sb).inspect_err(|error| - error!(slot, %error, "accountsdb state has diverged") - ) - } - }; - - if let Err(error) = result { - warn!(slot, index, %error, "message precessing error"); - return; - } - self.ctx.advance(slot, index); - } - - async fn replay_tx( - &self, - slot: Slot, - index: TxIndex, - encoded: Vec, - ) -> Result<()> { - let pos = ReplayPosition { - slot, - index, - persist: true, - }; - let txn: VersionedTransaction = bincode::deserialize(&encoded)?; - let txn = WithEncoded { txn, encoded }; - self.ctx.scheduler.replay(pos, txn).await?; - Ok(()) - } - - async fn try_acquire_lock(&mut self) -> Option { - let Ok(mut producer) = - 
self.ctx.broker.create_producer(&self.ctx.id).await - else { - return None; - }; - match producer.acquire().await { - Ok(true) => Some(producer), - Ok(false) => { - self.last_activity = Instant::now(); - None - } - Err(e) => { - warn!(%e, "lock acquisition failed"); - None - } - } - } -} diff --git a/magicblock-replicator/src/service/context.rs b/magicblock-replicator/src/service/context.rs new file mode 100644 index 000000000..9f3214037 --- /dev/null +++ b/magicblock-replicator/src/service/context.rs @@ -0,0 +1,160 @@ +//! Shared context for primary and standby roles. + +use std::sync::Arc; + +use magicblock_accounts_db::AccountsDb; +use magicblock_core::{ + link::transactions::{SchedulerMode, TransactionSchedulerHandle}, + Slot, +}; +use magicblock_ledger::Ledger; +use tokio::{ + fs::File, + sync::mpsc::{Receiver, Sender}, +}; +use tracing::info; + +use super::{Primary, Standby, CONSUMER_RETRY_DELAY}; +use crate::{ + nats::{Broker, Consumer, LockWatcher, Producer}, + proto::TransactionIndex, + watcher::SnapshotWatcher, + Message, Result, +}; + +/// Shared state for both primary and standby roles. +pub struct ReplicationContext { + /// Node identifier for leader election. + pub id: String, + /// NATS broker. + pub broker: Broker, + /// Scheduler mode channel. + pub mode_tx: Sender, + /// Accounts database. + pub accountsdb: Arc, + /// Transaction ledger. + pub ledger: Arc, + /// Transaction scheduler. + pub scheduler: TransactionSchedulerHandle, + /// Current position. + pub slot: Slot, + pub index: TransactionIndex, +} + +impl ReplicationContext { + /// Creates context from ledger state. + pub async fn new( + id: String, + broker: Broker, + mode_tx: Sender, + accountsdb: Arc, + ledger: Arc, + scheduler: TransactionSchedulerHandle, + ) -> Result { + let (slot, index) = ledger + .get_latest_transaction_position()? 
+ .unwrap_or_default(); + + info!(%id, slot, index, "context initialized"); + Ok(Self { + id, + broker, + mode_tx, + accountsdb, + ledger, + scheduler, + slot, + index, + }) + } + + /// Updates position. + pub fn update_position(&mut self, slot: Slot, index: TransactionIndex) { + self.slot = slot; + self.index = index; + } + + /// Writes block to ledger. + pub async fn write_block(&self, block: &crate::proto::Block) -> Result<()> { + self.ledger + .write_block(block.slot, block.timestamp, block.hash)?; + Ok(()) + } + + /// Verifies superblock checksum. + pub fn verify_checksum(&self, sb: &crate::proto::SuperBlock) -> Result<()> { + let _lock = self.accountsdb.lock_database(); + // SAFETY: Lock acquired above ensures no concurrent modifications + // during checksum computation. + let checksum = unsafe { self.accountsdb.checksum() }; + if checksum == sb.checksum { + Ok(()) + } else { + Err(crate::Error::Internal("accountsdb state mismatch")) + } + } + + /// Creates a snapshot watcher for the database directory. + pub fn create_snapshot_watcher(&self) -> Result { + SnapshotWatcher::new(self.accountsdb.database_directory()) + } + + /// Attempts to acquire producer lock for primary role. + pub async fn try_acquire_producer(&self) -> Result> { + let mut producer = self.broker.create_producer(&self.id).await?; + producer + .acquire() + .await + .map(|acquired| acquired.then_some(producer)) + } + + /// Switches to replica mode. + pub async fn enter_replica_mode(&self) { + let _ = self.mode_tx.send(SchedulerMode::Replica).await; + } + + /// Switches to primary mode. + pub async fn enter_primary_mode(&self) { + let _ = self.mode_tx.send(SchedulerMode::Primary).await; + } + + /// Uploads snapshot. + pub async fn upload_snapshot(&self, file: File, slot: Slot) -> Result<()> { + self.broker.put_snapshot(slot, file).await + } + + /// Creates consumer with retry. 
+ pub async fn create_consumer(&self, start_seq: Option) -> Consumer { + loop { + match self.broker.create_consumer(&self.id, start_seq).await { + Ok(c) => return c, + Err(e) => { + tracing::warn!(%e, "consumer creation failed, retrying"); + tokio::time::sleep(CONSUMER_RETRY_DELAY).await; + } + } + } + } + + /// Transitions to primary role with the given producer. + pub async fn into_primary( + self, + producer: Producer, + messages: Receiver, + ) -> Result { + let snapshots = self.create_snapshot_watcher()?; + self.enter_primary_mode().await; + Ok(Primary::new(self, producer, messages, snapshots)) + } + + /// Transitions to standby role. + pub async fn into_standby( + self, + messages: Receiver, + ) -> Result { + let consumer = Box::new(self.create_consumer(None).await); + let watcher = LockWatcher::new(&self.broker).await; + self.enter_replica_mode().await; + Ok(Standby::new(self, consumer, messages, watcher)) + } +} diff --git a/magicblock-replicator/src/service/mod.rs b/magicblock-replicator/src/service/mod.rs new file mode 100644 index 000000000..5556bcf2d --- /dev/null +++ b/magicblock-replicator/src/service/mod.rs @@ -0,0 +1,115 @@ +//! Primary-standby state synchronization via NATS JetStream. +//! +//! # Architecture +//! +//! ```text +//! ┌─────────────┐ +//! │ Service │ +//! └──────┬──────┘ +//! ┌─────────┴─────────┐ +//! ▼ ▼ +//! ┌─────────┐ ┌─────────┐ +//! │ Primary │ ←────→│ Standby │ +//! └────┬────┘ └────┬────┘ +//! │ │ +//! ┌───┴───┐ ┌───┴───┐ +//! │Publish│ │Consume│ +//! │Upload │ │Apply │ +//! │Refresh│ │Verify │ +//! └───────┘ └───────┘ +//! 
``` + +mod context; +mod primary; +mod standby; + +use std::{sync::Arc, thread::JoinHandle, time::Duration}; + +pub use context::ReplicationContext; +use magicblock_accounts_db::AccountsDb; +use magicblock_core::link::transactions::{ + SchedulerMode, TransactionSchedulerHandle, +}; +use magicblock_ledger::Ledger; +pub use primary::Primary; +pub use standby::Standby; +use tokio::{ + runtime::Builder, + sync::mpsc::{Receiver, Sender}, +}; + +use crate::{nats::Broker, Message, Result}; + +// ============================================================================= +// Constants +// ============================================================================= + +pub(crate) const LOCK_REFRESH_INTERVAL: Duration = Duration::from_secs(1); +pub(crate) const LEADER_TIMEOUT: Duration = Duration::from_secs(10); +const CONSUMER_RETRY_DELAY: Duration = Duration::from_secs(1); + +// ============================================================================= +// Service +// ============================================================================= + +/// Replication service with automatic role transitions. +pub enum Service { + Primary(Primary), + Standby(Standby), +} + +impl Service { + /// Creates service, attempting primary role first. + pub async fn new( + id: String, + broker: Broker, + mode_tx: Sender, + accountsdb: Arc, + ledger: Arc, + scheduler: TransactionSchedulerHandle, + messages: Receiver, + ) -> crate::Result { + let ctx = ReplicationContext::new( + id, broker, mode_tx, accountsdb, ledger, scheduler, + ) + .await?; + + // Try to become primary. + match ctx.try_acquire_producer().await? { + Some(producer) => { + Ok(Self::Primary(ctx.into_primary(producer, messages).await?)) + } + None => Ok(Self::Standby(ctx.into_standby(messages).await?)), + } + } + + /// Runs service with automatic role transitions. 
+ pub async fn run(mut self) -> Result<()> { + loop { + self = match self { + Service::Primary(p) => Service::Standby(p.run().await?), + Service::Standby(s) => match s.run().await { + Ok(p) => Service::Primary(p), + Err(error) => { + tracing::error!(%error, "unrecoverable replication failure"); + return Err(error); + } + }, + }; + } + } + + /// Spawns the service in a dedicated OS thread with a single-threaded runtime. + /// + /// Returns a `JoinHandle` that can be used to wait for the service to complete. + pub fn spawn(self) -> JoinHandle> { + std::thread::spawn(move || { + let runtime = Builder::new_current_thread() + .thread_name("replication-service") + .build() + .expect("Failed to build replication service runtime"); + + runtime.block_on(tokio::task::unconstrained(self.run())) + }) + } +} diff --git a/magicblock-replicator/src/service/primary.rs b/magicblock-replicator/src/service/primary.rs new file mode 100644 index 000000000..0705e4fb6 --- /dev/null +++ b/magicblock-replicator/src/service/primary.rs @@ -0,0 +1,93 @@ +//! Primary node: publishes events and holds leader lock. + +use tokio::sync::mpsc::Receiver; +use tracing::{error, info, instrument, warn}; + +use super::{ReplicationContext, LOCK_REFRESH_INTERVAL}; +use crate::{ + nats::Producer, service::Standby, watcher::SnapshotWatcher, Message, Result, +}; + +/// Primary node: publishes events and holds leader lock. +pub struct Primary { + pub(crate) ctx: ReplicationContext, + producer: Producer, + messages: Receiver, + snapshots: SnapshotWatcher, +} + +impl Primary { + /// Creates a new primary instance. + pub fn new( + ctx: ReplicationContext, + producer: Producer, + messages: Receiver, + snapshots: SnapshotWatcher, + ) -> Self { + Self { + ctx, + producer, + messages, + snapshots, + } + } + + /// Runs until leadership lost, returns standby on demotion. 
+ #[instrument(skip(self))] + pub async fn run(mut self) -> Result { + let mut lock_tick = tokio::time::interval(LOCK_REFRESH_INTERVAL); + + loop { + tokio::select! { + Some(msg) = self.messages.recv() => { + if let Err(error) = self.publish(msg).await { + // publish should not easily fail; if that happens, it means + // the message broker has become unrecoverably unreachable + warn!(%error, "failed to publish the message"); + return self.ctx.into_standby(self.messages).await; + } + } + + _ = lock_tick.tick() => { + let held = match self.producer.refresh().await { + Ok(h) => h, + Err(e) => { + warn!(%e, "lock refresh failed"); + false + } + }; + if !held { + info!("lost leadership, demoting"); + return self.ctx.into_standby(self.messages).await; + } + } + + Some((file, slot)) = self.snapshots.recv() => { + if let Err(e) = self.ctx.upload_snapshot(file, slot).await { + warn!(%e, "snapshot upload failed"); + } + } + } + } + } + + async fn publish(&mut self, msg: Message) -> Result<()> { + let payload = match bincode::serialize(&msg) { + Ok(p) => p, + Err(error) => { + error!(%error, "serialization failed, should never happen"); + return Ok(()); + } + }; + let subject = msg.subject(); + let (slot, index) = msg.slot_and_index(); + let ack = matches!(msg, Message::SuperBlock(_)); + + self.ctx + .broker + .publish(subject, payload.into(), ack) + .await?; + self.ctx.update_position(slot, index); + Ok(()) + } +} diff --git a/magicblock-replicator/src/service/standby.rs b/magicblock-replicator/src/service/standby.rs new file mode 100644 index 000000000..9bd834d1b --- /dev/null +++ b/magicblock-replicator/src/service/standby.rs @@ -0,0 +1,139 @@ +//! Standby node: consumes events and watches for leader failure.
+ +use std::time::{Duration, Instant}; + +use async_nats::Message as NatsMessage; +use futures::StreamExt; +use magicblock_core::{ + link::transactions::{ReplayPosition, WithEncoded}, + Slot, +}; +use solana_transaction::versioned::VersionedTransaction; +use tokio::sync::mpsc::Receiver; +use tracing::{error, info, warn}; + +use super::{ReplicationContext, LEADER_TIMEOUT}; +use crate::{ + nats::{Consumer, LockWatcher}, + proto::TransactionIndex, + service::Primary, + Message, Result, +}; + +/// Standby node: consumes events and watches for leader failure. +pub struct Standby { + pub(crate) ctx: ReplicationContext, + consumer: Box, + messages: Receiver, + watcher: LockWatcher, + last_activity: Instant, +} + +impl Standby { + /// Creates a new standby instance. + pub fn new( + ctx: ReplicationContext, + consumer: Box, + messages: Receiver, + watcher: LockWatcher, + ) -> Self { + Self { + ctx, + consumer, + messages, + watcher, + last_activity: Instant::now(), + } + } + + /// Runs until leadership acquired, returns primary on promotion. + pub async fn run(mut self) -> Result { + let mut timeout_check = tokio::time::interval(Duration::from_secs(1)); + let mut stream = self.consumer.messages().await; + + loop { + tokio::select! 
{ + result = stream.next() => { + let Some(result) = result else { + stream = self.consumer.messages().await; + continue; + }; + match result { + Ok(msg) => { + self.handle_message(&msg).await; + self.last_activity = Instant::now(); + } + Err(e) => warn!(%e, "message consumption stream error"), + } + } + + _ = self.watcher.wait_for_expiry() => { + info!("leader lock expired, attempting takeover"); + if let Ok(Some(producer)) = self.ctx.try_acquire_producer().await { + info!("acquired leadership, promoting"); + return self.ctx.into_primary(producer, self.messages).await; + } + } + + _ = timeout_check.tick(), if self.last_activity.elapsed() > LEADER_TIMEOUT => { + if let Ok(Some(producer)) = self.ctx.try_acquire_producer().await { + info!("acquired leadership via timeout, promoting"); + return self.ctx.into_primary(producer, self.messages).await; + } + } + } + } + } + + async fn handle_message(&mut self, msg: &NatsMessage) { + let message = match bincode::deserialize::(&msg.payload) { + Ok(m) => m, + Err(e) => { + warn!(%e, "deserialization failed"); + return; + } + }; + let (slot, index) = message.slot_and_index(); + + // Skip duplicates. 
+ let obsolete = self.ctx.slot == slot && self.ctx.index >= index; + if self.ctx.slot > slot || obsolete { + return; + } + + let result = match message { + Message::Transaction(tx) => { + self.replay_tx(tx.slot, tx.index, tx.payload).await + } + Message::Block(block) => self.ctx.write_block(&block).await, + Message::SuperBlock(sb) => { + self.ctx.verify_checksum(&sb).inspect_err(|error| + error!(slot, %error, "accountsdb state has diverged") + ) + } + }; + + if let Err(error) = result { + warn!(slot, index, %error, "message processing error"); + return; + } + self.ctx.update_position(slot, index); + } + + async fn replay_tx( + &self, + slot: Slot, + index: TransactionIndex, + encoded: Vec, + ) -> Result<()> { + let pos = ReplayPosition { + slot, + index, + persist: true, + }; + let tx: VersionedTransaction = bincode::deserialize(&encoded)?; + let tx = WithEncoded { txn: tx, encoded }; + self.ctx.scheduler.replay(pos, tx).await?; + Ok(()) + } +} diff --git a/magicblock-replicator/src/tests.rs b/magicblock-replicator/src/tests.rs index dbe48b8d5..2b3247f05 100644 --- a/magicblock-replicator/src/tests.rs +++ b/magicblock-replicator/src/tests.rs @@ -1,79 +1,71 @@ -//! Tests for the replication protocol. +use std::{io::Write, path::Path, time::Duration}; -use solana_hash::Hash; +use tempfile::TempDir; +use tokio::io::AsyncReadExt; -use crate::proto::{Block, Message, SuperBlock, Transaction}; +use crate::watcher::*; -// ============================================================================= -// Wire Format Tests - catch serialization/protocol changes -// ============================================================================= +#[tokio::test] +async fn test_watcher_detects_new_snapshot() { + let temp_dir = TempDir::new().unwrap(); + let mut watcher = SnapshotWatcher::new(temp_dir.path()).unwrap(); -#[test] -fn variant_order_stability() { - // Bincode encodes enum discriminant as variant index. - // Reordering enum variants silently breaks wire compatibility. 
- let cases: [(Message, u32); 3] = [ - ( - Message::Transaction(Transaction { - slot: 0, - index: 0, - payload: vec![], - }), - 0, - ), - ( - Message::Block(Block { - slot: 0, - hash: Hash::default(), - timestamp: 42, - }), - 1, - ), - ( - Message::SuperBlock(SuperBlock { - slot: 0, - transactions: 0, - checksum: 0, - }), - 2, - ), - ]; + let test_data = b"test archive contents"; + let snapshot_path = temp_dir.path().join("snapshot-000000000001.tar.gz"); + std::fs::File::create(&snapshot_path) + .unwrap() + .write_all(test_data) + .unwrap(); + + let (mut file, slot) = + tokio::time::timeout(Duration::from_secs(2), watcher.recv()) + .await + .expect("Timeout waiting for snapshot") + .expect("Channel closed"); - for (msg, expected_idx) in cases { - let encoded = bincode::serialize(&msg).unwrap(); - let actual_idx = u32::from_le_bytes([ - encoded[0], encoded[1], encoded[2], encoded[3], - ]); - assert_eq!( - actual_idx, expected_idx, - "variant index changed - this breaks wire compatibility!" 
- ); - } + assert_eq!(slot, 1); + let mut contents = Vec::new(); + file.read_to_end(&mut contents).await.unwrap(); + assert_eq!(contents, test_data); } -#[test] -fn message_roundtrip() { - let cases = vec![ - Message::Transaction(Transaction { - slot: 54321, - index: 42, - payload: (0..255).collect(), - }), - Message::Block(Block { - slot: 12345, - hash: Hash::new_unique(), - timestamp: 1700000000, - }), - Message::SuperBlock(SuperBlock { - slot: 99999, - transactions: 50000, - checksum: 0xDEADBEEF, - }), - ]; +#[tokio::test] +async fn test_watcher_ignores_non_snapshots() { + let temp_dir = TempDir::new().unwrap(); + let mut watcher = SnapshotWatcher::new(temp_dir.path()).unwrap(); + + let other_path = temp_dir.path().join("other.txt"); + std::fs::File::create(&other_path).unwrap(); + + let test_data = b"test archive"; + let snapshot_path = temp_dir.path().join("snapshot-000000000002.tar.gz"); + std::fs::File::create(&snapshot_path) + .unwrap() + .write_all(test_data) + .unwrap(); - for msg in cases { - let encoded = bincode::serialize(&msg).unwrap(); - let decoded: Message = bincode::deserialize(&encoded).unwrap(); - assert_eq!(bincode::serialize(&decoded).unwrap(), encoded); - } + let (mut file, slot) = + tokio::time::timeout(Duration::from_secs(2), watcher.recv()) + .await + .expect("Timeout waiting for snapshot") + .expect("Channel closed"); + + assert_eq!(slot, 2); + let mut contents = Vec::new(); + file.read_to_end(&mut contents).await.unwrap(); + assert_eq!(contents, test_data); +} + +#[test] +fn test_parse_slot() { + assert_eq!( + parse_slot(Path::new("snapshot-000000000001.tar.gz")), + Some(1) + ); + assert_eq!( + parse_slot(Path::new("/some/path/snapshot-000000000123.tar.gz")), + Some(123) + ); + assert_eq!(parse_slot(Path::new("other.txt")), None); + assert_eq!(parse_slot(Path::new("snapshot-invalid.tar.gz")), None); } diff --git a/magicblock-replicator/src/watcher.rs b/magicblock-replicator/src/watcher.rs index 8d5db582a..d7c962923 100644 --- 
a/magicblock-replicator/src/watcher.rs +++ b/magicblock-replicator/src/watcher.rs @@ -49,14 +49,12 @@ impl SnapshotWatcher { pub fn new(dir: &Path) -> Result { let (tx, rx) = mpsc::channel(32); - let tx_clone = tx.clone(); - let mut watcher = notify::recommended_watcher(move |res: notify::Result| { match res { Ok(event) => { if let Some(path) = Self::process_event(&event) { - if let Err(e) = tx_clone.blocking_send(path) { + if let Err(e) = tx.blocking_send(path) { error!("Failed to send snapshot event: {}", e); } } @@ -119,84 +117,3 @@ impl SnapshotWatcher { } } } - -#[cfg(test)] -mod tests { - use std::io::Write; - - use tempfile::TempDir; - use tokio::io::AsyncReadExt; - - use super::*; - - #[tokio::test] - async fn test_watcher_detects_new_snapshot() { - let temp_dir = TempDir::new().unwrap(); - let mut watcher = SnapshotWatcher::new(temp_dir.path()).unwrap(); - - let test_data = b"test archive contents"; - let snapshot_path = - temp_dir.path().join("snapshot-000000000001.tar.gz"); - std::fs::File::create(&snapshot_path) - .unwrap() - .write_all(test_data) - .unwrap(); - - let (mut file, slot) = tokio::time::timeout( - std::time::Duration::from_secs(2), - watcher.recv(), - ) - .await - .expect("Timeout waiting for snapshot") - .expect("Channel closed"); - - assert_eq!(slot, 1); - let mut contents = Vec::new(); - file.read_to_end(&mut contents).await.unwrap(); - assert_eq!(contents, test_data); - } - - #[tokio::test] - async fn test_watcher_ignores_non_snapshots() { - let temp_dir = TempDir::new().unwrap(); - let mut watcher = SnapshotWatcher::new(temp_dir.path()).unwrap(); - - let other_path = temp_dir.path().join("other.txt"); - std::fs::File::create(&other_path).unwrap(); - - let test_data = b"test archive"; - let snapshot_path = - temp_dir.path().join("snapshot-000000000002.tar.gz"); - std::fs::File::create(&snapshot_path) - .unwrap() - .write_all(test_data) - .unwrap(); - - let (mut file, slot) = tokio::time::timeout( - std::time::Duration::from_secs(2), - 
watcher.recv(), - ) - .await - .expect("Timeout waiting for snapshot") - .expect("Channel closed"); - - assert_eq!(slot, 2); - let mut contents = Vec::new(); - file.read_to_end(&mut contents).await.unwrap(); - assert_eq!(contents, test_data); - } - - #[test] - fn test_parse_slot() { - assert_eq!( - parse_slot(Path::new("snapshot-000000000001.tar.gz")), - Some(1) - ); - assert_eq!( - parse_slot(Path::new("/some/path/snapshot-000000000123.tar.gz")), - Some(123) - ); - assert_eq!(parse_slot(Path::new("other.txt")), None); - assert_eq!(parse_slot(Path::new("snapshot-invalid.tar.gz")), None); - } -} From 3ce7e8522ea333e948f1a6efdd9514b3462182ee Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Fri, 13 Mar 2026 13:36:43 +0400 Subject: [PATCH 12/13] feat: use unqiue machine id for replicator node --- Cargo.lock | 187 ++++++++++++++++++- Cargo.toml | 2 + magicblock-replicator/Cargo.toml | 1 + magicblock-replicator/src/error.rs | 2 +- magicblock-replicator/src/nats/snapshot.rs | 6 +- magicblock-replicator/src/service/context.rs | 21 ++- magicblock-replicator/src/service/mod.rs | 3 +- 7 files changed, 206 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 543ec7f85..fb63dced7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1225,6 +1225,25 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -3066,6 +3085,26 @@ dependencies = [ "libc", ] +[[package]] +name = "machineid-rs" +version = "1.2.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "35ceb4d434d69d7199abc3036541ba6ef86767a4356e3077d5a3419f85b70b14" +dependencies = [ + "hex", + "hmac 0.12.1", + "md-5", + "serde", + "serde_json", + "sha-1", + "sha2 0.10.9", + "sysinfo", + "uuid", + "whoami", + "winreg 0.11.0", + "wmi", +] + [[package]] name = "magic-domain-program" version = "0.2.0" @@ -3605,6 +3644,7 @@ dependencies = [ "bincode", "bytes", "futures", + "machineid-rs", "magicblock-accounts-db", "magicblock-core", "magicblock-ledger", @@ -3783,6 +3823,16 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest 0.10.7", +] + [[package]] name = "memchr" version = "2.7.6" @@ -3976,6 +4026,15 @@ dependencies = [ "bitflags 2.10.0", ] +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -4967,6 +5026,26 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -5014,7 +5093,7 @@ 
dependencies = [ "cfg-if", "libc", "rustix 1.1.2", - "windows", + "windows 0.62.2", ] [[package]] @@ -5105,7 +5184,7 @@ dependencies = [ "wasm-bindgen-futures", "web-sys", "webpki-roots 0.25.4", - "winreg", + "winreg 0.50.0", ] [[package]] @@ -5673,6 +5752,17 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "sha-1" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest 0.10.7", +] + [[package]] name = "sha1" version = "0.10.6" @@ -8634,6 +8724,21 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "sysinfo" +version = "0.29.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "rayon", + "winapi", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -9546,6 +9651,12 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.106" @@ -9669,6 +9780,17 @@ dependencies = [ "rustix 0.38.44", ] +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", + "web-sys", +] + [[package]] name = "winapi" version = "0.3.9" @@ -9700,6 +9822,17 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-implement 0.48.0", + "windows-interface 0.48.0", + "windows-targets 0.48.5", +] + [[package]] name = "windows" version = "0.62.2" @@ -9727,8 +9860,8 @@ version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ - "windows-implement", - "windows-interface", + "windows-implement 0.60.2", + "windows-interface 0.59.3", "windows-link", "windows-result", "windows-strings", @@ -9745,6 +9878,17 @@ dependencies = [ "windows-threading", ] +[[package]] +name = "windows-implement" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e2ee588991b9e7e6c8338edf3333fbe4da35dc72092643958ebb43f0ab2c49c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "windows-implement" version = "0.60.2" @@ -9756,6 +9900,17 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "windows-interface" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6fb8df20c9bcaa8ad6ab513f7b40104840c8867d5751126e4df3b08388d0cc7" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "windows-interface" version = "0.59.3" @@ -10050,6 +10205,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "winreg" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a1a57ff50e9b408431e8f97d5456f2807f8eb2a2cd79b06068fc87f8ecf189" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "winreg" version = "0.50.0" @@ -10066,6 +10231,20 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wmi" 
+version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daffb44abb7d2e87a1233aa17fdbde0d55b890b32a23a1f908895b87fa6f1a00" +dependencies = [ + "chrono", + "futures", + "log", + "serde", + "thiserror 1.0.69", + "windows 0.48.0", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index d2a960216..691b0a16c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,6 +120,8 @@ magicblock-tui-client = { path = "./tools/magicblock-tui-client" } magicblock-validator-admin = { path = "./magicblock-validator-admin" } magicblock-version = { path = "./magicblock-version" } +machineid-rs = "1.2" + num-derive = "0.4" num-format = "0.4.4" num-traits = "0.2" diff --git a/magicblock-replicator/Cargo.toml b/magicblock-replicator/Cargo.toml index 25e48e15e..12457e7fc 100644 --- a/magicblock-replicator/Cargo.toml +++ b/magicblock-replicator/Cargo.toml @@ -15,6 +15,7 @@ futures = { workspace = true } magicblock-accounts-db = { workspace = true } magicblock-core = { workspace = true } magicblock-ledger = { workspace = true } +machineid-rs = { workspace = true } notify = { version = "8.0", features = ["macos_kqueue"] } thiserror = { workspace = true } tokio = { workspace = true, features = [ diff --git a/magicblock-replicator/src/error.rs b/magicblock-replicator/src/error.rs index 1b1091a20..2fc69d7bc 100644 --- a/magicblock-replicator/src/error.rs +++ b/magicblock-replicator/src/error.rs @@ -30,7 +30,7 @@ pub enum Error { /// Internal protocol violation or malformed data. #[error("internal error: {0}")] - Internal(&'static str), + Internal(String), /// File system watcher error. 
#[error("watcher error: {0}")] diff --git a/magicblock-replicator/src/nats/snapshot.rs b/magicblock-replicator/src/nats/snapshot.rs index 4cde43591..9970f8f3d 100644 --- a/magicblock-replicator/src/nats/snapshot.rs +++ b/magicblock-replicator/src/nats/snapshot.rs @@ -6,7 +6,7 @@ use async_nats::jetstream::object_store; use magicblock_core::Slot; use super::cfg; -use crate::Result; +use crate::{Error, Result}; /// AccountsDb snapshot with positioning metadata. #[derive(Debug)] @@ -32,10 +32,10 @@ impl SnapshotMeta { |key: &str| info.metadata.get(key).and_then(|v| v.parse().ok()); let slot = get_parsed(cfg::META_SLOT).ok_or_else(|| { - crate::Error::Internal("missing 'slot' in snapshot metadata") + Error::Internal("missing 'slot' in snapshot metadata".into()) })?; let sequence = get_parsed(cfg::META_SEQUENCE).ok_or_else(|| { - crate::Error::Internal("missing 'sequence' in snapshot metadata") + Error::Internal("missing 'sequence' in snapshot metadata".into()) })?; Ok(Self { slot, sequence }) diff --git a/magicblock-replicator/src/service/context.rs b/magicblock-replicator/src/service/context.rs index 9f3214037..4365fbe9e 100644 --- a/magicblock-replicator/src/service/context.rs +++ b/magicblock-replicator/src/service/context.rs @@ -2,6 +2,7 @@ use std::sync::Arc; +use machineid_rs::IdBuilder; use magicblock_accounts_db::AccountsDb; use magicblock_core::{ link::transactions::{SchedulerMode, TransactionSchedulerHandle}, @@ -17,9 +18,9 @@ use tracing::info; use super::{Primary, Standby, CONSUMER_RETRY_DELAY}; use crate::{ nats::{Broker, Consumer, LockWatcher, Producer}, - proto::TransactionIndex, + proto::{self, TransactionIndex}, watcher::SnapshotWatcher, - Message, Result, + Error, Message, Result, }; /// Shared state for both primary and standby roles. @@ -44,13 +45,17 @@ pub struct ReplicationContext { impl ReplicationContext { /// Creates context from ledger state. 
pub async fn new( - id: String, broker: Broker, mode_tx: Sender, accountsdb: Arc, ledger: Arc, scheduler: TransactionSchedulerHandle, ) -> Result { + let id = IdBuilder::new(machineid_rs::Encryption::SHA256) + .add_component(machineid_rs::HWIDComponent::SystemID) + .build("magicblock") + .map_err(|e| Error::Internal(e.to_string()))?; + let (slot, index) = ledger .get_latest_transaction_position()? .unwrap_or_default(); @@ -75,14 +80,14 @@ impl ReplicationContext { } /// Writes block to ledger. - pub async fn write_block(&self, block: &crate::proto::Block) -> Result<()> { + pub async fn write_block(&self, block: &proto::Block) -> Result<()> { self.ledger .write_block(block.slot, block.timestamp, block.hash)?; Ok(()) } /// Verifies superblock checksum. - pub fn verify_checksum(&self, sb: &crate::proto::SuperBlock) -> Result<()> { + pub fn verify_checksum(&self, sb: &proto::SuperBlock) -> Result<()> { let _lock = self.accountsdb.lock_database(); // SAFETY: Lock acquired above ensures no concurrent modifications // during checksum computation. @@ -90,7 +95,11 @@ impl ReplicationContext { if checksum == sb.checksum { Ok(()) } else { - Err(crate::Error::Internal("accountsdb state mismatch")) + let msg = format!( + "accountsdb state mismatch at {}, expected {checksum}, got {}", + sb.slot, sb.checksum + ); + Err(Error::Internal(msg)) } } diff --git a/magicblock-replicator/src/service/mod.rs b/magicblock-replicator/src/service/mod.rs index 5556bcf2d..1e30bca41 100644 --- a/magicblock-replicator/src/service/mod.rs +++ b/magicblock-replicator/src/service/mod.rs @@ -61,7 +61,6 @@ pub enum Service { impl Service { /// Creates service, attempting primary role first. 
pub async fn new( - id: String, broker: Broker, mode_tx: Sender, accountsdb: Arc, @@ -70,7 +69,7 @@ impl Service { messages: Receiver, ) -> crate::Result { let ctx = ReplicationContext::new( - id, broker, mode_tx, accountsdb, ledger, scheduler, + broker, mode_tx, accountsdb, ledger, scheduler, ) .await?; From 1e97a73b7872c8e4c3093249006e747f8b50c398 Mon Sep 17 00:00:00 2001 From: Babur Makhmudov Date: Fri, 13 Mar 2026 14:42:52 +0400 Subject: [PATCH 13/13] feat: track the sequence number in the Broker --- magicblock-replicator/src/nats/broker.rs | 14 ++++---- magicblock-replicator/src/nats/consumer.rs | 36 +++++++++----------- magicblock-replicator/src/nats/snapshot.rs | 2 -- magicblock-replicator/src/service/context.rs | 10 ++++-- magicblock-replicator/src/service/mod.rs | 6 +++- magicblock-replicator/src/service/primary.rs | 4 +-- 6 files changed, 38 insertions(+), 34 deletions(-) diff --git a/magicblock-replicator/src/nats/broker.rs b/magicblock-replicator/src/nats/broker.rs index 4ed7c9107..a43a846d2 100644 --- a/magicblock-replicator/src/nats/broker.rs +++ b/magicblock-replicator/src/nats/broker.rs @@ -59,13 +59,13 @@ impl Broker { .backpressure_on_inflight(true) .build(client); - let broker = Self { ctx, sequence: 0 }; + let mut broker = Self { ctx, sequence: 0 }; broker.init_resources().await?; Ok(broker) } /// Initializes streams, object stores, and KV buckets. - async fn init_resources(&self) -> Result<()> { + async fn init_resources(&mut self) -> Result<()> { let info = self .ctx .create_or_update_stream(stream::Config { @@ -100,6 +100,8 @@ impl Broker { }) .await?; + self.sequence = info.state.first_sequence; + Ok(()) } @@ -120,7 +122,7 @@ impl Broker { } /// Retrieves the latest snapshot, if one exists. 
- pub async fn get_snapshot(&self) -> Result> { + pub async fn get_snapshot(&mut self) -> Result> { let store = self.ctx.get_object_store(cfg::SNAPSHOTS).await?; let mut object = match store.get(cfg::SNAPSHOT_NAME).await { @@ -134,11 +136,11 @@ impl Broker { let mut data = Vec::with_capacity(info.size); object.read_to_end(&mut data).await?; + self.sequence = meta.sequence; Ok(Some(Snapshot { data, slot: meta.slot, - sequence: meta.sequence, })) } @@ -172,9 +174,9 @@ impl Broker { pub async fn create_consumer( &self, id: &str, - start_seq: Option, + reset: bool, ) -> Result { - Consumer::new(id, &self.ctx, start_seq).await + Consumer::new(id, self, reset).await } /// Creates a producer for publishing events. diff --git a/magicblock-replicator/src/nats/consumer.rs b/magicblock-replicator/src/nats/consumer.rs index 7d666ce55..47a424f2c 100644 --- a/magicblock-replicator/src/nats/consumer.rs +++ b/magicblock-replicator/src/nats/consumer.rs @@ -1,16 +1,13 @@ //! Pull-based consumer for receiving replicated events. -use async_nats::jetstream::{ - consumer::{ - pull::{Config as PullConfig, Stream as MessageStream}, - AckPolicy, DeliverPolicy, PullConsumer, - }, - Context, +use async_nats::jetstream::consumer::{ + pull::{Config as PullConfig, Stream as MessageStream}, + AckPolicy, DeliverPolicy, PullConsumer, }; use tracing::warn; use super::cfg; -use crate::Result; +use crate::{nats::Broker, Result}; /// Pull-based consumer for receiving replicated events. 
/// @@ -23,22 +20,21 @@ pub struct Consumer { impl Consumer { pub(crate) async fn new( id: &str, - js: &Context, - start_seq: Option, + broker: &Broker, + reset: bool, ) -> Result { - let stream = js.get_stream(cfg::STREAM).await?; + let stream = broker.ctx.get_stream(cfg::STREAM).await?; - let deliver_policy = match start_seq { - Some(seq) => { - // Delete and recreate to change start position - if let Err(error) = stream.delete_consumer(id).await { - warn!(%error, "error removing consumer"); - } - DeliverPolicy::ByStartSequence { - start_sequence: seq, - } + let deliver_policy = if reset { + // Delete and recreate to change start position + if let Err(error) = stream.delete_consumer(id).await { + warn!(%error, "error removing consumer"); + } + DeliverPolicy::ByStartSequence { + start_sequence: broker.sequence, } - None => DeliverPolicy::All, + } else { + DeliverPolicy::All }; let inner = stream diff --git a/magicblock-replicator/src/nats/snapshot.rs b/magicblock-replicator/src/nats/snapshot.rs index 9970f8f3d..a7988eb7e 100644 --- a/magicblock-replicator/src/nats/snapshot.rs +++ b/magicblock-replicator/src/nats/snapshot.rs @@ -15,8 +15,6 @@ pub struct Snapshot { pub data: Vec, /// Slot at which the snapshot was taken. pub slot: Slot, - /// Stream sequence for replay start position. - pub sequence: u64, } /// Metadata stored with each snapshot object. diff --git a/magicblock-replicator/src/service/context.rs b/magicblock-replicator/src/service/context.rs index 4365fbe9e..fcd16d0f7 100644 --- a/magicblock-replicator/src/service/context.rs +++ b/magicblock-replicator/src/service/context.rs @@ -133,9 +133,9 @@ impl ReplicationContext { } /// Creates consumer with retry. 
- pub async fn create_consumer(&self, start_seq: Option) -> Consumer { + pub async fn create_consumer(&self, reset: bool) -> Consumer { loop { - match self.broker.create_consumer(&self.id, start_seq).await { + match self.broker.create_consumer(&self.id, reset).await { Ok(c) => return c, Err(e) => { tracing::warn!(%e, "consumer creation failed, retrying"); @@ -157,11 +157,15 @@ impl ReplicationContext { } /// Transitions to standby role. + /// reset parameter controls where in the stream the consumption starts: + /// true - the last known position that we know + /// false - the last known position that message broker tracks for us pub async fn into_standby( self, messages: Receiver, + reset: bool, ) -> Result { - let consumer = Box::new(self.create_consumer(None).await); + let consumer = Box::new(self.create_consumer(reset).await); let watcher = LockWatcher::new(&self.broker).await; self.enter_replica_mode().await; Ok(Standby::new(self, consumer, messages, watcher)) diff --git a/magicblock-replicator/src/service/mod.rs b/magicblock-replicator/src/service/mod.rs index 1e30bca41..4c89ae5ad 100644 --- a/magicblock-replicator/src/service/mod.rs +++ b/magicblock-replicator/src/service/mod.rs @@ -67,6 +67,7 @@ impl Service { ledger: Arc, scheduler: TransactionSchedulerHandle, messages: Receiver, + reset: bool, ) -> crate::Result { let ctx = ReplicationContext::new( broker, mode_tx, accountsdb, ledger, scheduler, @@ -78,7 +79,10 @@ impl Service { Some(producer) => { Ok(Self::Primary(ctx.into_primary(producer, messages).await?)) } - None => Ok(Self::Standby(ctx.into_standby(messages).await?)), + None => { + let standby = ctx.into_standby(messages, reset).await?; + Ok(Self::Standby(standby)) + } } } diff --git a/magicblock-replicator/src/service/primary.rs b/magicblock-replicator/src/service/primary.rs index 0705e4fb6..c2b00a161 100644 --- a/magicblock-replicator/src/service/primary.rs +++ b/magicblock-replicator/src/service/primary.rs @@ -44,7 +44,7 @@ impl Primary { // 
publish should not easily fail, if that happens, it means // the message broker has become unrecoverably unreacheable warn!(%error, "failed to publish the message"); - return self.ctx.into_standby(self.messages).await; + return self.ctx.into_standby(self.messages, true).await; } } @@ -58,7 +58,7 @@ impl Primary { }; if !held { info!("lost leadership, demoting"); - return self.ctx.into_standby(self.messages).await; + return self.ctx.into_standby(self.messages, true).await; } }