From 4f1f2e1c115dc3b10fe2d4ad3de4e2159afa44ea Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Mon, 6 Oct 2025 16:54:39 -0700 Subject: [PATCH 01/13] misc(node): remove unused notification_message_loop --- node/src/client/mod.rs | 7 ++-- node/src/lib.rs | 71 --------------------------------------- node/src/prover/mod.rs | 3 +- node/src/validator/mod.rs | 3 +- 4 files changed, 5 insertions(+), 79 deletions(-) diff --git a/node/src/client/mod.rs b/node/src/client/mod.rs index b161e35afb..646abe3ad8 100644 --- a/node/src/client/mod.rs +++ b/node/src/client/mod.rs @@ -219,6 +219,8 @@ impl> Client { // Initialize the routing. node.initialize_routing().await; + // Pass the node to the signal handler. + let _ = signal_node.set(node.clone()); // Initialize the sync module. node.initialize_sync(); // Initialize solution verification. @@ -227,10 +229,7 @@ impl> Client { node.initialize_deploy_verification(); // Initialize execution verification. node.initialize_execute_verification(); - // Initialize the notification message loop. - node.handles.lock().push(crate::start_notification_message_loop()); - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); + // Return the node. Ok(node) } diff --git a/node/src/lib.rs b/node/src/lib.rs index 3ac2106561..a162a12218 100644 --- a/node/src/lib.rs +++ b/node/src/lib.rs @@ -63,74 +63,3 @@ pub fn log_clean_error(storage_mode: &StorageMode) { } } } - -/// Starts the notification message loop. -pub fn start_notification_message_loop() -> tokio::task::JoinHandle<()> { - // let mut interval = tokio::time::interval(std::time::Duration::from_secs(180)); - tokio::spawn(async move { - // loop { - // interval.tick().await; - // // TODO (howardwu): Swap this with the official message for announcements. - // // info!("{}", notification_message()); - // } - }) -} - -/// Returns the notification message as a string. -pub fn notification_message() -> String { - use colored::Colorize; - - let mut output = String::new(); - output += &r#" - - ================================================================================================== - - 🚧 Welcome to Aleo - Calibration Period 🚧 - - ================================================================================================== - - During the calibration period, the network will be running in limited capacity. - - This calibration period is to ensure validators are stable and ready for mainnet launch. - During this period, the objective is to assess, adjust, and align validators' performance, - stability, and interoperability under varying network conditions. - - Please expect several network resets. With each network reset, software updates will - be performed to address potential bottlenecks, vulnerabilities, and/or inefficiencies, which - will ensure optimal performance for the ecosystem of validators, provers, and developers. - - ================================================================================================== - - Duration: - - Start Date: September 27, 2023 - - End Date: October 18, 2023 (subject to change) - - Participation: - - Node operators are NOT REQUIRED to participate during this calibration period. - - Network Resets: - - IMPORTANT: EXPECT MULTIPLE NETWORK RESETS. - - If participating, BE PREPARED TO RESET YOUR NODE AT ANY TIME. - - When a reset occurs, RUN THE FOLLOWING TO RESET YOUR NODE: - - git checkout mainnet && git pull - - cargo install --locked --path . 
- - snarkos clean - - snarkos start --nodisplay --client - - Communication: - - Stay ONLINE and MONITOR our Discord and Twitter for community updates. - - Purpose: - - This period is STRICTLY FOR NETWORK CALIBRATION. - - This period is NOT INTENDED for general-purpose usage by developers and provers. - - Incentives: - - There are NO INCENTIVES during this calibration period. - - ================================================================================================== -"# - .white() - .bold(); - - output -} diff --git a/node/src/prover/mod.rs b/node/src/prover/mod.rs index 70e279e4ba..31f534d735 100644 --- a/node/src/prover/mod.rs +++ b/node/src/prover/mod.rs @@ -154,10 +154,9 @@ impl> Prover { node.initialize_routing().await; // Initialize the puzzle. node.initialize_puzzle().await; - // Initialize the notification message loop. - node.handles.lock().push(crate::start_notification_message_loop()); // Pass the node to the signal handler. let _ = signal_node.set(node.clone()); + // Return the node. Ok(node) } diff --git a/node/src/validator/mod.rs b/node/src/validator/mod.rs index c87c7ad436..49e8a53196 100644 --- a/node/src/validator/mod.rs +++ b/node/src/validator/mod.rs @@ -187,10 +187,9 @@ impl> Validator { // Initialize the routing. node.initialize_routing().await; - // Initialize the notification message loop. - node.handles.lock().push(crate::start_notification_message_loop()); // Pass the node to the signal handler. let _ = signal_node.set(node.clone()); + // Return the node. Ok(node) } From 70ca8f4960639926527893f4e19dc9f227658d14 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Mon, 6 Oct 2025 16:59:03 -0700 Subject: [PATCH 02/13] ci: separate test compilation and test execution steps --- .circleci/config.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f728ef6aa9..1d94c98f5c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -137,7 +137,11 @@ commands: cache_key: v3.3.1-rust-1.88.0-<< parameters.cache_key >>-cache - run: no_output_timeout: 30m - command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 cargo test << parameters.flags >> + command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 cargo test << parameters.flags >> --no-run + - run: + name: "Run Tests" + no_output_timeout: 30m + command: cd << parameters.workspace_member >> && RUST_MIN_STACK=67108864 RUST_BACKTRACE=1 RUST_LOG=snarkos=trace cargo test << parameters.flags >> - clear_environment: cache_key: v3.3.1-rust-1.88.0-<< parameters.cache_key >>-cache From eee15bbbf4caea2cf6eda03f87f2ce2be6b06087 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Mon, 6 Oct 2025 17:25:54 -0700 Subject: [PATCH 03/13] feat(display): show logs in terminal after the UI has stopped --- display/src/lib.rs | 14 ++++++++++++-- display/src/pages/logs.rs | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/display/src/lib.rs b/display/src/lib.rs index 1a03058594..146100861f 100644 --- a/display/src/lib.rs +++ b/display/src/lib.rs @@ -41,6 +41,7 @@ use ratatui::{ }; use std::{ io, + io::Write, thread, time::{Duration, Instant}, }; @@ -91,11 +92,20 @@ impl Display { execute!(terminal.backend_mut(), LeaveAlternateScreen, DisableMouseCapture)?; terminal.show_cursor()?; - // Exit. + // Print any error that may have occurred. if let Err(err) = res { - println!("{err:?}") + eprintln!("{err:?}"); } + // Write any remaining log output to stdout while the node is shutting down. 
+ let mut log_receiver = display.logs.into_log_receiver(); + tokio::spawn(async move { + let mut stdout = io::stdout(); + while let Some(log) = log_receiver.recv().await { + let _ = write!(stdout, "{}", String::from_utf8(log).unwrap_or_default()); + } + }); + Ok(()) } } diff --git a/display/src/pages/logs.rs b/display/src/pages/logs.rs index d9f4bd289c..aba6054499 100644 --- a/display/src/pages/logs.rs +++ b/display/src/pages/logs.rs @@ -72,4 +72,8 @@ impl Logs { .block(Block::default().borders(Borders::ALL).style(header_style()).title("Logs")); f.render_widget(combined_logs, chunks[0]); } + + pub fn into_log_receiver(self) -> mpsc::Receiver> { + self.log_receiver + } } From bc53c0a12b2143c098a3426459ad481b6b038e0d Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Mon, 6 Oct 2025 17:28:09 -0700 Subject: [PATCH 04/13] feat: shut down gracefully --- Cargo.lock | 15 ++- Cargo.toml | 7 +- cli/Cargo.toml | 3 + cli/src/commands/developer/scan.rs | 4 +- cli/src/commands/start.rs | 92 +++++++------- display/Cargo.toml | 3 + display/src/lib.rs | 15 ++- node/Cargo.toml | 6 +- node/bft/Cargo.toml | 3 + node/bft/examples/simple_node.rs | 8 +- node/bft/ledger-service/Cargo.toml | 3 + node/bft/ledger-service/src/ledger.rs | 22 ++-- node/bft/ledger-service/src/translucent.rs | 18 +-- node/bft/src/sync/mod.rs | 7 +- node/bft/tests/common/primary.rs | 5 +- node/bft/tests/common/utils.rs | 6 +- node/cdn/Cargo.toml | 3 + node/cdn/src/blocks.rs | 45 ++++--- node/src/client/mod.rs | 39 +++--- node/src/node.rs | 38 ++++-- node/src/prover/mod.rs | 29 +++-- node/src/traits.rs | 89 +++---------- node/src/validator/mod.rs | 25 ++-- node/tests/common/node.rs | 9 +- utilities/Cargo.toml | 25 ++++ utilities/src/lib.rs | 19 +++ utilities/src/signals.rs | 140 +++++++++++++++++++++ 27 files changed, 427 insertions(+), 251 deletions(-) create mode 100644 utilities/Cargo.toml create mode 100644 utilities/src/lib.rs create mode 100644 utilities/src/signals.rs diff --git a/Cargo.lock b/Cargo.lock index f3e7986523..694a3e1043 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3820,6 +3820,7 @@ dependencies = [ "snarkos-node-cdn", "snarkos-node-metrics", "snarkos-node-rest", + "snarkos-utilities", "snarkvm", "sys-info", "tempfile", @@ -3840,6 +3841,7 @@ dependencies = [ "crossterm 0.29.0", "ratatui", "snarkos-node", + "snarkos-utilities", "snarkvm", "tokio", ] @@ -3859,7 +3861,6 @@ dependencies = [ "locktick", "lru 0.16.1", "num_cpus", - "once_cell", "parking_lot", "paste", "pea2pea", @@ -3874,6 +3875,7 @@ dependencies = [ "snarkos-node-router", "snarkos-node-sync", "snarkos-node-tcp", + "snarkos-utilities", "snarkvm", "time", "tokio", @@ -3921,6 +3923,7 @@ dependencies = [ "snarkos-node-router", "snarkos-node-sync", "snarkos-node-tcp", + "snarkos-utilities", "snarkvm", "test-strategy 0.4.3", "time", @@ -3962,6 +3965,7 @@ dependencies = [ "rand 0.8.5", "rayon", "snarkos-node-metrics", + "snarkos-utilities", "snarkvm", "tokio", "tracing", @@ -3996,6 +4000,7 @@ dependencies = [ "serde", "serde_json", "snarkos-node-metrics", + "snarkos-utilities", "snarkvm", "tokio", "tokio-test", @@ -4182,6 +4187,14 @@ dependencies = [ "tracing", ] +[[package]] +name = "snarkos-utilities" +version = "4.2.1" +dependencies = [ + "tokio", + "tracing", +] + [[package]] name = "snarkvm" version = "4.2.1" diff --git a/Cargo.toml b/Cargo.toml index 28a7fcfb41..e313d2642b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,8 @@ members = [ "node/sync", "node/sync/communication-service", "node/sync/locators", - "node/tcp" + "node/tcp", + "utilities", ] 
[workspace.dependencies.aleo-std] @@ -240,6 +241,10 @@ version = "=4.2.2" path = "node/tcp" version = "=4.2.2" +[workspace.dependencies.snarkos-utilities] +path = "utilities" +version = "=4.2.1" + [[bin]] name = "snarkos" path = "snarkos/main.rs" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index d6e781284b..bd3788b39b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -121,6 +121,9 @@ optional = true [dependencies.snarkos-node-rest] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true features = [ "parameters", "circuit", "package" ] diff --git a/cli/src/commands/developer/scan.rs b/cli/src/commands/developer/scan.rs index 3284e82baf..41e5add81a 100644 --- a/cli/src/commands/developer/scan.rs +++ b/cli/src/commands/developer/scan.rs @@ -17,6 +17,8 @@ use super::DEFAULT_ENDPOINT; use crate::helpers::{args::prepare_endpoint, dev::get_development_key}; use snarkos_node_cdn::CDN_BASE_URL; +use snarkos_utilities::SimpleStoppable; + use snarkvm::{ console::network::Network, prelude::{Ciphertext, Field, FromBytes, Plaintext, PrivateKey, Record, ViewKey, block::Block}, @@ -285,7 +287,7 @@ impl Scan { let rt = tokio::runtime::Runtime::new()?; // Create a placeholder shutdown flag. - let _shutdown = Default::default(); + let _shutdown = SimpleStoppable::new(); // Copy endpoint for background task. let endpoint = endpoint.clone(); diff --git a/cli/src/commands/start.rs b/cli/src/commands/start.rs index 4e92cde64a..e4242f652f 100644 --- a/cli/src/commands/start.rs +++ b/cli/src/commands/start.rs @@ -23,6 +23,8 @@ use snarkos_node::{ rest::DEFAULT_REST_PORT, router::{DEFAULT_NODE_PORT, bootstrap_peers, messages::NodeType}, }; +use snarkos_utilities::SignalHandler; + use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -54,7 +56,10 @@ use std::{ path::PathBuf, sync::{Arc, atomic::AtomicBool}, }; -use tokio::runtime::{self, Runtime}; +use tokio::{ + runtime::{self, Runtime}, + sync::mpsc, +}; use tracing::warn; use ureq::http; @@ -254,7 +259,7 @@ pub struct Start { } impl Start { - /// Starts the snarkOS node. + /// Starts the snarkOS node and blocks until it terminates. pub fn parse(self) -> Result { // Prepare the shutdown flag. let shutdown: Arc = Default::default(); @@ -272,45 +277,30 @@ impl Start { // Initialize the runtime. Self::runtime().block_on(async move { // Error messages. - let node_parse_error = || "Failed to parse node arguments"; - let display_start_error = || "Failed to initialize the display"; + let node_parse_error = || "Failed to start node"; + let signal_handler = SignalHandler::new(); // Clone the configurations. - let mut cli = self.clone(); - // Parse the network. - match cli.network { - MainnetV0::ID => { - // Parse the node from the configurations. - let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. - Display::start(node, log_receiver).with_context(display_start_error)?; - } - } - TestnetV0::ID => { - // Parse the node from the configurations. - let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. - Display::start(node, log_receiver).with_context(display_start_error)?; - } - } - CanaryV0::ID => { - // Parse the node from the configurations. 
- let node = cli.parse_node::(shutdown.clone()).await.with_context(node_parse_error)?; - // If the display is enabled, render the display. - if !cli.nodisplay { - // Initialize the display. - Display::start(node, log_receiver).with_context(display_start_error)?; - } - } + let mut self_ = self.clone(); + + // Parse the node arguments, start it, and block until shutdown. + match self_.network { + MainnetV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, + + TestnetV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, + CanaryV0::ID => self_ + .parse_node::(log_receiver, signal_handler.clone()) + .await + .with_context(node_parse_error)?, _ => panic!("Invalid network ID specified"), }; - // Note: Do not move this. The pending await must be here otherwise - // other snarkOS commands will not exit. - std::future::pending::<()>().await; + Ok(String::new()) }) } @@ -569,9 +559,9 @@ impl Start { } } - /// Returns the node type corresponding to the given configurations. + /// Start the node and blocks until it terminates. #[rustfmt::skip] - async fn parse_node(&mut self, shutdown: Arc) -> Result> { + async fn parse_node(&mut self, log_receiver: mpsc::Receiver>, signal_handler: Arc) -> Result<()> { if !self.nobanner { // Print the welcome banner. println!("{}", crate::helpers::welcome_message()); @@ -710,21 +700,27 @@ impl Start { } }; - // TODO(kaimast): start the display earlier and show sync progress. if !self.nodisplay && !self.nocdn { println!("🪧 The terminal UI will not start until the node has finished syncing from the CDN. If this step takes too long, consider restarting with `--nodisplay`."); } // Initialize the node. - match node_type { - NodeType::Validator => Node::new_validator(node_ip, self.bft, rest_ip, self.rest_rps, account, &trusted_peers, &trusted_validators, genesis, cdn, storage_mode, self.allow_external_peers, dev_txs, self.dev, shutdown.clone()).await, - NodeType::Prover => Node::new_prover(node_ip, account, &trusted_peers, genesis, storage_mode, self.dev, shutdown.clone()).await, - NodeType::Client => Node::new_client(node_ip, rest_ip, self.rest_rps, account, &trusted_peers, genesis, cdn, storage_mode, self.rotate_external_peers, self.dev, shutdown).await + let node = match node_type { + NodeType::Validator => Node::new_validator(node_ip, self.bft, rest_ip, self.rest_rps, account, &trusted_peers, &trusted_validators, genesis, cdn, storage_mode, self.allow_external_peers, dev_txs, self.dev, signal_handler.clone()).await, + NodeType::Prover => Node::new_prover(node_ip, account, &trusted_peers, genesis, storage_mode, self.dev, signal_handler.clone()).await, + NodeType::Client => Node::new_client(node_ip, rest_ip, self.rest_rps, account, &trusted_peers, genesis, cdn, storage_mode, self.rotate_external_peers, self.dev, signal_handler.clone()).await + }?; + + if !self.nodisplay { + Display::start(node.clone(), log_receiver, signal_handler.clone()).with_context(|| "Failed to start the display")?; } + + node.wait_for_signals(&signal_handler).await; + Ok(()) } - /// Returns a runtime for the node. + /// Starts a rayon thread pool and tokio runtime for the node, and returns the tokio `Runtime`. fn runtime() -> Runtime { // Retrieve the number of cores. 
let num_cores = num_cpus::get(); @@ -735,14 +731,16 @@ impl Start { let (num_tokio_worker_threads, max_tokio_blocking_threads, num_rayon_cores_global) = (2 * num_cores, 512, num_cores); - // Initialize the parallelization parameters. + // Set up the rayon thread pool. + // A custom panic handler is not needed here, as rayon propagates the panic to the calling thread by default (except for `rayon::spawn` which we do not use). rayon::ThreadPoolBuilder::new() .stack_size(8 * 1024 * 1024) .num_threads(num_rayon_cores_global) .build_global() .unwrap(); - // Initialize the runtime configuration. + // Set up the tokio Runtime. + // TODO(kaimast): set up a panic handler here for each worker thread once [`tokio::runtime::Builder::unhandled_panic`](https://docs.rs/tokio/latest/tokio/runtime/struct.Builder.html#method.unhandled_panic) is stabilized. runtime::Builder::new_multi_thread() .enable_all() .thread_stack_size(8 * 1024 * 1024) diff --git a/display/Cargo.toml b/display/Cargo.toml index 7d94718ff9..873c19dd83 100644 --- a/display/Cargo.toml +++ b/display/Cargo.toml @@ -28,6 +28,9 @@ version = "0.29" [dependencies.snarkos-node] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true diff --git a/display/src/lib.rs b/display/src/lib.rs index 146100861f..1dda3193b9 100644 --- a/display/src/lib.rs +++ b/display/src/lib.rs @@ -22,6 +22,8 @@ mod tabs; use tabs::Tabs; use snarkos_node::Node; +use snarkos_utilities::Stoppable; + use snarkvm::prelude::Network; use anyhow::Result; @@ -42,6 +44,7 @@ use ratatui::{ use std::{ io, io::Write, + sync::Arc, thread, time::{Duration, Instant}, }; @@ -68,7 +71,7 @@ fn content_style() -> Style { impl Display { /// Initializes a new display. - pub fn start(node: Node, log_receiver: Receiver>) -> Result<()> { + pub fn start(node: Node, log_receiver: Receiver>, stoppable: Arc) -> Result<()> { // Initialize the display. enable_raw_mode()?; let mut stdout = io::stdout(); @@ -85,7 +88,7 @@ impl Display { }; // Render the display. - let res = display.render(&mut terminal); + let res = display.render(&mut terminal, stoppable); // Terminate the display. disable_raw_mode()?; @@ -112,7 +115,7 @@ impl Display { impl Display { /// Renders the display. - fn render(&mut self, terminal: &mut Terminal) -> io::Result<()> { + fn render(&mut self, terminal: &mut Terminal, stoppable: Arc) -> io::Result<()> { let mut last_tick = Instant::now(); loop { terminal.draw(|f| self.draw(f))?; @@ -124,11 +127,7 @@ impl Display { if let Event::Key(key) = event::read()? { match key.code { KeyCode::Esc => { - // // TODO (howardwu): @ljedrz to implement a wrapping scope for Display within Node/Server. 
- // #[allow(unused_must_use)] - // { - // self.node.shut_down(); - // } + stoppable.stop(); return Ok(()); } KeyCode::Left => self.tabs.previous(), diff --git a/node/Cargo.toml b/node/Cargo.toml index 18b431fd8e..841def34f1 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -78,9 +78,6 @@ workspace = true [dependencies.num_cpus] workspace = true -[dependencies.once_cell] -workspace = true - [dependencies.parking_lot] workspace = true @@ -118,6 +115,9 @@ workspace = true [dependencies.snarkos-node-tcp] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index 8eeb1434d2..ce119f4ebf 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -120,6 +120,9 @@ workspace = true [dependencies.snarkos-node-tcp] workspace = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.snarkvm] workspace = true features = [ "utilities" ] diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index 6001617668..b71e712d61 100644 --- a/node/bft/examples/simple_node.rs +++ b/node/bft/examples/simple_node.rs @@ -19,7 +19,6 @@ extern crate tracing; #[cfg(feature = "metrics")] extern crate snarkos_node_metrics as metrics; -use aleo_std::StorageMode; use snarkos_account::Account; use snarkos_node_bft::{ BFT, @@ -30,6 +29,9 @@ use snarkos_node_bft::{ use snarkos_node_bft_ledger_service::TranslucentLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; +use snarkos_utilities::SimpleStoppable; + +use aleo_std::StorageMode; use snarkvm::{ console::{account::PrivateKey, algorithms::BHP256, types::Address}, ledger::{ @@ -64,7 +66,7 @@ use std::{ net::{IpAddr, Ipv4Addr, SocketAddr}, path::PathBuf, str::FromStr, - sync::{Arc, Mutex, OnceLock, atomic::AtomicBool}, + sync::{Arc, Mutex, OnceLock}, }; use tokio::{net::TcpListener, sync::oneshot}; use tracing_subscriber::{ @@ -221,7 +223,7 @@ fn create_ledger( } let mut rng = TestRng::default(); let gen_ledger = genesis_ledger(*gen_key, committee.clone(), balances.clone(), node_id, &mut rng); - Arc::new(TranslucentLedgerService::new(gen_ledger, Arc::new(AtomicBool::new(false)))) + Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())) } pub type CurrentLedger = Ledger>; diff --git a/node/bft/ledger-service/Cargo.toml b/node/bft/ledger-service/Cargo.toml index d468168c54..4c84ed3d8a 100644 --- a/node/bft/ledger-service/Cargo.toml +++ b/node/bft/ledger-service/Cargo.toml @@ -48,6 +48,9 @@ optional = true workspace = true optional = true +[dependencies.snarkos-utilities] +workspace = true + [dependencies.parking_lot] workspace = true optional = true diff --git a/node/bft/ledger-service/src/ledger.rs b/node/bft/ledger-service/src/ledger.rs index bdf506e32e..5d9a0aa0c0 100644 --- a/node/bft/ledger-service/src/ledger.rs +++ b/node/bft/ledger-service/src/ledger.rs @@ -14,6 +14,9 @@ // limitations under the License. use crate::{LedgerService, fmt_id, spawn_blocking}; + +use snarkos_utilities::Stoppable; + use snarkvm::{ ledger::{ Ledger, @@ -46,16 +49,7 @@ use parking_lot::RwLock; #[cfg(not(feature = "serial"))] use rayon::prelude::*; -use std::{ - collections::BTreeMap, - fmt, - io::Read, - ops::Range, - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, -}; +use std::{collections::BTreeMap, fmt, io::Read, ops::Range, sync::Arc}; /// The capacity of the cache holding the highest blocks. 
const BLOCK_CACHE_SIZE: usize = 10; @@ -66,14 +60,14 @@ pub struct CoreLedgerService> { ledger: Ledger, block_cache: Arc>>>, latest_leader: Arc)>>>, - shutdown: Arc, + stoppable: Arc, } impl> CoreLedgerService { /// Initializes a new core ledger service. - pub fn new(ledger: Ledger, shutdown: Arc) -> Self { + pub fn new(ledger: Ledger, stoppable: Arc) -> Self { let block_cache = Arc::new(RwLock::new(BTreeMap::new())); - Self { ledger, block_cache, latest_leader: Default::default(), shutdown } + Self { ledger, block_cache, latest_leader: Default::default(), stoppable } } } @@ -371,7 +365,7 @@ impl> LedgerService for CoreLedgerService< #[cfg(feature = "ledger-write")] fn advance_to_next_block(&self, block: &Block) -> Result<()> { // If the Ctrl-C handler registered the signal, then skip advancing to the next block. - if self.shutdown.load(Ordering::Acquire) { + if self.stoppable.is_stopped() { bail!("Skipping advancing to block {} - The node is shutting down", block.height()); } // Advance to the next block. diff --git a/node/bft/ledger-service/src/translucent.rs b/node/bft/ledger-service/src/translucent.rs index c6f48a41ec..4c4fba4525 100644 --- a/node/bft/ledger-service/src/translucent.rs +++ b/node/bft/ledger-service/src/translucent.rs @@ -14,8 +14,9 @@ // limitations under the License. use crate::{CoreLedgerService, LedgerService}; -use async_trait::async_trait; -use indexmap::IndexMap; + +use snarkos_utilities::Stoppable; + use snarkvm::{ ledger::{ Ledger, @@ -27,11 +28,10 @@ use snarkvm::{ }, prelude::{Address, ConsensusVersion, Field, Network, Result, narwhal::BatchCertificate}, }; -use std::{ - fmt, - ops::Range, - sync::{Arc, atomic::AtomicBool}, -}; + +use async_trait::async_trait; +use indexmap::IndexMap; +use std::{fmt, ops::Range, sync::Arc}; pub struct TranslucentLedgerService> { inner: CoreLedgerService, @@ -46,8 +46,8 @@ impl> fmt::Debug for TranslucentLedgerService impl> TranslucentLedgerService { /// Initializes a new ledger service wrapper. - pub fn new(ledger: Ledger, shutdown: Arc) -> Self { - Self { inner: CoreLedgerService::new(ledger, shutdown) } + pub fn new(ledger: Ledger, stoppable: Arc) -> Self { + Self { inner: CoreLedgerService::new(ledger, stoppable) } } } diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 894b266d78..0051a0a180 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -951,6 +951,7 @@ mod tests { use snarkos_account::Account; use snarkos_node_sync::BlockSync; + use snarkos_utilities::SimpleStoppable; use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -1002,7 +1003,7 @@ mod tests { // Initialize the ledger with the genesis block. let ledger = CurrentLedger::load(genesis.clone(), StorageMode::new_test(None)).unwrap(); // Initialize the ledger. - let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), Default::default())); + let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), SimpleStoppable::new())); // Sample 5 rounds of batch certificates starting at the genesis round from a static set of 4 authors. let (round_to_certificates_map, committee) = { @@ -1176,7 +1177,7 @@ mod tests { let storage_mode = StorageMode::new_test(None); let syncing_ledger = Arc::new(CoreLedgerService::new( CurrentLedger::load(genesis, storage_mode.clone()).unwrap(), - Default::default(), + SimpleStoppable::new(), )); // Initialize the gateway. let gateway = @@ -1229,7 +1230,7 @@ mod tests { // Initialize the ledger with the genesis block. 
let ledger = CurrentLedger::load(genesis.clone(), StorageMode::new_test(None)).unwrap(); // Initialize the ledger. - let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), Default::default())); + let core_ledger = Arc::new(CoreLedgerService::new(ledger.clone(), SimpleStoppable::new())); // Sample rounds of batch certificates starting at the genesis round from a static set of 4 authors. let (round_to_certificates_map, committee) = { // Initialize the committee. diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index 1e29f26d99..d9e4f5e0d6 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -18,6 +18,7 @@ use crate::common::{ TranslucentLedgerService, utils::{fire_unconfirmed_solutions, fire_unconfirmed_transactions, initialize_logger}, }; + use snarkos_account::Account; use snarkos_node_bft::{ BFT, @@ -29,6 +30,8 @@ use snarkos_node_bft::{ use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_router::PeerPoolHandling; use snarkos_node_sync::BlockSync; +use snarkos_utilities::SimpleStoppable; + use snarkvm::{ console::{ account::{Address, PrivateKey}, @@ -160,7 +163,7 @@ impl TestNetwork { for (id, account) in accounts.into_iter().enumerate() { let gen_ledger = genesis_ledger(gen_key, committee.clone(), balances.clone(), bonded_balances.clone(), &mut rng); - let ledger = Arc::new(TranslucentLedgerService::new(gen_ledger, Default::default())); + let ledger = Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())); let storage = Storage::new( ledger.clone(), Arc::new(BFTMemoryService::new()), diff --git a/node/bft/tests/common/utils.rs b/node/bft/tests/common/utils.rs index a269a41787..e61e9d5b75 100644 --- a/node/bft/tests/common/utils.rs +++ b/node/bft/tests/common/utils.rs @@ -22,7 +22,9 @@ use snarkos_node_bft::{ helpers::{PrimarySender, Storage}, }; -use snarkos_node_bft_storage_service::BFTMemoryService; +use snarkos_node_bft::storage_service::BFTMemoryService; +use snarkos_utilities::SimpleStoppable; + use snarkvm::{ console::account::Address, ledger::{ @@ -202,7 +204,7 @@ pub fn sample_ledger( let gen_ledger = primary::genesis_ledger(gen_key, committee.clone(), balances.clone(), bonded_balances.clone(), rng); - Arc::new(TranslucentLedgerService::new(gen_ledger, Default::default())) + Arc::new(TranslucentLedgerService::new(gen_ledger, SimpleStoppable::new())) } /// Samples a new storage with the given ledger. 
diff --git a/node/cdn/Cargo.toml b/node/cdn/Cargo.toml index e626730108..4771bc25c7 100644 --- a/node/cdn/Cargo.toml +++ b/node/cdn/Cargo.toml @@ -45,6 +45,9 @@ workspace = true optional = true features = [ "metrics" ] +[dependencies.snarkos-utilities] +workspace = true + [dependencies.rayon] workspace = true optional = true diff --git a/node/cdn/src/blocks.rs b/node/cdn/src/blocks.rs index 4b489d195e..ab9a13db10 100644 --- a/node/cdn/src/blocks.rs +++ b/node/cdn/src/blocks.rs @@ -17,6 +17,8 @@ // https://github.com/rust-lang/rust-clippy/issues/6446 #![allow(clippy::await_holding_lock)] +use snarkos_utilities::Stoppable; + use snarkvm::prelude::{ Deserialize, DeserializeOwned, @@ -87,11 +89,11 @@ impl CdnBlockSync { pub fn new>( base_url: http::Uri, ledger: Ledger, - shutdown: Arc, + stoppable: Arc, ) -> Self { let task = { let base_url = base_url.clone(); - tokio::spawn(async move { Self::worker(base_url, ledger, shutdown).await }) + tokio::spawn(async move { Self::worker(base_url, ledger, stoppable).await }) }; debug!("Started sync from CDN at {base_url}"); @@ -119,13 +121,13 @@ impl CdnBlockSync { async fn worker>( base_url: http::Uri, ledger: Ledger, - shutdown: Arc, + stoppable: Arc, ) -> SyncResult { // Fetch the node height. let start_height = ledger.latest_height() + 1; // Load the blocks from the CDN into the ledger. let ledger_clone = ledger.clone(); - let result = load_blocks(&base_url, start_height, None, shutdown, move |block: Block| { + let result = load_blocks(&base_url, start_height, None, stoppable, move |block: Block| { ledger_clone.advance_to_next_block(&block) }) .await; @@ -172,7 +174,7 @@ pub async fn load_blocks( base_url: &http::Uri, start_height: u32, end_height: Option, - shutdown: Arc, + stoppable: Arc, process: impl FnMut(Block) -> Result<()> + Clone + Send + Sync + 'static, ) -> Result { // Create a Client to maintain a connection pool throughout the sync. @@ -225,16 +227,19 @@ pub async fn load_blocks( // Spawn a background task responsible for concurrent downloads. let pending_blocks_clone = pending_blocks.clone(); let base_url = base_url.to_owned(); - let shutdown_clone = shutdown.clone(); - tokio::spawn(async move { - download_block_bundles(client, &base_url, cdn_start, cdn_end, pending_blocks_clone, shutdown_clone).await; - }); + + { + let stoppable = stoppable.clone(); + tokio::spawn(async move { + download_block_bundles(client, &base_url, cdn_start, cdn_end, pending_blocks_clone, stoppable).await; + }); + } // A loop for inserting the pending blocks into the ledger. let mut current_height = start_height.saturating_sub(1); while current_height < end_height - 1 { // If we are instructed to shut down, abort. - if shutdown.load(Ordering::Acquire) { + if stoppable.is_stopped() { info!("Stopping block sync at {} - shutting down", current_height); // We can shut down cleanly from here, as the node hasn't been started yet. std::process::exit(0); @@ -269,12 +274,12 @@ pub async fn load_blocks( // Attempt to advance the ledger using the CDN block bundle. let mut process_clone = process.clone(); - let shutdown_clone = shutdown.clone(); + let stoppable_clone = stoppable.clone(); current_height = tokio::task::spawn_blocking(move || { threadpool.install(|| { for block in next_blocks.into_iter().filter(|b| (start_height..end_height).contains(&b.height())) { // If we are instructed to shut down, abort. 
- if shutdown_clone.load(Ordering::Relaxed) { + if stoppable_clone.is_stopped() { info!("Stopping block sync at {} - the node is shutting down", current_height); // We can shut down cleanly from here, as the node hasn't been started yet. std::process::exit(0); @@ -314,7 +319,7 @@ async fn download_block_bundles( cdn_start: u32, cdn_end: u32, pending_blocks: Arc>>>, - shutdown: Arc, + stoppable: Arc, ) { // Keep track of the number of concurrent requests. let active_requests: Arc = Default::default(); @@ -322,7 +327,7 @@ async fn download_block_bundles( let mut start = cdn_start; while start < cdn_end - 1 { // If we are instructed to shut down, stop downloading. - if shutdown.load(Ordering::Acquire) { + if stoppable.is_stopped() { break; } @@ -356,7 +361,7 @@ async fn download_block_bundles( let base_url_clone = base_url.clone(); let pending_blocks_clone = pending_blocks.clone(); let active_requests_clone = active_requests.clone(); - let shutdown_clone = shutdown.clone(); + let stoppable_clone = stoppable.clone(); tokio::spawn(async move { // Increment the number of active requests. active_requests_clone.fetch_add(1, Ordering::Relaxed); @@ -392,7 +397,7 @@ async fn download_block_bundles( attempts += 1; if attempts > MAXIMUM_REQUEST_ATTEMPTS { warn!("Maximum number of requests to {blocks_url} reached - shutting down..."); - shutdown_clone.store(true, Ordering::Relaxed); + stoppable_clone.stop(); break; } tokio::time::sleep(Duration::from_secs(attempts as u64 * 10)).await; @@ -553,8 +558,10 @@ fn log_progress( #[cfg(test)] mod tests { - use super::{BLOCKS_PER_FILE, CDN_BASE_URL, cdn_height, log_progress}; - use crate::load_blocks; + use super::{BLOCKS_PER_FILE, CDN_BASE_URL, cdn_height, load_blocks, log_progress}; + + use snarkos_utilities::SimpleStoppable; + use snarkvm::prelude::{MainnetV0, block::Block}; use http::Uri; @@ -576,7 +583,7 @@ mod tests { let rt = tokio::runtime::Runtime::new().unwrap(); rt.block_on(async { let completed_height = - load_blocks(&testnet_cdn_url, start, end, Default::default(), process).await.unwrap(); + load_blocks(&testnet_cdn_url, start, end, SimpleStoppable::new(), process).await.unwrap(); assert_eq!(blocks.read().len(), expected); if expected > 0 { assert_eq!(blocks.read().last().unwrap().height(), completed_height); diff --git a/node/src/client/mod.rs b/node/src/client/mod.rs index 646abe3ad8..2f6435b53e 100644 --- a/node/src/client/mod.rs +++ b/node/src/client/mod.rs @@ -15,11 +15,13 @@ mod router; -use crate::traits::NodeInterface; +use crate::{ + bft::{events::DataBlocks, helpers::fmt_id, ledger_service::CoreLedgerService}, + cdn::CdnBlockSync, + traits::NodeInterface, +}; use snarkos_account::Account; -use snarkos_node_bft::{events::DataBlocks, helpers::fmt_id, ledger_service::CoreLedgerService}; -use snarkos_node_cdn::CdnBlockSync; use snarkos_node_rest::Rest; use snarkos_node_router::{ Heartbeat, @@ -34,6 +36,8 @@ use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; +use snarkos_utilities::{SignalHandler, Stoppable}; + use snarkvm::{ console::network::Network, ledger::{ @@ -60,7 +64,6 @@ use std::{ sync::{ Arc, atomic::{ - AtomicBool, AtomicUsize, Ordering::{Acquire, Relaxed}, }, @@ -121,10 +124,10 @@ pub struct Client> { num_verifying_executions: Arc, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, /// Keeps track of sending pings. ping: Arc>, + /// The signal handling logic. 
+ signal_handler: Arc, } impl> Client { @@ -140,16 +143,13 @@ impl> Client { storage_mode: StorageMode, rotate_external_peers: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. - let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger. let ledger = Ledger::::load(genesis.clone(), storage_mode.clone())?; // Initialize the ledger service. - let ledger_service = Arc::new(CoreLedgerService::::new(ledger.clone(), shutdown.clone())); + let ledger_service = Arc::new(CoreLedgerService::::new(ledger.clone(), signal_handler.clone())); // Determine if the client should allow external peers. let allow_external_peers = true; @@ -191,13 +191,13 @@ impl> Client { num_verifying_deploys: Default::default(), num_verifying_executions: Default::default(), handles: Default::default(), - shutdown: shutdown.clone(), + signal_handler: signal_handler.clone(), }; // Perform sync with CDN (if enabled). let cdn_sync = cdn.map(|base_url| { trace!("CDN sync is enabled"); - Arc::new(CdnBlockSync::new(base_url, ledger.clone(), shutdown)) + Arc::new(CdnBlockSync::new(base_url, ledger.clone(), signal_handler)) }); // Initialize the REST server. @@ -219,8 +219,6 @@ impl> Client { // Initialize the routing. node.initialize_routing().await; - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); // Initialize the sync module. node.initialize_sync(); // Initialize solution verification. @@ -263,8 +261,8 @@ impl> Client { self.handles.lock().push(tokio::spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if _self.shutdown.load(std::sync::atomic::Ordering::Acquire) { - info!("Shutting down block production"); + if _self.signal_handler.is_stopped() { + info!("Shutting down sync task"); break; } @@ -370,7 +368,7 @@ impl> Client { self.handles.lock().push(tokio::spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down solution verification"); break; } @@ -444,7 +442,7 @@ impl> Client { self.handles.lock().push(tokio::spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down deployment verification"); break; } @@ -512,7 +510,7 @@ impl> Client { self.handles.lock().push(tokio::spawn(async move { loop { // If the Ctrl-C handler registered the signal, stop the node. - if node.shutdown.load(Acquire) { + if node.signal_handler.is_stopped() { info!("Shutting down execution verification"); break; } @@ -590,7 +588,6 @@ impl> NodeInterface for Client { // Shut down the node. trace!("Shutting down the node..."); - self.shutdown.store(true, std::sync::atomic::Ordering::Release); // Abort the tasks. trace!("Shutting down the client..."); diff --git a/node/src/node.rs b/node/src/node.rs index 591d5a0846..5a0540d6ce 100644 --- a/node/src/node.rs +++ b/node/src/node.rs @@ -13,9 +13,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::{Client, Prover, Validator, traits::NodeInterface}; +use crate::{ + Client, + Prover, + Validator, + router::{Outbound, Router, messages::NodeType}, + traits::NodeInterface, +}; + use snarkos_account::Account; -use snarkos_node_router::{Outbound, Router, messages::NodeType}; +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{ Address, Ledger, @@ -28,10 +36,7 @@ use snarkvm::prelude::{ use aleo_std::StorageMode; use anyhow::Result; -use std::{ - net::SocketAddr, - sync::{Arc, atomic::AtomicBool}, -}; +use std::{net::SocketAddr, sync::Arc}; #[derive(Clone)] pub enum Node { @@ -59,7 +64,7 @@ impl Node { allow_external_peers: bool, dev_txs: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Validator(Arc::new( Validator::new( @@ -76,7 +81,7 @@ impl Node { allow_external_peers, dev_txs, dev, - shutdown, + signal_handler, ) .await?, ))) @@ -90,10 +95,10 @@ impl Node { genesis: Block, storage_mode: StorageMode, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Prover(Arc::new( - Prover::new(node_ip, account, trusted_peers, genesis, storage_mode, dev, shutdown).await?, + Prover::new(node_ip, account, trusted_peers, genesis, storage_mode, dev, signal_handler).await?, ))) } @@ -109,7 +114,7 @@ impl Node { storage_mode: StorageMode, rotate_external_peers: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { Ok(Self::Client(Arc::new( Client::new( @@ -123,7 +128,7 @@ impl Node { storage_mode, rotate_external_peers, dev, - shutdown, + signal_handler, ) .await?, ))) @@ -229,4 +234,13 @@ impl Node { Self::Client(node) => node.shut_down().await, } } + + /// Waits until the node receives a signal. + pub async fn wait_for_signals(&self, signal_handler: &SignalHandler) { + match self { + Self::Validator(node) => node.wait_for_signals(signal_handler).await, + Self::Prover(node) => node.wait_for_signals(signal_handler).await, + Self::Client(node) => node.wait_for_signals(signal_handler).await, + } + } } diff --git a/node/src/prover/mod.rs b/node/src/prover/mod.rs index 31f534d735..5c44e81d1a 100644 --- a/node/src/prover/mod.rs +++ b/node/src/prover/mod.rs @@ -15,9 +15,13 @@ mod router; -use crate::traits::NodeInterface; +use crate::{ + bft::ledger_service::ProverLedgerService, + sync::{BlockSync, Ping}, + traits::NodeInterface, +}; + use snarkos_account::Account; -use snarkos_node_bft::ledger_service::ProverLedgerService; use snarkos_node_router::{ Heartbeat, Inbound, @@ -27,11 +31,12 @@ use snarkos_node_router::{ Routing, messages::{Message, NodeType, UnconfirmedSolution}, }; -use snarkos_node_sync::{BlockSync, Ping}; use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; +use snarkos_utilities::{SignalHandler, Stoppable}; + use snarkvm::{ ledger::narwhal::Data, prelude::{ @@ -57,7 +62,7 @@ use std::{ net::SocketAddr, sync::{ Arc, - atomic::{AtomicBool, AtomicU8, Ordering}, + atomic::{AtomicU8, Ordering}, }, }; use tokio::task::JoinHandle; @@ -83,10 +88,10 @@ pub struct Prover> { max_puzzle_instances: u8, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, /// Keeps track of sending pings. ping: Arc>, + /// The signal handling logic. + signal_handler: Arc, /// PhantomData. _phantom: PhantomData, } @@ -100,11 +105,8 @@ impl> Prover { genesis: Block, storage_mode: StorageMode, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. 
- let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger service. let ledger_service = Arc::new(ProverLedgerService::new()); // Determine if the prover should allow external peers. @@ -147,15 +149,13 @@ impl> Prover { max_puzzle_instances: u8::try_from(max_puzzle_instances)?, handles: Default::default(), ping, - shutdown, + signal_handler, _phantom: Default::default(), }; // Initialize the routing. node.initialize_routing().await; // Initialize the puzzle. node.initialize_puzzle().await; - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); // Return the node. Ok(node) @@ -174,7 +174,6 @@ impl> NodeInterface for Prover { // Shut down the puzzle. debug!("Shutting down the puzzle..."); - self.shutdown.store(true, Ordering::Release); // Abort the tasks. debug!("Shutting down the prover..."); @@ -245,7 +244,7 @@ impl> Prover { } // If the Ctrl-C handler registered the signal, stop the prover. - if self.shutdown.load(Ordering::Acquire) { + if self.signal_handler.is_stopped() { debug!("Shutting down the puzzle..."); break; } diff --git a/node/src/traits.rs b/node/src/traits.rs index 0c031c0ec9..481d700bda 100644 --- a/node/src/traits.rs +++ b/node/src/traits.rs @@ -13,19 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use snarkos_node_router::{Routing, messages::NodeType}; +use crate::router::{Routing, messages::NodeType}; + +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{Address, Network, PrivateKey, ViewKey}; -use once_cell::sync::OnceCell; -use std::{ - future::Future, - io, - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, - time::Duration, -}; +use std::time::Duration; #[async_trait] pub trait NodeInterface: Routing { @@ -56,65 +50,20 @@ pub trait NodeInterface: Routing { /// Handles OS signals for the node to intercept and perform a clean shutdown. /// The optional `shutdown_flag` flag can be used to cleanly terminate the syncing process. - fn handle_signals(shutdown_flag: Arc) -> Arc> { - // In order for the signal handler to be started as early as possible, a reference to the node needs - // to be passed to it at a later time. - let node: Arc> = Default::default(); - - #[cfg(target_family = "unix")] - fn signal_listener() -> impl Future> { - use tokio::signal::unix::{SignalKind, signal}; - - // Handle SIGINT, SIGTERM, SIGQUIT, and SIGHUP. - let mut s_int = signal(SignalKind::interrupt()).unwrap(); - let mut s_term = signal(SignalKind::terminate()).unwrap(); - let mut s_quit = signal(SignalKind::quit()).unwrap(); - let mut s_hup = signal(SignalKind::hangup()).unwrap(); - - // Return when any of the signals above is received. 
- async move { - tokio::select!( - _ = s_int.recv() => (), - _ = s_term.recv() => (), - _ = s_quit.recv() => (), - _ = s_hup.recv() => (), - ); - Ok(()) - } - } - #[cfg(not(target_family = "unix"))] - fn signal_listener() -> impl Future> { - tokio::signal::ctrl_c() - } - - let node_clone = node.clone(); - tokio::task::spawn(async move { - match signal_listener().await { - Ok(()) => { - warn!("=========================================================================================="); - warn!("⚠️ Attention - Starting the graceful shutdown procedure (ETA: 30 seconds)..."); - warn!("⚠️ Attention - To avoid DATA CORRUPTION, do NOT interrupt snarkOS (or press Ctrl+C again)"); - warn!("⚠️ Attention - Please wait until the shutdown gracefully completes (ETA: 30 seconds)"); - warn!("=========================================================================================="); - - match node_clone.get() { - // If the node is already initialized, then shut it down. - Some(node) => node.shut_down().await, - // Otherwise, if the node is not yet initialized, then set the shutdown flag directly. - None => shutdown_flag.store(true, Ordering::Relaxed), - } - - // A best-effort attempt to let any ongoing activity conclude. - tokio::time::sleep(Duration::from_secs(3)).await; - - // Terminate the process. - std::process::exit(0); - } - Err(error) => error!("tokio::signal::ctrl_c encountered an error: {}", error), - } - }); - - node + async fn wait_for_signals(&self, handler: &SignalHandler) { + handler.wait_for_signals().await; + + warn!("=========================================================================================="); + warn!("⚠️ Attention - Starting the graceful shutdown procedure (ETA: 30 seconds)..."); + warn!("⚠️ Attention - To avoid DATA CORRUPTION, do NOT interrupt snarkOS (or press Ctrl+C again)"); + warn!("⚠️ Attention - Please wait until the shutdown gracefully completes (ETA: 30 seconds)"); + warn!("=========================================================================================="); + + // If the node is already initialized, then shut it down. + self.shut_down().await; + + // A best-effort attempt to let any ongoing activity conclude. + tokio::time::sleep(Duration::from_secs(3)).await; } /// Shuts down the node. diff --git a/node/src/validator/mod.rs b/node/src/validator/mod.rs index 49e8a53196..5a85a48391 100644 --- a/node/src/validator/mod.rs +++ b/node/src/validator/mod.rs @@ -36,6 +36,8 @@ use snarkos_node_tcp::{ P2P, protocols::{Disconnect, Handshake, OnConnect, Reading}, }; +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{ Ledger, Network, @@ -51,11 +53,7 @@ use core::future::Future; use locktick::parking_lot::Mutex; #[cfg(not(feature = "locktick"))] use parking_lot::Mutex; -use std::{ - net::SocketAddr, - sync::{Arc, atomic::AtomicBool}, - time::Duration, -}; +use std::{net::SocketAddr, sync::Arc, time::Duration}; use tokio::task::JoinHandle; /// A validator is a full node, capable of validating blocks. @@ -73,8 +71,6 @@ pub struct Validator> { sync: Arc>, /// The spawned handles. handles: Arc>>>, - /// The shutdown signal. - shutdown: Arc, /// Keeps track of sending pings. ping: Arc>, } @@ -95,16 +91,13 @@ impl> Validator { allow_external_peers: bool, dev_txs: bool, dev: Option, - shutdown: Arc, + signal_handler: Arc, ) -> Result { - // Initialize the signal handler. - let signal_node = Self::handle_signals(shutdown.clone()); - // Initialize the ledger. let ledger = Ledger::load(genesis, storage_mode.clone())?; // Initialize the ledger service. 
- let ledger_service = Arc::new(CoreLedgerService::new(ledger.clone(), shutdown.clone())); + let ledger_service = Arc::new(CoreLedgerService::new(ledger.clone(), signal_handler.clone())); // Determine if the validator should rotate external peers. let rotate_external_peers = false; @@ -151,11 +144,10 @@ impl> Validator { sync: sync.clone(), ping, handles: Default::default(), - shutdown: shutdown.clone(), }; // Perform sync with CDN (if enabled). - let cdn_sync = cdn.map(|base_url| Arc::new(CdnBlockSync::new(base_url, ledger.clone(), shutdown))); + let cdn_sync = cdn.map(|base_url| Arc::new(CdnBlockSync::new(base_url, ledger.clone(), signal_handler))); // Initialize the transaction pool. node.initialize_transaction_pool(dev, dev_txs)?; @@ -187,8 +179,6 @@ impl> Validator { // Initialize the routing. node.initialize_routing().await; - // Pass the node to the signal handler. - let _ = signal_node.set(node.clone()); // Return the node. Ok(node) @@ -461,7 +451,6 @@ impl> NodeInterface for Validator { // Shut down the node. trace!("Shutting down the node..."); - self.shutdown.store(true, std::sync::atomic::Ordering::Release); // Abort the tasks. trace!("Shutting down the validator..."); @@ -531,7 +520,7 @@ mod tests { false, dev_txs, None, - Default::default(), + SignalHandler::new(), ) .await .unwrap(); diff --git a/node/tests/common/node.rs b/node/tests/common/node.rs index e161a703e7..67f31dc4b6 100644 --- a/node/tests/common/node.rs +++ b/node/tests/common/node.rs @@ -14,8 +14,11 @@ // limitations under the License. use crate::common::test_peer::sample_genesis_block; + use snarkos_account::Account; use snarkos_node::{Client, Prover, Validator}; +use snarkos_utilities::SignalHandler; + use snarkvm::prelude::{MainnetV0 as CurrentNetwork, store::helpers::memory::ConsensusMemory}; use aleo_std::StorageMode; @@ -33,7 +36,7 @@ pub async fn client() -> Client> StorageMode::new_test(None), false, // No extra peer rotation. None, - Default::default(), + SignalHandler::new(), ) .await .expect("couldn't create client instance") @@ -47,7 +50,7 @@ pub async fn prover() -> Prover> sample_genesis_block(), StorageMode::new_test(None), None, - Default::default(), + SignalHandler::new(), ) .await .expect("couldn't create prover instance") @@ -68,7 +71,7 @@ pub async fn validator() -> Validator" ] +description = "Utilities for a decentralized operating system" +homepage = "https://aleo.org" +repository = "https://github.com/ProvableHQ/snarkOS" +keywords = [ + "aleo", + "cryptography", + "blockchain", + "decentralized", + "zero-knowledge" +] +include = ["../LICENSE.md"] +categories = [ "cryptography", "cryptography::cryptocurrencies", "os" ] +license = "Apache-2.0" +edition = "2024" + +[dependencies.tokio] +workspace = true +features = [ "macros", "signal" ] + +[dependencies.tracing] +workspace = true diff --git a/utilities/src/lib.rs b/utilities/src/lib.rs new file mode 100644 index 0000000000..de7548d27a --- /dev/null +++ b/utilities/src/lib.rs @@ -0,0 +1,19 @@ +// Copyright (c) 2019-2025 Provable Inc. +// This file is part of the snarkOS library. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +/// Utilities for signal and shutdown handling. +pub mod signals; + +pub use signals::*; diff --git a/utilities/src/signals.rs b/utilities/src/signals.rs new file mode 100644 index 0000000000..99f1006a8c --- /dev/null +++ b/utilities/src/signals.rs @@ -0,0 +1,140 @@ +// Copyright (c) 2019-2025 Provable Inc. +// This file is part of the snarkOS library. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::{ + Arc, + atomic::{AtomicBool, Ordering}, +}; +use tokio::sync::Notify; + +use tracing::{debug, error}; + +/// Generic trait that can be queried for whether current process should be stopped. +/// This is implemented by `SignalHandler` and `SimpleStoppable`. +pub trait Stoppable: Send + Sync { + /// Initiates shutdown of the node. + fn stop(&self); + + /// Returns `true` if the node is (in the process of being) stopped. + fn is_stopped(&self) -> bool; +} + +/// Wrapper around `AtomicBool` that implements the `Stoppable` trait. +/// +/// This is useful when no signal or complex shutdown handling is necessary (e.g., in a test environment). +pub struct SimpleStoppable { + state: AtomicBool, +} + +impl SimpleStoppable { + pub fn new() -> Arc { + Arc::new(Self { state: AtomicBool::new(false) }) + } +} + +impl Stoppable for SimpleStoppable { + fn stop(&self) { + self.state.store(true, Ordering::SeqCst); + } + + fn is_stopped(&self) -> bool { + self.state.load(Ordering::SeqCst) + } +} + +/// Helper for signal handling that implements the `Stoppable` trait. +/// +/// This struct will set itself to "stopped" as soon as the process receives Ctrl+C. +/// It can also be manually stopped (e.g., when the node encounters a fatal error). +pub struct SignalHandler { + stopped: AtomicBool, + notify: Notify, +} + +impl SignalHandler { + /// Spawns a background tasks that listens for Ctrl+C and returns `Self`. + pub fn new() -> Arc { + let obj = Arc::new(Self { stopped: AtomicBool::new(false), notify: Default::default() }); + + { + let obj = obj.clone(); + tokio::spawn(async move { + obj.handle_signals().await; + }); + } + + obj + } + + /// Logic for the background task that waits for a signal. + async fn handle_signals(&self) { + #[cfg(target_family = "unix")] + let signal_listener = async move { + use tokio::signal::unix::{SignalKind, signal}; + + // Handle SIGINT, SIGTERM, SIGQUIT, and SIGHUP. 
+ let mut s_int = signal(SignalKind::interrupt())?; + let mut s_term = signal(SignalKind::terminate())?; + let mut s_quit = signal(SignalKind::quit())?; + let mut s_hup = signal(SignalKind::hangup())?; + + tokio::select!( + _ = s_int.recv() => debug!("Received SIGINT"), + _ = s_term.recv() => debug!("Received SIGTERM"), + _ = s_quit.recv() => debug!("Received SIGQUIT"), + _ = s_hup.recv() => debug!("Received SIGHUP"), + ); + + std::io::Result::<()>::Ok(()) + }; + + #[cfg(not(target_family = "unix"))] + let signal_listener = async move { + tokio::signal::ctrl_c().await?; + debug!("Received signal"); + + std::io::Result::<()>::Ok(()) + }; + + // Block until the signal. + match signal_listener.await { + Ok(()) => {} + Err(error) => { + error!("tokio::signal encountered an error: {error}"); + } + } + + self.stop(); + } + + /// Blocks until the signal handler was invoked or the stopped flag was set some other way. + /// Note: This can only be called once, and must not be called concurrently. + pub async fn wait_for_signals(&self) { + while !self.is_stopped() { + self.notify.notified().await + } + } +} + +impl Stoppable for SignalHandler { + fn stop(&self) { + self.stopped.store(true, Ordering::SeqCst); + self.notify.notify_one(); + } + + fn is_stopped(&self) -> bool { + self.stopped.load(Ordering::SeqCst) + } +} From fe9131c0250f909a5835e6622620b6633e93dec7 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Mon, 6 Oct 2025 17:33:06 -0700 Subject: [PATCH 05/13] misc(node): remove obsolete sleep during shutdown --- node/src/traits.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/node/src/traits.rs b/node/src/traits.rs index 481d700bda..1e0f3aaed6 100644 --- a/node/src/traits.rs +++ b/node/src/traits.rs @@ -19,8 +19,6 @@ use snarkos_utilities::SignalHandler; use snarkvm::prelude::{Address, Network, PrivateKey, ViewKey}; -use std::time::Duration; - #[async_trait] pub trait NodeInterface: Routing { /// Returns the node type. @@ -61,9 +59,6 @@ pub trait NodeInterface: Routing { // If the node is already initialized, then shut it down. self.shut_down().await; - - // A best-effort attempt to let any ongoing activity conclude. - tokio::time::sleep(Duration::from_secs(3)).await; } /// Shuts down the node. 
From c1c542f4871476bcbf042d394494f48da1046db3 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 17:29:22 -0700 Subject: [PATCH 06/13] misc(bft): use snarkvm::utilities::task everywhere --- Cargo.lock | 451 ++++++++++++++-------------- Cargo.toml | 3 +- node/bft/Cargo.toml | 2 +- node/bft/src/primary.rs | 82 ++--- node/bft/src/sync/mod.rs | 13 +- node/bft/src/worker.rs | 39 ++- node/bft/tests/components/worker.rs | 17 +- 7 files changed, 313 insertions(+), 294 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 694a3e1043..c8d5546a21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,7 +82,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9ebd144c81671193ed85aa2db9bb5e183421843e0485de8fffc07e5cf50e18a" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 1.0.109", ] @@ -93,7 +93,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f6ff9e4c36858fa2c29e5284b77527b5a7466743976e1ba1f5824e16683545" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 1.0.109", ] @@ -123,9 +123,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.20" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -138,9 +138,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" @@ -208,7 +208,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -230,7 +230,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -241,7 +241,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -259,9 +259,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "axum" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" +checksum = "8a18ed336352031311f4e0b4dd2ff392d4fbb370777c9d18d7fc9d7359f73871" dependencies = [ "axum-core", "bytes", @@ -278,8 +278,7 @@ dependencies = [ "mime", "percent-encoding", "pin-project-lite", - "rustversion", - "serde", + "serde_core", "serde_json", "serde_path_to_error", "serde_urlencoded", @@ -293,9 +292,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.2" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" +checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" dependencies = [ "bytes", "futures-core", 
@@ -304,7 +303,6 @@ dependencies = [ "http-body-util", "mime", "pin-project-lite", - "rustversion", "sync_wrapper", "tower-layer", "tower-service", @@ -313,9 +311,9 @@ dependencies = [ [[package]] name = "axum-extra" -version = "0.10.1" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45bf463831f5131b7d3c756525b305d40f1185b688565648a92e1392ca35713d" +checksum = "9963ff19f40c6102c76756ef0a46004c0d58957d87259fc9208ff8441c12ab96" dependencies = [ "axum", "axum-core", @@ -328,11 +326,11 @@ dependencies = [ "mime", "pin-project-lite", "rustversion", - "serde", + "serde_core", "serde_json", - "tower 0.5.2", "tower-layer", "tower-service", + "tracing", "typed-json", ] @@ -348,7 +346,7 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -398,7 +396,7 @@ dependencies = [ "peeking_take_while", "prettyplease", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "regex", "rustc-hash 1.1.0", "shlex", @@ -536,9 +534,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.38" +version = "1.2.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80f41ae168f955c12fb8960b057d70d0ca153fb83182b57d86380443527be7e9" +checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" dependencies = [ "find-msvc-tools", "jobserver", @@ -576,7 +574,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -629,7 +627,7 @@ checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" dependencies = [ "heck", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -912,7 +910,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -945,7 +943,7 @@ dependencies = [ "fnv", "ident_case", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "strsim", "syn 2.0.106", ] @@ -959,7 +957,7 @@ dependencies = [ "fnv", "ident_case", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "strsim", "syn 2.0.106", ] @@ -971,7 +969,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -982,7 +980,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -1036,7 +1034,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bba95f299f6b9cd47f68a847eca2ae9060a2713af532dc35c342065544845407" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "structmeta 0.3.0", "syn 2.0.106", ] @@ -1048,7 +1046,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -1069,7 +1067,7 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" dependencies = [ "convert_case", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -1111,7 +1109,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -1204,7 +1202,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "685adfa4d6f3d765a26bc5dbc936577de9abf756c1feeb3089b01dd395034842" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -1270,15 +1268,15 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "find-msvc-tools" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959" +checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" [[package]] name = "flate2" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" dependencies = [ "crc32fast", "miniz_oxide", @@ -1398,7 +1396,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -1827,7 +1825,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.0", "system-configuration", "tokio", "tower-service", @@ -2024,7 +2022,7 @@ dependencies = [ "darling 0.20.11", "indoc", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -2176,7 +2174,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -2254,11 +2252,10 @@ checksum = "f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] @@ -2408,6 +2405,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -2444,7 +2442,7 @@ checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898" dependencies = [ "cfg-if", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -2543,7 +2541,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -2639,7 +2637,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -2663,9 +2661,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -2673,15 +2671,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link 0.2.1", ] [[package]] @@ -2754,7 +2752,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -2924,8 +2922,8 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.1", "rustls", - "socket2 0.5.10", - "thiserror 2.0.16", + "socket2 0.6.0", + "thiserror 2.0.17", "tokio", "tracing", "web-time", @@ -2946,7 +2944,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.16", + "thiserror 2.0.17", "tinyvec", "tracing", "web-time", @@ -2961,7 +2959,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.0", "tracing", "windows-sys 0.60.2", ] @@ -2974,9 +2972,9 @@ checksum = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a" [[package]] name = "quote" -version = "1.0.40" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] @@ -3117,9 +3115,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.17" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags 2.9.4", ] @@ -3137,21 +3135,21 @@ dependencies = [ [[package]] name = "ref-cast" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -3364,9 +3362,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.6" +version = "0.103.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8572f3c2cb9934231157b45499fc41e1f58c589fdfb81a844ba873265e80f8eb" +checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" dependencies = [ "ring", "rustls-pki-types", @@ -3381,9 +3379,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rusty-fork" -version = "0.3.0" +version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" dependencies = [ "fnv", "quick-error", @@ -3520,9 +3518,9 @@ checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "serde" -version = "1.0.227" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80ece43fc6fbed4eb5392ab50c07334d3e577cbf40997ee896fe7af40bba4245" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", "serde_derive", @@ -3530,21 +3528,21 @@ dependencies = [ [[package]] name = "serde_core" -version = "1.0.227" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a576275b607a2c86ea29e410193df32bc680303c82f31e275bbfcafe8b33be5" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.227" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e694923b8824cf0e9b382adf0f60d4e05f348f357b38833a3fa5ed7c2ede04" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -3596,9 +3594,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.14.1" +version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c522100790450cf78eeac1507263d0a350d4d5b30df0c8e1fe051a10c22b376e" +checksum = "6093cd8c01b25262b84927e0f7151692158fab02d961e04c979d3903eba7ecc5" dependencies = [ "base64 0.22.1", "chrono", @@ -3607,8 +3605,7 @@ dependencies = [ "indexmap 2.11.4", "schemars 0.9.0", "schemars 1.0.4", - "serde", - "serde_derive", + "serde_core", "serde_json", "serde_with_macros", "time", @@ -3616,13 +3613,13 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.14.1" +version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327ada00f7d64abaac1e55a6911e90cf665aa051b9a561c7006c157f4633135e" +checksum = "a7e6c180db0816026a61afa1cff5344fb7ebded7e4d3062772179f2501481c27" dependencies = [ "darling 0.21.3", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -3717,7 +3714,7 @@ checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.16", + "thiserror 2.0.17", "time", ] @@ -3824,7 +3821,7 @@ dependencies = [ "snarkvm", "sys-info", "tempfile", - "thiserror 2.0.16", + "thiserror 2.0.17", "time", "tokio", "tracing", @@ -4181,7 +4178,7 @@ dependencies = [ "once_cell", "parking_lot", "snarkos-node-metrics", - "thiserror 2.0.16", + "thiserror 2.0.17", "tokio", "tokio-util", "tracing", @@ -4198,7 +4195,7 @@ dependencies = [ [[package]] name = "snarkvm" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "anyhow", "dotenvy", @@ -4221,7 +4218,7 @@ dependencies = [ [[package]] name = "snarkvm-algorithms" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", @@ -4243,13 +4240,13 @@ dependencies = [ "snarkvm-fields", "snarkvm-parameters", "snarkvm-utilities", - "thiserror 2.0.16", + "thiserror 2.0.17", ] [[package]] name = "snarkvm-algorithms-cuda" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "blst", "cc", @@ -4260,7 +4257,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-account", "snarkvm-circuit-algorithms", @@ -4274,7 +4271,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-account" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-network", "snarkvm-circuit-types", @@ -4284,7 +4281,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-types", "snarkvm-console-algorithms", @@ -4294,7 +4291,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-collections" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-algorithms", "snarkvm-circuit-types", @@ -4304,7 +4301,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-environment" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "indexmap 2.11.4", "itertools 0.14.0", @@ -4322,12 +4319,12 @@ dependencies = [ [[package]] name = "snarkvm-circuit-environment-witness" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" [[package]] name = "snarkvm-circuit-network" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-algorithms", "snarkvm-circuit-collections", @@ -4338,7 +4335,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-account", "snarkvm-circuit-algorithms", @@ -4352,7 +4349,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-address", @@ -4367,7 +4364,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-address" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4380,7 +4377,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-boolean" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-environment", "snarkvm-console-types-boolean", @@ -4389,7 +4386,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-field" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4399,7 +4396,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-group" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4411,7 +4408,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-integers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4423,7 +4420,7 @@ dependencies 
= [ [[package]] name = "snarkvm-circuit-types-scalar" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4434,7 +4431,7 @@ dependencies = [ [[package]] name = "snarkvm-circuit-types-string" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-circuit-environment", "snarkvm-circuit-types-boolean", @@ -4446,7 +4443,7 @@ dependencies = [ [[package]] name = "snarkvm-console" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-account", "snarkvm-console-algorithms", @@ -4459,7 +4456,7 @@ dependencies = [ [[package]] name = "snarkvm-console-account" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "bs58", "snarkvm-console-network", @@ -4470,7 +4467,7 @@ dependencies = [ [[package]] name = "snarkvm-console-algorithms" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "blake2s_simd", "smallvec", @@ -4483,7 +4480,7 @@ dependencies = [ [[package]] name = "snarkvm-console-collections" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "rayon", @@ -4494,7 +4491,7 @@ dependencies = [ [[package]] name = "snarkvm-console-network" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "anyhow", "enum-iterator", @@ -4514,7 +4511,7 @@ dependencies = [ [[package]] name = "snarkvm-console-network-environment" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "anyhow", "bech32", @@ -4532,7 +4529,7 @@ dependencies = [ [[package]] name = "snarkvm-console-program" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "enum-iterator", "enum_index", @@ -4552,7 +4549,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-address", @@ -4567,7 +4564,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-address" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4578,7 +4575,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-boolean" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-network-environment", ] @@ -4586,7 +4583,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-field" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4596,7 +4593,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-group" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4607,7 +4604,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-integers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4618,7 +4615,7 @@ dependencies = [ [[package]] name = "snarkvm-console-types-scalar" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4629,7 +4626,7 @@ 
dependencies = [ [[package]] name = "snarkvm-console-types-string" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console-network-environment", "snarkvm-console-types-boolean", @@ -4640,7 +4637,7 @@ dependencies = [ [[package]] name = "snarkvm-curves" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "rand 0.8.5", "rayon", @@ -4648,13 +4645,13 @@ dependencies = [ "serde", "snarkvm-fields", "snarkvm-utilities", - "thiserror 2.0.16", + "thiserror 2.0.17", ] [[package]] name = "snarkvm-fields" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", @@ -4664,14 +4661,14 @@ dependencies = [ "rayon", "serde", "snarkvm-utilities", - "thiserror 2.0.16", + "thiserror 2.0.17", "zeroize", ] [[package]] name = "snarkvm-ledger" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", @@ -4699,7 +4696,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-authority" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "anyhow", "rand 0.8.5", @@ -4711,7 +4708,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-block" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "anyhow", "indexmap 2.11.4", @@ -4733,7 +4730,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-committee" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "anyhow", "indexmap 2.11.4", @@ -4752,7 +4749,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-ledger-narwhal-batch-certificate", 
"snarkvm-ledger-narwhal-batch-header", @@ -4765,7 +4762,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-batch-certificate" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "indexmap 2.11.4", "rayon", @@ -4778,7 +4775,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-batch-header" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "indexmap 2.11.4", "rayon", @@ -4791,7 +4788,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-data" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "bytes", "serde_json", @@ -4802,7 +4799,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-subdag" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "indexmap 2.11.4", "rayon", @@ -4817,7 +4814,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-transmission" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "bytes", "serde_json", @@ -4830,7 +4827,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-narwhal-transmission-id" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "snarkvm-console", "snarkvm-ledger-puzzle", @@ -4839,7 +4836,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-puzzle" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", @@ -4859,7 +4856,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-puzzle-epoch" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", @@ -4882,7 +4879,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-query" version = "4.2.1" -source = 
"git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "anyhow", "async-trait", @@ -4899,7 +4896,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-store" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std-storage", "anyhow", @@ -4927,7 +4924,7 @@ dependencies = [ [[package]] name = "snarkvm-ledger-test-helpers" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", @@ -4945,7 +4942,7 @@ dependencies = [ [[package]] name = "snarkvm-metrics" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "metrics", ] @@ -4953,7 +4950,7 @@ dependencies = [ [[package]] name = "snarkvm-parameters" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", @@ -4970,13 +4967,13 @@ dependencies = [ "sha2", "snarkvm-curves", "snarkvm-utilities", - "thiserror 2.0.16", + "thiserror 2.0.17", ] [[package]] name = "snarkvm-synthesizer" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", @@ -5009,7 +5006,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-process" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "colored 3.0.0", @@ -5034,7 +5031,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-program" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "indexmap 2.11.4", "paste", @@ -5052,7 +5049,7 @@ dependencies = [ [[package]] name = "snarkvm-synthesizer-snark" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = 
"git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "bincode", "serde_json", @@ -5065,11 +5062,12 @@ dependencies = [ [[package]] name = "snarkvm-utilities" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "aleo-std", "anyhow", "bincode", + "colored 3.0.0", "num-bigint", "num_cpus", "rand 0.8.5", @@ -5079,7 +5077,8 @@ dependencies = [ "serde_json", "smol_str", "snarkvm-utilities-derives", - "thiserror 2.0.16", + "thiserror 2.0.17", + "tokio", "tracing", "zeroize", ] @@ -5087,10 +5086,10 @@ dependencies = [ [[package]] name = "snarkvm-utilities-derives" version = "4.2.1" -source = "git+https://github.com/ProvableHQ/snarkVM.git?rev=619464c8edf75636809d0bfb8e8404de78a62c97#619464c8edf75636809d0bfb8e8404de78a62c97" +source = "git+https://github.com/ProvableHQ/snarkVM.git?branch=feat%2Ftrack-error#a79f90f8dd7b74db48880fad59b8e66141a607c4" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -5168,7 +5167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ad9e09554f0456d67a69c1584c9798ba733a5b50349a6c0d0948710523922d" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "structmeta-derive 0.2.0", "syn 2.0.106", ] @@ -5180,7 +5179,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e1575d8d40908d70f6fd05537266b90ae71b15dbbe7a8b7dffa2b759306d329" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "structmeta-derive 0.3.0", "syn 2.0.106", ] @@ -5192,7 +5191,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -5203,7 +5202,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -5224,7 +5223,7 @@ checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ "heck", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "rustversion", "syn 2.0.106", ] @@ -5253,7 +5252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "unicode-ident", ] @@ -5264,7 +5263,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "unicode-ident", ] @@ -5293,7 +5292,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -5354,7 +5353,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8361c808554228ad09bfed70f5c823caf8a3450b6881cc3a38eb57e8c08c1d9" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "structmeta 0.2.0", "syn 2.0.106", ] @@ 
-5367,7 +5366,7 @@ checksum = "43b12f9683de37f9980e485167ee624bfaa0b6b04da661e98e25ef9c2669bc1b" dependencies = [ "derive-ex", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "structmeta 0.3.0", "syn 2.0.106", ] @@ -5383,11 +5382,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ - "thiserror-impl 2.0.16", + "thiserror-impl 2.0.17", ] [[package]] @@ -5397,18 +5396,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] [[package]] name = "thiserror-impl" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -5542,7 +5541,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -5558,9 +5557,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.3" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f63835928ca123f1bef57abbcd23bb2ba0ac9ae1235f1e65bda0d06e7786bd" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ "rustls", "tokio", @@ -5733,7 +5732,7 @@ dependencies = [ "governor", "http 1.3.1", "pin-project", - "thiserror 2.0.16", + "thiserror 2.0.17", "tower 0.5.2", "tracing", ] @@ -5757,7 +5756,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -5817,7 +5816,7 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" dependencies = [ - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -5839,9 +5838,9 @@ dependencies = [ [[package]] name = "typenum" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unarray" @@ -6063,7 +6062,7 @@ dependencies = [ "bumpalo", "log", "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", "wasm-bindgen-shared", ] @@ -6087,7 +6086,7 @@ version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" dependencies = [ - "quote 1.0.40", + "quote 1.0.41", "wasm-bindgen-macro-support", ] @@ -6098,7 +6097,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 
1.0.41", "syn 2.0.106", "wasm-bindgen-backend", "wasm-bindgen-shared", @@ -6204,7 +6203,7 @@ checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.2.0", + "windows-link 0.2.1", "windows-result 0.4.0", "windows-strings 0.5.0", ] @@ -6216,7 +6215,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb307e42a74fb6de9bf3a02d9712678b22399c87e6fa869d6dfcd8c1b7754e0" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -6227,7 +6226,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0abd1ddbc6964ac14db11c7213d6532ef34bd9aa042c2e5935f59d7908b46a5" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -6239,9 +6238,9 @@ checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-link" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-registry" @@ -6269,7 +6268,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -6287,7 +6286,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -6323,7 +6322,7 @@ version = "0.61.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -6348,15 +6347,15 @@ version = "0.53.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d42b7b7f66d2a06854650af09cfdf8713e427a439c97ad65a6375318033ac4b" dependencies = [ - "windows-link 0.2.0", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows-link 0.2.1", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -6367,9 +6366,9 @@ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -6379,9 +6378,9 @@ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_aarch64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -6391,9 +6390,9 @@ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" [[package]] name = "windows_i686_gnullvm" @@ -6403,9 +6402,9 @@ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -6415,9 +6414,9 @@ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -6427,9 +6426,9 @@ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -6439,9 +6438,9 @@ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -6451,9 +6450,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" @@ -6498,7 +6497,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", "synstructure", ] @@ -6519,7 +6518,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -6539,16 +6538,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", 
- "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", "synstructure", ] [[package]] name = "zeroize" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" dependencies = [ "zeroize_derive", ] @@ -6560,7 +6559,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -6593,7 +6592,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", - "quote 1.0.40", + "quote 1.0.41", "syn 2.0.106", ] @@ -6610,7 +6609,7 @@ dependencies = [ "flate2", "indexmap 2.11.4", "memchr", - "thiserror 2.0.16", + "thiserror 2.0.17", "time", "zopfli", ] @@ -6622,7 +6621,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dba6063ff82cdbd9a765add16d369abe81e520f836054e997c2db217ceca40c0" dependencies = [ "ed25519-dalek", - "thiserror 2.0.16", + "thiserror 2.0.17", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e313d2642b..32d931699b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,7 +47,8 @@ default-features = false [workspace.dependencies.snarkvm] #path = "../snarkVM" git = "https://github.com/ProvableHQ/snarkVM.git" -rev = "619464c8edf75636809d0bfb8e8404de78a62c97" +branch = "feat/track-error" +#rev = "619464c8edf75636809d0bfb8e8404de78a62c97" #version = "=4.2.1" default-features = false #features = [ "circuit", "console", "rocks" ] diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index ce119f4ebf..850f3ae228 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -125,7 +125,7 @@ workspace = true [dependencies.snarkvm] workspace = true -features = [ "utilities" ] +features = [ "utilities", "async" ] [dependencies.time] workspace = true diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 07bfee158f..61bfeec712 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -57,6 +57,7 @@ use snarkvm::{ puzzle::{Solution, SolutionID}, }, prelude::{ConsensusVersion, committee::Committee}, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -81,7 +82,7 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::{sync::OnceCell, task::JoinHandle}; +use tokio::sync::OnceCell; /// A helper type for an optional proposed batch. pub type ProposedBatch = RwLock>>; @@ -407,7 +408,7 @@ impl Primary { // Resend the batch proposal to the validator for signing. Some(peer_ip) => { let (gateway, event_, round) = (self.gateway.clone(), event.clone(), proposal.round()); - tokio::spawn(async move { + task::spawn(async move { debug!("Resending batch proposal for round {round} to peer '{peer_ip}'"); // Resend the batch proposal to the peer. if gateway.send(peer_ip, event_).await.is_none() { @@ -573,14 +574,13 @@ impl Primary { } // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => { - Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?) 
- } + let transaction = task::spawn_blocking(|| match transaction { + Data::Object(transaction) => Ok(transaction), + Data::Buffer(bytes) => { + Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)) } - })?; + }) + .await?; // TODO (raychu86): Record Commitment - Remove this logic after the next migration height is reached. // ConsensusVersion V8 Migration logic - @@ -679,15 +679,18 @@ impl Primary { // Prepare the previous batch certificate IDs. let previous_certificate_ids = previous_certificates.into_iter().map(|c| c.id()).collect(); // Sign the batch header and construct the proposal. - let (batch_header, proposal) = spawn_blocking!(BatchHeader::new( - &private_key, - round, - current_timestamp, - committee_id, - transmission_ids, - previous_certificate_ids, - &mut rand::thread_rng() - )) + let (batch_header, proposal) = task::spawn_blocking(move || { + BatchHeader::new( + &private_key, + round, + current_timestamp, + committee_id, + transmission_ids, + previous_certificate_ids, + &mut rand::thread_rng(), + ) + }) + .await .and_then(|batch_header| { Proposal::new(committee_lookback, batch_header.clone(), transmissions.clone()) .map(|proposal| (batch_header, proposal)) @@ -720,7 +723,7 @@ impl Primary { let BatchPropose { round: batch_round, batch_header } = batch_propose; // Deserialize the batch header. - let batch_header = spawn_blocking!(batch_header.deserialize_blocking())?; + let batch_header = task::spawn_blocking(|| batch_header.deserialize_blocking()).await?; // Ensure the round matches in the batch header. if batch_round != batch_header.round() { // Proceed to disconnect the validator. @@ -786,7 +789,7 @@ impl Primary { // Instead, rebroadcast the cached signature to the peer. if signed_round == batch_header.round() && signed_batch_id == batch_header.batch_id() { let gateway = self.gateway.clone(); - tokio::spawn(async move { + task::spawn(async move { debug!("Resending a signature for a batch in round {batch_round} from '{peer_ip}'"); let event = Event::BatchSignature(BatchSignature::new(batch_header.batch_id(), signature)); // Resend the batch signature to the peer. @@ -850,8 +853,10 @@ impl Primary { // Ensure the batch header from the peer is valid. let (storage, header) = (self.storage.clone(), batch_header.clone()); - let missing_transmissions = - spawn_blocking!(storage.check_batch_header(&header, missing_transmissions, Default::default()))?; + let missing_transmissions = task::spawn_blocking(move || { + storage.check_batch_header(&header, missing_transmissions, Default::default()) + }) + .await?; // Inserts the missing transmissions into the workers. self.insert_missing_transmissions_into_workers(peer_ip, missing_transmissions.into_iter())?; @@ -876,14 +881,13 @@ impl Primary { (transmission_id, transmission) { // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => { - Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?) - } + let transaction = task::spawn_blocking(|| match transaction { + Data::Object(transaction) => Ok(transaction), + Data::Buffer(bytes) => { + Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64)) } - })?; + }) + .await?; // TODO (raychu86): Record Commitment - Remove this logic after the next migration height is reached. 
// ConsensusVersion V8 Migration logic - @@ -944,7 +948,7 @@ impl Primary { let batch_id = batch_header.batch_id(); // Sign the batch ID. let account = self.gateway.account().clone(); - let signature = spawn_blocking!(account.sign(&[batch_id], &mut rand::thread_rng()))?; + let signature = task::spawn_blocking(move || account.sign(&[batch_id], &mut rand::thread_rng())).await?; // Ensure the proposal has not already been signed. // @@ -972,7 +976,7 @@ impl Primary { // Broadcast the signature back to the validator. let self_ = self.clone(); - tokio::spawn(async move { + task::spawn(async move { let event = Event::BatchSignature(BatchSignature::new(batch_id, signature)); // Send the batch signature to the peer. if self_.gateway.send(peer_ip, event).await.is_some() { @@ -1017,7 +1021,7 @@ impl Primary { } let self_ = self.clone(); - let Some(proposal) = spawn_blocking!({ + let Some(proposal) = task::spawn_blocking(move || { // Acquire the write lock. let mut proposed_batch = self_.proposed_batch.write(); // Add the signature to the batch, and determine if the batch is ready to be certified. @@ -1065,7 +1069,7 @@ impl Primary { Some(proposal) => Ok(Some(proposal)), None => Ok(None), } - })? + }).await? else { return Ok(()); }; @@ -1211,7 +1215,7 @@ impl Primary { // Retrieve the block locators. let self__ = self_.clone(); - let block_locators = match spawn_blocking!(self__.sync.get_block_locators()) { + let block_locators = match task::spawn_blocking(move || self__.sync.get_block_locators()).await { Ok(block_locators) => block_locators, Err(e) => { warn!("Failed to retrieve block locators - {e}"); @@ -1633,7 +1637,8 @@ impl Primary { let transmissions = transmissions.into_iter().collect::>(); // Store the certified batch. let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking!(storage.insert_certificate(certificate_, transmissions, Default::default()))?; + task::spawn_blocking(move || storage.insert_certificate(certificate_, transmissions, Default::default())) + .await?; debug!("Stored a batch certificate for round {}", certificate.round()); // If a BFT sender was provided, send the certificate to the BFT. if let Some(bft_sender) = self.bft_sender.get() { @@ -1720,7 +1725,10 @@ impl Primary { if !self.storage.contains_certificate(certificate.id()) { // Store the batch certificate. let (storage, certificate_) = (self.storage.clone(), certificate.clone()); - spawn_blocking!(storage.insert_certificate(certificate_, missing_transmissions, Default::default()))?; + task::spawn_blocking(move || { + storage.insert_certificate(certificate_, missing_transmissions, Default::default()) + }) + .await?; debug!("Stored a batch certificate for round {batch_round} from '{peer_ip}'"); // If a BFT sender was provided, send the round and certificate to the BFT. if let Some(bft_sender) = self.bft_sender.get() { @@ -1933,7 +1941,7 @@ impl Primary { impl Primary { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the primary. 
diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 0051a0a180..ba80f578a7 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -29,7 +29,11 @@ use snarkos_node_sync::{BLOCK_REQUEST_BATCH_DELAY, BlockSync, Ping, PrepareSyncR use snarkvm::{ console::{network::Network, types::Field}, ledger::{authority::Authority, block::Block, narwhal::BatchCertificate}, - prelude::{cfg_into_iter, cfg_iter}, + utilities::{ + cfg_into_iter, + cfg_iter, + task::{self, JoinHandle}, + }, }; use anyhow::{Result, anyhow, bail}; @@ -49,10 +53,7 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::{ - sync::{OnceCell, oneshot}, - task::JoinHandle, -}; +use tokio::sync::{OnceCell, oneshot}; /// Block synchronization logic for validators. /// @@ -928,7 +929,7 @@ impl Sync { impl Sync { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the primary. diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index 2e3f9c24cf..870fa0a340 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -24,14 +24,19 @@ use crate::{ }; use snarkos_node_bft_ledger_service::LedgerService; use snarkvm::{ - console::prelude::*, + console::{network::Network, prelude::Read}, ledger::{ block::Transaction, narwhal::{BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, + utilities::{ + FromBytes, + task::{self, JoinHandle}, + }, }; +use anyhow::{Context, Result, bail, ensure}; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] @@ -40,7 +45,7 @@ use locktick::parking_lot::{Mutex, RwLock}; use parking_lot::{Mutex, RwLock}; use rand::seq::IteratorRandom; use std::{future::Future, net::SocketAddr, sync::Arc, time::Duration}; -use tokio::{sync::oneshot, task::JoinHandle, time::timeout}; +use tokio::{sync::oneshot, time::timeout}; /// A worker's main role is maintaining a queue of verified ("ready") transmissions, /// which will eventually be fetched by the primary when the primary generates a new batch. @@ -498,12 +503,11 @@ impl Worker { ); } // Wait for the transmission to be fetched. - match timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver).await { - // If the transmission was fetched, return it. - Ok(result) => Ok((transmission_id, result?)), - // If the transmission was not fetched, return an error. - Err(e) => bail!("Unable to fetch transmission - (timeout) {e}"), - } + let transmission = timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver) + .await + .with_context(|| "Unable to fetch transmission - (timeout)")??; + + Ok((transmission_id, transmission)) } /// Handles the incoming transmission response. @@ -540,7 +544,7 @@ impl Worker { /// Spawns a task with the given future; it should only be used for long-running tasks. fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the worker. 
@@ -558,19 +562,25 @@ mod tests { use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkvm::{ - console::{network::Network, types::Field}, + console::{ + network::{ConsensusVersion, Network}, + types::{Address, Field}, + }, ledger::{ block::Block, committee::Committee, narwhal::{BatchCertificate, Subdag, Transmission, TransmissionID}, test_helpers::sample_execution_transaction_with_fee, }, - prelude::Address, + prelude::{Itertools, Uniform}, + utilities::TestRng, }; + use anyhow::anyhow; use bytes::Bytes; use indexmap::IndexMap; use mockall::mock; + use rand::Rng; use std::{io, ops::Range}; type CurrentNetwork = snarkvm::prelude::MainnetV0; @@ -926,7 +936,7 @@ mod tests { for i in 1..=num_flood_requests { let worker_ = worker.clone(); let peer_ip = peer_ips.pop().unwrap(); - tokio::spawn(async move { + task::spawn(async move { let _ = worker_.send_transmission_request(peer_ip, transmission_id).await; }); tokio::time::sleep(Duration::from_millis(10)).await; @@ -946,7 +956,7 @@ mod tests { // Flood the pending queue with transmission requests again, this time to a single peer for i in 1..=num_flood_requests { let worker_ = worker.clone(); - tokio::spawn(async move { + task::spawn(async move { let _ = worker_.send_transmission_request(first_peer_ip, transmission_id).await; }); tokio::time::sleep(Duration::from_millis(10)).await; @@ -999,12 +1009,15 @@ mod tests { mod prop_tests { use super::*; use crate::Gateway; + use snarkos_node_bft_ledger_service::MockLedgerService; use snarkvm::{ console::account::Address, ledger::committee::{Committee, MIN_VALIDATOR_STAKE}, + prelude::TestRng, }; + use rand::Rng; use test_strategy::proptest; type CurrentNetwork = snarkvm::prelude::MainnetV0; diff --git a/node/bft/tests/components/worker.rs b/node/bft/tests/components/worker.rs index be53686ae0..4408afbf99 100644 --- a/node/bft/tests/components/worker.rs +++ b/node/bft/tests/components/worker.rs @@ -19,10 +19,7 @@ use crate::common::{ utils::{sample_ledger, sample_worker}, }; use snarkos_node_bft::helpers::max_redundant_requests; -use snarkvm::{ - ledger::narwhal::TransmissionID, - prelude::{Network, TestRng}, -}; +use snarkvm::{console::network::Network, ledger::narwhal::TransmissionID, prelude::TestRng, utilities::task}; use std::net::SocketAddr; @@ -57,7 +54,7 @@ async fn test_resend_transmission_request() { // Send a request to fetch the dummy transmission. 
let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(initial_peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(initial_peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -76,7 +73,7 @@ async fn test_resend_transmission_request() { for i in 1..num_test_requests { let worker_ = worker.clone(); let peer_ip = initial_peer_ip; - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -95,7 +92,7 @@ async fn test_resend_transmission_request() { for i in 1..num_test_requests { let peer_ip = peer_ips.pop().unwrap(); let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -141,7 +138,7 @@ async fn test_flood_transmission_requests() { // Send the maximum number of redundant requests to fetch the dummy transmission. for peer_ip in remaining_peer_ips.clone() { let worker_ = worker.clone(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); } tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -160,7 +157,7 @@ async fn test_flood_transmission_requests() { for i in 1..=6 { let worker_ = worker.clone(); let peer_ip = initial_peer_ip; - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; @@ -179,7 +176,7 @@ async fn test_flood_transmission_requests() { for i in 1..=6 { let worker_ = worker.clone(); let peer_ip = remaining_peer_ips.pop().unwrap(); - tokio::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); + task::spawn(async move { worker_.get_or_fetch_transmission(peer_ip, transmission_id).await }); tokio::time::sleep(std::time::Duration::from_millis(10)).await; From 9d7b851806af28d22aec1523c5dc7bef3e676cf6 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Mon, 15 Sep 2025 16:32:44 -0700 Subject: [PATCH 07/13] feat: use thread-safe panic handling refactor(utils): move signal handling to dedicated snarkos-utilities crate --- cli/src/commands/start.rs | 1 + node/src/validator/mod.rs | 18 ++++++------ snarkos/main.rs | 60 +++++++++++++++++++-------------------- utilities/src/lib.rs | 1 - 4 files changed, 41 insertions(+), 39 deletions(-) diff --git a/cli/src/commands/start.rs b/cli/src/commands/start.rs index e4242f652f..41e4cd9546 100644 --- a/cli/src/commands/start.rs +++ b/cli/src/commands/start.rs @@ -741,6 +741,7 @@ impl Start { // Set up the tokio Runtime. // TODO(kaimast): set up a panic handler here for each worker thread once [`tokio::runtime::Builder::unhandled_panic`](https://docs.rs/tokio/latest/tokio/runtime/struct.Builder.html#method.unhandled_panic) is stabilized. + // As of now, detached tasks may panic and the error may not be handled by the top-level `catch_unwind`. 
runtime::Builder::new_multi_thread() .enable_all() .thread_stack_size(8 * 1024 * 1024) diff --git a/node/src/validator/mod.rs b/node/src/validator/mod.rs index 5a85a48391..054e774eca 100644 --- a/node/src/validator/mod.rs +++ b/node/src/validator/mod.rs @@ -38,12 +38,15 @@ use snarkos_node_tcp::{ }; use snarkos_utilities::SignalHandler; -use snarkvm::prelude::{ - Ledger, - Network, - block::{Block, Header}, - puzzle::Solution, - store::ConsensusStorage, +use snarkvm::{ + prelude::{ + Ledger, + Network, + block::{Block, Header}, + puzzle::Solution, + store::ConsensusStorage, + }, + utilities::task::{self, JoinHandle}, }; use aleo_std::StorageMode; @@ -54,7 +57,6 @@ use locktick::parking_lot::Mutex; #[cfg(not(feature = "locktick"))] use parking_lot::Mutex; use std::{net::SocketAddr, sync::Arc, time::Duration}; -use tokio::task::JoinHandle; /// A validator is a full node, capable of validating blocks. #[derive(Clone)] @@ -439,7 +441,7 @@ impl> Validator { /// Spawns a task with the given future; it should only be used for long-running tasks. pub fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } } diff --git a/snarkos/main.rs b/snarkos/main.rs index f0558f35bc..77d8c63306 100644 --- a/snarkos/main.rs +++ b/snarkos/main.rs @@ -14,14 +14,17 @@ // limitations under the License. use snarkos_cli::{commands::CLI, helpers::Updater}; -use snarkvm::utilities::display_error; +use snarkvm::utilities::{ + display_error, + errors::{catch_unwind, set_panic_hook}, +}; use clap::Parser; #[cfg(feature = "locktick")] use locktick::lock_snapshots; +use std::env; #[cfg(feature = "locktick")] use std::time::Instant; -use std::{backtrace::Backtrace, env, panic::catch_unwind}; use tracing::log::logger; #[cfg(all(target_os = "linux", target_arch = "x86_64"))] @@ -92,36 +95,10 @@ fn main() { } }); - // Set a custom hook here to show "pretty" errors when panicking. - std::panic::set_hook(Box::new(|err| { - print_error!("⚠️ {}\n", err.to_string().replace("panicked at", "snarkOS encountered an unexpected error at")); - - // Always show backtraces. - let backtrace = Backtrace::force_capture().to_string(); - - let mut msg = "Backtrace:\n".to_string(); - msg.push_str(" [...]\n"); - - // Remove all the low level frames. - // This can be done more cleanly once the `backtrace_frames` feature is stabilized. - let lines = backtrace.lines().skip_while(|line| !line.contains("core::panicking")); - - for line in lines { - // Stop printing once we hit the panic handler. - if line.contains("snarkos::main") { - break; - } - - msg.push_str(&format!("{line}\n")); - } - - // Print the entire backtrace as a single log message. - print_error!("{msg}"); - })); - // Run the CLI. // We use `catch_unwind` here to ensure a panic stops execution and not just a single thread. // Note: `catch_unwind` can be nested without problems. + set_panic_hook(); let result = catch_unwind(|| { // Parse the given arguments. let cli = CLI::parse(); @@ -151,7 +128,30 @@ fn main() { exit(1); } - Err(_) => { + Err((msg, backtrace)) => { + print_error!("⚠️ {}\n", msg.replace("panicked at", "snarkOS encountered an unexpected error at")); + + // Always show backtraces. + let mut msg = "Backtrace:\n".to_string(); + msg.push_str(" [...]\n"); + + // Remove all the low level frames. + // This can be done more cleanly once the `backtrace_frames` feature is stabilized. 
+ let backtrace = backtrace.to_string(); + let lines = backtrace.lines().skip_while(|line| !line.contains("core::panicking")); + + for line in lines { + // Stop printing once we hit the panic handler. + if line.contains("snarkos::main") { + break; + } + + msg.push_str(&format!("{line}\n")); + } + + // Print the entire backtrace as a single log message. + print_error!("{msg}"); + // Print some information for the end-user. print_error!( "This is most likely a bug!\n\ Please report it to the snarkOS developers: https://github.com/ProvableHQ/snarkOS/issues/new?template=bug.md" diff --git a/utilities/src/lib.rs b/utilities/src/lib.rs index de7548d27a..b6aae9f9be 100644 --- a/utilities/src/lib.rs +++ b/utilities/src/lib.rs @@ -15,5 +15,4 @@ /// Utilities for signal and shutdown handling. pub mod signals; - pub use signals::*; From 2bd1313db60c1b13958deecdeace98d37637df11 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 13:17:41 -0700 Subject: [PATCH 08/13] misc(node): use new error logging mechanisms in snarkVM --- Cargo.lock | 44 +++++++++++++++---------------- Cargo.toml | 3 +-- node/bft/Cargo.toml | 2 +- node/bft/src/bft.rs | 52 +++++++++++++++++++------------------ node/bft/src/sync/mod.rs | 44 ++++++++++++++++--------------- node/sync/src/block_sync.rs | 24 ++++++++--------- 6 files changed, 86 insertions(+), 83 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c8d5546a21..3940228ce7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1251,7 +1251,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -3323,7 +3323,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -3422,7 +3422,7 @@ version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -5337,7 +5337,7 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix 1.1.2", - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -6186,7 +6186,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -6197,22 +6197,22 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.62.1" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", "windows-link 0.2.1", - "windows-result 0.4.0", - "windows-strings 0.5.0", + "windows-result 0.4.1", + "windows-strings 0.5.1", ] [[package]] name = "windows-implement" -version = "0.60.1" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edb307e42a74fb6de9bf3a02d9712678b22399c87e6fa869d6dfcd8c1b7754e0" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote 1.0.41", @@ -6221,9 +6221,9 @@ dependencies = [ [[package]] name = "windows-interface" 
-version = "0.59.2" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0abd1ddbc6964ac14db11c7213d6532ef34bd9aa042c2e5935f59d7908b46a5" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote 1.0.41", @@ -6264,9 +6264,9 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ "windows-link 0.2.1", ] @@ -6282,9 +6282,9 @@ dependencies = [ [[package]] name = "windows-strings" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ "windows-link 0.2.1", ] @@ -6313,14 +6313,14 @@ version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.4", + "windows-targets 0.53.5", ] [[package]] name = "windows-sys" -version = "0.61.1" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link 0.2.1", ] @@ -6343,9 +6343,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.4" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d42b7b7f66d2a06854650af09cfdf8713e427a439c97ad65a6375318033ac4b" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ "windows-link 0.2.1", "windows_aarch64_gnullvm 0.53.1", diff --git a/Cargo.toml b/Cargo.toml index 32d931699b..885abc5f06 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,11 +47,10 @@ default-features = false [workspace.dependencies.snarkvm] #path = "../snarkVM" git = "https://github.com/ProvableHQ/snarkVM.git" +#rev = "e873e69b458" branch = "feat/track-error" -#rev = "619464c8edf75636809d0bfb8e8404de78a62c97" #version = "=4.2.1" default-features = false -#features = [ "circuit", "console", "rocks" ] [workspace.dependencies.anyhow] version = "1.0" diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index 850f3ae228..26ff9f73f0 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -125,7 +125,7 @@ workspace = true [dependencies.snarkvm] workspace = true -features = [ "utilities", "async" ] +features = [ "async", "utilities" ] [dependencies.time] workspace = true diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index 2d878d7bfb..89913c61ba 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -40,9 +40,11 @@ use snarkvm::{ puzzle::{Solution, SolutionID}, }, prelude::{Field, Network, Result, bail, ensure}, + utilities::LoggableError, }; use aleo_std::StorageMode; +use anyhow::Context; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] @@ -269,8 +271,8 @@ impl BFT { // If the BFT is ready, then update to the next round. if is_ready { // Update to the next round in storage. 
- if let Err(e) = self.storage().increment_to_next_round(current_round) { - warn!("BFT failed to increment to the next round from round {current_round} - {e}"); + if let Err(err) = self.storage().increment_to_next_round(current_round) { + err.log_warning(format!("BFT failed to increment to the next round from round {current_round}")); return false; } // Update the timer for the leader certificate. @@ -312,8 +314,10 @@ impl BFT { // Retrieve the committee lookback of the current round. let committee_lookback = match self.ledger().get_committee_lookback_for_round(current_round) { Ok(committee) => committee, - Err(e) => { - error!("BFT failed to retrieve the committee lookback for the even round {current_round} - {e}"); + Err(err) => { + err.log_error(format!( + "BFT failed to retrieve the committee lookback for the even round {current_round}" + )); return false; } }; @@ -324,8 +328,8 @@ impl BFT { // Compute the leader for the current round. let computed_leader = match committee_lookback.get_leader(current_round) { Ok(leader) => leader, - Err(e) => { - error!("BFT failed to compute the leader for the even round {current_round} - {e}"); + Err(err) => { + err.log_error(format!("BFT failed to compute the leader for the even round {current_round}")); return false; } }; @@ -403,8 +407,10 @@ impl BFT { // Retrieve the committee lookback for the current round. let committee_lookback = match self.ledger().get_committee_lookback_for_round(current_round) { Ok(committee) => committee, - Err(e) => { - error!("BFT failed to retrieve the committee lookback for the odd round {current_round} - {e}"); + Err(err) => { + err.log_error(format!( + "BFT failed to retrieve the committee lookback for the odd round {current_round}" + )); return false; } }; @@ -498,7 +504,7 @@ impl BFT { // Retrieve the committee lookback for the commit round. let Ok(committee_lookback) = self.ledger().get_committee_lookback_for_round(commit_round) else { - bail!("BFT failed to retrieve the committee with lag for commit round {commit_round}"); + bail!("BFT failed to retrieve the committee lookback for commit round {commit_round}"); }; // Either retrieve the cached leader or compute it. @@ -573,23 +579,19 @@ impl BFT { for round in (self.dag.read().last_committed_round() + 2..=leader_round.saturating_sub(2)).rev().step_by(2) { // Retrieve the previous committee for the leader round. - let previous_committee_lookback = match self.ledger().get_committee_lookback_for_round(round) { - Ok(committee) => committee, - Err(e) => { - bail!("BFT failed to retrieve a previous committee lookback for the even round {round} - {e}"); - } - }; + let previous_committee_lookback = + self.ledger().get_committee_lookback_for_round(round).with_context(|| { + format!("BFT failed to retrieve a previous committee lookback for the even round {round}") + })?; + // Either retrieve the cached leader or compute it. let leader = match self.ledger().latest_leader() { Some((cached_round, cached_leader)) if cached_round == round => cached_leader, _ => { // Compute the leader for the commit round. - let computed_leader = match previous_committee_lookback.get_leader(round) { - Ok(leader) => leader, - Err(e) => { - bail!("BFT failed to compute the leader for the even round {round} - {e}"); - } - }; + let computed_leader = previous_committee_lookback + .get_leader(round) + .with_context(|| format!("BFT failed to compute the leader for the even round {round}"))?; // Cache the computed leader. 
self.ledger().update_latest_leader(round, computed_leader);
@@ -710,12 +712,12 @@ impl BFT {
 // Await the callback to continue.
 match callback_receiver.await {
 Ok(Ok(())) => (), // continue
- Ok(Err(e)) => {
- error!("BFT failed to advance the subdag for round {anchor_round} - {e}");
+ Ok(Err(err)) => {
+ err.log_error(format!("BFT failed to advance the subdag for round {anchor_round}"));
 return Ok(());
 }
- Err(e) => {
- error!("BFT failed to receive the callback for round {anchor_round} - {e}");
+ Err(err) => {
+ err.log_error(format!("BFT failed to receive the callback for round {anchor_round}"));
 return Ok(());
 }
 }
diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs
index ba80f578a7..833a627fe0 100644
--- a/node/bft/src/sync/mod.rs
+++ b/node/bft/src/sync/mod.rs
@@ -20,7 +20,6 @@ use crate::{
 Transport,
 events::DataBlocks,
 helpers::{BFTSender, Pending, Storage, SyncReceiver, fmt_id, max_redundant_requests},
- spawn_blocking,
 };
 use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event};
 use snarkos_node_bft_ledger_service::LedgerService;
@@ -30,13 +29,14 @@ use snarkvm::{
 console::{network::Network, types::Field},
 ledger::{authority::Authority, block::Block, narwhal::BatchCertificate},
 utilities::{
+ LoggableError,
 cfg_into_iter,
 cfg_iter,
 task::{self, JoinHandle},
 },
 };
-use anyhow::{Result, anyhow, bail};
+use anyhow::{Context, Result, anyhow, bail};
 use indexmap::IndexMap;
 #[cfg(feature = "locktick")]
 use locktick::{parking_lot::Mutex, tokio::Mutex as TMutex};
@@ -184,7 +184,7 @@ impl Sync {
 if let Some(ping) = &ping {
 match self_.get_block_locators() {
 Ok(locators) => ping.update_block_locators(locators),
- Err(err) => error!("Failed to update block locators: {err}"),
+ Err(err) => err.log_error("Failed to update block locators"),
 }
 }
 }
@@ -200,10 +200,10 @@ impl Sync {
 // Remove the expired pending transmission requests.
 let self__ = self_.clone();
- let _ = spawn_blocking!({
+ task::spawn_blocking(move || {
 self__.pending.clear_expired_callbacks();
- Ok(())
- });
+ })
+ .await;
 }
 });
@@ -313,7 +313,7 @@ impl Sync {
 match self.try_advancing_block_synchronization().await {
 Ok(new_blocks) => new_blocks,
 Err(err) => {
- error!("Block synchronization failed - {err}");
+ err.log_error("Block synchronization failed");
 false
 }
 }
@@ -434,9 +434,11 @@ impl Sync {
 // If a BFT sender was provided, send the certificates to the BFT.
 if let Some(bft_sender) = self.bft_sender.get() {
 // Await the callback to continue.
- if let Err(e) = bft_sender.tx_sync_bft_dag_at_bootup.send(certificates).await {
- bail!("Failed to update the BFT DAG from sync: {e}");
- }
+ bft_sender
+ .tx_sync_bft_dag_at_bootup
+ .send(certificates)
+ .await
+ .with_context(|| "Failed to update the BFT DAG from sync")?;
 }
 self.block_sync.set_sync_height(block_height);
@@ -584,7 +586,7 @@ impl Sync {
 if within_gc {
 info!("Finished catching up with the network. Switching back to BFT sync.");
 if let Err(err) = self.sync_storage_with_ledger_at_bootup().await {
- error!("BFT sync (with bootup routine) failed - {err}");
+ err.log_error("BFT sync (with bootup routine) failed");
 }
 }
@@ -600,7 +602,7 @@ impl Sync {
 let _lock = self.sync_lock.lock().await;
 let self_ = self.clone();
- tokio::task::spawn_blocking(move || {
+ task::spawn_blocking(move || {
 // Check the next block.
 self_.ledger.check_next_block(&block)?;
 // Attempt to advance to the next block.
@@ -615,7 +617,7 @@ impl Sync {
 Ok(())
 })
- .await?
+ .await
 }
 /// Advances the ledger by the given block and updates the storage accordingly.
@@ -773,7 +775,7 @@ impl Sync {
 let block_authority = block.authority().clone();
 let self_ = self.clone();
- tokio::task::spawn_blocking(move || {
+ task::spawn_blocking(move || {
 // Check the next block.
 self_.ledger.check_next_block(&block)?;
 // Attempt to advance to the next block.
@@ -786,7 +788,7 @@ impl Sync {
 Ok::<(), anyhow::Error>(())
 })
- .await??;
+ .await?;
 // Remove the block height from the latest block responses.
 latest_block_responses.remove(&block_height);
@@ -891,12 +893,12 @@ impl Sync {
 }
 // Wait for the certificate to be fetched.
 // TODO (raychu86): Consider making the timeout dynamic based on network traffic and/or the number of validators.
- match tokio::time::timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver).await {
- // If the certificate was fetched, return it.
- Ok(result) => Ok(result?),
- // If the certificate was not fetched, return an error.
- Err(e) => bail!("Unable to fetch certificate {} - (timeout) {e}", fmt_id(certificate_id)),
- }
+ let cert = tokio::time::timeout(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS), callback_receiver)
+ .await
+ .with_context(|| format!("Unable to fetch certificate {} (timeout)", fmt_id(certificate_id)))?
+ .with_context(|| format!("Unable to fetch certificate {} (callback channel closed)", fmt_id(certificate_id)))?;
+
+ Ok(cert)
 }
 /// Handles the incoming certificate request.
diff --git a/node/sync/src/block_sync.rs b/node/sync/src/block_sync.rs
index b2d320ad2d..8372364a22 100644
--- a/node/sync/src/block_sync.rs
+++ b/node/sync/src/block_sync.rs
@@ -21,7 +21,7 @@ use snarkos_node_bft_ledger_service::LedgerService;
 use snarkos_node_router::{PeerPoolHandling, messages::DataBlocks};
 use snarkos_node_sync_communication_service::CommunicationService;
 use snarkos_node_sync_locators::{CHECKPOINT_INTERVAL, NUM_RECENT_BLOCKS};
-use snarkvm::prelude::{Network, block::Block};
+use snarkvm::{console::network::Network, ledger::Block, utilities::LoggableError};
 use anyhow::{Result, bail, ensure};
 use indexmap::{IndexMap, IndexSet};
@@ -360,8 +360,8 @@ impl BlockSync {
 // Insert the chunk of block requests.
 for (height, (hash, previous_hash, _)) in requests.iter() {
 // Insert the block request into the sync pool using the sync IPs from the last block request in the chunk.
- if let Err(error) = self.insert_block_request(*height, (*hash, *previous_hash, sync_ips.clone())) { - warn!("Block sync failed - {error}"); + if let Err(err) = self.insert_block_request(*height, (*hash, *previous_hash, sync_ips.clone())) { + err.log_error("Block sync failed"); return false; } } @@ -380,7 +380,7 @@ impl BlockSync { match sender { Some(sender) => { if let Err(err) = sender.await { - warn!("Failed to send block request to peer '{sync_ip}': {err}"); + err.log_warning(format!("Failed to send block request to peer '{sync_ip}'")); false } else { true @@ -401,7 +401,7 @@ impl BlockSync { let success = match result { Ok(success) => success, Err(err) => { - error!("tokio join error: {err}"); + err.log_error("tokio join error"); false } }; @@ -432,7 +432,7 @@ impl BlockSync { for block in blocks { if let Err(error) = self.insert_block_response(peer_ip, block) { self.remove_block_requests_to_peer(&peer_ip); - bail!("{error}"); + return Err(error); } } Ok(()) @@ -509,20 +509,20 @@ impl BlockSync { Ok(_) => match ledger.advance_to_next_block(&block) { Ok(_) => true, Err(err) => { - warn!( - "Failed to advance to next block (height: {}, hash: '{}'): {err}", + err.log_warning(format!( + "Failed to advance to next block (height: {}, hash: '{}')", block.height(), block.hash() - ); + )); false } }, Err(err) => { - warn!( - "The next block (height: {}, hash: '{}') is invalid - {err}", + err.log_warning(format!( + "The next block (height: {}, hash: '{}') is invalid", block.height(), block.hash() - ); + )); false } } From b8d6b73e81cb5f5a69b1c23390467859c29dd0ba Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Thu, 4 Sep 2025 12:38:54 -0600 Subject: [PATCH 09/13] misc(node/bft): replace SyncSender with a callback --- Cargo.lock | 51 +++++---- Cargo.toml | 2 +- build.rs | 10 +- node/bft/src/gateway.rs | 67 +++++++----- node/bft/src/helpers/channels.rs | 86 +-------------- node/bft/src/helpers/mod.rs | 62 +++++++++++ node/bft/src/primary.rs | 7 +- node/bft/src/sync/mod.rs | 182 ++++++++++--------------------- node/src/client/mod.rs | 6 +- 9 files changed, 209 insertions(+), 264 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3940228ce7..2a54574b04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2264,6 +2264,17 @@ name = "locktick" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "307f02aff720d58003290879abe635b818b2176488c5ba2855ab9c11b4e0c04e" +dependencies = [ + "backtrace", + "parking_lot", + "simple_moving_average", +] + +[[package]] +name = "locktick" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a4a7c4b9459e549968abf200cb77c6faf0bdedc87df5065f73ca95031795050" dependencies = [ "backtrace", "parking_lot", @@ -3760,7 +3771,7 @@ version = "4.2.2" dependencies = [ "built", "clap", - "locktick", + "locktick 0.3.0", "rusty-hook", "snarkos-account", "snarkos-cli", @@ -3800,7 +3811,7 @@ dependencies = [ "colored 3.0.0", "crossterm 0.29.0", "indexmap 2.11.4", - "locktick", + "locktick 0.4.0", "nix", "num_cpus", "parking_lot", @@ -3855,7 +3866,7 @@ dependencies = [ "futures-util", "http 1.3.1", "indexmap 2.11.4", - "locktick", + "locktick 0.4.0", "lru 0.16.1", "num_cpus", "parking_lot", @@ -3898,7 +3909,7 @@ dependencies = [ "futures", "indexmap 2.11.4", "itertools 0.14.0", - "locktick", + "locktick 0.4.0", "lru 0.16.1", "mockall", "open", @@ -3957,7 +3968,7 @@ dependencies = [ "anyhow", "async-trait", "indexmap 2.11.4", - "locktick", + "locktick 0.4.0", "parking_lot", "rand 0.8.5", "rayon", @@ 
-3975,7 +3986,7 @@ dependencies = [ "aleo-std", "anyhow", "indexmap 2.11.4", - "locktick", + "locktick 0.4.0", "lru 0.16.1", "parking_lot", "snarkvm", @@ -3990,7 +4001,7 @@ dependencies = [ "bincode", "colored 3.0.0", "http 1.3.1", - "locktick", + "locktick 0.4.0", "parking_lot", "rayon", "reqwest", @@ -4013,7 +4024,7 @@ dependencies = [ "colored 3.0.0", "indexmap 2.11.4", "itertools 0.14.0", - "locktick", + "locktick 0.4.0", "lru 0.16.1", "once_cell", "parking_lot", @@ -4033,7 +4044,7 @@ dependencies = [ name = "snarkos-node-metrics" version = "4.2.2" dependencies = [ - "locktick", + "locktick 0.4.0", "metrics-exporter-prometheus", "parking_lot", "rayon", @@ -4053,7 +4064,7 @@ dependencies = [ "http 1.3.1", "indexmap 2.11.4", "jsonwebtoken", - "locktick", + "locktick 0.4.0", "once_cell", "parking_lot", "rand 0.8.5", @@ -4086,7 +4097,7 @@ dependencies = [ "futures", "futures-util", "linked-hash-map", - "locktick", + "locktick 0.4.0", "parking_lot", "peak_alloc", "rand 0.8.5", @@ -4132,7 +4143,7 @@ dependencies = [ "futures", "indexmap 2.11.4", "itertools 0.14.0", - "locktick", + "locktick 0.4.0", "parking_lot", "rand 0.8.5", "serde", @@ -4174,7 +4185,7 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "locktick", + "locktick 0.4.0", "once_cell", "parking_lot", "snarkos-node-metrics", @@ -4673,7 +4684,7 @@ dependencies = [ "aleo-std", "anyhow", "indexmap 2.11.4", - "locktick", + "locktick 0.3.0", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4842,7 +4853,7 @@ dependencies = [ "anyhow", "bincode", "indexmap 2.11.4", - "locktick", + "locktick 0.3.0", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4862,7 +4873,7 @@ dependencies = [ "anyhow", "colored 3.0.0", "indexmap 2.11.4", - "locktick", + "locktick 0.3.0", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -4902,7 +4913,7 @@ dependencies = [ "anyhow", "bincode", "indexmap 2.11.4", - "locktick", + "locktick 0.3.0", "parking_lot", "rayon", "rocksdb", @@ -4959,7 +4970,7 @@ dependencies = [ "curl", "hex", "lazy_static", - "locktick", + "locktick 0.3.0", "parking_lot", "paste", "rand 0.8.5", @@ -4979,7 +4990,7 @@ dependencies = [ "anyhow", "indexmap 2.11.4", "itertools 0.14.0", - "locktick", + "locktick 0.3.0", "lru 0.16.1", "parking_lot", "rand 0.8.5", @@ -5011,7 +5022,7 @@ dependencies = [ "aleo-std", "colored 3.0.0", "indexmap 2.11.4", - "locktick", + "locktick 0.3.0", "parking_lot", "rand 0.8.5", "rand_chacha 0.3.1", diff --git a/Cargo.toml b/Cargo.toml index 885abc5f06..8dd6081303 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,7 +95,7 @@ version = "0.3" version = "0.3" [workspace.dependencies.locktick] -version = "0.3" +version = "0.4" [workspace.dependencies.lru] version = "0.16" diff --git a/build.rs b/build.rs index a807f82d0b..806685e9e0 100644 --- a/build.rs +++ b/build.rs @@ -101,8 +101,14 @@ fn check_locktick_imports>(path: P) { if line.contains("Mutex") { lock_balance += 1; } - if line.contains("RwLock") { - lock_balance += 1; + + // Only count the import of RwLock itself. 
+ lock_balance += line.matches("RwLock").count() as i8; + if line.contains("RwLockReadGuard") { + lock_balance -= 1; + } + if line.contains("RwLockWriteGuard") { + lock_balance -= 1; } } else if ioi == ImportOfInterest::Locktick { // Use `matches` instead of just `contains` here, as more than a single diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index 6c72524b59..862e135391 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -21,7 +21,7 @@ use crate::{ MEMORY_POOL_PORT, Worker, events::{EventCodec, PrimaryPing}, - helpers::{Cache, PrimarySender, Resolver, Storage, SyncSender, WorkerSender, assign_to_worker}, + helpers::{Cache, CallbackHandle, PrimarySender, Resolver, Storage, WorkerSender, assign_to_worker}, spawn_blocking, }; use aleo_std::StorageMode; @@ -44,7 +44,7 @@ use snarkos_node_bft_events::{ }; use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_router::{NodeType, Peer, PeerPoolHandling}; -use snarkos_node_sync::{MAX_BLOCKS_BEHIND, communication_service::CommunicationService}; +use snarkos_node_sync::{MAX_BLOCKS_BEHIND, communication_service::CommunicationService, locators::BlockLocators}; use snarkos_node_tcp::{ Config, Connection, @@ -56,6 +56,7 @@ use snarkos_node_tcp::{ use snarkvm::{ console::prelude::*, ledger::{ + Block, committee::Committee, narwhal::{BatchHeader, Data}, }, @@ -116,6 +117,22 @@ pub trait Transport: Send + Sync { fn broadcast(&self, event: Event); } +pub trait SyncCallback: Send + Sync { + /// We received a block response and can (possibly) advance synchronization. + fn insert_block_response(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()>; + + /// We received new peer locators during a Ping. + fn update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()>; + + /// A peer disconnected. + fn remove_peer(&self, peer_ip: SocketAddr); + + /// Handles the incoming certificate request. + fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest); + + fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse); +} + /// The gateway maintains connections to other validators. /// For connections with clients and provers, the Router logic is used. #[derive(Clone)] @@ -149,9 +166,9 @@ pub struct InnerGateway { /// The primary sender. primary_sender: OnceCell>, /// The worker senders. - worker_senders: OnceCell>>, - /// The sync sender. - sync_sender: OnceCell>, + worker_senders: Arc>>>, + /// The callback for sync messages. + sync_callback: Arc>>>, /// The spawned handles. handles: Mutex>>, /// The storage mode. @@ -214,7 +231,7 @@ impl Gateway { validator_telemetry: Default::default(), primary_sender: Default::default(), worker_senders: Default::default(), - sync_sender: Default::default(), + sync_callback: Default::default(), handles: Default::default(), storage_mode, dev, @@ -226,7 +243,7 @@ impl Gateway { &self, primary_sender: PrimarySender, worker_senders: IndexMap>, - sync_sender: Option>, + sync_callback: Option>>, ) { debug!("Starting the gateway for the memory pool..."); @@ -236,9 +253,8 @@ impl Gateway { // Set the worker senders. self.worker_senders.set(worker_senders).expect("The worker senders are already set"); - // If the sync sender was provided, set the sync sender. 
- if let Some(sync_sender) = sync_sender { - self.sync_sender.set(sync_sender).expect("Sync sender already set in gateway"); + if let Some(sync_callback) = sync_callback { + self.sync_callback.set(sync_callback).unwrap(); } // Enable the TCP protocols. @@ -454,13 +470,8 @@ impl Gateway { /// Removes the connected peer and adds them to the candidate peers. fn remove_connected_peer(&self, peer_ip: SocketAddr) { // Remove the peer from the sync module. Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { - let tx_block_sync_remove_peer_ = sync_sender.tx_block_sync_remove_peer.clone(); - tokio::spawn(async move { - if let Err(e) = tx_block_sync_remove_peer_.send(peer_ip).await { - warn!("Unable to remove '{peer_ip}' from the sync module - {e}"); - } - }); + if let Some(cb) = &*self.sync_callback.get_ref() { + cb.remove_peer(peer_ip); } if let Some(peer) = self.peer_pool.write().get_mut(&peer_ip) { if let Peer::Connected(connected_peer) = peer { @@ -604,7 +615,7 @@ impl Gateway { } Event::BlockResponse(block_response) => { // Process the block response. Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = self.sync_callback.get() { // Retrieve the block response. let BlockResponse { request, blocks } = block_response; @@ -630,8 +641,8 @@ impl Gateway { // Ensure the block response is well-formed. blocks.ensure_response_is_well_formed(peer_ip, request.start_height, request.end_height)?; // Send the blocks to the sync module. - if let Err(e) = sync_sender.advance_with_sync_blocks(peer_ip, blocks.0).await { - warn!("Unable to process block response from '{peer_ip}' - {e}"); + if let Err(err) = cb.insert_block_response(peer_ip, blocks.0) { + warn!("Unable to process block response from '{peer_ip}': {err}"); } } Ok(true) @@ -639,18 +650,18 @@ impl Gateway { Event::CertificateRequest(certificate_request) => { // Send the certificate request to the sync module. // Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Send the certificate request to the sync module. - let _ = sync_sender.tx_certificate_request.send((peer_ip, certificate_request)).await; + cb.send_certificate_response(peer_ip, certificate_request); } Ok(true) } Event::CertificateResponse(certificate_response) => { // Send the certificate response to the sync module. // Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Send the certificate response to the sync module. - let _ = sync_sender.tx_certificate_response.send((peer_ip, certificate_response)).await; + cb.finish_certificate_request(peer_ip, certificate_response); } Ok(true) } @@ -673,9 +684,9 @@ impl Gateway { } // Update the peer locators. Except for some tests, there is always a sync sender. - if let Some(sync_sender) = self.sync_sender.get() { + if let Some(cb) = &*self.sync_callback.get_ref() { // Check the block locators are valid, and update the validators in the sync module. - if let Err(error) = sync_sender.update_peer_locators(peer_ip, block_locators).await { + if let Err(error) = cb.update_peer_locators(peer_ip, block_locators) { bail!("Validator '{peer_ip}' sent invalid block locators - {error}"); } } @@ -848,9 +859,11 @@ impl Gateway { warn!("Failed to persist best validators to disk: {e}"); } // Abort the tasks. 
- self.handles.lock().iter().for_each(|handle| handle.abort()); + self.handles.lock().drain(..).for_each(|handle| handle.abort()); // Close the listener. self.tcp.shut_down().await; + // Remove the sync callback (so it can be dropped). + self.sync_callback.clear(); } } diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 1370ee33de..8dfa1229f9 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -13,19 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::events::{ - BatchPropose, - BatchSignature, - CertificateRequest, - CertificateResponse, - TransmissionRequest, - TransmissionResponse, -}; -use snarkos_node_sync::locators::BlockLocators; +use crate::events::{BatchPropose, BatchSignature, TransmissionRequest, TransmissionResponse}; use snarkvm::{ console::network::*, ledger::{ - block::{Block, Transaction}, + block::Transaction, narwhal::{BatchCertificate, Data, Subdag, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, @@ -228,77 +220,3 @@ pub fn init_worker_channels() -> (WorkerSender, WorkerReceiver (sender, receiver) } - -#[derive(Debug)] -pub struct SyncSender { - pub tx_block_sync_advance_with_sync_blocks: mpsc::Sender<(SocketAddr, Vec>, oneshot::Sender>)>, - pub tx_block_sync_remove_peer: mpsc::Sender, - pub tx_block_sync_update_peer_locators: mpsc::Sender<(SocketAddr, BlockLocators, oneshot::Sender>)>, - pub tx_certificate_request: mpsc::Sender<(SocketAddr, CertificateRequest)>, - pub tx_certificate_response: mpsc::Sender<(SocketAddr, CertificateResponse)>, -} - -impl SyncSender { - /// Sends the request to update the peer locators. - pub async fn update_peer_locators(&self, peer_ip: SocketAddr, block_locators: BlockLocators) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the request to update the peer locators. - // This `tx_block_sync_update_peer_locators.send()` call - // causes the `rx_block_sync_update_peer_locators.recv()` call - // in one of the loops in [`Sync::run()`] to return. - self.tx_block_sync_update_peer_locators.send((peer_ip, block_locators, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the request to advance with sync blocks. - pub async fn advance_with_sync_blocks(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the request to advance with sync blocks. - // This `tx_block_sync_advance_with_sync_blocks.send()` call - // causes the `rx_block_sync_advance_with_sync_blocks.recv()` call - // in one of the loops in [`Sync::run()`] to return. - self.tx_block_sync_advance_with_sync_blocks.send((peer_ip, blocks, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } -} - -#[derive(Debug)] -pub struct SyncReceiver { - pub rx_block_sync_advance_with_sync_blocks: - mpsc::Receiver<(SocketAddr, Vec>, oneshot::Sender>)>, - pub rx_block_sync_remove_peer: mpsc::Receiver, - pub rx_block_sync_update_peer_locators: mpsc::Receiver<(SocketAddr, BlockLocators, oneshot::Sender>)>, - pub rx_certificate_request: mpsc::Receiver<(SocketAddr, CertificateRequest)>, - pub rx_certificate_response: mpsc::Receiver<(SocketAddr, CertificateResponse)>, -} - -/// Initializes the sync channels. 
-pub fn init_sync_channels() -> (SyncSender, SyncReceiver) { - let (tx_block_sync_advance_with_sync_blocks, rx_block_sync_advance_with_sync_blocks) = - mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_block_sync_remove_peer, rx_block_sync_remove_peer) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_block_sync_update_peer_locators, rx_block_sync_update_peer_locators) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_certificate_request, rx_certificate_request) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_certificate_response, rx_certificate_response) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = SyncSender { - tx_block_sync_advance_with_sync_blocks, - tx_block_sync_remove_peer, - tx_block_sync_update_peer_locators, - tx_certificate_request, - tx_certificate_response, - }; - let receiver = SyncReceiver { - rx_block_sync_advance_with_sync_blocks, - rx_block_sync_remove_peer, - rx_block_sync_update_peer_locators, - rx_certificate_request, - rx_certificate_response, - }; - - (sender, receiver) -} diff --git a/node/bft/src/helpers/mod.rs b/node/bft/src/helpers/mod.rs index 7d9dd7f531..50c31cbc78 100644 --- a/node/bft/src/helpers/mod.rs +++ b/node/bft/src/helpers/mod.rs @@ -54,6 +54,14 @@ pub use telemetry::*; pub mod timestamp; pub use timestamp::*; +#[cfg(feature = "locktick")] +use locktick::{LockGuard, parking_lot::RwLock}; +#[cfg(not(feature = "locktick"))] +use parking_lot::RwLock; + +use anyhow::{Result, bail}; +use parking_lot::RwLockReadGuard; + /// Formats an ID into a truncated identifier (for logging purposes). pub fn fmt_id(id: impl ToString) -> String { let id = id.to_string(); @@ -63,3 +71,57 @@ pub fn fmt_id(id: impl ToString) -> String { } formatted_id } + +/// Helper struct to hold a reference to a callback struct. +pub struct CallbackHandle { + callback: RwLock>, +} + +impl Default for CallbackHandle { + /// By default, the handle holds no callback. + fn default() -> Self { + Self { callback: RwLock::new(None) } + } +} + +impl CallbackHandle { + /// Set a callback. Returns an error if a callback was already set. + pub fn set(&self, callback: C) -> Result<()> { + let prev = self.callback.write().replace(callback); + + if prev.is_some() { + bail!("Callback was already set"); + } + + Ok(()) + } + + /// Get a cloned copy of the callback. + /// Useful when the callback will be used across await-boundaries. + #[inline] + pub fn get(&self) -> Option { + self.callback.read().clone() + } + + /// Get reference to the callback. + /// Cannot be shared across await-boundaries. + #[cfg(feature = "locktick")] + #[inline] + pub fn get_ref(&self) -> LockGuard>> { + self.callback.read() + } + + /// Get reference to the callback. + /// Cannot be shared across await-boundaries. + #[cfg(not(feature = "locktick"))] + #[inline] + pub fn get_ref(&self) -> RwLockReadGuard<'_, Option> { + self.callback.read() + } + + /// Remove the callback. + /// Used during shutdown to resolve circular dependencies between types. + pub fn clear(&self) { + let _ = self.callback.write().take(); + } +} diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 61bfeec712..cb45c5f522 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -35,7 +35,6 @@ use crate::{ assign_to_worker, assign_to_workers, fmt_id, - init_sync_channels, init_worker_channels, now, }, @@ -238,16 +237,14 @@ impl Primary { // Set the workers. self.workers = Arc::from(workers); - // First, initialize the sync channels. 
- let (sync_sender, sync_receiver) = init_sync_channels(); // Next, initialize the sync module and sync the storage from ledger. self.sync.initialize(bft_sender).await?; // Next, load and process the proposal cache before running the sync module. self.load_proposal_cache().await?; // Next, run the sync module. - self.sync.run(ping, sync_receiver).await?; + self.sync.run(ping).await?; // Next, initialize the gateway. - self.gateway.run(primary_sender, worker_senders, Some(sync_sender)).await; + self.gateway.run(primary_sender, worker_senders, Some(Arc::new(self.sync.clone()))).await; // Lastly, start the primary handlers. // Note: This ensures the primary does not start communicating before syncing is complete. self.start_handlers(primary_receiver); diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 833a627fe0..35e45bacbb 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -19,7 +19,8 @@ use crate::{ PRIMARY_PING_IN_MS, Transport, events::DataBlocks, - helpers::{BFTSender, Pending, Storage, SyncReceiver, fmt_id, max_redundant_requests}, + gateway::SyncCallback as GatewaySyncCallback, + helpers::{BFTSender, Pending, Storage, fmt_id, max_redundant_requests}, }; use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event}; use snarkos_node_bft_ledger_service::LedgerService; @@ -49,16 +50,19 @@ use std::{ future::Future, net::SocketAddr, sync::Arc, - time::Duration, + time::{Duration, Instant}, }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::sync::{OnceCell, oneshot}; +use tokio::{ + sync::{OnceCell, oneshot}, + time::{sleep, timeout}, +}; /// Block synchronization logic for validators. /// /// Synchronization works differently for nodes that act as validators in AleoBFT; -/// In the common case, validators generate blocks after receiving an anchor block that has been accepted +/// In the common case, validators generate blocks after receiving an anchor certificate that has been accepted /// by a supermajority of the committee instead of fetching entire blocks from other nodes. /// However, if a validator does not have an up-to-date DAG, it might still fetch entire blocks from other nodes. /// @@ -98,6 +102,8 @@ pub struct Sync { } impl Sync { + const SYNC_INTERVAL: Duration = Duration::from_millis(PRIMARY_PING_IN_MS); + /// Initializes a new sync instance. pub fn new( gateway: Gateway, @@ -164,7 +170,7 @@ impl Sync { /// /// When this function returns successfully, the sync module will have spawned background tasks /// that fetch blocks from other validators. - pub async fn run(&self, ping: Option>>, sync_receiver: SyncReceiver) -> Result<()> { + pub async fn run(&self, ping: Option>>) -> Result<()> { info!("Starting the sync module..."); // Start the block sync loop. @@ -174,10 +180,18 @@ impl Sync { // Ideally, a node does not consider itself synced when it has not received // any block locators from peers. However, in the initial bootup of validators, // this needs to happen, so we use this additional sleep as a grace period. - tokio::time::sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + + let mut last_update = Instant::now(); loop { - // Sleep briefly to avoid triggering spam detection. 
- tokio::time::sleep(Duration::from_millis(PRIMARY_PING_IN_MS)).await; + // Make sure we do not sync too often + let now = Instant::now(); + let elapsed = now.saturating_duration_since(last_update); + let sleep_time = Self::SYNC_INTERVAL.saturating_sub(elapsed); + + if !sleep_time.is_zero() { + sleep(sleep_time).await; + } let new_blocks = self_.try_block_sync().await; if new_blocks { @@ -188,6 +202,7 @@ impl Sync { } } } + last_update = now; } }); @@ -196,7 +211,7 @@ impl Sync { self.spawn(async move { loop { // Sleep briefly. - tokio::time::sleep(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS)).await; + sleep(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS)).await; // Remove the expired pending transmission requests. let self__ = self_.clone(); @@ -207,78 +222,6 @@ impl Sync { } }); - /* Set up callbacks for events from the Gateway */ - - // Retrieve the sync receiver. - let SyncReceiver { - mut rx_block_sync_advance_with_sync_blocks, - mut rx_block_sync_remove_peer, - mut rx_block_sync_update_peer_locators, - mut rx_certificate_request, - mut rx_certificate_response, - } = sync_receiver; - - // Process the block sync request to advance with sync blocks. - // Each iteration of this loop is triggered by an incoming [`BlockResponse`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`SyncSender::advance_with_sync_blocks()`], - // which calls [`tx_block_sync_advance_with_sync_blocks.send()`], - // which causes the `rx_block_sync_advance_with_sync_blocks.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, blocks, callback)) = rx_block_sync_advance_with_sync_blocks.recv().await { - callback.send(self_.advance_with_sync_blocks(peer_ip, blocks).await).ok(); - } - }); - - // Process the block sync request to remove the peer. - let self_ = self.clone(); - self.spawn(async move { - while let Some(peer_ip) = rx_block_sync_remove_peer.recv().await { - self_.remove_peer(peer_ip); - } - }); - - // Process each block sync request to update peer locators. - // Each iteration of this loop is triggered by an incoming [`PrimaryPing`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`SyncSender::update_peer_locators()`], - // which calls [`tx_block_sync_update_peer_locators.send()`], - // which causes the `rx_block_sync_update_peer_locators.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, locators, callback)) = rx_block_sync_update_peer_locators.recv().await { - let self_clone = self_.clone(); - tokio::spawn(async move { - callback.send(self_clone.update_peer_locators(peer_ip, locators)).ok(); - }); - } - }); - - // Process each certificate request. - // Each iteration of this loop is triggered by an incoming [`CertificateRequest`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`tx_certificate_request.send()`], - // which causes the `rx_certificate_request.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, certificate_request)) = rx_certificate_request.recv().await { - self_.send_certificate_response(peer_ip, certificate_request); - } - }); - - // Process each certificate response. 
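The loop above now throttles itself against `SYNC_INTERVAL` instead of sleeping unconditionally, so a slow sync iteration is not penalised with an extra full sleep on top of its own runtime. A minimal sketch of the pattern, assuming tokio and a stand-in `try_block_sync`:

    use std::time::{Duration, Instant};
    use tokio::time::sleep;

    // Stand-in for `PRIMARY_PING_IN_MS`; the value is illustrative.
    const SYNC_INTERVAL: Duration = Duration::from_millis(500);

    // Stand-in for `Sync::try_block_sync`; returns whether new blocks were processed.
    async fn try_block_sync() -> bool {
        false
    }

    #[tokio::main]
    async fn main() {
        let mut last_update = Instant::now();
        // Bounded here so the example terminates; the real loop runs until shutdown.
        for _ in 0..3 {
            // Make sure we do not sync more often than once per SYNC_INTERVAL.
            let now = Instant::now();
            let elapsed = now.saturating_duration_since(last_update);
            let sleep_time = SYNC_INTERVAL.saturating_sub(elapsed);
            if !sleep_time.is_zero() {
                sleep(sleep_time).await;
            }

            let _new_blocks = try_block_sync().await;

            // Measure from the start of this iteration, so the time spent
            // syncing counts toward the next interval.
            last_update = now;
        }
    }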
- // Each iteration of this loop is triggered by an incoming [`CertificateResponse`], - // which is initially handled by [`Gateway::inbound()`], - // which calls [`tx_certificate_response.send()`], - // which causes the `rx_certificate_response.recv()` call below to return. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, certificate_response)) = rx_certificate_response.recv().await { - self_.finish_certificate_request(peer_ip, certificate_response); - } - }); - Ok(()) } @@ -297,6 +240,9 @@ impl Sync { self.send_block_requests(sync_peers, requests).await; } + // Wait for updates or a timeout. + let _ = timeout(Self::SYNC_INTERVAL, self.block_sync.wait_for_update()).await; + // Do not attempt to sync if there are no blocks to sync. // This prevents redundant log messages and performing unnecessary computation. if !self.block_sync.can_block_sync() { @@ -318,22 +264,19 @@ impl Sync { } } } + + /// Test-only. Manually add peer locators. + #[cfg(test)] + pub fn test_update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()> { + self.update_peer_locators(peer_ip, locators) + } } // Callbacks used when receiving messages from the Gateway -impl Sync { +impl GatewaySyncCallback for Sync { /// We received a block response and can (possibly) advance synchronization. - async fn advance_with_sync_blocks(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { - // Verify that the response is valid and add it to block sync. - self.block_sync.insert_block_responses(peer_ip, blocks)?; - - // Try to process responses stored in BlockSync. - // Note: Do not call `self.block_sync.try_advancing_block_synchronziation` here as it will process - // and remove any completed requests, which means the call to `sync_storage_with_blocks` will not process - // them as expected. - self.try_advancing_block_synchronization().await?; - - Ok(()) + fn insert_block_response(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()> { + self.block_sync.insert_block_responses(peer_ip, blocks) } /// We received new peer locators during a Ping. @@ -346,9 +289,30 @@ impl Sync { self.block_sync.remove_peer(&peer_ip); } - #[cfg(test)] - pub fn test_update_peer_locators(&self, peer_ip: SocketAddr, locators: BlockLocators) -> Result<()> { - self.update_peer_locators(peer_ip, locators) + /// Handles the incoming certificate request. + fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest) { + // Attempt to retrieve the certificate. + if let Some(certificate) = self.storage.get_certificate(request.certificate_id) { + // Send the certificate response to the peer. + let self_ = self.clone(); + tokio::spawn(async move { + let _ = self_.gateway.send(peer_ip, Event::CertificateResponse(certificate.into())).await; + }); + } + } + + /// Handles the incoming certificate response. + /// This method ensures the certificate response is well-formed and matches the certificate ID. + fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse) { + let certificate = response.certificate; + // Check if the peer IP exists in the pending queue for the given certificate ID. + let exists = self.pending.get_peers(certificate.id()).unwrap_or_default().contains(&peer_ip); + // If the peer IP exists, finish the pending request. + if exists { + // TODO: Validate the certificate. + // Remove the certificate ID from the pending queue. 
+ self.pending.remove(certificate.id(), Some(certificate)); + } } } @@ -900,32 +864,6 @@ impl Sync { Ok(cert) } - - /// Handles the incoming certificate request. - fn send_certificate_response(&self, peer_ip: SocketAddr, request: CertificateRequest) { - // Attempt to retrieve the certificate. - if let Some(certificate) = self.storage.get_certificate(request.certificate_id) { - // Send the certificate response to the peer. - let self_ = self.clone(); - tokio::spawn(async move { - let _ = self_.gateway.send(peer_ip, Event::CertificateResponse(certificate.into())).await; - }); - } - } - - /// Handles the incoming certificate response. - /// This method ensures the certificate response is well-formed and matches the certificate ID. - fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse) { - let certificate = response.certificate; - // Check if the peer IP exists in the pending queue for the given certificate ID. - let exists = self.pending.get_peers(certificate.id()).unwrap_or_default().contains(&peer_ip); - // If the peer IP exists, finish the pending request. - if exists { - // TODO: Validate the certificate. - // Remove the certificate ID from the pending queue. - self.pending.remove(certificate.id(), Some(certificate)); - } - } } impl Sync { diff --git a/node/src/client/mod.rs b/node/src/client/mod.rs index 2f6435b53e..1b6df270c5 100644 --- a/node/src/client/mod.rs +++ b/node/src/client/mod.rs @@ -266,7 +266,7 @@ impl> Client { break; } - // Make sure we do not sync too often + // Make sure we do not sync too often. let now = Instant::now(); let elapsed = now.saturating_duration_since(last_update); let sleep_time = Self::SYNC_INTERVAL.saturating_sub(elapsed); @@ -282,9 +282,9 @@ impl> Client { })); } - /// Client-side version of `snarkvm_node_bft::Sync::try_block_sync()`. + /// Client-side version of `snarkos_node_bft::Sync::try_block_sync()`. async fn try_block_sync(&self) { - // Sleep briefly to avoid triggering spam detection. + // Wait for updates or a timeout. let _ = timeout(Self::SYNC_INTERVAL, self.sync.wait_for_update()).await; // For sanity, check that sync height is never below ledger height. From c5e94ce1726d07f9ba06a824d4232972e5985480 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 16:25:28 -0700 Subject: [PATCH 10/13] misc(bft): get rid of bft channels --- build.rs | 3 +- node/bft/examples/simple_node.rs | 2 +- node/bft/src/bft.rs | 166 +++++++++++-------------------- node/bft/src/gateway.rs | 7 +- node/bft/src/helpers/channels.rs | 63 ------------ node/bft/src/lib.rs | 2 +- node/bft/src/primary.rs | 80 +++++++-------- node/bft/src/sync/mod.rs | 77 ++++++++------ node/bft/src/worker.rs | 2 +- node/bft/tests/common/primary.rs | 2 +- node/consensus/src/lib.rs | 31 ++++-- 11 files changed, 171 insertions(+), 264 deletions(-) diff --git a/build.rs b/build.rs index 806685e9e0..8fd65c428a 100644 --- a/build.rs +++ b/build.rs @@ -132,8 +132,9 @@ fn check_locktick_imports>(path: P) { } // If the file has a lock import "imbalance", print it out and increment the counter. + // Allow having more locktick, than regular, imports. 
assert!( - lock_balance == 0, + lock_balance <= 0, "The locks in \"{}\" don't seem to have `locktick` counterparts!", entry.path().display() ); diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index b71e712d61..b12a3feec7 100644 --- a/node/bft/examples/simple_node.rs +++ b/node/bft/examples/simple_node.rs @@ -200,7 +200,7 @@ pub async fn start_primary( None, )?; // Run the primary instance. - primary.run(None, None, sender.clone(), receiver).await?; + primary.run(None, None, None, sender.clone(), receiver).await?; // Handle OS signals. handle_signals(&primary); // Return the primary instance. diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index 89913c61ba..1617a23bc9 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -15,18 +15,9 @@ use crate::{ MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, - Primary, - helpers::{ - BFTReceiver, - ConsensusSender, - DAG, - PrimaryReceiver, - PrimarySender, - Storage, - fmt_id, - init_bft_channels, - now, - }, + helpers::{ConsensusSender, DAG, PrimaryReceiver, PrimarySender, Storage, fmt_id, now}, + primary::{Primary, PrimaryCallback}, + sync::SyncCallback, }; use snarkos_account::Account; use snarkos_node_bft_ledger_service::LedgerService; @@ -48,15 +39,11 @@ use anyhow::Context; use colored::Colorize; use indexmap::{IndexMap, IndexSet}; #[cfg(feature = "locktick")] -use locktick::{ - parking_lot::{Mutex, RwLock}, - tokio::Mutex as TMutex, -}; +use locktick::{parking_lot::RwLock, tokio::Mutex as TMutex}; #[cfg(not(feature = "locktick"))] -use parking_lot::{Mutex, RwLock}; +use parking_lot::RwLock; use std::{ collections::{BTreeMap, HashSet}, - future::Future, net::SocketAddr, sync::{ Arc, @@ -65,10 +52,7 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::{ - sync::{OnceCell, oneshot}, - task::JoinHandle, -}; +use tokio::sync::{OnceCell, oneshot}; #[derive(Clone)] pub struct BFT { @@ -82,8 +66,6 @@ pub struct BFT { leader_certificate_timer: Arc, /// The consensus sender. consensus_sender: Arc>>, - /// Handles for all spawned tasks. - handles: Arc>>>, /// The BFT lock. lock: Arc>, } @@ -107,7 +89,6 @@ impl BFT { leader_certificate: Default::default(), leader_certificate_timer: Default::default(), consensus_sender: Default::default(), - handles: Default::default(), lock: Default::default(), }) } @@ -124,14 +105,16 @@ impl BFT { primary_receiver: PrimaryReceiver, ) -> Result<()> { info!("Starting the BFT instance..."); - // Initialize the BFT channels. - let (bft_sender, bft_receiver) = init_bft_channels::(); - // First, start the BFT handlers. - self.start_handlers(bft_receiver); + // Set up callbacks. + let primary_callback = Some(Arc::new(self.clone()) as Arc>); + + let sync_callback = Some(Arc::new(self.clone()) as Arc>); + // Next, run the primary instance. - self.primary.run(ping, Some(bft_sender), primary_sender, primary_receiver).await?; + self.primary.run(ping, primary_callback, sync_callback, primary_sender, primary_receiver).await?; + // Lastly, set the consensus sender. - // Note: This ensures during initial syncing, that the BFT does not advance the ledger. + // Note: This ensures that, during initial syncing, that the BFT does not advance the ledger. if let Some(consensus_sender) = consensus_sender { self.consensus_sender.set(consensus_sender).expect("Consensus sender already set"); } @@ -213,8 +196,9 @@ impl BFT { } } -impl BFT { - /// Stores the certificate in the DAG, and attempts to commit one or more anchors. 
+#[async_trait::async_trait] +impl PrimaryCallback for BFT { + /// Notification that a new round has started. fn update_to_next_round(&self, current_round: u64) -> bool { // Ensure the current round is at least the storage round (this is a sanity check). let storage_round = self.storage().current_round(); @@ -282,6 +266,41 @@ impl BFT { is_ready } + /// Notification about a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()> { + // Update the DAG with the certificate. + self.update_dag::(certificate).await + } +} + +#[async_trait::async_trait] +impl SyncCallback for BFT { + /// Syncs the BFT DAG with the given batch certificates. These batch certificates **must** + /// already exist in the ledger. + /// + /// This method commits all the certificates into the DAG. + /// Note that there is no need to insert the certificates into the DAG, because these certificates + /// already exist in the ledger and therefore do not need to be re-ordered into future committed subdags. + async fn sync_dag_at_bootup(&self, certificates: Vec>) -> Result<()> { + // Acquire the BFT write lock. + let mut dag = self.dag.write(); + + // Commit all the certificates. + for certificate in certificates { + dag.commit(&certificate, self.storage().max_gc_rounds()); + } + + Ok(()) + } + + /// Sends a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()> { + // Update the DAG with the certificate. + self.update_dag::(certificate).await + } +} + +impl BFT { /// Updates the leader certificate to the current even round, /// returning `true` if the BFT is ready to update to the next round. /// @@ -857,77 +876,6 @@ impl BFT { } impl BFT { - /// Starts the BFT handlers. - fn start_handlers(&self, bft_receiver: BFTReceiver) { - let BFTReceiver { - mut rx_primary_round, - mut rx_primary_certificate, - mut rx_sync_bft_dag_at_bootup, - mut rx_sync_bft, - } = bft_receiver; - - // Process the current round from the primary. - let self_ = self.clone(); - self.spawn(async move { - while let Some((current_round, callback)) = rx_primary_round.recv().await { - callback.send(self_.update_to_next_round(current_round)).ok(); - } - }); - - // Process the certificate from the primary. - let self_ = self.clone(); - self.spawn(async move { - while let Some((certificate, callback)) = rx_primary_certificate.recv().await { - // Update the DAG with the certificate. - let result = self_.update_dag::(certificate).await; - // Send the callback **after** updating the DAG. - // Note: We must await the DAG update before proceeding. - callback.send(result).ok(); - } - }); - - // Process the request to sync the BFT DAG at bootup. - let self_ = self.clone(); - self.spawn(async move { - while let Some(certificates) = rx_sync_bft_dag_at_bootup.recv().await { - self_.sync_bft_dag_at_bootup(certificates).await; - } - }); - - // Handler for new certificates that were fetched by the sync module. - let self_ = self.clone(); - self.spawn(async move { - while let Some((certificate, callback)) = rx_sync_bft.recv().await { - // Update the DAG with the certificate. - let result = self_.update_dag::(certificate).await; - // Send the callback **after** updating the DAG. - // Note: We must await the DAG update before proceeding. - callback.send(result).ok(); - } - }); - } - - /// Syncs the BFT DAG with the given batch certificates. These batch certificates **must** - /// already exist in the ledger. - /// - /// This method commits all the certificates into the DAG. 
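The `PrimaryCallback` and `SyncCallback` implementations above rely on `async-trait` so the BFT can be handed around as a plain trait object. A reduced sketch of the same shape, using a hypothetical `RoundCallback` trait and `DagStub` type; anyhow, async-trait, and tokio are assumed, as elsewhere in the workspace:

    use std::sync::Arc;

    use anyhow::Result;

    // Simplified stand-in for the callback traits above; the names and
    // signatures are illustrative, not the exact snarkOS types.
    #[async_trait::async_trait]
    trait RoundCallback: Send + Sync {
        /// Synchronous query: is the consumer ready for the next round?
        fn update_to_next_round(&self, current_round: u64) -> bool;

        /// Asynchronous notification about a new certificate (here just an id).
        async fn add_new_certificate(&self, certificate_id: u64) -> Result<()>;
    }

    struct DagStub;

    #[async_trait::async_trait]
    impl RoundCallback for DagStub {
        fn update_to_next_round(&self, current_round: u64) -> bool {
            // A real implementation would inspect the DAG; the stub is ready
            // once the first round has passed.
            current_round > 0
        }

        async fn add_new_certificate(&self, certificate_id: u64) -> Result<()> {
            println!("inserted certificate {certificate_id} into the DAG");
            Ok(())
        }
    }

    #[tokio::main]
    async fn main() -> Result<()> {
        // The producer only sees the trait object, much like the primary
        // holding its callback behind an `Arc<dyn ...>`.
        let callback: Arc<dyn RoundCallback> = Arc::new(DagStub);

        if callback.update_to_next_round(3) {
            callback.add_new_certificate(42).await?;
        }
        Ok(())
    }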
- /// Note that there is no need to insert the certificates into the DAG, because these certificates - /// already exist in the ledger and therefore do not need to be re-ordered into future committed subdags. - async fn sync_bft_dag_at_bootup(&self, certificates: Vec>) { - // Acquire the BFT write lock. - let mut dag = self.dag.write(); - - // Commit all the certificates. - for certificate in certificates { - dag.commit(&certificate, self.storage().max_gc_rounds()); - } - } - - /// Spawns a task with the given future; it should only be used for long-running tasks. - fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); - } - /// Shuts down the BFT. pub async fn shut_down(&self) { info!("Shutting down the BFT..."); @@ -935,14 +883,12 @@ impl BFT { let _lock = self.lock.lock().await; // Shut down the primary. self.primary.shut_down().await; - // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); } } #[cfg(test)] mod tests { - use crate::{BFT, MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, helpers::Storage}; + use crate::{BFT, MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, helpers::Storage, sync::SyncCallback}; use snarkos_account::Account; use snarkos_node_bft_ledger_service::MockLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; @@ -1526,7 +1472,7 @@ mod tests { let bootup_bft = initialize_bft(account.clone(), storage_2, ledger)?; // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(certificates.clone()).await.unwrap(); // Check that the BFT starts from the same last committed round. assert_eq!(bft.dag.read().last_committed_round(), bootup_bft.dag.read().last_committed_round()); @@ -1705,7 +1651,7 @@ mod tests { let bootup_bft = initialize_bft(account.clone(), bootup_storage.clone(), ledger.clone())?; // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(pre_shutdown_certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(pre_shutdown_certificates.clone()).await.unwrap(); // Insert the post shutdown certificates to the storage and BFT with bootup. for certificate in post_shutdown_certificates.iter() { @@ -1885,7 +1831,7 @@ mod tests { // Insert a mock DAG in the BFT without bootup. *bootup_bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(0); // Sync the BFT DAG at bootup. - bootup_bft.sync_bft_dag_at_bootup(pre_shutdown_certificates.clone()).await; + bootup_bft.sync_dag_at_bootup(pre_shutdown_certificates.clone()).await.unwrap(); // Insert the post shutdown certificates into the storage. 
let mut post_shutdown_certificates: Vec> = diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index 862e135391..e4a469de06 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -1502,12 +1502,15 @@ impl Gateway { #[cfg(test)] mod prop_tests { - use crate::{ + use super::{ Gateway, + prop_tests::GatewayAddress::{Dev, Prod}, + }; + + use crate::{ MAX_WORKERS, MEMORY_POOL_PORT, Worker, - gateway::prop_tests::GatewayAddress::{Dev, Prod}, helpers::{Storage, init_primary_channels, init_worker_channels}, }; use aleo_std::StorageMode; diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 8dfa1229f9..f14ed56f64 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -52,69 +52,6 @@ pub fn init_consensus_channels() -> (ConsensusSender, ConsensusRe (sender, receiver) } -/// "Interface" that enables, for example, sending data from storage to the the BFT logic. -#[derive(Clone, Debug)] -pub struct BFTSender { - pub tx_primary_round: mpsc::Sender<(u64, oneshot::Sender)>, - pub tx_primary_certificate: mpsc::Sender<(BatchCertificate, oneshot::Sender>)>, - pub tx_sync_bft_dag_at_bootup: mpsc::Sender>>, - pub tx_sync_bft: mpsc::Sender<(BatchCertificate, oneshot::Sender>)>, -} - -impl BFTSender { - /// Sends the current round to the BFT. - pub async fn send_primary_round_to_bft(&self, current_round: u64) -> Result { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the current round to the BFT. - self.tx_primary_round.send((current_round, callback_sender)).await?; - // Await the callback to continue. - Ok(callback_receiver.await?) - } - - /// Sends the batch certificate to the BFT. - pub async fn send_primary_certificate_to_bft(&self, certificate: BatchCertificate) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the certificate to the BFT. - self.tx_primary_certificate.send((certificate, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the batch certificates to the BFT for syncing. - pub async fn send_sync_bft(&self, certificate: BatchCertificate) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the certificate to the BFT for syncing. - self.tx_sync_bft.send((certificate, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } -} - -/// Receiving counterpart to `BFTSender` -#[derive(Debug)] -pub struct BFTReceiver { - pub rx_primary_round: mpsc::Receiver<(u64, oneshot::Sender)>, - pub rx_primary_certificate: mpsc::Receiver<(BatchCertificate, oneshot::Sender>)>, - pub rx_sync_bft_dag_at_bootup: mpsc::Receiver>>, - pub rx_sync_bft: mpsc::Receiver<(BatchCertificate, oneshot::Sender>)>, -} - -/// Initializes the BFT channels, and returns the sending and receiving ends. 
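For comparison, the removed `BFTSender`/`BFTReceiver` plumbing turned every round query into a channel round-trip. A condensed sketch of that older pattern, with stand-in payload types, shows what a direct callback call replaces:

    use tokio::sync::{mpsc, oneshot};

    #[tokio::main]
    async fn main() {
        // One request type of the removed sender: (round, reply channel).
        let (tx_round, mut rx_round) = mpsc::channel::<(u64, oneshot::Sender<bool>)>(64);

        // Receiver task standing in for the old handler loop on the BFT side.
        let handler = tokio::spawn(async move {
            while let Some((round, callback)) = rx_round.recv().await {
                // Stand-in readiness rule.
                callback.send(round % 2 == 0).ok();
            }
        });

        // Sender side standing in for the primary asking whether it may advance:
        // every query allocates a oneshot and crosses two channels.
        let (callback_sender, callback_receiver) = oneshot::channel();
        tx_round.send((4, callback_sender)).await.expect("handler is alive");
        let is_ready = callback_receiver.await.expect("handler replied");
        assert!(is_ready);

        // Dropping the sender ends the handler loop.
        drop(tx_round);
        handler.await.expect("handler exits cleanly");
    }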
-pub fn init_bft_channels() -> (BFTSender, BFTReceiver) { - let (tx_primary_round, rx_primary_round) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_primary_certificate, rx_primary_certificate) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_sync_bft_dag_at_bootup, rx_sync_bft_dag_at_bootup) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_sync_bft, rx_sync_bft) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = BFTSender { tx_primary_round, tx_primary_certificate, tx_sync_bft_dag_at_bootup, tx_sync_bft }; - let receiver = BFTReceiver { rx_primary_round, rx_primary_certificate, rx_sync_bft_dag_at_bootup, rx_sync_bft }; - - (sender, receiver) -} - #[derive(Clone, Debug)] pub struct PrimarySender { pub tx_batch_propose: mpsc::Sender<(SocketAddr, BatchPropose)>, diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index c92032d4c7..a46598b0c0 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -35,7 +35,7 @@ mod bft; pub use bft::*; mod gateway; -pub use gateway::*; +pub use gateway::Gateway; mod primary; pub use primary::*; diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index cb45c5f522..93644174ef 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -14,18 +14,16 @@ // limitations under the License. use crate::{ - Gateway, MAX_BATCH_DELAY_IN_MS, MAX_WORKERS, MIN_BATCH_DELAY_IN_SECS, PRIMARY_PING_IN_MS, - Sync, - Transport, WORKER_PING_IN_MS, Worker, events::{BatchPropose, BatchSignature, Event}, + gateway::{Gateway, Transport}, helpers::{ - BFTSender, + CallbackHandle, PrimaryReceiver, PrimarySender, Proposal, @@ -39,6 +37,7 @@ use crate::{ now, }, spawn_blocking, + sync::{Sync, SyncCallback}, }; use snarkos_account::Account; use snarkos_node_bft_events::PrimaryPing; @@ -60,6 +59,7 @@ use snarkvm::{ }; use aleo_std::StorageMode; +use anyhow::Context; use colored::Colorize; use futures::stream::{FuturesUnordered, StreamExt}; use indexmap::{IndexMap, IndexSet}; @@ -81,11 +81,21 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::sync::OnceCell; /// A helper type for an optional proposed batch. pub type ProposedBatch = RwLock>>; +/// This callback trait allows listening to changes in the Primary, such as round advancement. +/// This is currently used by BFT. +#[async_trait::async_trait] +pub trait PrimaryCallback: Send + std::marker::Sync { + /// Notifies that a new round has started. + fn update_to_next_round(&self, current_round: u64) -> bool; + + /// Sends a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()>; +} + /// The primary logic of a node. /// AleoBFT adopts a primary-worker architecture as described in the Narwhal and Tusk paper (Section 4.2). #[derive(Clone)] @@ -100,8 +110,8 @@ pub struct Primary { ledger: Arc>, /// The workers. workers: Arc<[Worker]>, - /// The BFT sender. - bft_sender: Arc>>, + /// The primary callback (used by [`BFT`]). + primary_callback: Arc>>>, /// The batch proposal, if the primary is currently proposing a batch. proposed_batch: Arc>, /// The timestamp of the most recent proposed batch. 
@@ -145,7 +155,7 @@ impl Primary { storage, ledger, workers: Arc::from(vec![]), - bft_sender: Default::default(), + primary_callback: Default::default(), proposed_batch: Default::default(), latest_proposed_batch_timestamp: Default::default(), signed_proposals: Default::default(), @@ -199,16 +209,16 @@ impl Primary { pub async fn run( &mut self, ping: Option>>, - bft_sender: Option>, + primary_callback: Option>>, + sync_callback: Option>>, primary_sender: PrimarySender, primary_receiver: PrimaryReceiver, ) -> Result<()> { info!("Starting the primary instance of the memory pool..."); // Set the BFT sender. - if let Some(bft_sender) = &bft_sender { - // Set the BFT sender in the primary. - self.bft_sender.set(bft_sender.clone()).expect("BFT sender already set"); + if let Some(callback) = primary_callback { + self.primary_callback.set(callback)?; } // Construct a map of the worker senders. @@ -238,7 +248,7 @@ impl Primary { self.workers = Arc::from(workers); // Next, initialize the sync module and sync the storage from ledger. - self.sync.initialize(bft_sender).await?; + self.sync.initialize(sync_callback).await?; // Next, load and process the proposal cache before running the sync module. self.load_proposal_cache().await?; // Next, run the sync module. @@ -432,17 +442,12 @@ impl Primary { // Ensure the primary has not proposed a batch for this round before. if self.storage.contains_certificate_in_round_from(round, self.gateway.account().address()) { // If a BFT sender was provided, attempt to advance the current round. - if let Some(bft_sender) = self.bft_sender.get() { - match bft_sender.send_primary_round_to_bft(self.current_round()).await { + if let Some(cb) = &*self.primary_callback.get_ref() { + match cb.update_to_next_round(self.current_round()) { // 'is_ready' is true if the primary is ready to propose a batch for the next round. - Ok(true) => (), // continue, + true => (), // continue, // 'is_ready' is false if the primary is not ready to propose a batch for the next round. - Ok(false) => return Ok(()), - // An error occurred while attempting to advance the current round. - Err(e) => { - warn!("Failed to update the BFT to the next round - {e}"); - return Err(e); - } + false => return Ok(()), } } debug!("Primary is safely skipping {}", format!("(round {round} was already certified)").dimmed()); @@ -1543,14 +1548,8 @@ impl Primary { // Attempt to advance to the next round. if current_round < next_round { // If a BFT sender was provided, send the current round to the BFT. - let is_ready = if let Some(bft_sender) = self.bft_sender.get() { - match bft_sender.send_primary_round_to_bft(current_round).await { - Ok(is_ready) => is_ready, - Err(e) => { - warn!("Failed to update the BFT to the next round - {e}"); - return Err(e); - } - } + let is_ready = if let Some(cb) = self.primary_callback.get() { + cb.update_to_next_round(current_round) } // Otherwise, handle the Narwhal case. else { @@ -1638,12 +1637,11 @@ impl Primary { .await?; debug!("Stored a batch certificate for round {}", certificate.round()); // If a BFT sender was provided, send the certificate to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { + if let Some(cb) = self.primary_callback.get() { // Await the callback to continue. 
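The two accessors used here follow the `CallbackHandle` contract: `get_ref()` borrows under the read lock for purely synchronous checks, while `get()` clones the `Arc` so the callback can outlive the lock and cross `.await` points, as in the certificate path that follows. A self-contained sketch with an illustrative `RoundCheck` trait; parking_lot and tokio are assumed:

    use std::sync::Arc;

    use parking_lot::{RwLock, RwLockReadGuard};

    // Illustrative stand-ins; the real handle is a `CallbackHandle` holding an
    // `Arc<dyn ...>` trait object, with a `locktick` variant of the guard type.
    trait RoundCheck: Send + Sync {
        fn is_ready(&self, round: u64) -> bool;
    }

    struct EvenRounds;

    impl RoundCheck for EvenRounds {
        fn is_ready(&self, round: u64) -> bool {
            round % 2 == 0
        }
    }

    struct Handle {
        callback: RwLock<Option<Arc<dyn RoundCheck>>>,
    }

    impl Handle {
        /// Clone the callback out: safe to keep across `.await` points.
        fn get(&self) -> Option<Arc<dyn RoundCheck>> {
            self.callback.read().clone()
        }

        /// Borrow the callback: cheaper, but the guard must not cross `.await`.
        fn get_ref(&self) -> RwLockReadGuard<'_, Option<Arc<dyn RoundCheck>>> {
            self.callback.read()
        }
    }

    fn sync_path(handle: &Handle, round: u64) -> bool {
        // Purely synchronous code can hold the read guard for the whole check.
        match &*handle.get_ref() {
            Some(cb) => cb.is_ready(round),
            None => true,
        }
    }

    async fn async_path(handle: &Handle, round: u64) -> bool {
        // Clone first so no lock guard is held across the `.await`.
        let Some(cb) = handle.get() else { return true };
        tokio::task::yield_now().await;
        cb.is_ready(round)
    }

    #[tokio::main]
    async fn main() {
        let cb: Arc<dyn RoundCheck> = Arc::new(EvenRounds);
        let handle = Handle { callback: RwLock::new(Some(cb)) };
        assert!(sync_path(&handle, 4));
        assert!(async_path(&handle, 4).await);
    }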
- if let Err(e) = bft_sender.send_primary_certificate_to_bft(certificate.clone()).await { - warn!("Failed to update the BFT DAG from primary - {e}"); - return Err(e); - }; + cb.add_new_certificate(certificate.clone()) + .await + .with_context(|| "Failed to add new certificate from primary")?; } // Broadcast the certified batch to all validators. self.gateway.broadcast(Event::BatchCertified(certificate.clone().into())); @@ -1728,12 +1726,8 @@ impl Primary { .await?; debug!("Stored a batch certificate for round {batch_round} from '{peer_ip}'"); // If a BFT sender was provided, send the round and certificate to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { - // Send the certificate to the BFT. - if let Err(e) = bft_sender.send_primary_certificate_to_bft(certificate).await { - warn!("Failed to update the BFT DAG from sync: {e}"); - return Err(e); - }; + if let Some(cb) = self.primary_callback.get() { + cb.add_new_certificate(certificate).await.with_context(|| "Failed to update the DAG from sync")?; } } Ok(()) @@ -1944,10 +1938,12 @@ impl Primary { /// Shuts down the primary. pub async fn shut_down(&self) { info!("Shutting down the primary..."); + // Remove the callback. + self.primary_callback.clear(); // Shut down the workers. self.workers.iter().for_each(|worker| worker.shut_down()); // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + self.handles.lock().drain(..).for_each(|handle| handle.abort()); // Save the current proposal cache to disk. let proposal_cache = { let proposal = self.proposed_batch.write().take(); diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 35e45bacbb..4cdb119e3f 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -14,18 +14,17 @@ // limitations under the License. use crate::{ - Gateway, MAX_FETCH_TIMEOUT_IN_MS, PRIMARY_PING_IN_MS, - Transport, - events::DataBlocks, - gateway::SyncCallback as GatewaySyncCallback, - helpers::{BFTSender, Pending, Storage, fmt_id, max_redundant_requests}, + events::{CertificateRequest, CertificateResponse, DataBlocks, Event}, + gateway::{Gateway, SyncCallback as GatewaySyncCallback, Transport}, + helpers::{Pending, Storage, fmt_id, max_redundant_requests}, + ledger_service::LedgerService, }; -use snarkos_node_bft_events::{CertificateRequest, CertificateResponse, Event}; -use snarkos_node_bft_ledger_service::LedgerService; + use snarkos_node_router::PeerPoolHandling; use snarkos_node_sync::{BLOCK_REQUEST_BATCH_DELAY, BlockSync, Ping, PrepareSyncRequest, locators::BlockLocators}; + use snarkvm::{ console::{network::Network, types::Field}, ledger::{authority::Authority, block::Block, narwhal::BatchCertificate}, @@ -37,12 +36,15 @@ use snarkvm::{ }, }; -use anyhow::{Context, Result, anyhow, bail}; +use anyhow::{Context, Result, anyhow, bail, ensure}; use indexmap::IndexMap; #[cfg(feature = "locktick")] -use locktick::{parking_lot::Mutex, tokio::Mutex as TMutex}; +use locktick::{ + parking_lot::{Mutex, RwLock}, + tokio::Mutex as TMutex, +}; #[cfg(not(feature = "locktick"))] -use parking_lot::Mutex; +use parking_lot::{Mutex, RwLock}; #[cfg(not(feature = "serial"))] use rayon::prelude::*; use std::{ @@ -55,10 +57,20 @@ use std::{ #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; use tokio::{ - sync::{OnceCell, oneshot}, + sync::oneshot, time::{sleep, timeout}, }; +/// This callback trait allows listening to synchronization updates, such as discorvering new `BatchCertificate`s. +/// This is currently used by BFT. 
+#[async_trait::async_trait] +pub trait SyncCallback: Send + std::marker::Sync { + async fn sync_dag_at_bootup(&self, certificates: Vec>) -> Result<()>; + + /// Sends a new certificate. + async fn add_new_certificate(&self, certificate: BatchCertificate) -> Result<()>; +} + /// Block synchronization logic for validators. /// /// Synchronization works differently for nodes that act as validators in AleoBFT; @@ -83,8 +95,8 @@ pub struct Sync { block_sync: Arc>, /// The pending certificates queue. pending: Arc, BatchCertificate>>, - /// The BFT sender. - bft_sender: Arc>>, + /// The sync callback (used by [`BFT`]). + sync_callback: Arc>>>>, /// Handles to the spawned background tasks. handles: Arc>>>, /// The response lock. @@ -118,7 +130,7 @@ impl Sync { ledger, block_sync, pending: Default::default(), - bft_sender: Default::default(), + sync_callback: Default::default(), handles: Default::default(), response_lock: Default::default(), sync_lock: Default::default(), @@ -127,10 +139,11 @@ impl Sync { } /// Initializes the sync module and sync the storage with the ledger at bootup. - pub async fn initialize(&self, bft_sender: Option>) -> Result<()> { - // If a BFT sender was provided, set it. - if let Some(bft_sender) = bft_sender { - self.bft_sender.set(bft_sender).expect("BFT sender already set in gateway"); + pub async fn initialize(&self, sync_callback: Option>>) -> Result<()> { + // If a callback was provided, set it. + if let Some(callback) = sync_callback { + let prev = self.sync_callback.write().replace(callback); + ensure!(prev.is_none(), "Sync callback was already set"); } info!("Syncing storage with the ledger..."); @@ -142,6 +155,11 @@ impl Sync { Ok(()) } + /// Get the `SyncCallback` if one is set. + fn get_callback(&self) -> Option>> { + self.sync_callback.read().clone() + } + /// Sends the given batch of block requests to peers. /// /// Responses to block requests will eventually be processed by `Self::try_advancing_block_synchronization`. @@ -396,13 +414,9 @@ impl Sync { .collect::>(); // If a BFT sender was provided, send the certificates to the BFT. - if let Some(bft_sender) = self.bft_sender.get() { + if let Some(cb) = self.get_callback() { // Await the callback to continue. - bft_sender - .tx_sync_bft_dag_at_bootup - .send(certificates) - .await - .with_context(|| "Failed to update the BFT DAG from sync")?; + cb.sync_dag_at_bootup(certificates).await.with_context(|| "Failed to update the DAG from sync")?; } self.block_sync.set_sync_height(block_height); @@ -636,11 +650,10 @@ impl Sync { for certificate in certificates { // If a BFT sender was provided, send the certificate to the BFT. // For validators, BFT spawns a receiver task in `BFT::start_handlers`. - if let Some(bft_sender) = self.bft_sender.get() { - // Await the callback to continue. - if let Err(err) = bft_sender.send_sync_bft(certificate).await { - bail!("Failed to sync certificate - {err}"); - }; + if let Some(cb) = self.get_callback() { + cb.add_new_certificate(certificate) + .await + .with_context(|| "Failed to sync certificate - {err}")?; } } } @@ -875,12 +888,14 @@ impl Sync { /// Shuts down the primary. pub async fn shut_down(&self) { info!("Shutting down the sync module..."); + // Remove the callback. + let _ = self.sync_callback.write().take(); // Acquire the response lock. let _lock = self.response_lock.lock().await; // Acquire the sync lock. let _lock = self.sync_lock.lock().await; - // Abort the tasks. - self.handles.lock().iter().for_each(|handle| handle.abort()); + // Abort all running tasks. 
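Shutdown now clears the callback and then drains the task handles, as in the `drain(..)` call that follows and the matching one in the primary's `shut_down` above; draining both aborts the tasks and drops the handles, where the old `iter()` left the aborted handles in the vector. A small sketch of that shutdown shape, with parking_lot and tokio assumed and `Module` as an illustrative stand-in:

    use std::sync::Arc;

    use parking_lot::Mutex;
    use tokio::task::JoinHandle;

    // Illustrative stand-in for a module owning background tasks; `handles`
    // mirrors the `Arc<Mutex<Vec<JoinHandle<()>>>>` fields used in this crate.
    struct Module {
        handles: Arc<Mutex<Vec<JoinHandle<()>>>>,
    }

    impl Module {
        fn spawn(&self, fut: impl std::future::Future<Output = ()> + Send + 'static) {
            self.handles.lock().push(tokio::spawn(fut));
        }

        fn shut_down(&self) {
            // `drain(..)` both aborts the tasks and empties the vector, so the
            // handles (and anything their futures capture) are dropped right
            // away; `iter()` would keep the aborted handles around.
            self.handles.lock().drain(..).for_each(|handle| handle.abort());
        }
    }

    #[tokio::main]
    async fn main() {
        let module = Module { handles: Arc::new(Mutex::new(Vec::new())) };
        module.spawn(async {
            loop {
                tokio::time::sleep(std::time::Duration::from_secs(60)).await;
            }
        });
        module.shut_down();
        assert!(module.handles.lock().is_empty());
    }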
+ self.handles.lock().drain(..).for_each(|handle| handle.abort()); } } diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index 870fa0a340..f3ade138e0 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -17,8 +17,8 @@ use crate::{ MAX_FETCH_TIMEOUT_IN_MS, MAX_WORKERS, ProposedBatch, - Transport, events::{Event, TransmissionRequest, TransmissionResponse}, + gateway::Transport, helpers::{Pending, Ready, Storage, WorkerReceiver, fmt_id, max_redundant_requests}, spawn_blocking, }; diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index d9e4f5e0d6..4c346cf8f5 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -230,7 +230,7 @@ impl TestNetwork { bft.run(None, None, primary_sender, primary_receiver).await.unwrap(); } else { // Setup the channels and start the primary. - validator.primary.run(None, None, primary_sender, primary_receiver).await.unwrap(); + validator.primary.run(None, None, None, primary_sender, primary_receiver).await.unwrap(); } if let Some(interval_ms) = self.config.fire_transmissions { diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index 80415491bf..3e3c34c0a3 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -53,7 +53,7 @@ use snarkvm::{ }; use aleo_std::StorageMode; -use anyhow::Result; +use anyhow::{Context, Result}; use colored::Colorize; use indexmap::IndexMap; #[cfg(feature = "locktick")] @@ -84,7 +84,7 @@ const MAX_DEPLOYMENTS_PER_INTERVAL: usize = 1; /// /// Consensus acts as a rate limiter to prevents workers in BFT from being overloaded. /// Each worker maintains a ready queue (which is essentially also a mempool), but verifies transactions/solutions -/// before enquing them. +/// before enqueuing them. /// Consensus only passes more transactions/solutions to the BFT layer if its ready queues are not already full. #[derive(Clone)] pub struct Consensus { @@ -501,8 +501,7 @@ impl Consensus { let result = spawn_blocking! { self_.try_advance_to_next_block(subdag, transmissions_) }; // If the block failed to advance, reinsert the transmissions into the memory pool. - if let Err(e) = &result { - error!("Unable to advance to the next block - {e}"); + if result.is_err() { // On failure, reinsert the transmissions into the memory pool. self.reinsert_transmissions(transmissions).await; } @@ -517,6 +516,8 @@ impl Consensus { subdag: Subdag, transmissions: IndexMap, Transmission>, ) -> Result<()> { + trace!("Trying to advance to new subdag anchored at round {}", subdag.anchor_round()); + #[cfg(feature = "metrics")] let start = subdag.leader_certificate().batch_header().timestamp(); #[cfg(feature = "metrics")] @@ -525,14 +526,20 @@ impl Consensus { let current_block_timestamp = self.ledger.latest_block().header().metadata().timestamp(); // Create the candidate next block. - let next_block = self.ledger.prepare_advance_to_next_quorum_block(subdag, transmissions)?; + let next_block = self + .ledger + .prepare_advance_to_next_quorum_block(subdag, transmissions) + .with_context(|| "Ledger preparation for advancement to next block failed")?; // Check that the block is well-formed. - self.ledger.check_next_block(&next_block)?; + self.ledger.check_next_block(&next_block).with_context(|| "Check for new block failed")?; // Advance to the next block. 
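On the consensus side, failures before the ledger is mutated now carry `anyhow::Context`, while failures after the advance call that follows are only logged, since the block is already committed. A stripped-down sketch of that error-handling split, with hypothetical ledger helpers:

    use anyhow::{Context, Result};

    // Hypothetical ledger helpers; only the error-handling shape matters here.
    fn check_next_block(height: u32) -> Result<()> {
        anyhow::ensure!(height > 0, "height must be positive");
        Ok(())
    }

    fn advance_to_next_block(height: u32) -> Result<()> {
        println!("advanced to block {height}");
        Ok(())
    }

    fn get_block_locators(height: u32) -> Result<Vec<u32>> {
        Ok(vec![height])
    }

    fn advance(height: u32) -> Result<()> {
        // Before the ledger is mutated, failures propagate with added context.
        check_next_block(height).with_context(|| format!("check for block {height} failed"))?;
        advance_to_next_block(height).with_context(|| format!("advancement to block {height} failed"))?;

        // After the ledger has advanced, follow-up failures are only logged;
        // returning an error here would suggest rolling back state that has
        // already been committed.
        match get_block_locators(height) {
            Ok(locators) => println!("announcing {} locator(s)", locators.len()),
            Err(err) => eprintln!("failed to build block locators after advancement: {err:?}"),
        }
        Ok(())
    }

    fn main() -> Result<()> {
        advance(1)
    }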
- self.ledger.advance_to_next_block(&next_block)?; + self.ledger.advance_to_next_block(&next_block).with_context(|| "Ledger advancement to new block failed")?; + + // Note: Do not return failure after this point, as the ledger already advanced. + #[cfg(feature = "telemetry")] // Fetch the latest committee - let latest_committee = self.ledger.current_committee()?; + let latest_committee = self.ledger.current_committee(); // If the next block starts a new epoch, clear the existing solutions. if next_block.height() % N::NUM_BLOCKS_PER_EPOCH == 0 { @@ -543,8 +550,10 @@ impl Consensus { } // Notify peers that we have a new block. - let locators = self.block_sync.get_block_locators()?; - self.ping.update_block_locators(locators); + match self.block_sync.get_block_locators() { + Ok(locators) => self.ping.update_block_locators(locators), + Err(err) => warn!("Failed to generate new block locators after block advancement: {err:?}"), + } // Make block sync aware of the new block. self.block_sync.set_sync_height(next_block.height()); @@ -571,7 +580,7 @@ impl Consensus { metrics::gauge(metrics::blocks::CUMULATIVE_PROOF_TARGET, cumulative_proof_target as f64); #[cfg(feature = "telemetry")] - { + if let Ok(latest_committee) = latest_committee { // Retrieve the latest participation scores. let participation_scores = self.bft().primary().gateway().validator_telemetry().get_participation_scores(&latest_committee); From 0de4fd83b122a2cef38855bd38db8a1b4d662f30 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Fri, 12 Sep 2025 19:39:56 -0700 Subject: [PATCH 11/13] misc(consensus): replace consensus channel with a callback --- Cargo.lock | 1 + node/bft/examples/simple_node.rs | 50 +++++++++++++++-------------- node/bft/src/bft.rs | 55 ++++++++++++++++---------------- node/bft/src/helpers/channels.rs | 25 +-------------- node/bft/src/lib.rs | 2 +- node/bft/src/sync/mod.rs | 32 +++++++------------ node/consensus/Cargo.toml | 2 ++ node/consensus/src/lib.rs | 46 ++++++++++---------------- 8 files changed, 85 insertions(+), 128 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a54574b04..be977d5f61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4021,6 +4021,7 @@ version = "4.2.2" dependencies = [ "aleo-std", "anyhow", + "async-trait", "colored 3.0.0", "indexmap 2.11.4", "itertools 0.14.0", diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index b12a3feec7..e7a84a829b 100644 --- a/node/bft/examples/simple_node.rs +++ b/node/bft/examples/simple_node.rs @@ -22,9 +22,10 @@ extern crate snarkos_node_metrics as metrics; use snarkos_account::Account; use snarkos_node_bft::{ BFT, + BftCallback, MEMORY_POOL_PORT, Primary, - helpers::{ConsensusReceiver, PrimarySender, Storage, init_consensus_channels, init_primary_channels}, + helpers::{PrimarySender, Storage, init_primary_channels}, }; use snarkos_node_bft_ledger_service::TranslucentLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; @@ -39,7 +40,7 @@ use snarkvm::{ Ledger, block::Transaction, committee::{Committee, MIN_VALIDATOR_STAKE}, - narwhal::{BatchHeader, Data}, + narwhal::{BatchHeader, Data, Subdag, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, store::{ConsensusStore, helpers::memory::ConsensusMemory}, }, @@ -144,15 +145,13 @@ pub async fn start_bft( // Initialize the trusted validators. let trusted_validators = trusted_validators(node_id, num_nodes, peers); // Initialize the consensus channels. 
- let (consensus_sender, consensus_receiver) = init_consensus_channels::(); - // Initialize the consensus receiver handler. - consensus_handler(consensus_receiver); + let consensus_handler = Arc::new(ConsensusHandler {}); // Initialize the BFT instance. let block_sync = Arc::new(BlockSync::new(ledger.clone())); let mut bft = BFT::::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None)?; // Run the BFT instance. - bft.run(None, Some(consensus_sender), sender.clone(), receiver).await?; + bft.run(None, Some(consensus_handler), sender.clone(), receiver).await?; // Retrieve the BFT's primary. let primary = bft.primary(); // Handle OS signals. @@ -310,25 +309,28 @@ fn initialize_components(node_id: u16, num_nodes: u16) -> Result<(Committee) { - let ConsensusReceiver { mut rx_consensus_subdag } = receiver; +struct ConsensusHandler {} + +#[async_trait::async_trait] +impl BftCallback for ConsensusHandler { + async fn process_bft_subdag( + &self, + subdag: Subdag, + transmissions: IndexMap, Transmission>, + ) -> Result<()> { + // Determine the amount of time to sleep for the subdag. + let subdag_ms = subdag.values().flatten().count(); + // Determine the amount of time to sleep for the transmissions. + let transmissions_ms = transmissions.len() * 25; + // Add a constant delay. + let constant_ms = 100; + // Compute the total amount of time to sleep. + let sleep_ms = (subdag_ms + transmissions_ms + constant_ms) as u64; + // Sleep for the determined amount of time. + tokio::time::sleep(std::time::Duration::from_millis(sleep_ms)).await; - tokio::task::spawn(async move { - while let Some((subdag, transmissions, callback)) = rx_consensus_subdag.recv().await { - // Determine the amount of time to sleep for the subdag. - let subdag_ms = subdag.values().flatten().count(); - // Determine the amount of time to sleep for the transmissions. - let transmissions_ms = transmissions.len() * 25; - // Add a constant delay. - let constant_ms = 100; - // Compute the total amount of time to sleep. - let sleep_ms = (subdag_ms + transmissions_ms + constant_ms) as u64; - // Sleep for the determined amount of time. - tokio::time::sleep(std::time::Duration::from_millis(sleep_ms)).await; - // Call the callback. - callback.send(Ok(())).ok(); - } - }); + Ok(()) + } } /// Returns the trusted validators. diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index 1617a23bc9..f258157ffa 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -15,7 +15,7 @@ use crate::{ MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, - helpers::{ConsensusSender, DAG, PrimaryReceiver, PrimarySender, Storage, fmt_id, now}, + helpers::{CallbackHandle, DAG, PrimaryReceiver, PrimarySender, Storage, fmt_id, now}, primary::{Primary, PrimaryCallback}, sync::SyncCallback, }; @@ -52,7 +52,16 @@ use std::{ }; #[cfg(not(feature = "locktick"))] use tokio::sync::Mutex as TMutex; -use tokio::sync::{OnceCell, oneshot}; + +#[async_trait::async_trait] +pub trait BftCallback: Send + std::marker::Sync { + /// Attempts to build a new block from the given subDAG, and (tries to) advance the legder to it. + async fn process_bft_subdag( + &self, + subdag: Subdag, + transmissions: IndexMap, Transmission>, + ) -> Result<()>; +} #[derive(Clone)] pub struct BFT { @@ -64,8 +73,8 @@ pub struct BFT { leader_certificate: Arc>>>, /// The timer for the leader certificate to be received. leader_certificate_timer: Arc, - /// The consensus sender. - consensus_sender: Arc>>, + /// The BFT callback (used by `Consensus`). 
+ bft_callback: Arc>>>, /// The BFT lock. lock: Arc>, } @@ -88,7 +97,7 @@ impl BFT { dag: Default::default(), leader_certificate: Default::default(), leader_certificate_timer: Default::default(), - consensus_sender: Default::default(), + bft_callback: Default::default(), lock: Default::default(), }) } @@ -100,23 +109,22 @@ impl BFT { pub async fn run( &mut self, ping: Option>>, - consensus_sender: Option>, + bft_callback: Option>>, primary_sender: PrimarySender, primary_receiver: PrimaryReceiver, ) -> Result<()> { info!("Starting the BFT instance..."); - // Set up callbacks. + // Set up callbacks to pass to the primary. let primary_callback = Some(Arc::new(self.clone()) as Arc>); - let sync_callback = Some(Arc::new(self.clone()) as Arc>); // Next, run the primary instance. self.primary.run(ping, primary_callback, sync_callback, primary_sender, primary_receiver).await?; - // Lastly, set the consensus sender. - // Note: This ensures that, during initial syncing, that the BFT does not advance the ledger. - if let Some(consensus_sender) = consensus_sender { - self.consensus_sender.set(consensus_sender).expect("Consensus sender already set"); + // Lastly, set up callbacks for BFT itself. + // Note: This ensures that, during initial syncing, the BFT does not advance the ledger. + if let Some(callback) = bft_callback { + self.bft_callback.set(callback)?; } Ok(()) } @@ -722,23 +730,12 @@ impl BFT { "BFT failed to commit - the subdag anchor round {anchor_round} does not match the leader round {leader_round}", ); - // Trigger consensus. - if let Some(consensus_sender) = self.consensus_sender.get() { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); + // Trigger the callback (if any). + if let Some(cb) = self.bft_callback.get() { // Send the subdag and transmissions to consensus. - consensus_sender.tx_consensus_subdag.send((subdag, transmissions, callback_sender)).await?; - // Await the callback to continue. - match callback_receiver.await { - Ok(Ok(())) => (), // continue - Ok(Err(err)) => { - err.log_error("BFT failed to advance the subdag for round {anchor_round}"); - return Ok(()); - } - Err(err) => { - err.log_error("BFT failed to receive the callback for round {anchor_round}"); - return Ok(()); - } + if let Err(err) = cb.process_bft_subdag(subdag, transmissions).await { + err.log_error("BFT failed to advance the subdag for round {anchor_round}"); + return Ok(()); } } @@ -879,6 +876,8 @@ impl BFT { /// Shuts down the BFT. pub async fn shut_down(&self) { info!("Shutting down the BFT..."); + // Remove the callback. + self.bft_callback.clear(); // Acquire the lock. let _lock = self.lock.lock().await; // Shut down the primary. 
diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index f14ed56f64..1cc3f1cec0 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -18,40 +18,17 @@ use snarkvm::{ console::network::*, ledger::{ block::Transaction, - narwhal::{BatchCertificate, Data, Subdag, Transmission, TransmissionID}, + narwhal::{BatchCertificate, Data, TransmissionID}, puzzle::{Solution, SolutionID}, }, prelude::Result, }; -use indexmap::IndexMap; use std::net::SocketAddr; use tokio::sync::{mpsc, oneshot}; const MAX_CHANNEL_SIZE: usize = 8192; -#[derive(Debug)] -pub struct ConsensusSender { - pub tx_consensus_subdag: - mpsc::Sender<(Subdag, IndexMap, Transmission>, oneshot::Sender>)>, -} - -#[derive(Debug)] -pub struct ConsensusReceiver { - pub rx_consensus_subdag: - mpsc::Receiver<(Subdag, IndexMap, Transmission>, oneshot::Sender>)>, -} - -/// Initializes the consensus channels. -pub fn init_consensus_channels() -> (ConsensusSender, ConsensusReceiver) { - let (tx_consensus_subdag, rx_consensus_subdag) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = ConsensusSender { tx_consensus_subdag }; - let receiver = ConsensusReceiver { rx_consensus_subdag }; - - (sender, receiver) -} - #[derive(Clone, Debug)] pub struct PrimarySender { pub tx_batch_propose: mpsc::Sender<(SocketAddr, BatchPropose)>, diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index a46598b0c0..d2b5c29e2d 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -32,7 +32,7 @@ pub use snarkos_node_bft_storage_service as storage_service; pub mod helpers; mod bft; -pub use bft::*; +pub use bft::{BFT, BftCallback}; mod gateway; pub use gateway::Gateway; diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index 4cdb119e3f..f6ffdff1a0 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -18,7 +18,7 @@ use crate::{ PRIMARY_PING_IN_MS, events::{CertificateRequest, CertificateResponse, DataBlocks, Event}, gateway::{Gateway, SyncCallback as GatewaySyncCallback, Transport}, - helpers::{Pending, Storage, fmt_id, max_redundant_requests}, + helpers::{CallbackHandle, Pending, Storage, fmt_id, max_redundant_requests}, ledger_service::LedgerService, }; @@ -36,15 +36,12 @@ use snarkvm::{ }, }; -use anyhow::{Context, Result, anyhow, bail, ensure}; +use anyhow::{Context, Result, anyhow, bail}; use indexmap::IndexMap; #[cfg(feature = "locktick")] -use locktick::{ - parking_lot::{Mutex, RwLock}, - tokio::Mutex as TMutex, -}; +use locktick::{parking_lot::Mutex, tokio::Mutex as TMutex}; #[cfg(not(feature = "locktick"))] -use parking_lot::{Mutex, RwLock}; +use parking_lot::Mutex; #[cfg(not(feature = "serial"))] use rayon::prelude::*; use std::{ @@ -96,7 +93,7 @@ pub struct Sync { /// The pending certificates queue. pending: Arc, BatchCertificate>>, /// The sync callback (used by [`BFT`]). - sync_callback: Arc>>>>, + sync_callback: Arc>>>, /// Handles to the spawned background tasks. handles: Arc>>>, /// The response lock. @@ -142,8 +139,7 @@ impl Sync { pub async fn initialize(&self, sync_callback: Option>>) -> Result<()> { // If a callback was provided, set it. if let Some(callback) = sync_callback { - let prev = self.sync_callback.write().replace(callback); - ensure!(prev.is_none(), "Sync callback was already set"); + self.sync_callback.set(callback)?; } info!("Syncing storage with the ledger..."); @@ -155,11 +151,6 @@ impl Sync { Ok(()) } - /// Get the `SyncCallback` if one is set. 
- fn get_callback(&self) -> Option>> { - self.sync_callback.read().clone() - } - /// Sends the given batch of block requests to peers. /// /// Responses to block requests will eventually be processed by `Self::try_advancing_block_synchronization`. @@ -413,9 +404,8 @@ impl Sync { .flatten() .collect::>(); - // If a BFT sender was provided, send the certificates to the BFT. - if let Some(cb) = self.get_callback() { - // Await the callback to continue. + // If a callback was provided, send the certificates to it. + if let Some(cb) = self.sync_callback.get() { cb.sync_dag_at_bootup(certificates).await.with_context(|| "Failed to update the DAG from sync")?; } @@ -648,9 +638,9 @@ impl Sync { // Sync the BFT DAG with the certificates. for certificate in certificates { - // If a BFT sender was provided, send the certificate to the BFT. + // If a callback was provided, send the certificate to ti. // For validators, BFT spawns a receiver task in `BFT::start_handlers`. - if let Some(cb) = self.get_callback() { + if let Some(cb) = self.sync_callback.get() { cb.add_new_certificate(certificate) .await .with_context(|| "Failed to sync certificate - {err}")?; @@ -889,7 +879,7 @@ impl Sync { pub async fn shut_down(&self) { info!("Shutting down the sync module..."); // Remove the callback. - let _ = self.sync_callback.write().take(); + self.sync_callback.clear(); // Acquire the response lock. let _lock = self.response_lock.lock().await; // Acquire the sync lock. diff --git a/node/consensus/Cargo.toml b/node/consensus/Cargo.toml index 88c1e1c367..bbb29264b3 100644 --- a/node/consensus/Cargo.toml +++ b/node/consensus/Cargo.toml @@ -30,6 +30,8 @@ telemetry = [ "snarkos-node-bft/telemetry" ] cuda = [ "snarkvm/cuda", "snarkos-account/cuda", "snarkos-node-bft-ledger-service/cuda" ] serial = [ "snarkos-node-bft-ledger-service/serial" ] +[dependencies.async-trait] +workspace = true [dependencies.aleo-std] workspace = true diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index 3e3c34c0a3..4a2bb3c17e 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -27,16 +27,10 @@ extern crate snarkos_node_metrics as metrics; use snarkos_account::Account; use snarkos_node_bft::{ BFT, + BftCallback, MAX_BATCH_DELAY_IN_MS, Primary, - helpers::{ - ConsensusReceiver, - PrimarySender, - Storage as NarwhalStorage, - fmt_id, - init_consensus_channels, - init_primary_channels, - }, + helpers::{PrimarySender, Storage as NarwhalStorage, fmt_id, init_primary_channels}, spawn_blocking, }; use snarkos_node_bft_ledger_service::LedgerService; @@ -152,12 +146,12 @@ impl Consensus { info!("Starting the consensus instance..."); - // First, initialize the consensus channels. - let (consensus_sender, consensus_receiver) = init_consensus_channels(); - // Then, start the consensus handlers. - _self.start_handlers(consensus_receiver); + _self.start_handlers(); // Lastly, also start BFTs handlers. - _self.bft.run(Some(ping), Some(consensus_sender), _self.primary_sender.clone(), primary_receiver).await?; + _self + .bft + .run(Some(ping), Some(Arc::new(_self.clone())), _self.primary_sender.clone(), primary_receiver) + .await?; Ok(_self) } @@ -456,17 +450,7 @@ impl Consensus { /// Starts the consensus handlers. /// /// This is only invoked once, in the constructor. - fn start_handlers(&self, consensus_receiver: ConsensusReceiver) { - let ConsensusReceiver { mut rx_consensus_subdag } = consensus_receiver; - - // Process the committed subdag and transmissions from the BFT. 
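Consensus now registers itself with the BFT by passing `Arc::new(_self.clone())`, which works because its interesting state already sits behind `Arc`s, so the clone is cheap. A small sketch of this wiring, with `SubdagCallback`, `ConsensusStub`, and `BftStub` as illustrative stand-ins; anyhow, async-trait, and tokio are assumed:

    use std::sync::Arc;
    use std::sync::atomic::{AtomicU64, Ordering};

    use anyhow::Result;

    // Illustrative stand-ins for the callback trait, consensus, and BFT types.
    #[async_trait::async_trait]
    trait SubdagCallback: Send + Sync {
        async fn process_subdag(&self, anchor_round: u64) -> Result<()>;
    }

    #[derive(Clone)]
    struct ConsensusStub {
        // Cheap to clone: the shared state lives behind an `Arc`.
        processed: Arc<AtomicU64>,
    }

    #[async_trait::async_trait]
    impl SubdagCallback for ConsensusStub {
        async fn process_subdag(&self, anchor_round: u64) -> Result<()> {
            self.processed.fetch_add(1, Ordering::Relaxed);
            println!("built a block from the subdag anchored at round {anchor_round}");
            Ok(())
        }
    }

    struct BftStub {
        callback: Option<Arc<dyn SubdagCallback>>,
    }

    impl BftStub {
        async fn run(&mut self, callback: Option<Arc<dyn SubdagCallback>>) {
            self.callback = callback;
        }

        async fn commit(&self, anchor_round: u64) {
            if let Some(cb) = &self.callback {
                if let Err(err) = cb.process_subdag(anchor_round).await {
                    eprintln!("failed to advance the ledger: {err:?}");
                }
            }
        }
    }

    #[tokio::main]
    async fn main() {
        let consensus = ConsensusStub { processed: Arc::new(AtomicU64::new(0)) };
        let mut bft = BftStub { callback: None };
        // The clone is cheap because the interesting state sits behind `Arc`s.
        bft.run(Some(Arc::new(consensus.clone()))).await;
        bft.commit(7).await;
        assert_eq!(consensus.processed.load(Ordering::Relaxed), 1);
    }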
- let self_ = self.clone(); - self.spawn(async move { - while let Some((committed_subdag, transmissions, callback)) = rx_consensus_subdag.recv().await { - self_.process_bft_subdag(committed_subdag, transmissions, callback).await; - } - }); - + fn start_handlers(&self) { // Process the unconfirmed transactions in the memory pool. // // TODO (kaimast): This shouldn't happen periodically but only when new batches/blocks are accepted @@ -487,14 +471,16 @@ impl Consensus { } }); } +} +#[async_trait::async_trait] +impl BftCallback for Consensus { /// Attempts to build a new block from the given subDAG, and (tries to) advance the legder to it. async fn process_bft_subdag( &self, subdag: Subdag, transmissions: IndexMap, Transmission>, - callback: oneshot::Sender>, - ) { + ) -> Result<()> { // Try to advance to the next block. let self_ = self.clone(); let transmissions_ = transmissions.clone(); @@ -502,14 +488,14 @@ impl Consensus { // If the block failed to advance, reinsert the transmissions into the memory pool. if result.is_err() { - // On failure, reinsert the transmissions into the memory pool. self.reinsert_transmissions(transmissions).await; } - // Send the callback **after** advancing to the next block. - // Note: We must await the block to be advanced before sending the callback. - callback.send(result).ok(); + + result } +} +impl Consensus { /// Attempts to advance the ledger to the next block, and updates the metrics (if enabled) accordingly. fn try_advance_to_next_block( &self, From 74c00c72f54c1f282f1e4d8a81c6b37481ac277c Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Tue, 16 Sep 2025 14:36:54 -0700 Subject: [PATCH 12/13] misc(bft): replace PrimarySender with a callback --- Cargo.lock | 2 - node/bft/Cargo.toml | 13 +- node/bft/examples/simple_node.rs | 70 ++-- node/bft/ledger-service/src/traits.rs | 6 +- node/bft/src/bft.rs | 86 +++-- node/bft/src/gateway.rs | 122 +++++-- node/bft/src/helpers/channels.rs | 94 +---- node/bft/src/helpers/dag.rs | 4 +- node/bft/src/helpers/partition.rs | 4 +- node/bft/src/lib.rs | 4 +- node/bft/src/primary.rs | 478 ++++++++++++-------------- node/bft/src/sync/mod.rs | 2 +- node/bft/tests/bft_e2e.rs | 21 +- node/bft/tests/common/primary.rs | 35 +- node/bft/tests/common/utils.rs | 33 +- node/bft/tests/gateway_e2e.rs | 16 +- node/bft/tests/narwhal_e2e.rs | 12 +- node/consensus/Cargo.toml | 15 +- node/consensus/src/lib.rs | 35 +- 19 files changed, 468 insertions(+), 584 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be977d5f61..94eda53842 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4031,8 +4031,6 @@ dependencies = [ "parking_lot", "snarkos-account", "snarkos-node-bft", - "snarkos-node-bft-ledger-service", - "snarkos-node-bft-storage-service", "snarkos-node-metrics", "snarkos-node-sync", "snarkvm", diff --git a/node/bft/Cargo.toml b/node/bft/Cargo.toml index 26ff9f73f0..a73dd9f760 100644 --- a/node/bft/Cargo.toml +++ b/node/bft/Cargo.toml @@ -38,11 +38,15 @@ cuda = [ "snarkos-node-bft-ledger-service/cuda", "snarkos-node-sync/cuda" ] +persistent-storage = [ "snarkos-node-bft-storage-service/persistent" ] test = [ # "snarkvm/test" this breaks some of the tests - "snarkvm/test-helpers", "snarkos-node-bft-ledger-service/test", - "snarkos-node-bft-storage-service/test" + "snarkos-node-bft-storage-service/test", + "test-helpers" +] +test-helpers = [ + "snarkvm/test-helpers", ] serial = [ "snarkos-node-bft-ledger-service/serial" ] @@ -206,3 +210,8 @@ workspace = true [dev-dependencies.mockall] version = "0.13" + +[[test]] +name = "gateway-e2e" +path = 
"./tests/gateway_e2e.rs" +required-features = [ "test-helpers" ] diff --git a/node/bft/examples/simple_node.rs b/node/bft/examples/simple_node.rs index e7a84a829b..0f9bfad1b0 100644 --- a/node/bft/examples/simple_node.rs +++ b/node/bft/examples/simple_node.rs @@ -20,13 +20,7 @@ extern crate tracing; extern crate snarkos_node_metrics as metrics; use snarkos_account::Account; -use snarkos_node_bft::{ - BFT, - BftCallback, - MEMORY_POOL_PORT, - Primary, - helpers::{PrimarySender, Storage, init_primary_channels}, -}; +use snarkos_node_bft::{BFT, BftCallback, MEMORY_POOL_PORT, Primary, helpers::Storage}; use snarkos_node_bft_ledger_service::TranslucentLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_sync::BlockSync; @@ -69,7 +63,7 @@ use std::{ str::FromStr, sync::{Arc, Mutex, OnceLock}, }; -use tokio::{net::TcpListener, sync::oneshot}; +use tokio::net::TcpListener; use tracing_subscriber::{ layer::{Layer, SubscriberExt}, util::SubscriberInitExt, @@ -119,13 +113,7 @@ pub fn initialize_logger(verbosity: u8) { /**************************************************************************************************/ /// Starts the BFT instance. -pub async fn start_bft( - node_id: u16, - num_nodes: u16, - peers: HashMap, -) -> Result<(BFT, PrimarySender)> { - // Initialize the primary channels. - let (sender, receiver) = init_primary_channels(); +pub async fn start_bft(node_id: u16, num_nodes: u16, peers: HashMap) -> Result> { // Initialize the components. let (committee, account) = initialize_components(node_id, num_nodes)?; // Initialize the translucent ledger service. @@ -148,16 +136,17 @@ pub async fn start_bft( let consensus_handler = Arc::new(ConsensusHandler {}); // Initialize the BFT instance. let block_sync = Arc::new(BlockSync::new(ledger.clone())); - let mut bft = - BFT::::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None)?; + let bft = + BFT::::new(account, storage, ledger, block_sync, ip, &trusted_validators, storage_mode, None) + .await?; // Run the BFT instance. - bft.run(None, Some(consensus_handler), sender.clone(), receiver).await?; + bft.run(None, Some(consensus_handler)).await?; // Retrieve the BFT's primary. let primary = bft.primary(); // Handle OS signals. handle_signals(primary); // Return the BFT instance. - Ok((bft, sender)) + Ok(bft) } /// Starts the primary instance. @@ -165,9 +154,7 @@ pub async fn start_primary( node_id: u16, num_nodes: u16, peers: HashMap, -) -> Result<(Primary, PrimarySender)> { - // Initialize the primary channels. - let (sender, receiver) = init_primary_channels(); +) -> Result> { // Initialize the components. let (committee, account) = initialize_components(node_id, num_nodes)?; // Initialize the translucent ledger service. @@ -188,7 +175,7 @@ pub async fn start_primary( let trusted_validators = trusted_validators(node_id, num_nodes, peers); // Initialize the primary instance. let block_sync = Arc::new(BlockSync::new(ledger.clone())); - let mut primary = Primary::::new( + let primary = Primary::::new( account, storage, ledger, @@ -197,13 +184,14 @@ pub async fn start_primary( &trusted_validators, storage_mode, None, - )?; + ) + .await?; // Run the primary instance. - primary.run(None, None, None, sender.clone(), receiver).await?; + primary.run(None, None, None).await?; // Handle OS signals. handle_signals(&primary); // Return the primary instance. - Ok((primary, sender)) + Ok(primary) } /// Initialize the translucent ledger service. 
@@ -371,8 +359,7 @@ fn handle_signals(primary: &Primary) { /**************************************************************************************************/ /// Fires *fake* unconfirmed solutions at the node. -fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u16, interval_ms: u64) { - let tx_unconfirmed_solution = sender.tx_unconfirmed_solution.clone(); +fn fire_unconfirmed_solutions(primary: Primary, node_id: u16, interval_ms: u64) { tokio::task::spawn(async move { // This RNG samples the *same* fake solutions for all nodes. let mut shared_rng = rand_chacha::ChaChaRng::seed_from_u64(123456789); @@ -396,13 +383,8 @@ fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u // Sample a random fake solution ID and solution. let (solution_id, solution) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake solution. - if let Err(e) = tx_unconfirmed_solution.send((solution_id, solution, callback)).await { - error!("Failed to send unconfirmed solution: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_solution(solution_id, solution).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -412,8 +394,7 @@ fn fire_unconfirmed_solutions(sender: &PrimarySender, node_id: u } /// Fires *fake* unconfirmed transactions at the node. -fn fire_unconfirmed_transactions(sender: &PrimarySender, node_id: u16, interval_ms: u64) { - let tx_unconfirmed_transaction = sender.tx_unconfirmed_transaction.clone(); +fn fire_unconfirmed_transactions(primary: Primary, node_id: u16, interval_ms: u64) { tokio::task::spawn(async move { // This RNG samples the *same* fake transactions for all nodes. let mut shared_rng = rand_chacha::ChaChaRng::seed_from_u64(123456789); @@ -438,13 +419,8 @@ fn fire_unconfirmed_transactions(sender: &PrimarySender, node_id loop { // Sample a random fake transaction ID and transaction. let (id, transaction) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake transaction. - if let Err(e) = tx_unconfirmed_transaction.send((id, transaction, callback)).await { - error!("Failed to send unconfirmed transaction: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_transaction(id, transaction).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -583,14 +559,14 @@ async fn main() -> Result<()> { let mut bft_holder = None; // Start the node. - let (primary, sender) = match args.mode { + let primary = match args.mode { Mode::Bft => { // Start the BFT. - let (bft, sender) = start_bft(args.id, args.num_nodes, peers).await?; + let bft = start_bft(args.id, args.num_nodes, peers).await?; // Set the BFT holder. bft_holder = Some(bft.clone()); // Return the primary and sender. - (bft.primary().clone(), sender) + bft.primary().clone() } Mode::Narwhal => start_primary(args.id, args.num_nodes, peers).await?, }; @@ -602,7 +578,7 @@ async fn main() -> Result<()> { match (args.fire_transmissions, args.fire_solutions) { // Note: We allow the user to overload the solutions rate, even when the 'fire-transmissions' flag is enabled. 
(Some(rate), _) | (_, Some(rate)) => { - fire_unconfirmed_solutions(&sender, args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); + fire_unconfirmed_solutions(primary.clone(), args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); } _ => (), }; @@ -611,7 +587,7 @@ async fn main() -> Result<()> { match (args.fire_transmissions, args.fire_transactions) { // Note: We allow the user to overload the transactions rate, even when the 'fire-transmissions' flag is enabled. (Some(rate), _) | (_, Some(rate)) => { - fire_unconfirmed_transactions(&sender, args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); + fire_unconfirmed_transactions(primary.clone(), args.id, rate.unwrap_or(DEFAULT_INTERVAL_MS)); } _ => (), }; diff --git a/node/bft/ledger-service/src/traits.rs b/node/bft/ledger-service/src/traits.rs index 02b1f2d7e5..8c09857739 100644 --- a/node/bft/ledger-service/src/traits.rs +++ b/node/bft/ledger-service/src/traits.rs @@ -17,13 +17,17 @@ use snarkvm::{ ledger::{ block::{Block, Transaction}, committee::Committee, - narwhal::{BatchCertificate, Data, Subdag, Transmission, TransmissionID}, + narwhal::{BatchCertificate, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, prelude::{Address, ConsensusVersion, Field, Network, Result}, }; +#[cfg(feature = "ledger-write")] use indexmap::IndexMap; +#[cfg(feature = "ledger-write")] +use snarkvm::ledger::narwhal::Subdag; + use std::{fmt::Debug, ops::Range}; #[async_trait] diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index f258157ffa..7b05883568 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -15,7 +15,7 @@ use crate::{ MAX_LEADER_CERTIFICATE_DELAY_IN_SECS, - helpers::{CallbackHandle, DAG, PrimaryReceiver, PrimarySender, Storage, fmt_id, now}, + helpers::{CallbackHandle, DAG, Storage, fmt_id, now}, primary::{Primary, PrimaryCallback}, sync::SyncCallback, }; @@ -82,7 +82,7 @@ pub struct BFT { impl BFT { /// Initializes a new instance of the BFT. #[allow(clippy::too_many_arguments)] - pub fn new( + pub async fn new( account: Account, storage: Storage, ledger: Arc>, @@ -93,7 +93,8 @@ impl BFT { dev: Option, ) -> Result { Ok(Self { - primary: Primary::new(account, storage, ledger, block_sync, ip, trusted_validators, storage_mode, dev)?, + primary: Primary::new(account, storage, ledger, block_sync, ip, trusted_validators, storage_mode, dev) + .await?, dag: Default::default(), leader_certificate: Default::default(), leader_certificate_timer: Default::default(), @@ -106,20 +107,14 @@ impl BFT { /// /// This will return as soon as all required tasks are spawned. /// The function must not be called more than once per instance. - pub async fn run( - &mut self, - ping: Option>>, - bft_callback: Option>>, - primary_sender: PrimarySender, - primary_receiver: PrimaryReceiver, - ) -> Result<()> { + pub async fn run(&self, ping: Option>>, bft_callback: Option>>) -> Result<()> { info!("Starting the BFT instance..."); // Set up callbacks to pass to the primary. let primary_callback = Some(Arc::new(self.clone()) as Arc>); let sync_callback = Some(Arc::new(self.clone()) as Arc>); // Next, run the primary instance. - self.primary.run(ping, primary_callback, sync_callback, primary_sender, primary_receiver).await?; + self.primary.run(ping, primary_callback, sync_callback).await?; // Lastly, set up callbacks for BFT itself. // Note: This ensures that, during initial syncing, the BFT does not advance the ledger. @@ -932,7 +927,7 @@ mod tests { } // Helper function to set up BFT for testing. 
- fn initialize_bft( + async fn initialize_bft( account: Account, storage: Storage, ledger: Arc>, @@ -950,11 +945,12 @@ mod tests { StorageMode::new_test(None), None, ) + .await } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_odd() -> Result<()> { + async fn test_is_leader_quorum_odd() -> Result<()> { let rng = &mut TestRng::default(); // Sample batch certificates. @@ -983,7 +979,7 @@ mod tests { // Initialize the account. let account = Account::new(rng)?; // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call succeeds on an odd round. let result = bft.is_leader_quorum_or_nonleaders_available(1); @@ -1006,9 +1002,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_even_out_of_sync() -> Result<()> { + async fn test_is_leader_quorum_even_out_of_sync() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1018,7 +1014,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Set up the BFT logic. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Store is at round 1, and we are checking for round 2. @@ -1028,9 +1024,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_leader_quorum_even() -> Result<()> { + async fn test_is_leader_quorum_even() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1040,7 +1036,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Set up the BFT logic. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call fails on an even round. @@ -1049,9 +1045,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_is_even_round_ready() -> Result<()> { + async fn test_is_even_round_ready() -> Result<()> { let rng = &mut TestRng::default(); // Sample batch certificates. @@ -1081,7 +1077,7 @@ mod tests { let account = Account::new(rng)?; // Set up the BFT logic. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Set the leader certificate. @@ -1095,7 +1091,7 @@ mod tests { assert!(result); // Initialize a new BFT. - let bft_timer = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft_timer = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // If the leader certificate is not set and the timer has not expired, we are not ready for the next round. let result = bft_timer.is_even_round_ready_for_next_round(certificates.clone(), committee.clone(), 2); if !bft_timer.is_timer_expired() { @@ -1116,9 +1112,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_odd() -> Result<()> { + async fn test_update_leader_certificate_odd() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1126,7 +1122,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Initialize the BFT. 
- let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; assert!(bft.is_timer_expired()); // Ensure this call fails on an odd round. @@ -1135,9 +1131,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_bad_round() -> Result<()> { + async fn test_update_leader_certificate_bad_round() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1145,7 +1141,7 @@ mod tests { assert_eq!(storage.max_gc_rounds(), 10); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Ensure this call succeeds on an even round. let result = bft.update_leader_certificate_to_even_round(6); @@ -1153,9 +1149,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_update_leader_certificate_even() -> Result<()> { + async fn test_update_leader_certificate_even() -> Result<()> { let rng = &mut TestRng::default(); // Set the current round. @@ -1197,7 +1193,7 @@ mod tests { // Initialize the BFT. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Set the leader certificate. *bft.leader_certificate.write() = Some(leader_certificate); @@ -1235,7 +1231,7 @@ mod tests { // Initialize the storage. let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 1); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(3); @@ -1265,7 +1261,7 @@ mod tests { // Initialize the storage. let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 1); // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(2); @@ -1297,9 +1293,9 @@ mod tests { Ok(()) } - #[test] + #[tokio::test] #[tracing_test::traced_test] - fn test_order_dag_with_dfs_fails_on_missing_previous_certificate() -> Result<()> { + async fn test_order_dag_with_dfs_fails_on_missing_previous_certificate() -> Result<()> { let rng = &mut TestRng::default(); // Sample the test instance. @@ -1323,7 +1319,7 @@ mod tests { /* Test missing previous certificate. */ // Initialize the BFT. - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // The expected error message. let error_msg = format!( @@ -1384,7 +1380,7 @@ mod tests { // Initialize the BFT. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(commit_round); @@ -1450,7 +1446,7 @@ mod tests { // Initialize the BFT. 
let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(commit_round); @@ -1468,7 +1464,7 @@ mod tests { // Initialize a new instance of storage. let storage_2 = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), max_gc_rounds); // Initialize a new instance of BFT. - let bootup_bft = initialize_bft(account.clone(), storage_2, ledger)?; + let bootup_bft = initialize_bft(account.clone(), storage_2, ledger).await?; // Sync the BFT DAG at bootup. bootup_bft.sync_dag_at_bootup(certificates.clone()).await.unwrap(); @@ -1622,7 +1618,7 @@ mod tests { // Initialize the BFT without bootup. let account = Account::new(rng)?; - let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT without bootup. *bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(0); @@ -1647,7 +1643,7 @@ mod tests { let bootup_storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), max_gc_rounds); // Initialize a new instance of BFT with bootup. - let bootup_bft = initialize_bft(account.clone(), bootup_storage.clone(), ledger.clone())?; + let bootup_bft = initialize_bft(account.clone(), bootup_storage.clone(), ledger.clone()).await?; // Sync the BFT DAG at bootup. bootup_bft.sync_dag_at_bootup(pre_shutdown_certificates.clone()).await.unwrap(); @@ -1825,7 +1821,7 @@ mod tests { } // Initialize the bootup BFT. let account = Account::new(rng)?; - let bootup_bft = initialize_bft(account.clone(), storage.clone(), ledger.clone())?; + let bootup_bft = initialize_bft(account.clone(), storage.clone(), ledger.clone()).await?; // Insert a mock DAG in the BFT without bootup. *bootup_bft.dag.write() = crate::helpers::dag::test_helpers::mock_dag_with_modified_last_committed_round(0); diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index e4a469de06..5d62cb7b60 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -20,8 +20,8 @@ use crate::{ MAX_BATCH_DELAY_IN_MS, MEMORY_POOL_PORT, Worker, - events::{EventCodec, PrimaryPing}, - helpers::{Cache, CallbackHandle, PrimarySender, Resolver, Storage, WorkerSender, assign_to_worker}, + events::{BatchPropose, BatchSignature, EventCodec, PrimaryPing}, + helpers::{Cache, CallbackHandle, Resolver, Storage, WorkerSender, assign_to_worker}, spawn_blocking, }; use aleo_std::StorageMode; @@ -58,7 +58,7 @@ use snarkvm::{ ledger::{ Block, committee::Committee, - narwhal::{BatchHeader, Data}, + narwhal::{BatchCertificate, BatchHeader, Data}, }, prelude::{Address, Field}, }; @@ -117,7 +117,8 @@ pub trait Transport: Send + Sync { fn broadcast(&self, event: Event); } -pub trait SyncCallback: Send + Sync { +/// Callback for events specific to BlockSync. +pub trait GatewaySyncCallback: Send + Sync { /// We received a block response and can (possibly) advance synchronization. 
fn insert_block_response(&self, peer_ip: SocketAddr, blocks: Vec>) -> Result<()>; @@ -133,6 +134,18 @@ pub trait SyncCallback: Send + Sync { fn finish_certificate_request(&self, peer_ip: SocketAddr, response: CertificateResponse); } +/// Callback for primary-specific events +#[async_trait::async_trait] +pub trait GatewayPrimaryCallback: Send + Sync { + async fn process_incoming_ping(&self, peer_ip: SocketAddr, primary_certificate: Data>); + + async fn process_batch_propose(&self, peer_ip: SocketAddr, batch_propose: BatchPropose); + + async fn process_batch_signature(&self, peer_ip: SocketAddr, batch_signature: BatchSignature); + + async fn process_batch_certified(&self, peer_ip: SocketAddr, batch_certificate: Data>); +} + /// The gateway maintains connections to other validators. /// For connections with clients and provers, the Router logic is used. #[derive(Clone)] @@ -163,12 +176,12 @@ pub struct InnerGateway { peer_pool: RwLock>>, #[cfg(feature = "telemetry")] validator_telemetry: Telemetry, - /// The primary sender. - primary_sender: OnceCell>, /// The worker senders. worker_senders: Arc>>>, /// The callback for sync messages. - sync_callback: Arc>>>, + sync_callback: Arc>>>, + /// The callback for bft/primary messages. + primary_callback: Arc>>>, /// The spawned handles. handles: Mutex>>, /// The storage mode. @@ -229,7 +242,7 @@ impl Gateway { peer_pool: RwLock::new(initial_peers), #[cfg(feature = "telemetry")] validator_telemetry: Default::default(), - primary_sender: Default::default(), + primary_callback: Default::default(), worker_senders: Default::default(), sync_callback: Default::default(), handles: Default::default(), @@ -241,18 +254,16 @@ impl Gateway { /// Run the gateway. pub async fn run( &self, - primary_sender: PrimarySender, worker_senders: IndexMap>, - sync_callback: Option>>, + primary_callback: Arc>, + sync_callback: Option>>, ) { debug!("Starting the gateway for the memory pool..."); - // Set the primary sender. - self.primary_sender.set(primary_sender).expect("Primary sender already set in gateway"); - - // Set the worker senders. self.worker_senders.set(worker_senders).expect("The worker senders are already set"); + self.primary_callback.set(primary_callback).expect("The primary callback is already set"); + if let Some(sync_callback) = sync_callback { self.sync_callback.set(sync_callback).unwrap(); } @@ -354,11 +365,6 @@ impl Gateway { &self.validator_telemetry } - /// Returns the primary sender. - pub fn primary_sender(&self) -> &PrimarySender { - self.primary_sender.get().expect("Primary sender not set in gateway") - } - /// Returns the number of workers. pub fn num_workers(&self) -> u8 { u8::try_from(self.worker_senders.get().expect("Missing worker senders in gateway").len()) @@ -565,18 +571,30 @@ impl Gateway { match event { Event::BatchPropose(batch_propose) => { // Send the batch propose to the primary. - let _ = self.primary_sender().tx_batch_propose.send((peer_ip, batch_propose)).await; - Ok(true) + if let Some(cb) = self.primary_callback.get() { + cb.process_batch_propose(peer_ip, batch_propose).await; + Ok(true) + } else { + bail!("No callback set"); + } } Event::BatchSignature(batch_signature) => { - // Send the batch signature to the primary. - let _ = self.primary_sender().tx_batch_signature.send((peer_ip, batch_signature)).await; - Ok(true) + // Send the batch propose to the primary. 
+ if let Some(cb) = self.primary_callback.get() { + cb.process_batch_signature(peer_ip, batch_signature).await; + Ok(true) + } else { + bail!("No calback set"); + } } Event::BatchCertified(batch_certified) => { // Send the batch certificate to the primary. - let _ = self.primary_sender().tx_batch_certified.send((peer_ip, batch_certified.certificate)).await; - Ok(true) + if let Some(cb) = self.primary_callback.get() { + cb.process_batch_certified(peer_ip, batch_certified.certificate).await; + Ok(true) + } else { + bail!("No calback set"); + } } Event::BlockRequest(block_request) => { let BlockRequest { start_height, end_height } = block_request; @@ -692,7 +710,11 @@ impl Gateway { } // Send the batch certificates to the primary. - let _ = self.primary_sender().tx_primary_ping.send((peer_ip, primary_certificate)).await; + if let Some(cb) = self.primary_callback.get() { + cb.process_incoming_ping(peer_ip, primary_certificate).await; + } else { + bail!("No callback set"); + } Ok(true) } Event::TransmissionRequest(request) => { @@ -862,8 +884,9 @@ impl Gateway { self.handles.lock().drain(..).for_each(|handle| handle.abort()); // Close the listener. self.tcp.shut_down().await; - // Remove the sync callback (so it can be dropped). + // Remove the sync and primary callback (so they can be dropped). self.sync_callback.clear(); + self.primary_callback.clear(); } } @@ -1500,18 +1523,55 @@ impl Gateway { } } +#[cfg(any(test, feature = "test"))] +pub mod test_helpers { + use super::*; + + type CurrentNetwork = MainnetV0; + + #[derive(Default)] + pub struct DummyGatewayPrimaryCallback {} + + #[async_trait::async_trait] + impl GatewayPrimaryCallback for DummyGatewayPrimaryCallback { + async fn process_incoming_ping( + &self, + _peer_ip: SocketAddr, + _primary_certificate: Data>, + ) { + } + + async fn process_batch_propose(&self, _peer_ip: SocketAddr, _batch_propose: BatchPropose) {} + + async fn process_batch_signature( + &self, + _peer_ip: SocketAddr, + _batch_signature: BatchSignature, + ) { + } + + async fn process_batch_certified( + &self, + _peer_ip: SocketAddr, + _batch_certificate: Data>, + ) { + } + } +} + #[cfg(test)] mod prop_tests { use super::{ Gateway, prop_tests::GatewayAddress::{Dev, Prod}, + test_helpers::DummyGatewayPrimaryCallback, }; use crate::{ MAX_WORKERS, MEMORY_POOL_PORT, Worker, - helpers::{Storage, init_primary_channels, init_worker_channels}, + helpers::{Storage, init_worker_channels}, }; use aleo_std::StorageMode; use snarkos_account::Account; @@ -1703,8 +1763,6 @@ mod prop_tests { ) .unwrap(); - let (primary_sender, _) = init_primary_channels(); - let (workers, worker_senders) = { // Construct a map of the worker senders. let mut tx_workers = IndexMap::new(); @@ -1729,7 +1787,7 @@ mod prop_tests { (workers, tx_workers) }; - gateway.run(primary_sender, worker_senders, None).await; + gateway.run(worker_senders, Arc::new(DummyGatewayPrimaryCallback::default()), None).await; assert_eq!( gateway.local_ip(), SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + dev.port().unwrap()) diff --git a/node/bft/src/helpers/channels.rs b/node/bft/src/helpers/channels.rs index 1cc3f1cec0..1f23e38264 100644 --- a/node/bft/src/helpers/channels.rs +++ b/node/bft/src/helpers/channels.rs @@ -13,102 +13,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::events::{BatchPropose, BatchSignature, TransmissionRequest, TransmissionResponse}; -use snarkvm::{ - console::network::*, - ledger::{ - block::Transaction, - narwhal::{BatchCertificate, Data, TransmissionID}, - puzzle::{Solution, SolutionID}, - }, - prelude::Result, -}; +use crate::events::{TransmissionRequest, TransmissionResponse}; +use snarkvm::{console::network::*, ledger::narwhal::TransmissionID}; use std::net::SocketAddr; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::mpsc; const MAX_CHANNEL_SIZE: usize = 8192; -#[derive(Clone, Debug)] -pub struct PrimarySender { - pub tx_batch_propose: mpsc::Sender<(SocketAddr, BatchPropose)>, - pub tx_batch_signature: mpsc::Sender<(SocketAddr, BatchSignature)>, - pub tx_batch_certified: mpsc::Sender<(SocketAddr, Data>)>, - pub tx_primary_ping: mpsc::Sender<(SocketAddr, Data>)>, - pub tx_unconfirmed_solution: mpsc::Sender<(SolutionID, Data>, oneshot::Sender>)>, - pub tx_unconfirmed_transaction: mpsc::Sender<(N::TransactionID, Data>, oneshot::Sender>)>, -} - -impl PrimarySender { - /// Sends the unconfirmed solution to the primary. - pub async fn send_unconfirmed_solution( - &self, - solution_id: SolutionID, - solution: Data>, - ) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the unconfirmed solution to the primary. - self.tx_unconfirmed_solution.send((solution_id, solution, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } - - /// Sends the unconfirmed transaction to the primary. - pub async fn send_unconfirmed_transaction( - &self, - transaction_id: N::TransactionID, - transaction: Data>, - ) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback_sender, callback_receiver) = oneshot::channel(); - // Send the unconfirmed transaction to the primary. - self.tx_unconfirmed_transaction.send((transaction_id, transaction, callback_sender)).await?; - // Await the callback to continue. - callback_receiver.await? - } -} - -#[derive(Debug)] -pub struct PrimaryReceiver { - pub rx_batch_propose: mpsc::Receiver<(SocketAddr, BatchPropose)>, - pub rx_batch_signature: mpsc::Receiver<(SocketAddr, BatchSignature)>, - pub rx_batch_certified: mpsc::Receiver<(SocketAddr, Data>)>, - pub rx_primary_ping: mpsc::Receiver<(SocketAddr, Data>)>, - pub rx_unconfirmed_solution: mpsc::Receiver<(SolutionID, Data>, oneshot::Sender>)>, - pub rx_unconfirmed_transaction: - mpsc::Receiver<(N::TransactionID, Data>, oneshot::Sender>)>, -} - -/// Initializes the primary channels. 
-pub fn init_primary_channels() -> (PrimarySender, PrimaryReceiver) { - let (tx_batch_propose, rx_batch_propose) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_batch_signature, rx_batch_signature) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_batch_certified, rx_batch_certified) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_primary_ping, rx_primary_ping) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_unconfirmed_solution, rx_unconfirmed_solution) = mpsc::channel(MAX_CHANNEL_SIZE); - let (tx_unconfirmed_transaction, rx_unconfirmed_transaction) = mpsc::channel(MAX_CHANNEL_SIZE); - - let sender = PrimarySender { - tx_batch_propose, - tx_batch_signature, - tx_batch_certified, - tx_primary_ping, - tx_unconfirmed_solution, - tx_unconfirmed_transaction, - }; - let receiver = PrimaryReceiver { - rx_batch_propose, - rx_batch_signature, - rx_batch_certified, - rx_primary_ping, - rx_unconfirmed_solution, - rx_unconfirmed_transaction, - }; - - (sender, receiver) -} - #[derive(Debug)] pub struct WorkerSender { pub tx_worker_ping: mpsc::Sender<(SocketAddr, TransmissionID)>, diff --git a/node/bft/src/helpers/dag.rs b/node/bft/src/helpers/dag.rs index 53f149734f..babb62d16b 100644 --- a/node/bft/src/helpers/dag.rs +++ b/node/bft/src/helpers/dag.rs @@ -128,7 +128,9 @@ impl DAG { // Update the recently committed IDs. let is_new = self.recent_committed_ids.entry(certificate_round).or_default().insert(certificate_id); - if !is_new { + if is_new { + trace!("Got new commit for certificate {certificate_id} at round {certificate_round}"); + } else { //TODO (kaimast): return early here? trace!("Certificate {certificate_id} was already committed for round {certificate_round}"); } diff --git a/node/bft/src/helpers/partition.rs b/node/bft/src/helpers/partition.rs index 809c9fb814..f171a2a27d 100644 --- a/node/bft/src/helpers/partition.rs +++ b/node/bft/src/helpers/partition.rs @@ -19,7 +19,7 @@ use snarkvm::{ prelude::{Network, ToBytes}, }; -use anyhow::{Result, bail}; +use anyhow::{Result, bail, ensure}; use sha2::{Digest, Sha256}; fn double_sha256(data: &[u8]) -> [u8; 32] { @@ -38,6 +38,8 @@ pub fn sha256d_to_u128(data: &[u8]) -> u128 { /// Returns the worker ID for the given transmission ID. pub fn assign_to_worker(transmission_id: impl Into>, num_workers: u8) -> Result { + ensure!(num_workers > 0, "Need at least one worker"); + // If there is only one worker, return it. if num_workers == 1 { return Ok(0); diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index d2b5c29e2d..33dbb08775 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -34,8 +34,8 @@ pub mod helpers; mod bft; pub use bft::{BFT, BftCallback}; -mod gateway; -pub use gateway::Gateway; +pub mod gateway; +pub use gateway::{Gateway, GatewayPrimaryCallback, GatewaySyncCallback}; mod primary; pub use primary::*; diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 93644174ef..541b5c8f59 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -21,11 +21,9 @@ use crate::{ WORKER_PING_IN_MS, Worker, events::{BatchPropose, BatchSignature, Event}, - gateway::{Gateway, Transport}, + gateway::{Gateway, GatewayPrimaryCallback, Transport}, helpers::{ CallbackHandle, - PrimaryReceiver, - PrimarySender, Proposal, ProposalCache, SignedProposals, @@ -132,7 +130,7 @@ impl Primary { /// Initializes a new primary instance. 
#[allow(clippy::too_many_arguments)] - pub fn new( + pub async fn new( account: Account, storage: Storage, ledger: Arc>, @@ -147,22 +145,77 @@ impl Primary { Gateway::new(account, storage.clone(), ledger.clone(), ip, trusted_validators, storage_mode.clone(), dev)?; // Initialize the sync module. let sync = Sync::new(gateway.clone(), storage.clone(), ledger.clone(), block_sync); + let proposed_batch = Arc::new(ProposedBatch::default()); + + // Construct a map of the worker senders. + let mut worker_senders = IndexMap::new(); + + // Initialize the workers. + let mut workers = Vec::new(); + for id in 0..MAX_WORKERS { + // Construct the worker channels. + let (tx_worker, rx_worker) = init_worker_channels(); + // Construct the worker instance. + let worker = + Worker::new(id, Arc::new(gateway.clone()), storage.clone(), ledger.clone(), proposed_batch.clone()) + .with_context(|| "Failed to initialize worker")?; + // Run the worker instance. + worker.run(rx_worker); + // Add the worker to the list of workers. + workers.push(worker); + // Add the worker sender to the map. + worker_senders.insert(id, tx_worker); + } // Initialize the primary instance. - Ok(Self { + let obj = Self { sync, - gateway, + gateway: gateway.clone(), storage, ledger, - workers: Arc::from(vec![]), + workers: Arc::from(workers), primary_callback: Default::default(), - proposed_batch: Default::default(), + proposed_batch, latest_proposed_batch_timestamp: Default::default(), signed_proposals: Default::default(), handles: Default::default(), propose_lock: Default::default(), storage_mode, - }) + }; + + // Next, initialize the gateway. + let gateway_primary_callback = Arc::new(obj.clone()) as Arc>; + let gateway_sync_callback = Arc::new(obj.sync.clone()); + obj.gateway.run(worker_senders, gateway_primary_callback, Some(gateway_sync_callback)).await; + + Ok(obj) + } + + /// Starts all remaining (background) tasks needed for the primary instance. + pub async fn run( + &self, + ping: Option>>, + primary_callback: Option>>, + sync_callback: Option>>, + ) -> Result<()> { + info!("Starting the primary instance of the memory pool..."); + + // Set the BFT sender. + if let Some(callback) = primary_callback { + self.primary_callback.set(callback)?; + } + + // Next, initialize the sync module and sync the storage from ledger. + self.sync.initialize(sync_callback).await?; + // Next, load and process the proposal cache before running the sync module. + self.load_proposal_cache().await?; + // Next, run the sync module. + self.sync.run(ping).await?; + // Lastly, start the primary handlers. + // Note: This ensures the primary does not start communicating before syncing is complete. + self.start_handlers(); + + Ok(()) } /// Load the proposal cache file and update the Primary state with the stored data. @@ -205,63 +258,6 @@ impl Primary { } } - /// Run the primary instance. - pub async fn run( - &mut self, - ping: Option>>, - primary_callback: Option>>, - sync_callback: Option>>, - primary_sender: PrimarySender, - primary_receiver: PrimaryReceiver, - ) -> Result<()> { - info!("Starting the primary instance of the memory pool..."); - - // Set the BFT sender. - if let Some(callback) = primary_callback { - self.primary_callback.set(callback)?; - } - - // Construct a map of the worker senders. - let mut worker_senders = IndexMap::new(); - // Construct a map for the workers. - let mut workers = Vec::new(); - // Initialize the workers. - for id in 0..MAX_WORKERS { - // Construct the worker channels. 
- let (tx_worker, rx_worker) = init_worker_channels(); - // Construct the worker instance. - let worker = Worker::new( - id, - Arc::new(self.gateway.clone()), - self.storage.clone(), - self.ledger.clone(), - self.proposed_batch.clone(), - )?; - // Run the worker instance. - worker.run(rx_worker); - // Add the worker to the list of workers. - workers.push(worker); - // Add the worker sender to the map. - worker_senders.insert(id, tx_worker); - } - // Set the workers. - self.workers = Arc::from(workers); - - // Next, initialize the sync module and sync the storage from ledger. - self.sync.initialize(sync_callback).await?; - // Next, load and process the proposal cache before running the sync module. - self.load_proposal_cache().await?; - // Next, run the sync module. - self.sync.run(ping).await?; - // Next, initialize the gateway. - self.gateway.run(primary_sender, worker_senders, Some(Arc::new(self.sync.clone()))).await; - // Lastly, start the primary handlers. - // Note: This ensures the primary does not start communicating before syncing is complete. - self.start_handlers(primary_receiver); - - Ok(()) - } - /// Returns the current round. pub fn current_round(&self) -> u64 { self.storage.current_round() @@ -1198,16 +1194,7 @@ impl Primary { /// tries to move the the next round of batches. /// /// This function is called exactly once, in `Self::run()`. - fn start_handlers(&self, primary_receiver: PrimaryReceiver) { - let PrimaryReceiver { - mut rx_batch_propose, - mut rx_batch_signature, - mut rx_batch_certified, - mut rx_primary_ping, - mut rx_unconfirmed_solution, - mut rx_unconfirmed_transaction, - } = primary_receiver; - + fn start_handlers(&self) { // Start the primary ping sender. let self_ = self.clone(); self.spawn(async move { @@ -1264,39 +1251,6 @@ impl Primary { } }); - // Start the primary ping handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, primary_certificate)) = rx_primary_ping.recv().await { - // If the primary is not synced, then do not process the primary ping. - if self_.sync.is_synced() { - trace!("Processing new primary ping from '{peer_ip}'"); - } else { - trace!("Skipping a primary ping from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - - // Spawn a task to process the primary certificate. - { - let self_ = self_.clone(); - tokio::spawn(async move { - // Deserialize the primary certificate in the primary ping. - let Ok(primary_certificate) = spawn_blocking!(primary_certificate.deserialize_blocking()) - else { - warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); - return; - }; - // Process the primary certificate. - let id = fmt_id(primary_certificate.id()); - let round = primary_certificate.round(); - if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, primary_certificate).await { - warn!("Cannot process a primary certificate '{id}' at round {round} in a 'PrimaryPing' from '{peer_ip}' - {e}"); - } - }); - } - } - }); - // Start the worker ping(s). let self_ = self.clone(); self.spawn(async move { @@ -1344,75 +1298,6 @@ impl Primary { } }); - // Start the proposed batch handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_propose)) = rx_batch_propose.recv().await { - // If the primary is not synced, then do not sign the batch. - if !self_.sync.is_synced() { - trace!("Skipping a batch proposal from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Spawn a task to process the proposed batch. 
- let self_ = self_.clone(); - tokio::spawn(async move { - // Process the batch proposal. - let round = batch_propose.round; - if let Err(e) = self_.process_batch_propose_from_peer(peer_ip, batch_propose).await { - warn!("Cannot sign a batch at round {round} from '{peer_ip}' - {e}"); - } - }); - } - }); - - // Start the batch signature handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_signature)) = rx_batch_signature.recv().await { - // If the primary is not synced, then do not store the signature. - if !self_.sync.is_synced() { - trace!("Skipping a batch signature from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Process the batch signature. - // Note: Do NOT spawn a task around this function call. Processing signatures from peers - // is a critical path, and we should only store the minimum required number of signatures. - // In addition, spawning a task can cause concurrent processing of signatures (even with a lock), - // which means the RwLock for the proposed batch must become a 'tokio::sync' to be safe. - let id = fmt_id(batch_signature.batch_id); - if let Err(e) = self_.process_batch_signature_from_peer(peer_ip, batch_signature).await { - warn!("Cannot store a signature for batch '{id}' from '{peer_ip}' - {e}"); - } - } - }); - - // Start the certified batch handler. - let self_ = self.clone(); - self.spawn(async move { - while let Some((peer_ip, batch_certificate)) = rx_batch_certified.recv().await { - // If the primary is not synced, then do not store the certificate. - if !self_.sync.is_synced() { - trace!("Skipping a certified batch from '{peer_ip}' {}", "(node is syncing)".dimmed()); - continue; - } - // Spawn a task to process the batch certificate. - let self_ = self_.clone(); - tokio::spawn(async move { - // Deserialize the batch certificate. - let Ok(batch_certificate) = spawn_blocking!(batch_certificate.deserialize_blocking()) else { - warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); - return; - }; - // Process the batch certificate. - let id = fmt_id(batch_certificate.id()); - let round = batch_certificate.round(); - if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, batch_certificate).await { - warn!("Cannot store a certificate '{id}' for round {round} from '{peer_ip}' - {e}"); - } - }); - } - }); - // This task periodically tries to move to the next round. // // Note: This is necessary to ensure that the primary is not stuck on a previous round @@ -1455,59 +1340,6 @@ impl Primary { } } }); - - // Start a handler to process new unconfirmed solutions. - let self_ = self.clone(); - self.spawn(async move { - while let Some((solution_id, solution, callback)) = rx_unconfirmed_solution.recv().await { - // Compute the checksum for the solution. - let Ok(checksum) = solution.to_checksum::() else { - error!("Failed to compute the checksum for the unconfirmed solution"); - continue; - }; - // Compute the worker ID. - let Ok(worker_id) = assign_to_worker((solution_id, checksum), self_.num_workers()) else { - error!("Unable to determine the worker ID for the unconfirmed solution"); - continue; - }; - let self_ = self_.clone(); - tokio::spawn(async move { - // Retrieve the worker. - let worker = &self_.workers[worker_id as usize]; - // Process the unconfirmed solution. - let result = worker.process_unconfirmed_solution(solution_id, solution).await; - // Send the result to the callback. 
- callback.send(result).ok(); - }); - } - }); - - // Start a handler to process new unconfirmed transactions. - let self_ = self.clone(); - self.spawn(async move { - while let Some((transaction_id, transaction, callback)) = rx_unconfirmed_transaction.recv().await { - trace!("Primary - Received an unconfirmed transaction '{}'", fmt_id(transaction_id)); - // Compute the checksum for the transaction. - let Ok(checksum) = transaction.to_checksum::() else { - error!("Failed to compute the checksum for the unconfirmed transaction"); - continue; - }; - // Compute the worker ID. - let Ok(worker_id) = assign_to_worker::((&transaction_id, &checksum), self_.num_workers()) else { - error!("Unable to determine the worker ID for the unconfirmed transaction"); - continue; - }; - let self_ = self_.clone(); - tokio::spawn(async move { - // Retrieve the worker. - let worker = &self_.workers[worker_id as usize]; - // Process the unconfirmed transaction. - let result = worker.process_unconfirmed_transaction(transaction_id, transaction).await; - // Send the result to the callback. - callback.send(result).ok(); - }); - } - }); } /// Checks if the proposed batch is expired, and clears the proposed batch if it has expired. @@ -1960,6 +1792,138 @@ impl Primary { } } +/// Handle events from the Gateway +#[async_trait::async_trait] +impl GatewayPrimaryCallback for Primary { + async fn process_incoming_ping(&self, peer_ip: SocketAddr, primary_certificate: Data>) { + // If the primary is not synced, then do not process the primary ping. + if self.sync.is_synced() { + trace!("Processing new primary ping from '{peer_ip}'"); + } else { + trace!("Skipping a primary ping from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + + // Spawn a task to process the primary certificate. + { + let self_ = self.clone(); + tokio::spawn(async move { + // Deserialize the primary certificate in the primary ping. + let Ok(primary_certificate) = spawn_blocking!(primary_certificate.deserialize_blocking()) else { + warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); + return; + }; + // Process the primary certificate. + let id = fmt_id(primary_certificate.id()); + let round = primary_certificate.round(); + if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, primary_certificate).await { + warn!( + "Cannot process a primary certificate '{id}' at round {round} in a 'PrimaryPing' from '{peer_ip}' - {e}" + ); + } + }); + } + } + + async fn process_batch_propose(&self, peer_ip: SocketAddr, batch_propose: BatchPropose) { + // If the primary is not synced, then do not sign the batch. + if !self.sync.is_synced() { + trace!("Skipping a batch proposal from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Spawn a task to process the proposed batch. + let self_ = self.clone(); + tokio::spawn(async move { + // Process the batch proposal. + let round = batch_propose.round; + if let Err(e) = self_.process_batch_propose_from_peer(peer_ip, batch_propose).await { + warn!("Cannot sign a batch at round {round} from '{peer_ip}' - {e}"); + } + }); + } + + async fn process_batch_signature(&self, peer_ip: SocketAddr, batch_signature: BatchSignature) { + // If the primary is not synced, then do not store the signature. + if !self.sync.is_synced() { + trace!("Skipping a batch signature from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Process the batch signature. + // Note: Do NOT spawn a task around this function call. 
Processing signatures from peers + // is a critical path, and we should only store the minimum required number of signatures. + // In addition, spawning a task can cause concurrent processing of signatures (even with a lock), + // which means the RwLock for the proposed batch must become a 'tokio::sync' to be safe. + let id = fmt_id(batch_signature.batch_id); + if let Err(e) = self.process_batch_signature_from_peer(peer_ip, batch_signature).await { + warn!("Cannot store a signature for batch '{id}' from '{peer_ip}' - {e}"); + } + } + + async fn process_batch_certified(&self, peer_ip: SocketAddr, batch_certificate: Data>) { + // If the primary is not synced, then do not store the certificate. + if !self.sync.is_synced() { + trace!("Skipping a certified batch from '{peer_ip}' {}", "(node is syncing)".dimmed()); + return; + } + // Spawn a task to process the batch certificate. + let self_ = self.clone(); + tokio::spawn(async move { + // Deserialize the batch certificate. + let Ok(batch_certificate) = spawn_blocking!(batch_certificate.deserialize_blocking()) else { + warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); + return; + }; + // Process the batch certificate. + let id = fmt_id(batch_certificate.id()); + let round = batch_certificate.round(); + if let Err(e) = self_.process_batch_certificate_from_peer(peer_ip, batch_certificate).await { + warn!("Cannot store a certificate '{id}' for round {round} from '{peer_ip}' - {e}"); + } + }); + } +} + +/// Invoked by the mempool ("Consensus"). +impl Primary { + pub async fn process_unconfirmed_solution( + &self, + solution_id: SolutionID, + solution: Data>, + ) -> Result<()> { + // Compute the checksum for the solution. + let Ok(checksum) = solution.to_checksum::() else { + bail!("Failed to compute the checksum for the unconfirmed solution"); + }; + + // Compute the worker ID. + let Ok(worker_id) = assign_to_worker((solution_id, checksum), self.num_workers()) else { + bail!("Unable to determine the worker ID for the unconfirmed solution"); + }; + + // Wait for the worker to process the unconfirmed solution. + self.workers[worker_id as usize].process_unconfirmed_solution(solution_id, solution).await + } + + pub async fn process_unconfirmed_transaction( + &self, + transaction_id: N::TransactionID, + transaction: Data>, + ) -> Result<()> { + trace!("Primary - Received an unconfirmed transaction '{}'", fmt_id(transaction_id)); + // Compute the checksum for the transaction. + let Ok(checksum) = transaction.to_checksum::() else { + bail!("Failed to compute the checksum for the unconfirmed transaction"); + }; + // Compute the worker ID. + let Ok(worker_id) = assign_to_worker::((&transaction_id, &checksum), self.num_workers()) else { + bail!("Unable to determine the worker ID for the unconfirmed transaction"); + }; + + // Wait for the worker to process the unconfirmed transaction. + self.workers[worker_id as usize].process_unconfirmed_transaction(transaction_id, transaction).await + } +} + #[cfg(test)] mod tests { use super::*; @@ -1998,7 +1962,7 @@ mod tests { } // Returns a primary and a list of accounts in the configured committee. 
- fn primary_with_committee( + async fn primary_with_committee( account_index: usize, accounts: &[(SocketAddr, Account)], committee: Committee, @@ -2011,7 +1975,7 @@ mod tests { let account = accounts[account_index].1.clone(); let block_sync = Arc::new(BlockSync::new(ledger.clone())); let mut primary = - Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::new_test(None), None).unwrap(); + Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::Test(None), None).await.unwrap(); // Construct a worker instance. primary.workers = Arc::from([Worker::new( @@ -2029,7 +1993,7 @@ mod tests { primary } - fn primary_without_handlers( + async fn primary_without_handlers( rng: &mut TestRng, ) -> (Primary, Vec<(SocketAddr, Account)>) { let (accounts, committee) = sample_committee(rng); @@ -2038,7 +2002,8 @@ mod tests { &accounts, committee, CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V1).unwrap(), - ); + ) + .await; (primary, accounts) } @@ -2239,7 +2204,7 @@ mod tests { #[tokio::test] async fn test_propose_batch() { let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2260,7 +2225,7 @@ mod tests { #[tokio::test] async fn test_propose_batch_with_no_transmissions() { let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2274,7 +2239,7 @@ mod tests { async fn test_propose_batch_in_round() { let round = 3; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Fill primary storage. store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2300,7 +2265,7 @@ mod tests { let round = 3; let prev_round = round - 1; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; let peer_account = &accounts[1]; let peer_ip = peer_account.0; @@ -2378,7 +2343,8 @@ mod tests { &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V4).unwrap(), - ); + ) + .await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2406,7 +2372,7 @@ mod tests { #[tokio::test] async fn test_batch_propose_from_peer() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2445,7 +2411,7 @@ mod tests { #[tokio::test] async fn test_batch_propose_from_peer_when_not_synced() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. 
let round = 1; @@ -2483,7 +2449,7 @@ mod tests { async fn test_batch_propose_from_peer_in_round() { let round = 2; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2522,7 +2488,7 @@ mod tests { #[tokio::test] async fn test_batch_propose_from_peer_wrong_round() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2565,7 +2531,7 @@ mod tests { async fn test_batch_propose_from_peer_in_round_wrong_round() { let round = 4; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2611,7 +2577,7 @@ mod tests { async fn test_batch_propose_from_peer_with_past_timestamp() { let round = 2; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2668,13 +2634,15 @@ mod tests { &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V4).unwrap(), - ); + ) + .await; let primary_v5 = primary_with_committee( 1, &accounts, committee.clone(), CurrentNetwork::CONSENSUS_HEIGHT(ConsensusVersion::V5).unwrap(), - ); + ) + .await; // Create a valid proposal with an author that isn't the primary. let round = 1; @@ -2724,7 +2692,7 @@ mod tests { async fn test_propose_batch_with_storage_round_behind_proposal_lock() { let round = 3; let mut rng = TestRng::default(); - let (primary, _) = primary_without_handlers(&mut rng); + let (primary, _) = primary_without_handlers(&mut rng).await; // Check there is no batch currently proposed. assert!(primary.proposed_batch.read().is_none()); @@ -2757,7 +2725,7 @@ mod tests { async fn test_propose_batch_with_storage_round_behind_proposal() { let round = 5; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; // Generate previous certificates. let previous_certificates = store_certificate_chain(&primary, &accounts, round, &mut rng); @@ -2786,7 +2754,7 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_batch_signature_from_peer() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Create a valid proposal. @@ -2823,7 +2791,7 @@ mod tests { async fn test_batch_signature_from_peer_in_round() { let round = 5; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Generate certificates. 
@@ -2861,7 +2829,7 @@ mod tests { #[tokio::test] async fn test_batch_signature_from_peer_no_quorum() { let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Create a valid proposal. @@ -2897,7 +2865,7 @@ mod tests { async fn test_batch_signature_from_peer_in_round_no_quorum() { let round = 7; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; map_account_addresses(&primary, &accounts); // Generate certificates. @@ -2936,7 +2904,7 @@ mod tests { let round = 3; let prev_round = round - 1; let mut rng = TestRng::default(); - let (primary, accounts) = primary_without_handlers(&mut rng); + let (primary, accounts) = primary_without_handlers(&mut rng).await; let peer_account = &accounts[1]; let peer_ip = peer_account.0; diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index f6ffdff1a0..8d50d0429f 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -17,7 +17,7 @@ use crate::{ MAX_FETCH_TIMEOUT_IN_MS, PRIMARY_PING_IN_MS, events::{CertificateRequest, CertificateResponse, DataBlocks, Event}, - gateway::{Gateway, SyncCallback as GatewaySyncCallback, Transport}, + gateway::{Gateway, GatewaySyncCallback, Transport}, helpers::{CallbackHandle, Pending, Storage, fmt_id, max_redundant_requests}, ledger_service::LedgerService, }; diff --git a/node/bft/tests/bft_e2e.rs b/node/bft/tests/bft_e2e.rs index 5f2b0baeeb..ceb515065f 100644 --- a/node/bft/tests/bft_e2e.rs +++ b/node/bft/tests/bft_e2e.rs @@ -39,7 +39,8 @@ async fn test_state_coherence() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: true, - }); + }) + .await; network.start().await; @@ -60,7 +61,8 @@ async fn test_resync() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: false, - }); + }) + .await; network.start().await; // Let the nodes advance through the rounds. @@ -77,7 +79,8 @@ async fn test_resync() { fire_transmissions: None, log_level: None, log_connections: false, - }); + }) + .await; spare_network.start().await; for i in 1..N { @@ -106,7 +109,8 @@ async fn test_quorum_threshold() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check each node is at round 1 (0 is genesis). @@ -158,7 +162,8 @@ async fn test_quorum_break() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. @@ -194,7 +199,8 @@ async fn test_leader_election_consistency() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Wait for starting round to be reached @@ -246,7 +252,8 @@ async fn test_transient_break() { // Set this to Some(0..=4) to see the logs. log_level: Some(6), log_connections: false, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. 
diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index 4c346cf8f5..ca26db34ec 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -20,13 +20,7 @@ use crate::common::{ }; use snarkos_account::Account; -use snarkos_node_bft::{ - BFT, - MAX_BATCH_DELAY_IN_MS, - MEMORY_POOL_PORT, - Primary, - helpers::{PrimarySender, Storage, init_primary_channels}, -}; +use snarkos_node_bft::{BFT, MAX_BATCH_DELAY_IN_MS, MEMORY_POOL_PORT, Primary, helpers::Storage}; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_router::PeerPoolHandling; use snarkos_node_sync::BlockSync; @@ -100,8 +94,6 @@ pub struct TestValidator { pub id: u16, /// The primary instance. When the BFT is enabled this is a clone of the BFT primary. pub primary: Primary, - /// The channel sender of the primary. - pub primary_sender: Option>, /// The BFT instance. This is only set if the BFT is enabled. pub bft: OnceLock>, /// The tokio handles of all long-running tasks associated with the validator (incl. cannons). @@ -112,9 +104,8 @@ pub type CurrentLedger = Ledger> impl TestValidator { pub fn fire_transmissions(&mut self, interval_ms: u64) { - let solution_handle = fire_unconfirmed_solutions(self.primary_sender.as_mut().unwrap(), self.id, interval_ms); - let transaction_handle = - fire_unconfirmed_transactions(self.primary_sender.as_mut().unwrap(), self.id, interval_ms); + let solution_handle = fire_unconfirmed_solutions(self.primary.clone(), self.id, interval_ms); + let transaction_handle = fire_unconfirmed_transactions(self.primary.clone(), self.id, interval_ms); self.handles.lock().push(solution_handle); self.handles.lock().push(transaction_handle); @@ -137,7 +128,7 @@ impl TestValidator { impl TestNetwork { // Creates a new test network with the given configuration. - pub fn new(config: TestNetworkConfig) -> Self { + pub async fn new(config: TestNetworkConfig) -> Self { let mut rng = TestRng::default(); if let Some(log_level) = config.log_level { @@ -182,6 +173,7 @@ impl TestNetwork { StorageMode::new_test(None), None, ) + .await .unwrap(); (bft.primary().clone(), Some(bft)) } else { @@ -195,17 +187,13 @@ impl TestNetwork { StorageMode::new_test(None), None, ) + .await .unwrap(); (primary, None) }; - let test_validator = TestValidator { - id: id as u16, - primary, - primary_sender: None, - bft: OnceLock::new(), - handles: Default::default(), - }; + let test_validator = + TestValidator { id: id as u16, primary, bft: OnceLock::new(), handles: Default::default() }; if let Some(bft) = bft { assert!(test_validator.bft.set(bft).is_ok()); } @@ -218,19 +206,16 @@ impl TestNetwork { // Starts each node in the network. pub async fn start(&mut self) { for validator in self.validators.values_mut() { - let (primary_sender, primary_receiver) = init_primary_channels(); - validator.primary_sender = Some(primary_sender.clone()); - // let ledger_service = validator.primary.ledger().clone(); // let sync = BlockSync::new(BlockSyncMode::Gateway, ledger_service); // sync.try_block_sync(validator.primary.gateway()).await.unwrap(); if let Some(bft) = validator.bft.get_mut() { // Setup the channels and start the bft. - bft.run(None, None, primary_sender, primary_receiver).await.unwrap(); + bft.run(None, None).await.unwrap(); } else { // Setup the channels and start the primary. 
- validator.primary.run(None, None, None, primary_sender, primary_receiver).await.unwrap(); + validator.primary.run(None, None, None).await.unwrap(); } if let Some(interval_ms) = self.config.fire_transmissions { diff --git a/node/bft/tests/common/utils.rs b/node/bft/tests/common/utils.rs index e61e9d5b75..8708a8535b 100644 --- a/node/bft/tests/common/utils.rs +++ b/node/bft/tests/common/utils.rs @@ -16,11 +16,7 @@ use crate::common::{CurrentNetwork, TranslucentLedgerService, primary}; use aleo_std::StorageMode; use snarkos_account::Account; -use snarkos_node_bft::{ - Gateway, - Worker, - helpers::{PrimarySender, Storage}, -}; +use snarkos_node_bft::{Gateway, Primary, Worker, helpers::Storage}; use snarkos_node_bft::storage_service::BFTMemoryService; use snarkos_utilities::SimpleStoppable; @@ -52,8 +48,7 @@ use locktick::parking_lot::RwLock; #[cfg(not(feature = "locktick"))] use parking_lot::RwLock; use rand::Rng; -use tokio::{sync::oneshot, task::JoinHandle, time::sleep}; -use tracing::*; +use tokio::{task::JoinHandle, time::sleep}; use tracing_subscriber::{ layer::{Layer, SubscriberExt}, util::SubscriberInitExt, @@ -91,12 +86,7 @@ pub fn initialize_logger(verbosity: u8) { } /// Fires *fake* unconfirmed solutions at the node. -pub fn fire_unconfirmed_solutions( - sender: &PrimarySender, - node_id: u16, - interval_ms: u64, -) -> JoinHandle<()> { - let tx_unconfirmed_solution = sender.tx_unconfirmed_solution.clone(); +pub fn fire_unconfirmed_solutions(primary: Primary, node_id: u16, interval_ms: u64) -> JoinHandle<()> { tokio::task::spawn(async move { // This RNG samples the *same* fake solutions for all nodes. let mut shared_rng = TestRng::fixed(123456789); @@ -122,13 +112,8 @@ pub fn fire_unconfirmed_solutions( // Sample a random fake solution ID and solution. let (solution_id, solution) = if counter % 2 == 0 { sample(&mut shared_rng).await } else { sample(&mut unique_rng).await }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake solution. - if let Err(e) = tx_unconfirmed_solution.send((solution_id, solution, callback)).await { - error!("Failed to send unconfirmed solution: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_solution(solution_id, solution).await; // Increment the counter. counter += 1; // Sleep briefly. @@ -139,11 +124,10 @@ pub fn fire_unconfirmed_solutions( /// Fires *fake* unconfirmed transactions at the node. pub fn fire_unconfirmed_transactions( - sender: &PrimarySender, + primary: Primary, node_id: u16, interval_ms: u64, ) -> JoinHandle<()> { - let tx_unconfirmed_transaction = sender.tx_unconfirmed_transaction.clone(); tokio::task::spawn(async move { // This RNG samples the *same* fake transactions for all nodes. let mut shared_rng = TestRng::fixed(123456789); @@ -170,13 +154,8 @@ pub fn fire_unconfirmed_transactions( loop { // Sample a random fake transaction ID and transaction. let (id, transaction) = if counter % 2 == 0 { sample(&mut shared_rng) } else { sample(&mut unique_rng) }; - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the fake transaction. - if let Err(e) = tx_unconfirmed_transaction.send((id, transaction, callback)).await { - error!("Failed to send unconfirmed transaction: {e}"); - } - let _ = callback_receiver.await; + let _ = primary.process_unconfirmed_transaction(id, transaction).await; // Increment the counter. counter += 1; // Sleep briefly. 
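The two cannon helpers above now take a cloned `Primary` handle and await its methods directly, rather than pushing onto a `PrimarySender` channel and waiting on a oneshot callback. A minimal before/after sketch, using only names that appear in this diff (the test fixture is assumed to have `tx_unconfirmed_solution`, `primary`, `solution_id`, and `solution` in scope):

    // Old flow (removed): send over the channel, then await the oneshot
    // callback to learn whether the primary accepted the solution.
    let (callback, callback_receiver) = oneshot::channel();
    if let Err(e) = tx_unconfirmed_solution.send((solution_id, solution, callback)).await {
        error!("Failed to send unconfirmed solution: {e}");
    }
    let _ = callback_receiver.await;

    // New flow: a single direct call; the future resolves once the primary
    // has processed (or rejected) the solution.
    let _ = primary.process_unconfirmed_solution(solution_id, solution).await;
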
diff --git a/node/bft/tests/gateway_e2e.rs b/node/bft/tests/gateway_e2e.rs index d616b70378..b4535a399d 100644 --- a/node/bft/tests/gateway_e2e.rs +++ b/node/bft/tests/gateway_e2e.rs @@ -22,14 +22,20 @@ use crate::common::{ test_peer::TestPeer, utils::{sample_gateway, sample_ledger, sample_storage}, }; + +use snarkos_node_bft::{ + Gateway, + events::{ChallengeRequest, ChallengeResponse, Event}, + gateway::test_helpers::DummyGatewayPrimaryCallback, +}; + use snarkos_account::Account; -use snarkos_node_bft::{Gateway, helpers::init_primary_channels}; -use snarkos_node_bft_events::{ChallengeRequest, ChallengeResponse, Event}; use snarkos_node_router::PeerPoolHandling; use snarkos_node_tcp::P2P; + use snarkvm::{ledger::narwhal::Data, prelude::TestRng}; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; use deadline::deadline; use rand::Rng; @@ -44,9 +50,7 @@ async fn new_test_gateway( let gateway = sample_gateway(accounts[0].clone(), storage, ledger); // Set up primary channels, we discard the rx as we're testing the gateway sans BFT. - let (primary_tx, _primary_rx) = init_primary_channels(); - - gateway.run(primary_tx, [].into(), None).await; + gateway.run([].into(), Arc::new(DummyGatewayPrimaryCallback::default()), None).await; (accounts, gateway) } diff --git a/node/bft/tests/narwhal_e2e.rs b/node/bft/tests/narwhal_e2e.rs index 63c803767d..c202d40fa7 100644 --- a/node/bft/tests/narwhal_e2e.rs +++ b/node/bft/tests/narwhal_e2e.rs @@ -38,7 +38,8 @@ async fn test_state_coherence() { // Set this to Some(0..=4) to see the logs. log_level: Some(0), log_connections: true, - }); + }) + .await; network.start().await; @@ -62,7 +63,8 @@ async fn test_quorum_threshold() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check each node is at round 1 (0 is genesis). @@ -114,7 +116,8 @@ async fn test_quorum_break() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. @@ -144,7 +147,8 @@ async fn test_storage_coherence() { // Set this to Some(0..=4) to see the logs. log_level: None, log_connections: true, - }); + }) + .await; network.start().await; // Check the nodes have started advancing through the rounds. 
diff --git a/node/consensus/Cargo.toml b/node/consensus/Cargo.toml index bbb29264b3..e33383cd67 100644 --- a/node/consensus/Cargo.toml +++ b/node/consensus/Cargo.toml @@ -21,14 +21,12 @@ default = [ ] locktick = [ "dep:locktick", "snarkos-node-bft/locktick", - "snarkos-node-bft-ledger-service/locktick", - "snarkos-node-bft-storage-service/locktick", "snarkvm/locktick" ] metrics = [ "dep:snarkos-node-metrics" ] telemetry = [ "snarkos-node-bft/telemetry" ] -cuda = [ "snarkvm/cuda", "snarkos-account/cuda", "snarkos-node-bft-ledger-service/cuda" ] -serial = [ "snarkos-node-bft-ledger-service/serial" ] +cuda = [ "snarkvm/cuda", "snarkos-account/cuda" ] +serial = [ ] [dependencies.async-trait] workspace = true @@ -66,14 +64,7 @@ workspace = true [dependencies.snarkos-node-bft] workspace = true - -[dependencies.snarkos-node-bft-ledger-service] -workspace = true -features = [ "ledger", "ledger-write" ] - -[dependencies.snarkos-node-bft-storage-service] -workspace = true -features = [ "persistent" ] +features = [ "persistent-storage" ] [dependencies.snarkos-node-sync] workspace = true diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index 4a2bb3c17e..a2a6d40f70 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -30,11 +30,11 @@ use snarkos_node_bft::{ BftCallback, MAX_BATCH_DELAY_IN_MS, Primary, - helpers::{PrimarySender, Storage as NarwhalStorage, fmt_id, init_primary_channels}, + helpers::{Storage as NarwhalStorage, fmt_id}, + ledger_service::LedgerService, spawn_blocking, + storage_service::BFTPersistentStorage, }; -use snarkos_node_bft_ledger_service::LedgerService; -use snarkos_node_bft_storage_service::BFTPersistentStorage; use snarkos_node_sync::{BlockSync, Ping}; use snarkvm::{ @@ -56,7 +56,7 @@ use lru::LruCache; #[cfg(not(feature = "locktick"))] use parking_lot::{Mutex, RwLock}; use std::{future::Future, net::SocketAddr, num::NonZeroUsize, sync::Arc, time::Duration}; -use tokio::{sync::oneshot, task::JoinHandle}; +use tokio::task::JoinHandle; #[cfg(feature = "metrics")] use std::collections::HashMap; @@ -86,8 +86,6 @@ pub struct Consensus { ledger: Arc>, /// The BFT. bft: BFT, - /// The primary sender. - primary_sender: PrimarySender, /// The unconfirmed solutions queue. solutions_queue: Arc, Solution>>>, /// The unconfirmed transactions queue. @@ -119,21 +117,19 @@ impl Consensus { ping: Arc>, dev: Option, ) -> Result { - // Initialize the primary channels. - let (primary_sender, primary_receiver) = init_primary_channels::(); // Initialize the Narwhal transmissions. let transmissions = Arc::new(BFTPersistentStorage::open(storage_mode.clone())?); // Initialize the Narwhal storage. let storage = NarwhalStorage::new(ledger.clone(), transmissions, BatchHeader::::MAX_GC_ROUNDS as u64); // Initialize the BFT. let bft = - BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode, dev)?; + BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode, dev) + .await?; // Create a new instance of Consensus. let mut _self = Self { ledger, bft, block_sync, - primary_sender, solutions_queue: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(CAPACITY_FOR_SOLUTIONS).unwrap()))), transactions_queue: Default::default(), seen_solutions: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(1 << 16).unwrap()))), @@ -148,10 +144,7 @@ impl Consensus { _self.start_handlers(); // Lastly, also start BFTs handlers. 
- _self - .bft - .run(Some(ping), Some(Arc::new(_self.clone())), _self.primary_sender.clone(), primary_receiver) - .await?; + _self.bft.run(Some(ping), Some(Arc::new(_self.clone()))).await?; Ok(_self) } @@ -331,7 +324,7 @@ impl Consensus { let solution_id = solution.id(); trace!("Adding unconfirmed solution '{}' to the memory pool...", fmt_id(solution_id)); // Send the unconfirmed solution to the primary. - if let Err(e) = self.primary_sender.send_unconfirmed_solution(solution_id, Data::Object(solution)).await { + if let Err(e) = self.bft.primary().process_unconfirmed_solution(solution_id, Data::Object(solution)).await { // If the BFT is synced, then log the warning. if self.bft.is_synced() { // If error occurs after the first 10 blocks of the epoch, log it as a warning, otherwise ignore. @@ -431,7 +424,7 @@ impl Consensus { trace!("Adding unconfirmed {tx_type_str} transaction '{}' to the memory pool...", fmt_id(transaction_id)); // Send the unconfirmed transaction to the primary. if let Err(e) = - self.primary_sender.send_unconfirmed_transaction(transaction_id, Data::Object(transaction)).await + self.bft.primary().process_unconfirmed_transaction(transaction_id, Data::Object(transaction)).await { // If the BFT is synced, then log the warning. if self.bft.is_synced() { @@ -606,23 +599,19 @@ impl Consensus { transmission_id: TransmissionID, transmission: Transmission, ) -> Result<()> { - // Initialize a callback sender and receiver. - let (callback, callback_receiver) = oneshot::channel(); // Send the transmission to the primary. match (transmission_id, transmission) { - (TransmissionID::Ratification, Transmission::Ratification) => return Ok(()), + (TransmissionID::Ratification, Transmission::Ratification) => Ok(()), (TransmissionID::Solution(solution_id, _), Transmission::Solution(solution)) => { // Send the solution to the primary. - self.primary_sender.tx_unconfirmed_solution.send((solution_id, solution, callback)).await?; + self.bft.primary().process_unconfirmed_solution(solution_id, solution).await } (TransmissionID::Transaction(transaction_id, _), Transmission::Transaction(transaction)) => { // Send the transaction to the primary. - self.primary_sender.tx_unconfirmed_transaction.send((transaction_id, transaction, callback)).await?; + self.bft.primary().process_unconfirmed_transaction(transaction_id, transaction).await } _ => bail!("Mismatching `(transmission_id, transmission)` pair in consensus"), } - // Await the callback. - callback_receiver.await? } /// Spawns a task with the given future; it should only be used for long-running tasks. 
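The consensus hunks above finish the same migration on the caller side: `Consensus` no longer owns a `PrimarySender`, so start-up reduces to constructing and running the BFT directly, and submissions go straight to its primary. A condensed sketch of the resulting flow (here `consensus` stands for the `_self` instance built in `Consensus::new`; error handling is elided):

    // Construction is now async because `BFT::new` awaits the async `Primary` setup.
    let bft = BFT::new(account, storage, ledger.clone(), block_sync.clone(), ip, trusted_validators, storage_mode, dev)
        .await?;
    // Run the BFT with the ping handle and the consensus instance as its callback;
    // no primary channel pair is created or threaded through any more.
    bft.run(Some(ping), Some(Arc::new(consensus.clone()))).await?;
    // Later, unconfirmed transmissions are handed straight to the primary.
    bft.primary().process_unconfirmed_transaction(transaction_id, Data::Object(transaction)).await?;
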
From e386bee09d2e56ea965c62944e0c00f9bd59a046 Mon Sep 17 00:00:00 2001 From: Kai Mast Date: Wed, 17 Sep 2025 00:19:38 -0700 Subject: [PATCH 13/13] fix(node/bft): ensure gateways are shut down during prod tests --- node/bft/src/bft.rs | 11 +++- node/bft/src/gateway.rs | 94 +++++++++++++++++--------------- node/bft/src/lib.rs | 11 ---- node/bft/src/primary.rs | 49 ++++++++++++++--- node/bft/src/worker.rs | 44 +++++++-------- node/bft/tests/bft_e2e.rs | 2 + node/bft/tests/common/primary.rs | 6 +- node/consensus/src/lib.rs | 4 +- node/src/validator/mod.rs | 24 ++++---- node/tests/common/node.rs | 20 ++++--- 10 files changed, 156 insertions(+), 109 deletions(-) diff --git a/node/bft/src/bft.rs b/node/bft/src/bft.rs index 7b05883568..8c81296305 100644 --- a/node/bft/src/bft.rs +++ b/node/bft/src/bft.rs @@ -899,7 +899,10 @@ mod tests { use aleo_std::StorageMode; use anyhow::Result; use indexmap::{IndexMap, IndexSet}; - use std::sync::Arc; + use std::{ + net::{Ipv4Addr, SocketAddr, SocketAddrV4}, + sync::Arc, + }; type CurrentNetwork = snarkvm::console::network::MainnetV0; @@ -934,13 +937,17 @@ mod tests { ) -> anyhow::Result> { // Create the block synchronization logic. let block_sync = Arc::new(BlockSync::new(ledger.clone())); + + // Pick a random port so we can run tests concurrently. + let any_addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 0)); + // Initialize the BFT. BFT::new( account.clone(), storage.clone(), ledger.clone(), block_sync, - None, + Some(any_addr), &[], StorageMode::new_test(None), None, diff --git a/node/bft/src/gateway.rs b/node/bft/src/gateway.rs index 5d62cb7b60..6d99dff42e 100644 --- a/node/bft/src/gateway.rs +++ b/node/bft/src/gateway.rs @@ -20,29 +20,31 @@ use crate::{ MAX_BATCH_DELAY_IN_MS, MEMORY_POOL_PORT, Worker, - events::{BatchPropose, BatchSignature, EventCodec, PrimaryPing}, + events::{ + BatchPropose, + BatchSignature, + BlockRequest, + BlockResponse, + CertificateRequest, + CertificateResponse, + ChallengeRequest, + ChallengeResponse, + DataBlocks, + DisconnectReason, + Event, + EventCodec, + EventTrait, + PrimaryPing, + TransmissionRequest, + TransmissionResponse, + ValidatorsRequest, + ValidatorsResponse, + }, helpers::{Cache, CallbackHandle, Resolver, Storage, WorkerSender, assign_to_worker}, - spawn_blocking, + ledger_service::LedgerService, }; -use aleo_std::StorageMode; + use snarkos_account::Account; -use snarkos_node_bft_events::{ - BlockRequest, - BlockResponse, - CertificateRequest, - CertificateResponse, - ChallengeRequest, - ChallengeResponse, - DataBlocks, - DisconnectReason, - Event, - EventTrait, - TransmissionRequest, - TransmissionResponse, - ValidatorsRequest, - ValidatorsResponse, -}; -use snarkos_node_bft_ledger_service::LedgerService; use snarkos_node_router::{NodeType, Peer, PeerPoolHandling}; use snarkos_node_sync::{MAX_BLOCKS_BEHIND, communication_service::CommunicationService, locators::BlockLocators}; use snarkos_node_tcp::{ @@ -53,6 +55,8 @@ use snarkos_node_tcp::{ Tcp, protocols::{Disconnect, Handshake, OnConnect, Reading, Writing}, }; + +use aleo_std::StorageMode; use snarkvm::{ console::prelude::*, ledger::{ @@ -61,6 +65,7 @@ use snarkvm::{ narwhal::{BatchCertificate, BatchHeader, Data}, }, prelude::{Address, Field}, + utilities::task::{self, JoinHandle}, }; use colored::Colorize; @@ -82,7 +87,6 @@ use std::{ use tokio::{ net::TcpStream, sync::{OnceCell, oneshot}, - task::{self, JoinHandle}, }; use tokio_stream::StreamExt; use tokio_util::codec::Framed; @@ -215,8 +219,14 @@ impl Gateway { (None, None) => 
SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, MEMORY_POOL_PORT)), (Some(ip), _) => ip, }; + + // Allow at most as many connections as the maximum committe size. + // and fail if the chosen port is not available. + let mut tcp_config = Config::new(ip, Committee::::max_committee_size()?); + tcp_config.allow_random_port = false; + // Initialize the TCP stack. - let tcp = Tcp::new(Config::new(ip, Committee::::max_committee_size()?)); + let tcp = Tcp::new(tcp_config); // Prepare the collection of the initial peers. let mut initial_peers = HashMap::new(); @@ -609,19 +619,14 @@ impl Gateway { } let self_ = self.clone(); - let blocks = match task::spawn_blocking(move || { + let blocks = task::spawn_blocking(move || { // Retrieve the blocks within the requested range. match self_.ledger.get_blocks(start_height..end_height) { Ok(blocks) => Ok(Data::Object(DataBlocks(blocks))), Err(error) => bail!("Missing blocks {start_height} to {end_height} from ledger - {error}"), } }) - .await - { - Ok(Ok(blocks)) => blocks, - Ok(Err(error)) => return Err(error), - Err(error) => return Err(anyhow!("[BlockRequest] {error}")), - }; + .await?; let self_ = self.clone(); tokio::spawn(async move { @@ -870,7 +875,7 @@ impl Gateway { /// Spawns a task with the given future; it should only be used for long-running tasks. #[allow(dead_code)] fn spawn + Send + 'static>(&self, future: T) { - self.handles.lock().push(tokio::spawn(future)); + self.handles.lock().push(task::spawn(future)); } /// Shuts down the gateway. @@ -1510,7 +1515,7 @@ impl Gateway { return Some(DisconnectReason::InvalidChallengeResponse); } // Perform the deferred non-blocking deserialization of the signature. - let Ok(signature) = spawn_blocking!(signature.deserialize_blocking()) else { + let Ok(signature) = task::spawn_blocking(|| signature.deserialize_blocking()).await else { warn!("{CONTEXT} Gateway handshake with '{peer_addr}' failed (cannot deserialize the signature)"); return Some(DisconnectReason::InvalidChallengeResponse); }; @@ -1621,17 +1626,11 @@ mod prop_tests { impl GatewayAddress { fn ip(&self) -> Option { - if let GatewayAddress::Prod(ip) = self { - return *ip; - } - None + if let GatewayAddress::Prod(ip) = self { *ip } else { None } } fn port(&self) -> Option { - if let GatewayAddress::Dev(port) = self { - return Some(*port as u16); - } - None + if let GatewayAddress::Dev(port) = self { Some(*port as u16) } else { None } } } @@ -1689,8 +1688,8 @@ mod prop_tests { .boxed() } - #[proptest] - fn gateway_dev_initialization(#[strategy(any_valid_dev_gateway())] input: GatewayInput) { + #[proptest(async = "tokio")] + async fn gateway_dev_initialization(#[strategy(any_valid_dev_gateway())] input: GatewayInput) { let (storage, _, private_key, dev) = input; let account = Account::try_from(private_key).unwrap(); @@ -1711,10 +1710,13 @@ mod prop_tests { let tcp_config = gateway.tcp().config(); assert_eq!(tcp_config.max_connections, Committee::::max_committee_size().unwrap()); assert_eq!(gateway.account().address(), account.address()); + + // Ensure the gateway shuts down and unbinds the TCP port. 
+ gateway.shut_down().await; } - #[proptest] - fn gateway_prod_initialization(#[strategy(any_valid_prod_gateway())] input: GatewayInput) { + #[proptest(async = "tokio")] + async fn gateway_prod_initialization(#[strategy(any_valid_prod_gateway())] input: GatewayInput) { let (storage, _, private_key, dev) = input; let account = Account::try_from(private_key).unwrap(); @@ -1740,6 +1742,9 @@ mod prop_tests { let tcp_config = gateway.tcp().config(); assert_eq!(tcp_config.max_connections, Committee::::max_committee_size().unwrap()); assert_eq!(gateway.account().address(), account.address()); + + // Ensure the gateway shuts down and unbinds the TCP port. + gateway.shut_down().await; } #[proptest(async = "tokio")] @@ -1793,6 +1798,9 @@ mod prop_tests { SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + dev.port().unwrap()) ); assert_eq!(gateway.num_workers(), workers.len() as u8); + + // Ensure the gateway shuts down and unbinds the TCP port. + gateway.shut_down().await; } #[proptest] diff --git a/node/bft/src/lib.rs b/node/bft/src/lib.rs index 33dbb08775..19aaab9c6b 100644 --- a/node/bft/src/lib.rs +++ b/node/bft/src/lib.rs @@ -69,14 +69,3 @@ pub const MAX_WORKERS: u8 = 1; // worker(s) pub const PRIMARY_PING_IN_MS: u64 = 2 * MAX_BATCH_DELAY_IN_MS; // ms /// The interval at which each worker broadcasts a ping to every other node. pub const WORKER_PING_IN_MS: u64 = 4 * MAX_BATCH_DELAY_IN_MS; // ms - -/// A helper macro to spawn a blocking task. -#[macro_export] -macro_rules! spawn_blocking { - ($expr:expr) => { - match tokio::task::spawn_blocking(move || $expr).await { - Ok(value) => value, - Err(error) => Err(anyhow::anyhow!("[tokio::spawn_blocking] {error}")), - } - }; -} diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 541b5c8f59..f41c64eb53 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -20,7 +20,7 @@ use crate::{ PRIMARY_PING_IN_MS, WORKER_PING_IN_MS, Worker, - events::{BatchPropose, BatchSignature, Event}, + events::{BatchPropose, BatchSignature, Event, PrimaryPing}, gateway::{Gateway, GatewayPrimaryCallback, Transport}, helpers::{ CallbackHandle, @@ -34,25 +34,27 @@ use crate::{ init_worker_channels, now, }, - spawn_blocking, + ledger_service::LedgerService, sync::{Sync, SyncCallback}, }; + use snarkos_account::Account; -use snarkos_node_bft_events::PrimaryPing; -use snarkos_node_bft_ledger_service::LedgerService; + use snarkos_node_router::PeerPoolHandling; use snarkos_node_sync::{BlockSync, DUMMY_SELF_IP, Ping}; + use snarkvm::{ console::{ + network::ConsensusVersion, prelude::*, types::{Address, Field}, }, ledger::{ block::Transaction, + committee::Committee, narwhal::{BatchCertificate, BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, - prelude::{ConsensusVersion, committee::Committee}, utilities::task::{self, JoinHandle}, }; @@ -128,7 +130,7 @@ impl Primary { /// The maximum number of unconfirmed transmissions to send to the primary. pub const MAX_TRANSMISSIONS_TOLERANCE: usize = BatchHeader::::MAX_TRANSMISSIONS_PER_BATCH * 2; - /// Initializes a new primary instance. + /// Initializes a new primary instance and starts the gateway. #[allow(clippy::too_many_arguments)] pub async fn new( account: Account, @@ -1772,6 +1774,8 @@ impl Primary { info!("Shutting down the primary..."); // Remove the callback. self.primary_callback.clear(); + // Stop syncing. + self.sync.shut_down().await; // Shut down the workers. self.workers.iter().for_each(|worker| worker.shut_down()); // Abort the tasks. 
@@ -1809,7 +1813,8 @@ impl GatewayPrimaryCallback for Primary { let self_ = self.clone(); tokio::spawn(async move { // Deserialize the primary certificate in the primary ping. - let Ok(primary_certificate) = spawn_blocking!(primary_certificate.deserialize_blocking()) else { + let Ok(primary_certificate) = task::spawn_blocking(|| primary_certificate.deserialize_blocking()).await + else { warn!("Failed to deserialize primary certificate in 'PrimaryPing' from '{peer_ip}'"); return; }; @@ -1869,7 +1874,7 @@ impl GatewayPrimaryCallback for Primary { let self_ = self.clone(); tokio::spawn(async move { // Deserialize the batch certificate. - let Ok(batch_certificate) = spawn_blocking!(batch_certificate.deserialize_blocking()) else { + let Ok(batch_certificate) = task::spawn_blocking(|| batch_certificate.deserialize_blocking()).await else { warn!("Failed to deserialize the batch certificate from '{peer_ip}'"); return; }; @@ -1926,6 +1931,8 @@ impl Primary { #[cfg(test)] mod tests { + use std::net::{Ipv4Addr, SocketAddrV4}; + use super::*; use snarkos_node_bft_ledger_service::MockLedgerService; use snarkos_node_bft_storage_service::BFTMemoryService; @@ -1971,11 +1978,16 @@ mod tests { let ledger = Arc::new(MockLedgerService::new_at_height(committee, height)); let storage = Storage::new(ledger.clone(), Arc::new(BFTMemoryService::new()), 10); + // Pick a random port so we can run tests concurrently. + let any_addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 0)); + // Initialize the primary. let account = accounts[account_index].1.clone(); let block_sync = Arc::new(BlockSync::new(ledger.clone())); let mut primary = - Primary::new(account, storage, ledger, block_sync, None, &[], StorageMode::Test(None), None).await.unwrap(); + Primary::new(account, storage, ledger, block_sync, Some(any_addr), &[], StorageMode::Test(None), None) + .await + .unwrap(); // Construct a worker instance. 
primary.workers = Arc::from([Worker::new( @@ -2201,6 +2213,7 @@ mod tests { } } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch() { let mut rng = TestRng::default(); @@ -2222,6 +2235,7 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_no_transmissions() { let mut rng = TestRng::default(); @@ -2235,6 +2249,7 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_in_round() { let round = 3; @@ -2260,6 +2275,7 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_skip_transmissions_from_previous_certificates() { let round = 3; @@ -2332,6 +2348,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_over_spend_limit() { let mut rng = TestRng::default(); @@ -2369,6 +2386,7 @@ mod tests { assert_eq!(primary.workers().iter().map(|worker| worker.transmissions().len()).sum::(), 3); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer() { let mut rng = TestRng::default(); @@ -2408,6 +2426,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_when_not_synced() { let mut rng = TestRng::default(); @@ -2445,6 +2464,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_in_round() { let round = 2; @@ -2485,6 +2505,7 @@ mod tests { primary.process_batch_propose_from_peer(peer_ip, (*proposal.batch_header()).clone().into()).await.unwrap(); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_wrong_round() { let mut rng = TestRng::default(); @@ -2527,6 +2548,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_in_round_wrong_round() { let round = 4; @@ -2573,6 +2595,7 @@ mod tests { } /// Tests that the minimum batch delay is enforced as expected, i.e., that proposals with timestamps that are too close to the previous proposal are rejected. + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_with_past_timestamp() { let round = 2; @@ -2623,6 +2646,7 @@ mod tests { } /// Check that proposals rejected that have timestamps older than the previous proposal. 
+ #[tracing_test::traced_test] #[tokio::test] async fn test_batch_propose_from_peer_over_spend_limit() { let mut rng = TestRng::default(); @@ -2688,6 +2712,7 @@ mod tests { ); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_storage_round_behind_proposal_lock() { let round = 3; @@ -2721,6 +2746,7 @@ mod tests { assert!(primary.proposed_batch.read().is_some()); } + #[tracing_test::traced_test] #[tokio::test] async fn test_propose_batch_with_storage_round_behind_proposal() { let round = 5; @@ -2751,6 +2777,7 @@ mod tests { assert!(primary.proposed_batch.read().as_ref().unwrap().round() > primary.current_round()); } + #[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_batch_signature_from_peer() { let mut rng = TestRng::default(); @@ -2787,6 +2814,7 @@ mod tests { assert_eq!(primary.current_round(), round + 1); } + #[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_batch_signature_from_peer_in_round() { let round = 5; @@ -2826,6 +2854,7 @@ mod tests { assert_eq!(primary.current_round(), round + 1); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_signature_from_peer_no_quorum() { let mut rng = TestRng::default(); @@ -2861,6 +2890,7 @@ mod tests { assert_eq!(primary.current_round(), round); } + #[tracing_test::traced_test] #[tokio::test] async fn test_batch_signature_from_peer_in_round_no_quorum() { let round = 7; @@ -2899,6 +2929,7 @@ mod tests { assert_eq!(primary.current_round(), round); } + #[tracing_test::traced_test] #[tokio::test] async fn test_insert_certificate_with_aborted_transmissions() { let round = 3; diff --git a/node/bft/src/worker.rs b/node/bft/src/worker.rs index f3ade138e0..e51a95baaf 100644 --- a/node/bft/src/worker.rs +++ b/node/bft/src/worker.rs @@ -20,20 +20,17 @@ use crate::{ events::{Event, TransmissionRequest, TransmissionResponse}, gateway::Transport, helpers::{Pending, Ready, Storage, WorkerReceiver, fmt_id, max_redundant_requests}, - spawn_blocking, }; use snarkos_node_bft_ledger_service::LedgerService; use snarkvm::{ - console::{network::Network, prelude::Read}, + console::network::Network, ledger::{ block::Transaction, narwhal::{BatchHeader, Data, Transmission, TransmissionID}, puzzle::{Solution, SolutionID}, }, - utilities::{ - FromBytes, - task::{self, JoinHandle}, - }, + prelude::{FromBytes, Read}, + utilities::task::{self, JoinHandle}, }; use anyhow::{Context, Result, bail, ensure}; @@ -395,12 +392,15 @@ impl Worker { bail!("Transaction '{}.{}' already exists.", fmt_id(transaction_id), fmt_id(checksum).dimmed()); } // Deserialize the transaction. If the transaction exceeds the maximum size, then return an error. - let transaction = spawn_blocking!({ - match transaction { - Data::Object(transaction) => Ok(transaction), - Data::Buffer(bytes) => Ok(Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?), - } - })?; + let transaction = task::spawn_blocking(move || { + let transaction = match transaction { + Data::Object(txn) => txn, + Data::Buffer(bytes) => Transaction::::read_le(&mut bytes.take(N::MAX_TRANSACTION_SIZE as u64))?, + }; + + Result::>::Ok(transaction) + }) + .await?; // Check that the transaction is well-formed and unique. self.ledger.check_transaction_basic(transaction_id, transaction).await?; @@ -430,11 +430,11 @@ impl Worker { tokio::time::sleep(Duration::from_millis(MAX_FETCH_TIMEOUT_IN_MS)).await; // Remove the expired pending certificate requests. 
- let self__ = self_.clone(); - let _ = spawn_blocking!({ - self__.pending.clear_expired_callbacks(); - Ok(()) - }); + let self_ = self_.clone(); + task::spawn_blocking(move || { + self_.pending.clear_expired_callbacks(); + }) + .await; } }); @@ -459,11 +459,11 @@ impl Worker { self.spawn(async move { while let Some((peer_ip, transmission_response)) = rx_transmission_response.recv().await { // Process the transmission response. - let self__ = self_.clone(); - let _ = spawn_blocking!({ - self__.finish_transmission_request(peer_ip, transmission_response); - Ok(()) - }); + let self_ = self_.clone(); + task::spawn_blocking(move || { + self_.finish_transmission_request(peer_ip, transmission_response); + }) + .await; } }); } diff --git a/node/bft/tests/bft_e2e.rs b/node/bft/tests/bft_e2e.rs index ceb515065f..042110c60f 100644 --- a/node/bft/tests/bft_e2e.rs +++ b/node/bft/tests/bft_e2e.rs @@ -95,6 +95,7 @@ async fn test_resync() { deadline!(Duration::from_secs(20), move || { network_clone.is_round_reached(RECOVERY_ROUND) }); } +#[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_quorum_threshold() { // Start N nodes but don't connect them. @@ -149,6 +150,7 @@ async fn test_quorum_threshold() { deadline!(Duration::from_secs(20), move || { network.is_round_reached(TARGET_ROUND) }); } +#[tracing_test::traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_quorum_break() { // Start N nodes, connect them and start the cannons for each. diff --git a/node/bft/tests/common/primary.rs b/node/bft/tests/common/primary.rs index ca26db34ec..5aa803b571 100644 --- a/node/bft/tests/common/primary.rs +++ b/node/bft/tests/common/primary.rs @@ -20,7 +20,7 @@ use crate::common::{ }; use snarkos_account::Account; -use snarkos_node_bft::{BFT, MAX_BATCH_DELAY_IN_MS, MEMORY_POOL_PORT, Primary, helpers::Storage}; +use snarkos_node_bft::{BFT, MAX_BATCH_DELAY_IN_MS, Primary, helpers::Storage}; use snarkos_node_bft_storage_service::BFTMemoryService; use snarkos_node_router::PeerPoolHandling; use snarkos_node_sync::BlockSync; @@ -168,7 +168,7 @@ impl TestNetwork { storage, ledger, block_sync, - Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + id as u16)), + Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0)), &[], StorageMode::new_test(None), None, @@ -182,7 +182,7 @@ impl TestNetwork { storage, ledger, block_sync, - Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), MEMORY_POOL_PORT + id as u16)), + Some(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0)), &[], StorageMode::new_test(None), None, diff --git a/node/consensus/src/lib.rs b/node/consensus/src/lib.rs index a2a6d40f70..f64f8d3060 100644 --- a/node/consensus/src/lib.rs +++ b/node/consensus/src/lib.rs @@ -32,7 +32,6 @@ use snarkos_node_bft::{ Primary, helpers::{Storage as NarwhalStorage, fmt_id}, ledger_service::LedgerService, - spawn_blocking, storage_service::BFTPersistentStorage, }; use snarkos_node_sync::{BlockSync, Ping}; @@ -44,6 +43,7 @@ use snarkvm::{ puzzle::{Solution, SolutionID}, }, prelude::*, + utilities::task, }; use aleo_std::StorageMode; @@ -477,7 +477,7 @@ impl BftCallback for Consensus { // Try to advance to the next block. let self_ = self.clone(); let transmissions_ = transmissions.clone(); - let result = spawn_blocking! { self_.try_advance_to_next_block(subdag, transmissions_) }; + let result = task::spawn_blocking(move || self_.try_advance_to_next_block(subdag, transmissions_)).await; // If the block failed to advance, reinsert the transmissions into the memory pool. 
if result.is_err() { diff --git a/node/src/validator/mod.rs b/node/src/validator/mod.rs index 054e774eca..ce653894d5 100644 --- a/node/src/validator/mod.rs +++ b/node/src/validator/mod.rs @@ -18,7 +18,7 @@ mod router; use crate::traits::NodeInterface; use snarkos_account::Account; -use snarkos_node_bft::{ledger_service::CoreLedgerService, spawn_blocking}; +use snarkos_node_bft::ledger_service::CoreLedgerService; use snarkos_node_cdn::CdnBlockSync; use snarkos_node_consensus::Consensus; use snarkos_node_rest::Rest; @@ -408,15 +408,19 @@ impl> Validator { let inputs = [Value::from(Literal::Address(self_.address())), Value::from(Literal::U64(U64::new(1)))]; // Execute the transaction. let self__ = self_.clone(); - let transaction = match spawn_blocking!(self__.ledger.vm().execute( - self__.private_key(), - locator, - inputs.into_iter(), - None, - 10_000, - None, - &mut rand::thread_rng(), - )) { + let transaction = match task::spawn_blocking(move || { + self__.ledger.vm().execute( + self__.private_key(), + locator, + inputs.into_iter(), + None, + 10_000, + None, + &mut rand::thread_rng(), + ) + }) + .await + { Ok(transaction) => transaction, Err(error) => { error!("Transaction pool encountered an execution error - {error}"); diff --git a/node/tests/common/node.rs b/node/tests/common/node.rs index 67f31dc4b6..b83d22ca1c 100644 --- a/node/tests/common/node.rs +++ b/node/tests/common/node.rs @@ -22,12 +22,18 @@ use snarkos_utilities::SignalHandler; use snarkvm::prelude::{MainnetV0 as CurrentNetwork, store::helpers::memory::ConsensusMemory}; use aleo_std::StorageMode; -use std::str::FromStr; +use std::{ + net::{IpAddr, Ipv4Addr, SocketAddr}, + str::FromStr, +}; + +/// Bind to a random port to avoid conflicts during testing. +const ANY_ADDR: SocketAddr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0); pub async fn client() -> Client> { Client::new( - "127.0.0.1:0".parse().unwrap(), - None, + ANY_ADDR, + Some(ANY_ADDR), 10, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[], @@ -44,7 +50,7 @@ pub async fn client() -> Client> pub async fn prover() -> Prover> { Prover::new( - "127.0.0.1:0".parse().unwrap(), + ANY_ADDR, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[], sample_genesis_block(), @@ -58,9 +64,9 @@ pub async fn prover() -> Prover> pub async fn validator() -> Validator> { Validator::new( - "127.0.0.1:0".parse().unwrap(), - None, - None, + ANY_ADDR, + Some(ANY_ADDR), + Some(ANY_ADDR), 10, Account::::from_str("APrivateKey1zkp2oVPTci9kKcUprnbzMwq95Di1MQERpYBhEeqvkrDirK1").unwrap(), &[],