Skip to content

Commit cf8d69f

Browse files
ametel01 and claude authored
Add comprehensive crash logging and error handling (#51)
- Adds panic hook to log crashes with location, message, and backtrace instructions
- Enhances service error logging with detailed failure context
- Improves thread completion monitoring to identify which service failed
- Adds error chain formatting to show root causes
- Provides clear "INDEXER SHUTDOWN" messages explaining crash reasons

Fixes silent ECS indexer crashes by ensuring all failures are logged with actionable details.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <[email protected]>
1 parent 65d6f8b commit cf8d69f

File tree

2 files changed

+173
-21
lines changed

2 files changed

+173
-21
lines changed

src/indexer/lib.rs

Lines changed: 101 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -267,11 +267,23 @@ async fn setup_database_and_rpc(
267267

268268
fn spawn_router_service(should_terminate: Arc<AtomicBool>) -> tokio::task::JoinHandle<Result<()>> {
269269
tokio::spawn(async move {
270-
if let Err(e) = router::initialize_router(should_terminate.clone()).await {
271-
error!("[router] unexpected error {}", e);
270+
info!("[router] Starting router service");
271+
match router::initialize_router(should_terminate.clone()).await {
272+
Ok(()) => {
273+
info!("[router] Router service completed normally");
274+
Ok(())
275+
}
276+
Err(e) => {
277+
error!(
278+
"[router] CRITICAL: Router service failed with error: {:?}",
279+
e
280+
);
281+
error!("[router] This router failure may have caused the indexer to become unreachable");
282+
Err(BlockchainError::internal(format!(
283+
"Router service failed: {e}"
284+
)))
285+
}
272286
}
273-
info!("[router] shutting down");
274-
Ok(())
275287
})
276288
}
277289

@@ -292,11 +304,23 @@ fn spawn_quick_indexer_service(
292304
let quick_indexer = QuickIndexer::new(quick_config, db, rpc_client, should_terminate);
293305

294306
Ok(tokio::spawn(async move {
295-
info!("Starting quick indexer");
296-
if let Err(e) = quick_indexer.index().await {
297-
error!("[quick_index] unexpected error {}", e);
307+
info!("[quick_index] Starting quick indexer service");
308+
match quick_indexer.index().await {
309+
Ok(()) => {
310+
info!("[quick_index] Quick indexer completed normally");
311+
Ok(())
312+
}
313+
Err(e) => {
314+
error!(
315+
"[quick_index] CRITICAL: Quick indexer failed with error: {:?}",
316+
e
317+
);
318+
error!("[quick_index] Quick indexer handles real-time block indexing - this failure stops new block processing");
319+
Err(BlockchainError::internal(format!(
320+
"Quick indexer failed: {e}"
321+
)))
322+
}
298323
}
299-
Ok(())
300324
}))
301325
}
302326

@@ -319,11 +343,23 @@ fn spawn_batch_indexer_service(
319343
let batch_indexer = BatchIndexer::new(batch_config, db, rpc_client, should_terminate);
320344

321345
Ok(tokio::spawn(async move {
322-
info!("Starting batch indexer");
323-
if let Err(e) = batch_indexer.index().await {
324-
error!("[batch_index] unexpected error {}", e);
346+
info!("[batch_index] Starting batch indexer service");
347+
match batch_indexer.index().await {
348+
Ok(()) => {
349+
info!("[batch_index] Batch indexer completed normally");
350+
Ok(())
351+
}
352+
Err(e) => {
353+
error!(
354+
"[batch_index] CRITICAL: Batch indexer failed with error: {:?}",
355+
e
356+
);
357+
error!("[batch_index] Batch indexer handles historical block indexing - this failure stops backfilling");
358+
Err(BlockchainError::internal(format!(
359+
"Batch indexer failed: {e}"
360+
)))
361+
}
325362
}
326-
Ok(())
327363
}))
328364
}
329365

@@ -389,20 +425,69 @@ async fn initialize_index_metadata(
389425
Err(BlockchainError::internal("Failed to get indexer metadata"))
390426
}
391427

428+
#[allow(clippy::cognitive_complexity)]
392429
async fn wait_for_thread_completion(handles: Vec<JoinHandle<Result<()>>>) -> Result<()> {
393-
for handle in handles {
430+
let mut has_errors = false;
431+
432+
for (index, handle) in handles.into_iter().enumerate() {
433+
let service_name = match index {
434+
0 => "router",
435+
1 => "quick_indexer",
436+
2 => "batch_indexer",
437+
_ => "unknown_service",
438+
};
439+
394440
match handle.await {
395441
Ok(Ok(())) => {
396-
info!("Thread completed successfully");
442+
info!("[{}] Thread completed successfully", service_name);
397443
}
398444
Ok(Err(e)) => {
399-
error!("Thread completed with an error: {:?}", e);
445+
error!(
446+
"[{}] CRITICAL: Thread completed with an error: {:?}",
447+
service_name, e
448+
);
449+
error!(
450+
"[{}] Error details: {}",
451+
service_name,
452+
format_error_details(&e)
453+
);
454+
has_errors = true;
400455
}
401456
Err(e) => {
402-
error!("Thread panicked: {:?}", e);
457+
error!("[{}] CRITICAL: Thread panicked: {:?}", service_name, e);
458+
if e.is_panic() {
459+
error!("[{}] This was a panic - check for unwrap(), expect(), or other panic sources", service_name);
460+
}
461+
if e.is_cancelled() {
462+
error!("[{}] Task was cancelled", service_name);
463+
}
464+
has_errors = true;
403465
}
404466
}
405467
}
406468

469+
if has_errors {
470+
error!(
471+
"INDEXER SHUTDOWN: One or more services failed - this explains why the indexer stopped"
472+
);
473+
return Err(BlockchainError::internal(
474+
"One or more indexing services failed",
475+
));
476+
}
477+
478+
info!("All indexing services completed successfully");
407479
Ok(())
408480
}
481+
482+
fn format_error_details(error: &BlockchainError) -> String {
483+
let mut details = Vec::new();
484+
details.push(format!("Error: {error}"));
485+
486+
let mut current_error: &dyn std::error::Error = error;
487+
while let Some(source) = current_error.source() {
488+
details.push(format!("Caused by: {source}"));
489+
current_error = source;
490+
}
491+
492+
details.join("\n ")
493+
}

src/indexer/main.rs

Lines changed: 72 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
use fossil_headers_db::errors::{BlockchainError, Result};
22
use fossil_headers_db::indexer::lib::{start_indexing_services, IndexingConfig};
33
use std::{
4-
env,
4+
env, panic,
55
sync::{
66
atomic::{AtomicBool, Ordering},
77
Arc,
88
},
99
};
10-
use tracing::info;
10+
use tracing::{error, info};
1111
use tracing_subscriber::fmt;
1212

1313
#[tokio::main]
@@ -48,6 +48,9 @@ pub async fn main() -> Result<()> {
4848
.compact()
4949
.init();
5050

51+
// Setup panic hook to log panics before the application crashes
52+
setup_panic_hook();
53+
5154
let should_terminate = Arc::new(AtomicBool::new(false));
5255

5356
setup_ctrlc_handler(Arc::clone(&should_terminate))?;
@@ -63,9 +66,19 @@ pub async fn main() -> Result<()> {
6366

6467
let indexing_config = indexing_config_builder.build()?;
6568

66-
start_indexing_services(indexing_config, should_terminate).await?;
67-
68-
Ok(())
69+
// Main indexing operation with comprehensive error logging
70+
match start_indexing_services(indexing_config, should_terminate).await {
71+
Ok(()) => {
72+
info!("Indexing services completed successfully");
73+
Ok(())
74+
}
75+
Err(e) => {
76+
error!("CRITICAL: Indexing services failed with error: {:?}", e);
77+
error!("Error chain: {}", format_error_chain(&e));
78+
error!("This is a fatal error that caused the indexer to stop");
79+
Err(e)
80+
}
81+
}
6982
}
7083

7184
fn setup_ctrlc_handler(should_terminate: Arc<AtomicBool>) -> Result<()> {
@@ -76,3 +89,57 @@ fn setup_ctrlc_handler(should_terminate: Arc<AtomicBool>) -> Result<()> {
7689
})
7790
.map_err(|e| BlockchainError::internal(format!("Failed to set Ctrl+C handler: {e}")))
7891
}
92+
93+
#[allow(clippy::cognitive_complexity)]
94+
fn setup_panic_hook() {
95+
panic::set_hook(Box::new(|panic_info| {
96+
let location = panic_info.location().map_or_else(
97+
|| "unknown location".to_string(),
98+
|loc| format!("{}:{}:{}", loc.file(), loc.line(), loc.column()),
99+
);
100+
101+
let message = panic_info.payload().downcast_ref::<&str>().map_or_else(
102+
|| {
103+
panic_info.payload().downcast_ref::<String>().map_or_else(
104+
|| "unknown panic message".to_string(),
105+
std::clone::Clone::clone,
106+
)
107+
},
108+
|s| (*s).to_string(),
109+
);
110+
111+
error!("PANIC OCCURRED - INDEXER CRASHING!");
112+
error!("Panic location: {}", location);
113+
error!("Panic message: {}", message);
114+
error!("This indicates a critical bug in the indexer");
115+
116+
// Try to log the backtrace if available
117+
if let Ok(backtrace) = std::env::var("RUST_BACKTRACE") {
118+
if !backtrace.is_empty() && backtrace != "0" {
119+
error!(
120+
"Backtrace logging is enabled (RUST_BACKTRACE={})",
121+
backtrace
122+
);
123+
}
124+
} else {
125+
error!("Enable RUST_BACKTRACE=1 for stack traces");
126+
}
127+
128+
// Flush logs before panic continues
129+
std::io::Write::flush(&mut std::io::stderr()).ok();
130+
}));
131+
}
132+
133+
fn format_error_chain(error: &BlockchainError) -> String {
134+
let mut chain = Vec::new();
135+
let mut current_error: &dyn std::error::Error = error;
136+
137+
chain.push(current_error.to_string());
138+
139+
while let Some(source) = current_error.source() {
140+
chain.push(source.to_string());
141+
current_error = source;
142+
}
143+
144+
chain.join(" -> ")
145+
}

0 commit comments

Comments
 (0)