Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions datafusion/core/tests/sql/explain_analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,21 @@ async fn parquet_explain_analyze() {
&formatted,
"row_groups_pruned_statistics=1 total \u{2192} 1 matched"
);

// The metrics are expected to be displayed in the same order as the actual pruning steps
// (file range -> row-group statistics -> row-group bloom filter -> page index)
let i_file = formatted.find("files_ranges_pruned_statistics").unwrap();
let i_rowgroup_stat = formatted.find("row_groups_pruned_statistics").unwrap();
let i_rowgroup_bloomfilter =
formatted.find("row_groups_pruned_bloom_filter").unwrap();
let i_page = formatted.find("page_index_rows_pruned").unwrap();

assert!(
(i_file < i_rowgroup_stat)
&& (i_rowgroup_stat < i_rowgroup_bloomfilter)
&& (i_rowgroup_bloomfilter < i_page),
"The parquet pruning metrics should be displayed in an order of: file range -> row group statistics -> row group bloom filter -> page index."
);
}

// This test reproduces the behavior described in
Expand Down
35 changes: 24 additions & 11 deletions datafusion/physical-plan/src/metrics/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -749,17 +749,30 @@ impl MetricValue {
Self::ElapsedCompute(_) => 1,
Self::OutputBytes(_) => 2,
// Other metrics
Self::PruningMetrics { .. } => 3,
Self::SpillCount(_) => 4,
Self::SpilledBytes(_) => 5,
Self::SpilledRows(_) => 6,
Self::CurrentMemoryUsage(_) => 7,
Self::Count { .. } => 8,
Self::Gauge { .. } => 9,
Self::Time { .. } => 10,
Self::StartTimestamp(_) => 11, // show timestamps last
Self::EndTimestamp(_) => 12,
Self::Custom { .. } => 13,
Self::PruningMetrics { name, .. } => match name.as_ref() {
// The following metrics belong to `DataSourceExec` with a Parquet data source.
// They are displayed in a specific order that reflects the actual pruning process,
// from coarse-grained to fine-grained pruning levels.
//
// You may update these metrics as long as their relative order remains unchanged.
//
// Reference PR: <https://github.com/apache/datafusion/pull/18379>
"files_ranges_pruned_statistics" => 3,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a little bit hacky 🤔 but I think this way is simpler and easier to update.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a comment or a link to a ticket could help others who find this code and are confused by it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, comments added.

"row_groups_pruned_statistics" => 4,
"row_groups_pruned_bloom_filter" => 5,
"page_index_rows_pruned" => 6,
_ => 7,
},
Self::SpillCount(_) => 8,
Self::SpilledBytes(_) => 9,
Self::SpilledRows(_) => 10,
Self::CurrentMemoryUsage(_) => 11,
Self::Count { .. } => 12,
Self::Gauge { .. } => 13,
Self::Time { .. } => 14,
Self::StartTimestamp(_) => 15, // show timestamps last
Self::EndTimestamp(_) => 16,
Self::Custom { .. } => 17,
}
}

Expand Down