diff --git a/crates/core/common/src/catalog/physical/snapshot.rs b/crates/core/common/src/catalog/physical/snapshot.rs index bc36cf9f1..dfb12f860 100644 --- a/crates/core/common/src/catalog/physical/snapshot.rs +++ b/crates/core/common/src/catalog/physical/snapshot.rs @@ -5,7 +5,7 @@ use amp_parquet::reader; use datafusion::{ arrow::datatypes::SchemaRef, catalog::{Session, memory::DataSourceExec}, - common::{DFSchema, project_schema, stats::Precision}, + common::{DFSchema, ScalarValue, project_schema, stats::Precision}, datasource::{ TableProvider, TableType, create_ordering, listing::{ListingTableUrl, PartitionedFile}, @@ -342,9 +342,24 @@ impl TableProvider for QueryableSnapshot { let target_partitions = state.config_options().execution.target_partitions; let table_schema = self.physical_table.schema(); - let (file_groups, statistics) = self + let (file_groups, mut statistics) = self .resolve_file_groups(&segments, target_partitions, table_schema.clone()) .await?; + + // Override _block_num column statistics with exact min/max from synced_range. + // This enables the AggregateStatistics optimizer to resolve MIN/MAX(_block_num) + // as constants without scanning parquet files. + if let Some(range) = &self.synced_range + && let Ok(idx) = + table_schema.index_of(datasets_common::block_num::RESERVED_BLOCK_NUM_COLUMN_NAME) + { + statistics.column_statistics[idx].null_count = Precision::Exact(0); + statistics.column_statistics[idx].min_value = + Precision::Exact(ScalarValue::UInt64(Some(range.start()))); + statistics.column_statistics[idx].max_value = + Precision::Exact(ScalarValue::UInt64(Some(range.end()))); + } + if statistics.num_rows == Precision::Absent { tracing::warn!("Table has no row count statistics. Queries may be inefficient."); }