From 6b7adb0d92f5363334e21980edcb770e19de1413 Mon Sep 17 00:00:00 2001 From: Theo Butler Date: Mon, 30 Mar 2026 08:17:53 -0600 Subject: [PATCH] fix(common): remove incorrect _block_num statistics injection Reverts eb6e264b, since `synced_range` bounds do not always match the row-level `_block_num` MIN/MAX values in a given table. --- .../common/src/catalog/physical/snapshot.rs | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/crates/core/common/src/catalog/physical/snapshot.rs b/crates/core/common/src/catalog/physical/snapshot.rs index c40e68edc..e9d0eb83a 100644 --- a/crates/core/common/src/catalog/physical/snapshot.rs +++ b/crates/core/common/src/catalog/physical/snapshot.rs @@ -5,7 +5,7 @@ use amp_parquet::reader; use datafusion::{ arrow::datatypes::SchemaRef, catalog::{Session, memory::DataSourceExec}, - common::{DFSchema, ScalarValue, project_schema, stats::Precision}, + common::{DFSchema, project_schema, stats::Precision}, datasource::{ TableProvider, TableType, create_ordering, listing::{ListingTableUrl, PartitionedFile}, @@ -343,24 +343,10 @@ impl TableProvider for QueryableSnapshot { let target_partitions = state.config_options().execution.target_partitions; let table_schema = self.physical_table.schema(); - let (file_groups, mut statistics) = self + let (file_groups, statistics) = self .resolve_file_groups(&segments, target_partitions, table_schema.clone()) .await?; - // Override _block_num column statistics with exact min/max from synced_range. - // This enables the AggregateStatistics optimizer to resolve MIN/MAX(_block_num) - // as constants without scanning parquet files. 
- if let Some(range) = &self.synced_range - && let Ok(idx) = - table_schema.index_of(datasets_common::block_num::RESERVED_BLOCK_NUM_COLUMN_NAME) - { - statistics.column_statistics[idx].null_count = Precision::Exact(0); - statistics.column_statistics[idx].min_value = - Precision::Exact(ScalarValue::UInt64(Some(range.start()))); - statistics.column_statistics[idx].max_value = - Precision::Exact(ScalarValue::UInt64(Some(range.end()))); - } - if statistics.num_rows == Precision::Absent { tracing::warn!("Table has no row count statistics. Queries may be inefficient."); }