From 3d2606c083cd9bf37bfccb78495e6f15cab23570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 15 Oct 2025 13:55:54 +0000 Subject: [PATCH 1/2] Merge pull request #88440 from korowa/fix-gby-overflow-sparse Fix sparse columns aggregation for any overflow mode --- src/AggregateFunctions/IAggregateFunction.h | 5 +++-- .../03657_gby_overflow_any_sparse.reference | 10 ++++++++++ .../0_stateless/03657_gby_overflow_any_sparse.sql | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03657_gby_overflow_any_sparse.reference create mode 100644 tests/queries/0_stateless/03657_gby_overflow_any_sparse.sql diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 524b1a8040b4..a69208e431a4 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -496,8 +496,9 @@ class IAggregateFunctionHelper : public IAggregateFunction auto offset_it = column_sparse.getIterator(row_begin); for (size_t i = row_begin; i < row_end; ++i, ++offset_it) - static_cast(this)->add(places[offset_it.getCurrentRow()] + place_offset, - &values, offset_it.getValueIndex(), arena); + if (places[offset_it.getCurrentRow()]) + static_cast(this)->add(places[offset_it.getCurrentRow()] + place_offset, + &values, offset_it.getValueIndex(), arena); } void mergeBatch( diff --git a/tests/queries/0_stateless/03657_gby_overflow_any_sparse.reference b/tests/queries/0_stateless/03657_gby_overflow_any_sparse.reference new file mode 100644 index 000000000000..7f1bc308d222 --- /dev/null +++ b/tests/queries/0_stateless/03657_gby_overflow_any_sparse.reference @@ -0,0 +1,10 @@ +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 diff --git a/tests/queries/0_stateless/03657_gby_overflow_any_sparse.sql b/tests/queries/0_stateless/03657_gby_overflow_any_sparse.sql new file mode 100644 index 000000000000..9d0891b6b710 --- /dev/null +++ b/tests/queries/0_stateless/03657_gby_overflow_any_sparse.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS 03657_gby_overflow; + +CREATE TABLE 03657_gby_overflow(key UInt64, val UInt16) ENGINE = MergeTree ORDER BY tuple() +AS SELECT number, 0 from numbers(100000); + +SELECT key, any(val) FROM 03657_gby_overflow GROUP BY key ORDER BY key LIMIT 10 +SETTINGS group_by_overflow_mode = 'any', + max_rows_to_group_by = 100, + max_threads = 1, + max_block_size = 100, + group_by_two_level_threshold = 1000000000, + group_by_two_level_threshold_bytes = 1000000000; + +DROP TABLE 03657_gby_overflow; From c5ec5fb2a96ed00be7868e671aaf8d514099e601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 3 Feb 2026 11:25:02 +0000 Subject: [PATCH 2/2] Merge pull request #95301 from mkmkme/aggegate-crash Fix sparse column aggregation with sum() and timeseries --- src/AggregateFunctions/AggregateFunctionSum.h | 3 ++- .../AggregateFunctionLast2Samples.h | 19 ----------------- .../AggregateFunctionTimeSeriesGroupArray.h | 19 ----------------- .../AggregateFunctionTimeseriesBase.h | 19 ----------------- ...arse_column_aggregation_with_sum.reference | 0 ...811_sparse_column_aggregation_with_sum.sql | 21 +++++++++++++++++++ 6 files changed, 23 insertions(+), 58 deletions(-) create mode 100644 tests/queries/0_stateless/03811_sparse_column_aggregation_with_sum.reference create mode 100644 tests/queries/0_stateless/03811_sparse_column_aggregation_with_sum.sql diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 7c7fb6338a28..d67d0a5e0a8c 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -562,7 +562,8 @@ class AggregateFunctionSum final : public IAggregateFunctionDataHelper(*columns[0]); - const auto * values = &column_sparse.getValuesColumn(); - const auto & offsets = column_sparse.getOffsetsData(); - - size_t from = std::lower_bound(offsets.begin(), offsets.end(), row_begin) - offsets.begin(); - size_t to = std::lower_bound(offsets.begin(), offsets.end(), row_end) - offsets.begin(); - - for (size_t i = from; i < to; ++i) - add(places[offsets[i]] + place_offset, &values, i + 1, arena); - } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { data(place).merge(data(rhs)); diff --git a/src/AggregateFunctions/TimeSeries/AggregateFunctionTimeSeriesGroupArray.h b/src/AggregateFunctions/TimeSeries/AggregateFunctionTimeSeriesGroupArray.h index 223d71168cea..cf4ee8689ad1 100644 --- a/src/AggregateFunctions/TimeSeries/AggregateFunctionTimeSeriesGroupArray.h +++ b/src/AggregateFunctions/TimeSeries/AggregateFunctionTimeSeriesGroupArray.h @@ -358,25 +358,6 @@ class AggregateFunctionTimeSeriesGroupArray final : { } - void addBatchSparse( - size_t row_begin, - size_t row_end, - AggregateDataPtr * places, - size_t place_offset, - const IColumn ** columns, - Arena * arena) const override - { - const auto & column_sparse = typeid_cast(*columns[0]); - const auto * values = &column_sparse.getValuesColumn(); - const auto & offsets = column_sparse.getOffsetsData(); - - size_t from = std::lower_bound(offsets.begin(), offsets.end(), row_begin) - offsets.begin(); - size_t to = std::lower_bound(offsets.begin(), offsets.end(), row_end) - offsets.begin(); - - for (size_t i = from; i < to; ++i) - add(places[offsets[i]] + place_offset, &values, i + 1, arena); - } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { data(place).merge(data(rhs), arena); diff --git a/src/AggregateFunctions/TimeSeries/AggregateFunctionTimeseriesBase.h b/src/AggregateFunctions/TimeSeries/AggregateFunctionTimeseriesBase.h index 584de1989148..c3ec47eb375b 100644 --- a/src/AggregateFunctions/TimeSeries/AggregateFunctionTimeseriesBase.h +++ b/src/AggregateFunctions/TimeSeries/AggregateFunctionTimeseriesBase.h @@ -336,25 +336,6 @@ class AggregateFunctionTimeseriesBase : { } - void addBatchSparse( - size_t row_begin, - size_t row_end, - AggregateDataPtr * places, - size_t place_offset, - const IColumn ** columns, - Arena * arena) const override - { - const auto & column_sparse = typeid_cast(*columns[0]); - const auto * values = &column_sparse.getValuesColumn(); - const auto & offsets = column_sparse.getOffsetsData(); - - size_t from = std::lower_bound(offsets.begin(), offsets.end(), row_begin) - offsets.begin(); - size_t to = std::lower_bound(offsets.begin(), offsets.end(), row_end) - offsets.begin(); - - for (size_t i = from; i < to; ++i) - add(places[offsets[i]] + place_offset, &values, i + 1, arena); - } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { auto & buckets = data(place)->buckets; diff --git a/tests/queries/0_stateless/03811_sparse_column_aggregation_with_sum.reference b/tests/queries/0_stateless/03811_sparse_column_aggregation_with_sum.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03811_sparse_column_aggregation_with_sum.sql b/tests/queries/0_stateless/03811_sparse_column_aggregation_with_sum.sql new file mode 100644 index 000000000000..7d6240b01f1d --- /dev/null +++ b/tests/queries/0_stateless/03811_sparse_column_aggregation_with_sum.sql @@ -0,0 +1,21 @@ +CREATE TABLE 03811_sparse_column_aggregation_with_sum(key UInt128, val UInt16) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO 03811_sparse_column_aggregation_with_sum + SELECT number, number % 10000 = 0 FROM numbers(100000) + SETTINGS min_insert_block_size_rows = 1000, + max_block_size =1000, + max_threads = 2; + +SELECT key, sum(val) AS c +FROM 03811_sparse_column_aggregation_with_sum +GROUP BY key +ORDER BY c DESC +LIMIT 100 +FORMAT Null +SETTINGS group_by_overflow_mode = 'any', + max_rows_to_group_by = 100, + group_by_two_level_threshold_bytes = 1, + group_by_two_level_threshold = 1, + max_threads = 2; + +DROP TABLE 03811_sparse_column_aggregation_with_sum;