diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index a25b65fca274f5..eeafb0ea735fc0 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2570,6 +2570,17 @@ public class Config extends ConfigBase {
 
     @ConfField(mutable = true, masterOnly = true, description = {"Maximum number of buckets for auto bucketing."})
     public static int autobucket_max_buckets = 128;
+    @ConfField(mutable = true, masterOnly = true, description = {
+            "Maximum number of buckets allowed when creating a table or adding a partition. "
+                    + "This config shares the same default value with autobucket_max_buckets for consistency. "
+                    + "Behavior: "
+                    + "1. For user-specified buckets (CREATE TABLE / ALTER TABLE ADD PARTITION): "
+                    + "if bucket number exceeds this limit, the operation will be rejected with an error message. "
+                    + "2. For auto-bucket feature (Dynamic Partition): "
+                    + "bucket number will be capped at autobucket_max_buckets automatically. "
+                    + "Set to 0 or negative value to disable this limit for user-specified buckets."})
+    public static int max_bucket_num_per_partition = autobucket_max_buckets;
+
 
     @ConfField(description = {"Maximum number of connections for the Arrow Flight Server per FE."})
     public static int arrow_flight_max_connections = 4096;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
index cbbc3dd1797242..060cf59a1701fd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
@@ -1606,6 +1606,16 @@ public void addPartition(Database db, String tableName, AddPartitionOp addPartit
                 if (hashDistributionInfo.getBucketNum() <= 0) {
                     throw new DdlException("Cannot assign hash distribution buckets less than 1");
                 }
+                if (Config.max_bucket_num_per_partition > 0
+                        && hashDistributionInfo.getBucketNum() > Config.max_bucket_num_per_partition) {
+                    throw new DdlException(String.format(
+                            "Number of buckets (%d) exceeds the maximum allowed value (%d). "
+                                    + "Generally, a large number of buckets is not needed. "
+                                    + "If you have a specific use case requiring more buckets, "
+                                    + "please review your schema design or modify the FE config "
+                                    + "'max_bucket_num_per_partition' to adjust this limit.",
+                            hashDistributionInfo.getBucketNum(), Config.max_bucket_num_per_partition));
+                }
                 if (!hashDistributionInfo.sameDistributionColumns((HashDistributionInfo) defaultDistributionInfo)) {
                     throw new DdlException("Cannot assign hash distribution with different distribution cols. "
                             + "new is: " + hashDistributionInfo.getDistributionColumns() + " default is: "
@@ -1616,6 +1626,16 @@ public void addPartition(Database db, String tableName, AddPartitionOp addPartit
                 if (randomDistributionInfo.getBucketNum() <= 0) {
                     throw new DdlException("Cannot assign random distribution buckets less than 1");
                 }
+                if (Config.max_bucket_num_per_partition > 0
+                        && randomDistributionInfo.getBucketNum() > Config.max_bucket_num_per_partition) {
+                    throw new DdlException(String.format(
+                            "Number of buckets (%d) exceeds the maximum allowed value (%d). "
+                                    + "Generally, a large number of buckets is not needed. "
+                                    + "If you have a specific use case requiring more buckets, "
+                                    + "please review your schema design or modify the FE config "
+                                    + "'max_bucket_num_per_partition' to adjust this limit.",
+                            randomDistributionInfo.getBucketNum(), Config.max_bucket_num_per_partition));
+                }
             }
         } else {
             // make sure partition-dristribution-info is deep copied from default-distribution-info
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java
index 108e6c8fc83ed3..35dbf8a8b34c5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java
@@ -22,6 +22,7 @@
 import org.apache.doris.analysis.RandomDistributionDesc;
 import org.apache.doris.catalog.AggregateType;
 import org.apache.doris.catalog.KeysType;
+import org.apache.doris.common.Config;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 
 import com.google.common.collect.Lists;
@@ -76,6 +77,20 @@ public void validate(Map<String, ColumnDefinition> columnMap, KeysType keysType)
             throw new AnalysisException(isHash ? "Number of hash distribution should be greater than zero."
                     : "Number of random distribution should be greater than zero.");
         }
+
+        // Check bucket max limit for non-auto-bucket cases
+        // auto bucket is limited by autobucket_max_buckets during calculation
+        if (!isAutoBucket && Config.max_bucket_num_per_partition > 0
+                && bucketNum > Config.max_bucket_num_per_partition) {
+            throw new AnalysisException(String.format(
+                    "Number of buckets (%d) exceeds the maximum allowed value (%d). "
+                            + "Generally, a large number of buckets is not needed. "
+                            + "If you have a specific use case requiring more buckets, "
+                            + "please review your schema design or modify the FE config "
+                            + "'max_bucket_num_per_partition' to adjust this limit.",
+                    bucketNum, Config.max_bucket_num_per_partition));
+        }
+
         if (isHash) {
             Set<String> colSet = Sets.newHashSet(cols);
             if (colSet.size() != cols.size()) {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptorTest.java
new file mode 100644
index 00000000000000..22c5c5af116c41
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptorTest.java
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.plans.commands.info;
+
+import org.apache.doris.catalog.KeysType;
+import org.apache.doris.common.Config;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.types.IntegerType;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Map;
+
+public class DistributionDescriptorTest {
+
+    private Map<String, ColumnDefinition> createColumnMap() {
+        Map<String, ColumnDefinition> columnMap = Maps.newHashMap();
+        ColumnDefinition col1 = new ColumnDefinition("col1", IntegerType.INSTANCE, false);
+        columnMap.put("col1", col1);
+        ColumnDefinition col2 = new ColumnDefinition("col2", IntegerType.INSTANCE, false);
+        columnMap.put("col2", col2);
+        return columnMap;
+    }
+
+    @Test
+    public void testBucketNumMaxLimit() {
+        Map<String, ColumnDefinition> columnMap = createColumnMap();
+        int originalValue = Config.max_bucket_num_per_partition;
+
+        try {
+            // Test 1: normal bucket number within limit
+            Config.max_bucket_num_per_partition = 100;
+            DistributionDescriptor desc1 = new DistributionDescriptor(
+                    true, false, 50, Lists.newArrayList("col1"));
+            desc1.validate(columnMap, KeysType.DUP_KEYS); // should not throw
+
+            // Test 2: bucket number exceeds limit
+            DistributionDescriptor desc2 = new DistributionDescriptor(
+                    true, false, 150, Lists.newArrayList("col1"));
+            AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+                    () -> desc2.validate(columnMap, KeysType.DUP_KEYS));
+            Assertions.assertTrue(ex.getMessage().contains("exceeds the maximum allowed value (100)"));
+            Assertions.assertTrue(ex.getMessage().contains("max_bucket_num_per_partition"));
+
+            // Test 3: disable limit by setting to 0
+            Config.max_bucket_num_per_partition = 0;
+            DistributionDescriptor desc3 = new DistributionDescriptor(
+                    true, false, 10000, Lists.newArrayList("col1"));
+            desc3.validate(columnMap, KeysType.DUP_KEYS); // should not throw
+
+            // Test 4: auto bucket is not limited by this config
+            Config.max_bucket_num_per_partition = 10;
+            DistributionDescriptor desc4 = new DistributionDescriptor(
+                    true, true, 1000, Lists.newArrayList("col1"));
+            desc4.validate(columnMap, KeysType.DUP_KEYS); // auto bucket should not throw
+
+            // Test 5: random distribution also respects limit
+            Config.max_bucket_num_per_partition = 100;
+            DistributionDescriptor desc5 = new DistributionDescriptor(
+                    false, false, 50, Lists.newArrayList());
+            desc5.validate(columnMap, KeysType.DUP_KEYS); // should not throw
+
+            DistributionDescriptor desc6 = new DistributionDescriptor(
+                    false, false, 150, Lists.newArrayList());
+            AnalysisException ex2 = Assertions.assertThrows(AnalysisException.class,
+                    () -> desc6.validate(columnMap, KeysType.DUP_KEYS));
+            Assertions.assertTrue(ex2.getMessage().contains("exceeds the maximum allowed value (100)"));
+
+        } finally {
+            Config.max_bucket_num_per_partition = originalValue;
+        }
+    }
+
+    @Test
+    public void testBucketNumZeroOrNegative() {
+        Map<String, ColumnDefinition> columnMap = createColumnMap();
+
+        // hash distribution with bucket <= 0
+        DistributionDescriptor desc1 = new DistributionDescriptor(
+                true, false, 0, Lists.newArrayList("col1"));
+        AnalysisException ex1 = Assertions.assertThrows(AnalysisException.class,
+                () -> desc1.validate(columnMap, KeysType.DUP_KEYS));
+        Assertions.assertTrue(ex1.getMessage().contains("greater than zero"));
+
+        DistributionDescriptor desc2 = new DistributionDescriptor(
+                true, false, -1, Lists.newArrayList("col1"));
+        AnalysisException ex2 = Assertions.assertThrows(AnalysisException.class,
+                () -> desc2.validate(columnMap, KeysType.DUP_KEYS));
+        Assertions.assertTrue(ex2.getMessage().contains("greater than zero"));
+
+        // random distribution with bucket <= 0
+        DistributionDescriptor desc3 = new DistributionDescriptor(
+                false, false, 0, Lists.newArrayList());
+        AnalysisException ex3 = Assertions.assertThrows(AnalysisException.class,
+                () -> desc3.validate(columnMap, KeysType.DUP_KEYS));
+        Assertions.assertTrue(ex3.getMessage().contains("greater than zero"));
+    }
+}
\ No newline at end of file