From 0fc75fc9d80c6b1d08ef7bbf5e22a35fddabd7b0 Mon Sep 17 00:00:00 2001 From: happenlee Date: Wed, 18 Mar 2026 22:02:51 +0800 Subject: [PATCH 1/4] update apache doris result based on 4.1 RC01 and c7a.48xl, modify benchmark script change the load way to parquet file --- doris/benchmark.sh | 168 +++++++++++++++++++++++++----- doris/results/c7a.metal-48xl.json | 94 ++++++++--------- doris/run.sh | 13 --- 3 files changed, 191 insertions(+), 84 deletions(-) mode change 100755 => 100644 doris/benchmark.sh delete mode 100755 doris/run.sh diff --git a/doris/benchmark.sh b/doris/benchmark.sh old mode 100755 new mode 100644 index 2f754c569..9375d8b76 --- a/doris/benchmark.sh +++ b/doris/benchmark.sh @@ -9,7 +9,7 @@ ROOT=$(pwd) if [[ -n "$1" ]]; then url="$1" else - url='https://apache-doris-releases.oss-accelerate.aliyuncs.com/apache-doris-2.1.7-rc01-bin-x64.tar.gz' + url='https://qa-build.oss-cn-beijing.aliyuncs.com/doris_release/apache-doris-4.1.0-rc01-bin-x64.tar.gz' fi # Download file_name="$(basename ${url})" @@ -24,21 +24,20 @@ dir_name="${file_name/.tar.gz/}" # Try to stop Doris and remove it first if execute this script multiple times set +e -"$dir_name"/apache-doris-2.1.7-rc01-bin-x64/fe/bin/stop_fe.sh -"$dir_name"/apache-doris-2.1.7-rc01-bin-x64/be/bin/stop_be.sh +"$dir_name"/"$dir_name"/fe/bin/stop_fe.sh +"$dir_name"/"$dir_name"/be/bin/stop_be.sh rm -rf "$dir_name" set -e # Uncompress mkdir "$dir_name" tar zxf "$file_name" -C "$dir_name" -DORIS_HOME="$ROOT/$dir_name/apache-doris-2.1.7-rc01-bin-x64" +DORIS_HOME="$ROOT/$dir_name/$dir_name" export DORIS_HOME # Install dependencies sudo apt-get update -y -sudo apt-get install -y openjdk-17-jdk -sudo apt-get install -y mysql-client +sudo apt-get install -y openjdk-17-jdk mysql-client export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-$(dpkg --print-architecture)/" export PATH=$JAVA_HOME/bin:$PATH @@ -89,34 +88,155 @@ sleep 5 mysql -h 127.0.0.1 -P9030 -uroot hits <"$ROOT"/create.sql # Download data -if [[ ! -f hits.tsv.gz ]] && [[ ! -f hits.tsv ]]; then - sudo apt-get install -y pigz - wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' - pigz -d -f hits.tsv.gz -fi +BE_DATA_DIR="$DORIS_HOME/be/" +mkdir -p "$BE_DATA_DIR/user_files_secure" + +seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue --progress=dot:giga https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet' +mv *.parquet "$BE_DATA_DIR/user_files_secure" + +BE_ID=$(mysql -h127.0.0.1 -P9030 -uroot -N -e 'show backends' | awk '{print $1}' | head -1) -# Load data -echo "start loading hits.tsv, estimated to take about 9 minutes ..." -date +PARALLEL_NUM=$(($(nproc) / 4)) +echo "Setting parallel_pipeline_task_num to $PARALLEL_NUM (cpu cores: $(nproc) / 4)" + +echo "start loading hits.parquet using TVF, estimated to take about 3 minutes ..." START=$(date +%s) -curl --location-trusted \ - -u root: \ - -T "hits.tsv" \ - -H "label:hits" \ - -H "columns: WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \ - http://localhost:8030/api/hits/hits/_stream_load +mysql -h 127.0.0.1 -P9030 -uroot hits -e "SET parallel_pipeline_task_num = $PARALLEL_NUM;\ +INSERT INTO hits SELECT + CounterID, + DATE_ADD('1970-01-01', INTERVAL EventDate DAY) AS EventDate, + UserID, + FROM_UNIXTIME(EventTime) AS EventTime, + WatchID, + JavaEnable, + Title, + GoodEvent, + ClientIP, + RegionID, + CounterClass, + OS, + UserAgent, + URL, + Referer, + IsRefresh, + RefererCategoryID, + RefererRegionID, + URLCategoryID, + URLRegionID, + ResolutionWidth, + ResolutionHeight, + ResolutionDepth, + FlashMajor, + FlashMinor, + FlashMinor2, + NetMajor, + NetMinor, + UserAgentMajor, + UserAgentMinor, + CookieEnable, + JavascriptEnable, + IsMobile, + MobilePhone, + MobilePhoneModel, + Params, + IPNetworkID, + TraficSourceID, + SearchEngineID, + SearchPhrase, + AdvEngineID, + IsArtifical, + WindowClientWidth, + WindowClientHeight, + ClientTimeZone, + FROM_UNIXTIME(ClientEventTime) AS ClientEventTime, + SilverlightVersion1, + SilverlightVersion2, + SilverlightVersion3, + SilverlightVersion4, + PageCharset, + CodeVersion, + IsLink, + IsDownload, + IsNotBounce, + FUniqID, + OriginalURL, + HID, + IsOldCounter, + IsEvent, + IsParameter, + DontCountHits, + WithHash, + HitColor, + FROM_UNIXTIME(LocalEventTime) AS LocalEventTime, + Age, + Sex, + Income, + Interests, + Robotness, + RemoteIP, + WindowName, + OpenerName, + HistoryLength, + BrowserLanguage, + BrowserCountry, + SocialNetwork, + SocialAction, + HTTPError, + SendTiming, + DNSTiming, + ConnectTiming, + ResponseStartTiming, + ResponseEndTiming, + FetchTiming, + SocialSourceNetworkID, + SocialSourcePage, + ParamPrice, + ParamOrderID, + ParamCurrency, + ParamCurrencyID, + OpenstatServiceName, + OpenstatCampaignID, + OpenstatAdID, + OpenstatSourceID, + UTMSource, + UTMMedium, + UTMCampaign, + UTMContent, + UTMTerm, + FromTag, + HasGCLID, + RefererHash, + URLHash, + CLID +FROM local( + \"file_path\" = \"user_files_secure/hits_*.parquet\", + \"backend_id\" = \"$BE_ID\", + \"format\" = \"parquet\" +) +" END=$(date +%s) LOADTIME=$(echo "$END - $START" | bc) echo "Load time: $LOADTIME" echo "$LOADTIME" > loadtime -# Dataset contains 99997497 rows, storage size is about 17319588503 bytes -mysql -h 127.0.0.1 -P9030 -uroot hits -e "SELECT count(*) FROM hits" -du -bs "$DORIS_HOME"/be/storage/ | cut -f1 | tee storage_size +du -bs "$DORIS_HOME"/be/storage/ | cut -f1 | tee storage_size echo "Data size: $(cat storage_size)" -./run.sh 2>&1 | tee -a log.txt +mysql -h 127.0.0.1 -P9030 -uroot hits -e "set global enable_sql_cache = false" +# Dataset contains 99997497 rows, storage size is about 13319588503 bytes +mysql -h 127.0.0.1 -P9030 -uroot hits -e "SELECT count(*) FROM hits" + +# Run queries +TRIES=3 +while read -r query; do + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + + for i in $(seq 1 $TRIES); do + mysql -vvv -h127.1 -P9030 -uroot hits -e "${query}" 2>&1 | tee -a log.txt + done +done /dev/null - - for i in $(seq 1 $TRIES); do - mysql -vvv -h127.1 -P9030 -uroot hits -e "${query}" - done -done Date: Thu, 19 Mar 2026 11:56:57 +0800 Subject: [PATCH 2/4] update apache doris download url --- doris/benchmark.sh | 2 +- doris/results/c6a.4xlarge.json | 88 +++++++++++++++++----------------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/doris/benchmark.sh b/doris/benchmark.sh index 9375d8b76..f45c69974 100644 --- a/doris/benchmark.sh +++ b/doris/benchmark.sh @@ -9,7 +9,7 @@ ROOT=$(pwd) if [[ -n "$1" ]]; then url="$1" else - url='https://qa-build.oss-cn-beijing.aliyuncs.com/doris_release/apache-doris-4.1.0-rc01-bin-x64.tar.gz' + url='https://apache-doris-releases.oss-accelerate.aliyuncs.com/apache-doris-4.1.0-rc01-bin-x64.tar.gz' fi # Download file_name="$(basename ${url})" diff --git a/doris/results/c6a.4xlarge.json b/doris/results/c6a.4xlarge.json index 55fefe82e..c69c40a6d 100644 --- a/doris/results/c6a.4xlarge.json +++ b/doris/results/c6a.4xlarge.json @@ -10,49 +10,49 @@ "load_time": 475, "data_size": 17105042742, "result": [ - [0.1, 0.04, 0.04], - [1.28, 0.03, 0.04], - [2.06, 0.06, 0.06], - [2.32, 0.09, 0.09], - [2.3, 0.72, 0.69], - [2.17, 0.81, 0.8], - [0.07, 0.01, 0.02], - [1.45, 0.04, 0.04], - [4.05, 0.61, 0.62], - [5.73, 0.69, 0.69], - [3.69, 0.11, 0.11], - [4.29, 0.12, 0.12], - [2.96, 0.61, 0.6], - [5.22, 1.07, 0.97], - [4.08, 0.96, 0.99], - [2.24, 0.47, 0.52], - [4.66, 1.44, 1.36], - [4.42, 0.38, 0.42], - [6.29, 2.47, 2.53], - [0.05, 0.01, 0.01], - [11.91, 0.92, 0.89], - [14.52, 0.76, 0.76], - [26.85, 1.51, 1.51], - [5.88, 0.17, 0.17], - [1.83, 0.19, 0.09], - [3.29, 0.21, 0.22], - [1.81, 0.16, 0.14], - [12.21, 1.49, 1.49], - [11.14, 8.73, 8.63], - [1.33, 0.07, 0.07], - [7.04, 0.39, 0.39], - [8.97, 0.52, 0.52], - [6.56, 3.33, 3.35], - [14.23, 14.02, 13.28], - [14.26, 13.82, 12.72], - [2.04, 0.6, 0.59], - [2.42, 0.09, 0.08], - [2.45, 0.05, 0.05], - [1.9, 0.04, 0.04], - [2.53, 0.24, 0.23], - [1.4, 0.03, 0.03], - [1.85, 0.03, 0.03], - [1.51, 0.03, 0.03] -] + [0.03, 0.01, 0.02], + [0.09, 0.03, 0.03], + [0.68, 0.05, 0.05], + [4.03, 0.09, 0.08], + [0.31, 0.31, 0.34], + [1.75, 0.86, 0.86], + [0.01, 0.01, 0.01], + [0.04, 0.03, 0.04], + [0.80, 0.61, 0.62], + [0.69, 0.66, 0.70], + [0.28, 0.11, 0.11], + [0.54, 0.14, 0.13], + [0.68, 0.64, 0.62], + [1.14, 1.14, 1.14], + [0.98, 0.98, 0.98], + [0.53, 0.55, 0.53], + [1.57, 1.64, 1.60], + [0.19, 0.18, 0.20], + [2.91, 2.82, 2.55], + [0.01, 0.00, 0.01], + [17.11, 0.96, 1.00], + [14.96, 0.87, 0.87], + [31.07, 1.75, 1.79], + [1.85, 0.21, 0.11], + [0.07, 0.06, 0.06], + [0.15, 0.15, 0.14], + [0.05, 0.05, 0.05], + [15.12, 1.61, 1.55], + [14.69, 9.58, 9.56], + [0.67, 0.10, 0.07], + [3.71, 0.51, 0.40], + [6.27, 0.63, 0.52], + [3.60, 3.73, 3.84], + [19.76, 5.61, 5.39], + [19.58, 5.75, 6.10], + [2.17, 0.59, 0.63], + [0.11, 0.06, 0.07], + [0.06, 0.03, 0.03], + [0.04, 0.02, 0.03], + [0.18, 0.17, 0.16], + [0.05, 0.03, 0.03], + [0.05, 0.03, 0.03], + [0.04, 0.03, 0.03] + ] } From 18f3aee133832488816d8f2f0e757ba3f5063a18 Mon Sep 17 00:00:00 2001 From: happenlee Date: Thu, 19 Mar 2026 12:05:58 +0800 Subject: [PATCH 3/4] add parallel num check with minimum value --- doris/benchmark.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doris/benchmark.sh b/doris/benchmark.sh index f45c69974..b57738670 100644 --- a/doris/benchmark.sh +++ b/doris/benchmark.sh @@ -96,8 +96,13 @@ mv *.parquet "$BE_DATA_DIR/user_files_secure" BE_ID=$(mysql -h127.0.0.1 -P9030 -uroot -N -e 'show backends' | awk '{print $1}' | head -1) -PARALLEL_NUM=$(($(nproc) / 4)) -echo "Setting parallel_pipeline_task_num to $PARALLEL_NUM (cpu cores: $(nproc) / 4)" +CORES=$(nproc) +PARALLEL_NUM=$((CORES / 4)) +if [ "$PARALLEL_NUM" -lt 1 ]; then + echo "Computed parallel_pipeline_task_num ($PARALLEL_NUM) is less than 1 based on $CORES cores; clamping to 1." + PARALLEL_NUM=1 +fi +echo "Setting parallel_pipeline_task_num to $PARALLEL_NUM (cpu cores: $CORES, computed as CORES/4 with min 1)" echo "start loading hits.parquet using TVF, estimated to take about 3 minutes ..." START=$(date +%s) From dd89698b775aae1ed692ef63aeaeb7810b738f61 Mon Sep 17 00:00:00 2001 From: happenlee Date: Thu, 19 Mar 2026 12:08:09 +0800 Subject: [PATCH 4/4] revert c6a.4xlarge.json to origin/main version --- doris/results/c6a.4xlarge.json | 88 +++++++++++++++++----------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/doris/results/c6a.4xlarge.json b/doris/results/c6a.4xlarge.json index c69c40a6d..55fefe82e 100644 --- a/doris/results/c6a.4xlarge.json +++ b/doris/results/c6a.4xlarge.json @@ -10,49 +10,49 @@ "load_time": 475, "data_size": 17105042742, "result": [ - [0.03, 0.01, 0.02], - [0.09, 0.03, 0.03], - [0.68, 0.05, 0.05], - [4.03, 0.09, 0.08], - [0.31, 0.31, 0.34], - [1.75, 0.86, 0.86], - [0.01, 0.01, 0.01], - [0.04, 0.03, 0.04], - [0.80, 0.61, 0.62], - [0.69, 0.66, 0.70], - [0.28, 0.11, 0.11], - [0.54, 0.14, 0.13], - [0.68, 0.64, 0.62], - [1.14, 1.14, 1.14], - [0.98, 0.98, 0.98], - [0.53, 0.55, 0.53], - [1.57, 1.64, 1.60], - [0.19, 0.18, 0.20], - [2.91, 2.82, 2.55], - [0.01, 0.00, 0.01], - [17.11, 0.96, 1.00], - [14.96, 0.87, 0.87], - [31.07, 1.75, 1.79], - [1.85, 0.21, 0.11], - [0.07, 0.06, 0.06], - [0.15, 0.15, 0.14], - [0.05, 0.05, 0.05], - [15.12, 1.61, 1.55], - [14.69, 9.58, 9.56], - [0.67, 0.10, 0.07], - [3.71, 0.51, 0.40], - [6.27, 0.63, 0.52], - [3.60, 3.73, 3.84], - [19.76, 5.61, 5.39], - [19.58, 5.75, 6.10], - [2.17, 0.59, 0.63], - [0.11, 0.06, 0.07], - [0.06, 0.03, 0.03], - [0.04, 0.02, 0.03], - [0.18, 0.17, 0.16], - [0.05, 0.03, 0.03], - [0.05, 0.03, 0.03], - [0.04, 0.03, 0.03] - ] + [0.1, 0.04, 0.04], + [1.28, 0.03, 0.04], + [2.06, 0.06, 0.06], + [2.32, 0.09, 0.09], + [2.3, 0.72, 0.69], + [2.17, 0.81, 0.8], + [0.07, 0.01, 0.02], + [1.45, 0.04, 0.04], + [4.05, 0.61, 0.62], + [5.73, 0.69, 0.69], + [3.69, 0.11, 0.11], + [4.29, 0.12, 0.12], + [2.96, 0.61, 0.6], + [5.22, 1.07, 0.97], + [4.08, 0.96, 0.99], + [2.24, 0.47, 0.52], + [4.66, 1.44, 1.36], + [4.42, 0.38, 0.42], + [6.29, 2.47, 2.53], + [0.05, 0.01, 0.01], + [11.91, 0.92, 0.89], + [14.52, 0.76, 0.76], + [26.85, 1.51, 1.51], + [5.88, 0.17, 0.17], + [1.83, 0.19, 0.09], + [3.29, 0.21, 0.22], + [1.81, 0.16, 0.14], + [12.21, 1.49, 1.49], + [11.14, 8.73, 8.63], + [1.33, 0.07, 0.07], + [7.04, 0.39, 0.39], + [8.97, 0.52, 0.52], + [6.56, 3.33, 3.35], + [14.23, 14.02, 13.28], + [14.26, 13.82, 12.72], + [2.04, 0.6, 0.59], + [2.42, 0.09, 0.08], + [2.45, 0.05, 0.05], + [1.9, 0.04, 0.04], + [2.53, 0.24, 0.23], + [1.4, 0.03, 0.03], + [1.85, 0.03, 0.03], + [1.51, 0.03, 0.03] +] }