Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 148 additions & 23 deletions doris/benchmark.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ROOT=$(pwd)
if [[ -n "$1" ]]; then
url="$1"
else
url='https://apache-doris-releases.oss-accelerate.aliyuncs.com/apache-doris-2.1.7-rc01-bin-x64.tar.gz'
url='https://apache-doris-releases.oss-accelerate.aliyuncs.com/apache-doris-4.1.0-rc01-bin-x64.tar.gz'
fi
# Download
file_name="$(basename ${url})"
Expand All @@ -24,21 +24,20 @@ dir_name="${file_name/.tar.gz/}"

# Try to stop Doris and remove it first if execute this script multiple times
set +e
"$dir_name"/apache-doris-2.1.7-rc01-bin-x64/fe/bin/stop_fe.sh
"$dir_name"/apache-doris-2.1.7-rc01-bin-x64/be/bin/stop_be.sh
"$dir_name"/"$dir_name"/fe/bin/stop_fe.sh
"$dir_name"/"$dir_name"/be/bin/stop_be.sh
rm -rf "$dir_name"
set -e

# Uncompress
mkdir "$dir_name"
tar zxf "$file_name" -C "$dir_name"
DORIS_HOME="$ROOT/$dir_name/apache-doris-2.1.7-rc01-bin-x64"
DORIS_HOME="$ROOT/$dir_name/$dir_name"
export DORIS_HOME

# Install dependencies
sudo apt-get update -y
sudo apt-get install -y openjdk-17-jdk
sudo apt-get install -y mysql-client
sudo apt-get install -y openjdk-17-jdk mysql-client
export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-$(dpkg --print-architecture)/"
export PATH=$JAVA_HOME/bin:$PATH

Expand Down Expand Up @@ -89,34 +88,160 @@ sleep 5
mysql -h 127.0.0.1 -P9030 -uroot hits <"$ROOT"/create.sql

# Download data
if [[ ! -f hits.tsv.gz ]] && [[ ! -f hits.tsv ]]; then
sudo apt-get install -y pigz
wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
pigz -d -f hits.tsv.gz
BE_DATA_DIR="$DORIS_HOME/be/"
mkdir -p "$BE_DATA_DIR/user_files_secure"

seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue --progress=dot:giga https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet'
mv *.parquet "$BE_DATA_DIR/user_files_secure"

BE_ID=$(mysql -h127.0.0.1 -P9030 -uroot -N -e 'show backends' | awk '{print $1}' | head -1)

CORES=$(nproc)
PARALLEL_NUM=$((CORES / 4))
if [ "$PARALLEL_NUM" -lt 1 ]; then
echo "Computed parallel_pipeline_task_num ($PARALLEL_NUM) is less than 1 based on $CORES cores; clamping to 1."
PARALLEL_NUM=1
fi
echo "Setting parallel_pipeline_task_num to $PARALLEL_NUM (cpu cores: $CORES, computed as CORES/4 with min 1)"

# Load data
echo "start loading hits.tsv, estimated to take about 9 minutes ..."
date
echo "start loading hits.parquet using TVF, estimated to take about 3 minutes ..."
START=$(date +%s)
curl --location-trusted \
-u root: \
-T "hits.tsv" \
-H "label:hits" \
-H "columns: WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
http://localhost:8030/api/hits/hits/_stream_load
mysql -h 127.0.0.1 -P9030 -uroot hits -e "SET parallel_pipeline_task_num = $PARALLEL_NUM;\
INSERT INTO hits SELECT
CounterID,
DATE_ADD('1970-01-01', INTERVAL EventDate DAY) AS EventDate,
UserID,
FROM_UNIXTIME(EventTime) AS EventTime,
WatchID,
JavaEnable,
Title,
GoodEvent,
ClientIP,
RegionID,
CounterClass,
OS,
UserAgent,
URL,
Referer,
IsRefresh,
RefererCategoryID,
RefererRegionID,
URLCategoryID,
URLRegionID,
ResolutionWidth,
ResolutionHeight,
ResolutionDepth,
FlashMajor,
FlashMinor,
FlashMinor2,
NetMajor,
NetMinor,
UserAgentMajor,
UserAgentMinor,
CookieEnable,
JavascriptEnable,
IsMobile,
MobilePhone,
MobilePhoneModel,
Params,
IPNetworkID,
TraficSourceID,
SearchEngineID,
SearchPhrase,
AdvEngineID,
IsArtifical,
WindowClientWidth,
WindowClientHeight,
ClientTimeZone,
FROM_UNIXTIME(ClientEventTime) AS ClientEventTime,
SilverlightVersion1,
SilverlightVersion2,
SilverlightVersion3,
SilverlightVersion4,
PageCharset,
CodeVersion,
IsLink,
IsDownload,
IsNotBounce,
FUniqID,
OriginalURL,
HID,
IsOldCounter,
IsEvent,
IsParameter,
DontCountHits,
WithHash,
HitColor,
FROM_UNIXTIME(LocalEventTime) AS LocalEventTime,
Age,
Sex,
Income,
Interests,
Robotness,
RemoteIP,
WindowName,
OpenerName,
HistoryLength,
BrowserLanguage,
BrowserCountry,
SocialNetwork,
SocialAction,
HTTPError,
SendTiming,
DNSTiming,
ConnectTiming,
ResponseStartTiming,
ResponseEndTiming,
FetchTiming,
SocialSourceNetworkID,
SocialSourcePage,
ParamPrice,
ParamOrderID,
ParamCurrency,
ParamCurrencyID,
OpenstatServiceName,
OpenstatCampaignID,
OpenstatAdID,
OpenstatSourceID,
UTMSource,
UTMMedium,
UTMCampaign,
UTMContent,
UTMTerm,
FromTag,
HasGCLID,
RefererHash,
URLHash,
CLID
FROM local(
\"file_path\" = \"user_files_secure/hits_*.parquet\",
\"backend_id\" = \"$BE_ID\",
\"format\" = \"parquet\"
)
"
END=$(date +%s)
LOADTIME=$(echo "$END - $START" | bc)
echo "Load time: $LOADTIME"
echo "$LOADTIME" > loadtime

# Dataset contains 99997497 rows, storage size is about 17319588503 bytes
mysql -h 127.0.0.1 -P9030 -uroot hits -e "SELECT count(*) FROM hits"
du -bs "$DORIS_HOME"/be/storage/ | cut -f1 | tee storage_size

du -bs "$DORIS_HOME"/be/storage/ | cut -f1 | tee storage_size
echo "Data size: $(cat storage_size)"

./run.sh 2>&1 | tee -a log.txt
mysql -h 127.0.0.1 -P9030 -uroot hits -e "set global enable_sql_cache = false"
# Dataset contains 99997497 rows, storage size is about 13319588503 bytes
mysql -h 127.0.0.1 -P9030 -uroot hits -e "SELECT count(*) FROM hits"

# Run queries
TRIES=3
while read -r query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null

for i in $(seq 1 $TRIES); do
mysql -vvv -h127.1 -P9030 -uroot hits -e "${query}" 2>&1 | tee -a log.txt
done
done <queries.sql

cat log.txt |
grep -P 'rows? in set|Empty set|^ERROR' |
Expand Down
94 changes: 47 additions & 47 deletions doris/results/c7a.metal-48xl.json
Original file line number Diff line number Diff line change
@@ -1,57 +1,57 @@
{
"system": "Apache Doris",
"date": "2025-08-30",
"date": "2026-03-18",
"machine": "c7a.metal-48xl",
"cluster_size": 1,
"proprietary": "no",
"hardware": "cpu",
"tuned": "no",
"tags": ["C++","column-oriented","MySQL compatible","ClickHouse derivative", "lukewarm-cold-run"],
"load_time": 363,
"data_size": 17361427232,
"load_time": 152,
"data_size": 13777831111,
"result": [
[0.04, 0.03, 0.02],
[0.36, 0.02, 0.02],
[1.29, 0.02, 0.03],
[1.71, 0.03, 0.03],
[1.62, 0.1, 0.11],
[1.56, 0.15, 0.13],
[0.04, 0.01, 0.01],
[0.36, 0.03, 0.02],
[2.81, 0.19, 0.19],
[3.82, 0.21, 0.2],
[2.38, 0.07, 0.07],
[3.12, 0.07, 0.07],
[1.99, 0.15, 0.14],
[3.61, 0.2, 0.18],
[2.25, 0.17, 0.16],
[1.49, 0.11, 0.1],
[3.5, 0.23, 0.21],
[2.45, 0.05, 0.05],
[4.73, 0.38, 0.37],
[0.05, 0.01, 0],
[11.14, 0.08, 0.05],
[12.94, 0.08, 0.05],
[24.8, 0.14, 0.07],
[7.49, 0.05, 0.04],
[2.94, 0.17, 0.04],
[2.05, 0.05, 0.05],
[2.88, 0.28, 0.22],
[11.37, 0.15, 0.12],
[9.15, 0.72, 0.66],
[0.52, 0.04, 0.03],
[5.17, 0.1, 0.08],
[6.79, 0.12, 0.11],
[4.87, 0.65, 0.58],
[11.4, 0.75, 0.68],
[11.44, 0.77, 0.66],
[0.88, 0.14, 0.15],
[1.85, 0.03, 0.03],
[2.06, 0.02, 0.02],
[2.01, 0.03, 0.03],
[2.62, 0.06, 0.05],
[1.51, 0.03, 0.03],
[1.94, 0.03, 0.03],
[1.35, 0.03, 0.02]
]
[0.024, 0.004, 0.003],
[0.100, 0.028, 0.027],
[0.488, 0.051, 0.050],
[3.588, 0.052, 0.044],
[0.092, 0.061, 0.068],
[1.921, 0.120, 0.120],
[0.031, 0.008, 0.004],
[0.052, 0.032, 0.032],
[0.430, 0.094, 0.095],
[0.147, 0.146, 0.144],
[0.112, 0.038, 0.040],
[0.550, 0.045, 0.046],
[0.099, 0.097, 0.091],
[0.161, 0.160, 0.156],
[3.030, 2.667, 2.491],
[0.112, 0.102, 0.105],
[0.260, 0.258, 0.248],
[0.067, 0.051, 0.055],
[1.544, 0.374, 0.380],
[0.016, 0.008, 0.008],
[17.071, 0.128, 0.056],
[0.082, 0.054, 0.049],
[15.391, 0.085, 0.069],
[0.823, 0.057, 0.051],
[0.046, 0.052, 0.076],
[0.063, 0.063, 0.061],
[0.055, 0.049, 0.054],
[0.193, 0.154, 0.153],
[14.729, 0.698, 0.683],
[0.072, 0.058, 0.056],
[1.549, 0.068, 0.068],
[6.815, 0.100, 0.098],
[0.533, 0.477, 0.512],
[0.536, 0.525, 0.536],
[0.609, 0.520, 0.516],
[0.129, 0.125, 0.126],
[0.074, 0.038, 0.039],
[0.053, 0.035, 0.035],
[0.302, 0.035, 0.036],
[0.223, 0.063, 0.056],
[0.555, 0.038, 0.038],
[0.442, 0.040, 0.041],
[0.040, 0.045, 0.038]
]
}
13 changes: 0 additions & 13 deletions doris/run.sh

This file was deleted.