From 1c953c7e804979279850905546276b5e8ac4ca27 Mon Sep 17 00:00:00 2001
From: LiaCastaneda <lia.castaneda@datadoghq.com>
Date: Wed, 18 Mar 2026 12:51:03 +0100
Subject: [PATCH 1/2] Fix tdigest quantile ordering on sparse data

---
 .../tests/dataframe/dataframe_functions.rs    |  2 +-
 .../functions-aggregate-common/src/tdigest.rs | 24 +++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs
index 014f356cd64cd..c4feb89e33e70 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -510,7 +510,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
     +-------------------------------------------------------------------------------------+
     | approx_percentile_cont(Float64(0.1),Int32(2)) WITHIN GROUP [test.b DESC NULLS LAST] |
     +-------------------------------------------------------------------------------------+
-    | 69                                                                                  |
+    | 100                                                                                 |
     +-------------------------------------------------------------------------------------+
     ");
 
diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs
index a7450f0eb52e9..da4c0c4e7452a 100644
--- a/datafusion/functions-aggregate-common/src/tdigest.rs
+++ b/datafusion/functions-aggregate-common/src/tdigest.rs
@@ -443,6 +443,13 @@ impl TDigest {
                 return self.max();
             }
 
+            // If the rank falls within the last unit of weight (rank >= count - 1),
+            // return max directly. Without this, interpolation at the last centroid
+            // boundary can produce p90 > p99 on sparse data (e.g. 10 values).
+            if rank >= self.count - 1.0 {
+                return self.max();
+            }
+
             pos = 0;
             t = self.count;
 
@@ -735,6 +742,23 @@ mod tests {
         assert_state_roundtrip!(t);
     }
 
+    // Sparse data (10 values): estimates must be monotonic (p50 <= p90 <= p99), and p90/p99 must equal the largest input.
+    #[test]
+    fn test_sparse_dataset_quantile_ordering() {
+        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0, 1000.0];
+        let t = TDigest::new(100);
+        let t = t.merge_unsorted_f64(values);
+
+        let p50 = t.estimate_quantile(0.5);
+        let p90 = t.estimate_quantile(0.9);
+        let p99 = t.estimate_quantile(0.99);
+
+        assert!(p50 <= p90, "p50 ({p50}) should be <= p90 ({p90})");
+        assert!(p90 <= p99, "p90 ({p90}) should be <= p99 ({p99})");
+        assert_eq!(p90, 1000.0, "p90 should be max on boundary rank");
+        assert_eq!(p99, 1000.0, "p99 should be max on boundary rank");
+    }
+
     #[test]
     fn test_size() {
         let t = TDigest::new(10);

From c89eb6600cec7ae274ff4a6078be3cd2ace64e7e Mon Sep 17 00:00:00 2001
From: LiaCastaneda <lia.castaneda@datadoghq.com>
Date: Wed, 18 Mar 2026 13:56:09 +0100
Subject: [PATCH 2/2] Adjust sqllogictest expected approx_percentile_cont results

---
 datafusion/sqllogictest/test_files/aggregate.slt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index cf894a494ad90..a29cde3047c8f 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -2292,7 +2292,7 @@ SELECT c1, approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY c3) AS c3_p95 FRO
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 
@@ -2303,7 +2303,7 @@ SELECT c1, approx_percentile_cont(c3, 0.95) AS c3_p95 FROM aggregate_test_100 GR
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 
@@ -2314,7 +2314,7 @@ SELECT c1, approx_percentile_cont(c2, 0.95) AS c2, approx_percentile_cont(c3, 0.
 a 5 73
 b 5 68
 c 5 122
-d 5 124
+d 5 125
 e 5 115
 
 # error is unique to this UDAF
@@ -2345,7 +2345,7 @@ SELECT c1, approx_percentile_cont_with_weight(1, 0.95) WITHIN GROUP (ORDER BY c3
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 # csv_query_approx_percentile_cont_with_weight alternate syntax
@@ -2355,7 +2355,7 @@ SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM aggreg
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 
@@ -2375,7 +2375,7 @@ SELECT c1, approx_percentile_cont(0.95, 200) WITHIN GROUP (ORDER BY c3) AS c3_p9
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 query TI