apache · LiaCastaneda · Mar 18, 2026 · Mar 18, 2026
diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -510,7 +510,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
     +-------------------------------------------------------------------------------------+
     | approx_percentile_cont(Float64(0.1),Int32(2)) WITHIN GROUP [test.b DESC NULLS LAST] |
     +-------------------------------------------------------------------------------------+
-    | 69                                                                                  |
+    | 100                                                                                 |
     +-------------------------------------------------------------------------------------+
     ");
 

diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs
@@ -443,6 +443,13 @@ impl TDigest {
                 return self.max();
             }
 
+            // If rank reaches the last unit of weight, return max directly.
+            // Without this, interpolation at the last centroid boundary can
+            // produce p90 > p99 on sparse data (e.g. 10 values).
+            if rank >= self.count - 1.0 {
+                return self.max();
+            }
+
             pos = 0;
             t = self.count;
 
@@ -735,6 +742,23 @@ mod tests {
         assert_state_roundtrip!(t);
     }
 
+    // Regression: sparse-data quantiles must be monotonic; boundary ranks (p90, p99) return max.
+    #[test]
+    fn test_sparse_dataset_quantile_ordering() {
+        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0, 1000.0];
+        let t = TDigest::new(100);
+        let t = t.merge_unsorted_f64(values);
+
+        let p50 = t.estimate_quantile(0.5);
+        let p90 = t.estimate_quantile(0.9);
+        let p99 = t.estimate_quantile(0.99);
+
+        assert!(p50 <= p90, "p50 ({p50}) should be <= p90 ({p90})");
+        assert!(p90 <= p99, "p90 ({p90}) should be <= p99 ({p99})");
+        assert_eq!(p90, 1000.0, "p90 should be max on boundary rank");
+        assert_eq!(p99, 1000.0, "p99 should be max on boundary rank");
+    }
+
     #[test]
     fn test_size() {
         let t = TDigest::new(10);

diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -2292,7 +2292,7 @@ SELECT c1, approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY c3) AS c3_p95 FRO
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 
@@ -2303,7 +2303,7 @@ SELECT c1, approx_percentile_cont(c3, 0.95) AS c3_p95 FROM aggregate_test_100 GR
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 
@@ -2314,7 +2314,7 @@ SELECT c1, approx_percentile_cont(c2, 0.95) AS c2, approx_percentile_cont(c3, 0.
 a 5 73
 b 5 68
 c 5 122
-d 5 124
+d 5 125
 e 5 115
 
 # error is unique to this UDAF
@@ -2345,7 +2345,7 @@ SELECT c1, approx_percentile_cont_with_weight(1, 0.95) WITHIN GROUP (ORDER BY c3
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 # csv_query_approx_percentile_cont_with_weight alternate syntax
@@ -2355,7 +2355,7 @@ SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM aggreg
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 
@@ -2375,7 +2375,7 @@ SELECT c1, approx_percentile_cont(0.95, 200) WITHIN GROUP (ORDER BY c3) AS c3_p9
 a 73
 b 68
 c 122
-d 124
+d 125
 e 115
 
 query TI
-Original file line number
+Diff line change
@@ Expand Up / @@ -510,7 +510,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> { @@
         +-------------------------------------------------------------------------------------+
         | approx_percentile_cont(Float64(0.1),Int32(2)) WITHIN GROUP [test.b DESC NULLS LAST] |
         +-------------------------------------------------------------------------------------+
-        | 69                                                                                  |
+        | 100                                                                                 |
         +-------------------------------------------------------------------------------------+
         ");
@@ Expand Down @@