Add ParquetFileMerger for efficient row-group level file merging #14435
Closed
Commits (25)
- 7f2d5b0 Add ParquetFileMerger for efficient row-group level file merging (shangxinli)
- fa1d073 Address feedbacks (shangxinli)
- 7a34353 Address feedbacks for second round (shangxinli)
- c150887 Address comments round 3 (shangxinli)
- c593e9e Trigger CI (shangxinli)
- c71b419 Add row lineage preservation to ParquetFileMerger with binpack-compat… (shangxinli)
- 4ddb5b4 Address feedback of adding row_id support (shangxinli)
- 4130a79 Address feedback for another round (shangxinli)
- 4e4874e Address comments for another round (shangxinli)
- eabaa0d Address another round of feedbacks (shangxinli)
- 55aa295 Merge branch 'main' into rewrite_data_files2 (shangxinli)
- 047f9b6 Address feedback for another round (shangxinli)
- 0709582 Simplify ParquetFileMerger API to accept DataFile objects (shangxinli)
- cdc322d Initialize columnIndexTruncateLength internally in ParquetFileMerger (shangxinli)
- 853fd19 Address review feedback: refactor ParquetFileMerger API and validation (shangxinli)
- 4c4f2cb Refactor ParquetFileMerger API to return MessageType (shangxinli)
- aa5fc36 Address review feedback: optimize validation and file I/O (shangxinli)
- 5962e74 Address review feedback (shangxinli)
- 2eca995 Refactor SparkParquetFileMergeRunner to pass RewriteFileGroup to exec… (shangxinli)
- 45b0197 Inline ParquetFileReader in try-with-resources block (shangxinli)
- 3194f1e Address pvary's review comments on ParquetFileMerger PR (shangxinli)
- 66532a3 Address reviewer comments on ParquetFileMerger PR (shangxinli)
- 417e0fa Trigger CI re-run
- 2404008 Add partition spec ID validation for binary merge
- a471220 Address pvary review comments on test structure
Maybe qualify this with "may provide"?
By the way, what about the case of small files? Won't it produce very small row groups? Should we have some threshold below which this optimization is not run?
How would you make a decision around this? Any normal file could have a small row group at the end.
Shall we add a threshold, e.g. the average row group size should be greater than some value?
I would suggest adding this later in a different PR if we decide that we need it.
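The average-size threshold floated above could be as simple as the following sketch. This is illustrative only, not code from the PR; the class name and the 8 MB cutoff are hypothetical.

```java
// Hypothetical heuristic (not part of this PR): skip the binary merge when
// the average row group across the input files is too small, since
// concatenating many tiny row groups just carries the problem forward.
public class RowGroupMergeHeuristic {
  // Minimum average row group size worth carrying into the merged file;
  // 8 MB is an arbitrary illustrative default, not a Parquet/Iceberg value.
  static final long MIN_AVG_ROW_GROUP_BYTES = 8L * 1024 * 1024;

  /** Returns true if a row-group-level (binary) merge is likely worthwhile. */
  static boolean shouldBinaryMerge(long totalBytes, int rowGroupCount) {
    if (rowGroupCount == 0) {
      return false;
    }
    return totalBytes / rowGroupCount >= MIN_AVG_ROW_GROUP_BYTES;
  }
}
```

A trailing small row group in an otherwise healthy file barely moves the average, which is why an average-based check avoids the "any normal file has a small last row group" objection.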
Sorry, I just saw @lintingbin's comment above; it's the same one. So currently the user opts in, and it's all or nothing.
Yeah, maybe when stitching two files we could use a threshold percentage for how many rows end up in small row groups, and another threshold for what counts as a small row group.
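The two-threshold idea could look roughly like this sketch. Again, this is not code from the PR; the class name and both cutoff values are hypothetical placeholders.

```java
// Sketch of the two-threshold idea (illustrative only): a row group is
// "small" below SMALL_RG_BYTES, and stitching is rejected when more than
// MAX_SMALL_ROW_FRACTION of all rows would live in small row groups.
public class SmallRowGroupCheck {
  static final long SMALL_RG_BYTES = 4L * 1024 * 1024; // hypothetical cutoff
  static final double MAX_SMALL_ROW_FRACTION = 0.2;    // hypothetical cutoff

  /** sizeBytes[i] and rowCounts[i] describe row group i across the inputs. */
  static boolean acceptableForStitching(long[] sizeBytes, long[] rowCounts) {
    long totalRows = 0;
    long rowsInSmallGroups = 0;
    for (int i = 0; i < sizeBytes.length; i++) {
      totalRows += rowCounts[i];
      if (sizeBytes[i] < SMALL_RG_BYTES) {
        rowsInSmallGroups += rowCounts[i];
      }
    }
    return totalRows == 0
        || (double) rowsInSmallGroups / totalRows <= MAX_SMALL_ROW_FRACTION;
  }
}
```

Counting rows rather than row groups keeps the check tolerant of a single tiny trailing row group while still rejecting inputs that are mostly fragments.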
The current scenario involves Flink writing to a large, wide Iceberg table, with a checkpoint committed every minute.
This is the problem we aim to solve. Ideally, each row of data would be compressed only once, but that is often unachievable in high-frequency write scenarios. However, if data that has already been compressed once by traditional writes can later be merged at the row-group level without recompression, that is still highly desirable.
@lintingbin the compression is at the page level. If your streaming checkpoint interval can produce pages of the typical size (default 1 MB), we can consider a later PR that merges at the page level. In Parquet, we have made changes to rewrite Parquet files without a decompress-then-recompress cycle. For example, we do encryption (also at the page level) rewrite that way: we walk through each page and, without decoding or decompressing it, immediately encrypt that page and send it to disk. That is several times faster than a record-by-record rewrite, but it is a more complex change. We can do that later.
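The per-page rewrite described above can be modeled abstractly as follows. This is a toy sketch, not the real parquet-java implementation: pages are modeled as raw byte arrays and the transform is a placeholder for an operation like per-page encryption.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.function.UnaryOperator;

// Toy model of a page-level rewrite (the real code lives in parquet-java;
// this class and its types are hypothetical). Each page's compressed bytes
// are handed directly to a per-page transform and written out, with no
// decompression or decoding anywhere in the loop.
public class PageLevelRewrite {
  static List<byte[]> rewritePages(
      List<byte[]> compressedPages, UnaryOperator<byte[]> perPageTransform) {
    List<byte[]> out = new ArrayList<>();
    for (byte[] page : compressedPages) {
      // The page stays compressed; the transform only sees compressed bytes.
      out.add(perPageTransform.apply(page));
    }
    return out;
  }
}
```

The speedup comes from skipping the decode and decompress steps entirely, so the cost per page is just the transform plus I/O.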
I'm not sure I follow the problem. I thought it was generally bad practice to have so many row groups in a single file, and I'm also not a fan of how careful we have to be about schema and field ID matching.
@lintingbin Why would we have to do (3)? Why not just not mark the 150 MB file for compaction if that's an issue? You could always have compaction only compact files smaller than 10 MB or so.
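The suggestion above amounts to a simple candidate filter. A minimal sketch, assuming sizes are known up front (the class name and the 10 MB cutoff are illustrative, not Iceberg's API):

```java
import java.util.List;
import java.util.stream.Collectors;

// Illustrative candidate selection: only files below a size threshold are
// picked up for compaction, so an already-compacted 150 MB file is simply
// never selected again.
public class CompactionCandidateFilter {
  static final long MAX_CANDIDATE_BYTES = 10L * 1024 * 1024; // "10 MB or so"

  static List<Long> selectCandidates(List<Long> fileSizes) {
    return fileSizes.stream()
        .filter(size -> size < MAX_CANDIDATE_BYTES)
        .collect(Collectors.toList());
  }
}
```

In Iceberg's actual rewrite action this kind of bound is expressed through rewrite options rather than custom code, but the filtering effect is the same: large, already-compacted files fall outside the candidate set.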
If I understand correctly from the comment @lintingbin wrote, it sounds like this is an attempt to decrease the cost of compaction when it is unstable -- that is, when files that have already been compacted (the 150 MB file) are compacted a second time. It's a little unclear, but I think the assertion in the last item (5) is that this is useful if you first rewrite small files to a larger file and then compact the larger files without rewriting row groups. This would mean a 2-pass approach: first rewrite the content into medium-sized files (and whole row groups) and then rewrite into large files with multiple row groups.
I don't understand the value of that approach. Once you've solved the small files problem (~100x file count) by rewriting into larger row groups, the additional benefit of a second compaction is very low (~2x file count). I don't see why you would perform the second compaction at all if it is just concatenating the row groups from other files. As long as you're rewriting the data a second time, it makes much more sense to prepare the data for long-term storage and query by clustering and ordering the rows. That would significantly decrease overall size and speed up queries at the same time, which is worth the cost of the rewrite.
And while you're clustering and sorting data, I doubt it makes sense to do the initial rewrite as well. Why incur the cost of rewriting and then not reorganize the data in the first pass as long as you're already rewriting to avoid tiny row groups?
I don't see much value in exposing this -- is it really something that is worth supporting when it is extremely limited and has a very narrow use case (if any)?