diff --git a/.github/workflows/utitcase-spark-4.x.yml b/.github/workflows/utitcase-spark-4.x.yml index 56629110f503..993fa97ba2cf 100644 --- a/.github/workflows/utitcase-spark-4.x.yml +++ b/.github/workflows/utitcase-spark-4.x.yml @@ -61,7 +61,7 @@ jobs: jvm_timezone=$(random_timezone) echo "JVM timezone is set to $jvm_timezone" test_modules="" - for suffix in ut 4.0; do + for suffix in ut 4.0 4.1; do test_modules+="org.apache.paimon:paimon-spark-${suffix}_2.13," done test_modules="${test_modules%,}" diff --git a/docs/content/spark/quick-start.md b/docs/content/spark/quick-start.md index 58530ebcb73e..524d82a16352 100644 --- a/docs/content/spark/quick-start.md +++ b/docs/content/spark/quick-start.md @@ -30,7 +30,7 @@ under the License. Paimon supports the following Spark versions with their respective Java and Scala compatibility. We recommend using the latest Spark version for a better experience. -- Spark 4.x (including 4.0) : Pre-built with Java 17 and Scala 2.13 +- Spark 4.x (including 4.1, 4.0) : Pre-built with Java 17 and Scala 2.13 - Spark 3.x (including 3.5, 3.4, 3.3, 3.2) : Pre-built with Java 8 and Scala 2.12/2.13 @@ -40,6 +40,7 @@ Download the jar file with corresponding version. 
| Version | Jar (Scala 2.12) | Jar (Scala 2.13) | |-----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Spark 4.1 | - | [paimon-spark-4.1_2.13-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-4.1_2.13/{{< version >}}/paimon-spark-4.1_2.13-{{< version >}}.jar) | | Spark 4.0 | - | [paimon-spark-4.0_2.13-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-4.0_2.13/{{< version >}}/paimon-spark-4.0_2.13-{{< version >}}.jar) | | Spark 3.5 | [paimon-spark-3.5_2.12-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-3.5_2.12/{{< version >}}/paimon-spark-3.5_2.12-{{< version >}}.jar) | [paimon-spark-3.5_2.13-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-3.5_2.13/{{< version >}}/paimon-spark-3.5_2.13-{{< version >}}.jar) | | Spark 3.4 | [paimon-spark-3.4_2.12-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-3.4_2.12/{{< version >}}/paimon-spark-3.4_2.12-{{< version >}}.jar) | [paimon-spark-3.4_2.13-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-spark-3.4_2.13/{{< version >}}/paimon-spark-3.4_2.13-{{< version >}}.jar) | @@ -52,6 +53,7 @@ Download the jar file with corresponding version. 
| Version | Jar (Scala 2.12) | Jar (Scala 2.13) | |-----------|-----------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------| +| Spark 4.1 | - | [paimon-spark-4.1_2.13-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-4.1_2.13/{{< version >}}/) | | Spark 4.0 | - | [paimon-spark-4.0_2.13-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-4.0_2.13/{{< version >}}/) | | Spark 3.5 | [paimon-spark-3.5_2.12-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-3.5_2.12/{{< version >}}/) | [paimon-spark-3.5_2.13-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-3.5_2.13/{{< version >}}/) | | Spark 3.4 | [paimon-spark-3.4_2.12-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-3.4_2.12/{{< version >}}/) | [paimon-spark-3.4_2.13-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-spark-3.4_2.13/{{< version >}}/) | @@ -73,6 +75,9 @@ mvn clean package -DskipTests -pl paimon-spark/paimon-spark-3.5 -am -Pscala-2.13 # build paimon spark 4.0 mvn clean package -DskipTests -pl paimon-spark/paimon-spark-4.0 -am -Pspark4 + +# build paimon spark 4.1 +mvn clean package -DskipTests -pl paimon-spark/paimon-spark-4.1 -am -Pspark4 ``` For Spark 3.5, you can find the bundled jar in `./paimon-spark/paimon-spark-3.5/target/paimon-spark-3.5_2.12-{{< version >}}.jar`. 
diff --git a/paimon-spark/paimon-spark-4.1/pom.xml b/paimon-spark/paimon-spark-4.1/pom.xml new file mode 100644 index 000000000000..74a30570cc5b --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/pom.xml @@ -0,0 +1,168 @@ + + + + 4.0.0 + + + org.apache.paimon + paimon-spark + 1.4-SNAPSHOT + + + paimon-spark-4.1_2.13 + Paimon : Spark : 4.1 : 2.13 + + + 4.1.1 + + + + + org.apache.paimon + paimon-format + + + + org.apache.paimon + paimon-spark4-common_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-sql-api_${scala.binary.version} + + + + + + org.apache.paimon + paimon-spark-common_${scala.binary.version} + ${project.version} + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + + + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + + + + org.apache.spark + spark-hive_${scala.binary.version} + ${spark.version} + + + + + + org.apache.paimon + paimon-spark-ut_${scala.binary.version} + ${project.version} + tests + test + + + * + * + + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + tests + test + + + org.apache.spark + spark-connect-shims_${scala.binary.version} + + + + + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + tests + test + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + tests + test + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + shade-paimon + package + + shade + + + + + * + + com/github/luben/zstd/** + **/*libzstd-jni-*.so + **/*libzstd-jni-*.dll + + + + + + org.apache.paimon:paimon-spark4-common_${scala.binary.version} + + + + + + + + + \ No newline at end of file diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala 
b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala new file mode 100644 index 000000000000..e86195f1af0b --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueries.scala @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.catalyst.optimizer + +import org.apache.paimon.spark.PaimonScan + +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, ExprId, ScalarSubquery, SortOrder} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation + +object MergePaimonScalarSubqueries extends MergePaimonScalarSubqueriesBase { + + override def tryMergeDataSourceV2ScanRelation( + newV2ScanRelation: DataSourceV2ScanRelation, + cachedV2ScanRelation: DataSourceV2ScanRelation) + : Option[(LogicalPlan, AttributeMap[Attribute])] = { + (newV2ScanRelation, cachedV2ScanRelation) match { + case ( + DataSourceV2ScanRelation( + newRelation, + newScan: PaimonScan, + newOutput, + newPartitioning, + newOrdering), + DataSourceV2ScanRelation( + cachedRelation, + cachedScan: PaimonScan, + _, + cachedPartitioning, + cacheOrdering)) => + checkIdenticalPlans(newRelation, cachedRelation).flatMap { + outputMap => + if ( + samePartitioning(newPartitioning, cachedPartitioning, outputMap) && sameOrdering( + newOrdering, + cacheOrdering, + outputMap) + ) { + mergePaimonScan(newScan, cachedScan).map { + mergedScan => + val mergedAttributes = mergedScan + .readSchema() + .map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)()) + val cachedOutputNameMap = cachedRelation.output.map(a => a.name -> a).toMap + val mergedOutput = + mergedAttributes.map(a => cachedOutputNameMap.getOrElse(a.name, a)) + val newV2ScanRelation = + cachedV2ScanRelation.copy(scan = mergedScan, output = mergedOutput) + + val mergedOutputNameMap = mergedOutput.map(a => a.name -> a).toMap + val newOutputMap = + AttributeMap(newOutput.map(a => a -> mergedOutputNameMap(a.name).toAttribute)) + + newV2ScanRelation -> newOutputMap + } + } else { + None + } + } + + case _ => None + } + } + + private def sameOrdering( + newOrdering: Option[Seq[SortOrder]], + cachedOrdering: 
Option[Seq[SortOrder]], + outputAttrMap: AttributeMap[Attribute]): Boolean = { + val mappedNewOrdering = newOrdering.map(_.map(mapAttributes(_, outputAttrMap))) + mappedNewOrdering.map(_.map(_.canonicalized)) == cachedOrdering.map(_.map(_.canonicalized)) + } + + override protected def createScalarSubquery(plan: LogicalPlan, exprId: ExprId): ScalarSubquery = { + ScalarSubquery(plan, exprId = exprId) + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/execution/PaimonStrategyHelper.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/execution/PaimonStrategyHelper.scala new file mode 100644 index 000000000000..9fb3a7b54a25 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/execution/PaimonStrategyHelper.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.catalog.CatalogUtils +import org.apache.spark.sql.catalyst.plans.logical.TableSpec +import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH + +trait PaimonStrategyHelper { + + def spark: SparkSession + + protected def makeQualifiedDBObjectPath(location: String): String = { + CatalogUtils.makeQualifiedDBObjectPath( + spark.sharedState.conf.get(WAREHOUSE_PATH), + location, + spark.sharedState.hadoopConf) + } + + protected def qualifyLocInTableSpec(tableSpec: TableSpec): TableSpec = { + tableSpec.copy(location = tableSpec.location.map(makeQualifiedDBObjectPath(_))) + } + +} diff --git a/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/execution/shim/PaimonCreateTableAsSelectStrategy.scala b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/execution/shim/PaimonCreateTableAsSelectStrategy.scala new file mode 100644 index 000000000000..61e25b7c16a9 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/main/scala/org/apache/spark/sql/execution/shim/PaimonCreateTableAsSelectStrategy.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.shim + +import org.apache.paimon.CoreOptions +import org.apache.paimon.iceberg.IcebergOptions +import org.apache.paimon.spark.SparkCatalog +import org.apache.paimon.spark.catalog.FormatTableCatalog + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.ResolvedIdentifier +import org.apache.spark.sql.catalyst.plans.logical.{CreateTableAsSelect, LogicalPlan, TableSpec} +import org.apache.spark.sql.connector.catalog.StagingTableCatalog +import org.apache.spark.sql.execution.{PaimonStrategyHelper, SparkPlan, SparkStrategy} +import org.apache.spark.sql.execution.datasources.v2.CreateTableAsSelectExec + +import scala.collection.JavaConverters._ + +case class PaimonCreateTableAsSelectStrategy(spark: SparkSession) + extends SparkStrategy + with PaimonStrategyHelper { + + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { + case CreateTableAsSelect( + ResolvedIdentifier(catalog: SparkCatalog, ident), + parts, + query, + tableSpec: TableSpec, + options, + ifNotExists, + true) => + catalog match { + case _: StagingTableCatalog => + throw new RuntimeException("Paimon can't extend StagingTableCatalog for now.") + case _ => + val coreOptionKeys = CoreOptions.getOptions.asScala.map(_.key()).toSeq + + // Include Iceberg compatibility options in table properties (fix for DataFrame writer options) + val icebergOptionKeys = IcebergOptions.getOptions.asScala.map(_.key()).toSeq + + val allTableOptionKeys = coreOptionKeys ++ icebergOptionKeys + + val (tableOptions, writeOptions) = options.partition { + case (key, _) => allTableOptionKeys.contains(key) + } + val newTableSpec = tableSpec.copy(properties = tableSpec.properties ++ tableOptions) + + val isPartitionedFormatTable = { + catalog match { + case catalog: FormatTableCatalog => + catalog.isFormatTable(newTableSpec.provider.orNull) && parts.nonEmpty + case _ => 
false + } + } + + if (isPartitionedFormatTable) { + throw new UnsupportedOperationException( + "Using CTAS with partitioned format table is not supported yet.") + } + + CreateTableAsSelectExec( + catalog.asTableCatalog, + ident, + parts, + query, + qualifyLocInTableSpec(newTableSpec), + writeOptions, + ifNotExists) :: Nil + } + case _ => Nil + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/resources/function/hive-test-udfs.jar b/paimon-spark/paimon-spark-4.1/src/test/resources/function/hive-test-udfs.jar new file mode 100644 index 000000000000..a5bfa456f668 Binary files /dev/null and b/paimon-spark/paimon-spark-4.1/src/test/resources/function/hive-test-udfs.jar differ diff --git a/paimon-spark/paimon-spark-4.1/src/test/resources/hive-site.xml b/paimon-spark/paimon-spark-4.1/src/test/resources/hive-site.xml new file mode 100644 index 000000000000..bdf2bb090760 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/resources/hive-site.xml @@ -0,0 +1,56 @@ + + + + + hive.metastore.integral.jdo.pushdown + true + + + + hive.metastore.schema.verification + false + + + + hive.metastore.client.capability.check + false + + + + datanucleus.schema.autoCreateTables + true + + + + datanucleus.schema.autoCreateAll + true + + + + + datanucleus.connectionPoolingType + DBCP + + + + hive.metastore.uris + thrift://localhost:9090 + Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore. + + \ No newline at end of file diff --git a/paimon-spark/paimon-spark-4.1/src/test/resources/log4j2-test.properties b/paimon-spark/paimon-spark-4.1/src/test/resources/log4j2-test.properties new file mode 100644 index 000000000000..6f324f5863ac --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/resources/log4j2-test.properties @@ -0,0 +1,38 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +# Set root logger level to OFF to not flood build logs +# set manually to INFO for debugging purposes +rootLogger.level = OFF +rootLogger.appenderRef.test.ref = TestLogger + +appender.testlogger.name = TestLogger +appender.testlogger.type = CONSOLE +appender.testlogger.target = SYSTEM_ERR +appender.testlogger.layout.type = PatternLayout +appender.testlogger.layout.pattern = %-4r [%tid %t] %-5p %c %x - %m%n + +logger.kafka.name = kafka +logger.kafka.level = OFF +logger.kafka2.name = state.change +logger.kafka2.level = OFF + +logger.zookeeper.name = org.apache.zookeeper +logger.zookeeper.level = OFF +logger.I0Itec.name = org.I0Itec +logger.I0Itec.level = OFF diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala new file mode 100644 index 000000000000..9b9393be7118 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest + +class PaimonCDCSourceTest extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + + test("Paimon CDC Source: batch write and streaming read change-log with default scan mode") { + withTempDir { + checkpointDir => + val tableName = "T" + spark.sql(s"DROP TABLE IF EXISTS $tableName") + spark.sql(s""" + |CREATE TABLE $tableName (a INT, b STRING) + |TBLPROPERTIES ( + | 'primary-key'='a', + | 'bucket'='2', + | 'changelog-producer' = 'lookup') + |""".stripMargin) + + spark.sql(s"INSERT INTO $tableName VALUES (1, 'v_1')") + spark.sql(s"INSERT INTO $tableName VALUES (2, 'v_2')") + spark.sql(s"INSERT INTO $tableName VALUES (2, 'v_2_new')") + + val table = loadTable(tableName) + val location = table.location().toString + + val readStream = spark.readStream + .format("paimon") + .option("read.changelog", "true") + .load(location) + .writeStream + .format("memory") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .queryName("mem_table") + .outputMode("append") + .start() + + val currentResult = () => spark.sql("SELECT * FROM mem_table") + try { + 
readStream.processAllAvailable() + val expertResult1 = Row("+I", 1, "v_1") :: Row("+I", 2, "v_2_new") :: Nil + checkAnswer(currentResult(), expertResult1) + + spark.sql(s"INSERT INTO $tableName VALUES (1, 'v_1_new'), (3, 'v_3')") + readStream.processAllAvailable() + val expertResult2 = + Row("+I", 1, "v_1") :: Row("-U", 1, "v_1") :: Row("+U", 1, "v_1_new") :: Row( + "+I", + 2, + "v_2_new") :: Row("+I", 3, "v_3") :: Nil + checkAnswer(currentResult(), expertResult2) + } finally { + readStream.stop() + } + } + } + + test("Paimon CDC Source: batch write and streaming read change-log with scan.snapshot-id") { + withTempDir { + checkpointDir => + val tableName = "T" + spark.sql(s"DROP TABLE IF EXISTS $tableName") + spark.sql(s""" + |CREATE TABLE $tableName (a INT, b STRING) + |TBLPROPERTIES ( + | 'primary-key'='a', + | 'bucket'='2', + | 'changelog-producer' = 'lookup') + |""".stripMargin) + + spark.sql(s"INSERT INTO $tableName VALUES (1, 'v_1')") + spark.sql(s"INSERT INTO $tableName VALUES (2, 'v_2')") + spark.sql(s"INSERT INTO $tableName VALUES (2, 'v_2_new')") + + val table = loadTable(tableName) + val location = table.location().toString + + val readStream = spark.readStream + .format("paimon") + .option("read.changelog", "true") + .option("scan.mode", "from-snapshot") + .option("scan.snapshot-id", 1) + .load(location) + .writeStream + .format("memory") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .queryName("mem_table") + .outputMode("append") + .start() + + val currentResult = () => spark.sql("SELECT * FROM mem_table") + try { + readStream.processAllAvailable() + val expertResult1 = Row("+I", 1, "v_1") :: Row("+I", 2, "v_2") :: Row( + "-U", + 2, + "v_2") :: Row("+U", 2, "v_2_new") :: Nil + checkAnswer(currentResult(), expertResult1) + + spark.sql(s"INSERT INTO $tableName VALUES (1, 'v_1_new'), (3, 'v_3')") + readStream.processAllAvailable() + val expertResult2 = + Row("+I", 1, "v_1") :: Row("-U", 1, "v_1") :: Row("+U", 1, "v_1_new") :: Row( + 
"+I", + 2, + "v_2") :: Row("-U", 2, "v_2") :: Row("+U", 2, "v_2_new") :: Row("+I", 3, "v_3") :: Nil + checkAnswer(currentResult(), expertResult2) + } finally { + readStream.stop() + } + } + } + + test("Paimon CDC Source: streaming write and streaming read change-log") { + withTempDirs { + (checkpointDir1, checkpointDir2) => + val tableName = "T" + spark.sql(s"DROP TABLE IF EXISTS $tableName") + spark.sql(s""" + |CREATE TABLE $tableName (a INT, b STRING) + |TBLPROPERTIES ( + | 'primary-key'='a', + | 'bucket'='2', + | 'changelog-producer' = 'lookup') + |""".stripMargin) + + val table = loadTable(tableName) + val location = table.location().toString + + // streaming write + val inputData = MemoryStream[(Int, String)] + val writeStream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir1.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + // streaming read + val readStream = spark.readStream + .format("paimon") + .option("read.changelog", "true") + .option("scan.mode", "from-snapshot") + .option("scan.snapshot-id", 1) + .load(location) + .writeStream + .format("memory") + .option("checkpointLocation", checkpointDir2.getCanonicalPath) + .queryName("mem_table") + .outputMode("append") + .start() + + val currentResult = () => spark.sql("SELECT * FROM mem_table") + try { + inputData.addData((1, "v_1")) + writeStream.processAllAvailable() + readStream.processAllAvailable() + val expertResult1 = Row("+I", 1, "v_1") :: Nil + checkAnswer(currentResult(), expertResult1) + + inputData.addData((2, "v_2")) + writeStream.processAllAvailable() + readStream.processAllAvailable() + val expertResult2 = Row("+I", 1, "v_1") :: Row("+I", 2, "v_2") :: Nil + checkAnswer(currentResult(), expertResult2) + + inputData.addData((2, "v_2_new")) + writeStream.processAllAvailable() + readStream.processAllAvailable() + val expertResult3 = Row("+I", 1, 
"v_1") :: Row("+I", 2, "v_2") :: Row( + "-U", + 2, + "v_2") :: Row("+U", 2, "v_2_new") :: Nil + checkAnswer(currentResult(), expertResult3) + + inputData.addData((1, "v_1_new"), (3, "v_3")) + writeStream.processAllAvailable() + readStream.processAllAvailable() + val expertResult4 = + Row("+I", 1, "v_1") :: Row("-U", 1, "v_1") :: Row("+U", 1, "v_1_new") :: Row( + "+I", + 2, + "v_2") :: Row("-U", 2, "v_2") :: Row("+U", 2, "v_2_new") :: Row("+I", 3, "v_3") :: Nil + checkAnswer(currentResult(), expertResult4) + } finally { + readStream.stop() + } + } + } + + test("Paimon CDC Source: streaming read change-log with audit_log system table") { + withTable("T") { + withTempDir { + checkpointDir => + spark.sql( + s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a','bucket'='2', 'changelog-producer' = 'lookup') + |""".stripMargin) + + val readStream = spark.readStream + .format("paimon") + .table("`T$audit_log`") + .writeStream + .format("memory") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .queryName("mem_table") + .outputMode("append") + .start() + + val currentResult = () => spark.sql("SELECT * FROM mem_table") + try { + spark.sql(s"INSERT INTO T VALUES (1, 'v_1')") + readStream.processAllAvailable() + checkAnswer(currentResult(), Row("+I", 1, "v_1") :: Nil) + + spark.sql(s"INSERT INTO T VALUES (2, 'v_2')") + readStream.processAllAvailable() + checkAnswer(currentResult(), Row("+I", 1, "v_1") :: Row("+I", 2, "v_2") :: Nil) + } finally { + readStream.stop() + } + } + } + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala new file mode 100644 index 000000000000..9935288db9a7 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala @@ -0,0 +1,365 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark + +import org.apache.paimon.Snapshot.CommitKind._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.functions.{col, mean, window} +import org.apache.spark.sql.streaming.StreamTest + +import java.sql.Date + +class PaimonSinkTest extends PaimonSparkTestBase with StreamTest { + + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.sql.catalog.paimon.cache-enabled", "false") + } + + import testImplicits._ + + test("Paimon Sink: forEachBatch") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], id: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query 
= () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Sink: append mode") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and sink into it in append mode + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("paimon") + .start(location) + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Sink: complete mode") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define an append-only table and sink into it in complete mode + spark.sql(s""" + |CREATE TABLE T (city String, population Long) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData.toDS + .toDF("uid", "city") + .groupBy("city") + .count() + .toDF("city", "population") + .writeStream + .outputMode("complete") + 
.option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("paimon") + .start(location) + + val query = () => spark.sql("SELECT * FROM T ORDER BY city") + + try { + inputData.addData((1, "HZ"), (2, "BJ"), (3, "BJ")) + stream.processAllAvailable() + checkAnswer(query(), Row("BJ", 2L) :: Row("HZ", 1L) :: Nil) + + inputData.addData((4, "SH"), (5, "BJ"), (6, "HZ")) + stream.processAllAvailable() + checkAnswer(query(), Row("BJ", 3L) :: Row("HZ", 2L) :: Row("SH", 1L) :: Nil) + + inputData.addData((7, "HZ"), (8, "SH")) + stream.processAllAvailable() + checkAnswer(query(), Row("BJ", 3L) :: Row("HZ", 3L) :: Row("SH", 2L) :: Nil) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Sink: update mode") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and sink into it in update mode + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + intercept[RuntimeException] { + inputData + .toDF() + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .outputMode("update") + .format("paimon") + .start(location) + } + } + } + } + + test("Paimon Sink: aggregation and watermark") { + withTempDir { + checkpointDir => + // define an append-only table and sink into it with aggregation and watermark in append mode + spark.sql(s""" + |CREATE TABLE T (start Timestamp, stockId INT, avg_price DOUBLE) + |TBLPROPERTIES ('bucket'='3', 'bucket-key'='stockId') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Long, Int, Double)] + val data = inputData.toDS + .toDF("time", "stockId", "price") + .selectExpr("CAST(time AS timestamp) AS timestamp", "stockId", "price") + .withWatermark("timestamp", "10 seconds") + .groupBy(window($"timestamp", "5 seconds"), col("stockId")) + 
.agg(mean("price").as("avg_price")) + .select("window.start", "stockId", "avg_price") + + val stream = + data.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("paimon") + .start(location) + + val query = () => + spark.sql( + "SELECT CAST(start as BIGINT) AS start, stockId, avg_price FROM T ORDER BY start, stockId") + + try { + inputData.addData((101L, 1, 1.0d), (102, 1, 2.0d), (104, 2, 20.0d)) + stream.processAllAvailable() + inputData.addData((105L, 2, 40.0d), (107, 2, 60.0d), (115, 3, 300.0d)) + stream.processAllAvailable() + inputData.addData((200L, 99, 99.9d)) + stream.processAllAvailable() + checkAnswer( + query(), + Row(100L, 1, 1.5d) :: Row(100L, 2, 20.0d) :: Row(105L, 2, 50.0d) :: Row( + 115L, + 3, + 300.0d) :: Nil) + } finally { + if (stream != null) { + stream.stop() + } + } + } + } + + test("Paimon Sink: enable schema evolution") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and sink into it with schema evolution in append mode + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val date = Date.valueOf("2023-08-10") + spark.sql("INSERT INTO T VALUES (1, '2023-08-09'), (2, '2023-08-09')") + checkAnswer( + spark.sql("SELECT * FROM T ORDER BY a, b"), + Row(1, "2023-08-09") :: Row(2, "2023-08-09") :: Nil) + + val inputData = MemoryStream[(Long, Date, Int)] + val stream = inputData + .toDS() + .toDF("a", "b", "c") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .option("write.merge-schema", "true") + .option("write.merge-schema.explicit-cast", "true") + .format("paimon") + .start(location) + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + inputData.addData((1L, date, 123), (3L, date, 456)) + stream.processAllAvailable() + + checkAnswer( + query(), + Row(1L, date, 123) :: Row(2L, 
Date.valueOf("2023-08-09"), null) :: Row( + 3L, + date, + 456) :: Nil) + } finally { + stream.stop() + } + } + } + } + + test("Paimon SinK: set full-compaction.delta-commits with batch write") { + for (useV2Write <- Seq("true", "false")) { + withSparkSQLConf("spark.paimon.write.use-v2-write" -> useV2Write) { + withTable("t") { + sql(""" + |CREATE TABLE t ( + | a INT, + | b INT + |) TBLPROPERTIES ( + | 'primary-key'='a', + | 'bucket'='1', + | 'full-compaction.delta-commits'='1' + |) + |""".stripMargin) + + sql("INSERT INTO t VALUES (1, 1)") + sql("INSERT INTO t VALUES (2, 2)") + checkAnswer(sql("SELECT * FROM t ORDER BY a"), Seq(Row(1, 1), Row(2, 2))) + assert(loadTable("t").snapshotManager().latestSnapshot().commitKind == COMPACT) + } + } + } + } + + test("Paimon SinK: set full-compaction.delta-commits with streaming write") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (a INT, b INT) + |TBLPROPERTIES ( + | 'primary-key'='a', + | 'bucket'='1', + | 'full-compaction.delta-commits'='2' + |) + |""".stripMargin) + val table = loadTable("T") + val location = table.location().toString + + val inputData = MemoryStream[(Int, Int)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("paimon") + .start(location) + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + inputData.addData((1, 1)) + stream.processAllAvailable() + checkAnswer(query(), Seq(Row(1, 1))) + assert(table.snapshotManager().latestSnapshot().commitKind == APPEND) + + inputData.addData((2, 1)) + stream.processAllAvailable() + checkAnswer(query(), Seq(Row(1, 1), Row(2, 1))) + assert(table.snapshotManager().latestSnapshot().commitKind == COMPACT) + + inputData.addData((2, 2)) + stream.processAllAvailable() + checkAnswer(query(), Seq(Row(1, 1), Row(2, 2))) + assert(table.snapshotManager().latestSnapshot().commitKind == APPEND) + + 
inputData.addData((3, 1)) + stream.processAllAvailable() + checkAnswer(query(), Seq(Row(1, 1), Row(2, 2), Row(3, 1))) + assert(table.snapshotManager().latestSnapshot().commitKind == COMPACT) + } finally { + stream.stop() + } + } + } + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala new file mode 100644 index 000000000000..3208609835f1 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark + +import org.apache.paimon.catalog.{Catalog, Identifier} +import org.apache.paimon.data.GenericRow +import org.apache.paimon.fs.FileIO +import org.apache.paimon.fs.local.LocalFileIO +import org.apache.paimon.spark.catalog.WithPaimonCatalog +import org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions +import org.apache.paimon.spark.sql.{SparkVersionSupport, WithTableOptions} +import org.apache.paimon.table.FileStoreTable + +import org.apache.spark.SparkConf +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.connector.catalog.{Identifier => SparkIdentifier} +import org.apache.spark.sql.connector.read.Scan +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.paimon.Utils +import org.apache.spark.sql.test.SharedSparkSession + +import java.io.File +import java.util.{TimeZone, UUID} + +import scala.util.Random + +class PaimonSparkTestBase + extends QueryTest + with SharedSparkSession + with WithTableOptions + with SparkVersionSupport { + + protected lazy val commitUser: String = UUID.randomUUID.toString + + protected lazy val fileIO: FileIO = LocalFileIO.create + + protected lazy val tempDBDir: File = Utils.createTempDir + + protected def paimonCatalog: Catalog = { + spark.sessionState.catalogManager.currentCatalog.asInstanceOf[WithPaimonCatalog].paimonCatalog() + } + + protected val dbName0: String = "test" + + protected val tableName0: String = "T" + + /** Add paimon ([[SparkCatalog]] in fileSystem) catalog */ + override protected def sparkConf: SparkConf = { + val serializer = if (Random.nextBoolean()) { + "org.apache.spark.serializer.KryoSerializer" + } else { + "org.apache.spark.serializer.JavaSerializer" + } + super.sparkConf + .set("spark.sql.warehouse.dir", tempDBDir.getCanonicalPath) + .set("spark.sql.catalog.paimon", classOf[SparkCatalog].getName) + 
.set("spark.sql.catalog.paimon.warehouse", tempDBDir.getCanonicalPath) + .set("spark.sql.extensions", classOf[PaimonSparkSessionExtensions].getName) + .set("spark.serializer", serializer) + } + + override protected def beforeAll(): Unit = { + super.beforeAll() + spark.sql(s"USE paimon") + spark.sql(s"CREATE DATABASE IF NOT EXISTS paimon.$dbName0") + spark.sql(s"USE paimon.$dbName0") + } + + override protected def afterAll(): Unit = { + try { + spark.sql(s"USE paimon") + spark.sql(s"DROP TABLE IF EXISTS $dbName0.$tableName0") + spark.sql("USE default") + spark.sql(s"DROP DATABASE paimon.$dbName0 CASCADE") + } finally { + super.afterAll() + } + } + + /** Default is paimon catalog */ + override protected def beforeEach(): Unit = { + super.beforeAll() + spark.sql(s"USE paimon") + spark.sql(s"USE paimon.$dbName0") + spark.sql(s"DROP TABLE IF EXISTS $tableName0") + } + + protected def withTempDirs(f: (File, File) => Unit): Unit = { + withTempDir(file1 => withTempDir(file2 => f(file1, file2))) + } + + protected def withTimeZone(timeZone: String)(f: => Unit): Unit = { + withSparkSQLConf("spark.sql.session.timeZone" -> timeZone) { + val originTimeZone = TimeZone.getDefault + try { + TimeZone.setDefault(TimeZone.getTimeZone(timeZone)) + f + } finally { + TimeZone.setDefault(originTimeZone) + } + } + } + + // Since SPARK-46227 has changed the definition of withSQLConf that resulted in + // incompatibility between the Spark3.x and Spark4.x, So Paimon declare a separate method + // to provide the same function. 
+ protected def withSparkSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { + withSparkSQLConf0(pairs: _*)(f) + } + + private def withSparkSQLConf0(pairs: (String, String)*)(f: => Unit): Unit = { + val conf = SQLConf.get + val (keys, values) = pairs.unzip + val currentValues = keys.map { + key => + if (conf.contains(key)) { + Some(conf.getConfString(key)) + } else { + None + } + } + (keys, values).zipped.foreach { + (k, v) => + if (SQLConf.isStaticConfigKey(k)) { + throw new RuntimeException(s"Cannot modify the value of a static config: $k") + } + conf.setConfString(k, v) + } + try f + finally { + keys.zip(currentValues).foreach { + case (key, Some(value)) => conf.setConfString(key, value) + case (key, None) => conf.unsetConf(key) + } + } + } + + def loadTable(tableName: String): FileStoreTable = { + loadTable(dbName0, tableName) + } + + def loadTable(dbName: String, tableName: String): FileStoreTable = { + paimonCatalog.getTable(Identifier.create(dbName, tableName)).asInstanceOf[FileStoreTable] + } + + protected def createRelationV2(tableName: String): DataSourceV2Relation = { + val sparkTable = SparkTable(loadTable(tableName)) + DataSourceV2Relation.create( + sparkTable, + Some(spark.sessionState.catalogManager.currentCatalog), + Some(SparkIdentifier.of(Array(this.dbName0), tableName)) + ) + } + + def getScan(sqlText: String): Scan = { + sql(sqlText).queryExecution.optimizedPlan + .collectFirst { case relation: DataSourceV2ScanRelation => relation } + .get + .scan + } + + protected def getPaimonScan(sqlText: String): PaimonScan = { + getScan(sqlText).asInstanceOf[PaimonScan] + } + + protected def getFormatTableScan(sqlText: String): PaimonFormatTableScan = { + getScan(sqlText).asInstanceOf[PaimonFormatTableScan] + } + + object GenericRow { + def of(values: Any*): GenericRow = { + val row = new GenericRow(values.length) + values.zipWithIndex.foreach { + case (value, index) => + row.setField(index, value) + } + row + } + } +} diff --git 
a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala new file mode 100644 index 000000000000..df1df747897d --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.spark.PaimonSparkTestBase + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest + +class AlterBranchProcedureTest extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + test("Paimon Procedure: alter schema structure and test $branch syntax.") { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + + val table = loadTable("T") + val branchManager = table.branchManager() + + // create branch with tag + checkAnswer( + spark.sql("CALL paimon.sys.create_tag(table => 'test.T', tag => 's_2', snapshot => 2)"), + Row(true) :: Nil) + checkAnswer( + spark.sql( + "CALL paimon.sys.create_branch(table => 'test.T', branch => 'snapshot_branch', tag => 's_2')"), + Row(true) :: Nil) + assert(branchManager.branchExists("snapshot_branch")) + + spark.sql("INSERT INTO T VALUES (1, 'APPLE'), (2,'DOG'), (2, 
'horse')") + spark.sql("ALTER TABLE `T$branch_snapshot_branch` ADD COLUMNS(c INT)") + spark.sql( + "INSERT INTO `T$branch_snapshot_branch` VALUES " + "(1,'cherry', 100), (2,'bird', 200), (3, 'wolf', 400)") + + checkAnswer( + spark.sql("SELECT * FROM T ORDER BY a, b"), + Row(1, "APPLE") :: Row(2, "horse") :: Nil) + checkAnswer( + spark.sql("SELECT * FROM `T$branch_snapshot_branch` ORDER BY a, b,c"), + Row(1, "cherry", 100) :: Row(2, "bird", 200) :: Row(3, "wolf", 400) :: Nil) + assert(branchManager.branchExists("snapshot_branch")) + } + } + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala new file mode 100644 index 000000000000..111e604b1ef0 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.spark.PaimonSparkTestBase + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest + +class BranchProcedureTest extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + test("Paimon Procedure: create, query, write and delete branch") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + + // create tags + checkAnswer( + spark.sql( + "CALL paimon.sys.create_tag(table => 'test.T', tag => 'test_tag', snapshot => 2)"), + Row(true) :: Nil) + checkAnswer( + spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), + Row("test_tag") :: Nil) + + // create branch with tag + checkAnswer( + spark.sql( + "CALL paimon.sys.create_branch(table => 'test.T', branch => 'test_branch', tag => 'test_tag')"), + Row(true) :: Nil) + val table = loadTable("T") + val branchManager 
= table.branchManager() + assert(branchManager.branchExists("test_branch")) + + // query from branch + checkAnswer( + spark.sql("SELECT * FROM `T$branch_test_branch` ORDER BY a"), + Row(1, "a") :: Row(2, "b") :: Nil + ) + checkAnswer( + spark.read.format("paimon").option("branch", "test_branch").table("T").orderBy("a"), + Row(1, "a") :: Row(2, "b") :: Nil + ) + + // update branch + spark.sql("INSERT INTO `T$branch_test_branch` VALUES (3, 'c')") + checkAnswer( + spark.sql("SELECT * FROM `T$branch_test_branch` ORDER BY a"), + Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil + ) + // create tags + checkAnswer( + spark.sql( + "CALL paimon.sys.create_tag(table => 'test.`T$branch_test_branch`', tag => 'test_tag2', snapshot => 3)"), + Row(true) :: Nil) + + // create branch from another branch. + checkAnswer( + spark.sql( + "CALL paimon.sys.create_branch(table => 'test.`T$branch_test_branch`', branch => 'test_branch2', tag => 'test_tag2')"), + Row(true) :: Nil) + checkAnswer( + spark.sql("SELECT * FROM `T$branch_test_branch2` ORDER BY a"), + Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil + ) + + // create empty branch + checkAnswer( + spark.sql( + "CALL paimon.sys.create_branch(table => 'test.T', branch => 'empty_branch')"), + Row(true) :: Nil) + assert(branchManager.branchExists("empty_branch")) + checkAnswer( + spark.sql("SELECT * FROM `T$branch_empty_branch` ORDER BY a"), + Nil + ) + + // delete branch + checkAnswer( + spark.sql( + "CALL paimon.sys.delete_branch(table => 'test.T', branch => 'test_branch')"), + Row(true) :: Nil) + assert(!branchManager.branchExists("test_branch")) + intercept[Exception] { + spark.sql("SELECT * FROM `T$branch_test_branch` ORDER BY a") + } + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Branch: read with scan.fallback-branch") { + withTable("T") { + sql(""" + |CREATE TABLE T ( + | dt STRING NOT NULL, + | name STRING NOT NULL, + | amount BIGINT + |) PARTITIONED BY (dt) + |""".stripMargin) + + sql("ALTER TABLE T SET 
TBLPROPERTIES ('k1' = 'v1')") + sql("ALTER TABLE T SET TBLPROPERTIES ('k2' = 'v2')") + + sql("CALL sys.create_branch('test.T', 'test')") + sql("ALTER TABLE T SET TBLPROPERTIES ('scan.fallback-branch' = 'test')") + + sql( + "INSERT INTO `T$branch_test` VALUES ('20240725', 'apple', 4), ('20240725', 'peach', 10), ('20240726', 'cherry', 3), ('20240726', 'pear', 6)") + sql("INSERT INTO T VALUES ('20240725', 'apple', 5), ('20240725', 'banana', 7)") + + checkAnswer( + sql("SELECT * FROM T ORDER BY amount"), + Seq( + Row("20240726", "cherry", 3), + Row("20240725", "apple", 5), + Row("20240726", "pear", 6), + Row("20240725", "banana", 7)) + ) + + sql("ALTER TABLE T UNSET TBLPROPERTIES ('scan.fallback-branch')") + checkAnswer( + sql("SELECT * FROM T ORDER BY amount"), + Seq(Row("20240725", "apple", 5), Row("20240725", "banana", 7))) + } + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTest.scala new file mode 100644 index 000000000000..322d50a62127 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.procedure + +class CompactProcedureTest extends CompactProcedureTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala new file mode 100644 index 000000000000..19f6bc25280e --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala @@ -0,0 +1,1324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.Snapshot.CommitKind +import org.apache.paimon.fs.Path +import org.apache.paimon.spark.PaimonSparkTestBase +import org.apache.paimon.spark.utils.SparkProcedureUtils +import org.apache.paimon.table.FileStoreTable +import org.apache.paimon.table.source.DataSplit + +import org.apache.spark.scheduler.{SparkListener, SparkListenerStageSubmitted} +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest +import org.assertj.core.api.Assertions +import org.scalatest.time.Span + +import java.util + +import scala.collection.JavaConverters._ +import scala.util.Random + +/** Test compact procedure. See [[CompactProcedure]]. */ +abstract class CompactProcedureTestBase extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + + // ----------------------- Minor Compact ----------------------- + + test("Paimon Procedure: compact aware bucket pk table with minor compact strategy") { + withTable("T") { + spark.sql(s""" + |CREATE TABLE T (id INT, value STRING, pt STRING) + |TBLPROPERTIES ('primary-key'='id, pt', 'bucket'='1', 'write-only'='true') + |PARTITIONED BY (pt) + |""".stripMargin) + + val table = loadTable("T") + + spark.sql(s"INSERT INTO T VALUES (1, 'a', 'p1'), (2, 'b', 'p2')") + spark.sql(s"INSERT INTO T VALUES (3, 'c', 'p1'), (4, 'd', 'p2')") + + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.APPEND)).isTrue + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(2) + + spark.sql( + "CALL sys.compact(table => 'T', compact_strategy => 'minor'," + + "options => 'num-sorted-run.compaction-trigger=3')") + + // Due to the limitation of parameter 'num-sorted-run.compaction-trigger' = 3, so compact is not + // performed. 
+ Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.APPEND)).isTrue + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(2) + + // Make par-p1 has 3 datafile and par-p2 has 2 datafile, so par-p2 will not be picked out to + // compact. + spark.sql(s"INSERT INTO T VALUES (1, 'a', 'p1')") + + spark.sql( + "CALL sys.compact(table => 'T', compact_strategy => 'minor'," + + "options => 'num-sorted-run.compaction-trigger=3')") + + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(4) + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + + val splits = table.newSnapshotReader.read.dataSplits + splits.forEach( + split => { + Assertions + .assertThat(split.dataFiles.size) + .isEqualTo(if (split.partition().getString(0).toString == "p2") 2 else 1) + }) + } + } + + // ----------------------- Sort Compact ----------------------- + + test("Paimon Procedure: sort compact") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (a INT, b INT) + |TBLPROPERTIES ('bucket'='-1') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, Int)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // test zorder sort + inputData.addData((0, 0)) + inputData.addData((0, 1)) + inputData.addData((0, 2)) + inputData.addData((1, 0)) + inputData.addData((1, 1)) + inputData.addData((1, 2)) + inputData.addData((2, 0)) + inputData.addData((2, 1)) + inputData.addData((2, 2)) + stream.processAllAvailable() + + val result = new util.ArrayList[Row]() + for (a <- 0 until 3) { + for (b <- 0 until 3) { + result.add(Row(a, b)) + } + } + 
Assertions.assertThat(query().collect()).containsExactlyElementsOf(result) + + checkAnswer( + spark.sql( + "CALL paimon.sys.compact(table => 'T', order_strategy => 'zorder', order_by => 'a,b')"), + Row(true) :: Nil) + + val result2 = new util.ArrayList[Row]() + result2.add(0, Row(0, 0)) + result2.add(1, Row(0, 1)) + result2.add(2, Row(1, 0)) + result2.add(3, Row(1, 1)) + result2.add(4, Row(0, 2)) + result2.add(5, Row(1, 2)) + result2.add(6, Row(2, 0)) + result2.add(7, Row(2, 1)) + result2.add(8, Row(2, 2)) + + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result2) + + // test hilbert sort + val result3 = new util.ArrayList[Row]() + result3.add(0, Row(0, 0)) + result3.add(1, Row(0, 1)) + result3.add(2, Row(1, 1)) + result3.add(3, Row(1, 0)) + result3.add(4, Row(2, 0)) + result3.add(5, Row(2, 1)) + result3.add(6, Row(2, 2)) + result3.add(7, Row(1, 2)) + result3.add(8, Row(0, 2)) + + checkAnswer( + spark.sql( + "CALL paimon.sys.compact(table => 'T', order_strategy => 'hilbert', order_by => 'a,b')"), + Row(true) :: Nil) + + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result3) + + // test order sort + checkAnswer( + spark.sql( + "CALL paimon.sys.compact(table => 'T', order_strategy => 'order', order_by => 'a,b')"), + Row(true) :: Nil) + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: sort compact with partition") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (p INT, a INT, b INT) + |TBLPROPERTIES ('bucket'='-1') + |PARTITIONED BY (p) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, Int, Int)] + val stream = inputData + .toDS() + .toDF("p", "a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + 
batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query0 = () => spark.sql("SELECT * FROM T WHERE p=0") + val query1 = () => spark.sql("SELECT * FROM T WHERE p=1") + + try { + // test zorder sort + inputData.addData((0, 0, 0)) + inputData.addData((0, 0, 1)) + inputData.addData((0, 0, 2)) + inputData.addData((0, 1, 0)) + inputData.addData((0, 1, 1)) + inputData.addData((0, 1, 2)) + inputData.addData((0, 2, 0)) + inputData.addData((0, 2, 1)) + inputData.addData((0, 2, 2)) + + inputData.addData((1, 0, 0)) + inputData.addData((1, 0, 1)) + inputData.addData((1, 0, 2)) + inputData.addData((1, 1, 0)) + inputData.addData((1, 1, 1)) + inputData.addData((1, 1, 2)) + inputData.addData((1, 2, 0)) + inputData.addData((1, 2, 1)) + inputData.addData((1, 2, 2)) + stream.processAllAvailable() + + val result0 = new util.ArrayList[Row]() + for (a <- 0 until 3) { + for (b <- 0 until 3) { + result0.add(Row(0, a, b)) + } + } + val result1 = new util.ArrayList[Row]() + for (a <- 0 until 3) { + for (b <- 0 until 3) { + result1.add(Row(1, a, b)) + } + } + Assertions.assertThat(query0().collect()).containsExactlyElementsOf(result0) + Assertions.assertThat(query1().collect()).containsExactlyElementsOf(result1) + + checkAnswer( + spark.sql( + "CALL paimon.sys.compact(table => 'T', partitions => 'p=0', order_strategy => 'zorder', order_by => 'a,b')"), + Row(true) :: Nil) + + val result2 = new util.ArrayList[Row]() + result2.add(0, Row(0, 0, 0)) + result2.add(1, Row(0, 0, 1)) + result2.add(2, Row(0, 1, 0)) + result2.add(3, Row(0, 1, 1)) + result2.add(4, Row(0, 0, 2)) + result2.add(5, Row(0, 1, 2)) + result2.add(6, Row(0, 2, 0)) + result2.add(7, Row(0, 2, 1)) + result2.add(8, Row(0, 2, 2)) + + Assertions.assertThat(query0().collect()).containsExactlyElementsOf(result2) + Assertions.assertThat(query1().collect()).containsExactlyElementsOf(result1) + + // test hilbert sort + val result3 = new util.ArrayList[Row]() + result3.add(0, Row(0, 0, 0)) + result3.add(1, 
Row(0, 0, 1)) + result3.add(2, Row(0, 1, 1)) + result3.add(3, Row(0, 1, 0)) + result3.add(4, Row(0, 2, 0)) + result3.add(5, Row(0, 2, 1)) + result3.add(6, Row(0, 2, 2)) + result3.add(7, Row(0, 1, 2)) + result3.add(8, Row(0, 0, 2)) + + checkAnswer( + spark.sql( + "CALL paimon.sys.compact(table => 'T', partitions => 'p=0', order_strategy => 'hilbert', order_by => 'a,b')"), + Row(true) :: Nil) + + Assertions.assertThat(query0().collect()).containsExactlyElementsOf(result3) + Assertions.assertThat(query1().collect()).containsExactlyElementsOf(result1) + + // test order sort + checkAnswer( + spark.sql( + "CALL paimon.sys.compact(table => 'T', partitions => 'p=0', order_strategy => 'order', order_by => 'a,b')"), + Row(true) :: Nil) + Assertions.assertThat(query0().collect()).containsExactlyElementsOf(result0) + Assertions.assertThat(query1().collect()).containsExactlyElementsOf(result1) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: sort compact with multi-partitions") { + Seq("order", "zorder").foreach { + orderStrategy => + { + withTable("T") { + spark.sql(s""" + |CREATE TABLE T (id INT, pt STRING) + |PARTITIONED BY (pt) + |""".stripMargin) + + spark.sql(s"""INSERT INTO T VALUES + |(1, 'p1'), (3, 'p1'), + |(1, 'p2'), (4, 'p2'), + |(3, 'p3'), (2, 'p3'), + |(1, 'p4'), (2, 'p4') + |""".stripMargin) + + spark.sql(s"""INSERT INTO T VALUES + |(4, 'p1'), (2, 'p1'), + |(2, 'p2'), (3, 'p2'), + |(1, 'p3'), (4, 'p3'), + |(3, 'p4'), (4, 'p4') + |""".stripMargin) + + checkAnswer( + spark.sql( + s"CALL sys.compact(table => 'T', order_strategy => '$orderStrategy', order_by => 'id')"), + Seq(true).toDF()) + + val result = List(Row(1), Row(2), Row(3), Row(4)).asJava + Seq("p1", "p2", "p3", "p4").foreach { + pt => + Assertions + .assertThat(spark.sql(s"SELECT id FROM T WHERE pt='$pt'").collect()) + .containsExactlyElementsOf(result) + } + } + } + } + } + + test("Paimon Procedure: sort compact with partition filter") { + withTable("t") { + sql("CREATE TABLE t 
(a INT, pt INT) PARTITIONED BY (pt)") + sql("INSERT INTO t VALUES (1, 1)") + sql("INSERT INTO t VALUES (2, 1)") + sql( + "CALL sys.compact(table => 't', order_strategy => 'order', where => 'pt = 1', order_by => 'a')") + val table = loadTable("t") + assert(table.latestSnapshot().get().commitKind.equals(CommitKind.OVERWRITE)) + checkAnswer(sql("SELECT * FROM t ORDER BY a"), Seq(Row(1, 1), Row(2, 1))) + } + } + + test("Paimon Procedure: compact for pk") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (a INT, b INT) + |TBLPROPERTIES ('primary-key'='a,b', 'bucket'='1') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, Int)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + inputData.addData((0, 0)) + inputData.addData((0, 1)) + inputData.addData((0, 2)) + inputData.addData((1, 0)) + inputData.addData((1, 1)) + inputData.addData((1, 2)) + inputData.addData((2, 0)) + inputData.addData((2, 1)) + inputData.addData((2, 2)) + stream.processAllAvailable() + + val result = new util.ArrayList[Row]() + for (a <- 0 until 3) { + for (b <- 0 until 3) { + result.add(Row(a, b)) + } + } + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result) + checkAnswer(spark.sql("CALL paimon.sys.compact(table => 'T')"), Row(true) :: Nil) + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: compact aware bucket pk table") { + Seq(1, -1).foreach( + bucket => { + withTable("T") { + spark.sql( + s""" + |CREATE TABLE T (id INT, value STRING, pt STRING) + |TBLPROPERTIES ('primary-key'='id, pt', 
'bucket'='$bucket', 'write-only'='true') + |PARTITIONED BY (pt) + |""".stripMargin) + + val table = loadTable("T") + + spark.sql(s"INSERT INTO T VALUES (1, 'a', 'p1'), (2, 'b', 'p2')") + spark.sql(s"INSERT INTO T VALUES (3, 'c', 'p1'), (4, 'd', 'p2')") + + spark.sql("CALL sys.compact(table => 'T', partitions => 'pt=\"p1\"')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(3) + + spark.sql(s"CALL sys.compact(table => 'T')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(4) + + // compact condition no longer met + spark.sql(s"CALL sys.compact(table => 'T')") + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(4) + + checkAnswer( + spark.sql(s"SELECT * FROM T ORDER BY id"), + Row(1, "a", "p1") :: Row(2, "b", "p2") :: Row(3, "c", "p1") :: Row(4, "d", "p2") :: Nil) + } + }) + } + + test("Paimon Procedure: compact aware bucket pk table with many small files") { + Seq(3, -1).foreach( + bucket => { + withTable("T") { + spark.sql( + s""" + |CREATE TABLE T (id INT, value STRING, pt STRING) + |TBLPROPERTIES ('primary-key'='id, pt', 'bucket'='$bucket', 'write-only'='true', + |'source.split.target-size'='128m','source.split.open-file-cost'='32m') -- simulate multiple splits in a single bucket + |PARTITIONED BY (pt) + |""".stripMargin) + + val table = loadTable("T") + + val count = 100 + for (i <- 0 until count) { + spark.sql(s"INSERT INTO T VALUES ($i, 'a', 'p${i % 2}')") + } + + spark.sql(s"CALL sys.compact(table => 'T')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + checkAnswer(spark.sql(s"SELECT COUNT(*) FROM T"), Row(count) :: Nil) + } + }) + } + + test("Paimon Procedure: compact unaware bucket append table") { + spark.sql(s""" + |CREATE TABLE T (id INT, value STRING, pt STRING) + |TBLPROPERTIES ('bucket'='-1', 'write-only'='true', 
'compaction.min.file-num'='2') + |PARTITIONED BY (pt) + |""".stripMargin) + + val table = loadTable("T") + + spark.sql(s"INSERT INTO T VALUES (1, 'a', 'p1'), (2, 'b', 'p2')") + spark.sql(s"INSERT INTO T VALUES (3, 'c', 'p1'), (4, 'd', 'p2')") + spark.sql(s"INSERT INTO T VALUES (5, 'e', 'p1'), (6, 'f', 'p2')") + + spark.sql("CALL sys.compact(table => 'T', partitions => 'pt=\"p1\"')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(4) + + spark.sql(s"CALL sys.compact(table => 'T')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(5) + + // compact condition no longer met + spark.sql(s"CALL sys.compact(table => 'T')") + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(5) + + checkAnswer( + spark.sql(s"SELECT * FROM T ORDER BY id"), + Row(1, "a", "p1") :: Row(2, "b", "p2") :: Row(3, "c", "p1") :: Row(4, "d", "p2") :: Row( + 5, + "e", + "p1") :: Row(6, "f", "p2") :: Nil) + } + + test("Paimon Procedure: compact unaware bucket append table with many small files") { + spark.sql(s""" + |CREATE TABLE T (id INT, value STRING, pt STRING) + |TBLPROPERTIES ('bucket'='-1', 'write-only'='true') + |PARTITIONED BY (pt) + |""".stripMargin) + + val table = loadTable("T") + + val count = 100 + for (i <- 0 until count) { + spark.sql(s"INSERT INTO T VALUES ($i, 'a', 'p${i % 2}')") + } + + spark.sql(s"CALL sys.compact(table => 'T')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + checkAnswer(spark.sql(s"SELECT COUNT(*) FROM T"), Row(count) :: Nil) + } + + test("Paimon Procedure: compact with wrong usage") { + spark.sql(s""" + |CREATE TABLE T (id INT, value STRING, pt STRING) + |TBLPROPERTIES ('bucket'='-1', 'write-only'='true') + |PARTITIONED BY (pt) + |""".stripMargin) + + assert(intercept[IllegalArgumentException] { + spark.sql( + "CALL sys.compact(table 
=> 'T', partitions => 'pt = \"p1\"', where => 'pt = \"p1\"')") + }.getMessage.contains("partitions and where cannot be used together")) + + assert(intercept[IllegalArgumentException] { + spark.sql("CALL sys.compact(table => 'T', partitions => 'id = 1')") + }.getMessage.contains("Only partition predicate is supported")) + + assert(intercept[IllegalArgumentException] { + spark.sql("CALL sys.compact(table => 'T', where => 'id > 1 AND pt = \"p1\"')") + }.getMessage.contains("Only partition predicate is supported")) + + assert(intercept[IllegalArgumentException] { + spark.sql("CALL sys.compact(table => 'T', order_strategy => 'sort', order_by => 'pt')") + }.getMessage.contains("order_by should not contain partition cols")) + + assert(intercept[IllegalArgumentException] { + spark.sql( + "CALL sys.compact(table => 'T', order_strategy => 'sort', order_by => 'id', partition_idle_time =>'5s')") + }.getMessage.contains("sort compact do not support 'partition_idle_time'")) + } + + test("Paimon Procedure: compact with where") { + spark.sql( + s""" + |CREATE TABLE T (id INT, value STRING, dt STRING, hh INT) + |TBLPROPERTIES ('bucket'='1', 'bucket-key'='id', 'write-only'='true', 'compaction.min.file-num'='1') + |PARTITIONED BY (dt, hh) + |""".stripMargin) + + val table = loadTable("T") + val fileIO = table.fileIO() + + spark.sql(s"INSERT INTO T VALUES (1, '1', '2024-01-01', 0), (2, '2', '2024-01-01', 1)") + spark.sql(s"INSERT INTO T VALUES (3, '3', '2024-01-01', 0), (4, '4', '2024-01-01', 1)") + spark.sql(s"INSERT INTO T VALUES (5, '5', '2024-01-02', 0), (6, '6', '2024-01-02', 1)") + spark.sql(s"INSERT INTO T VALUES (7, '7', '2024-01-02', 0), (8, '8', '2024-01-02', 1)") + + spark.sql("CALL sys.compact(table => 'T', where => 'dt = \"2024-01-01\" and hh >= 1')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + Assertions + .assertThat( + fileIO.listStatus(new Path(table.location(), "dt=2024-01-01/hh=0/bucket-0")).length) + .isEqualTo(2) + 
Assertions + .assertThat( + fileIO.listStatus(new Path(table.location(), "dt=2024-01-01/hh=1/bucket-0")).length) + .isEqualTo(3) + Assertions + .assertThat( + fileIO.listStatus(new Path(table.location(), "dt=2024-01-02/hh=0/bucket-0")).length) + .isEqualTo(2) + Assertions + .assertThat( + fileIO.listStatus(new Path(table.location(), "dt=2024-01-02/hh=1/bucket-0")).length) + .isEqualTo(2) + } + + test("Paimon test: toWhere method in CompactProcedure") { + val conditions = "f0=0,f1=0,f2=0;f0=1,f1=1,f2=1;f0=1,f1=2,f2=2;f3=3" + + val where = SparkProcedureUtils.toWhere(conditions) + val whereExpected = + "(f0=0 AND f1=0 AND f2=0) OR (f0=1 AND f1=1 AND f2=1) OR (f0=1 AND f1=2 AND f2=2) OR (f3=3)" + + Assertions.assertThat(where).isEqualTo(whereExpected) + } + + test("Paimon Procedure: compact unaware bucket append table with option") { + spark.sql(s""" + |CREATE TABLE T (id INT, value STRING, pt STRING) + |TBLPROPERTIES ('bucket'='-1', 'write-only'='true') + |PARTITIONED BY (pt) + |""".stripMargin) + + val table = loadTable("T") + + spark.sql(s"INSERT INTO T VALUES (1, 'a', 'p1'), (2, 'b', 'p2')") + spark.sql(s"INSERT INTO T VALUES (3, 'c', 'p1'), (4, 'd', 'p2')") + spark.sql(s"INSERT INTO T VALUES (5, 'e', 'p1'), (6, 'f', 'p2')") + + spark.sql( + "CALL sys.compact(table => 'T', partitions => 'pt=\"p1\"', options => 'compaction.min.file-num=2')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(4) + + spark.sql("CALL sys.compact(table => 'T', options => 'compaction.min.file-num=2')") + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(5) + + // compact condition no longer met + spark.sql(s"CALL sys.compact(table => 'T')") + Assertions.assertThat(lastSnapshotId(table)).isEqualTo(5) + + checkAnswer( + spark.sql(s"SELECT * FROM T ORDER BY id"), + Row(1, "a", "p1") :: Row(2, "b", "p2") :: 
Row(3, "c", "p1") :: Row(4, "d", "p2") :: + Row(5, "e", "p1") :: Row(6, "f", "p2") :: Nil) + } + + test("Paimon Procedure: compact with partition_idle_time for pk table") { + Seq(1, -1).foreach( + bucket => { + withTable("T") { + val dynamicBucketArgs = if (bucket == -1) " ,'dynamic-bucket.initial-buckets'='1'" else "" + spark.sql( + s""" + |CREATE TABLE T (id INT, value STRING, dt STRING, hh INT) + |TBLPROPERTIES ('primary-key'='id, dt, hh', 'bucket'='$bucket', 'write-only'='true'$dynamicBucketArgs) + |PARTITIONED BY (dt, hh) + |""".stripMargin) + + val table = loadTable("T") + + spark.sql(s"INSERT INTO T VALUES (1, '1', '2024-01-01', 0), (2, '2', '2024-01-01', 1)") + spark.sql(s"INSERT INTO T VALUES (5, '5', '2024-01-02', 0), (6, '6', '2024-01-02', 1)") + spark.sql(s"INSERT INTO T VALUES (3, '3', '2024-01-01', 0), (4, '4', '2024-01-01', 1)") + spark.sql(s"INSERT INTO T VALUES (7, '7', '2024-01-02', 0), (8, '8', '2024-01-02', 1)") + + Thread.sleep(10000); + spark.sql(s"INSERT INTO T VALUES (9, '9', '2024-01-01', 0), (10, '10', '2024-01-02', 0)") + + spark.sql("CALL sys.compact(table => 'T', partition_idle_time => '10s')") + val dataSplits = table.newSnapshotReader.read.dataSplits.asScala.toList + Assertions + .assertThat(dataSplits.size) + .isEqualTo(4) + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + for (dataSplit: DataSplit <- dataSplits) { + if (dataSplit.partition().getInt(1) == 0) { + Assertions + .assertThat(dataSplit.dataFiles().size()) + .isEqualTo(3) + } else { + Assertions + .assertThat(dataSplit.dataFiles().size()) + .isEqualTo(1) + } + } + } + }) + + } + + test("Paimon Procedure: compact with partition_idle_time for unaware bucket append table") { + spark.sql(s""" + |CREATE TABLE T (id INT, value STRING, dt STRING, hh INT) + |TBLPROPERTIES ('bucket'='-1', 'write-only'='true', 'compaction.min.file-num'='2') + |PARTITIONED BY (dt, hh) + |""".stripMargin) + + val table = loadTable("T") + + spark.sql(s"INSERT INTO T 
VALUES (1, '1', '2024-01-01', 0), (2, '2', '2024-01-01', 1)") + spark.sql(s"INSERT INTO T VALUES (5, '5', '2024-01-02', 0), (6, '6', '2024-01-02', 1)") + spark.sql(s"INSERT INTO T VALUES (3, '3', '2024-01-01', 0), (4, '4', '2024-01-01', 1)") + spark.sql(s"INSERT INTO T VALUES (7, '7', '2024-01-02', 0), (8, '8', '2024-01-02', 1)") + + Thread.sleep(10000); + spark.sql(s"INSERT INTO T VALUES (9, '9', '2024-01-01', 0), (10, '10', '2024-01-02', 0)") + + spark.sql("CALL sys.compact(table => 'T', partition_idle_time => '10s')") + val dataSplits = table.newSnapshotReader.read.dataSplits.asScala.toList + Assertions + .assertThat(dataSplits.size) + .isEqualTo(4) + Assertions.assertThat(lastSnapshotCommand(table).equals(CommitKind.COMPACT)).isTrue + for (dataSplit: DataSplit <- dataSplits) { + if (dataSplit.partition().getInt(1) == 0) { + Assertions + .assertThat(dataSplit.dataFiles().size()) + .isEqualTo(3) + } else { + Assertions + .assertThat(dataSplit.dataFiles().size()) + .isEqualTo(1) + } + } + } + + test("Paimon Procedure: test aware-bucket compaction read parallelism") { + spark.sql(s""" + |CREATE TABLE T (id INT, value STRING) + |TBLPROPERTIES ('primary-key'='id', 'bucket'='3', 'write-only'='true') + |""".stripMargin) + + val table = loadTable("T") + for (i <- 1 to 10) { + sql(s"INSERT INTO T VALUES ($i, '$i')") + } + assertResult(10)(table.snapshotManager().snapshotCount()) + + val buckets = table.newSnapshotReader().bucketEntries().asScala.map(_.bucket()).distinct.size + assertResult(3)(buckets) + + val taskBuffer = scala.collection.mutable.ListBuffer.empty[Int] + val listener = new SparkListener { + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { + taskBuffer += stageSubmitted.stageInfo.numTasks + } + } + + try { + spark.sparkContext.addSparkListener(listener) + + // spark.default.parallelism cannot be change in spark session + // sparkParallelism is 2, bucket is 3, use 2 as the read parallelism + 
spark.conf.set("spark.sql.shuffle.partitions", 2) + spark.sql("CALL sys.compact(table => 'T')") + + // sparkParallelism is 5, bucket is 3, use 3 as the read parallelism + spark.conf.set("spark.sql.shuffle.partitions", 5) + spark.sql("CALL sys.compact(table => 'T')") + + assertResult(Seq(2, 3))(taskBuffer) + } finally { + spark.sparkContext.removeSparkListener(listener) + } + } + + test("Paimon Procedure: test unaware-bucket compaction read parallelism") { + spark.sql(s""" + |CREATE TABLE T (id INT, value STRING) + |TBLPROPERTIES ('bucket'='-1', 'write-only'='true') + |""".stripMargin) + + val table = loadTable("T") + for (i <- 1 to 12) { + sql(s"INSERT INTO T VALUES ($i, '$i')") + } + assertResult(12)(table.snapshotManager().snapshotCount()) + + val buckets = table.newSnapshotReader().bucketEntries().asScala.map(_.bucket()).distinct.size + // only has bucket-0 + assertResult(1)(buckets) + + val taskBuffer = scala.collection.mutable.ListBuffer.empty[Int] + val listener = new SparkListener { + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { + taskBuffer += stageSubmitted.stageInfo.numTasks + } + } + + try { + spark.sparkContext.addSparkListener(listener) + + // spark.default.parallelism cannot be change in spark session + // sparkParallelism is 2, task groups is 6, use 2 as the read parallelism + spark.conf.set("spark.sql.shuffle.partitions", 2) + spark.sql( + "CALL sys.compact(table => 'T', options => 'source.split.open-file-cost=3200M, compaction.min.file-num=2')") + + // sparkParallelism is 5, task groups is 1, use 1 as the read parallelism + spark.conf.set("spark.sql.shuffle.partitions", 5) + spark.sql( + "CALL sys.compact(table => 'T', options => 'source.split.open-file-cost=3200M, compaction.min.file-num=2')") + + assertResult(Seq(2, 3))(taskBuffer) + } finally { + spark.sparkContext.removeSparkListener(listener) + } + } + + test("Paimon Procedure: type cast in where") { + withTable("t") { + sql(""" + |CREATE TABLE t (id 
INT, value STRING, day_part LONG) + |TBLPROPERTIES ('compaction.min.file-num'='2') + |PARTITIONED BY (day_part) + |""".stripMargin) + sql("INSERT INTO t VALUES (1, 'a', 20250810)") + sql("INSERT INTO t VALUES (2, 'b', 20250810)") + sql("INSERT INTO t VALUES (3, 'c', 20250811)") + + sql("CALL sys.compact(table => 't', where => 'day_part < 20250811 and day_part > 20250809')") + val table = loadTable("t") + assert(table.snapshotManager().latestSnapshot().commitKind().equals(CommitKind.COMPACT)) + } + } + + test("Paimon Procedure: cluster for unpartitioned table") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql( + s""" + |CREATE TABLE T (a INT, b INT, c STRING) + |TBLPROPERTIES ('bucket'='-1','num-levels'='6', 'num-sorted-run.compaction-trigger'='2', 'clustering.columns'='a,b', 'clustering.strategy'='zorder', 'clustering.incremental' = 'true') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b", "c") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + val random = new Random() + val randomStr = random.nextString(40) + // first write + inputData.addData((0, 0, randomStr)) + inputData.addData((0, 1, randomStr)) + inputData.addData((0, 2, randomStr)) + inputData.addData((1, 0, randomStr)) + inputData.addData((1, 1, randomStr)) + inputData.addData((1, 2, randomStr)) + inputData.addData((2, 0, randomStr)) + inputData.addData((2, 1, randomStr)) + inputData.addData((2, 2, randomStr)) + stream.processAllAvailable() + + val result = new util.ArrayList[Row]() + for (a <- 0 until 3) { + for (b <- 0 until 3) { + result.add(Row(a, b, randomStr)) + } + } + 
Assertions.assertThat(query().collect()).containsExactlyElementsOf(result) + + // first cluster, the outputLevel should be 5 + checkAnswer(spark.sql("CALL paimon.sys.compact(table => 'T')"), Row(true) :: Nil) + + // first cluster result + val result2 = new util.ArrayList[Row]() + result2.add(0, Row(0, 0, randomStr)) + result2.add(1, Row(0, 1, randomStr)) + result2.add(2, Row(1, 0, randomStr)) + result2.add(3, Row(1, 1, randomStr)) + result2.add(4, Row(0, 2, randomStr)) + result2.add(5, Row(1, 2, randomStr)) + result2.add(6, Row(2, 0, randomStr)) + result2.add(7, Row(2, 1, randomStr)) + result2.add(8, Row(2, 2, randomStr)) + + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result2) + + var clusteredTable = loadTable("T") + checkSnapshot(clusteredTable) + var dataSplits = clusteredTable.newSnapshotReader().read().dataSplits() + Assertions.assertThat(dataSplits.size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().get(0).level()).isEqualTo(5) + + // second write + inputData.addData((0, 3, null), (1, 3, null), (2, 3, null)) + inputData.addData((3, 0, null), (3, 1, null), (3, 2, null), (3, 3, null)) + stream.processAllAvailable() + + val result3 = new util.ArrayList[Row]() + result3.addAll(result2) + for (a <- 0 until 3) { + result3.add(Row(a, 3, null)) + } + for (b <- 0 until 4) { + result3.add(Row(3, b, null)) + } + + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result3) + + // second cluster, the outputLevel should be 4 + checkAnswer(spark.sql("CALL paimon.sys.compact(table => 'T')"), Row(true) :: Nil) + // second cluster result, level-5 and level-4 are individually ordered + val result4 = new util.ArrayList[Row]() + result4.addAll(result2) + result4.add(Row(0, 3, null)) + result4.add(Row(1, 3, null)) + result4.add(Row(3, 0, null)) + result4.add(Row(3, 1, null)) + result4.add(Row(2, 3, null)) + result4.add(Row(3, 2, null)) + 
result4.add(Row(3, 3, null)) + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result4) + + clusteredTable = loadTable("T") + checkSnapshot(clusteredTable) + dataSplits = clusteredTable.newSnapshotReader().read().dataSplits() + Assertions.assertThat(dataSplits.size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().size()).isEqualTo(2) + Assertions.assertThat(dataSplits.get(0).dataFiles().get(0).level()).isEqualTo(5) + Assertions.assertThat(dataSplits.get(0).dataFiles().get(1).level()).isEqualTo(4) + + // full cluster + checkAnswer( + spark.sql("CALL paimon.sys.compact(table => 'T', compact_strategy => 'full')"), + Row(true) :: Nil) + val result5 = new util.ArrayList[Row]() + result5.add(Row(0, 0, randomStr)) + result5.add(Row(0, 1, randomStr)) + result5.add(Row(1, 0, randomStr)) + result5.add(Row(1, 1, randomStr)) + result5.add(Row(0, 2, randomStr)) + result5.add(Row(0, 3, null)) + result5.add(Row(1, 2, randomStr)) + result5.add(Row(1, 3, null)) + result5.add(Row(2, 0, randomStr)) + result5.add(Row(2, 1, randomStr)) + result5.add(Row(3, 0, null)) + result5.add(Row(3, 1, null)) + result5.add(Row(2, 2, randomStr)) + result5.add(Row(2, 3, null)) + result5.add(Row(3, 2, null)) + result5.add(Row(3, 3, null)) + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result5) + + clusteredTable = loadTable("T") + checkSnapshot(clusteredTable) + dataSplits = clusteredTable.newSnapshotReader().read().dataSplits() + Assertions.assertThat(dataSplits.size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().get(0).level()).isEqualTo(5) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: cluster for partitioned table") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql( + s""" + |CREATE TABLE T (a INT, b INT, c STRING, pt INT) + |PARTITIONED BY (pt) + |TBLPROPERTIES ('bucket'='-1', 
'num-levels'='6', 'num-sorted-run.compaction-trigger'='2', 'clustering.columns'='a,b', 'clustering.strategy'='zorder', 'clustering.incremental' = 'true') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, Int, String, Int)] + val stream = inputData + .toDS() + .toDF("a", "b", "c", "pt") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY pt") + + try { + val random = new Random() + val randomStr = random.nextString(50) + // first write + for (pt <- 0 until 2) { + val c = if (pt == 0) randomStr else null + inputData.addData((0, 0, c, pt)) + inputData.addData((0, 1, c, pt)) + inputData.addData((0, 2, c, pt)) + inputData.addData((1, 0, c, pt)) + inputData.addData((1, 1, c, pt)) + inputData.addData((1, 2, c, pt)) + inputData.addData((2, 0, c, pt)) + inputData.addData((2, 1, c, pt)) + inputData.addData((2, 2, c, pt)) + } + stream.processAllAvailable() + + val result = new util.ArrayList[Row]() + for (pt <- 0 until 2) { + for (a <- 0 until 3) { + for (b <- 0 until 3) { + val c = if (pt == 0) randomStr else null + result.add(Row(a, b, c, pt)) + } + } + } + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result) + + // first cluster, the outputLevel should be 5 + checkAnswer(spark.sql("CALL paimon.sys.compact(table => 'T')"), Row(true) :: Nil) + + // first cluster result + val result2 = new util.ArrayList[Row]() + for (pt <- 0 until 2) { + val c = if (pt == 0) randomStr else null + result2.add(Row(0, 0, c, pt)) + result2.add(Row(0, 1, c, pt)) + result2.add(Row(1, 0, c, pt)) + result2.add(Row(1, 1, c, pt)) + result2.add(Row(0, 2, c, pt)) + result2.add(Row(1, 2, c, pt)) + result2.add(Row(2, 0, c, pt)) + result2.add(Row(2, 1, c, pt)) + result2.add(Row(2, 2, c, pt)) + } + + 
Assertions.assertThat(query().collect()).containsExactlyElementsOf(result2) + + var clusteredTable = loadTable("T") + checkSnapshot(clusteredTable) + var dataSplits = clusteredTable.newSnapshotReader().read().dataSplits() + Assertions.assertThat(dataSplits.size()).isEqualTo(2) + dataSplits.forEach( + dataSplit => { + Assertions.assertThat(dataSplit.dataFiles().size()).isEqualTo(1) + Assertions.assertThat(dataSplit.dataFiles().get(0).level()).isEqualTo(5) + }) + + // second write + for (pt <- 0 until 2) { + inputData.addData((0, 3, null, pt), (1, 3, null, pt), (2, 3, null, pt)) + inputData.addData( + (3, 0, null, pt), + (3, 1, null, pt), + (3, 2, null, pt), + (3, 3, null, pt)) + } + stream.processAllAvailable() + + val result3 = new util.ArrayList[Row]() + for (pt <- 0 until 2) { + val c = if (pt == 0) randomStr else null + result3.add(Row(0, 0, c, pt)) + result3.add(Row(0, 1, c, pt)) + result3.add(Row(1, 0, c, pt)) + result3.add(Row(1, 1, c, pt)) + result3.add(Row(0, 2, c, pt)) + result3.add(Row(1, 2, c, pt)) + result3.add(Row(2, 0, c, pt)) + result3.add(Row(2, 1, c, pt)) + result3.add(Row(2, 2, c, pt)) + for (a <- 0 until 3) { + result3.add(Row(a, 3, null, pt)) + } + for (b <- 0 until 4) { + result3.add(Row(3, b, null, pt)) + } + } + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result3) + + // second cluster + checkAnswer(spark.sql("CALL paimon.sys.compact(table => 'T')"), Row(true) :: Nil) + val result4 = new util.ArrayList[Row]() + // for partition-0: only file in level-0 will be picked for clustering, outputLevel is 4 + result4.add(Row(0, 0, randomStr, 0)) + result4.add(Row(0, 1, randomStr, 0)) + result4.add(Row(1, 0, randomStr, 0)) + result4.add(Row(1, 1, randomStr, 0)) + result4.add(Row(0, 2, randomStr, 0)) + result4.add(Row(1, 2, randomStr, 0)) + result4.add(Row(2, 0, randomStr, 0)) + result4.add(Row(2, 1, randomStr, 0)) + result4.add(Row(2, 2, randomStr, 0)) + result4.add(Row(0, 3, null, 0)) + result4.add(Row(1, 3, null, 0)) + 
result4.add(Row(3, 0, null, 0)) + result4.add(Row(3, 1, null, 0)) + result4.add(Row(2, 3, null, 0)) + result4.add(Row(3, 2, null, 0)) + result4.add(Row(3, 3, null, 0)) + // for partition-1:all files will be picked for clustering, outputLevel is 5 + result4.add(Row(0, 0, null, 1)) + result4.add(Row(0, 1, null, 1)) + result4.add(Row(1, 0, null, 1)) + result4.add(Row(1, 1, null, 1)) + result4.add(Row(0, 2, null, 1)) + result4.add(Row(0, 3, null, 1)) + result4.add(Row(1, 2, null, 1)) + result4.add(Row(1, 3, null, 1)) + result4.add(Row(2, 0, null, 1)) + result4.add(Row(2, 1, null, 1)) + result4.add(Row(3, 0, null, 1)) + result4.add(Row(3, 1, null, 1)) + result4.add(Row(2, 2, null, 1)) + result4.add(Row(2, 3, null, 1)) + result4.add(Row(3, 2, null, 1)) + result4.add(Row(3, 3, null, 1)) + + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result4) + + clusteredTable = loadTable("T") + checkSnapshot(clusteredTable) + dataSplits = clusteredTable.newSnapshotReader().read().dataSplits() + Assertions.assertThat(dataSplits.size()).isEqualTo(2) + dataSplits.forEach( + dataSplit => { + if (dataSplit.partition().getInt(0) == 1) { + // partition-1 + Assertions.assertThat(dataSplit.dataFiles().size()).isEqualTo(1) + Assertions.assertThat(dataSplit.dataFiles().get(0).level()).isEqualTo(5) + } else { + // partition-0 + Assertions.assertThat(dataSplit.dataFiles().size()).isEqualTo(2) + Assertions.assertThat(dataSplit.dataFiles().get(0).level()).isEqualTo(5) + Assertions.assertThat(dataSplit.dataFiles().get(1).level()).isEqualTo(4) + } + }) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: cluster for partitioned table with partition filter") { + sql( + """ + |CREATE TABLE T (a INT, b INT, pt INT) + |PARTITIONED BY (pt) + |TBLPROPERTIES ( + | 'bucket'='-1', 'num-levels'='6', 'num-sorted-run.compaction-trigger'='2', + | 'clustering.columns'='a,b', 'clustering.strategy'='zorder', 'clustering.incremental' = 'true' + |) + |""".stripMargin) + + 
sql("INSERT INTO T VALUES (0, 0, 0), (0, 0, 1)") + sql("INSERT INTO T VALUES (0, 1, 0), (0, 1, 1)") + sql("INSERT INTO T VALUES (0, 2, 0), (0, 2, 1)") + sql("INSERT INTO T VALUES (1, 0, 0), (1, 0, 1)") + sql("INSERT INTO T VALUES (1, 1, 0), (1, 1, 1)") + sql("INSERT INTO T VALUES (1, 2, 0), (1, 2, 1)") + sql("INSERT INTO T VALUES (2, 0, 0), (2, 0, 1)") + sql("INSERT INTO T VALUES (2, 1, 0), (2, 1, 1)") + sql("INSERT INTO T VALUES (2, 2, 0), (2, 2, 1)") + + sql("CALL sys.compact(table => 'T', where => 'pt = 0')") + checkAnswer( + sql("select distinct partition, level from `T$files` order by partition"), + Seq(Row("{0}", 5), Row("{1}", 0)) + ) + + sql("CALL sys.compact(table => 'T', where => 'pt = 1')") + checkAnswer( + sql("select distinct partition, level from `T$files` order by partition"), + Seq(Row("{0}", 5), Row("{1}", 5)) + ) + } + + test("Paimon Procedure: cluster with deletion vectors") { + failAfter(Span(5, org.scalatest.time.Minutes)) { + withTempDir { + checkpointDir => + spark.sql( + s""" + |CREATE TABLE T (a INT, b INT, c STRING) + |TBLPROPERTIES ('bucket'='-1', 'deletion-vectors.enabled'='true','num-levels'='6', 'num-sorted-run.compaction-trigger'='2', 'clustering.columns'='a,b', 'clustering.strategy'='zorder', 'clustering.incremental' = 'true') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b", "c") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + val random = new Random() + val randomStr = random.nextString(40) + // first write + inputData.addData((0, 0, randomStr)) + inputData.addData((0, 1, randomStr)) + inputData.addData((0, 2, randomStr)) + inputData.addData((1, 0, randomStr)) + inputData.addData((1, 1, 
randomStr)) + inputData.addData((1, 2, randomStr)) + inputData.addData((2, 0, randomStr)) + inputData.addData((2, 1, randomStr)) + inputData.addData((2, 2, randomStr)) + stream.processAllAvailable() + + val result = new util.ArrayList[Row]() + for (a <- 0 until 3) { + for (b <- 0 until 3) { + result.add(Row(a, b, randomStr)) + } + } + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result) + + // first cluster, the outputLevel should be 5 + checkAnswer(spark.sql("CALL paimon.sys.compact(table => 'T')"), Row(true) :: Nil) + + // first cluster result + val result2 = new util.ArrayList[Row]() + result2.add(0, Row(0, 0, randomStr)) + result2.add(1, Row(0, 1, randomStr)) + result2.add(2, Row(1, 0, randomStr)) + result2.add(3, Row(1, 1, randomStr)) + result2.add(4, Row(0, 2, randomStr)) + result2.add(5, Row(1, 2, randomStr)) + result2.add(6, Row(2, 0, randomStr)) + result2.add(7, Row(2, 1, randomStr)) + result2.add(8, Row(2, 2, randomStr)) + + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result2) + + var clusteredTable = loadTable("T") + checkSnapshot(clusteredTable) + var dataSplits = clusteredTable.newSnapshotReader().read().dataSplits() + Assertions.assertThat(dataSplits.size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().get(0).level()).isEqualTo(5) + + // second write + inputData.addData((0, 3, null), (1, 3, null), (2, 3, null)) + inputData.addData((3, 0, null), (3, 1, null), (3, 2, null), (3, 3, null)) + stream.processAllAvailable() + + // delete (0,0), which is in level-5 file + spark.sql("DELETE FROM T WHERE a=0 and b=0;").collect() + // delete (0,3), which is in level-0 file + spark.sql("DELETE FROM T WHERE a=0 and b=3;").collect() + + val result3 = new util.ArrayList[Row]() + result3.addAll(result2.subList(1, result2.size())) + for (a <- 1 until 3) { + result3.add(Row(a, 3, null)) + } + for (b <- 0 until 4) { + 
result3.add(Row(3, b, null)) + } + + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result3) + + // second cluster, the outputLevel should be 4. dv index for level-0 will be updated + // and dv index for level-5 will be retained + checkAnswer(spark.sql("CALL paimon.sys.compact(table => 'T')"), Row(true) :: Nil) + // second cluster result, level-5 and level-4 are individually ordered + val result4 = new util.ArrayList[Row]() + result4.addAll(result2.subList(1, result2.size())) + result4.add(Row(1, 3, null)) + result4.add(Row(3, 0, null)) + result4.add(Row(3, 1, null)) + result4.add(Row(2, 3, null)) + result4.add(Row(3, 2, null)) + result4.add(Row(3, 3, null)) + Assertions.assertThat(query().collect()).containsExactlyElementsOf(result4) + + clusteredTable = loadTable("T") + checkSnapshot(clusteredTable) + dataSplits = clusteredTable.newSnapshotReader().read().dataSplits() + Assertions.assertThat(dataSplits.size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().size()).isEqualTo(2) + Assertions.assertThat(dataSplits.get(0).dataFiles().get(0).level()).isEqualTo(5) + Assertions.assertThat(dataSplits.get(0).deletionFiles().get().get(0)).isNotNull + Assertions.assertThat(dataSplits.get(0).dataFiles().get(1).level()).isEqualTo(4) + Assertions.assertThat(dataSplits.get(0).deletionFiles().get().get(1)).isNull() + + // full cluster + checkAnswer( + spark.sql("CALL paimon.sys.compact(table => 'T', compact_strategy => 'full')"), + Row(true) :: Nil) + clusteredTable = loadTable("T") + checkSnapshot(clusteredTable) + dataSplits = clusteredTable.newSnapshotReader().read().dataSplits() + Assertions.assertThat(dataSplits.size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).dataFiles().size()).isEqualTo(1) + Assertions.assertThat(dataSplits.get(0).deletionFiles().get().get(0)).isNull() + + } finally { + stream.stop() + } + } + } + } + + def checkSnapshot(table: FileStoreTable): Unit = { + Assertions + 
.assertThat(table.latestSnapshot().get().commitKind().toString) + .isEqualTo(CommitKind.COMPACT.toString) + } + + def lastSnapshotCommand(table: FileStoreTable): CommitKind = { + table.snapshotManager().latestSnapshot().commitKind() + } + + def lastSnapshotId(table: FileStoreTable): Long = { + table.snapshotManager().latestSnapshotId() + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala new file mode 100644 index 000000000000..605f80e27ad3 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.spark.PaimonSparkTestBase + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest + +class CreateAndDeleteTagProcedureTest extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + + test("Paimon Procedure: create and delete tag") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + checkAnswer( + spark.sql( + "CALL paimon.sys.create_tag(" + + "table => 'test.T', tag => 'test_tag', time_retained => '5 d', snapshot => 2)"), + Row(true) :: Nil) + checkAnswer( + spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), + Row("test_tag") :: Nil) + checkAnswer( + spark.sql("CALL paimon.sys.delete_tag(table => 'test.T', tag => 'test_tag')"), + Row(true) :: Nil) + checkAnswer(spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), Nil) + checkAnswer( + 
spark.sql( + "CALL paimon.sys.create_tag(table => 'test.T', tag => 'test_latestSnapshot_tag')"), + Row(true) :: Nil) + checkAnswer( + spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), + Row("test_latestSnapshot_tag") :: Nil) + checkAnswer( + spark.sql( + "CALL paimon.sys.delete_tag(table => 'test.T', tag => 'test_latestSnapshot_tag')"), + Row(true) :: Nil) + checkAnswer(spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), Nil) + + // create test_tag_1 and test_tag_2 + checkAnswer( + spark.sql( + "CALL paimon.sys.create_tag(" + + "table => 'test.T', tag => 'test_tag_1', snapshot => 1)"), + Row(true) :: Nil) + + checkAnswer( + spark.sql( + "CALL paimon.sys.create_tag(" + + "table => 'test.T', tag => 'test_tag_2', snapshot => 2)"), + Row(true) :: Nil) + + checkAnswer( + spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), + Row("test_tag_1") :: Row("test_tag_2") :: Nil) + + // test rename_tag + checkAnswer( + spark.sql( + "CALL paimon.sys.rename_tag(table => 'test.T', tag => 'test_tag_1', target_tag => 'test_tag_3')"), + Row(true) :: Nil + ) + checkAnswer( + spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), + Row("test_tag_2") :: Row("test_tag_3") :: Nil) + + // delete test_tag_2 and test_tag_3 + checkAnswer( + spark.sql( + "CALL paimon.sys.delete_tag(table => 'test.T', tag => 'test_tag_2,test_tag_3')"), + Row(true) :: Nil) + + checkAnswer(spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: create same tag with same snapshot") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + 
.option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + checkAnswer( + spark.sql( + "CALL paimon.sys.create_tag(" + + "table => 'test.T', tag => 'test_tag', snapshot => 1)"), + Row(true) :: Nil) + checkAnswer( + spark.sql("SELECT count(*) FROM paimon.test.`T$tags` where tag_name = 'test_tag'"), + Row(1) :: Nil) + + // throw exception "Tag test_tag already exists" + assertThrows[IllegalArgumentException] { + spark.sql( + "CALL paimon.sys.create_tag(" + + "table => 'test.T', tag => 'test_tag', time_retained => '5 d', snapshot => 1)") + } + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: delete tag not failed if tag not exists") { + spark.sql("CREATE TABLE T (id STRING, name STRING) USING PAIMON") + + checkAnswer( + spark.sql("CALL paimon.sys.delete_tag(table => 'test.T', tag => 'test_tag')"), + Row(true) :: Nil) + } + + test("Paimon Procedure: delete multiple tags") { + spark.sql("CREATE TABLE T (id INT, name STRING) USING PAIMON") + spark.sql("insert into T values (1, 'a')") + + // create four tags + spark.sql("CALL paimon.sys.create_tag(table => 'test.T', tag => 'tag-1')") + spark.sql("CALL paimon.sys.create_tag(table => 'test.T', tag => 'tag-2')") + spark.sql("CALL paimon.sys.create_tag(table => 'test.T', tag => 'tag-3')") + spark.sql("CALL paimon.sys.create_tag(table => 'test.T', tag => 'tag-4')") + checkAnswer(spark.sql("SELECT count(*) FROM paimon.test.`T$tags`"), Row(4) :: Nil) + + // multiple tags with no space + checkAnswer( + spark.sql("CALL paimon.sys.delete_tag(table => 'test.T', tag => 'tag-1,tag-2')"), + Row(true) :: Nil) + checkAnswer( + spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), + 
Row("tag-3") :: Row("tag-4") :: Nil) + + // multiple tags with space + checkAnswer( + spark.sql("CALL paimon.sys.delete_tag(table => 'test.T', tag => 'tag-3, tag-4')"), + Row(true) :: Nil) + checkAnswer(spark.sql("SELECT tag_name FROM paimon.test.`T$tags`"), Nil) + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateGlobalVectorIndexProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateGlobalVectorIndexProcedureTest.scala new file mode 100644 index 000000000000..b9283d996cc6 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateGlobalVectorIndexProcedureTest.scala @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.utils.Range + +import scala.collection.JavaConverters._ +import scala.collection.immutable + +class CreateGlobalVectorIndexProcedureTest extends CreateGlobalIndexProcedureTest { + test("create lucene-vector-knn global index") { + withTable("T") { + spark.sql(""" + |CREATE TABLE T (id INT, v ARRAY<FLOAT>) + |TBLPROPERTIES ( + | 'bucket' = '-1', + | 'global-index.row-count-per-shard' = '10000', + | 'row-tracking.enabled' = 'true', + | 'data-evolution.enabled' = 'true') + |""".stripMargin) + + val values = (0 until 100) + .map( + i => s"($i, array(cast($i as float), cast(${i + 1} as float), cast(${i + 2} as float)))") + .mkString(",") + spark.sql(s"INSERT INTO T VALUES $values") + + val output = + spark + .sql("CALL sys.create_global_index(table => 'test.T', index_column => 'v', index_type => 'lucene-vector-knn', options => 'vector.dim=3')") + .collect() + .head + + assert(output.getBoolean(0)) + + val table = loadTable("T") + val indexEntries = table + .store() + .newIndexFileHandler() + .scanEntries() + .asScala + .filter(_.indexFile().indexType() == "lucene-vector-knn") + + assert(indexEntries.nonEmpty) + val totalRowCount = indexEntries.map(_.indexFile().rowCount()).sum + assert(totalRowCount == 100L) + } + } + + test("create lucene-vector-knn global index with partition") { + withTable("T") { + spark.sql(""" + |CREATE TABLE T (id INT, v ARRAY<FLOAT>, pt STRING) + |TBLPROPERTIES ( + | 'bucket' = '-1', + | 'global-index.row-count-per-shard' = '10000', + | 'row-tracking.enabled' = 'true', + | 'data-evolution.enabled' = 'true') + | PARTITIONED BY (pt) + |""".stripMargin) + + var values = (0 until 65000) + .map( + i => + s"($i, array(cast($i as float), cast(${i + 1} as float), cast(${i + 2} as float)), 'p0')") + .mkString(",") + spark.sql(s"INSERT INTO T VALUES $values") + + values = (0 until 35000) + .map( + i => + s"($i, array(cast($i as float), cast(${i + 1} as float), cast(${i + 2} as float)), 'p1')")
+ .mkString(",") + spark.sql(s"INSERT INTO T VALUES $values") + + values = (0 until 22222) + .map( + i => + s"($i, array(cast($i as float), cast(${i + 1} as float), cast(${i + 2} as float)), 'p0')") + .mkString(",") + spark.sql(s"INSERT INTO T VALUES $values") + + val output = + spark + .sql("CALL sys.create_global_index(table => 'test.T', index_column => 'v', index_type => 'lucene-vector-knn', options => 'vector.dim=3')") + .collect() + .head + + assert(output.getBoolean(0)) + + val table = loadTable("T") + val indexEntries = table + .store() + .newIndexFileHandler() + .scanEntries() + .asScala + .filter(_.indexFile().indexType() == "lucene-vector-knn") + + assert(indexEntries.nonEmpty) + val totalRowCount = indexEntries.map(_.indexFile().rowCount()).sum + assert(totalRowCount == 122222L) + } + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala new file mode 100644 index 000000000000..b4f7d63086ae --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.spark.PaimonSparkTestBase +import org.apache.paimon.utils.SnapshotNotExistException + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest + +class CreateTagFromTimestampProcedureTest extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + + test("Paimon Procedure: Create tags from snapshots commit-time ") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + try { + + for (i <- 1 to 4) { + inputData.addData((i, "a")) + stream.processAllAvailable() + Thread.sleep(500L) + } + + val table = loadTable("T") + val earliestCommitTime = table.snapshotManager.earliestSnapshot.timeMillis + val commitTime3 = table.snapshotManager.snapshot(3).timeMillis + val commitTime4 = table.snapshotManager.snapshot(4).timeMillis + + // create tag from timestamp that earlier than the earliest snapshot commit time. + checkAnswer( + spark.sql(s"""CALL paimon.sys.create_tag_from_timestamp( + |table => 'test.T', + | tag => 'test_tag', + | timestamp => ${earliestCommitTime - 1})""".stripMargin), + Row("test_tag", 1, earliestCommitTime, "null") :: Nil + ) + + // create tag from timestamp that equals to snapshot-3 commit time. 
+ checkAnswer( + spark.sql(s"""CALL paimon.sys.create_tag_from_timestamp( + |table => 'test.T', + | tag => 'test_tag2', + | timestamp => $commitTime3)""".stripMargin), + Row("test_tag2", 3, commitTime3, "null") :: Nil + ) + + // create tag from timestamp that later than snapshot-3 commit time. + checkAnswer( + spark.sql(s"""CALL paimon.sys.create_tag_from_timestamp( + |table => 'test.T', + |tag => 'test_tag3', + |timestamp => ${commitTime3 + 1})""".stripMargin), + Row("test_tag3", 4, commitTime4, "null") :: Nil + ) + + // create tag from timestamp that later than the latest snapshot commit time and throw SnapshotNotExistException. + assertThrows[SnapshotNotExistException] { + spark.sql(s"""CALL paimon.sys.create_tag_from_timestamp( + |table => 'test.T', + |tag => 'test_tag3', + |timestamp => ${Long.MaxValue})""".stripMargin) + } + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: Create tags from tags commit-time") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + try { + for (i <- 1 to 2) { + inputData.addData((i, "a")) + stream.processAllAvailable() + Thread.sleep(500L) + } + + checkAnswer( + spark.sql( + "CALL paimon.sys.create_tag(" + + "table => 'test.T', tag => 'test_tag', snapshot => 1)"), + Row(true) :: Nil) + + val table = loadTable("T") + val latestCommitTime = table.snapshotManager.latestSnapshot().timeMillis + val tagsCommitTime = table.tagManager().getOrThrow("test_tag").timeMillis + assert(latestCommitTime > tagsCommitTime) + + 
// make snapshot 1 expire. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_snapshots(table => 'test.T', retain_max => 1, retain_min => 1)"), + Row(1) :: Nil) + + // create tag from timestamp that earlier than the expired snapshot 1. + checkAnswer( + spark.sql(s"""CALL paimon.sys.create_tag_from_timestamp( + |table => 'test.T', + | tag => 'test_tag1', + | timestamp => ${tagsCommitTime - 1})""".stripMargin), + Row("test_tag1", 1, tagsCommitTime, "null") :: Nil + ) + + // create tag from timestamp that later than the expired snapshot 1. + checkAnswer( + spark.sql(s"""CALL paimon.sys.create_tag_from_timestamp( + |table => 'test.T', + |tag => 'test_tag2', + |timestamp => ${tagsCommitTime + 1})""".stripMargin), + Row("test_tag2", 2, latestCommitTime, "null") :: Nil + ) + + } finally { + stream.stop() + } + } + } + } + +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala new file mode 100644 index 000000000000..c7cdc0f517a7 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala @@ -0,0 +1,760 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.spark.PaimonSparkTestBase + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest +import org.assertj.core.api.Assertions.assertThatThrownBy + +/** IT Case for [[ExpirePartitionsProcedure]]. */ +class ExpirePartitionsProcedureTest extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + + test("Paimon Procedure: expire partitions") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING) + |TBLPROPERTIES ('primary-key'='k,pt', 'bucket'='1') + | PARTITIONED BY (pt) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // snapshot-1 + inputData.addData(("a", "2024-06-01")) + stream.processAllAvailable() + + // This partition never expires. + inputData.addData(("Never-expire", "9999-09-09")) + stream.processAllAvailable() + + checkAnswer(query(), Row("a", "2024-06-01") :: Row("Never-expire", "9999-09-09") :: Nil) + // call expire_partitions. 
+ checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T', expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd')"), + Row("pt=2024-06-01") :: Nil + ) + + checkAnswer(query(), Row("Never-expire", "9999-09-09") :: Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon procedure : expire partitions show a list of expired partitions.") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING, hm STRING) + |TBLPROPERTIES ('primary-key'='k,pt,hm', 'bucket'='1') + | PARTITIONED BY (pt,hm) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt", "hm") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // Show results : There are no expired partitions. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T', expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd')"), + Row("No expired partitions.") :: Nil + ) + + // snapshot-1 + inputData.addData(("a", "2024-06-01", "01:00")) + stream.processAllAvailable() + // snapshot-2 + inputData.addData(("b", "2024-06-02", "02:00")) + stream.processAllAvailable() + // snapshot-3, never expires. + inputData.addData(("Never-expire", "9999-09-09", "99:99")) + stream.processAllAvailable() + + checkAnswer( + query(), + Row("a", "2024-06-01", "01:00") :: Row("b", "2024-06-02", "02:00") :: Row( + "Never-expire", + "9999-09-09", + "99:99") :: Nil) + + // Show a list of expired partitions. 
+ checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T'" + + ", expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd')"), + Row("pt=2024-06-01, hm=01:00") :: Row("pt=2024-06-02, hm=02:00") :: Nil + ) + + checkAnswer(query(), Row("Never-expire", "9999-09-09", "99:99") :: Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: expire partitions with values-time strategy.") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING) + |TBLPROPERTIES ('primary-key'='k,pt', 'bucket'='1') + | PARTITIONED BY (pt) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // snapshot-1 + inputData.addData(("HXH", "2024-06-01")) + stream.processAllAvailable() + + // Never expire. 
+ inputData.addData(("Never-expire", "9999-09-09")) + stream.processAllAvailable() + + checkAnswer( + query(), + Row("HXH", "2024-06-01") :: Row("Never-expire", "9999-09-09") :: Nil) + // expire + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T'," + + " expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd'" + + ",expire_strategy => 'values-time')"), + Row("pt=2024-06-01") :: Nil + ) + + checkAnswer(query(), Row("Never-expire", "9999-09-09") :: Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: expire partitions with update-time strategy.") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING) + |TBLPROPERTIES ('primary-key'='k,pt', 'bucket'='1') + | PARTITIONED BY (pt) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // This partition will expire. + inputData.addData(("HXH", "9999-09-09")) + stream.processAllAvailable() + // Waiting for partition 'pt=9999-09-09' to expire. + Thread.sleep(2500L) + // snapshot-2 + inputData.addData(("HXH", "2024-06-01")) + stream.processAllAvailable() + + // Partitions that are updated within 2 second would be retained. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(" + + "table => 'test.T'," + + " expiration_time => '2 s'" + + ",expire_strategy => 'update-time')"), + Row("pt=9999-09-09") :: Nil + ) + + checkAnswer(query(), Row("HXH", "2024-06-01") :: Nil) + + // Waiting for all partitions to expire. + Thread.sleep(1500) + // All partition will expire. 
+ checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(" + + "table => 'test.T'," + + " expiration_time => '1 s'" + + ",expire_strategy => 'update-time')"), + Row("pt=2024-06-01") :: Nil + ) + + checkAnswer(query(), Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: expire partitions with update-time strategy in same partition.") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING, hm STRING) + |TBLPROPERTIES ('primary-key'='k,pt,hm', 'bucket'='1') + | PARTITIONED BY (pt,hm) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt", "hm") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // This partition will not expire. + inputData.addData(("HXH", "2024-06-01", "01:00")) + stream.processAllAvailable() + // Waiting long enough for partition 'pt=2024-06-01, hm=01:00' to become eligible for expiration. + Thread.sleep(2500L) + // Updating the same partition data will update partition last update time, then this partition will not expire. + inputData.addData(("HXH", "2024-06-01", "01:00")) + stream.processAllAvailable() + + // The last update time of the 'pt=2024-06-01, hm=01:00' partition is updated so the partition would not expire. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T'," + + " expiration_time => '2 s'" + + ",expire_strategy => 'update-time')"), + Row("No expired partitions.") :: Nil + ) + + checkAnswer(query(), Row("HXH", "2024-06-01", "01:00") :: Nil) + // Waiting for all partitions to expire. + Thread.sleep(1500) + + // The partition 'pt=2024-06-01, hm=01:00' will expire.
+ checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T'," + + " expiration_time => '1 s'" + + ",expire_strategy => 'update-time')"), + Row("pt=2024-06-01, hm=01:00") :: Nil + ) + + checkAnswer(query(), Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: expire partitions with non-date format partition.") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING) + |TBLPROPERTIES ('primary-key'='k,pt', 'bucket'='1') + | PARTITIONED BY (pt) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // This partition will expire. + inputData.addData(("HXH", "pt-1")) + stream.processAllAvailable() + Thread.sleep(2500L) + // snapshot-2 + inputData.addData(("HXH", "pt-2")) + stream.processAllAvailable() + + // Only update-time strategy support non date format partition to expire. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T'," + + " expiration_time => '2 s'" + + ",expire_strategy => 'update-time')"), + Row("pt=pt-1") :: Nil + ) + + checkAnswer(query(), Row("HXH", "pt-2") :: Nil) + + // Waiting for all partitions to expire. + Thread.sleep(1500) + // call expire_partitions. 
+ checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T'," + + " expiration_time => '1 s'" + + ",expire_strategy => 'update-time')"), + Row("pt=pt-2") :: Nil + ) + + checkAnswer(query(), Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon procedure : expire partitions with specified time-pattern partitions.") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING, hm STRING) + |TBLPROPERTIES ('primary-key'='k,pt,hm', 'bucket'='1') + | PARTITIONED BY (hm, pt) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt", "hm") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // Show results : There are no expired partitions. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T', expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd', timestamp_pattern => '$pt')"), + Row("No expired partitions.") :: Nil + ) + + // snapshot-1 + inputData.addData(("a", "2024-06-01", "01:00")) + stream.processAllAvailable() + // snapshot-2 + inputData.addData(("b", "2024-06-02", "02:00")) + stream.processAllAvailable() + // snapshot-3, never expires. + inputData.addData(("Never-expire", "9999-09-09", "99:99")) + stream.processAllAvailable() + + checkAnswer( + query(), + Row("a", "2024-06-01", "01:00") :: Row("b", "2024-06-02", "02:00") :: Row( + "Never-expire", + "9999-09-09", + "99:99") :: Nil) + + // Show a list of expired partitions. 
+ checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T'" + + ", expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd HH:mm'" + + ", timestamp_pattern => '$pt $hm')"), + Row("hm=01:00, pt=2024-06-01") :: Row("hm=02:00, pt=2024-06-02") :: Nil + ) + + checkAnswer(query(), Row("Never-expire", "9999-09-09", "99:99") :: Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon procedure : sorted the expired partitions with max_expires.") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING, hm STRING) + |TBLPROPERTIES ('primary-key'='k,pt,hm', 'bucket'='1') + | PARTITIONED BY (pt,hm) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt", "hm") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // Show results : There are no expired partitions. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T', expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd')"), + Row("No expired partitions.") :: Nil + ) + + inputData.addData(("a", "2024-06-02", "02:00")) + stream.processAllAvailable() + inputData.addData(("b", "2024-06-02", "01:00")) + stream.processAllAvailable() + inputData.addData(("d", "2024-06-03", "01:00")) + stream.processAllAvailable() + inputData.addData(("c", "2024-06-01", "01:00")) + stream.processAllAvailable() + // this snapshot never expires. 
+ inputData.addData(("Never-expire", "9999-09-09", "99:99")) + stream.processAllAvailable() + + checkAnswer( + query(), + Row("a", "2024-06-02", "02:00") :: Row("b", "2024-06-02", "01:00") :: Row( + "d", + "2024-06-03", + "01:00") :: Row("c", "2024-06-01", "01:00") :: Row( + "Never-expire", + "9999-09-09", + "99:99") :: Nil + ) + + // sorted result of limited expired partitions. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T'" + + ", expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd', max_expires => 3)"), + Row("pt=2024-06-01, hm=01:00") :: Row("pt=2024-06-02, hm=01:00") :: Row( + "pt=2024-06-02, hm=02:00") :: Nil + ) + + checkAnswer( + query(), + Row("d", "2024-06-03", "01:00") :: Row("Never-expire", "9999-09-09", "99:99") :: Nil) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: expire partitions with default num") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql( + s""" + |CREATE TABLE T (k STRING, pt STRING) + |TBLPROPERTIES ('primary-key'='k,pt', 'bucket'='1', 'partition.expiration-max-num'='2') + |PARTITIONED BY (pt) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // snapshot-1 + inputData.addData(("a", "2024-06-01")) + stream.processAllAvailable() + + // snapshot-2 + inputData.addData(("b", "2024-06-02")) + stream.processAllAvailable() + + // snapshot-3 + inputData.addData(("c", "2024-06-03")) + stream.processAllAvailable() + + // This partition never expires. 
+ inputData.addData(("Never-expire", "9999-09-09")) + stream.processAllAvailable() + + checkAnswer( + query(), + Row("a", "2024-06-01") :: Row("b", "2024-06-02") :: Row("c", "2024-06-03") :: Row( + "Never-expire", + "9999-09-09") :: Nil) + // call expire_partitions. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T', expiration_time => '1 d'" + + ", timestamp_formatter => 'yyyy-MM-dd')"), + Row("pt=2024-06-01") :: Row("pt=2024-06-02") :: Nil + ) + + checkAnswer(query(), Row("c", "2024-06-03") :: Row("Never-expire", "9999-09-09") :: Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: expire partitions load table property first") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING) + |TBLPROPERTIES ( + | 'primary-key' = 'k,pt', + | 'bucket' = '1', + | 'write-only' = 'true', + | 'partition.timestamp-formatter' = 'yyyy-MM-dd', + | 'partition.expiration-max-num'='2') + |PARTITIONED BY (pt) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // snapshot-1 + inputData.addData(("a", "2024-06-01")) + stream.processAllAvailable() + + // snapshot-2 + inputData.addData(("b", "2024-06-02")) + stream.processAllAvailable() + + // snapshot-3 + inputData.addData(("c", "2024-06-03")) + stream.processAllAvailable() + + // This partition never expires. 
+ inputData.addData(("Never-expire", "9999-09-09")) + stream.processAllAvailable() + + checkAnswer( + query(), + Row("a", "2024-06-01") :: Row("b", "2024-06-02") :: Row("c", "2024-06-03") :: Row( + "Never-expire", + "9999-09-09") :: Nil) + + // 'partition.timestamp-formatter' value using table property. + // 'partition.expiration-time' value using procedure parameter. + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T', expiration_time => '1 d')"), + Row("pt=2024-06-01") :: Row("pt=2024-06-02") :: Nil + ) + + checkAnswer(query(), Row("c", "2024-06-03") :: Row("Never-expire", "9999-09-09") :: Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: expire partitions add options parameter") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (k STRING, pt STRING) + |TBLPROPERTIES ( + | 'primary-key' = 'k,pt', + | 'bucket' = '1') + |PARTITIONED BY (pt) + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(String, String)] + val stream = inputData + .toDS() + .toDF("k", "pt") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T") + + try { + // snapshot-1 + inputData.addData(("a", "2024-06-01")) + stream.processAllAvailable() + + // snapshot-2 + inputData.addData(("b", "2024-06-02")) + stream.processAllAvailable() + + // snapshot-3 + inputData.addData(("c", "2024-06-03")) + stream.processAllAvailable() + + // This partition never expires. + inputData.addData(("Never-expire", "9999-09-09")) + stream.processAllAvailable() + + checkAnswer( + query(), + Row("a", "2024-06-01") :: Row("b", "2024-06-02") :: Row("c", "2024-06-03") :: Row( + "Never-expire", + "9999-09-09") :: Nil) + + // set conf in options. 
+ checkAnswer( + spark.sql( + "CALL paimon.sys.expire_partitions(table => 'test.T', " + + "options => 'partition.expiration-time = 1d," + + " partition.expiration-max-num = 2," + + " partition.expiration-batch-size = 2," + + " partition.timestamp-formatter = yyyy-MM-dd')"), + Row("pt=2024-06-01") :: Row("pt=2024-06-02") :: Nil + ) + + checkAnswer(query(), Row("c", "2024-06-03") :: Row("Never-expire", "9999-09-09") :: Nil) + + } finally { + stream.stop() + } + } + } + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala new file mode 100644 index 000000000000..bbaf88568e2d --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.spark.PaimonSparkTestBase +import org.apache.paimon.utils.SnapshotManager + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest +import org.assertj.core.api.Assertions.{assertThat, assertThatIllegalArgumentException} + +import java.sql.Timestamp + +class ExpireSnapshotsProcedureTest extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + + test("Paimon Procedure: expire snapshots") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3', + |'write-only' = 'true', 'snapshot.num-retained.min' = '1') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + + // expire + checkAnswer( + spark.sql("CALL paimon.sys.expire_snapshots(table => 'test.T', retain_max => 2)"), + Row(1) :: Nil) + + checkAnswer( + spark.sql("SELECT snapshot_id FROM paimon.test.`T$snapshots`"), + Row(2L) :: Row(3L) :: Nil) + } finally { + 
stream.stop() + } + } + } + } + + test("Paimon Procedure: expire snapshots retainMax retainMin value check") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + + // expire assert throw exception + assertThrows[IllegalArgumentException] { + spark.sql( + "CALL paimon.sys.expire_snapshots(table => 'test.T', retain_max => 2, retain_min => 3)") + } + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: test parameter order_than with string type") { + sql( + "CREATE TABLE T (a INT, b STRING) " + + "TBLPROPERTIES ( 'num-sorted-run.compaction-trigger' = '999'," + + "'write-only' = 'true', 'snapshot.num-retained.min' = '1')") + val table = loadTable("T") + val snapshotManager = table.snapshotManager + + // generate 5 snapshot + for (i <- 1 to 5) { + sql(s"INSERT INTO T VALUES ($i, '$i')") + } + checkSnapshots(snapshotManager, 1, 5) + + val timestamp = new Timestamp(snapshotManager.latestSnapshot().timeMillis) + spark.sql( + s"CALL 
paimon.sys.expire_snapshots(table => 'test.T', older_than => '${timestamp.toString}', max_deletes => 2)") + checkSnapshots(snapshotManager, 3, 5) + } + + test("Paimon Procedure: expire snapshots load table property first") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3', + |'snapshot.num-retained.max' = '2', + |'snapshot.num-retained.min' = '1', + |'write-only' = 'true') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + + // expire + checkAnswer( + spark.sql("CALL paimon.sys.expire_snapshots(table => 'test.T')"), + Row(1) :: Nil) + + checkAnswer( + spark.sql("SELECT snapshot_id FROM paimon.test.`T$snapshots`"), + Row(2L) :: Row(3L) :: Nil) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: expire snapshots add options parameter") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3', 'write-only' = 'true') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, 
String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + + checkAnswer( + spark.sql( + "CALL paimon.sys.expire_snapshots(table => 'test.T', options => 'snapshot.num-retained.max=2, snapshot.num-retained.min=1')"), + Row(1L) :: Nil + ) + + checkAnswer( + spark.sql("SELECT snapshot_id FROM paimon.test.`T$snapshots`"), + Row(2L) :: Row(3L) :: Nil) + } finally { + stream.stop() + } + } + } + } + + def checkSnapshots(sm: SnapshotManager, earliest: Int, latest: Int): Unit = { + assertThat(sm.snapshotCount).isEqualTo(latest - earliest + 1) + assertThat(sm.earliestSnapshotId).isEqualTo(earliest) + assertThat(sm.latestSnapshotId).isEqualTo(latest) + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTest.scala new file mode 100644 index 000000000000..d57846709877 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.procedure + +class ProcedureTest extends ProcedureTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala new file mode 100644 index 000000000000..078823c3ef37 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.procedure + +import org.apache.paimon.spark.PaimonSparkTestBase + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.execution.streaming.runtime.MemoryStream +import org.apache.spark.sql.streaming.StreamTest + +class RollbackProcedureTest extends PaimonSparkTestBase with StreamTest { + + import testImplicits._ + + test("Paimon Procedure: rollback to snapshot and tag") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val table = loadTable("T") + val location = table.location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + checkAnswer( + spark.sql( + "CALL paimon.sys.create_tag(table => 'test.T', tag => 'test_tag', snapshot => 1)"), + Row(true) :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + assertThrows[RuntimeException] { + spark.sql("CALL paimon.sys.rollback(table => 'test.T_exception', version => '2')") + } + // rollback to snapshot + checkAnswer( + spark.sql("CALL paimon.sys.rollback(table => 'test.T', version => '2')"), + Row(table.latestSnapshot().get().id, 2) :: Nil) + checkAnswer(query(), Row(1, "a") :: 
Row(2, "b") :: Nil) + + // rollback to tag + val taggedSnapshotId = table.tagManager().getOrThrow("test_tag").trimToSnapshot().id + checkAnswer( + spark.sql("CALL paimon.sys.rollback(table => 'test.T', version => 'test_tag')"), + Row(table.latestSnapshot().get().id, taggedSnapshotId) :: Nil) + checkAnswer(query(), Row(1, "a") :: Nil) + } finally { + stream.stop() + } + } + } + } + + test("Paimon Procedure: rollback to tag check test") { + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3', 'file.format'='orc') + |""".stripMargin) + + val table = loadTable("T") + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + // snapshot-1 + spark.sql("insert into T select 1, 'a'") + checkAnswer(query(), Row(1, "a") :: Nil) + + checkAnswer( + spark.sql("CALL paimon.sys.create_tag(table => 'test.T', tag => '20250122', snapshot => 1)"), + Row(true) :: Nil) + + // snapshot-2 + spark.sql("insert into T select 2, 'b'") + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // snapshot-3 + spark.sql("insert into T select 3, 'c'") + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) + + // snapshot-4 + spark.sql("insert into T select 4, 'd'") + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Row(4, "d") :: Nil) + + assertThrows[RuntimeException] { + spark.sql("CALL paimon.sys.rollback(table => 'test.T_exception', version => '4')") + } + // rollback to snapshot + checkAnswer( + spark.sql("CALL paimon.sys.rollback(table => 'test.T', version => '3')"), + Row(table.latestSnapshot().get().id, 3) :: Nil) + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) + + // version/snapshot/tag can only set one of them + assertThrows[RuntimeException] { + spark.sql( + "CALL paimon.sys.rollback(table => 'test.T', version => '20250122', tag => '20250122')") + } + + assertThrows[RuntimeException] { + spark.sql("CALL paimon.sys.rollback(table => 'test.T', version => '20250122', 
snapshot => 1)") + } + + assertThrows[RuntimeException] { + spark.sql("CALL paimon.sys.rollback(table => 'test.T', tag => '20250122', snapshot => 1)") + } + + // rollback to snapshot + spark.sql("CALL paimon.sys.rollback(table => 'test.T', snapshot => 2)") + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + // rollback to tag + spark.sql("CALL paimon.sys.rollback(table => 'test.T', tag => '20250122')") + checkAnswer(query(), Row(1, "a") :: Nil) + } + + test("Paimon Procedure: rollback to timestamp") { + failAfter(streamingTimeout) { + withTempDir { + checkpointDir => + // define a change-log table and test `forEachBatch` api + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ('primary-key'='a', 'bucket'='3') + |""".stripMargin) + val location = loadTable("T").location().toString + + val inputData = MemoryStream[(Int, String)] + val stream = inputData + .toDS() + .toDF("a", "b") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { + (batch: Dataset[Row], _: Long) => + batch.write.format("paimon").mode("append").save(location) + } + .start() + + val table = loadTable("T") + + val query = () => spark.sql("SELECT * FROM T ORDER BY a") + + try { + // snapshot-1 + inputData.addData((1, "a")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Nil) + + // snapshot-2 + inputData.addData((2, "b")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + val timestamp = System.currentTimeMillis() + + // snapshot-3 + inputData.addData((2, "b2")) + stream.processAllAvailable() + checkAnswer(query(), Row(1, "a") :: Row(2, "b2") :: Nil) + + // rollback to timestamp + checkAnswer( + spark.sql( + s"CALL paimon.sys.rollback_to_timestamp(table => 'test.T', timestamp => $timestamp)"), + Row(table.latestSnapshot().get().id, 2) :: Nil) + checkAnswer(query(), Row(1, "a") :: Row(2, "b") :: Nil) + + } finally { + stream.stop() + } + } + } + } + + test("Paimon 
Procedure: rollback with cache") { + sql("CREATE TABLE T (id INT)") + sql("INSERT INTO T VALUES (1), (2), (3), (4)") + sql("DELETE FROM T WHERE id = 1") + sql("CALL sys.rollback(table => 'T', version => '1')") + sql("DELETE FROM T WHERE id = 1") + checkAnswer(sql("SELECT * FROM T ORDER BY id"), Seq(Row(2), Row(3), Row(4))) + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTest.scala new file mode 100644 index 000000000000..255906d04bf2 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class AnalyzeTableTest extends AnalyzeTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLTest.scala new file mode 100644 index 000000000000..b729f57b33e7 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class DDLTest extends DDLTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTest.scala new file mode 100644 index 000000000000..cb139d2a57be --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTest.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class DDLWithHiveCatalogTest extends DDLWithHiveCatalogTestBase {} + +class DefaultDatabaseTest extends DefaultDatabaseTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala new file mode 100644 index 000000000000..6170e2fd6c5c --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class DataFrameWriteTest extends DataFrameWriteTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTestBase.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTestBase.scala new file mode 100644 index 000000000000..b25e41a3fb42 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTestBase.scala @@ -0,0 +1,701 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +import org.apache.paimon.spark.PaimonSparkTestBase + +import org.apache.spark.SparkConf +import org.apache.spark.sql.Row +import org.apache.spark.sql.types.DecimalType +import org.junit.jupiter.api.Assertions + +import java.sql.{Date, Timestamp} + +abstract class DataFrameWriteTestBase extends PaimonSparkTestBase { + + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.sql.catalog.paimon.cache-enabled", "false") + } + + import testImplicits._ + + test("Paimon dataframe: insert into partitioned table") { + for (useV2Write <- Seq("true", "false")) { + withSparkSQLConf("spark.paimon.write.use-v2-write" -> useV2Write) { + withTable("t") { + // create table + Seq((1, "x1", "p1"), (2, "x2", "p2")) + .toDF("a", "b", "pt") + .write + .format("paimon") + .option("primary-key", "a,pt") + .partitionBy("pt") + .saveAsTable("t") + + // insert into + Seq((3, "x3", "p3")) + .toDF("a", "b", "pt") + .write + .format("paimon") + .mode("append") + .insertInto("t") + checkAnswer( + spark.read.format("paimon").table("t").orderBy("a"), + Seq(Row(1, "x1", "p1"), Row(2, "x2", "p2"), Row(3, "x3", "p3")) + ) + checkAnswer( + sql("SHOW PARTITIONS t"), + Seq(Row("pt=p1"), Row("pt=p2"), Row("pt=p3")) + ) + + // dynamic insert overwrite + withSparkSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { + Seq((4, "x4", "p1")) + .toDF("a", "b", "pt") + .write + .format("paimon") + .mode("overwrite") + .insertInto("t") + } + checkAnswer( + spark.read.format("paimon").table("t").orderBy("a"), + Seq(Row(2, "x2", "p2"), Row(3, "x3", "p3"), Row(4, "x4", "p1")) + ) + checkAnswer( + sql("SHOW PARTITIONS t"), + Seq(Row("pt=p1"), Row("pt=p2"), Row("pt=p3")) + ) + + // insert overwrite + Seq((5, "x5", "p1")) + .toDF("a", "b", "pt") + .write + .format("paimon") + .mode("overwrite") + .insertInto("t") + checkAnswer( + spark.read.format("paimon").table("t").orderBy("a"), + Seq(Row(5, "x5", "p1")) + ) + checkAnswer( + sql("SHOW 
PARTITIONS t"), + Seq(Row("pt=p1")) + ) + } + } + } + } + + test("Paimon dataframe: save as partitioned table") { + for (useV2Write <- Seq("true", "false")) { + withSparkSQLConf("spark.paimon.write.use-v2-write" -> useV2Write) { + withTable("t") { + // create table + Seq((1, "x1", "p1"), (2, "x2", "p2")) + .toDF("a", "b", "pt") + .write + .format("paimon") + .mode("append") + .option("primary-key", "a,pt") + .partitionBy("pt") + .saveAsTable("t") + + // saveAsTable with append mode + Seq((3, "x3", "p3")) + .toDF("a", "b", "pt") + .write + .format("paimon") + .mode("append") + .saveAsTable("t") + checkAnswer( + spark.read.format("paimon").table("t").orderBy("a"), + Seq(Row(1, "x1", "p1"), Row(2, "x2", "p2"), Row(3, "x3", "p3")) + ) + checkAnswer( + sql("SHOW PARTITIONS t"), + Seq(Row("pt=p1"), Row("pt=p2"), Row("pt=p3")) + ) + + // saveAsTable with overwrite mode will call replace table internal, + // so here we set the props and partitions again. + Seq((5, "x5", "p1")) + .toDF("a", "b", "pt") + .write + .format("paimon") + .option("primary-key", "a,pt") + .partitionBy("pt") + .mode("overwrite") + .saveAsTable("t") + checkAnswer( + spark.read.format("paimon").table("t").orderBy("a"), + Seq(Row(5, "x5", "p1")) + ) + checkAnswer( + sql("SHOW PARTITIONS t"), + Seq(Row("pt=p1")) + ) + } + } + } + } + + test("Paimon: DataFrameWrite.saveAsTable") { + withTable("test_ctas") { + Seq((1L, "x1"), (2L, "x2")) + .toDF("a", "b") + .write + .format("paimon") + .mode("append") + .option("primary-key", "a") + .option("bucket", "-1") + .option("target-file-size", "256MB") + .option("write.merge-schema", "true") + .option("write.merge-schema.explicit-cast", "true") + .saveAsTable("test_ctas") + + val paimonTable = loadTable("test_ctas") + Assertions.assertEquals(1, paimonTable.primaryKeys().size()) + Assertions.assertEquals("a", paimonTable.primaryKeys().get(0)) + + // check all the core options + Assertions.assertEquals("-1", paimonTable.options().get("bucket")) + 
Assertions.assertEquals("256MB", paimonTable.options().get("target-file-size")) + + // non-core options should not be here. + Assertions.assertFalse(paimonTable.options().containsKey("write.merge-schema")) + Assertions.assertFalse(paimonTable.options().containsKey("write.merge-schema.explicit-cast")) + } + } + + test("Paimon: DataFrameWrite partition table") { + withTable("t") { + spark.sql(s""" + |CREATE TABLE t (a INT, b STRING, dt STRING) PARTITIONED BY(dt) + |TBLPROPERTIES ('file.format' = 'avro', 'bucket' = 2, 'bucket-key' = 'b') + |""".stripMargin) + + val table = loadTable("t") + val location = table.location().toString + + Seq((1, "x1", "a"), (2, "x2", "b")) + .toDF("a", "b", "c") + .write + .format("paimon") + .mode("append") + .save(location) + checkAnswer(sql("SELECT * FROM t"), Row(1, "x1", "a") :: Row(2, "x2", "b") :: Nil) + } + } + + fileFormats.foreach { + fileFormat => + test(s"Paimon: DataFrameWrite.saveAsTable in ByName mode, file.format: $fileFormat") { + withTable("t1", "t2") { + spark.sql(s""" + |CREATE TABLE t1 (col1 STRING, col2 INT, col3 DOUBLE) + |TBLPROPERTIES ('file.format' = '$fileFormat') + |""".stripMargin) + + spark.sql(s""" + |CREATE TABLE t2 (col2 INT, col3 DOUBLE, col1 STRING) + |TBLPROPERTIES ('file.format' = '$fileFormat') + |""".stripMargin) + + sql(s""" + |INSERT INTO TABLE t1 VALUES + |("Hello", 1, 1.1), + |("World", 2, 2.2), + |("Paimon", 3, 3.3); + |""".stripMargin) + + spark.table("t1").write.format("paimon").mode("append").saveAsTable("t2") + checkAnswer( + sql("SELECT * FROM t2 ORDER BY col2"), + Row(1, 1.1d, "Hello") :: Row(2, 2.2d, "World") :: Row(3, 3.3d, "Paimon") :: Nil) + } + } + } + + fileFormats.foreach { + fileFormat => + test( + s"Paimon: DataFrameWrite.saveAsTable with complex data type in ByName mode, file.format: $fileFormat") { + withTable("t1", "t2") { + spark.sql( + s""" + |CREATE TABLE t1 (a STRING, b INT, c STRUCT, d ARRAY>>, e ARRAY) + |TBLPROPERTIES ('file.format' = '$fileFormat') + |""".stripMargin) + 
+ spark.sql( + s""" + |CREATE TABLE t2 (b INT, c STRUCT, d ARRAY, d1 TIMESTAMP>>, e ARRAY, a STRING) + |TBLPROPERTIES ('file.format' = '$fileFormat') + |""".stripMargin) + + sql(s""" + |INSERT INTO TABLE t1 VALUES + |("Hello", 1, struct(1.1, 1000), array(struct(timestamp'2024-01-01 00:00:00', map("k1", "v1")), struct(timestamp'2024-08-01 00:00:00', map("k1", "v11"))), array(123, 345)), + |("World", 2, struct(2.2, 2000), array(struct(timestamp'2024-02-01 00:00:00', map("k2", "v2"))), array(234, 456)), + |("Paimon", 3, struct(3.3, 3000), null, array(345, 567)); + |""".stripMargin) + + spark.table("t1").write.format("paimon").mode("append").saveAsTable("t2") + checkAnswer( + sql("SELECT * FROM t2 ORDER BY b"), + Row( + 1, + Row(1000L, 1.1d), + Array( + Row(Map("k1" -> "v1"), Timestamp.valueOf("2024-01-01 00:00:00")), + Row(Map("k1" -> "v11"), Timestamp.valueOf("2024-08-01 00:00:00"))), + Array(123, 345), + "Hello" + ) + :: Row( + 2, + Row(2000L, 2.2d), + Array(Row(Map("k2" -> "v2"), Timestamp.valueOf("2024-02-01 00:00:00"))), + Array(234, 456), + "World") + :: Row(3, Row(3000L, 3.3d), null, Array(345, 567), "Paimon") :: Nil + ) + } + } + } + + withPk.foreach { + hasPk => + bucketModes.foreach { + bucket => + test(s"Write data into Paimon directly: has-pk: $hasPk, bucket: $bucket") { + + val prop = if (hasPk) { + s"'primary-key'='a', 'bucket' = '$bucket' " + } else if (bucket != -1) { + s"'bucket-key'='a', 'bucket' = '$bucket' " + } else { + "'write-only'='true'" + } + + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ($prop) + |""".stripMargin) + + val paimonTable = loadTable("T") + val location = paimonTable.location().toString + + val df1 = Seq((1, "a"), (2, "b")).toDF("a", "b") + df1.write.format("paimon").mode("append").save(location) + checkAnswer( + spark.sql("SELECT * FROM T ORDER BY a, b"), + Row(1, "a") :: Row(2, "b") :: Nil) + + val df2 = Seq((1, "a2"), (3, "c")).toDF("a", "b") + df2.write.format("paimon").mode("append").save(location) + 
val expected = if (hasPk) { + Row(1, "a2") :: Row(2, "b") :: Row(3, "c") :: Nil + } else { + Row(1, "a") :: Row(1, "a2") :: Row(2, "b") :: Row(3, "c") :: Nil + } + checkAnswer(spark.sql("SELECT * FROM T ORDER BY a, b"), expected) + + val df3 = Seq((4, "d"), (5, "e")).toDF("a", "b") + df3.write.format("paimon").mode("overwrite").save(location) + checkAnswer( + spark.sql("SELECT * FROM T ORDER BY a, b"), + Row(4, "d") :: Row(5, "e") :: Nil) + } + } + } + + fileFormats.foreach { + format => + withPk.foreach { + hasPk => + bucketModes.foreach { + bucket => + test( + s"Schema evolution: write data into Paimon: $hasPk, bucket: $bucket, format: $format") { + val _spark = spark + import _spark.implicits._ + + val prop = if (hasPk) { + s"'primary-key'='a', 'bucket' = '$bucket', 'file.format' = '$format'" + } else if (bucket != -1) { + s"'bucket-key'='a', 'bucket' = '$bucket', 'file.format' = '$format'" + } else { + s"'write-only'='true', 'file.format' = '$format'" + } + + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ($prop) + |""".stripMargin) + + val paimonTable = loadTable("T") + val location = paimonTable.location().toString + + val df1 = Seq((1, "a"), (2, "b")).toDF("a", "b") + df1.write.format("paimon").mode("append").save(location) + checkAnswer( + spark.sql("SELECT * FROM T ORDER BY a, b"), + Row(1, "a") :: Row(2, "b") :: Nil) + + // Case 1: two additional fields + val df2 = Seq((1, "a2", 123L, Map("k" -> 11.1)), (3, "c", 345L, Map("k" -> 33.3))) + .toDF("a", "b", "c", "d") + df2.write + .format("paimon") + .mode("append") + .option("write.merge-schema", "true") + .save(location) + val expected2 = if (hasPk) { + Row(1, "a2", 123L, Map("k" -> 11.1)) :: + Row(2, "b", null, null) :: Row(3, "c", 345L, Map("k" -> 33.3)) :: Nil + } else { + Row(1, "a", null, null) :: Row(1, "a2", 123L, Map("k" -> 11.1)) :: Row( + 2, + "b", + null, + null) :: Row(3, "c", 345L, Map("k" -> 33.3)) :: Nil + } + checkAnswer(spark.sql("SELECT * FROM T ORDER BY a, b"), 
expected2) + + // Case 2: two fields with the evolved types: Int -> Long, Long -> Decimal + val df3 = Seq( + (2L, "b2", BigDecimal.decimal(234), Map("k" -> 22.2)), + (4L, "d", BigDecimal.decimal(456), Map("k" -> 44.4))).toDF("a", "b", "c", "d") + df3.write + .format("paimon") + .mode("append") + .option("write.merge-schema", "true") + .save(location) + val expected3 = if (hasPk) { + Row(1L, "a2", BigDecimal.decimal(123), Map("k" -> 11.1)) :: Row( + 2L, + "b2", + BigDecimal.decimal(234), + Map("k" -> 22.2)) :: Row( + 3L, + "c", + BigDecimal.decimal(345), + Map("k" -> 33.3)) :: Row( + 4L, + "d", + BigDecimal.decimal(456), + Map("k" -> 44.4)) :: Nil + } else { + Row(1L, "a", null, null) :: Row( + 1L, + "a2", + BigDecimal.decimal(123), + Map("k" -> 11.1)) :: Row(2L, "b", null, null) :: Row( + 2L, + "b2", + BigDecimal.decimal(234), + Map("k" -> 22.2)) :: Row( + 3L, + "c", + BigDecimal.decimal(345), + Map("k" -> 33.3)) :: Row( + 4L, + "d", + BigDecimal.decimal(456), + Map("k" -> 44.4)) :: Nil + } + checkAnswer(spark.sql("SELECT * FROM T ORDER BY a, b"), expected3) + + // Case 3: insert Decimal(20,18) to Decimal(38,18) + val df4 = Seq((99L, "df4", BigDecimal.decimal(4.0), Map("4" -> 4.1))) + .toDF("a", "b", "c", "d") + .selectExpr("a", "b", "cast(c as decimal(20,18)) as c", "d") + df4.write + .format("paimon") + .mode("append") + .option("write.merge-schema", "true") + .save(location) + val expected4 = + expected3 ++ Seq(Row(99L, "df4", BigDecimal.decimal(4.0), Map("4" -> 4.1))) + checkAnswer(spark.sql("SELECT * FROM T ORDER BY a, b"), expected4) + val decimalType = + spark.table("T").schema.apply(2).dataType.asInstanceOf[DecimalType] + assert(decimalType.precision == 38) + assert(decimalType.scale == 18) + } + } + } + } + + withPk.foreach { + hasPk => + bucketModes.foreach { + bucket => + test( + s"Schema evolution: write data into Paimon with allowExplicitCast = true: $hasPk, bucket: $bucket") { + + val prop = if (hasPk) { + s"'primary-key'='a', 'bucket' = '$bucket' " + 
} else if (bucket != -1) { + s"'bucket-key'='a', 'bucket' = '$bucket' " + } else { + "'write-only'='true'" + } + + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |TBLPROPERTIES ($prop) + |""".stripMargin) + + val paimonTable = loadTable("T") + val location = paimonTable.location().toString + + val df1 = Seq((1, "2023-08-01"), (2, "2023-08-02")).toDF("a", "b") + df1.write.format("paimon").mode("append").save(location) + checkAnswer( + spark.sql("SELECT * FROM T ORDER BY a, b"), + Row(1, "2023-08-01") :: Row(2, "2023-08-02") :: Nil) + + // Case 1: two additional fields: DoubleType and TimestampType + val ts = java.sql.Timestamp.valueOf("2023-08-01 10:00:00.0") + val df2 = Seq((1, "2023-08-01", 12.3d, ts), (3, "2023-08-03", 34.5d, ts)) + .toDF("a", "b", "c", "d") + df2.write + .format("paimon") + .mode("append") + .option("write.merge-schema", "true") + .save(location) + val expected2 = if (hasPk) { + Row(1, "2023-08-01", 12.3d, ts) :: + Row(2, "2023-08-02", null, null) :: Row(3, "2023-08-03", 34.5d, ts) :: Nil + } else { + Row(1, "2023-08-01", null, null) :: Row(1, "2023-08-01", 12.3d, ts) :: Row( + 2, + "2023-08-02", + null, + null) :: Row(3, "2023-08-03", 34.5d, ts) :: Nil + } + checkAnswer(spark.sql("SELECT * FROM T ORDER BY a, b"), expected2) + + // Case 2: a: Int -> Long, b: String -> Date, c: Long -> Int, d: Map -> String + val date = java.sql.Date.valueOf("2023-07-31") + val df3 = Seq((2L, date, 234, null), (4L, date, 456, "2023-08-01 11:00:00.0")).toDF( + "a", + "b", + "c", + "d") + + // throw UnsupportedOperationException if write.merge-schema.explicit-cast = false + assertThrows[UnsupportedOperationException] { + df3.write + .format("paimon") + .mode("append") + .option("write.merge-schema", "true") + .save(location) + } + // merge schema and write data when write.merge-schema.explicit-cast = true + df3.write + .format("paimon") + .mode("append") + .option("write.merge-schema", "true") + .option("write.merge-schema.explicit-cast", "true") + 
.save(location) + val expected3 = if (hasPk) { + Row(1L, Date.valueOf("2023-08-01"), 12, ts.toString) :: Row( + 2L, + date, + 234, + null) :: Row(3L, Date.valueOf("2023-08-03"), 34, ts.toString) :: Row( + 4L, + date, + 456, + "2023-08-01 11:00:00.0") :: Nil + } else { + Row(1L, Date.valueOf("2023-08-01"), null, null) :: Row( + 1L, + Date.valueOf("2023-08-01"), + 12, + ts.toString) :: Row(2L, date, 234, null) :: Row( + 2L, + Date.valueOf("2023-08-02"), + null, + null) :: Row(3L, Date.valueOf("2023-08-03"), 34, ts.toString) :: Row( + 4L, + date, + 456, + "2023-08-01 11:00:00.0") :: Nil + } + checkAnswer( + spark.sql("SELECT a, b, c, substring(d, 0, 21) FROM T ORDER BY a, b"), + expected3) + + } + } + } + + withPk.foreach { + hasPk => + test(s"Support v2 write with overwrite, hasPk: $hasPk") { + withTable("t") { + val prop = if (hasPk) { + "'primary-key'='c1'" + } else { + "'write-only'='true'" + } + spark.sql(s""" + |CREATE TABLE t (c1 INT, c2 STRING) PARTITIONED BY(p1 String, p2 string) + |TBLPROPERTIES ($prop) + |""".stripMargin) + + spark + .range(3) + .selectExpr("id as c1", "id as c2", "'a' as p1", "id as p2") + .writeTo("t") + .overwrite($"p1" === "a") + checkAnswer( + spark.sql("SELECT * FROM t ORDER BY c1"), + Row(0, "0", "a", "0") :: Row(1, "1", "a", "1") :: Row(2, "2", "a", "2") :: Nil + ) + + spark + .range(7, 10) + .selectExpr("id as c1", "id as c2", "'a' as p1", "id as p2") + .writeTo("t") + .overwrite($"p1" === "a") + checkAnswer( + spark.sql("SELECT * FROM t ORDER BY c1"), + Row(7, "7", "a", "7") :: Row(8, "8", "a", "8") :: Row(9, "9", "a", "9") :: Nil + ) + + spark + .range(2) + .selectExpr("id as c1", "id as c2", "'a' as p1", "9 as p2") + .writeTo("t") + .overwrite(($"p1" <=> "a").and($"p2" === "9")) + checkAnswer( + spark.sql("SELECT * FROM t ORDER BY c1"), + Row(0, "0", "a", "9") :: Row(1, "1", "a", "9") :: Row(7, "7", "a", "7") :: + Row(8, "8", "a", "8") :: Nil + ) + + // bad case + val msg1 = intercept[Exception] { + spark + .range(2) + 
.selectExpr("id as c1", "id as c2", "'a' as p1", "id as p2") + .writeTo("t") + .overwrite($"p1" =!= "a") + }.getMessage + assert(msg1.contains("Only support Overwrite filters with Equal and EqualNullSafe")) + + val msg2 = intercept[Exception] { + spark + .range(2) + .selectExpr("id as c1", "id as c2", "'a' as p1", "id as p2") + .writeTo("t") + .overwrite($"p1" === $"c2") + }.getMessage + if (gteqSpark3_4) { + assert(msg2.contains("Table does not support overwrite by expression")) + } else { + assert(msg2.contains("cannot translate expression to source filter")) + } + + val msg3 = intercept[Exception] { + spark + .range(2) + .selectExpr("id as c1", "id as c2", "'a' as p1", "id as p2") + .writeTo("t") + .overwrite($"c1" === ($"c2" + 1)) + }.getMessage + if (gteqSpark4_0) { + assert(msg3.contains("Table does not support overwrite by expression")) + } else { + assert(msg3.contains("cannot translate expression to source filter")) + } + + val msg4 = intercept[Exception] { + spark + .range(2) + .selectExpr("id as c1", "id as c2", "'a' as p1", "id as p2") + .writeTo("t") + .overwrite(($"p1" === "a").and($"p1" === "b")) + }.getMessage + assert(msg4.contains("Only support Overwrite with one filter for each partition column")) + + // Overwrite a partition which is not the specified + val msg5 = intercept[Exception] { + spark + .range(2) + .selectExpr("id as c1", "id as c2", "'a' as p1", "id as p2") + .writeTo("t") + .overwrite($"p1" === "b") + }.getMessage + assert(msg5.contains("does not belong to this partition")) + } + } + } + + test("Paimon Schema Evolution: some columns is absent in the coming data") { + + spark.sql(s""" + |CREATE TABLE T (a INT, b STRING) + |""".stripMargin) + + val paimonTable = loadTable("T") + val location = paimonTable.location().toString + + val df1 = Seq((1, "2023-08-01"), (2, "2023-08-02")).toDF("a", "b") + df1.write.format("paimon").mode("append").save(location) + checkAnswer( + spark.sql("SELECT * FROM T ORDER BY a, b"), + Row(1, "2023-08-01") 
:: Row(2, "2023-08-02") :: Nil) + + // Case 1: two additional fields: DoubleType and TimestampType + val ts = java.sql.Timestamp.valueOf("2023-08-01 10:00:00.0") + val df2 = Seq((1, "2023-08-01", 12.3d, ts), (3, "2023-08-03", 34.5d, ts)) + .toDF("a", "b", "c", "d") + df2.write + .format("paimon") + .mode("append") + .option("write.merge-schema", "true") + .save(location) + + // Case 2: colum b and d are absent in the coming data + val df3 = Seq((4, 45.6d), (5, 56.7d)) + .toDF("a", "c") + df3.write + .format("paimon") + .mode("append") + .option("write.merge-schema", "true") + .save(location) + val expected3 = + Row(1, "2023-08-01", null, null) :: Row(1, "2023-08-01", 12.3d, ts) :: Row( + 2, + "2023-08-02", + null, + null) :: Row(3, "2023-08-03", 34.5d, ts) :: Row(4, null, 45.6d, null) :: Row( + 5, + null, + 56.7d, + null) :: Nil + checkAnswer(spark.sql("SELECT * FROM T ORDER BY a, b"), expected3) + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTest.scala new file mode 100644 index 000000000000..8d620ece8245 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTest.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +import org.apache.spark.SparkConf + +class DeleteFromTableTest extends DeleteFromTableTestBase { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "false") + } +} + +class V2DeleteFromTableTest extends DeleteFromTableTestBase { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "true") + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala new file mode 100644 index 000000000000..c6aa77419241 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class DescribeTableTest extends DescribeTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/FormatTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/FormatTableTest.scala new file mode 100644 index 000000000000..ba49976ab6c0 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/FormatTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class FormatTableTest extends FormatTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTest.scala new file mode 100644 index 000000000000..4f66584c303b --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class InsertOverwriteTableTest extends InsertOverwriteTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTest.scala new file mode 100644 index 000000000000..c83ee5493867 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTest.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +import org.apache.paimon.spark.{PaimonAppendBucketedTableTest, PaimonAppendNonBucketTableTest, PaimonPrimaryKeyBucketedTableTest, PaimonPrimaryKeyNonBucketTableTest} + +import org.apache.spark.SparkConf + +class MergeIntoPrimaryKeyBucketedTableTest + extends MergeIntoTableTestBase + with MergeIntoPrimaryKeyTableTest + with MergeIntoNotMatchedBySourceTest + with PaimonPrimaryKeyBucketedTableTest { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "false") + } +} + +class MergeIntoPrimaryKeyNonBucketTableTest + extends MergeIntoTableTestBase + with MergeIntoPrimaryKeyTableTest + with MergeIntoNotMatchedBySourceTest + with PaimonPrimaryKeyNonBucketTableTest { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "false") + } +} + +class MergeIntoAppendBucketedTableTest + extends MergeIntoTableTestBase + with MergeIntoAppendTableTest + with MergeIntoNotMatchedBySourceTest + with PaimonAppendBucketedTableTest { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "false") + } +} + +class MergeIntoAppendNonBucketedTableTest + extends MergeIntoTableTestBase + with 
MergeIntoAppendTableTest + with MergeIntoNotMatchedBySourceTest + with PaimonAppendNonBucketTableTest { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "false") + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTest.scala new file mode 100644 index 000000000000..635185a9ed0e --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class PaimonCompositePartitionKeyTest extends PaimonCompositePartitionKeyTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTest.scala new file mode 100644 index 000000000000..ec140a89bbd3 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTest.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{Attribute, GetStructField, NamedExpression, ScalarSubquery} +import org.apache.spark.sql.paimon.shims.SparkShimLoader + +class PaimonOptimizationTest extends PaimonOptimizationTestBase { + + override def extractorExpression( + cteIndex: Int, + output: Seq[Attribute], + fieldIndex: Int): NamedExpression = { + GetStructField( + ScalarSubquery( + SparkShimLoader.shim + .createCTERelationRef(cteIndex, resolved = true, output.toSeq, isStreaming = false)), + fieldIndex, + None) + .as("scalarsubquery()") + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala new file mode 100644 index 000000000000..26677d85c71a --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class PaimonPushDownTest extends PaimonPushDownTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonV1FunctionTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonV1FunctionTest.scala new file mode 100644 index 000000000000..f37fbad27033 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonV1FunctionTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class PaimonV1FunctionTest extends PaimonV1FunctionTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTest.scala new file mode 100644 index 000000000000..6ab8a2671b51 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class PaimonViewTest extends PaimonViewTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RewriteUpsertTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RewriteUpsertTableTest.scala new file mode 100644 index 000000000000..412aa3b30351 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RewriteUpsertTableTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class RewriteUpsertTableTest extends RewriteUpsertTableTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowIdPushDownTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowIdPushDownTest.scala new file mode 100644 index 000000000000..da4c9b854df3 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowIdPushDownTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class RowIdPushDownTest extends RowIdPushDownTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowTrackingTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowTrackingTest.scala new file mode 100644 index 000000000000..9f96840a7788 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/RowTrackingTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class RowTrackingTest extends RowTrackingTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/ShowColumnsTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/ShowColumnsTest.scala new file mode 100644 index 000000000000..6601dc2fca37 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/ShowColumnsTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class ShowColumnsTest extends PaimonShowColumnsTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/SparkV2FilterConverterTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/SparkV2FilterConverterTest.scala new file mode 100644 index 000000000000..21c4c8a495ed --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/SparkV2FilterConverterTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +class SparkV2FilterConverterTest extends SparkV2FilterConverterTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/TagDdlTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/TagDdlTest.scala new file mode 100644 index 000000000000..92309d54167b --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/TagDdlTest.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +class TagDdlTest extends PaimonTagDdlTestBase {} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTest.scala new file mode 100644 index 000000000000..3a0f56cd4820 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTest.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +import org.apache.spark.SparkConf + +class UpdateTableTest extends UpdateTableTestBase { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "false") + } +} + +class V2UpdateTableTest extends UpdateTableTestBase { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.write.use-v2-write", "true") + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VariantTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VariantTest.scala new file mode 100644 index 000000000000..94e9ac683f02 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VariantTest.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.spark.sql + +import org.apache.spark.SparkConf + +class VariantTest extends VariantTestBase { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.variant.inferShreddingSchema", "false") + } +} + +class VariantInferShreddingTest extends VariantTestBase { + override protected def sparkConf: SparkConf = { + super.sparkConf.set("spark.paimon.variant.inferShreddingSchema", "true") + } +} diff --git a/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VectorSearchPushDownTest.scala b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VectorSearchPushDownTest.scala new file mode 100644 index 000000000000..7ac3c5df0d00 --- /dev/null +++ b/paimon-spark/paimon-spark-4.1/src/test/scala/org/apache/paimon/spark/sql/VectorSearchPushDownTest.scala @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.sql + +import org.apache.paimon.spark.PaimonScan + +/** Tests for vector search table-valued function with global vector index. 
*/ +class VectorSearchPushDownTest extends BaseVectorSearchPushDownTest { + test("vector search with global index") { + withTable("T") { + spark.sql(""" + |CREATE TABLE T (id INT, v ARRAY) + |TBLPROPERTIES ( + | 'bucket' = '-1', + | 'global-index.row-count-per-shard' = '10000', + | 'row-tracking.enabled' = 'true', + | 'data-evolution.enabled' = 'true') + |""".stripMargin) + + // Insert 100 rows with predictable vectors + val values = (0 until 100) + .map( + i => s"($i, array(cast($i as float), cast(${i + 1} as float), cast(${i + 2} as float)))") + .mkString(",") + spark.sql(s"INSERT INTO T VALUES $values") + + // Create vector index + val output = spark + .sql("CALL sys.create_global_index(table => 'test.T', index_column => 'v', index_type => 'lucene-vector-knn', options => 'vector.dim=3')") + .collect() + .head + assert(output.getBoolean(0)) + + // Test vector search with table-valued function syntax + val result = spark + .sql(""" + |SELECT * FROM vector_search('T', 'v', array(50.0f, 51.0f, 52.0f), 5) + |""".stripMargin) + .collect() + + // The result should contain 5 rows + assert(result.length == 5) + + // Vector (50, 51, 52) should be most similar to the row with id=50 + assert(result.map(_.getInt(0)).contains(50)) + } + } + + test("vector search pushdown is applied in plan") { + withTable("T") { + spark.sql(""" + |CREATE TABLE T (id INT, v ARRAY) + |TBLPROPERTIES ( + | 'bucket' = '-1', + | 'global-index.row-count-per-shard' = '10000', + | 'row-tracking.enabled' = 'true', + | 'data-evolution.enabled' = 'true') + |""".stripMargin) + + val values = (0 until 10) + .map( + i => s"($i, array(cast($i as float), cast(${i + 1} as float), cast(${i + 2} as float)))") + .mkString(",") + spark.sql(s"INSERT INTO T VALUES $values") + + // Create vector index + spark + .sql("CALL sys.create_global_index(table => 'test.T', index_column => 'v', index_type => 'lucene-vector-knn', options => 'vector.dim=3')") + .collect() + + // Check that vector search is pushed down with table 
function syntax + val df = spark.sql(""" + |SELECT * FROM vector_search('T', 'v', array(50.0f, 51.0f, 52.0f), 5) + |""".stripMargin) + + // Get the scan from the executed plan (physical plan) + val executedPlan = df.queryExecution.executedPlan + val batchScans = executedPlan.collect { + case scan: org.apache.spark.sql.execution.datasources.v2.BatchScanExec => scan + } + + assert(batchScans.nonEmpty, "Should have a BatchScanExec in executed plan") + val paimonScans = batchScans.filter(_.scan.isInstanceOf[PaimonScan]) + assert(paimonScans.nonEmpty, "Should have a PaimonScan in executed plan") + + val paimonScan = paimonScans.head.scan.asInstanceOf[PaimonScan] + assert(paimonScan.pushedVectorSearch.isDefined, "Vector search should be pushed down") + assert(paimonScan.pushedVectorSearch.get.fieldName() == "v", "Field name should be 'v'") + assert(paimonScan.pushedVectorSearch.get.limit() == 5, "Limit should be 5") + } + } + + test("vector search topk returns correct results") { + withTable("T") { + spark.sql(""" + |CREATE TABLE T (id INT, v ARRAY) + |TBLPROPERTIES ( + | 'bucket' = '-1', + | 'global-index.row-count-per-shard' = '10000', + | 'row-tracking.enabled' = 'true', + | 'data-evolution.enabled' = 'true') + |""".stripMargin) + + // Insert rows with distinct vectors + val values = (1 to 100) + .map { + i => + val v = math.sqrt(3.0 * i * i) + val normalized = i.toFloat / v.toFloat + s"($i, array($normalized, $normalized, $normalized))" + } + .mkString(",") + spark.sql(s"INSERT INTO T VALUES $values") + + // Create vector index + spark.sql( + "CALL sys.create_global_index(table => 'test.T', index_column => 'v', index_type => 'lucene-vector-knn', options => 'vector.dim=3')") + + // Query for top 10 similar to (1, 1, 1) normalized + val result = spark + .sql(""" + |SELECT * FROM vector_search('T', 'v', array(0.577f, 0.577f, 0.577f), 10) + |""".stripMargin) + .collect() + + assert(result.length == 10) + } + } +} diff --git a/pom.xml b/pom.xml index 
0db5ac8d4560..eb5844e7b559 100644 --- a/pom.xml +++ b/pom.xml @@ -89,7 +89,7 @@ under the License. 1.20.1 2.12 2.12.18 - 2.13.16 + 2.13.17 ${scala212.version} ${scala212.version} 1.1.10.8 @@ -424,6 +424,7 @@ under the License. paimon-spark/paimon-spark4-common paimon-spark/paimon-spark-4.0 + paimon-spark/paimon-spark-4.1 17