diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
index a6710ff00848..1954ca81449e 100644
--- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
+++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
@@ -526,8 +526,8 @@ private Plan toIncrementalPlan(
                     totalBuckets = beforeEntries.get(0).totalBuckets();
                 }
 
-                // deduplicate
-                beforeEntries.removeIf(dataEntries::remove);
+                // deduplicate: remove entries common to both lists
+                deduplicate(beforeEntries, dataEntries);
 
                 List before =
                         beforeEntries.stream()
@@ -697,4 +697,38 @@ private Map toDeletionFiles(
         }
         return deletionFiles;
     }
+
+    /**
+     * Removes entries that appear in both lists, pairing them one-for-one like the former
+     * {@code beforeEntries.removeIf(dataEntries::remove)}.
+     *
+     * <p>When neither list contains duplicates, hash sets are used instead of repeated
+     * {@code List.remove()} scans. With duplicates, removing every equal element would drop
+     * more entries than the one-for-one pairing, so the list-based removal is kept.
+     *
+     * <p>Assumes the element type implements {@code equals} and {@code hashCode} consistently.
+     *
+     * @param beforeEntries first list; entries also found in {@code dataEntries} are removed
+     * @param dataEntries second list; entries also found in {@code beforeEntries} are removed
+     */
+    private static <T> void deduplicate(List<T> beforeEntries, List<T> dataEntries) {
+        if (beforeEntries.isEmpty() || dataEntries.isEmpty()) {
+            // Nothing can be common to both lists; leave them untouched.
+            return;
+        }
+
+        Set<T> common = new HashSet<>(beforeEntries);
+        Set<T> afterSet = new HashSet<>(dataEntries);
+        if (common.size() != beforeEntries.size() || afterSet.size() != dataEntries.size()) {
+            // Duplicates present: each match must consume exactly one entry from dataEntries.
+            beforeEntries.removeIf(dataEntries::remove);
+            return;
+        }
+
+        common.retainAll(afterSet);
+        if (!common.isEmpty()) {
+            beforeEntries.removeAll(common);
+            dataEntries.removeAll(common);
+        }
+    }
 }