From b7c4165ae3c945f343ea313d57a30d44dcf66492 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Sat, 28 Mar 2026 07:34:13 +0100 Subject: [PATCH 01/23] started working on boundaries for building pois --- scripts/filter_osm.sh | 1 + .../service/importer/ImportService.java | 106 ++++++++++++++---- .../service/importer/ImportStatistics.java | 9 ++ 3 files changed, 95 insertions(+), 21 deletions(-) diff --git a/scripts/filter_osm.sh b/scripts/filter_osm.sh index 492e638..b2c2afa 100755 --- a/scripts/filter_osm.sh +++ b/scripts/filter_osm.sh @@ -70,6 +70,7 @@ osmium tags-filter "$INPUT_FILE" \ w/building=yes,commercial,retail,industrial,office,apartments,residential \ r/boundary=administrative \ r/type=multipolygon \ + n/addr:* \ -o "$OUTPUT_FILE" --overwrite if [ $? -eq 0 ]; then diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index b6de56a..fe99bc0 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -67,6 +67,8 @@ public class ImportService { private final PaikkaConfiguration config; private final Map tagCache = new ConcurrentHashMap<>(1000); + private final Map nodeToBuildingWayCache = new ConcurrentHashMap<>(); + private final int fileReadWindowSize; private final AtomicLong sequence = new AtomicLong(0); @@ -327,7 +329,9 @@ private void pass1DiscoveryAndIndexing(Path pbfFile, RocksDB wayIndexDb, RocksDB try { if (type == EntityType.Node) { OsmNode node = (OsmNode) container.getEntity(); - if (isPoi(node)) { + boolean isAddressNode = hasAddressTags(node); + + if (isPoi(node) || isAddressNode) { PoiIndexRec rec = buildPoiIndexRecFromEntity(node); rec.lat = node.getLatitude(); rec.lon = node.getLongitude(); @@ -335,12 +339,17 @@ private void pass1DiscoveryAndIndexing(Path pbfFile, RocksDB wayIndexDb, RocksDB poiWriter.put(key, encodePoiIndexRec(rec)); neededWriter.put(s2Helper.longToByteArray(node.getId()), ONE); stats.incrementNodesFound(); + if (isAddressNode) { + stats.incrementAddressNodesFound(); + } } } else if (type == EntityType.Way) { OsmWay way = (OsmWay) container.getEntity(); boolean isPoi = isPoi(way); boolean isAdmin = isAdministrativeBoundaryWay(way); + boolean isBuilding = isBuildingWay(way); + if (isPoi || isAdmin) { stats.incrementWaysProcessed(); int n = way.getNumberOfNodes(); @@ -349,16 +358,21 @@ private void pass1DiscoveryAndIndexing(Path pbfFile, RocksDB wayIndexDb, RocksDB long nid = way.getNodeId(j); nodeIds[j] = nid; neededWriter.put(s2Helper.longToByteArray(nid), ONE); + + // Cache node-to-building-way mapping for address resolution + if (isPoi && isBuilding) { + nodeToBuildingWayCache.put(nid, way.getId()); + } } - wayWriter.put(s2Helper.longToByteArray(way.getId()), s2Helper.longArrayToByteArray(nodeIds)); + wayWriter.put(s2Helper.longToByteArray(way.getId()), + s2Helper.longArrayToByteArray(nodeIds)); + if (isPoi) { PoiIndexRec rec = buildPoiIndexRecFromEntity(way); - // lat/lon remain NaN for ways — resolved in Pass 2 reader byte[] key = buildPoiKey((byte) 'W', way.getId()); poiWriter.put(key, encodePoiIndexRec(rec)); } } - } else if (type == EntityType.Relation) { OsmRelation relation = (OsmRelation) container.getEntity(); if (isAdministrativeBoundary(relation)) { @@ -453,6 +467,18 @@ private void pass2PoiShardingFromIndex(RocksDB nodeCache, rec.kind = kind; rec.id = id; + if (kind == 'N' && "address".equals(rec.type)) { + Long buildingWayId = nodeToBuildingWayCache.get(id); + if (buildingWayId != null) { + byte[] wayPoiBytes = poiIndexDb.get(buildPoiKey((byte) 'W', buildingWayId)); + if (wayPoiBytes != null) { + PoiIndexRec wayRec = decodePoiIndexRec(wayPoiBytes); + rec.type = wayRec.type; + rec.subtype = wayRec.subtype; + stats.incrementAddressNodesWithBuildingType(); + } + } + } // For way POIs, lat/lon is NaN — resolve from nodeCache/wayIndexDb byte[] cacheWayNodes = null; if (Double.isNaN(rec.lat) && kind == 'W') { @@ -867,6 +893,30 @@ private String intern(String s) { return tagCache.computeIfAbsent(s, k -> k); } + private boolean hasAddressTags(OsmEntity entity) { + for (int i = 0; i < entity.getNumberOfTags(); i++) { + if (entity.getTag(i).getKey().startsWith("addr:")) { + return true; + } + } + return false; + } + + private boolean isBuildingWay(OsmWay way) { + for (int i = 0; i < way.getNumberOfTags(); i++) { + OsmTag tag = way.getTag(i); + if ("building".equals(tag.getKey())) { + String val = tag.getValue(); + return switch (val) { + case "yes", "commercial", "retail", "industrial", + "office", "apartments", "residential" -> true; + default -> false; + }; + } + } + return false; + } + private void writeShardBatchAppendOnly(Map> shardBuffer, RocksDB appendDb, ImportStatistics stats) throws Exception { FlatBufferBuilder builder = new FlatBufferBuilder(1024 * 32); @@ -1244,32 +1294,27 @@ private boolean isPoi(OsmEntity entity) { switch (key) { case "amenity": return switch (val) { - case "bench", "drinking_water", "waste_basket", "bicycle_parking", "vending_machine", - "parking_entrance", "fire_hydrant" -> false; - + case "bench", "drinking_water", "waste_basket", "bicycle_parking", + "vending_machine", "parking_entrance", "fire_hydrant" -> false; default -> true; }; case "healthcare": return true; + case "emergency": return switch (val) { - case "fire_hydrant", "defibrillator", "fire_extinguisher", "siren", "life_ring", "lifeline", - "phone", "drinking_water" -> false; + case "fire_hydrant", "defibrillator", "fire_extinguisher", + "siren", "life_ring", "lifeline", "phone", "drinking_water" -> false; default -> true; }; + case "building": - if (switch (val) { - case "yes", "commercial", "retail", "industrial", "office", "apartments" -> true; - default -> false; - }) { - isInterestingBuilding = true; - } + isInterestingBuilding = true; break; - case "shop", "tourism", "leisure", "office", "craft", "place", "historic", "public_transport", - "aeroway": - // Exclude the specific sub-leisure types you mentioned earlier + case "shop", "tourism", "leisure", "office", "craft", "place", + "historic", "public_transport", "aeroway": return !key.equals("leisure") || !List.of("picnic_table", "swimming_pool").contains(val); case "railway": @@ -1277,7 +1322,6 @@ private boolean isPoi(OsmEntity entity) { break; default: - // If it's any other key in your fast-key list (like 'natural'), allow it if (isPoiFastKey(key)) return true; } } @@ -1778,20 +1822,39 @@ private PoiIndexRec buildPoiIndexRecFromEntity(OsmEntity e) { PoiIndexRec rec = new PoiIndexRec(); rec.type = "unknown"; rec.subtype = ""; + + boolean hasPoiType = false; + boolean hasAddressTags = false; + for (int i = 0; i < e.getNumberOfTags(); i++) { OsmTag t = e.getTag(i); - if (isPoiFastKey(t.getKey())) { + String k = t.getKey(); + + if (isPoiFastKey(k)) { rec.type = intern(t.getKey()); rec.subtype = intern(t.getValue()); - break; + hasPoiType = true; + } + + if (k.startsWith("addr:")) { + hasAddressTags = true; } } + + if (hasAddressTags && !hasPoiType) { + rec.type = "address"; + rec.subtype = "node"; + } + + // Second pass: collect names and address details List names = new ArrayList<>(); Set dedup = new HashSet<>(); + for (int i = 0; i < e.getNumberOfTags(); i++) { OsmTag t = e.getTag(i); String k = t.getKey(), v = t.getValue(); if (v == null || v.trim().isEmpty()) continue; + if ("name".equals(k)) { if (dedup.add("default:" + v)) names.add(new NameData("default", v)); } else if (k.startsWith("name:")) { @@ -1807,6 +1870,7 @@ private PoiIndexRec buildPoiIndexRecFromEntity(OsmEntity e) { } } } + rec.names = names; return rec; } diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index 403c803..a5b138a 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -91,6 +91,8 @@ public String toString() { private final AtomicLong activeThreads = new AtomicLong(0); private final AtomicLong boundaryWaysProcessed = new AtomicLong(0); private final AtomicLong boundaryPhaseEntitiesRead = new AtomicLong(0); + private AtomicLong addressNodesFound = new AtomicLong(0); + private AtomicLong addressNodesWithBuildingType = new AtomicLong(0); private volatile String currentPhase = "Initializing"; private volatile boolean running = true; @@ -234,6 +236,13 @@ public void incrementBoundaryPhaseEntitiesRead() { boundaryPhaseEntitiesRead.incrementAndGet(); } + public void incrementAddressNodesFound() { + addressNodesFound.incrementAndGet(); + } + + public void incrementAddressNodesWithBuildingType() { + addressNodesWithBuildingType.incrementAndGet(); + } public void resetBoundaryPhaseEntitiesRead() { boundaryPhaseEntitiesRead.set(0); } From e3e35e47c1b1d2b00a0fb79f416d4e5c82173519 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Sat, 28 Mar 2026 15:13:17 +0100 Subject: [PATCH 02/23] feat: add methods for tracking buildings in import statistics --- .../service/importer/ImportStatistics.java | 56 +++++++++++++------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index a5b138a..78178d1 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -23,6 +23,23 @@ import java.util.concurrent.atomic.AtomicLong; class ImportStatistics { + + public void incrementBuildingsFound() { + + } + + public void incrementBuildingsProcessed() { + + } + + public void setBuildingsBytes(long buildings) { + + } + + public void setTmpBuildingGridBytes(long buildingGrid) { + + } + public enum Pass { ONE, TWO @@ -35,7 +52,8 @@ public enum Stage { SCAN_PBF_STRUCTURE("Scan PBF Structure"), CACHING_NODE_COORDINATES("Caching Node Coordinates"), PROCESSING_ADMIN_BOUNDARIES("Processing Admin Boundaries"), - COMPACTING_POIS("Compacting POIs"),; + COMPACTING_POIS("Compacting POIs"), + PROCESSING_BUILDINGS("Processing Buildings"); private final String shortName; Stage(String shortName) { @@ -243,9 +261,11 @@ public void incrementAddressNodesFound() { public void incrementAddressNodesWithBuildingType() { addressNodesWithBuildingType.incrementAndGet(); } + public void resetBoundaryPhaseEntitiesRead() { boundaryPhaseEntitiesRead.set(0); } + public String getCurrentPhase() { return currentPhase; } @@ -457,7 +477,7 @@ public void startProgressReporter() { sb.append(String.format("\033[1;90m[%d/%d]\033[0m ", currentStep, TOTAL_STEPS)); if (phase.contains("1.1.1")) { - long pbfPerSec = phaseSeconds > 0 ? (long)(getEntitiesRead() / phaseSeconds) : 0; + long pbfPerSec = phaseSeconds > 0 ? (long) (getEntitiesRead() / phaseSeconds) : 0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mScanning PBF Structure\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mPBF Entities:\033[0m %s \033[33m(%s/s)\033[0m", formatCompactNumber(getEntitiesRead()), formatCompactRate(pbfPerSec))); @@ -465,9 +485,9 @@ public void startProgressReporter() { sb.append(String.format(" │ \033[34mWays Found:\033[0m %s", formatCompactNumber(getWaysProcessed()))); sb.append(String.format(" │ \033[35mRelations:\033[0m %s", formatCompactNumber(getRelationsFound()))); - } else if (phase.contains("1.1.2")) { + } else if (phase.contains("1.1.2")) { long boundaryEntities = getBoundaryPhaseEntitiesRead(); - long pbfPerSec = phaseSeconds > 0 ? (long)(boundaryEntities / phaseSeconds) : 0; + long pbfPerSec = phaseSeconds > 0 ? (long) (boundaryEntities / phaseSeconds) : 0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mIndexing Boundary Member Ways\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mPBF Entities:\033[0m %s \033[33m(%s/s)\033[0m", formatCompactNumber(boundaryEntities), formatCompactRate(pbfPerSec))); @@ -475,7 +495,7 @@ public void startProgressReporter() { sb.append(String.format(" │ \033[35mRelations:\033[0m %s", formatCompactNumber(getRelationsFound()))); } else if (phase.contains("1.1.3")) { - long nodesPerSec = phaseSeconds > 0 ? (long)(getNodesCached() / phaseSeconds) : 0; + long nodesPerSec = phaseSeconds > 0 ? (long) (getNodesCached() / phaseSeconds) : 0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mCaching Node Coordinates\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mNodes Cached:\033[0m %s \033[33m(%s/s)\033[0m", formatCompactNumber(getNodesCached()), formatCompactRate(nodesPerSec))); @@ -483,20 +503,20 @@ public void startProgressReporter() { sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); } else if (phase.contains("1.2")) { - long boundsPerSec = phaseSeconds > 0 ? (long)(getBoundariesProcessed() / phaseSeconds) : 0; + long boundsPerSec = phaseSeconds > 0 ? (long) (getBoundariesProcessed() / phaseSeconds) : 0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mProcessing Admin Boundaries\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mBoundaries:\033[0m %s \033[33m(%s/s)\033[0m", formatCompactNumber(getBoundariesProcessed()), formatCompactRate(boundsPerSec))); sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); } else if (phase.contains("2.1")) { - long poisPerSec = phaseSeconds > 0 ? (long)(getPoisProcessed() / phaseSeconds) : 0; - long poisReadSec = phaseSeconds > 0 ? (long)(getPoiIndexRecRead() / phaseSeconds) : 0; + long poisPerSec = phaseSeconds > 0 ? (long) (getPoisProcessed() / phaseSeconds) : 0; + long poisReadSec = phaseSeconds > 0 ? (long) (getPoiIndexRecRead() / phaseSeconds) : 0; long totalFromPhase1 = getNodesFound() + getWaysProcessed(); double percentage = totalFromPhase1 > 0 ? (double) getPoisProcessed() / totalFromPhase1 * 100.0 : 0.0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mProcessing POIs & Sharding\033[0m", formatTime(elapsed))); - sb.append(String.format(" │ \033[32mPOI Index Rec Read:\033[0m %s \033[33m%s\033[0m", formatCompactNumber(getPoiIndexRecRead()), isPoiIndexRecReadDone() ? "(done)" : String.format("(%s/s)",formatCompactRate(poisReadSec)))); + sb.append(String.format(" │ \033[32mPOI Index Rec Read:\033[0m %s \033[33m%s\033[0m", formatCompactNumber(getPoiIndexRecRead()), isPoiIndexRecReadDone() ? "(done)" : String.format("(%s/s)", formatCompactRate(poisReadSec)))); sb.append(String.format(" │ \033[32mPOIs Processed:\033[0m %s \033[33m(%s/s)\033[0m", formatCompactNumber(getPoisProcessed()), formatCompactRate(poisPerSec))); sb.append(String.format(" │ \033[35mProgress:\033[0m %.4f%%", percentage)); sb.append(String.format(" │ \033[36mQueue:\033[0m %s", formatCompactNumber(getQueueSize()))); @@ -506,7 +526,7 @@ public void startProgressReporter() { long compactionElapsed = System.currentTimeMillis() - getCompactionStartTime(); double compactionPhaseSeconds = compactionElapsed / 1000.0; long shardsCompacted = getCompactionEntriesProcessed(); - long shardsPerSec = compactionPhaseSeconds > 0 ? (long)(shardsCompacted / compactionPhaseSeconds) : 0; + long shardsPerSec = compactionPhaseSeconds > 0 ? (long) (shardsCompacted / compactionPhaseSeconds) : 0; long remaining = getCompactionEntriesRemaining(); sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mCompacting POIs\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mShards Compacted:\033[0m %s \033[33m(%s/s)\033[0m", @@ -550,28 +570,28 @@ public void printFinalStatistics() { System.out.println("├─────────────────┼─────────────────┼─────────────────┤"); System.out.printf("│ \033[32mPBF Entities\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getEntitiesRead()), - formatCompactNumber((long)(getEntitiesRead() / totalSeconds))); + formatCompactNumber((long) (getEntitiesRead() / totalSeconds))); System.out.printf("│ \033[37mNodes Found\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getNodesFound()), - formatCompactNumber((long)(getNodesFound() / totalSeconds))); + formatCompactNumber((long) (getNodesFound() / totalSeconds))); System.out.printf("│ \033[34mNodes Cached\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getNodesCached()), - formatCompactNumber((long)(getNodesCached() / totalSeconds))); + formatCompactNumber((long) (getNodesCached() / totalSeconds))); System.out.printf("│ \033[35mWays Processed\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getWaysProcessed()), - formatCompactNumber((long)(getWaysProcessed() / totalSeconds))); + formatCompactNumber((long) (getWaysProcessed() / totalSeconds))); System.out.printf("│ \033[36mBoundaries\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getBoundariesProcessed()), - formatCompactNumber((long)(getBoundariesProcessed() / totalSeconds))); + formatCompactNumber((long) (getBoundariesProcessed() / totalSeconds))); System.out.printf("│ \033[33mPOIs Created\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getPoisProcessed()), - formatCompactNumber((long)(getPoisProcessed() / totalSeconds))); + formatCompactNumber((long) (getPoisProcessed() / totalSeconds))); System.out.println("└─────────────────┴─────────────────┴─────────────────┘"); long totalObjects = getNodesFound() + getNodesCached() + getWaysProcessed() + getBoundariesProcessed() + getPoisProcessed(); System.out.printf("%n\033[1;37m🚀 Overall Throughput:\033[0m \033[1;32m%s objects\033[0m processed at \033[1;33m%s objects/sec\033[0m%n", formatCompactNumber(totalObjects), - formatCompactNumber((long)(totalObjects / totalSeconds))); + formatCompactNumber((long) (totalObjects / totalSeconds))); System.out.printf("\033[1;37m💾 Database Operations:\033[0m \033[1;36m%s writes\033[0m%n", formatCompactNumber(getRocksDbWrites())); @@ -637,7 +657,7 @@ private String formatCompactNumber(long n) { if (n < 1_000_000) return String.format("%.2fk", n / 1000.0); return String.format("%.3fM", n / 1_000_000.0); } - + private String formatCompactRate(long n) { if (n < 1000) return String.valueOf(n); if (n < 1_000_000) return String.format("%.1fk", n / 1000.0); From 9f87b02728bbcabb87ae7a3adc6a8dfafe3c89bb Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 15:13:19 +0100 Subject: [PATCH 03/23] feat: implement building statistics tracking and reporting --- .../service/importer/ImportStatistics.java | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index 78178d1..13a6168 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -25,19 +25,19 @@ class ImportStatistics { public void incrementBuildingsFound() { - + buildingsFound.incrementAndGet(); } public void incrementBuildingsProcessed() { - + buildingsProcessed.incrementAndGet(); } public void setBuildingsBytes(long buildings) { - + this.buildingsBytes = buildings; } public void setTmpBuildingGridBytes(long buildingGrid) { - + this.tmpBuildingGridBytes = buildingGrid; } public enum Pass { @@ -109,6 +109,8 @@ public String toString() { private final AtomicLong activeThreads = new AtomicLong(0); private final AtomicLong boundaryWaysProcessed = new AtomicLong(0); private final AtomicLong boundaryPhaseEntitiesRead = new AtomicLong(0); + private final AtomicLong buildingsFound = new AtomicLong(0); + private final AtomicLong buildingsProcessed = new AtomicLong(0); private AtomicLong addressNodesFound = new AtomicLong(0); private AtomicLong addressNodesWithBuildingType = new AtomicLong(0); @@ -125,7 +127,9 @@ public String toString() { private volatile long datasetBytes; private volatile long shardsBytes; private volatile long boundariesBytes; + private volatile long buildingsBytes; private volatile long tmpGridBytes; + private volatile long tmpBuildingGridBytes; private volatile long tmpNodeBytes; private volatile long tmpWayBytes; private volatile long tmpBoundaryWayBytes; @@ -509,6 +513,13 @@ public void startProgressReporter() { formatCompactNumber(getBoundariesProcessed()), formatCompactRate(boundsPerSec))); sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); + } else if (phase.contains("1.4")) { + long buildingsPerSec = phaseSeconds > 0 ? (long) (getBuildingsProcessed() / phaseSeconds) : 0; + sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mProcessing Building Boundaries\033[0m", formatTime(elapsed))); + sb.append(String.format(" │ \033[32mBuildings:\033[0m %s \033[33m(%s/s)\033[0m", + formatCompactNumber(getBuildingsProcessed()), formatCompactRate(buildingsPerSec))); + sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); + } else if (phase.contains("2.1")) { long poisPerSec = phaseSeconds > 0 ? (long) (getPoisProcessed() / phaseSeconds) : 0; long poisReadSec = phaseSeconds > 0 ? (long) (getPoiIndexRecRead() / phaseSeconds) : 0; @@ -583,12 +594,19 @@ public void printFinalStatistics() { System.out.printf("│ \033[36mBoundaries\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getBoundariesProcessed()), formatCompactNumber((long) (getBoundariesProcessed() / totalSeconds))); + System.out.printf("│ \033[33mBuildings Found\033[0m │ %15s │ %13s/s │%n", + formatCompactNumber(getBuildingsFound()), + formatCompactNumber((long) (getBuildingsFound() / totalSeconds))); + System.out.printf("│ \033[33mBuildings Processed\033[0m│ %15s │ %13s/s │%n", + formatCompactNumber(getBuildingsProcessed()), + formatCompactNumber((long) (getBuildingsProcessed() / totalSeconds))); System.out.printf("│ \033[33mPOIs Created\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getPoisProcessed()), formatCompactNumber((long) (getPoisProcessed() / totalSeconds))); System.out.println("└─────────────────┴─────────────────┴─────────────────┘"); - long totalObjects = getNodesFound() + getNodesCached() + getWaysProcessed() + getBoundariesProcessed() + getPoisProcessed(); + long totalObjects = getNodesFound() + getNodesCached() + getWaysProcessed() + + getBoundariesProcessed() + getBuildingsProcessed() + getPoisProcessed(); System.out.printf("%n\033[1;37m🚀 Overall Throughput:\033[0m \033[1;32m%s objects\033[0m processed at \033[1;33m%s objects/sec\033[0m%n", formatCompactNumber(totalObjects), formatCompactNumber((long) (totalObjects / totalSeconds))); @@ -599,9 +617,11 @@ public void printFinalStatistics() { System.out.println("\n\033[1;37m📦 Dataset Size:\033[0m " + formatSize(getDatasetBytes())); System.out.println(" • poi_shards: " + formatSize(getShardsBytes())); System.out.println(" • boundaries: " + formatSize(getBoundariesBytes())); + System.out.println(" • buildings: " + formatSize(getBuildingsBytes())); System.out.println("\n\033[1;37m🧹 Temporary DBs:\033[0m " + formatSize(getTmpTotalBytes())); System.out.println(" • grid_index: " + formatSize(getTmpGridBytes())); + System.out.println(" • building_grid_index:" + formatSize(getTmpBuildingGridBytes())); System.out.println(" • node_cache: " + formatSize(getTmpNodeBytes())); System.out.println(" • way_index: " + formatSize(getTmpWayBytes())); System.out.println(" • boundary_way_index: " + formatSize(getTmpBoundaryWayBytes())); From a6684c9b833aa20f222278bc6c778f6a3aa66649 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 15:14:05 +0100 Subject: [PATCH 04/23] feat: add new building-related getter methods to ImportStatistics --- .../service/importer/ImportStatistics.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index 13a6168..750b8c3 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -410,6 +410,22 @@ public void setTmpPoiBytes(long v) { this.tmpPoiBytes = v; } + public long getBuildingsBytes() { + return buildingsBytes; + } + + public long getTmpBuildingGridBytes() { + return tmpBuildingGridBytes; + } + + public long getBuildingsFound() { + return buildingsFound.get(); + } + + public long getBuildingsProcessed() { + return buildingsProcessed.get(); + } + public long getTmpAppendBytes() { return tmpAppendBytes; } From f3d599c08026e456f4c2990781fa6f99c8db3657 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Sat, 28 Mar 2026 15:36:32 +0100 Subject: [PATCH 05/23] started working on boundaries for building pois --- schema/building.fbs | 17 ++ .../service/importer/ImportService.java | 240 ++++++++++++------ 2 files changed, 178 insertions(+), 79 deletions(-) create mode 100644 schema/building.fbs diff --git a/schema/building.fbs b/schema/building.fbs new file mode 100644 index 0000000..05146e6 --- /dev/null +++ b/schema/building.fbs @@ -0,0 +1,17 @@ +namespace com.dedicatedcode.paikka.flatbuffers; + +table Geometry { + data:[ubyte]; // WKB binary +} +table Building { + id: long; + name: string; // building type (residential, commercial, etc.) + code: string; // always "building" + geometry: Geometry; +} + +table BuildingList { + buildings: [Building]; +} + +root_type BuildingList; \ No newline at end of file diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index fe99bc0..8721591 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -67,7 +67,6 @@ public class ImportService { private final PaikkaConfiguration config; private final Map tagCache = new ConcurrentHashMap<>(1000); - private final Map nodeToBuildingWayCache = new ConcurrentHashMap<>(); private final int fileReadWindowSize; @@ -103,7 +102,9 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { Path shardsDbPath = dataDirectory.resolve("poi_shards"); Path boundariesDbPath = dataDirectory.resolve("boundaries"); + Path buildingsDbPath = dataDirectory.resolve("buildings"); Path gridIndexDbPath = dataDirectory.resolve("tmp/grid_index"); + Path buildingGridIndexDbPath = dataDirectory.resolve("tmp/building_grid_index"); Path appendDbPath = dataDirectory.resolve("tmp/append_poi"); Path nodeCacheDbPath = dataDirectory.resolve("tmp/node_cache"); Path wayIndexDbPath = dataDirectory.resolve("tmp/way_index"); @@ -114,7 +115,9 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { cleanupDatabase(shardsDbPath); cleanupDatabase(boundariesDbPath); + cleanupDatabase(buildingsDbPath); cleanupDatabase(gridIndexDbPath); + cleanupDatabase(buildingGridIndexDbPath); cleanupDatabase(nodeCacheDbPath); cleanupDatabase(wayIndexDbPath); cleanupDatabase(boundaryWayIndexDbPath); @@ -186,7 +189,9 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { try (RocksDB shardsDb = RocksDB.open(poiShardsOpts, shardsDbPath.toString()); RocksDB boundariesDb = RocksDB.open(poiShardsOpts, boundariesDbPath.toString()); + RocksDB buildingsDb = RocksDB.open(poiShardsOpts, buildingsDbPath.toString()); RocksDB gridIndexDb = RocksDB.open(gridOpts, gridIndexDbPath.toString()); + RocksDB buildingGridIndexDb = RocksDB.open(gridOpts, buildingGridIndexDbPath.toString()); RocksDB nodeCache = RocksDB.open(nodeOpts, nodeCacheDbPath.toString()); RocksDB wayIndexDb = RocksDB.open(wayIndexOpts, wayIndexDbPath.toString()); RocksDB neededBoundaryWaysDb = RocksDB.open(wayIndexOpts, boundaryWayIndexDbPath.toString()); @@ -207,22 +212,26 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { // PASS 2: Nodes Cache, Boundaries, POIs stats.printPhaseHeader("PASS 2: Nodes Cache, Boundaries, POIs"); long pass2Start = System.currentTimeMillis(); - stats.setCurrentPhase(3, "1.1.3: Caching node coordinates"); + stats.setCurrentPhase(3, "1.2: Caching node coordinates"); cacheNeededNodeCoordinates(pbfFile, neededNodesDb, nodeCache, stats); - stats.setCurrentPhase(4, "1.2: Processing administrative boundaries"); + stats.setCurrentPhase(4, "1.3: Processing administrative boundaries"); processAdministrativeBoundariesFromIndex(relIndexDb, nodeCache, wayIndexDb, gridIndexDb, boundariesDb, stats); - stats.setCurrentPhase(5, "2.1: Processing POIs & Sharding"); + + stats.setCurrentPhase(5, "1.4: Processing building boundaries"); + processBuildingBoundariesFromIndex(poiIndexDb, nodeCache, wayIndexDb, buildingGridIndexDb, buildingsDb, stats); + + stats.setCurrentPhase(6, "2.1: Processing POIs & Sharding"); pass2PoiShardingFromIndex(nodeCache, wayIndexDb, appendDb, boundariesDb, poiIndexDb, gridIndexDb, stats); - stats.setCurrentPhase(6, "2.2: Compacting POIs"); + stats.setCurrentPhase(7, "2.2: Compacting POIs"); compactShards(appendDb, shardsDb, stats); stats.stop(); stats.printPhaseSummary("PASS 2", pass2Start); shardsDb.compactRange(); boundariesDb.compactRange(); - + buildingsDb.compactRange(); stats.setTotalTime(System.currentTimeMillis() - totalStartTime); @@ -230,7 +239,9 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { recordSizeMetrics(stats, shardsDbPath, boundariesDbPath, + buildingsDbPath, gridIndexDbPath, + buildingGridIndexDbPath, nodeCacheDbPath, wayIndexDbPath, boundaryWayIndexDbPath, @@ -348,7 +359,6 @@ private void pass1DiscoveryAndIndexing(Path pbfFile, RocksDB wayIndexDb, RocksDB OsmWay way = (OsmWay) container.getEntity(); boolean isPoi = isPoi(way); boolean isAdmin = isAdministrativeBoundaryWay(way); - boolean isBuilding = isBuildingWay(way); if (isPoi || isAdmin) { stats.incrementWaysProcessed(); @@ -358,11 +368,6 @@ private void pass1DiscoveryAndIndexing(Path pbfFile, RocksDB wayIndexDb, RocksDB long nid = way.getNodeId(j); nodeIds[j] = nid; neededWriter.put(s2Helper.longToByteArray(nid), ONE); - - // Cache node-to-building-way mapping for address resolution - if (isPoi && isBuilding) { - nodeToBuildingWayCache.put(nid, way.getId()); - } } wayWriter.put(s2Helper.longToByteArray(way.getId()), s2Helper.longArrayToByteArray(nodeIds)); @@ -371,6 +376,9 @@ private void pass1DiscoveryAndIndexing(Path pbfFile, RocksDB wayIndexDb, RocksDB PoiIndexRec rec = buildPoiIndexRecFromEntity(way); byte[] key = buildPoiKey((byte) 'W', way.getId()); poiWriter.put(key, encodePoiIndexRec(rec)); + if ("building".equals(rec.type)) { + stats.incrementBuildingsFound(); + } } } } else if (type == EntityType.Relation) { @@ -467,18 +475,15 @@ private void pass2PoiShardingFromIndex(RocksDB nodeCache, rec.kind = kind; rec.id = id; - if (kind == 'N' && "address".equals(rec.type)) { - Long buildingWayId = nodeToBuildingWayCache.get(id); - if (buildingWayId != null) { - byte[] wayPoiBytes = poiIndexDb.get(buildPoiKey((byte) 'W', buildingWayId)); - if (wayPoiBytes != null) { - PoiIndexRec wayRec = decodePoiIndexRec(wayPoiBytes); - rec.type = wayRec.type; - rec.subtype = wayRec.subtype; - stats.incrementAddressNodesWithBuildingType(); - } - } + // Solution: The old workaround is removed. Enrichment is done at query time. + // We no longer try to enrich address nodes during import. + + // Buildings are processed in a separate phase and not included as point POIs. + if ("building".equals(rec.type)) { + it.next(); + continue; } + // For way POIs, lat/lon is NaN — resolve from nodeCache/wayIndexDb byte[] cacheWayNodes = null; if (Double.isNaN(rec.lat) && kind == 'W') { @@ -512,8 +517,6 @@ private void pass2PoiShardingFromIndex(RocksDB nodeCache, } - // ─── Worker threads: hierarchy resolution + sharding (unchanged) ─── - for (int i = 0; i < numReaders; i++) { executor.submit(() -> { stats.incrementActiveThreads(); @@ -534,7 +537,6 @@ private void pass2PoiShardingFromIndex(RocksDB nodeCache, double lon = rec.lon; byte[] boundaryWkb = null; - // For way POIs, still compute boundary WKB geometry if (rec.kind == 'W') { List coords = buildCoordinatesFromWay(nodeCache, item.cachedWayNodes); if (coords != null && coords.size() >= 3) { @@ -601,11 +603,6 @@ private void pass2PoiShardingFromIndex(RocksDB nodeCache, } } - /** - * Sorts a chunk of POI items by S2CellId (Hilbert curve order) for spatial - * locality, then emits to the processing queue in batches. - * Memory stays bounded: only one chunk is in memory at a time. - */ private void sortAndEmitChunk(List chunk, BlockingQueue> queue, ImportStatistics stats) throws InterruptedException { chunk.sort((a, b) -> Long.compareUnsigned(a.s2SortKey, b.s2SortKey)); @@ -667,13 +664,11 @@ private byte[] resolveWayCenter(PoiIndexRec rec, RocksDB nodeCache, RocksDB wayI private void cacheNeededNodeCoordinates(Path pbfFile, RocksDB neededNodesDb, RocksDB nodeCache, ImportStatistics stats) throws Exception { final int BATCH_SIZE = 50_000; - // The reader thread produces batches of nodes BlockingQueue> nodeBatchQueue = new LinkedBlockingQueue<>(200); int numProcessors = Math.max(1, config.getImportConfiguration().getThreads()); CountDownLatch latch = new CountDownLatch(numProcessors); try (ExecutorService executor = createExecutorService(numProcessors)) { - // The reader logic is good, no changes needed here. Thread reader = Thread.ofVirtual().start(() -> { List buf = new ArrayList<>(BATCH_SIZE); try { @@ -710,11 +705,8 @@ private void cacheNeededNodeCoordinates(Path pbfFile, RocksDB neededNodesDb, Roc for (int i = 0; i < numProcessors; i++) { executor.submit(() -> { stats.incrementActiveThreads(); - // --- OPTIMIZATION 1: ThreadLocal for reusable objects to reduce GC pressure --- final ThreadLocal nodeWriterLocal = ThreadLocal.withInitial(() -> new RocksBatchWriter(nodeCache, 50_000, stats)); - // Reuse a ByteBuffer for writing values final ThreadLocal valueBufferLocal = ThreadLocal.withInitial(() -> ByteBuffer.allocate(16)); - // Reuse a List of byte[] for multiGet keys. We only need one per thread. final ThreadLocal> keysListLocal = ThreadLocal.withInitial(ArrayList::new); try { @@ -723,9 +715,8 @@ private void cacheNeededNodeCoordinates(Path pbfFile, RocksDB neededNodesDb, Roc stats.setQueueSize(nodeBatchQueue.size()); if (nodes.isEmpty()) break; - // Reuse the keys list List keys = keysListLocal.get(); - keys.clear(); // Clear previous batch's keys + keys.clear(); for (OsmNode n : nodes) { keys.add(s2Helper.longToByteArray(n.getId())); } @@ -733,19 +724,14 @@ private void cacheNeededNodeCoordinates(Path pbfFile, RocksDB neededNodesDb, Roc List presence = neededNodesDb.multiGetAsList(keys); RocksBatchWriter nodeWriter = nodeWriterLocal.get(); - ByteBuffer valueBuffer = valueBufferLocal.get(); // Get the reusable buffer + ByteBuffer valueBuffer = valueBufferLocal.get(); - // --- OPTIMIZATION 2: Combine loops --- for (int idx = 0; idx < nodes.size(); idx++) { if (presence.get(idx) != null) { OsmNode n = nodes.get(idx); - - // Reset buffer position and write new data valueBuffer.clear(); valueBuffer.putDouble(n.getLatitude()); valueBuffer.putDouble(n.getLongitude()); - - // The key is already in the 'keys' list at the same index nodeWriter.put(keys.get(idx), valueBuffer.array()); stats.incrementNodesCached(); } @@ -763,8 +749,8 @@ private void cacheNeededNodeCoordinates(Path pbfFile, RocksDB neededNodesDb, Roc stats.recordError(ImportStatistics.Stage.CACHING_NODE_COORDINATES, Kind.STORE, null, "node-cache-writer-close", e); } nodeWriterLocal.remove(); - valueBufferLocal.remove(); // Clean up thread-local - keysListLocal.remove(); // Clean up thread-local + valueBufferLocal.remove(); + keysListLocal.remove(); stats.decrementActiveThreads(); latch.countDown(); } @@ -813,16 +799,15 @@ private void processAdministrativeBoundariesFromIndex(RocksDB relIndexDb, RocksD org.locationtech.jts.geom.Geometry simplified = geometrySimplificationService.simplifyByAdminLevel(geometry, rec.level); - // Simplification can produce invalid results too if (simplified == null || simplified.isEmpty() || !simplified.isValid()) { try { simplified = simplified != null ? simplified.buffer(0) : null; } catch (Exception e) { - simplified = geometry; // fall back to unsimplified + simplified = geometry; } } if (simplified == null || simplified.isEmpty() || !simplified.isValid()) { - simplified = geometry; // fall back to unsimplified + simplified = geometry; } return new BoundaryResultLite(rec.osmId, rec.level, rec.name, rec.code, simplified); @@ -831,7 +816,7 @@ private void processAdministrativeBoundariesFromIndex(RocksDB relIndexDb, RocksD stats.decrementActiveThreads(); } }); - ; + submitted.incrementAndGet(); for (Future f; (f = ecs.poll()) != null; ) { @@ -870,6 +855,96 @@ private void processAdministrativeBoundariesFromIndex(RocksDB relIndexDb, RocksD } } + private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB nodeCache, RocksDB wayIndexDb, RocksDB buildingGridIndexDb, RocksDB buildingsDb, ImportStatistics stats) throws Exception { + int maxConcurrentGeometries = 100; + Semaphore semaphore = new Semaphore(maxConcurrentGeometries); + + int numThreads = Math.max(1, config.getImportConfiguration().getThreads()); + ExecutorService executor = createExecutorService(numThreads); + ExecutorCompletionService ecs = new ExecutorCompletionService<>(executor); + AtomicInteger submitted = new AtomicInteger(0); + AtomicLong collected = new AtomicLong(0); + + try (RocksBatchWriter buildingsWriter = new RocksBatchWriter(buildingsDb, 10_000, stats)) { + try (RocksIterator it = poiIndexDb.newIterator()) { + it.seekToFirst(); + while (it.isValid()) { + byte[] key = it.key(); + if (key[0] != 'W') { // Buildings must be ways + it.next(); + continue; + } + + byte[] val = it.value(); + final PoiIndexRec rec = decodePoiIndexRec(val); + if (!"building".equals(rec.type)) { + it.next(); + continue; + } + final long wayId = bytesToLong(key, 1); + + semaphore.acquire(); + stats.incrementActiveThreads(); + ecs.submit(() -> { + try { + org.locationtech.jts.geom.Geometry geometry = buildGeometryFromWay(wayId, nodeCache, wayIndexDb, stats); + if (geometry == null) return null; + if (!geometry.isValid()) { + try { + geometry = geometry.buffer(0); + } catch (Exception e) { + return null; + } + } + if (geometry == null || geometry.isEmpty() || !geometry.isValid()) return null; + + // name=subtype (e.g., "residential"), code=null, level=100 (for hierarchy) + return new BoundaryResultLite(wayId, 100, rec.subtype, null, geometry); + } finally { + semaphore.release(); + stats.decrementActiveThreads(); + } + }); + + submitted.incrementAndGet(); + + for (Future f; (f = ecs.poll()) != null;) { + collected.incrementAndGet(); + try { + BoundaryResultLite r = f.get(); + if (r != null) { + // Re-use the same storage logic as administrative boundaries + storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); + stats.incrementBuildingsProcessed(); + } + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.CONCURRENCY, null, "building-future-poll", e); + } + } + it.next(); + } + } + long remaining = submitted.get() - collected.get(); + for (int i = 0; i < remaining; i++) { + try { + Future f = ecs.take(); + BoundaryResultLite r = f.get(); + if (r != null) { + storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); + stats.incrementBuildingsProcessed(); + } + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.CONCURRENCY, null, "building-future-take", e); + } + } + buildingsWriter.flush(); + + } finally { + executor.shutdown(); + executor.awaitTermination(30, TimeUnit.MINUTES); + } + } + private PoiData createPoiDataFromIndex(PoiIndexRec rec, double lat, double lon, List hierarchy, byte[] boundaryWkb) { AddressData addr = null; if (rec.street != null || rec.houseNumber != null || rec.postcode != null || rec.city != null || rec.country != null) { @@ -932,7 +1007,6 @@ private void writeShardBatchAppendOnly(Map> shardBuffer, Roc builder.clear(); - // Serialize ONLY the new POIs (no reading existing!) int[] poiOffsets = new int[pois.size()]; for (int i = 0; i < pois.size(); i++) { poiOffsets[i] = serializePoiData(builder, pois.get(i)); @@ -941,7 +1015,6 @@ private void writeShardBatchAppendOnly(Map> shardBuffer, Roc int poiListOffset = POIList.createPOIList(builder, poisVectorOffset); builder.finish(poiListOffset); - // Key = shardId (8 bytes) + sequence (8 bytes) — unique, no collision long seq = sequence.incrementAndGet(); byte[] key = new byte[16]; ByteBuffer.wrap(key).putLong(entry.getKey()).putLong(seq); @@ -1016,7 +1089,6 @@ private void compactShards(RocksDB appendDb, RocksDB shardsDb, ImportStatistics stats.setCompactionStartTime(System.currentTimeMillis()); stats.setCompactionEntriesTotal(sequence.get()); - // Reusable FlatBuffer accessor objects POI reusablePoi = new POI(); Name reusableName = new Name(); HierarchyItem reusableHier = new HierarchyItem(); @@ -1029,14 +1101,12 @@ private void compactShards(RocksDB appendDb, RocksDB shardsDb, ImportStatistics iterator.seekToFirst(); long currentShardId = Long.MIN_VALUE; - // Collect raw byte[] chunks per shard, build FlatBuffer only at flush time List currentShardChunks = new ArrayList<>(); while (iterator.isValid()) { byte[] key = iterator.key(); long shardId = ByteBuffer.wrap(key).order(ByteOrder.BIG_ENDIAN).getLong(); - // Shard boundary — flush previous shard if (shardId != currentShardId && currentShardId != Long.MIN_VALUE) { flushCompactedShard(currentShardChunks, currentShardId, shardsDb, writeOptions, reusablePoi, reusableName, reusableHier, reusableAddr, reusableGeom, stats); stats.incrementCompactionEntriesProcessed(currentShardChunks.size()); @@ -1045,7 +1115,6 @@ private void compactShards(RocksDB appendDb, RocksDB shardsDb, ImportStatistics currentShardId = shardId; - // IMPORTANT: copy the value bytes — RocksIterator may reuse the buffer byte[] value = iterator.value(); byte[] valueCopy = new byte[value.length]; System.arraycopy(value, 0, valueCopy, 0, value.length); @@ -1054,7 +1123,6 @@ private void compactShards(RocksDB appendDb, RocksDB shardsDb, ImportStatistics iterator.next(); } - // Flush last shard if (currentShardId != Long.MIN_VALUE && !currentShardChunks.isEmpty()) { flushCompactedShard(currentShardChunks, currentShardId, shardsDb, writeOptions, reusablePoi, reusableName, reusableHier, reusableAddr, reusableGeom, stats); stats.incrementCompactionEntriesProcessed(currentShardChunks.size()); @@ -1062,13 +1130,8 @@ private void compactShards(RocksDB appendDb, RocksDB shardsDb, ImportStatistics } } - /** - * Takes all raw FlatBuffer chunks for a single shard, reads each chunk's POIs, - * copies them into a fresh FlatBufferBuilder, and writes the merged result. - */ private void flushCompactedShard(List chunks, long shardId, RocksDB shardsDb, WriteOptions writeOptions, POI reusablePoi, Name reusableName, HierarchyItem reusableHier, Address reusableAddr, Geometry reusableGeom, ImportStatistics stats) { - // Count total POIs first int totalPois = 0; try { for (byte[] chunk : chunks) { @@ -1080,12 +1143,10 @@ private void flushCompactedShard(List chunks, long shardId, RocksDB shar stats.recordError(ImportStatistics.Stage.COMPACTING_POIS, Kind.DECODE, null, "flatbuffers-read:POIList", e); } - // Fresh builder per shard — no stale offsets FlatBufferBuilder builder = new FlatBufferBuilder(Math.max(1024, totalPois * 256)); int[] allOffsets = new int[totalPois]; int idx = 0; - // Process each chunk: the source ByteBuffer stays alive while we read from it for (byte[] chunk : chunks) { ByteBuffer buf = ByteBuffer.wrap(chunk); POIList poiList = POIList.getRootAsPOIList(buf); @@ -1168,7 +1229,6 @@ private int copyPoiFromFlatBuffer(FlatBufferBuilder builder, POI poi, Name reusa hierVecOff = POI.createHierarchyVector(builder, new int[0]); } - // Boundary geometry — use ByteBuffer slice for zero-copy transfer int boundaryOff = 0; if (poi.boundary(reusableGeom) != null && reusableGeom.dataLength() > 0) { ByteBuffer boundaryBuf = reusableGeom.dataAsByteBuffer(); @@ -1176,7 +1236,6 @@ private int copyPoiFromFlatBuffer(FlatBufferBuilder builder, POI poi, Name reusa int boundaryDataOff = Geometry.createDataVector(builder, boundaryBuf); boundaryOff = Geometry.createGeometry(builder, boundaryDataOff); } else { - // Fallback: manual copy if dataAsByteBuffer() returns null int len = reusableGeom.dataLength(); byte[] boundaryBytes = new byte[len]; for (int k = 0; k < len; k++) { @@ -1187,7 +1246,6 @@ private int copyPoiFromFlatBuffer(FlatBufferBuilder builder, POI poi, Name reusa } } - // --- Now build the POI table --- POI.startPOI(builder); POI.addId(builder, poi.id()); POI.addLat(builder, poi.lat()); @@ -1201,6 +1259,31 @@ private int copyPoiFromFlatBuffer(FlatBufferBuilder builder, POI poi, Name reusa return POI.endPOI(builder); } + private org.locationtech.jts.geom.Geometry buildGeometryFromWay(long wayId, RocksDB nodeCache, RocksDB wayIndexDb, ImportStatistics stats) { + try { + byte[] nodeSeq = wayIndexDb.get(s2Helper.longToByteArray(wayId)); + List coords = buildCoordinatesFromWay(nodeCache, nodeSeq); + + if (coords == null || coords.size() < 3) { + return null; + } + + if (!coords.getFirst().equals2D(coords.getLast())) { + coords.add(new Coordinate(coords.getFirst())); + } + + if (coords.size() < 4) { + return null; + } + + LinearRing shell = GEOMETRY_FACTORY.createLinearRing(coords.toArray(new Coordinate[0])); + return GEOMETRY_FACTORY.createPolygon(shell); + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.GEOMETRY, wayId, "build-building-geometry", e); + return null; + } + } + private org.locationtech.jts.geom.Geometry buildGeometryFromRelRec(RelRec rec, RocksDB nodeCache, RocksDB wayIndexDb, ImportStatistics stats) { List> outerRings = buildConnectedRings(toList(rec.outer), nodeCache, wayIndexDb); List> innerRings = buildConnectedRings(toList(rec.inner), nodeCache, wayIndexDb); @@ -1423,7 +1506,7 @@ private void storeBoundary(long osmId, int level, String name, String code, hasMir = true; } } catch (Exception e) { - // MIR computation failed — boundary still works, just no fast-path optimization + // MIR computation failed } int nameOffset = fbb.createString(name != null ? name : "Unknown"); @@ -1458,17 +1541,11 @@ private void storeBoundary(long osmId, int level, String name, String code, ArrayList covering = new ArrayList<>(); coverer.getCovering(rect, covering); - // Batched grid index update instead of per-cell synchronized writes batchUpdateGridIndex(gridsIndexDb, covering, osmId); boundariesWriter.put(s2Helper.longToByteArray(osmId), fbb.sizedByteArray()); } - /** - * Registers a boundary in the grid index for all covered cells. - * Uses chunked multiGet + WriteBatch for much better throughput than - * the previous per-cell synchronized approach. - */ private void batchUpdateGridIndex(RocksDB gridIndexDb, ArrayList cells, long osmId) throws RocksDBException { final int CHUNK_SIZE = 5_000; for (int offset = 0; offset < cells.size(); offset += CHUNK_SIZE) { @@ -1637,7 +1714,9 @@ private long computeDirectorySize(Path root) { private void recordSizeMetrics(ImportStatistics stats, Path shardsDbPath, Path boundariesDbPath, + Path buildingsDbPath, Path gridIndexDbPath, + Path buildingGridIndexDbPath, Path nodeCacheDbPath, Path wayIndexDbPath, Path boundaryWayIndexDbPath, @@ -1647,9 +1726,11 @@ private void recordSizeMetrics(ImportStatistics stats, Path appendDbPath) { long shards = computeDirectorySize(shardsDbPath); long boundaries = computeDirectorySize(boundariesDbPath); - long dataset = shards + boundaries; + long buildings = computeDirectorySize(buildingsDbPath); + long dataset = shards + boundaries + buildings; long grid = computeDirectorySize(gridIndexDbPath); + long buildingGrid = computeDirectorySize(buildingGridIndexDbPath); long node = computeDirectorySize(nodeCacheDbPath); long way = computeDirectorySize(wayIndexDbPath); long boundaryWay = computeDirectorySize(boundaryWayIndexDbPath); @@ -1657,13 +1738,15 @@ private void recordSizeMetrics(ImportStatistics stats, long rel = computeDirectorySize(relIndexDbPath); long poi = computeDirectorySize(poiIndexDbPath); long append = computeDirectorySize(appendDbPath); - long tmpTotal = grid + node + way + boundaryWay + needed + rel + poi + append; + long tmpTotal = grid + buildingGrid + node + way + boundaryWay + needed + rel + poi + append; stats.setShardsBytes(shards); stats.setBoundariesBytes(boundaries); + stats.setBuildingsBytes(buildings); stats.setDatasetBytes(dataset); stats.setTmpGridBytes(grid); + stats.setTmpBuildingGridBytes(buildingGrid); stats.setTmpNodeBytes(node); stats.setTmpWayBytes(way); stats.setTmpBoundaryWayBytes(boundaryWay); @@ -1846,7 +1929,6 @@ private PoiIndexRec buildPoiIndexRecFromEntity(OsmEntity e) { rec.subtype = "node"; } - // Second pass: collect names and address details List names = new ArrayList<>(); Set dedup = new HashSet<>(); @@ -1930,7 +2012,7 @@ private byte[] encodePoiIndexRec(PoiIndexRec rec) { byte[] pcB = bytes(rec.postcode); byte[] cityB = bytes(rec.city); byte[] countryB = bytes(rec.country); - int cap = 16 // ← NEW: 8 bytes lat + 8 bytes lon + int cap = 16 + 4 + typeB.length + 4 + subtypeB.length + namesSize + 4 + streetB.length + 4 + hnB.length + 4 + pcB.length + 4 + cityB.length + 4 + countryB.length; ByteBuffer bb = ByteBuffer.allocate(cap); bb.putDouble(rec.lat); @@ -2038,4 +2120,4 @@ private static String getString(ByteBuffer bb) { return new String(dst, StandardCharsets.UTF_8); } -} +} \ No newline at end of file From 1cf9d12bb24400da4b898808a625765efff84de3 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Sat, 28 Mar 2026 15:36:53 +0100 Subject: [PATCH 06/23] started working on boundaries for building pois --- .../paikka/flatbuffers/Building.java | 58 +++++++++++++++++++ .../paikka/flatbuffers/BuildingList.java | 49 ++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 src/main/java/com/dedicatedcode/paikka/flatbuffers/Building.java create mode 100644 src/main/java/com/dedicatedcode/paikka/flatbuffers/BuildingList.java diff --git a/src/main/java/com/dedicatedcode/paikka/flatbuffers/Building.java b/src/main/java/com/dedicatedcode/paikka/flatbuffers/Building.java new file mode 100644 index 0000000..601913e --- /dev/null +++ b/src/main/java/com/dedicatedcode/paikka/flatbuffers/Building.java @@ -0,0 +1,58 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +package com.dedicatedcode.paikka.flatbuffers; + +import java.nio.*; +import java.lang.*; +import java.util.*; +import com.google.flatbuffers.*; + +@SuppressWarnings("unused") +public final class Building extends Table { + public static void ValidateVersion() { Constants.FLATBUFFERS_25_2_10(); } + public static Building getRootAsBuilding(ByteBuffer _bb) { return getRootAsBuilding(_bb, new Building()); } + public static Building getRootAsBuilding(ByteBuffer _bb, Building obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } + public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } + public Building __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public long id() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; } + public String name() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; } + public ByteBuffer nameAsByteBuffer() { return __vector_as_bytebuffer(6, 1); } + public ByteBuffer nameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); } + public String code() { int o = __offset(8); return o != 0 ? __string(o + bb_pos) : null; } + public ByteBuffer codeAsByteBuffer() { return __vector_as_bytebuffer(8, 1); } + public ByteBuffer codeInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 8, 1); } + public com.dedicatedcode.paikka.flatbuffers.Geometry geometry() { return geometry(new com.dedicatedcode.paikka.flatbuffers.Geometry()); } + public com.dedicatedcode.paikka.flatbuffers.Geometry geometry(com.dedicatedcode.paikka.flatbuffers.Geometry obj) { int o = __offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } + + public static int createBuilding(FlatBufferBuilder builder, + long id, + int nameOffset, + int codeOffset, + int geometryOffset) { + builder.startTable(4); + Building.addId(builder, id); + Building.addGeometry(builder, geometryOffset); + Building.addCode(builder, codeOffset); + Building.addName(builder, nameOffset); + return Building.endBuilding(builder); + } + + public static void startBuilding(FlatBufferBuilder builder) { builder.startTable(4); } + public static void addId(FlatBufferBuilder builder, long id) { builder.addLong(0, id, 0L); } + public static void addName(FlatBufferBuilder builder, int nameOffset) { builder.addOffset(1, nameOffset, 0); } + public static void addCode(FlatBufferBuilder builder, int codeOffset) { builder.addOffset(2, codeOffset, 0); } + public static void addGeometry(FlatBufferBuilder builder, int geometryOffset) { builder.addOffset(3, geometryOffset, 0); } + public static int endBuilding(FlatBufferBuilder builder) { + int o = builder.endTable(); + return o; + } + + public static final class Vector extends BaseVector { + public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } + + public Building get(int j) { return get(new Building(), j); } + public Building get(Building obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } + } +} + diff --git a/src/main/java/com/dedicatedcode/paikka/flatbuffers/BuildingList.java b/src/main/java/com/dedicatedcode/paikka/flatbuffers/BuildingList.java new file mode 100644 index 0000000..edf97f1 --- /dev/null +++ b/src/main/java/com/dedicatedcode/paikka/flatbuffers/BuildingList.java @@ -0,0 +1,49 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +package com.dedicatedcode.paikka.flatbuffers; + +import java.nio.*; +import java.lang.*; +import java.util.*; +import com.google.flatbuffers.*; + +@SuppressWarnings("unused") +public final class BuildingList extends Table { + public static void ValidateVersion() { Constants.FLATBUFFERS_25_2_10(); } + public static BuildingList getRootAsBuildingList(ByteBuffer _bb) { return getRootAsBuildingList(_bb, new BuildingList()); } + public static BuildingList getRootAsBuildingList(ByteBuffer _bb, BuildingList obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } + public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } + public BuildingList __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public com.dedicatedcode.paikka.flatbuffers.Building buildings(int j) { return buildings(new com.dedicatedcode.paikka.flatbuffers.Building(), j); } + public com.dedicatedcode.paikka.flatbuffers.Building buildings(com.dedicatedcode.paikka.flatbuffers.Building obj, int j) { int o = __offset(4); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } + public int buildingsLength() { int o = __offset(4); return o != 0 ? __vector_len(o) : 0; } + public com.dedicatedcode.paikka.flatbuffers.Building.Vector buildingsVector() { return buildingsVector(new com.dedicatedcode.paikka.flatbuffers.Building.Vector()); } + public com.dedicatedcode.paikka.flatbuffers.Building.Vector buildingsVector(com.dedicatedcode.paikka.flatbuffers.Building.Vector obj) { int o = __offset(4); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; } + + public static int createBuildingList(FlatBufferBuilder builder, + int buildingsOffset) { + builder.startTable(1); + BuildingList.addBuildings(builder, buildingsOffset); + return BuildingList.endBuildingList(builder); + } + + public static void startBuildingList(FlatBufferBuilder builder) { builder.startTable(1); } + public static void addBuildings(FlatBufferBuilder builder, int buildingsOffset) { builder.addOffset(0, buildingsOffset, 0); } + public static int createBuildingsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } + public static void startBuildingsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } + public static int endBuildingList(FlatBufferBuilder builder) { + int o = builder.endTable(); + return o; + } + public static void finishBuildingListBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); } + public static void finishSizePrefixedBuildingListBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); } + + public static final class Vector extends BaseVector { + public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } + + public BuildingList get(int j) { return get(new BuildingList(), j); } + public BuildingList get(BuildingList obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } + } +} + From 935f754fae2eff5557314ef13d6830435bdf76da Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 15:45:25 +0100 Subject: [PATCH 07/23] fix: update phase matching in ImportStatistics to correctly report node caching and admin boundaries --- .../paikka/service/importer/ImportStatistics.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index 750b8c3..1c92cc5 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -523,6 +523,14 @@ public void startProgressReporter() { sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); } else if (phase.contains("1.2")) { + long nodesPerSec = phaseSeconds > 0 ? (long) (getNodesCached() / phaseSeconds) : 0; + sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mCaching Node Coordinates\033[0m", formatTime(elapsed))); + sb.append(String.format(" │ \033[32mNodes Cached:\033[0m %s \033[33m(%s/s)\033[0m", + formatCompactNumber(getNodesCached()), formatCompactRate(nodesPerSec))); + sb.append(String.format(" │ \033[36mQueue:\033[0m %s", formatCompactNumber(getQueueSize()))); + sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); + + } else if (phase.contains("1.3")) { long boundsPerSec = phaseSeconds > 0 ? (long) (getBoundariesProcessed() / phaseSeconds) : 0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mProcessing Admin Boundaries\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mBoundaries:\033[0m %s \033[33m(%s/s)\033[0m", From 5cae5d3afe59eed0a7bc2946e0ad5880bab5fcc6 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 15:50:01 +0100 Subject: [PATCH 08/23] fix: update phase matching conditions for import statistics logging --- .../paikka/service/importer/ImportStatistics.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index 1c92cc5..5bfeafa 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -522,7 +522,7 @@ public void startProgressReporter() { sb.append(String.format(" │ \033[36mQueue:\033[0m %s", formatCompactNumber(getQueueSize()))); sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); - } else if (phase.contains("1.2")) { + } else if (phase.contains("1.2:")) { long nodesPerSec = phaseSeconds > 0 ? (long) (getNodesCached() / phaseSeconds) : 0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mCaching Node Coordinates\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mNodes Cached:\033[0m %s \033[33m(%s/s)\033[0m", @@ -530,7 +530,7 @@ public void startProgressReporter() { sb.append(String.format(" │ \033[36mQueue:\033[0m %s", formatCompactNumber(getQueueSize()))); sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); - } else if (phase.contains("1.3")) { + } else if (phase.contains("1.3:")) { long boundsPerSec = phaseSeconds > 0 ? (long) (getBoundariesProcessed() / phaseSeconds) : 0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mProcessing Admin Boundaries\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mBoundaries:\033[0m %s \033[33m(%s/s)\033[0m", From e0402df9fb7806c6b4b7977184dc4d49b35788e3 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 16:05:16 +0100 Subject: [PATCH 09/23] feat: optimize building processing and filter out natural features --- .../service/importer/ImportService.java | 144 +++++++++--------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index 8721591..5e5a7c7 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -856,89 +856,81 @@ private void processAdministrativeBoundariesFromIndex(RocksDB relIndexDb, RocksD } private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB nodeCache, RocksDB wayIndexDb, RocksDB buildingGridIndexDb, RocksDB buildingsDb, ImportStatistics stats) throws Exception { - int maxConcurrentGeometries = 100; - Semaphore semaphore = new Semaphore(maxConcurrentGeometries); - + int batchSize = 1000; // Process buildings in batches to reduce overhead int numThreads = Math.max(1, config.getImportConfiguration().getThreads()); ExecutorService executor = createExecutorService(numThreads); - ExecutorCompletionService ecs = new ExecutorCompletionService<>(executor); - AtomicInteger submitted = new AtomicInteger(0); - AtomicLong collected = new AtomicLong(0); - - try (RocksBatchWriter buildingsWriter = new RocksBatchWriter(buildingsDb, 10_000, stats)) { - try (RocksIterator it = poiIndexDb.newIterator()) { - it.seekToFirst(); - while (it.isValid()) { - byte[] key = it.key(); - if (key[0] != 'W') { // Buildings must be ways - it.next(); - continue; - } - + ExecutorCompletionService> ecs = new ExecutorCompletionService<>(executor); + + List buildingIds = new ArrayList<>(); + List buildingRecs = new ArrayList<>(); + + // First, collect all building IDs and their records + try (RocksIterator it = poiIndexDb.newIterator()) { + it.seekToFirst(); + while (it.isValid()) { + byte[] key = it.key(); + if (key[0] == 'W') { // Buildings must be ways byte[] val = it.value(); final PoiIndexRec rec = decodePoiIndexRec(val); - if (!"building".equals(rec.type)) { - it.next(); - continue; + if ("building".equals(rec.type)) { + final long wayId = bytesToLong(key, 1); + buildingIds.add(wayId); + buildingRecs.add(rec); } - final long wayId = bytesToLong(key, 1); - - semaphore.acquire(); - stats.incrementActiveThreads(); - ecs.submit(() -> { - try { - org.locationtech.jts.geom.Geometry geometry = buildGeometryFromWay(wayId, nodeCache, wayIndexDb, stats); - if (geometry == null) return null; - if (!geometry.isValid()) { - try { - geometry = geometry.buffer(0); - } catch (Exception e) { - return null; - } - } - if (geometry == null || geometry.isEmpty() || !geometry.isValid()) return null; - - // name=subtype (e.g., "residential"), code=null, level=100 (for hierarchy) - return new BoundaryResultLite(wayId, 100, rec.subtype, null, geometry); - } finally { - semaphore.release(); - stats.decrementActiveThreads(); - } - }); - - submitted.incrementAndGet(); - - for (Future f; (f = ecs.poll()) != null;) { - collected.incrementAndGet(); - try { - BoundaryResultLite r = f.get(); - if (r != null) { - // Re-use the same storage logic as administrative boundaries - storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); - stats.incrementBuildingsProcessed(); + } + it.next(); + } + } + + stats.setTotalBuildingsToProcess(buildingIds.size()); + + // Process in batches + int submitted = 0; + for (int start = 0; start < buildingIds.size(); start += batchSize) { + int end = Math.min(start + batchSize, buildingIds.size()); + List batchIds = buildingIds.subList(start, end); + List batchRecs = buildingRecs.subList(start, end); + + final int batchStart = start; + ecs.submit(() -> { + List results = new ArrayList<>(); + for (int i = 0; i < batchIds.size(); i++) { + long wayId = batchIds.get(i); + PoiIndexRec rec = batchRecs.get(i); + try { + org.locationtech.jts.geom.Geometry geometry = buildGeometryFromWay(wayId, nodeCache, wayIndexDb, stats); + if (geometry == null) continue; + if (!geometry.isValid()) { + try { + geometry = geometry.buffer(0); + } catch (Exception e) { + continue; } - } catch (Exception e) { - stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.CONCURRENCY, null, "building-future-poll", e); } + if (geometry == null || geometry.isEmpty() || !geometry.isValid()) continue; + + results.add(new BoundaryResultLite(wayId, 100, rec.subtype, null, geometry)); + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.GEOMETRY, wayId, "build-building-geometry-batch", e); } - it.next(); } - } - long remaining = submitted.get() - collected.get(); - for (int i = 0; i < remaining; i++) { - try { - Future f = ecs.take(); - BoundaryResultLite r = f.get(); - if (r != null) { - storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); - stats.incrementBuildingsProcessed(); - } - } catch (Exception e) { - stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.CONCURRENCY, null, "building-future-take", e); + return results; + }); + submitted++; + } + + try (RocksBatchWriter buildingsWriter = new RocksBatchWriter(buildingsDb, 10_000, stats)) { + int completed = 0; + while (completed < submitted) { + Future> future = ecs.take(); + List batchResults = future.get(); + for (BoundaryResultLite r : batchResults) { + storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); + stats.incrementBuildingsProcessed(); } + completed++; } buildingsWriter.flush(); - } finally { executor.shutdown(); executor.awaitTermination(30, TimeUnit.MINUTES); @@ -1396,6 +1388,14 @@ private boolean isPoi(OsmEntity entity) { isInterestingBuilding = true; break; + case "natural": + // Filter out natural features that are not useful as POIs + return switch (val) { + case "tree", "wood", "scrub", "heath", "grassland", "fell", + "bare_rock", "scree", "shingle", "sand", "mud" -> false; + default -> true; + }; + case "shop", "tourism", "leisure", "office", "craft", "place", "historic", "public_transport", "aeroway": return !key.equals("leisure") || !List.of("picnic_table", "swimming_pool").contains(val); @@ -2120,4 +2120,4 @@ private static String getString(ByteBuffer bb) { return new String(dst, StandardCharsets.UTF_8); } -} \ No newline at end of file +} From 293f59842cd22b6fe2910c892ee07f4f18bd5892 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 16:07:01 +0100 Subject: [PATCH 10/23] fix: remove unsupported method call in ImportService --- .../dedicatedcode/paikka/service/importer/ImportService.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index 5e5a7c7..1d8dda9 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -882,8 +882,6 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node } } - stats.setTotalBuildingsToProcess(buildingIds.size()); - // Process in batches int submitted = 0; for (int start = 0; start < buildingIds.size(); start += batchSize) { @@ -891,7 +889,6 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node List batchIds = buildingIds.subList(start, end); List batchRecs = buildingRecs.subList(start, end); - final int batchStart = start; ecs.submit(() -> { List results = new ArrayList<>(); for (int i = 0; i < batchIds.size(); i++) { From 075ca8af0e7a3d98882de9d3bb5467771a018de9 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 17:29:29 +0100 Subject: [PATCH 11/23] feat: Improve building boundary processing with memory-efficient streaming approach The key improvements in this commit are: 1. Removed upfront collection of all building IDs and records 2. Added a semaphore to limit in-flight batches 3. Stream through `poiIndexDb` with a `RocksIterator` 4. Process buildings in batches of 1000 5. Drain completed futures inline to keep memory bounded 6. Added atomic counters to track submitted and collected batches 7. Final drain of all remaining futures This approach solves the memory issue with planet.pbf imports by: - Never collecting all building records into memory - Limiting concurrent processing - Streaming through the database - Draining futures continuously The implementation follows the pattern used in other methods like `processAdministrativeBoundariesFromIndex`, ensuring consistent memory management and parallel processing. --- .../service/importer/ImportService.java | 156 +++++++++++++----- 1 file changed, 112 insertions(+), 44 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index 1d8dda9..14cb37b 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -856,17 +856,23 @@ private void processAdministrativeBoundariesFromIndex(RocksDB relIndexDb, RocksD } private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB nodeCache, RocksDB wayIndexDb, RocksDB buildingGridIndexDb, RocksDB buildingsDb, ImportStatistics stats) throws Exception { + int maxInFlight = 100; // maximum number of batches submitted but not yet completed + Semaphore semaphore = new Semaphore(maxInFlight); int batchSize = 1000; // Process buildings in batches to reduce overhead int numThreads = Math.max(1, config.getImportConfiguration().getThreads()); ExecutorService executor = createExecutorService(numThreads); ExecutorCompletionService> ecs = new ExecutorCompletionService<>(executor); - List buildingIds = new ArrayList<>(); - List buildingRecs = new ArrayList<>(); + AtomicInteger submitted = new AtomicInteger(0); + AtomicLong collected = new AtomicLong(0); - // First, collect all building IDs and their records - try (RocksIterator it = poiIndexDb.newIterator()) { + try (RocksBatchWriter buildingsWriter = new RocksBatchWriter(buildingsDb, 10_000, stats); + RocksIterator it = poiIndexDb.newIterator()) { + it.seekToFirst(); + List currentBatchIds = new ArrayList<>(batchSize); + List currentBatchRecs = new ArrayList<>(batchSize); + while (it.isValid()) { byte[] key = it.key(); if (key[0] == 'W') { // Buildings must be ways @@ -874,59 +880,121 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node final PoiIndexRec rec = decodePoiIndexRec(val); if ("building".equals(rec.type)) { final long wayId = bytesToLong(key, 1); - buildingIds.add(wayId); - buildingRecs.add(rec); + currentBatchIds.add(wayId); + currentBatchRecs.add(rec); + + if (currentBatchIds.size() >= batchSize) { + // submit this batch + semaphore.acquire(); + stats.incrementActiveThreads(); + List idsToProcess = new ArrayList<>(currentBatchIds); + List recsToProcess = new ArrayList<>(currentBatchRecs); + ecs.submit(() -> { + try { + List results = new ArrayList<>(); + for (int i = 0; i < idsToProcess.size(); i++) { + long wayIdInner = idsToProcess.get(i); + PoiIndexRec recInner = recsToProcess.get(i); + try { + org.locationtech.jts.geom.Geometry geometry = buildGeometryFromWay(wayIdInner, nodeCache, wayIndexDb, stats); + if (geometry == null) continue; + if (!geometry.isValid()) { + try { + geometry = geometry.buffer(0); + } catch (Exception e) { + continue; + } + } + if (geometry == null || geometry.isEmpty() || !geometry.isValid()) continue; + + results.add(new BoundaryResultLite(wayIdInner, 100, recInner.subtype, null, geometry)); + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.GEOMETRY, wayIdInner, "build-building-geometry-batch", e); + } + } + return results; + } finally { + semaphore.release(); + stats.decrementActiveThreads(); + } + }); + submitted.incrementAndGet(); + currentBatchIds.clear(); + currentBatchRecs.clear(); + + // drain completed futures to keep memory bounded + Future> f; + while ((f = ecs.poll()) != null) { + collected.incrementAndGet(); + try { + List batchResults = f.get(); + for (BoundaryResultLite r : batchResults) { + storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); + stats.incrementBuildingsProcessed(); + } + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.CONCURRENCY, null, "building-future-poll", e); + } + } + } } } it.next(); } - } - - // Process in batches - int submitted = 0; - for (int start = 0; start < buildingIds.size(); start += batchSize) { - int end = Math.min(start + batchSize, buildingIds.size()); - List batchIds = buildingIds.subList(start, end); - List batchRecs = buildingRecs.subList(start, end); - ecs.submit(() -> { - List results = new ArrayList<>(); - for (int i = 0; i < batchIds.size(); i++) { - long wayId = batchIds.get(i); - PoiIndexRec rec = batchRecs.get(i); + // submit remaining partial batch + if (!currentBatchIds.isEmpty()) { + semaphore.acquire(); + stats.incrementActiveThreads(); + List idsToProcess = new ArrayList<>(currentBatchIds); + List recsToProcess = new ArrayList<>(currentBatchRecs); + ecs.submit(() -> { try { - org.locationtech.jts.geom.Geometry geometry = buildGeometryFromWay(wayId, nodeCache, wayIndexDb, stats); - if (geometry == null) continue; - if (!geometry.isValid()) { + List results = new ArrayList<>(); + for (int i = 0; i < idsToProcess.size(); i++) { + long wayIdInner = idsToProcess.get(i); + PoiIndexRec recInner = recsToProcess.get(i); try { - geometry = geometry.buffer(0); + org.locationtech.jts.geom.Geometry geometry = buildGeometryFromWay(wayIdInner, nodeCache, wayIndexDb, stats); + if (geometry == null) continue; + if (!geometry.isValid()) { + try { + geometry = geometry.buffer(0); + } catch (Exception e) { + continue; + } + } + if (geometry == null || geometry.isEmpty() || !geometry.isValid()) continue; + + results.add(new BoundaryResultLite(wayIdInner, 100, recInner.subtype, null, geometry)); } catch (Exception e) { - continue; + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.GEOMETRY, wayIdInner, "build-building-geometry-batch", e); } } - if (geometry == null || geometry.isEmpty() || !geometry.isValid()) continue; - - results.add(new BoundaryResultLite(wayId, 100, rec.subtype, null, geometry)); - } catch (Exception e) { - stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.GEOMETRY, wayId, "build-building-geometry-batch", e); + return results; + } finally { + semaphore.release(); + stats.decrementActiveThreads(); } + }); + submitted.incrementAndGet(); + } + + // drain all remaining futures + long remaining = submitted.get() - collected.get(); + for (int i = 0; i < remaining; i++) { + try { + Future> f = ecs.take(); + List batchResults = f.get(); + for (BoundaryResultLite r : batchResults) { + storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); + stats.incrementBuildingsProcessed(); + } + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.CONCURRENCY, null, "building-future-take", e); } - return results; - }); - submitted++; - } - - try (RocksBatchWriter buildingsWriter = new RocksBatchWriter(buildingsDb, 10_000, stats)) { - int completed = 0; - while (completed < submitted) { - Future> future = ecs.take(); - List batchResults = future.get(); - for (BoundaryResultLite r : batchResults) { - storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); - stats.incrementBuildingsProcessed(); - } - completed++; } + buildingsWriter.flush(); } finally { executor.shutdown(); From 6d49186e9906a738bc25d56eec620257ad52f795 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Sat, 28 Mar 2026 18:11:52 +0100 Subject: [PATCH 12/23] started working on boundaries for building pois --- .../service/importer/ImportStatistics.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index 5bfeafa..a29c775 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -600,34 +600,34 @@ public void printFinalStatistics() { System.out.printf("\n\033[1;37m⏱️ Total Import Time:\033[0m \033[1;33m%s\033[0m%n%n", formatTime(getTotalTime())); System.out.println("\033[1;37m📊 Processing Summary:\033[0m"); - System.out.println("┌─────────────────┬─────────────────┬─────────────────┐"); - System.out.println("│ \033[1mEntity Type\033[0m │ \033[1mTotal Count\033[0m │ \033[1mAvg Speed\033[0m │"); - System.out.println("├─────────────────┼─────────────────┼─────────────────┤"); - System.out.printf("│ \033[32mPBF Entities\033[0m │ %15s │ %13s/s │%n", + System.out.println("┌────────────────────┬─────────────────┬─────────────────┐"); + System.out.println("│ \033[1mEntity Type\033[0m │ \033[1mTotal Count\033[0m │ \033[1mAvg Speed\033[0m │"); + System.out.println("├────────────────────┼─────────────────┼─────────────────┤"); + System.out.printf("│ \033[32mPBF Entities\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getEntitiesRead()), formatCompactNumber((long) (getEntitiesRead() / totalSeconds))); - System.out.printf("│ \033[37mNodes Found\033[0m │ %15s │ %13s/s │%n", + System.out.printf("│ \033[37mNodes Found\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getNodesFound()), formatCompactNumber((long) (getNodesFound() / totalSeconds))); - System.out.printf("│ \033[34mNodes Cached\033[0m │ %15s │ %13s/s │%n", + System.out.printf("│ \033[34mNodes Cached\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getNodesCached()), formatCompactNumber((long) (getNodesCached() / totalSeconds))); - System.out.printf("│ \033[35mWays Processed\033[0m │ %15s │ %13s/s │%n", + System.out.printf("│ \033[35mWays Processed\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getWaysProcessed()), formatCompactNumber((long) (getWaysProcessed() / totalSeconds))); - System.out.printf("│ \033[36mBoundaries\033[0m │ %15s │ %13s/s │%n", + System.out.printf("│ \033[36mBoundaries\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getBoundariesProcessed()), formatCompactNumber((long) (getBoundariesProcessed() / totalSeconds))); - System.out.printf("│ \033[33mBuildings Found\033[0m │ %15s │ %13s/s │%n", + System.out.printf("│ \033[31mBuildings Found\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getBuildingsFound()), formatCompactNumber((long) (getBuildingsFound() / totalSeconds))); - System.out.printf("│ \033[33mBuildings Processed\033[0m│ %15s │ %13s/s │%n", + System.out.printf("│ \033[31mBuildings Processed\033[0m│ %15s │ %13s/s │%n", formatCompactNumber(getBuildingsProcessed()), formatCompactNumber((long) (getBuildingsProcessed() / totalSeconds))); - System.out.printf("│ \033[33mPOIs Created\033[0m │ %15s │ %13s/s │%n", + System.out.printf("│ \033[33mPOIs Created\033[0m │ %15s │ %13s/s │%n", formatCompactNumber(getPoisProcessed()), formatCompactNumber((long) (getPoisProcessed() / totalSeconds))); - System.out.println("└─────────────────┴─────────────────┴─────────────────┘"); + System.out.println("└────────────────────┴─────────────────┴─────────────────┘"); long totalObjects = getNodesFound() + getNodesCached() + getWaysProcessed() + getBoundariesProcessed() + getBuildingsProcessed() + getPoisProcessed(); From b33c1269c2ece5cd96eee537d1390c71e8d7f0d3 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 18:58:06 +0100 Subject: [PATCH 13/23] feat: add building service and enhance POI response with building info --- .../paikka/controller/AdminController.java | 12 +- .../paikka/service/BuildingService.java | 299 ++++++++++++++++++ .../service/ReverseGeocodingService.java | 13 +- 3 files changed, 320 insertions(+), 4 deletions(-) create mode 100644 src/main/java/com/dedicatedcode/paikka/service/BuildingService.java diff --git a/src/main/java/com/dedicatedcode/paikka/controller/AdminController.java b/src/main/java/com/dedicatedcode/paikka/controller/AdminController.java index 5d8bf47..6146856 100644 --- a/src/main/java/com/dedicatedcode/paikka/controller/AdminController.java +++ b/src/main/java/com/dedicatedcode/paikka/controller/AdminController.java @@ -19,6 +19,7 @@ import com.dedicatedcode.paikka.service.ReverseGeocodingService; import com.dedicatedcode.paikka.service.BoundaryService; import com.dedicatedcode.paikka.service.MetadataService; +import com.dedicatedcode.paikka.service.BuildingService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; @@ -44,11 +45,16 @@ public class AdminController { private final ReverseGeocodingService reverseGeocodingService; private final BoundaryService boundaryService; private final MetadataService metadataService; + private final BuildingService buildingService; - public AdminController(ReverseGeocodingService reverseGeocodingService, BoundaryService boundaryService, MetadataService metadataService) { + public AdminController(ReverseGeocodingService reverseGeocodingService, + BoundaryService boundaryService, + MetadataService metadataService, + BuildingService buildingService) { this.reverseGeocodingService = reverseGeocodingService; this.boundaryService = boundaryService; this.metadataService = metadataService; + this.buildingService = buildingService; } @PostMapping(value = "/refresh-db", produces = "application/json") @@ -64,6 +70,10 @@ public ResponseEntity refreshDatabase() { logger.info("Reloading boundaries database..."); boundaryService.reloadDatabase(); + // Reload the building service (buildings database) + logger.info("Reloading buildings database..."); + buildingService.reloadDatabase(); + // Reload metadata logger.info("Reloading metadata..."); metadataService.reload(); diff --git a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java new file mode 100644 index 0000000..149ee41 --- /dev/null +++ b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java @@ -0,0 +1,299 @@ +/* + * This file is part of paikka. + * + * Paikka is free software: you can redistribute it and/or + * modify it under the terms of the GNU Affero General Public License + * as published by the Free Software Foundation, either version 3 or + * any later version. + * + * Paikka is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Affero General Public License for more details. + * You should have received a copy of the GNU Affero General Public License + * along with Paikka. If not, see . + */ + +package com.dedicatedcode.paikka.service; + +import com.dedicatedcode.paikka.config.PaikkaConfiguration; +import com.dedicatedcode.paikka.flatbuffers.Boundary; +import com.dedicatedcode.paikka.flatbuffers.Geometry; +import org.locationtech.jts.geom.GeometryFactory; +import org.locationtech.jts.io.WKBReader; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.stereotype.Service; + +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +/** + * Service for querying building information from the buildings database. + */ +@Service +@ConditionalOnProperty(name = "paikka.import-mode", havingValue = "false", matchIfMissing = true) +public class BuildingService { + + private static final Logger logger = LoggerFactory.getLogger(BuildingService.class); + private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory(); + + private final PaikkaConfiguration config; + private final S2Helper s2Helper; + private RocksDB buildingsDb; + + public BuildingService(PaikkaConfiguration config, S2Helper s2Helper) { + this.config = config; + this.s2Helper = s2Helper; + initializeRocksDB(); + } + + private void initializeRocksDB() { + if (buildingsDb != null) { + return; + } + + try { + RocksDB.loadLibrary(); + Path buildingsDbPath = Paths.get(config.getDataDir(), "buildings"); + + // Check if the database directory exists + if (!buildingsDbPath.toFile().exists()) { + logger.warn("Buildings database not found at: {}", buildingsDbPath); + return; + } + + Options options = new Options().setCreateIfMissing(false); + this.buildingsDb = RocksDB.openReadOnly(options, buildingsDbPath.toString()); + logger.info("Successfully initialized RocksDB for buildings"); + } catch (Exception e) { + logger.warn("Failed to initialize RocksDB for buildings: {}", e.getMessage()); + this.buildingsDb = null; + } + } + + public synchronized void reloadDatabase() { + logger.info("Reloading buildings database..."); + + if (buildingsDb != null) { + try { + buildingsDb.close(); + logger.info("Closed existing buildings database connection"); + } catch (Exception e) { + logger.warn("Error closing existing buildings database: {}", e.getMessage()); + } + buildingsDb = null; + } + + initializeRocksDB(); + + if (buildingsDb != null) { + logger.info("Buildings database reloaded successfully"); + } else { + logger.warn("Buildings database reload completed but database is not available"); + } + } + + /** + * Get building information for a specific OSM ID. + * This searches in the given shard and surrounding shards if needed. + */ + public BuildingInfo getBuildingInfo(long osmId, double lat, double lon) { + if (buildingsDb == null) { + logger.debug("Buildings database not initialized"); + return null; + } + + try { + // First, try to get the building directly by its OSM ID + byte[] key = s2Helper.longToByteArray(osmId); + byte[] data = buildingsDb.get(key); + + if (data != null) { + return decodeBuildingInfo(data, osmId); + } + + // If not found directly, search in surrounding shards + // Get the shard ID for the given coordinates + long centerShardId = s2Helper.getShardId(lat, lon); + + // Search in the center shard and neighbor shards + List shardsToSearch = new ArrayList<>(); + shardsToSearch.add(centerShardId); + + // Add neighbor shards + shardsToSearch.addAll(s2Helper.getNeighborShards(centerShardId)); + + // Search through all shards + for (Long shardId : shardsToSearch) { + // In the buildings database, buildings are stored by OSM ID, not shard ID + // So we need a different approach. Let's scan for the building by OSM ID + // Actually, buildings are stored by OSM ID, so we already tried that. + // If not found, we need to search by location. + + // We could implement a spatial search, but for now, let's just return null + // if not found by direct OSM ID lookup + break; + } + + logger.debug("Building with OSM ID {} not found in buildings database", osmId); + return null; + + } catch (RocksDBException e) { + logger.error("RocksDB error while querying building info for OSM ID {}", osmId, e); + return null; + } catch (Exception e) { + logger.error("Error while querying building info for OSM ID {}", osmId, e); + return null; + } + } + + /** + * Get building information for a specific OSM ID without location. + * This is a simpler version that doesn't search surrounding shards. + */ + public BuildingInfo getBuildingInfo(long osmId) { + if (buildingsDb == null) { + logger.debug("Buildings database not initialized"); + return null; + } + + try { + byte[] key = s2Helper.longToByteArray(osmId); + byte[] data = buildingsDb.get(key); + + if (data != null) { + return decodeBuildingInfo(data, osmId); + } + + logger.debug("Building with OSM ID {} not found in buildings database", osmId); + return null; + + } catch (RocksDBException e) { + logger.error("RocksDB error while querying building info for OSM ID {}", osmId, e); + return null; + } + } + + private BuildingInfo decodeBuildingInfo(byte[] data, long osmId) { + try { + ByteBuffer buffer = ByteBuffer.wrap(data); + Boundary boundary = Boundary.getRootAsBoundary(buffer); + + BuildingInfo info = new BuildingInfo(); + info.setOsmId(osmId); + info.setLevel(boundary.level()); + info.setName(boundary.name()); + info.setCode(boundary.code()); + info.setType(boundary.name()); // In buildings, name often contains building type + + // Decode geometry if present + if (boundary.geometry() != null) { + Geometry geometry = boundary.geometry(); + if (geometry.dataLength() > 0) { + byte[] wkbData = new byte[geometry.dataLength()]; + for (int i = 0; i < geometry.dataLength(); i++) { + wkbData[i] = (byte) geometry.data(i); + } + + try { + WKBReader wkbReader = new WKBReader(); + org.locationtech.jts.geom.Geometry jtsGeometry = wkbReader.read(wkbData); + info.setGeometry(jtsGeometry); + info.setBoundaryWkb(wkbData); + } catch (Exception e) { + logger.warn("Failed to decode geometry for building OSM ID {}: {}", osmId, e.getMessage()); + } + } + } + + return info; + } catch (Exception e) { + logger.error("Failed to decode building info for OSM ID {}", osmId, e); + return null; + } + } + + /** + * Check if the buildings database is available. + */ + public boolean isAvailable() { + return buildingsDb != null; + } + + /** + * Represents building information retrieved from the buildings database. + */ + public static class BuildingInfo { + private long osmId; + private int level; + private String name; + private String code; + private String type; + private org.locationtech.jts.geom.Geometry geometry; + private byte[] boundaryWkb; + + public long getOsmId() { + return osmId; + } + + public void setOsmId(long osmId) { + this.osmId = osmId; + } + + public int getLevel() { + return level; + } + + public void setLevel(int level) { + this.level = level; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public org.locationtech.jts.geom.Geometry getGeometry() { + return geometry; + } + + public void setGeometry(org.locationtech.jts.geom.Geometry geometry) { + this.geometry = geometry; + } + + public byte[] getBoundaryWkb() { + return boundaryWkb; + } + + public void setBoundaryWkb(byte[] boundaryWkb) { + this.boundaryWkb = boundaryWkb; + } + } +} diff --git a/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java b/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java index 38623e3..89798fb 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java @@ -50,11 +50,13 @@ public class ReverseGeocodingService { private final PaikkaConfiguration config; private final S2Helper s2Helper; + private final BuildingService buildingService; private RocksDB shardsDb; - public ReverseGeocodingService(PaikkaConfiguration config, S2Helper s2Helper) { + public ReverseGeocodingService(PaikkaConfiguration config, S2Helper s2Helper, BuildingService buildingService) { this.config = config; this.s2Helper = s2Helper; + this.buildingService = buildingService; initializeRocksDB(); } @@ -176,10 +178,15 @@ public List findNearbyPOIs(double lat, double lon, String lang, int // Find the closest POIs up to the limit List closestPOIs = findClosestPOIs(allPOIs, lat, lon, limit); - // Convert POIs to response format + // Convert POIs to response format and enhance with building info if needed List results = new ArrayList<>(); for (POIData poi : closestPOIs) { - results.add(convertPOIToResponse(poi, lat, lon, lang)); + POIResponse response = convertPOIToResponse(poi, lat, lon, lang); + + // Enhance with building information if this is a building or related to a building + enhanceWithBuildingInfo(response, poi); + + results.add(response); } logger.debug("Final result: {} POIs found from {} searched shards", results.size(), searchedShards.size()); From 9c6c3585ef53578f870727fe7170c3b38ddbca19 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sat, 28 Mar 2026 19:07:48 +0100 Subject: [PATCH 14/23] feat: add building info enhancement method to POI response --- .../service/ReverseGeocodingService.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java b/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java index 89798fb..e3b4fd2 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java @@ -424,6 +424,36 @@ private POIResponse convertPOIToResponse(POIData poi, double queryLat, double qu return response; } + + private void enhanceWithBuildingInfo(POIResponse response, POIData poi) { + // Try to get building info for the POI's OSM ID + BuildingService.BuildingInfo buildingInfo = buildingService.getBuildingInfo(poi.id()); + if (buildingInfo != null) { + // Update type and subtype with building information + if (buildingInfo.getType() != null) { + response.setType(buildingInfo.getType()); + } + // For subtype, we can use building name or code + if (buildingInfo.getName() != null) { + response.setSubtype(buildingInfo.getName()); + } else if (buildingInfo.getCode() != null) { + response.setSubtype(buildingInfo.getCode()); + } + + // If building has boundary data, set it in the response + if (buildingInfo.getBoundaryWkb() != null && buildingInfo.getBoundaryWkb().length > 0) { + try { + WKBReader wkbReader = new WKBReader(); + Geometry geometry = wkbReader.read(buildingInfo.getBoundaryWkb()); + GeoJsonGeometry geoJsonGeometry = convertJtsToGeoJson(geometry); + response.setBoundary(geoJsonGeometry); + logger.debug("Enhanced POI {} with building boundary", poi.id()); + } catch (Exception e) { + logger.warn("Failed to convert building boundary to GeoJSON for POI {}: {}", poi.id(), e.getMessage()); + } + } + } + } private record POIData(long id, float lat, float lon, String type, String subtype, Map names, From 95f33e4c24b5787a41dcbccc7ff54b64f5c4d560 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Sun, 29 Mar 2026 07:10:44 +0200 Subject: [PATCH 15/23] started working on boundaries for building pois --- .../paikka/service/importer/ImportStatistics.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index a29c775..d1ae7c9 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -111,8 +111,8 @@ public String toString() { private final AtomicLong boundaryPhaseEntitiesRead = new AtomicLong(0); private final AtomicLong buildingsFound = new AtomicLong(0); private final AtomicLong buildingsProcessed = new AtomicLong(0); - private AtomicLong addressNodesFound = new AtomicLong(0); - private AtomicLong addressNodesWithBuildingType = new AtomicLong(0); + private final AtomicLong addressNodesFound = new AtomicLong(0); + private final AtomicLong addressNodesWithBuildingType = new AtomicLong(0); private volatile String currentPhase = "Initializing"; private volatile boolean running = true; From f912d5fcfedba11b35456b8258eb59a4732f698b Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sun, 29 Mar 2026 07:15:55 +0200 Subject: [PATCH 16/23] refactor: Adjust building import process to use append-only and sharding strategy --- .../paikka/service/BuildingService.java | 100 +++---- .../service/ReverseGeocodingService.java | 2 +- .../service/importer/ImportService.java | 253 +++++++++++++++--- 3 files changed, 249 insertions(+), 106 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java index 149ee41..e14df11 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java @@ -110,42 +110,32 @@ public BuildingInfo getBuildingInfo(long osmId, double lat, double lon) { logger.debug("Buildings database not initialized"); return null; } - + try { - // First, try to get the building directly by its OSM ID - byte[] key = s2Helper.longToByteArray(osmId); - byte[] data = buildingsDb.get(key); - - if (data != null) { - return decodeBuildingInfo(data, osmId); - } - - // If not found directly, search in surrounding shards - // Get the shard ID for the given coordinates long centerShardId = s2Helper.getShardId(lat, lon); - - // Search in the center shard and neighbor shards List shardsToSearch = new ArrayList<>(); shardsToSearch.add(centerShardId); - - // Add neighbor shards shardsToSearch.addAll(s2Helper.getNeighborShards(centerShardId)); - - // Search through all shards + for (Long shardId : shardsToSearch) { - // In the buildings database, buildings are stored by OSM ID, not shard ID - // So we need a different approach. Let's scan for the building by OSM ID - // Actually, buildings are stored by OSM ID, so we already tried that. - // If not found, we need to search by location. - - // We could implement a spatial search, but for now, let's just return null - // if not found by direct OSM ID lookup - break; + byte[] key = s2Helper.longToByteArray(shardId); + byte[] data = buildingsDb.get(key); + + if (data != null) { + ByteBuffer buffer = ByteBuffer.wrap(data); + com.dedicatedcode.paikka.flatbuffers.BuildingList buildingList = com.dedicatedcode.paikka.flatbuffers.BuildingList.getRootAsBuildingList(buffer); + for (int i = 0; i < buildingList.buildingsLength(); i++) { + com.dedicatedcode.paikka.flatbuffers.Building building = buildingList.buildings(i); + if (building != null && building.id() == osmId) { + return decodeBuildingInfo(building); + } + } + } } - + logger.debug("Building with OSM ID {} not found in buildings database", osmId); return null; - + } catch (RocksDBException e) { logger.error("RocksDB error while querying building info for OSM ID {}", osmId, e); return null; @@ -155,68 +145,38 @@ public BuildingInfo getBuildingInfo(long osmId, double lat, double lon) { } } - /** - * Get building information for a specific OSM ID without location. - * This is a simpler version that doesn't search surrounding shards. - */ - public BuildingInfo getBuildingInfo(long osmId) { - if (buildingsDb == null) { - logger.debug("Buildings database not initialized"); - return null; - } - - try { - byte[] key = s2Helper.longToByteArray(osmId); - byte[] data = buildingsDb.get(key); - - if (data != null) { - return decodeBuildingInfo(data, osmId); - } - - logger.debug("Building with OSM ID {} not found in buildings database", osmId); - return null; - - } catch (RocksDBException e) { - logger.error("RocksDB error while querying building info for OSM ID {}", osmId, e); - return null; - } - } - - private BuildingInfo decodeBuildingInfo(byte[] data, long osmId) { + private BuildingInfo decodeBuildingInfo(com.dedicatedcode.paikka.flatbuffers.Building building) { try { - ByteBuffer buffer = ByteBuffer.wrap(data); - Boundary boundary = Boundary.getRootAsBoundary(buffer); - BuildingInfo info = new BuildingInfo(); - info.setOsmId(osmId); - info.setLevel(boundary.level()); - info.setName(boundary.name()); - info.setCode(boundary.code()); - info.setType(boundary.name()); // In buildings, name often contains building type - + info.setOsmId(building.id()); + info.setLevel(100); // Buildings don't have an admin_level, using a high value. + info.setName(building.name()); + info.setCode(building.code()); + info.setType(building.name()); // In buildings, name often contains building type + // Decode geometry if present - if (boundary.geometry() != null) { - Geometry geometry = boundary.geometry(); + if (building.geometry() != null) { + Geometry geometry = building.geometry(); if (geometry.dataLength() > 0) { byte[] wkbData = new byte[geometry.dataLength()]; for (int i = 0; i < geometry.dataLength(); i++) { wkbData[i] = (byte) geometry.data(i); } - + try { WKBReader wkbReader = new WKBReader(); org.locationtech.jts.geom.Geometry jtsGeometry = wkbReader.read(wkbData); info.setGeometry(jtsGeometry); info.setBoundaryWkb(wkbData); } catch (Exception e) { - logger.warn("Failed to decode geometry for building OSM ID {}: {}", osmId, e.getMessage()); + logger.warn("Failed to decode geometry for building OSM ID {}: {}", building.id(), e.getMessage()); } } } - + return info; } catch (Exception e) { - logger.error("Failed to decode building info for OSM ID {}", osmId, e); + logger.error("Failed to decode building info for OSM ID {}", building.id(), e); return null; } } diff --git a/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java b/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java index e3b4fd2..52495ad 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java @@ -427,7 +427,7 @@ private POIResponse convertPOIToResponse(POIData poi, double queryLat, double qu private void enhanceWithBuildingInfo(POIResponse response, POIData poi) { // Try to get building info for the POI's OSM ID - BuildingService.BuildingInfo buildingInfo = buildingService.getBuildingInfo(poi.id()); + BuildingService.BuildingInfo buildingInfo = buildingService.getBuildingInfo(poi.id(), poi.lat(), poi.lon()); if (buildingInfo != null) { // Update type and subtype with building information if (buildingInfo.getType() != null) { diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index 14cb37b..0b6fda9 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -71,6 +71,7 @@ public class ImportService { private final int fileReadWindowSize; private final AtomicLong sequence = new AtomicLong(0); + private final AtomicLong buildingSequence = new AtomicLong(0); public ImportService(S2Helper s2Helper, GeometrySimplificationService geometrySimplificationService, PaikkaConfiguration config) { this.s2Helper = s2Helper; @@ -103,6 +104,7 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { Path shardsDbPath = dataDirectory.resolve("poi_shards"); Path boundariesDbPath = dataDirectory.resolve("boundaries"); Path buildingsDbPath = dataDirectory.resolve("buildings"); + Path appendBuildingDbPath = dataDirectory.resolve("tmp/append_building"); Path gridIndexDbPath = dataDirectory.resolve("tmp/grid_index"); Path buildingGridIndexDbPath = dataDirectory.resolve("tmp/building_grid_index"); Path appendDbPath = dataDirectory.resolve("tmp/append_poi"); @@ -116,6 +118,7 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { cleanupDatabase(shardsDbPath); cleanupDatabase(boundariesDbPath); cleanupDatabase(buildingsDbPath); + cleanupDatabase(appendBuildingDbPath); cleanupDatabase(gridIndexDbPath); cleanupDatabase(buildingGridIndexDbPath); cleanupDatabase(nodeCacheDbPath); @@ -190,6 +193,7 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { try (RocksDB shardsDb = RocksDB.open(poiShardsOpts, shardsDbPath.toString()); RocksDB boundariesDb = RocksDB.open(poiShardsOpts, boundariesDbPath.toString()); RocksDB buildingsDb = RocksDB.open(poiShardsOpts, buildingsDbPath.toString()); + RocksDB appendBuildingDb = RocksDB.open(appendOpts, appendBuildingDbPath.toString()); RocksDB gridIndexDb = RocksDB.open(gridOpts, gridIndexDbPath.toString()); RocksDB buildingGridIndexDb = RocksDB.open(gridOpts, buildingGridIndexDbPath.toString()); RocksDB nodeCache = RocksDB.open(nodeOpts, nodeCacheDbPath.toString()); @@ -219,13 +223,14 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { processAdministrativeBoundariesFromIndex(relIndexDb, nodeCache, wayIndexDb, gridIndexDb, boundariesDb, stats); stats.setCurrentPhase(5, "1.4: Processing building boundaries"); - processBuildingBoundariesFromIndex(poiIndexDb, nodeCache, wayIndexDb, buildingGridIndexDb, buildingsDb, stats); + processBuildingBoundariesFromIndex(poiIndexDb, nodeCache, wayIndexDb, buildingGridIndexDb, appendBuildingDb, stats); stats.setCurrentPhase(6, "2.1: Processing POIs & Sharding"); pass2PoiShardingFromIndex(nodeCache, wayIndexDb, appendDb, boundariesDb, poiIndexDb, gridIndexDb, stats); stats.setCurrentPhase(7, "2.2: Compacting POIs"); compactShards(appendDb, shardsDb, stats); + compactBuildingShards(appendBuildingDb, buildingsDb, stats); stats.stop(); stats.printPhaseSummary("PASS 2", pass2Start); @@ -240,6 +245,7 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { shardsDbPath, boundariesDbPath, buildingsDbPath, + appendBuildingDbPath, gridIndexDbPath, buildingGridIndexDbPath, nodeCacheDbPath, @@ -855,24 +861,36 @@ private void processAdministrativeBoundariesFromIndex(RocksDB relIndexDb, RocksD } } - private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB nodeCache, RocksDB wayIndexDb, RocksDB buildingGridIndexDb, RocksDB buildingsDb, ImportStatistics stats) throws Exception { - int maxInFlight = 100; // maximum number of batches submitted but not yet completed + private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB nodeCache, RocksDB wayIndexDb, RocksDB buildingGridIndexDb, RocksDB appendBuildingDb, ImportStatistics stats) throws Exception { + int maxInFlight = 100; Semaphore semaphore = new Semaphore(maxInFlight); - int batchSize = 1000; // Process buildings in batches to reduce overhead + int batchSize = 1000; int numThreads = Math.max(1, config.getImportConfiguration().getThreads()); ExecutorService executor = createExecutorService(numThreads); - ExecutorCompletionService> ecs = new ExecutorCompletionService<>(executor); - + ExecutorCompletionService> ecs = new ExecutorCompletionService<>(executor); + AtomicInteger submitted = new AtomicInteger(0); AtomicLong collected = new AtomicLong(0); - - try (RocksBatchWriter buildingsWriter = new RocksBatchWriter(buildingsDb, 10_000, stats); + + AtomicReference>> shardBufferRef = new AtomicReference<>(new ConcurrentHashMap<>()); + Runnable flushTask = () -> { + try { + Map> bufferToFlush = shardBufferRef.getAndSet(new ConcurrentHashMap<>()); + if (!bufferToFlush.isEmpty()) { + writeBuildingShardBatchAppendOnly(bufferToFlush, appendBuildingDb, stats); + } + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.STORE, null, "flush-building-append-batch", e); + } + }; + + try (PeriodicFlusher _ = PeriodicFlusher.start("building-shard-flush", 5, 5, flushTask); RocksIterator it = poiIndexDb.newIterator()) { - + it.seekToFirst(); List currentBatchIds = new ArrayList<>(batchSize); List currentBatchRecs = new ArrayList<>(batchSize); - + while (it.isValid()) { byte[] key = it.key(); if (key[0] == 'W') { // Buildings must be ways @@ -882,16 +900,15 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node final long wayId = bytesToLong(key, 1); currentBatchIds.add(wayId); currentBatchRecs.add(rec); - + if (currentBatchIds.size() >= batchSize) { - // submit this batch semaphore.acquire(); stats.incrementActiveThreads(); List idsToProcess = new ArrayList<>(currentBatchIds); List recsToProcess = new ArrayList<>(currentBatchRecs); ecs.submit(() -> { try { - List results = new ArrayList<>(); + List results = new ArrayList<>(); for (int i = 0; i < idsToProcess.size(); i++) { long wayIdInner = idsToProcess.get(i); PoiIndexRec recInner = recsToProcess.get(i); @@ -906,8 +923,7 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node } } if (geometry == null || geometry.isEmpty() || !geometry.isValid()) continue; - - results.add(new BoundaryResultLite(wayIdInner, 100, recInner.subtype, null, geometry)); + results.add(new BuildingData(wayIdInner, recInner.subtype, null, geometry)); } catch (Exception e) { stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.GEOMETRY, wayIdInner, "build-building-geometry-batch", e); } @@ -921,15 +937,17 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node submitted.incrementAndGet(); currentBatchIds.clear(); currentBatchRecs.clear(); - - // drain completed futures to keep memory bounded - Future> f; + + Future> f; while ((f = ecs.poll()) != null) { collected.incrementAndGet(); try { - List batchResults = f.get(); - for (BoundaryResultLite r : batchResults) { - storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); + List batchResults = f.get(); + Map> currentActiveBuffer = shardBufferRef.get(); + for (BuildingData r : batchResults) { + Coordinate center = r.geometry().getEnvelopeInternal().centre(); + long shardId = s2Helper.getShardId(center.getY(), center.getX()); + currentActiveBuffer.computeIfAbsent(shardId, k -> new CopyOnWriteArrayList<>()).add(r); stats.incrementBuildingsProcessed(); } } catch (Exception e) { @@ -941,8 +959,7 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node } it.next(); } - - // submit remaining partial batch + if (!currentBatchIds.isEmpty()) { semaphore.acquire(); stats.incrementActiveThreads(); @@ -950,7 +967,7 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node List recsToProcess = new ArrayList<>(currentBatchRecs); ecs.submit(() -> { try { - List results = new ArrayList<>(); + List results = new ArrayList<>(); for (int i = 0; i < idsToProcess.size(); i++) { long wayIdInner = idsToProcess.get(i); PoiIndexRec recInner = recsToProcess.get(i); @@ -965,8 +982,7 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node } } if (geometry == null || geometry.isEmpty() || !geometry.isValid()) continue; - - results.add(new BoundaryResultLite(wayIdInner, 100, recInner.subtype, null, geometry)); + results.add(new BuildingData(wayIdInner, recInner.subtype, null, geometry)); } catch (Exception e) { stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.GEOMETRY, wayIdInner, "build-building-geometry-batch", e); } @@ -979,23 +995,24 @@ private void processBuildingBoundariesFromIndex(RocksDB poiIndexDb, RocksDB node }); submitted.incrementAndGet(); } - - // drain all remaining futures + long remaining = submitted.get() - collected.get(); for (int i = 0; i < remaining; i++) { try { - Future> f = ecs.take(); - List batchResults = f.get(); - for (BoundaryResultLite r : batchResults) { - storeBoundary(r.osmId(), r.level(), r.name(), r.code(), r.geometry(), buildingsWriter, buildingGridIndexDb); + Future> f = ecs.take(); + List batchResults = f.get(); + Map> currentActiveBuffer = shardBufferRef.get(); + for (BuildingData r : batchResults) { + Coordinate center = r.geometry().getEnvelopeInternal().centre(); + long shardId = s2Helper.getShardId(center.getY(), center.getX()); + currentActiveBuffer.computeIfAbsent(shardId, k -> new CopyOnWriteArrayList<>()).add(r); stats.incrementBuildingsProcessed(); } } catch (Exception e) { stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.CONCURRENCY, null, "building-future-take", e); } } - - buildingsWriter.flush(); + flushTask.run(); } finally { executor.shutdown(); executor.awaitTermination(30, TimeUnit.MINUTES); @@ -1141,6 +1158,167 @@ private int serializePoiData(FlatBufferBuilder builder, PoiData poi) { return POI.endPOI(builder); } + private void compactBuildingShards(RocksDB appendDb, RocksDB buildingsDb, ImportStatistics stats) { + stats.setCompactionStartTime(System.currentTimeMillis()); + stats.setCompactionEntriesTotal(buildingSequence.get()); + + Building reusableBuilding = new Building(); + Geometry reusableGeom = new Geometry(); + + try (RocksIterator iterator = appendDb.newIterator(); + WriteOptions writeOptions = new WriteOptions().setDisableWAL(true)) { + + iterator.seekToFirst(); + + long currentShardId = Long.MIN_VALUE; + List currentShardChunks = new ArrayList<>(); + + while (iterator.isValid()) { + byte[] key = iterator.key(); + long shardId = ByteBuffer.wrap(key).order(ByteOrder.BIG_ENDIAN).getLong(); + + if (shardId != currentShardId && currentShardId != Long.MIN_VALUE) { + flushCompactedBuildingShard(currentShardChunks, currentShardId, buildingsDb, writeOptions, reusableBuilding, reusableGeom, stats); + stats.incrementCompactionEntriesProcessed(currentShardChunks.size()); + currentShardChunks.clear(); + } + + currentShardId = shardId; + + byte[] value = iterator.value(); + byte[] valueCopy = new byte[value.length]; + System.arraycopy(value, 0, valueCopy, 0, value.length); + currentShardChunks.add(valueCopy); + + iterator.next(); + } + + if (currentShardId != Long.MIN_VALUE && !currentShardChunks.isEmpty()) { + flushCompactedBuildingShard(currentShardChunks, currentShardId, buildingsDb, writeOptions, reusableBuilding, reusableGeom, stats); + stats.incrementCompactionEntriesProcessed(currentShardChunks.size()); + } + } + } + + private void flushCompactedBuildingShard(List chunks, long shardId, RocksDB buildingsDb, WriteOptions writeOptions, Building reusableBuilding, Geometry reusableGeom, ImportStatistics stats) { + int totalBuildings = 0; + try { + for (byte[] chunk : chunks) { + ByteBuffer buf = ByteBuffer.wrap(chunk); + BuildingList buildingList = BuildingList.getRootAsBuildingList(buf); + totalBuildings += buildingList.buildingsLength(); + } + } catch (Exception e) { + stats.recordError(ImportStatistics.Stage.COMPACTING_POIS, Kind.DECODE, null, "flatbuffers-read:BuildingList", e); + } + + FlatBufferBuilder builder = new FlatBufferBuilder(Math.max(1024, totalBuildings * 256)); + int[] allOffsets = new int[totalBuildings]; + int idx = 0; + + for (byte[] chunk : chunks) { + ByteBuffer buf = ByteBuffer.wrap(chunk); + BuildingList buildingList = BuildingList.getRootAsBuildingList(buf); + int count = buildingList.buildingsLength(); + + for (int i = 0; i < count; i++) { + buildingList.buildings(reusableBuilding, i); + allOffsets[idx++] = copyBuildingFromFlatBuffer(builder, reusableBuilding, reusableGeom); + } + } + + int buildingsVec = BuildingList.createBuildingsVector(builder, allOffsets); + int buildingList = BuildingList.createBuildingList(builder, buildingsVec); + builder.finish(buildingList); + try { + buildingsDb.put(writeOptions, s2Helper.longToByteArray(shardId), builder.sizedByteArray()); + } catch (RocksDBException e) { + stats.recordError(ImportStatistics.Stage.COMPACTING_POIS, Kind.STORE, null, "rocks-put:buildings", e); + } + } + + private int copyBuildingFromFlatBuffer(FlatBufferBuilder builder, Building building, Geometry reusableGeom) { + String nameStr = building.name(); + String codeStr = building.code(); + int nameOff = nameStr != null ? builder.createString(nameStr) : 0; + int codeOff = codeStr != null ? builder.createString(codeStr) : 0; + + int geometryOff = 0; + if (building.geometry(reusableGeom) != null && reusableGeom.dataLength() > 0) { + ByteBuffer geometryBuf = reusableGeom.dataAsByteBuffer(); + if (geometryBuf != null) { + int geometryDataOff = Geometry.createDataVector(builder, geometryBuf); + geometryOff = Geometry.createGeometry(builder, geometryDataOff); + } + } + + Building.startBuilding(builder); + Building.addId(builder, building.id()); + if (nameOff != 0) Building.addName(builder, nameOff); + if (codeOff != 0) Building.addCode(builder, codeOff); + if (geometryOff != 0) Building.addGeometry(builder, geometryOff); + return Building.endBuilding(builder); + } + + private void writeBuildingShardBatchAppendOnly(Map> shardBuffer, RocksDB appendDb, ImportStatistics stats) throws Exception { + FlatBufferBuilder builder = new FlatBufferBuilder(1024 * 32); + + try (WriteBatch batch = new WriteBatch(); WriteOptions writeOptions = new WriteOptions()) { + + for (Iterator>> it = shardBuffer.entrySet().iterator(); it.hasNext(); ) { + Map.Entry> entry = it.next(); + List buildings = entry.getValue(); + if (buildings.isEmpty()) { + it.remove(); + continue; + } + + builder.clear(); + + int[] buildingOffsets = new int[buildings.size()]; + for (int i = 0; i < buildings.size(); i++) { + buildingOffsets[i] = serializeBuildingData(builder, buildings.get(i)); + } + int buildingsVectorOffset = BuildingList.createBuildingsVector(builder, buildingOffsets); + int buildingListOffset = BuildingList.createBuildingList(builder, buildingsVectorOffset); + builder.finish(buildingListOffset); + + long seq = buildingSequence.incrementAndGet(); + byte[] key = new byte[16]; + ByteBuffer.wrap(key).putLong(entry.getKey()).putLong(seq); + + batch.put(key, builder.sizedByteArray()); + it.remove(); + } + + try { + appendDb.write(writeOptions, batch); + } catch (RocksDBException e) { + stats.recordError(ImportStatistics.Stage.PROCESSING_BUILDINGS, Kind.STORE, null, "rocks-write:append_building", e); + } + stats.incrementRocksDbWrites(); + } + } + + private int serializeBuildingData(FlatBufferBuilder builder, BuildingData building) { + int nameOff = building.name() != null ? builder.createString(building.name()) : 0; + int codeOff = building.code() != null ? builder.createString(building.code()) : 0; + + int geometryOff = 0; + if (building.geometry() != null) { + byte[] wkb = new WKBWriter().write(building.geometry()); + int geometryDataOff = Geometry.createDataVector(builder, wkb); + geometryOff = Geometry.createGeometry(builder, geometryDataOff); + } + + Building.startBuilding(builder); + Building.addId(builder, building.id()); + if (nameOff != 0) Building.addName(builder, nameOff); + if (codeOff != 0) Building.addCode(builder, codeOff); + if (geometryOff != 0) Building.addGeometry(builder, geometryOff); + return Building.endBuilding(builder); + } + private void compactShards(RocksDB appendDb, RocksDB shardsDb, ImportStatistics stats) { stats.setCompactionStartTime(System.currentTimeMillis()); @@ -1780,6 +1958,7 @@ private void recordSizeMetrics(ImportStatistics stats, Path shardsDbPath, Path boundariesDbPath, Path buildingsDbPath, + Path appendBuildingDbPath, Path gridIndexDbPath, Path buildingGridIndexDbPath, Path nodeCacheDbPath, @@ -1803,7 +1982,8 @@ private void recordSizeMetrics(ImportStatistics stats, long rel = computeDirectorySize(relIndexDbPath); long poi = computeDirectorySize(poiIndexDbPath); long append = computeDirectorySize(appendDbPath); - long tmpTotal = grid + buildingGrid + node + way + boundaryWay + needed + rel + poi + append; + long appendBuilding = computeDirectorySize(appendBuildingDbPath); + long tmpTotal = grid + buildingGrid + node + way + boundaryWay + needed + rel + poi + append + appendBuilding; stats.setShardsBytes(shards); stats.setBoundariesBytes(boundaries); @@ -1837,6 +2017,9 @@ private record AddressData(String street, String houseNumber, String postcode, S private record BoundaryResultLite(long osmId, int level, String name, String code, org.locationtech.jts.geom.Geometry geometry) { } + private record BuildingData(long id, String name, String code, org.locationtech.jts.geom.Geometry geometry) { + } + private Long safeEntityId(EntityContainer container) { try { OsmEntity e = container.getEntity(); From 17cbf297ccd8269eee1b194589326703862c4ec1 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sun, 29 Mar 2026 08:02:34 +0200 Subject: [PATCH 17/23] feat: add building compaction phase with updated import statistics and reporting --- .../paikka/service/importer/ImportService.java | 1 + .../service/importer/ImportStatistics.java | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index 0b6fda9..b694a21 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -230,6 +230,7 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { stats.setCurrentPhase(7, "2.2: Compacting POIs"); compactShards(appendDb, shardsDb, stats); + stats.setCurrentPhase(8, "2.3: Compacting Buildings"); compactBuildingShards(appendBuildingDb, buildingsDb, stats); stats.stop(); stats.printPhaseSummary("PASS 2", pass2Start); diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index d1ae7c9..71bac2b 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -53,6 +53,7 @@ public enum Stage { CACHING_NODE_COORDINATES("Caching Node Coordinates"), PROCESSING_ADMIN_BOUNDARIES("Processing Admin Boundaries"), COMPACTING_POIS("Compacting POIs"), + COMPACTING_BUILDINGS("Compacting Buildings"), PROCESSING_BUILDINGS("Processing Buildings"); private final String shortName; @@ -139,7 +140,7 @@ public String toString() { private volatile long tmpTotalBytes; private volatile long tmpAppendBytes; - private final int TOTAL_STEPS = 6; + private final int TOTAL_STEPS = 8; private int currentStep = 0; public long getEntitiesRead() { @@ -539,9 +540,11 @@ public void startProgressReporter() { } else if (phase.contains("1.4")) { long buildingsPerSec = phaseSeconds > 0 ? (long) (getBuildingsProcessed() / phaseSeconds) : 0; + double percentage = getBuildingsFound() > 0 ? (double) getBuildingsProcessed() / getBuildingsFound() * 100.0 : 0.0; sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mProcessing Building Boundaries\033[0m", formatTime(elapsed))); sb.append(String.format(" │ \033[32mBuildings:\033[0m %s \033[33m(%s/s)\033[0m", formatCompactNumber(getBuildingsProcessed()), formatCompactRate(buildingsPerSec))); + sb.append(String.format(" │ \033[35mProgress:\033[0m %.2f%%", percentage)); sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); } else if (phase.contains("2.1")) { @@ -568,6 +571,17 @@ public void startProgressReporter() { formatCompactNumber(shardsCompacted), formatCompactRate(shardsPerSec))); sb.append(String.format(" │ \033[37mRemaining:\033[0m %s", formatCompactNumber(remaining))); + } else if (phase.contains("2.3")) { + long compactionElapsed = System.currentTimeMillis() - getCompactionStartTime(); + double compactionPhaseSeconds = compactionElapsed / 1000.0; + long shardsCompacted = getCompactionEntriesProcessed(); + long shardsPerSec = compactionPhaseSeconds > 0 ? (long) (shardsCompacted / compactionPhaseSeconds) : 0; + long remaining = getCompactionEntriesRemaining(); + sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mCompacting Buildings\033[0m", formatTime(elapsed))); + sb.append(String.format(" │ \033[32mShards Compacted:\033[0m %s \033[33m(%s/s)\033[0m", + formatCompactNumber(shardsCompacted), formatCompactRate(shardsPerSec))); + sb.append(String.format(" │ \033[37mRemaining:\033[0m %s", formatCompactNumber(remaining))); + } else { sb.append(String.format("\033[1;36m[%s]\033[0m %s", formatTime(elapsed), phase)); } From fbb35556258453b36cae44a869fafb5cad70c809 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sun, 29 Mar 2026 08:04:45 +0200 Subject: [PATCH 18/23] refactor: rename buildings database to buildings_shards --- .../java/com/dedicatedcode/paikka/service/BuildingService.java | 2 +- .../dedicatedcode/paikka/service/importer/ImportService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java index e14df11..ce6e17d 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java @@ -62,7 +62,7 @@ private void initializeRocksDB() { try { RocksDB.loadLibrary(); - Path buildingsDbPath = Paths.get(config.getDataDir(), "buildings"); + Path buildingsDbPath = Paths.get(config.getDataDir(), "buildings_shards"); // Check if the database directory exists if (!buildingsDbPath.toFile().exists()) { diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index b694a21..bfd1832 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -103,7 +103,7 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { Path shardsDbPath = dataDirectory.resolve("poi_shards"); Path boundariesDbPath = dataDirectory.resolve("boundaries"); - Path buildingsDbPath = dataDirectory.resolve("buildings"); + Path buildingsDbPath = dataDirectory.resolve("buildings_shards"); Path appendBuildingDbPath = dataDirectory.resolve("tmp/append_building"); Path gridIndexDbPath = dataDirectory.resolve("tmp/grid_index"); Path buildingGridIndexDbPath = dataDirectory.resolve("tmp/building_grid_index"); From 3a92cd76149d1d6ec71e1d8bbaefa6ef49242197 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Sun, 29 Mar 2026 10:39:26 +0200 Subject: [PATCH 19/23] started working on boundaries for building pois --- .../paikka/service/importer/ImportStatistics.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index 71bac2b..58d8f77 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -653,9 +653,9 @@ public void printFinalStatistics() { formatCompactNumber(getRocksDbWrites())); System.out.println("\n\033[1;37m📦 Dataset Size:\033[0m " + formatSize(getDatasetBytes())); - System.out.println(" • poi_shards: " + formatSize(getShardsBytes())); - System.out.println(" • boundaries: " + formatSize(getBoundariesBytes())); - System.out.println(" • buildings: " + formatSize(getBuildingsBytes())); + System.out.println(" • poi_shards : " + formatSize(getShardsBytes())); + System.out.println(" • boundaries : " + formatSize(getBoundariesBytes())); + System.out.println(" • building_shards: " + formatSize(getBuildingsBytes())); System.out.println("\n\033[1;37m🧹 Temporary DBs:\033[0m " + formatSize(getTmpTotalBytes())); System.out.println(" • grid_index: " + formatSize(getTmpGridBytes())); From 18c08cf3b94483c2c7cd0320ac58091c4e851417 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sun, 29 Mar 2026 12:15:25 +0200 Subject: [PATCH 20/23] refactor: add progress tracking for POI index scanning phase --- .../service/importer/ImportService.java | 13 ++++++++---- .../service/importer/ImportStatistics.java | 20 +++++++++++++++++-- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index bfd1832..b33cb7e 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -225,12 +225,11 @@ public void importData(String pbfFilePath, String dataDir) throws Exception { stats.setCurrentPhase(5, "1.4: Processing building boundaries"); processBuildingBoundariesFromIndex(poiIndexDb, nodeCache, wayIndexDb, buildingGridIndexDb, appendBuildingDb, stats); - stats.setCurrentPhase(6, "2.1: Processing POIs & Sharding"); pass2PoiShardingFromIndex(nodeCache, wayIndexDb, appendDb, boundariesDb, poiIndexDb, gridIndexDb, stats); - stats.setCurrentPhase(7, "2.2: Compacting POIs"); + stats.setCurrentPhase(8, "2.2: Compacting POIs"); compactShards(appendDb, shardsDb, stats); - stats.setCurrentPhase(8, "2.3: Compacting Buildings"); + stats.setCurrentPhase(9, "2.3: Compacting Buildings"); compactBuildingShards(appendBuildingDb, buildingsDb, stats); stats.stop(); stats.printPhaseSummary("PASS 2", pass2Start); @@ -446,11 +445,17 @@ private void pass2PoiShardingFromIndex(RocksDB nodeCache, try (ExecutorService executor = createExecutorService(numReaders); ReadOptions ro = new ReadOptions().setReadaheadSize(8 * 1024 * 1024)) { com.github.benmanes.caffeine.cache.Cache globalBoundaryCache = Caffeine.newBuilder().maximumSize(1000).recordStats().build(); ThreadLocal hierarchyCacheThreadLocal = ThreadLocal.withInitial(() -> new HierarchyCache(boundariesDb, gridIndexDb, s2Helper, globalBoundaryCache)); + + stats.setCurrentPhase(6, "1.5: Preparing POI Sharding"); long total = 0; try (RocksIterator it = poiIndexDb.newIterator(ro)) { - for (it.seekToFirst(); it.isValid(); it.next()) total++; + for (it.seekToFirst(); it.isValid(); it.next()) { + total++; + stats.incrementPoiIndexEntriesScanned(); + } } + stats.setCurrentPhase(7, "2.1: Processing POIs & Sharding"); long step = Math.max(1, total / numReaders); List splitKeys = new ArrayList<>(); try (RocksIterator it = poiIndexDb.newIterator(ro)) { diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java index 58d8f77..5d0ec0c 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportStatistics.java @@ -54,7 +54,8 @@ public enum Stage { PROCESSING_ADMIN_BOUNDARIES("Processing Admin Boundaries"), COMPACTING_POIS("Compacting POIs"), COMPACTING_BUILDINGS("Compacting Buildings"), - PROCESSING_BUILDINGS("Processing Buildings"); + PROCESSING_BUILDINGS("Processing Buildings"), + PREPARING_POI_SHARDING("Preparing POI Sharding"); private final String shortName; Stage(String shortName) { @@ -104,6 +105,7 @@ public String toString() { private final AtomicLong boundariesProcessed = new AtomicLong(0); private final AtomicLong poisProcessed = new AtomicLong(0); private final AtomicLong poiIndexRecRead = new AtomicLong(0); + private final AtomicLong poiIndexEntriesScanned = new AtomicLong(0); private final AtomicBoolean poiIndexRecReadDone = new AtomicBoolean(false); private final AtomicLong rocksDbWrites = new AtomicLong(0); private final AtomicLong queueSize = new AtomicLong(0); @@ -140,7 +142,7 @@ public String toString() { private volatile long tmpTotalBytes; private volatile long tmpAppendBytes; - private final int TOTAL_STEPS = 8; + private final int TOTAL_STEPS = 9; private int currentStep = 0; public long getEntitiesRead() { @@ -207,6 +209,14 @@ public void incrementPoiIndexRecRead() { poiIndexRecRead.incrementAndGet(); } + public long getPoiIndexEntriesScanned() { + return poiIndexEntriesScanned.get(); + } + + public void incrementPoiIndexEntriesScanned() { + poiIndexEntriesScanned.incrementAndGet(); + } + public boolean isPoiIndexRecReadDone() { return poiIndexRecReadDone.get(); } @@ -547,6 +557,12 @@ public void startProgressReporter() { sb.append(String.format(" │ \033[35mProgress:\033[0m %.2f%%", percentage)); sb.append(String.format(" │ \033[37mThreads:\033[0m %d", getActiveThreads())); + } else if (phase.contains("1.5")) { + long scannedPerSec = phaseSeconds > 0 ? (long) (getPoiIndexEntriesScanned() / phaseSeconds) : 0; + sb.append(String.format("\033[1;36m[%s]\033[0m \033[1mPreparing POI Sharding\033[0m", formatTime(elapsed))); + sb.append(String.format(" │ \033[32mPOI Index Scanned:\033[0m %s \033[33m(%s/s)\033[0m", + formatCompactNumber(getPoiIndexEntriesScanned()), formatCompactRate(scannedPerSec))); + } else if (phase.contains("2.1")) { long poisPerSec = phaseSeconds > 0 ? (long) (getPoisProcessed() / phaseSeconds) : 0; long poisReadSec = phaseSeconds > 0 ? (long) (getPoiIndexRecRead() / phaseSeconds) : 0; From 9340ba065fb6b711f25375b86d3b1ba7814b2fd0 Mon Sep 17 00:00:00 2001 From: "Daniel Graf (aider-ce)" Date: Sun, 29 Mar 2026 20:02:26 +0200 Subject: [PATCH 21/23] refactor: modify getBuildingInfo to use point-in-polygon geometry check --- .../paikka/service/BuildingService.java | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java index ce6e17d..8b4a7d4 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java @@ -19,7 +19,9 @@ import com.dedicatedcode.paikka.config.PaikkaConfiguration; import com.dedicatedcode.paikka.flatbuffers.Boundary; import com.dedicatedcode.paikka.flatbuffers.Geometry; +import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.GeometryFactory; +import org.locationtech.jts.geom.Point; import org.locationtech.jts.io.WKBReader; import org.rocksdb.Options; import org.rocksdb.RocksDB; @@ -102,7 +104,7 @@ public synchronized void reloadDatabase() { } /** - * Get building information for a specific OSM ID. + * Get building information for a point (lat, lon) by checking if it is contained in a building. * This searches in the given shard and surrounding shards if needed. */ public BuildingInfo getBuildingInfo(long osmId, double lat, double lon) { @@ -112,6 +114,7 @@ public BuildingInfo getBuildingInfo(long osmId, double lat, double lon) { } try { + Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(lon, lat)); long centerShardId = s2Helper.getShardId(lat, lon); List shardsToSearch = new ArrayList<>(); shardsToSearch.add(centerShardId); @@ -126,21 +129,36 @@ public BuildingInfo getBuildingInfo(long osmId, double lat, double lon) { com.dedicatedcode.paikka.flatbuffers.BuildingList buildingList = com.dedicatedcode.paikka.flatbuffers.BuildingList.getRootAsBuildingList(buffer); for (int i = 0; i < buildingList.buildingsLength(); i++) { com.dedicatedcode.paikka.flatbuffers.Building building = buildingList.buildings(i); - if (building != null && building.id() == osmId) { - return decodeBuildingInfo(building); + if (building != null && building.geometry() != null) { + Geometry geometryFb = building.geometry(); + if (geometryFb.dataLength() > 0) { + byte[] wkbData = new byte[geometryFb.dataLength()]; + for (int j = 0; j < geometryFb.dataLength(); j++) { + wkbData[j] = (byte) geometryFb.data(j); + } + try { + WKBReader wkbReader = new WKBReader(); + org.locationtech.jts.geom.Geometry jtsGeometry = wkbReader.read(wkbData); + if (jtsGeometry.contains(point)) { + return decodeBuildingInfo(building); + } + } catch (Exception e) { + logger.warn("Failed to read geometry for building {}", building.id(), e); + } + } } } } } - logger.debug("Building with OSM ID {} not found in buildings database", osmId); + logger.debug("Building containing point ({}, {}) not found in buildings database", lon, lat); return null; } catch (RocksDBException e) { - logger.error("RocksDB error while querying building info for OSM ID {}", osmId, e); + logger.error("RocksDB error while querying building info for point ({}, {})", lon, lat, e); return null; } catch (Exception e) { - logger.error("Error while querying building info for OSM ID {}", osmId, e); + logger.error("Error while querying building info for point ({}, {})", lon, lat, e); return null; } } From 1870100ef4a288b014b2e665e4d071ced49846d8 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Mon, 30 Mar 2026 03:53:41 +0200 Subject: [PATCH 22/23] started working on boundaries for building pois --- scripts/filter_osm.sh | 4 ++-- .../paikka/service/importer/ImportService.java | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/filter_osm.sh b/scripts/filter_osm.sh index b2c2afa..b876351 100755 --- a/scripts/filter_osm.sh +++ b/scripts/filter_osm.sh @@ -54,8 +54,8 @@ echo "Input file: $INPUT_FILE" echo "Output file: $OUTPUT_FILE" echo "" osmium tags-filter "$INPUT_FILE" \ - nwr/amenity!=bench,drinking_water,waste_basket,bicycle_parking,vending_machine,parking_entrance,fire_hydrant,recycling \ - nwr/emergency!=fire_hydrant,defibrillator \ + nwr/amenity!=bench,drinking_water,waste_basket,bicycle_parking,vending_machine,parking_entrance,fire_hydrant,recycling,post_box,atm,loading_ramp,parcel_locker,trolley_bay \ + nwr/emergency!=fire_hydrant,defibrillator,fire_service_inlet \ nw/shop \ nw/tourism \ nw/leisure \ diff --git a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java index b33cb7e..968315e 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/importer/ImportService.java @@ -1628,8 +1628,8 @@ private boolean isPoi(OsmEntity entity) { case "emergency": return switch (val) { - case "fire_hydrant", "defibrillator", "fire_extinguisher", - "siren", "life_ring", "lifeline", "phone", "drinking_water" -> false; + case "fire_hydrant", "fire_service_inlet", "defibrillator", "fire_extinguisher", + "siren", "life_ring", "lifeline", "phone", "drinking_water", "yes" -> false; default -> true; }; @@ -1652,6 +1652,8 @@ private boolean isPoi(OsmEntity entity) { case "railway": if ("station".equals(val)) return true; break; + case "man_made": + return false; default: if (isPoiFastKey(key)) return true; From 927460f54f5e109757e4782a93a3734792ce63e5 Mon Sep 17 00:00:00 2001 From: Daniel Graf Date: Mon, 30 Mar 2026 04:10:52 +0200 Subject: [PATCH 23/23] cleanup --- .../dedicatedcode/paikka/service/BuildingService.java | 9 ++++----- .../paikka/service/ReverseGeocodingService.java | 5 ++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java index 8b4a7d4..82911c9 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/BuildingService.java @@ -17,7 +17,6 @@ package com.dedicatedcode.paikka.service; import com.dedicatedcode.paikka.config.PaikkaConfiguration; -import com.dedicatedcode.paikka.flatbuffers.Boundary; import com.dedicatedcode.paikka.flatbuffers.Geometry; import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.GeometryFactory; @@ -107,7 +106,7 @@ public synchronized void reloadDatabase() { * Get building information for a point (lat, lon) by checking if it is contained in a building. * This searches in the given shard and surrounding shards if needed. */ - public BuildingInfo getBuildingInfo(long osmId, double lat, double lon) { + public BuildingInfo getBuildingInfo(double lat, double lon) { if (buildingsDb == null) { logger.debug("Buildings database not initialized"); return null; @@ -139,7 +138,7 @@ public BuildingInfo getBuildingInfo(long osmId, double lat, double lon) { try { WKBReader wkbReader = new WKBReader(); org.locationtech.jts.geom.Geometry jtsGeometry = wkbReader.read(wkbData); - if (jtsGeometry.contains(point)) { + if (jtsGeometry.intersects(point)) { return decodeBuildingInfo(building); } } catch (Exception e) { @@ -167,10 +166,10 @@ private BuildingInfo decodeBuildingInfo(com.dedicatedcode.paikka.flatbuffers.Bui try { BuildingInfo info = new BuildingInfo(); info.setOsmId(building.id()); - info.setLevel(100); // Buildings don't have an admin_level, using a high value. + info.setLevel(100); info.setName(building.name()); info.setCode(building.code()); - info.setType(building.name()); // In buildings, name often contains building type + info.setType(building.name()); // Decode geometry if present if (building.geometry() != null) { diff --git a/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java b/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java index 52495ad..955c90b 100644 --- a/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java +++ b/src/main/java/com/dedicatedcode/paikka/service/ReverseGeocodingService.java @@ -426,12 +426,11 @@ private POIResponse convertPOIToResponse(POIData poi, double queryLat, double qu } private void enhanceWithBuildingInfo(POIResponse response, POIData poi) { - // Try to get building info for the POI's OSM ID - BuildingService.BuildingInfo buildingInfo = buildingService.getBuildingInfo(poi.id(), poi.lat(), poi.lon()); + BuildingService.BuildingInfo buildingInfo = buildingService.getBuildingInfo(poi.lat(), poi.lon()); if (buildingInfo != null) { // Update type and subtype with building information if (buildingInfo.getType() != null) { - response.setType(buildingInfo.getType()); + response.setType("building"); } // For subtype, we can use building name or code if (buildingInfo.getName() != null) {