From 367b5e86ea8b5b0cbef0aee465f8b0c11cb87e78 Mon Sep 17 00:00:00 2001 From: Davide Polato Date: Fri, 20 Mar 2026 16:51:09 +0100 Subject: [PATCH 1/4] #1831 Bump Solr dependencies aligned to 10.0.0 and update THIRD-PARTY --- THIRD-PARTY.txt | 36 ++++++++++--------- .../solr/configsets/docs/conf/solrconfig.xml | 2 +- .../configsets/metrics/conf/solrconfig.xml | 2 +- .../configsets/status/conf/solrconfig.xml | 2 +- external/solr/pom.xml | 14 +++++++- 5 files changed, 36 insertions(+), 20 deletions(-) diff --git a/THIRD-PARTY.txt b/THIRD-PARTY.txt index a4475a0f6..d83bdb35b 100644 --- a/THIRD-PARTY.txt +++ b/THIRD-PARTY.txt @@ -27,6 +27,7 @@ List of third-party dependencies grouped by their license type. * Apache Commons Crypto (org.apache.commons:commons-crypto:1.1.0 - https://commons.apache.org/proper/commons-crypto/) * Apache Commons CSV (org.apache.commons:commons-csv:1.14.1 - https://commons.apache.org/proper/commons-csv/) * Apache Commons Exec (org.apache.commons:commons-exec:1.5.0 - https://commons.apache.org/proper/commons-exec/) + * Apache Commons IO (commons-io:commons-io:2.17.0 - https://commons.apache.org/proper/commons-io/) * Apache Commons IO (commons-io:commons-io:2.20.0 - https://commons.apache.org/proper/commons-io/) * Apache Commons Lang (org.apache.commons:commons-lang3:3.20.0 - https://commons.apache.org/proper/commons-lang/) * Apache Commons Logging (commons-logging:commons-logging:1.2 - http://commons.apache.org/proper/commons-logging/) @@ -51,7 +52,6 @@ List of third-party dependencies grouped by their license type. * Apache HBase Unsafe Wrapper (org.apache.hbase.thirdparty:hbase-unsafe:4.1.12 - https://hbase.apache.org/hbase-unsafe) * Apache HttpAsyncClient (org.apache.httpcomponents:httpasyncclient:4.1.5 - http://hc.apache.org/httpcomponents-asyncclient) * Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.14 - http://hc.apache.org/httpcomponents-client-ga) - * Apache HttpClient Mime (org.apache.httpcomponents:httpmime:4.5.14 - http://hc.apache.org/httpcomponents-client-ga) * Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.16 - http://hc.apache.org/httpcomponents-core-ga) * Apache HttpCore NIO (org.apache.httpcomponents:httpcore-nio:4.4.16 - http://hc.apache.org/httpcomponents-core-ga) * Apache James :: Mime4j :: Core (org.apache.james:apache-mime4j-core:0.8.13 - http://james.apache.org/mime4j/apache-mime4j-core) @@ -81,10 +81,10 @@ List of third-party dependencies grouped by their license type. * Apache POI (org.apache.poi:poi-scratchpad:5.4.1 - https://poi.apache.org/) * Apache POI - API based on OPC and OOXML schemas (org.apache.poi:poi-ooxml:5.4.1 - https://poi.apache.org/) * Apache POI - Common (org.apache.poi:poi:5.4.1 - https://poi.apache.org/) - * Apache Solr (module: api) (org.apache.solr:solr-api:9.10.1 - https://solr.apache.org/) - * Apache Solr (module: solrj) (org.apache.solr:solr-solrj:9.10.1 - https://solr.apache.org/) - * Apache Solr (module: solrj-streaming) (org.apache.solr:solr-solrj-streaming:9.10.1 - https://solr.apache.org/) - * Apache Solr (module: solrj-zookeeper) (org.apache.solr:solr-solrj-zookeeper:9.10.1 - https://solr.apache.org/) + * Apache Solr (module: api) (org.apache.solr:solr-api:10.0.0 - https://solr.apache.org/) + * Apache Solr (module: solrj) (org.apache.solr:solr-solrj:10.0.0 - https://solr.apache.org/) + * Apache Solr (module: solrj-jetty) (org.apache.solr:solr-solrj-jetty:10.0.0 - https://solr.apache.org/) + * Apache Solr (module: solrj-zookeeper) (org.apache.solr:solr-solrj-zookeeper:10.0.0 - https://solr.apache.org/) * Apache Tika Apple parser module (org.apache.tika:tika-parser-apple-module:3.2.3 - https://tika.apache.org/tika-parser-apple-module/) * Apache Tika audiovideo parser module (org.apache.tika:tika-parser-audiovideo-module:3.2.3 - https://tika.apache.org/tika-parser-audiovideo-module/) * Apache Tika cad parser module (org.apache.tika:tika-parser-cad-module:3.2.3 - https://tika.apache.org/tika-parser-cad-module/) @@ -125,6 +125,8 @@ List of third-party dependencies grouped by their license type. * Commons Logging (commons-logging:commons-logging:1.1.3 - http://commons.apache.org/proper/commons-logging/) * compiler (com.github.spullara.mustache.java:compiler:0.9.14 - http://github.com/spullara/mustache.java) * Crawler-commons (com.github.crawler-commons:crawler-commons:1.6 - https://github.com/crawler-commons/crawler-commons) + * Curator Client (org.apache.curator:curator-client:5.9.0 - https://curator.apache.org/curator-client) + * Curator Framework (org.apache.curator:curator-framework:5.9.0 - https://curator.apache.org/curator-framework) * error-prone annotations (com.google.errorprone:error_prone_annotations:2.36.0 - https://errorprone.info/error_prone_annotations) * error-prone annotations (com.google.errorprone:error_prone_annotations:2.41.0 - https://errorprone.info/error_prone_annotations) * error-prone annotations (com.google.errorprone:error_prone_annotations:2.43.0 - https://errorprone.info/error_prone_annotations) @@ -185,6 +187,7 @@ List of third-party dependencies grouped by their license type. * Netty/Codec/Compression (io.netty:netty-codec-compression:4.2.7.Final - https://netty.io/netty-codec-compression/) * Netty/Codec/Marshalling (io.netty:netty-codec-marshalling:4.2.7.Final - https://netty.io/netty-codec-marshalling/) * Netty/Codec/Protobuf (io.netty:netty-codec-protobuf:4.2.7.Final - https://netty.io/netty-codec-protobuf/) + * Netty/Codec (io.netty:netty-codec:4.1.119.Final - https://netty.io/netty-codec/) * Netty/Codec (io.netty:netty-codec:4.2.7.Final - https://netty.io/netty-codec/) * Netty/Common (io.netty:netty-common:4.1.119.Final - https://netty.io/netty-common/) * Netty/Common (io.netty:netty-common:4.2.7.Final - https://netty.io/netty-common/) @@ -198,6 +201,7 @@ List of third-party dependencies grouped by their license type. * Netty/Transport/Native/Epoll (io.netty:netty-transport-native-epoll:4.1.119.Final - https://netty.io/netty-transport-native-epoll/) * Netty/Transport/Native/Unix/Common (io.netty:netty-transport-native-unix-common:4.1.119.Final - https://netty.io/netty-transport-native-unix-common/) * Netty/Transport/Native/Unix/Common (io.netty:netty-transport-native-unix-common:4.2.7.Final - https://netty.io/netty-transport-native-unix-common/) + * Netty/Transport (io.netty:netty-transport:4.1.119.Final - https://netty.io/netty-transport/) * Netty/Transport (io.netty:netty-transport:4.2.7.Final - https://netty.io/netty-transport/) * Non-Blocking Reactive Foundation for the JVM (io.projectreactor:reactor-core:3.7.5 - https://github.com/reactor/reactor-core) * Objenesis (org.objenesis:objenesis:3.3 - http://objenesis.org/objenesis) @@ -272,16 +276,16 @@ List of third-party dependencies grouped by their license type. Apache License, Version 2.0, Eclipse Public License - Version 2.0 - * Jetty :: ALPN :: Client (org.eclipse.jetty:jetty-alpn-client:10.0.26 - https://jetty.org/jetty-alpn-parent/jetty-alpn-client) - * Jetty :: ALPN :: JDK9 Client Implementation (org.eclipse.jetty:jetty-alpn-java-client:10.0.26 - https://jetty.org/jetty-alpn-parent/jetty-alpn-java-client) - * Jetty :: Asynchronous HTTP Client (org.eclipse.jetty:jetty-client:10.0.26 - https://jetty.org/jetty-client) - * Jetty :: HTTP2 :: Client (org.eclipse.jetty.http2:http2-client:10.0.26 - https://jetty.org/http2-parent/http2-client) - * Jetty :: HTTP2 :: Common (org.eclipse.jetty.http2:http2-common:10.0.26 - https://jetty.org/http2-parent/http2-common) - * Jetty :: HTTP2 :: HPACK (org.eclipse.jetty.http2:http2-hpack:10.0.26 - https://jetty.org/http2-parent/http2-hpack) - * Jetty :: HTTP2 :: HTTP Client Transport (org.eclipse.jetty.http2:http2-http-client-transport:10.0.26 - https://jetty.org/http2-parent/http2-http-client-transport) - * Jetty :: Http Utility (org.eclipse.jetty:jetty-http:10.0.26 - https://jetty.org/jetty-http) - * Jetty :: IO Utility (org.eclipse.jetty:jetty-io:10.0.26 - https://jetty.org/jetty-io) - * Jetty :: Utilities (org.eclipse.jetty:jetty-util:10.0.26 - https://jetty.org/jetty-util) + * Core :: ALPN :: Client (org.eclipse.jetty:jetty-alpn-client:12.0.27 - https://jetty.org/jetty-core/jetty-alpn/jetty-alpn-client) + * Core :: ALPN :: Java Client (org.eclipse.jetty:jetty-alpn-java-client:12.0.27 - https://jetty.org/jetty-core/jetty-alpn/jetty-alpn-java-client) + * Core :: HTTP (org.eclipse.jetty:jetty-http:12.0.27 - https://jetty.org/jetty-core/jetty-http) + * Core :: HTTP2 :: Client (org.eclipse.jetty.http2:jetty-http2-client:12.0.27 - https://jetty.org/jetty-core/jetty-http2/jetty-http2-client) + * Core :: HTTP2 :: Client Transport (org.eclipse.jetty.http2:jetty-http2-client-transport:12.0.27 - https://jetty.org/jetty-core/jetty-http2/jetty-http2-client-transport) + * Core :: HTTP2 :: Common (org.eclipse.jetty.http2:jetty-http2-common:12.0.27 - https://jetty.org/jetty-core/jetty-http2/jetty-http2-common) + * Core :: HTTP2 :: HPACK (org.eclipse.jetty.http2:jetty-http2-hpack:12.0.27 - https://jetty.org/jetty-core/jetty-http2/jetty-http2-hpack) + * Core :: HTTP Client (org.eclipse.jetty:jetty-client:12.0.27 - https://jetty.org/jetty-core/jetty-client) + * Core :: IO (org.eclipse.jetty:jetty-io:12.0.27 - https://jetty.org/jetty-core/jetty-io) + * Core :: Utilities (org.eclipse.jetty:jetty-util:12.0.27 - https://jetty.org/jetty-core/jetty-util) Apache License, Version 2.0, LGPL-2.1-or-later @@ -374,7 +378,7 @@ List of third-party dependencies grouped by their license type. * jsoup Java HTML Parser (org.jsoup:jsoup:1.22.1 - https://jsoup.org/) * JTokkit (com.knuddels:jtokkit:1.1.0 - https://github.com/knuddelsgmbh/jtokkit) * org.brotli:dec (org.brotli:dec:0.1.2 - http://brotli.org/dec) - * semver4j (org.semver4j:semver4j:5.8.0 - https://github.com/semver4j/semver4j) + * semver4j (org.semver4j:semver4j:6.0.0 - https://github.com/semver4j/semver4j) * SLF4J API Module (org.slf4j:slf4j-api:1.7.36 - http://www.slf4j.org) * SLF4J API Module (org.slf4j:slf4j-api:1.7.6 - http://www.slf4j.org) * SLF4J API Module (org.slf4j:slf4j-api:2.0.17 - http://www.slf4j.org) diff --git a/external/solr/configsets/docs/conf/solrconfig.xml b/external/solr/configsets/docs/conf/solrconfig.xml index 82dd1420c..d3e6d1638 100755 --- a/external/solr/configsets/docs/conf/solrconfig.xml +++ b/external/solr/configsets/docs/conf/solrconfig.xml @@ -18,7 +18,7 @@ specific language governing permissions and limitations under the License. --> - 9.0.0 + 10.0.0 diff --git a/external/solr/configsets/metrics/conf/solrconfig.xml b/external/solr/configsets/metrics/conf/solrconfig.xml index 7c6384b85..8d91e9378 100755 --- a/external/solr/configsets/metrics/conf/solrconfig.xml +++ b/external/solr/configsets/metrics/conf/solrconfig.xml @@ -18,7 +18,7 @@ specific language governing permissions and limitations under the License. --> - 9.0.0 + 10.0.0 diff --git a/external/solr/configsets/status/conf/solrconfig.xml b/external/solr/configsets/status/conf/solrconfig.xml index 82dd1420c..d3e6d1638 100755 --- a/external/solr/configsets/status/conf/solrconfig.xml +++ b/external/solr/configsets/status/conf/solrconfig.xml @@ -18,7 +18,7 @@ specific language governing permissions and limitations under the License. --> - 9.0.0 + 10.0.0 diff --git a/external/solr/pom.xml b/external/solr/pom.xml index 8d7a108e2..4d2b419a4 100644 --- a/external/solr/pom.xml +++ b/external/solr/pom.xml @@ -38,7 +38,7 @@ under the License. Solr resources for StormCrawler - 9.10.1 + 10.0.0 @@ -48,6 +48,18 @@ under the License. ${solr.version} + + org.apache.solr + solr-solrj-jetty + ${solr.version} + + + + org.apache.solr + solr-solrj-zookeeper + ${solr.version} + + org.testcontainers junit-jupiter From deeb71a76afcb2b01f979c1d026f4f18b15ea819 Mon Sep 17 00:00:00 2001 From: Davide Polato Date: Fri, 20 Mar 2026 17:17:42 +0100 Subject: [PATCH 2/4] #1831 Migrate HTTP clients to Jetty and update URL parsing for Solr 10 --- .../stormcrawler/solr/SolrConnection.java | 152 +++++++++--------- .../solr/persistence/SolrSpout.java | 4 +- 2 files changed, 74 insertions(+), 82 deletions(-) diff --git a/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java b/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java index 0219a1988..6bbcbfab3 100644 --- a/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java +++ b/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java @@ -30,11 +30,9 @@ import java.util.concurrent.TimeUnit; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient; -import org.apache.solr.client.solrj.impl.ConcurrentUpdateHttp2SolrClient; -import org.apache.solr.client.solrj.impl.Http2SolrClient; -import org.apache.solr.client.solrj.impl.LBHttp2SolrClient; -import org.apache.solr.client.solrj.impl.LBSolrClient; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.jetty.ConcurrentUpdateJettySolrClient; +import org.apache.solr.client.solrj.jetty.HttpJettySolrClient; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; @@ -107,8 +105,8 @@ private void flushAllUpdates(boolean force) { return; } - CloudHttp2SolrClient cloudHttp2SolrClient = (CloudHttp2SolrClient) client; - DocCollection col = cloudHttp2SolrClient.getClusterState().getCollection(collection); + CloudSolrClient cloudClient = (CloudSolrClient) client; + DocCollection col = cloudClient.getClusterState().getCollection(collection); // Flush all slices for (var entry : updateQueues.entrySet()) { @@ -125,7 +123,7 @@ private void flushAllUpdates(boolean force) { return; } - flushUpdates(leader, waitingUpdates, cloudHttp2SolrClient); + flushUpdates(leader, waitingUpdates, cloudClient); } } } @@ -134,8 +132,8 @@ private void updateAsync(Update update) { synchronized (lock) { lastUpdate = System.currentTimeMillis(); - CloudHttp2SolrClient cloudHttp2SolrClient = (CloudHttp2SolrClient) client; - DocCollection col = cloudHttp2SolrClient.getClusterState().getCollection(collection); + CloudSolrClient cloudClient = (CloudSolrClient) client; + DocCollection col = cloudClient.getClusterState().getCollection(collection); // Find slice for this update Slice slice = null; @@ -165,7 +163,7 @@ private void updateAsync(Update update) { return; } - flushUpdates(leader, waitingUpdates, cloudHttp2SolrClient); + flushUpdates(leader, waitingUpdates, cloudClient); } } } @@ -175,14 +173,10 @@ private void updateAsync(Update update) { * leader goes down before handling it. */ private void flushUpdates( - Replica leader, - List waitingUpdates, - CloudHttp2SolrClient cloudHttp2SolrClient) { + Replica leader, + List waitingUpdates, + CloudSolrClient cloudClient) { - List endpoints = new ArrayList<>(); - endpoints.add(new LBSolrClient.Endpoint(leader.getBaseUrl(), leader.getCoreName())); - - // Separate deletions and documents List deletionIds = new ArrayList<>(); List docs = new ArrayList<>(); @@ -195,24 +189,22 @@ private void flushUpdates( } UpdateRequest updateRequest = new UpdateRequest(); - updateRequest.add(docs); - updateRequest.deleteById(deletionIds); + if (!docs.isEmpty()) { + updateRequest.add(docs); + } + if (!deletionIds.isEmpty()) { + updateRequest.deleteById(deletionIds); + } List batch = new ArrayList<>(waitingUpdates); waitingUpdates.clear(); - // Get the async client - LBHttp2SolrClient lbHttp2SolrClient = cloudHttp2SolrClient.getLbClient(); - LBSolrClient.Req req = new LBSolrClient.Req(updateRequest, endpoints); - - lbHttp2SolrClient - .requestAsync(req) - .whenComplete( - (futureResponse, throwable) -> { - if (throwable != null) { - LOG.error("Exception caught while updating", throwable); - - // The request failed => add the batch back to the pending updates + CompletableFuture.runAsync( + () -> { + try { + cloudClient.request(updateRequest, collection); + } catch (Exception e) { + LOG.error("Exception caught while updating", e); synchronized (lock) { waitingUpdates.addAll(batch); } @@ -250,44 +242,25 @@ public void deleteByIdAsync(String id) { public CompletableFuture requestAsync(QueryRequest request) { if (cloud) { - CloudHttp2SolrClient cloudHttp2SolrClient = (CloudHttp2SolrClient) client; - - // Find the shard to route the request to - String shardId = request.getParams().get("shards"); - if (shardId == null) { - shardId = "shard1"; - } - - Slice slice = - cloudHttp2SolrClient - .getClusterState() - .getCollection(collection) - .getSlice(shardId); - - // Will get results from the first successful replica of this shard - List endpoints = new ArrayList<>(); - - for (Replica replica : slice.getReplicas()) { - if (replica.getState() == Replica.State.ACTIVE) { - endpoints.add( - new LBSolrClient.Endpoint(replica.getBaseUrl(), replica.getCoreName())); + CloudSolrClient cloudClient = (CloudSolrClient) client; + return CompletableFuture.supplyAsync(() -> { + try { + var resp = cloudClient.request(request, collection); + QueryResponse qr = new QueryResponse(); + qr.setResponse(resp); + return qr; + } catch (Exception e) { + throw new java.util.concurrent.CompletionException(e); } - } - - // Shuffle the endpoints for basic load balancing - Collections.shuffle(endpoints); - - // Get the async client - LBHttp2SolrClient lbHttp2SolrClient = cloudHttp2SolrClient.getLbClient(); - LBSolrClient.Req req = new LBSolrClient.Req(request, endpoints); - - return lbHttp2SolrClient - .requestAsync(req) - .thenApply(rsp -> new QueryResponse(rsp.getResponse(), lbHttp2SolrClient)); + }); } else { - return ((Http2SolrClient) client) - .requestAsync(request) - .thenApply(nl -> new QueryResponse(nl, client)); + return ((HttpJettySolrClient) client) + .requestAsync(request) + .thenApply(resp -> { + QueryResponse qr = new QueryResponse(); + qr.setResponse(resp); + return qr; + }); } } @@ -315,20 +288,19 @@ public static SolrConnection getConnection(Map stormConf, String boolean statusCollection = boltType.equals("status"); if (StringUtils.isNotBlank(zkHost)) { - - CloudHttp2SolrClient.Builder builder = - new CloudHttp2SolrClient.Builder( + CloudSolrClient.Builder builder = + new CloudSolrClient.Builder( Collections.singletonList(zkHost), Optional.empty()); if (StringUtils.isNotBlank(collection)) { builder.withDefaultCollection(collection); } - CloudHttp2SolrClient cloudHttp2SolrClient = builder.build(); + CloudSolrClient cloudClient = builder.build(); return new SolrConnection( - cloudHttp2SolrClient, - cloudHttp2SolrClient, + cloudClient, + cloudClient, true, collection, statusCollection, @@ -337,18 +309,38 @@ public static SolrConnection getConnection(Map stormConf, String } else if (StringUtils.isNotBlank(solrUrl)) { - Http2SolrClient http2SolrClient = new Http2SolrClient.Builder(solrUrl).build(); + String rootUrl = solrUrl; + String defaultColl = collection; + + if (!solrUrl.endsWith("/solr")) { + int lastSlash = solrUrl.lastIndexOf('/'); + if (lastSlash != -1) { + rootUrl = solrUrl.substring(0, lastSlash); + if (StringUtils.isBlank(defaultColl)) { + defaultColl = solrUrl.substring(lastSlash + 1); + } + } + } - ConcurrentUpdateHttp2SolrClient concurrentUpdateHttp2SolrClient = - new ConcurrentUpdateHttp2SolrClient.Builder(solrUrl, http2SolrClient, true) + HttpJettySolrClient.Builder httpBuilder = new HttpJettySolrClient.Builder(rootUrl); + if (StringUtils.isNotBlank(defaultColl)) { + httpBuilder.withDefaultCollection(defaultColl); + } + HttpJettySolrClient httpJettySolrClient = httpBuilder.build(); + + ConcurrentUpdateJettySolrClient concurrentUpdateJettySolrClient = + (ConcurrentUpdateJettySolrClient) + new ConcurrentUpdateJettySolrClient.Builder( + rootUrl, httpJettySolrClient, true) + .withDefaultCollection(defaultColl) .withQueueSize(queueSize) .build(); return new SolrConnection( - http2SolrClient, - concurrentUpdateHttp2SolrClient, + httpJettySolrClient, + concurrentUpdateJettySolrClient, false, - collection, + defaultColl, statusCollection, updateQueueSize, noUpdateThreshold); diff --git a/external/solr/src/main/java/org/apache/stormcrawler/solr/persistence/SolrSpout.java b/external/solr/src/main/java/org/apache/stormcrawler/solr/persistence/SolrSpout.java index 10b437c54..6ecd56322 100644 --- a/external/solr/src/main/java/org/apache/stormcrawler/solr/persistence/SolrSpout.java +++ b/external/solr/src/main/java/org/apache/stormcrawler/solr/persistence/SolrSpout.java @@ -23,9 +23,9 @@ import java.util.Map; import java.util.concurrent.CompletableFuture; import org.apache.commons.lang3.StringUtils; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.SolrQuery; +import org.apache.solr.client.solrj.request.SolrQuery.ORDER; import org.apache.solr.client.solrj.response.Group; import org.apache.solr.client.solrj.response.GroupCommand; import org.apache.solr.client.solrj.response.QueryResponse; From 721d8921ddd78f28351ce4b199c540fef0c62d36 Mon Sep 17 00:00:00 2001 From: Davide Polato Date: Fri, 20 Mar 2026 17:19:28 +0100 Subject: [PATCH 3/4] #1831 Update Testcontainers, CLI flags and Docker configs for Solr 10 --- .../stormcrawler/solr/SolrConnection.java | 70 +++++++++---------- .../solr/CloudConnectionTest.java | 18 ++--- .../stormcrawler/solr/SolrContainerTest.java | 18 ++--- .../solr/bolt/IndexerBoltTest.java | 6 +- .../src/test/resources/docker-compose.yml | 8 +-- 5 files changed, 61 insertions(+), 59 deletions(-) diff --git a/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java b/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java index 6bbcbfab3..03ec4db66 100644 --- a/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java +++ b/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java @@ -173,9 +173,7 @@ private void updateAsync(Update update) { * leader goes down before handling it. */ private void flushUpdates( - Replica leader, - List waitingUpdates, - CloudSolrClient cloudClient) { + Replica leader, List waitingUpdates, CloudSolrClient cloudClient) { List deletionIds = new ArrayList<>(); List docs = new ArrayList<>(); @@ -200,16 +198,16 @@ private void flushUpdates( waitingUpdates.clear(); CompletableFuture.runAsync( - () -> { - try { - cloudClient.request(updateRequest, collection); - } catch (Exception e) { - LOG.error("Exception caught while updating", e); - synchronized (lock) { - waitingUpdates.addAll(batch); - } - } - }); + () -> { + try { + cloudClient.request(updateRequest, collection); + } catch (Exception e) { + LOG.error("Exception caught while updating", e); + synchronized (lock) { + waitingUpdates.addAll(batch); + } + } + }); } private Slice getSlice(SolrInputDocument doc, DocCollection col) { @@ -243,24 +241,26 @@ public void deleteByIdAsync(String id) { public CompletableFuture requestAsync(QueryRequest request) { if (cloud) { CloudSolrClient cloudClient = (CloudSolrClient) client; - return CompletableFuture.supplyAsync(() -> { - try { - var resp = cloudClient.request(request, collection); - QueryResponse qr = new QueryResponse(); - qr.setResponse(resp); - return qr; - } catch (Exception e) { - throw new java.util.concurrent.CompletionException(e); - } - }); + return CompletableFuture.supplyAsync( + () -> { + try { + var resp = cloudClient.request(request, collection); + QueryResponse qr = new QueryResponse(); + qr.setResponse(resp); + return qr; + } catch (Exception e) { + throw new java.util.concurrent.CompletionException(e); + } + }); } else { return ((HttpJettySolrClient) client) - .requestAsync(request) - .thenApply(resp -> { - QueryResponse qr = new QueryResponse(); - qr.setResponse(resp); - return qr; - }); + .requestAsync(request) + .thenApply( + resp -> { + QueryResponse qr = new QueryResponse(); + qr.setResponse(resp); + return qr; + }); } } @@ -329,12 +329,12 @@ public static SolrConnection getConnection(Map stormConf, String HttpJettySolrClient httpJettySolrClient = httpBuilder.build(); ConcurrentUpdateJettySolrClient concurrentUpdateJettySolrClient = - (ConcurrentUpdateJettySolrClient) - new ConcurrentUpdateJettySolrClient.Builder( - rootUrl, httpJettySolrClient, true) - .withDefaultCollection(defaultColl) - .withQueueSize(queueSize) - .build(); + (ConcurrentUpdateJettySolrClient) + new ConcurrentUpdateJettySolrClient.Builder( + rootUrl, httpJettySolrClient, true) + .withDefaultCollection(defaultColl) + .withQueueSize(queueSize) + .build(); return new SolrConnection( httpJettySolrClient, diff --git a/external/solr/src/test/java/org/apache/stormcrawler/solr/CloudConnectionTest.java b/external/solr/src/test/java/org/apache/stormcrawler/solr/CloudConnectionTest.java index 743b79729..5f072ccd5 100644 --- a/external/solr/src/test/java/org/apache/stormcrawler/solr/CloudConnectionTest.java +++ b/external/solr/src/test/java/org/apache/stormcrawler/solr/CloudConnectionTest.java @@ -24,9 +24,9 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.TimeUnit; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient; +import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.SolrQuery; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.DocCollection; @@ -41,22 +41,25 @@ class CloudConnectionTest extends SolrCloudContainerTest { private static final Logger LOG = LoggerFactory.getLogger(CloudConnectionTest.class); private static SolrConnection connection; - private static CloudHttp2SolrClient client; + private static CloudSolrClient client; @BeforeAll static void setup() { createCollection("docs", 1, 2); + String zkHost = environment.getServiceHost("zookeeper", 2181); + Integer zkPort = environment.getServicePort("zookeeper", 2181); + String zkConnect = zkHost + ":" + zkPort; + Map conf = new HashMap<>(); - conf.put("solr.indexer.zkhost", "localhost:2181"); + conf.put("solr.indexer.zkhost", zkConnect); conf.put("solr.indexer.collection", "docs"); conf.put("solr.indexer.batchUpdateSize", 1); conf.put("solr.indexer.flushAfterNoUpdatesMillis", 5_000); connection = SolrConnection.getConnection(conf, "indexer"); client = - new CloudHttp2SolrClient.Builder( - Collections.singletonList("localhost:2181"), Optional.empty()) + new CloudSolrClient.Builder(Collections.singletonList(zkConnect), Optional.empty()) .withDefaultCollection("docs") .build(); } @@ -106,10 +109,9 @@ private void startNode(int port) { container.execInContainer( "/opt/solr/bin/solr", "start", - "-c", "-p", Integer.toString(port), - "-s", + "--solr-home", "/var/solr/" + port, "-z", "zookeeper:2181"); diff --git a/external/solr/src/test/java/org/apache/stormcrawler/solr/SolrContainerTest.java b/external/solr/src/test/java/org/apache/stormcrawler/solr/SolrContainerTest.java index 12690775d..9378d40cd 100644 --- a/external/solr/src/test/java/org/apache/stormcrawler/solr/SolrContainerTest.java +++ b/external/solr/src/test/java/org/apache/stormcrawler/solr/SolrContainerTest.java @@ -38,7 +38,7 @@ public abstract class SolrContainerTest { protected static ExecutorService executorService; - private static final DockerImageName image = DockerImageName.parse("solr:9.10.0"); + private static final DockerImageName image = DockerImageName.parse("solr:10.0.0"); private static final String configsetsPath = new File("configsets").getAbsolutePath(); @Container @@ -48,7 +48,7 @@ public abstract class SolrContainerTest { .withCopyFileToContainer( MountableFile.forHostPath(configsetsPath), "/opt/solr/server/solr/configsets") - .withCommand("solr-foreground -c") + .withCommand("solr-foreground") .waitingFor(Wait.forHttp("/solr/admin/cores?action=STATUS").forStatusCode(200)); @BeforeAll @@ -74,24 +74,24 @@ protected ExecResult createCollection(String collectionName, int shards) "/opt/solr/bin/solr", "zk", "upconfig", - "-n", + "--conf-name", collectionName, - "-d", + "--conf-dir", "/opt/solr/server/solr/configsets/" + collectionName, - "-z", + "--zk-host", "localhost:9983"); // Create the collection return container.execInContainer( "/opt/solr/bin/solr", "create", - "-c", + "--name", collectionName, - "-n", + "--conf-name", collectionName, - "-sh", + "--shards", String.valueOf(shards), - "-rf", + "--replication-factor", "1"); } } diff --git a/external/solr/src/test/java/org/apache/stormcrawler/solr/bolt/IndexerBoltTest.java b/external/solr/src/test/java/org/apache/stormcrawler/solr/bolt/IndexerBoltTest.java index dd896dbb4..5c8958107 100644 --- a/external/solr/src/test/java/org/apache/stormcrawler/solr/bolt/IndexerBoltTest.java +++ b/external/solr/src/test/java/org/apache/stormcrawler/solr/bolt/IndexerBoltTest.java @@ -29,9 +29,9 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.Http2SolrClient; +import org.apache.solr.client.solrj.jetty.HttpJettySolrClient; +import org.apache.solr.client.solrj.request.SolrQuery; import org.apache.solr.common.SolrDocumentList; import org.apache.storm.task.OutputCollector; import org.apache.storm.tuple.Tuple; @@ -114,7 +114,7 @@ void basicTest() bolt.cleanup(); // Make sure the document is indexed in Solr - SolrClient client = new Http2SolrClient.Builder(getSolrBaseUrl() + "/docs").build(); + SolrClient client = new HttpJettySolrClient.Builder(getSolrBaseUrl() + "/docs").build(); client.commit(); SolrQuery query = new SolrQuery("*:*"); diff --git a/external/solr/src/test/resources/docker-compose.yml b/external/solr/src/test/resources/docker-compose.yml index 8771a92b2..8d8881ad1 100644 --- a/external/solr/src/test/resources/docker-compose.yml +++ b/external/solr/src/test/resources/docker-compose.yml @@ -30,7 +30,7 @@ services: retries: 5 solr: - image: solr:9.10.0 + image: solr:10.0.0 hostname: solr ports: - "8983:8983" @@ -38,10 +38,10 @@ services: depends_on: - zookeeper environment: - - SOLR_HOST=localhost + - SOLR_HOST_ADVERTISE=localhost command: > bash -c ' mkdir -p /var/solr/{8983,8984} && - solr start -c -p 8984 -s /var/solr/8984 -z zookeeper:2181 && - solr start -c -p 8983 -s /var/solr/8983 -z zookeeper:2181 && + solr start -p 8984 --solr-home /var/solr/8984 -z zookeeper:2181 && + solr start -p 8983 --solr-home /var/solr/8983 -z zookeeper:2181 && tail -f /var/solr/logs/solr.log' From f97b3df26bc61c221aad217fb75e7c3dae8fec3f Mon Sep 17 00:00:00 2001 From: Davide Polato Date: Mon, 23 Mar 2026 10:54:23 +0100 Subject: [PATCH 4/4] #1831 Use real async Solr updates via LBAsyncSolrClient - Drop pseudo-async CompletableFuture.runAsync() around blocking cloudClient.request(). - Use NIO-based LBAsyncSolrClient.requestAsync() for Solr 10. - Route writes via LBSolrClient.Endpoint built from leader baseUrl/coreName. - Access CloudHttp2SolrClient#getLbClient() via explicit cast, with a note on potential SolrJ API changes. --- .../solr/configsets/docs/conf/solrconfig.xml | 2 +- .../configsets/metrics/conf/solrconfig.xml | 2 +- .../configsets/status/conf/solrconfig.xml | 2 +- .../stormcrawler/solr/SolrConnection.java | 62 ++++++++++++++----- 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/external/solr/configsets/docs/conf/solrconfig.xml b/external/solr/configsets/docs/conf/solrconfig.xml index d3e6d1638..ca4ce7e2b 100755 --- a/external/solr/configsets/docs/conf/solrconfig.xml +++ b/external/solr/configsets/docs/conf/solrconfig.xml @@ -18,7 +18,7 @@ specific language governing permissions and limitations under the License. --> - 10.0.0 + 10.3 diff --git a/external/solr/configsets/metrics/conf/solrconfig.xml b/external/solr/configsets/metrics/conf/solrconfig.xml index 8d91e9378..d3fc7668e 100755 --- a/external/solr/configsets/metrics/conf/solrconfig.xml +++ b/external/solr/configsets/metrics/conf/solrconfig.xml @@ -18,7 +18,7 @@ specific language governing permissions and limitations under the License. --> - 10.0.0 + 10.3 diff --git a/external/solr/configsets/status/conf/solrconfig.xml b/external/solr/configsets/status/conf/solrconfig.xml index d3e6d1638..ca4ce7e2b 100755 --- a/external/solr/configsets/status/conf/solrconfig.xml +++ b/external/solr/configsets/status/conf/solrconfig.xml @@ -18,7 +18,7 @@ specific language governing permissions and limitations under the License. --> - 10.0.0 + 10.3 diff --git a/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java b/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java index 03ec4db66..e69344143 100644 --- a/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java +++ b/external/solr/src/main/java/org/apache/stormcrawler/solr/SolrConnection.java @@ -30,7 +30,10 @@ import java.util.concurrent.TimeUnit; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.impl.LBAsyncSolrClient; +import org.apache.solr.client.solrj.impl.LBSolrClient; import org.apache.solr.client.solrj.jetty.ConcurrentUpdateJettySolrClient; import org.apache.solr.client.solrj.jetty.HttpJettySolrClient; import org.apache.solr.client.solrj.request.QueryRequest; @@ -106,7 +109,11 @@ private void flushAllUpdates(boolean force) { } CloudSolrClient cloudClient = (CloudSolrClient) client; - DocCollection col = cloudClient.getClusterState().getCollection(collection); + DocCollection col = + cloudClient + .getClusterStateProvider() + .getClusterState() + .getCollection(collection); // Flush all slices for (var entry : updateQueues.entrySet()) { @@ -133,7 +140,11 @@ private void updateAsync(Update update) { lastUpdate = System.currentTimeMillis(); CloudSolrClient cloudClient = (CloudSolrClient) client; - DocCollection col = cloudClient.getClusterState().getCollection(collection); + DocCollection col = + cloudClient + .getClusterStateProvider() + .getClusterState() + .getCollection(collection); // Find slice for this update Slice slice = null; @@ -186,6 +197,10 @@ private void flushUpdates( } } + if (docs.isEmpty() && deletionIds.isEmpty()) { + return; + } + UpdateRequest updateRequest = new UpdateRequest(); if (!docs.isEmpty()) { updateRequest.add(docs); @@ -197,17 +212,32 @@ private void flushUpdates( List batch = new ArrayList<>(waitingUpdates); waitingUpdates.clear(); - CompletableFuture.runAsync( - () -> { - try { - cloudClient.request(updateRequest, collection); - } catch (Exception e) { - LOG.error("Exception caught while updating", e); - synchronized (lock) { - waitingUpdates.addAll(batch); - } - } - }); + // Building the endpoint for the current leader + LBSolrClient.Endpoint endpoint = + new LBSolrClient.Endpoint(leader.getBaseUrl(), leader.getCoreName()); + List endpoints = Collections.singletonList(endpoint); + LBSolrClient.Req req = new LBSolrClient.Req(updateRequest, endpoints); + + /* + * Retrieve the async LB client from the CloudSolrClient. + * NOTE: This relies on the current internal implementation of CloudSolrClient.Builder, + * which returns a CloudHttp2SolrClient.getLbClient() is protected there and is + * not part of the public SolrJ 10 API surface, so this may require adjustments in future Solr versions. + */ + LBAsyncSolrClient lbAsyncSolrClient = + (LBAsyncSolrClient) ((CloudHttp2SolrClient) cloudClient).getLbClient(); + + lbAsyncSolrClient + .requestAsync(req) + .whenComplete( + (response, throwable) -> { + if (throwable != null) { + LOG.error("Exception caught while updating", throwable); + synchronized (lock) { + waitingUpdates.addAll(batch); + } + } + }); } private Slice getSlice(SolrInputDocument doc, DocCollection col) { @@ -288,9 +318,11 @@ public static SolrConnection getConnection(Map stormConf, String boolean statusCollection = boltType.equals("status"); if (StringUtils.isNotBlank(zkHost)) { + HttpJettySolrClient jettyClient = new HttpJettySolrClient.Builder().build(); + CloudSolrClient.Builder builder = - new CloudSolrClient.Builder( - Collections.singletonList(zkHost), Optional.empty()); + new CloudSolrClient.Builder(Collections.singletonList(zkHost), Optional.empty()) + .withHttpClient(jettyClient); if (StringUtils.isNotBlank(collection)) { builder.withDefaultCollection(collection);