From bb85306728f9738bebb719bbc4a0a52910f626e0 Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Wed, 1 Apr 2026 10:06:16 -0400 Subject: [PATCH] SOLR-18121: Rm 'rm-in-advance' fetcher codepath Prior to this commit, the IndexFetcher had support for an edge case where if a full-index-fetch was needed but the receiving host didn't have adequate disk space, then the IndexFetcher would delete the existing (out of date) copy of the index to free up space. Unfortunately, this optimization/"feature" was never documented in the ref-guide, had very little test coverage, and contained a number of resource-leaks that appeared to trigger 100% of the time. While the feature might've worked when initially written, the rare-ness of the triggering condition and lack of tests had led to some severe bitrot over time. This commit removes this optimization, largely by walking back changes in 'b061947' which added it initially. --- .../java/org/apache/solr/core/SolrCore.java | 8 +- .../org/apache/solr/handler/IndexFetcher.java | 102 +------ .../solr/handler/ReplicationHandler.java | 1 - .../solr/update/DefaultSolrCoreState.java | 2 +- .../TestReplicationHandlerDiskOverFlow.java | 254 ------------------ 5 files changed, 3 insertions(+), 364 deletions(-) delete mode 100644 solr/core/src/test/org/apache/solr/handler/TestReplicationHandlerDiskOverFlow.java diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 2176c0a392a..2d41e162f4a 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -258,8 +258,6 @@ public class SolrCore implements SolrInfoBean, Closeable { private final String metricTag = SolrMetricProducer.getUniqueMetricTag(this, null); private final SolrMetricsContext solrMetricsContext; - public volatile boolean searchEnabled = true; - public volatile boolean indexEnabled = true; public volatile boolean readOnly = false; private PackageListeners packageListeners = new PackageListeners(this); @@ -2143,11 +2141,7 @@ boolean areAllSearcherReferencesEmpty() { * @see #withSearcher(IOFunction) */ public RefCounted getSearcher() { - if (searchEnabled) { - return getSearcher(false, true, null); - } - throw new SolrException( - SolrException.ErrorCode.SERVICE_UNAVAILABLE, "Search is temporarily disabled"); + return getSearcher(false, true, null); } /** diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java index 7b4b7c2fdf9..27d7f018748 100644 --- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java +++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java @@ -49,7 +49,6 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; -import java.nio.file.FileStore; import java.nio.file.Files; import java.nio.file.NoSuchFileException; import java.nio.file.Path; @@ -72,7 +71,6 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.BooleanSupplier; -import java.util.function.Function; import java.util.stream.Collectors; import java.util.zip.Adler32; import java.util.zip.Checksum; @@ -185,8 +183,6 @@ public class IndexFetcher { private boolean skipCommitOnLeaderVersionZero = true; - private boolean clearLocalIndexFirst = false; - private static final String INTERRUPT_RESPONSE_MESSAGE = "Interrupted while waiting for modify lock"; @@ -409,7 +405,6 @@ IndexFetchResult fetchLatestIndex(boolean forceReplication) IndexFetchResult fetchLatestIndex(boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException { - this.clearLocalIndexFirst = false; boolean cleanupDone = false; boolean successfulInstall = false; markReplicationStart(); @@ -697,9 +692,7 @@ IndexFetchResult fetchLatestIndex(boolean forceReplication, boolean forceCoreRel // let the system know we are changing dir's and the old one // may be closed if (indexDir != null) { - if (!this.clearLocalIndexFirst) { // it was closed earlier - solrCore.getDirectoryFactory().doneWithDirectory(indexDir); - } + solrCore.getDirectoryFactory().doneWithDirectory(indexDir); // Cleanup all index files not associated with any *named* snapshot. solrCore.deleteNonSnapshotIndexFiles(indexDirPath); } @@ -722,8 +715,6 @@ IndexFetchResult fetchLatestIndex(boolean forceReplication, boolean forceCoreRel } } } finally { - solrCore.searchEnabled = true; - solrCore.indexEnabled = true; if (!isFullCopyNeeded) { solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(solrCore); } @@ -927,9 +918,6 @@ private void logReplicationTimeAndConfFiles( props.setProperty(INDEX_REPLICATED_AT, String.valueOf(replicationTime)); props.setProperty(PREVIOUS_CYCLE_TIME_TAKEN, String.valueOf(replicationTimeTaken)); props.setProperty(TIMES_INDEX_REPLICATED, String.valueOf(indexCount)); - if (clearLocalIndexFirst) { - props.setProperty(CLEARED_LOCAL_IDX, "true"); - } if (modifiedConfFiles != null && !modifiedConfFiles.isEmpty()) { props.setProperty(CONF_FILES_REPLICATED, confFiles.toString()); props.setProperty(CONF_FILES_REPLICATED_AT, String.valueOf(replicationTime)); @@ -1128,22 +1116,12 @@ private long downloadIndexFiles( || (tmpIndexDir instanceof FilterDirectory && FilterDirectory.unwrap(tmpIndexDir) instanceof FSDirectory)); - long totalSpaceRequired = 0; - for (Map file : filesToDownload) { - long size = (Long) file.get(SIZE); - totalSpaceRequired += size; - } - if (log.isInfoEnabled()) { log.info( "tmpIndexDir_type : {} , {}", tmpIndexDir.getClass(), FilterDirectory.unwrap(tmpIndexDir)); } - long usableSpace = usableDiskSpaceProvider.apply(tmpIndexDirPath); - if (getApproxTotalSpaceReqd(totalSpaceRequired) > usableSpace) { - deleteFilesInAdvance(indexDir, indexDirPath, totalSpaceRequired, usableSpace); - } for (Map file : filesToDownload) { String filename = (String) file.get(NAME); @@ -1201,85 +1179,9 @@ private long downloadIndexFiles( // only for testing purposes. do not use this anywhere else // -----------START---------------------- static BooleanSupplier testWait = () -> true; - static Function usableDiskSpaceProvider = dir -> getUsableSpace(dir); // ------------ END--------------------- - private static Long getUsableSpace(String dir) { - try { - Path file = Path.of(dir); - if (Files.notExists(file)) { - file = file.getParent(); - // this is not a disk directory. so just pretend that there is enough space - if (Files.notExists(file)) { - return Long.MAX_VALUE; - } - } - FileStore fileStore = Files.getFileStore(file); - return fileStore.getUsableSpace(); - } catch (IOException e) { - throw new SolrException(ErrorCode.SERVER_ERROR, "Could not free disk space", e); - } - } - - private long getApproxTotalSpaceReqd(long totalSpaceRequired) { - long approxTotalSpaceReqd = (long) (totalSpaceRequired * 1.05); // add 5% extra for safety - // we should have an extra of 100MB free after everything is downloaded - approxTotalSpaceReqd += (100 * 1024 * 1024); - return approxTotalSpaceReqd; - } - - private void deleteFilesInAdvance( - Directory indexDir, String indexDirPath, long usableDiskSpace, long totalSpaceRequired) - throws IOException { - long actualSpaceReqd = totalSpaceRequired; - List filesTobeDeleted = new ArrayList<>(); - long clearedSpace = 0; - // go through each file to check if this needs to be deleted - for (String f : indexDir.listAll()) { - for (Map fileInfo : filesToDownload) { - if (f.equals(fileInfo.get(NAME))) { - String filename = (String) fileInfo.get(NAME); - long size = (Long) fileInfo.get(SIZE); - CompareResult compareResult = - compareFile(indexDir, filename, size, (Long) fileInfo.get(CHECKSUM)); - if (!compareResult.equal || filesToAlwaysDownloadIfNoChecksums(f, size, compareResult)) { - filesTobeDeleted.add(f); - clearedSpace += size; - } else { - /*this file will not be downloaded*/ - actualSpaceReqd -= size; - } - } - } - } - if (usableDiskSpace > getApproxTotalSpaceReqd(actualSpaceReqd)) { - // after considering the files actually available locally we really don't need to do any - // delete - return; - } - log.info( - "This disk does not have enough space to download the index from leader. So cleaning up the local index. " - + " This may lead to loss of data/or node if index replication fails in between"); - // now we should disable searchers and index writers because this core will not have all the - // required files - this.clearLocalIndexFirst = true; - this.solrCore.searchEnabled = false; - this.solrCore.indexEnabled = false; - solrCore.getDirectoryFactory().doneWithDirectory(indexDir); - solrCore.deleteNonSnapshotIndexFiles(indexDirPath); - this.solrCore.closeSearcher(); - assert testWait.getAsBoolean(); - solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(this.solrCore, false); - for (String f : filesTobeDeleted) { - try { - indexDir.deleteFile(f); - } catch (FileNotFoundException | NoSuchFileException e) { - // no problem , it was deleted by someone else - } - } - } - static boolean filesToAlwaysDownloadIfNoChecksums( String filename, long size, CompareResult compareResult) { // without checksums to compare, we always download .si, .liv, segments_N, @@ -2061,8 +1963,6 @@ String getLeaderCoreUrl() { static final String TIMES_INDEX_REPLICATED = "timesIndexReplicated"; - static final String CLEARED_LOCAL_IDX = "clearedLocalIndexFirst"; - static final String CONF_FILES_REPLICATED = "confFilesReplicated"; static final String CONF_FILES_REPLICATED_AT = "confFilesReplicatedAt"; diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java index 05eb923965f..f4c79f8e7f2 100644 --- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java @@ -1139,7 +1139,6 @@ private void addReplicationProperties(BiConsumer consumer, Prope addVal(consumer, IndexFetcher.TIMES_FAILED, props, Integer.class); addVal(consumer, IndexFetcher.REPLICATION_FAILED_AT, props, Date.class); addVal(consumer, IndexFetcher.PREVIOUS_CYCLE_TIME_TAKEN, props, Long.class); - addVal(consumer, IndexFetcher.CLEARED_LOCAL_IDX, props, Boolean.class); } private void addVal( diff --git a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java index 55d681de4ec..9a2e7e96dd5 100644 --- a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java +++ b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java @@ -108,7 +108,7 @@ private void closeIndexWriter(IndexWriterCloser closer) { @Override public RefCounted getIndexWriter(SolrCore core, boolean failOnReadOnly) throws IOException { - if (core != null && (!core.indexEnabled || (core.readOnly && failOnReadOnly))) { + if (core != null && (core.readOnly && failOnReadOnly)) { throw new SolrException( SolrException.ErrorCode.SERVICE_UNAVAILABLE, "Indexing is temporarily disabled"); } diff --git a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandlerDiskOverFlow.java b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandlerDiskOverFlow.java deleted file mode 100644 index d450325bc33..00000000000 --- a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandlerDiskOverFlow.java +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.handler; - -import static org.apache.solr.handler.ReplicationHandler.CMD_FETCH_INDEX; -import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST; -import static org.apache.solr.handler.ReplicationTestHelper.invokeReplicationCommand; -import static org.apache.solr.handler.TestReplicationHandler.createAndStartJetty; - -import java.io.StringWriter; -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CyclicBarrier; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.BooleanSupplier; -import java.util.function.Function; -import org.apache.lucene.tests.util.TestUtil; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.request.SolrQuery; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.util.Utils; -import org.apache.solr.embedded.JettySolrRunner; -import org.apache.solr.util.LogLevel; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@LogLevel("org.apache.solr.handler.IndexFetcher=DEBUG") -@SolrTestCaseJ4.SuppressSSL -public class TestReplicationHandlerDiskOverFlow extends SolrTestCaseJ4 { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private static final String expectedErr = "Search is temporarily disabled"; - Function originalDiskSpaceprovider = null; - BooleanSupplier originalTestWait = null; - - JettySolrRunner leaderJetty, followerJetty; - SolrClient leaderClient, followerClient; - ReplicationTestHelper.SolrInstance leader = null, follower = null; - - @Override - @Before - public void setUp() throws Exception { - originalDiskSpaceprovider = IndexFetcher.usableDiskSpaceProvider; - originalTestWait = IndexFetcher.testWait; - - super.setUp(); - // Disable URL allow-list checks for replication tests - System.setProperty("solr.security.allow.urls.enabled", "false"); - System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory"); - String factory = - random().nextInt(100) < 75 - ? "solr.NRTCachingDirectoryFactory" - : "solr.StandardDirectoryFactory"; // test the default most of the time - System.setProperty("solr.directoryFactory", factory); - leader = new ReplicationTestHelper.SolrInstance(createTempDir("solr-instance"), "leader", null); - leader.setUp(); - leaderJetty = createAndStartJetty(leader); - leaderClient = - ReplicationTestHelper.createNewSolrClient( - TestReplicationHandler.buildUrl(leaderJetty.getLocalPort()), DEFAULT_TEST_CORENAME); - System.setProperty(TEST_URL_ALLOW_LIST, leaderJetty.getBaseUrl().toString()); - - follower = - new ReplicationTestHelper.SolrInstance( - createTempDir("solr-instance"), "follower", leaderJetty.getLocalPort()); - follower.setUp(); - followerJetty = createAndStartJetty(follower); - followerClient = - ReplicationTestHelper.createNewSolrClient( - TestReplicationHandler.buildUrl(followerJetty.getLocalPort()), DEFAULT_TEST_CORENAME); - } - - @Override - @After - public void tearDown() throws Exception { - super.tearDown(); - if (null != leaderJetty) { - leaderJetty.stop(); - leaderJetty = null; - } - if (null != followerJetty) { - followerJetty.stop(); - followerJetty = null; - } - leader = follower = null; - if (null != leaderClient) { - leaderClient.close(); - leaderClient = null; - } - if (null != followerClient) { - followerClient.close(); - followerClient = null; - } - - IndexFetcher.usableDiskSpaceProvider = originalDiskSpaceprovider; - IndexFetcher.testWait = originalTestWait; - } - - @Test - public void testDiskOverFlow() throws Exception { - invokeReplicationCommand( - TestReplicationHandler.buildUrl(followerJetty.getLocalPort()) + "/" + DEFAULT_TEST_CORENAME, - "disablepoll"); - // index docs - log.info("Indexing to LEADER"); - int docsInLeader = 1000; - long szLeader = indexDocs(leaderClient, docsInLeader, 0); - log.info("Indexing to FOLLOWER"); - long szFollower = indexDocs(followerClient, 1200, 1000); - - IndexFetcher.usableDiskSpaceProvider = - new Function() { - @Override - public Long apply(String s) { - return szLeader; - } - }; - - // we don't need/want the barrier to be cyclic, so we use a ref that our barrier action will - // null out to prevent it from being triggered multiple times (which shouldn't happen anyway) - final AtomicReference commonBarrier = new AtomicReference<>(); - commonBarrier.set( - new CyclicBarrier( - 2, - () -> { - commonBarrier.set(null); - })); - - final List threadFailures = new ArrayList<>(7); - - IndexFetcher.testWait = - new BooleanSupplier() { - @Override - public boolean getAsBoolean() { - try { - final CyclicBarrier barrier = commonBarrier.get(); - if (null != barrier) { - barrier.await(60, TimeUnit.SECONDS); - } - } catch (Exception e) { - log.error("IndexFetcher Thread Failure", e); - threadFailures.add(e); - } - return true; - } - }; - - new Thread( - () -> { - try { - for (int i = 0; i < 100; i++) { - final CyclicBarrier barrier = commonBarrier.get(); - assertNotNull( - "why is query thread still looping if barrier has already been cleared?", - barrier); - try { - QueryResponse rsp = - followerClient.query(new SolrQuery().setQuery("*:*").setRows(0)); - Thread.sleep(200); - } catch (SolrException e) { - if (e.code() == SolrException.ErrorCode.SERVICE_UNAVAILABLE.code - && e.getMessage().contains(expectedErr)) { - log.info("Got expected exception", e); - // now let the barrier complete & clear itself, and we're done - barrier.await(60, TimeUnit.SECONDS); - return; // break out - } - // else... - // not our expected exception, re-throw to fail fast... - throw e; - } - } - // if we made it this far, something is wrong... - throw new RuntimeException( - "Query thread gave up waiting for expected error: " + expectedErr); - } catch (Exception e) { - log.error("Query Thread Failure", e); - threadFailures.add(e); - } - }) - .start(); - - QueryResponse response = - followerClient.query( - new SolrQuery() - .add("qt", "/replication") - .add("command", CMD_FETCH_INDEX) - .add("wait", "true")); - - assertEquals("Replication command status", "OK", response._getStr("status")); - - assertEquals("threads encountered failures (see logs for when)", List.of(), threadFailures); - - response = followerClient.query(new SolrQuery().setQuery("*:*").setRows(0)); - assertEquals("docs in follower", docsInLeader, response.getResults().getNumFound()); - - response = - followerClient.query( - new SolrQuery() - .add("qt", "/replication") - .add("command", ReplicationHandler.CMD_DETAILS)); - if (log.isInfoEnabled()) { - log.info("DETAILS {}", Utils.writeJson(response, new StringWriter(), true)); - } - assertEquals( - "follower's clearedLocalIndexFirst (from rep details)", - "true", - response._getStr("details/follower/clearedLocalIndexFirst")); - } - - @SuppressWarnings("unchecked") - private long indexDocs(SolrClient client, int totalDocs, int start) throws Exception { - for (int i = 0; i < totalDocs; i++) - ReplicationTestHelper.index( - client, "id", i + start, "name", TestUtil.randomSimpleString(random(), 1000, 5000)); - client.commit(true, true); - QueryResponse response = - client.query( - new SolrQuery() - .add("qt", "/replication") - .add("command", "filelist") - .add("generation", "-1")); - - long totalSize = 0; - for (Map map : (List>) response.getResponse().get(CMD_GET_FILE_LIST)) { - Long sz = (Long) map.get(ReplicationHandler.SIZE); - totalSize += sz; - } - return totalSize; - } -}