diff --git a/solr/api/src/java/org/apache/solr/client/api/model/UpgradeCoreIndexRequestBody.java b/solr/api/src/java/org/apache/solr/client/api/model/UpgradeCoreIndexRequestBody.java index ecc3081014e..39f4490e2eb 100644 --- a/solr/api/src/java/org/apache/solr/client/api/model/UpgradeCoreIndexRequestBody.java +++ b/solr/api/src/java/org/apache/solr/client/api/model/UpgradeCoreIndexRequestBody.java @@ -30,4 +30,21 @@ public class UpgradeCoreIndexRequestBody { "updateChain to be used for reindexing during index upgrade if you don't want to use the one used by /update by default") @JsonProperty public String updateChain; + + @Schema( + description = + "Internal flag set by the collection-level UPGRADECOLLECTIONINDEX command to indicate" + + " this upgrade is being coordinated by SolrCloud. When true, the SolrCloud guard is" + + " bypassed, a local-only update chain is used (no distributed forwarding), and the" + + " core's readOnly flag is temporarily cleared for the duration of the upgrade.") + @JsonProperty + public Boolean cloudMode; + + @Schema( + description = + "When true, only checks whether the index contains old-format segments without performing" + + " any upgrade. Returns the number of segments needing upgrade and whether the index" + + " is already at the current Lucene format. 
No writes are performed.") + @JsonProperty + public Boolean checkOnly; } diff --git a/solr/api/src/java/org/apache/solr/client/api/model/UpgradeCoreIndexResponse.java b/solr/api/src/java/org/apache/solr/client/api/model/UpgradeCoreIndexResponse.java index 09e0ba1e8b2..b099d5d56ef 100644 --- a/solr/api/src/java/org/apache/solr/client/api/model/UpgradeCoreIndexResponse.java +++ b/solr/api/src/java/org/apache/solr/client/api/model/UpgradeCoreIndexResponse.java @@ -35,4 +35,10 @@ public class UpgradeCoreIndexResponse extends SolrJerseyResponse { @Schema(description = "Status of the core index upgrade operation.") @JsonProperty public String upgradeStatus; + + @Schema( + description = + "Whether the index is fully at the current Lucene format. Set when checkOnly=true.") + @JsonProperty + public Boolean indexUpgraded; } diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CollApiCmds.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CollApiCmds.java index ab8a8c58e4c..9b29ffcb568 100644 --- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CollApiCmds.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CollApiCmds.java @@ -65,6 +65,7 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.REPLACENODE; import static org.apache.solr.common.params.CollectionParams.CollectionAction.RESTORE; import static org.apache.solr.common.params.CollectionParams.CollectionAction.SPLITSHARD; +import static org.apache.solr.common.params.CollectionParams.CollectionAction.UPGRADECOLLECTIONINDEX; import static org.apache.solr.common.params.CommonParams.NAME; import io.opentelemetry.api.trace.Span; @@ -189,7 +190,8 @@ private CommandMap(OverseerNodePrioritizer overseerPrioritizer, CollectionComman Map.entry(ADDREPLICA, new AddReplicaCmd(ccc)), Map.entry(MOVEREPLICA, new MoveReplicaCmd(ccc)), Map.entry(REINDEXCOLLECTION, new ReindexCollectionCmd(ccc)), - Map.entry(RENAME, new RenameCmd(ccc))); + Map.entry(RENAME, 
new RenameCmd(ccc)), + Map.entry(UPGRADECOLLECTIONINDEX, new UpgradeCollectionIndexCmd(ccc))); } CollApiCmds.CollectionApiCommand getActionCommand(CollectionParams.CollectionAction action) { diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/UpgradeCollectionIndexCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/UpgradeCollectionIndexCmd.java new file mode 100644 index 00000000000..acc04d01ff4 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/UpgradeCollectionIndexCmd.java @@ -0,0 +1,399 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.cloud.api.collections; + +import static org.apache.solr.common.params.CommonParams.NAME; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import org.apache.solr.cloud.DistributedClusterStateUpdater; +import org.apache.solr.cloud.Overseer; +import org.apache.solr.cloud.api.collections.CollectionHandlingUtils.ShardRequestTracker; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.params.CollectionParams; +import org.apache.solr.common.params.CoreAdminParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.handler.component.ShardHandler; +import org.apache.solr.util.TimeOut; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Upgrades a collection's index by rewriting old-format Lucene segments into the current format. + * + *

The operation sets the collection to readOnly, then for each shard:
 + *
 + * <ol>
 + *   <li>Upgrades the shard leader's index locally (no distributed forwarding).
 + *   <li>Upgrades each NRT non-leader replica's index locally in parallel.
 + *   <li>TLOG and PULL replicas converge by replicating committed leader states via their normal
 + *       background replication mechanism. Intermediate commits during upgrade are allowed, so these
 + *       replicas may fetch more than once before reaching the terminal state.
 + *   <li>Polls every live replica with {@code checkOnly=true} until all report no old-format
 + *       segments remaining.
 + * </ol>
 + *
 + * <p>
After all shards are processed and validated, readOnly is cleared. If validation fails, the + * collection remains read-only so writes do not resume on a partially upgraded collection. + * + * @see org.apache.solr.handler.admin.api.UpgradeCoreIndex + */ +public class UpgradeCollectionIndexCmd implements CollApiCmds.CollectionApiCommand { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /** + * Parameter name for the configurable convergence timeout (in seconds). If not specified, + * defaults to {@link #DEFAULT_REPLICA_CONVERGENCE_TIMEOUT_SECS}. + */ + public static final String REPLICA_CONVERGENCE_TIMEOUT_SECS_PARAM = + "replicaConvergenceTimeoutSecs"; + + /** Default timeout for waiting for TLOG/PULL replicas to converge after the upgrade. */ + static final int DEFAULT_REPLICA_CONVERGENCE_TIMEOUT_SECS = 1800; // 30 minutes + + private static final long REPLICA_CONVERGENCE_POLL_MS = TimeUnit.SECONDS.toMillis(30); + + private final CollectionCommandContext ccc; + + public UpgradeCollectionIndexCmd(CollectionCommandContext ccc) { + this.ccc = ccc; + } + + @Override + public void call(AdminCmdContext adminCmdContext, ZkNodeProps message, NamedList results) + throws Exception { + String collectionName = message.getStr(NAME); + if (collectionName == null) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Collection name is required for UPGRADECOLLECTIONINDEX"); + } + + ClusterState clusterState = adminCmdContext.getClusterState(); + DocCollection collection = clusterState.getCollectionOrNull(collectionName); + if (collection == null) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "Collection '" + collectionName + "' not found"); + } + + log.info("Starting UPGRADECOLLECTIONINDEX for collection [{}]", collectionName); + + final int convergenceTimeoutSecs = + message.getInt( + REPLICA_CONVERGENCE_TIMEOUT_SECS_PARAM, DEFAULT_REPLICA_CONVERGENCE_TIMEOUT_SECS); + + boolean 
readOnlyWasSet = false; + boolean upgradeCompleted = false; + try { + // 1. Set collection readOnly to block external writes + setCollectionReadOnly(collectionName, true); + readOnlyWasSet = true; + + // 2. Process each shard + Map shardResults = new LinkedHashMap<>(); + for (Slice slice : collection.getSlices()) { + NamedList shardResult = new NamedList<>(); + upgradeShardIndex( + adminCmdContext, collectionName, slice, shardResult, convergenceTimeoutSecs); + shardResults.put(slice.getName(), shardResult); + } + results.add("shardResults", shardResults); + upgradeCompleted = true; + + log.info("UPGRADECOLLECTIONINDEX completed successfully for collection [{}]", collectionName); + } finally { + // 3. Clear readOnly only after all shards are verified upgraded. On failure we keep the + // collection read-only to avoid resuming writes on a partially upgraded collection. + if (readOnlyWasSet && upgradeCompleted) { + try { + setCollectionReadOnly(collectionName, false); + } catch (Exception e) { + log.error( + "Failed to clear readOnly on collection [{}] after UPGRADECOLLECTIONINDEX. " + + "Collection remains read-only and must be manually restored via " + + "MODIFYCOLLECTION action.", + collectionName, + e); + results.add( + "warning", + "Failed to clear readOnly on collection. Use MODIFYCOLLECTION to restore writes."); + } + } else if (readOnlyWasSet) { + log.warn( + "UPGRADECOLLECTIONINDEX did not complete successfully for collection [{}]; keeping" + + " collection readOnly for operator intervention.", + collectionName); + results.add( + "warning", + "Upgrade did not complete successfully. 
Collection remains read-only until it is" + + " manually restored via MODIFYCOLLECTION."); + } + } + } + + private void upgradeShardIndex( + AdminCmdContext adminCmdContext, + String collectionName, + Slice slice, + NamedList shardResult, + int convergenceTimeoutSecs) + throws Exception { + + String shardName = slice.getName(); + log.info("Upgrading shard [{}] of collection [{}]", shardName, collectionName); + + // Refresh cluster state to get the current leader + ClusterState clusterState = ccc.getZkStateReader().getClusterState(); + + Replica leader = ccc.getZkStateReader().getLeaderRetry(collectionName, shardName, 30000); + + ShardHandler shardHandler = ccc.newShardHandler(); + ShardRequestTracker tracker = CollectionHandlingUtils.asyncRequestTracker(adminCmdContext, ccc); + + // Step 1: Upgrade the leader (can be NRT or TLOG — both are leader-eligible and have + // an active IndexWriter when serving as leader) + log.info( + "Upgrading leader [{}] (type={}) for shard [{}]", + leader.getCoreName(), + leader.getType(), + shardName); + ModifiableSolrParams leaderParams = buildUpgradeParams(leader.getCoreName()); + tracker.sendShardRequest(leader, leaderParams, shardHandler); + tracker.processResponses( + shardResult, shardHandler, true, "Leader upgrade failed for shard " + shardName); + + // Step 2: Upgrade NRT non-leader replicas in parallel. + // NRT replicas own their index via IndexWriter, so they can upgrade locally. + // If the leader is TLOG, all NRT replicas in the shard are non-leaders and are upgraded here. 
+ List nrtNonLeaders = new ArrayList<>(); + for (Replica replica : slice.getReplicas(EnumSet.of(Replica.Type.NRT))) { + if (!replica.getName().equals(leader.getName()) + && clusterState.liveNodesContain(replica.getNodeName())) { + nrtNonLeaders.add(replica); + } + } + + if (!nrtNonLeaders.isEmpty()) { + log.info( + "Upgrading {} NRT non-leader replica(s) for shard [{}]", nrtNonLeaders.size(), shardName); + + ShardHandler nrtHandler = ccc.newShardHandler(); + ShardRequestTracker nrtTracker = + CollectionHandlingUtils.asyncRequestTracker(adminCmdContext, ccc); + for (Replica nrtReplica : nrtNonLeaders) { + ModifiableSolrParams nrtParams = buildUpgradeParams(nrtReplica.getCoreName()); + nrtTracker.sendShardRequest(nrtReplica, nrtParams, nrtHandler); + } + nrtTracker.processResponses( + shardResult, nrtHandler, true, "NRT replica upgrade failed for shard " + shardName); + } + + // Step 3: TLOG and PULL non-leader replicas converge via their normal background + // replication from committed leader states. Because auto-commit may persist intermediate + // progress during the leader upgrade, these replicas can begin converging before the leader's + // final explicit commit, so completion must be gated on replica-side validation rather than on + // assuming a single terminal fetch. We do not upgrade them locally to avoid racing with their + // background replication thread. 
+ List replicatingReplicas = new ArrayList<>(); + for (Replica replica : slice.getReplicas(EnumSet.of(Replica.Type.TLOG, Replica.Type.PULL))) { + if (!replica.getName().equals(leader.getName())) { + replicatingReplicas.add(replica.getCoreName() + " (" + replica.getType() + ")"); + } + } + if (!replicatingReplicas.isEmpty()) { + log.info( + "TLOG/PULL replicas for shard [{}] will converge via replication from leader: {}", + shardName, + replicatingReplicas); + shardResult.add("replicatingFromLeader", replicatingReplicas); + } + + waitForShardReplicaConvergence( + adminCmdContext, collectionName, slice, shardResult, convergenceTimeoutSecs); + + log.info("Shard [{}] upgrade complete", shardName); + } + + private ModifiableSolrParams buildUpgradeParams(String coreName) { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.UPGRADECOREINDEX.toString()); + params.set(CoreAdminParams.CORE, coreName); + params.set("cloudMode", "true"); + return params; + } + + private ModifiableSolrParams buildCheckOnlyParams(String coreName) { + ModifiableSolrParams params = buildUpgradeParams(coreName); + params.set("checkOnly", "true"); + return params; + } + + private void waitForShardReplicaConvergence( + AdminCmdContext adminCmdContext, + String collectionName, + Slice slice, + NamedList shardResult, + int convergenceTimeoutSecs) + throws Exception { + final String shardName = slice.getName(); + final TimeOut timeout = + new TimeOut( + convergenceTimeoutSecs, TimeUnit.SECONDS, ccc.getSolrCloudManager().getTimeSource()); + + // Monotonically accumulates replica keys that have confirmed indexUpgraded=true. + // A replica that confirms in iteration N is not re-checked in iteration N+1, so a + // momentary probe failure (GC pause, brief network blip) on a subsequent poll cannot + // un-confirm an already-verified replica. 
+ final Set confirmedConverged = new HashSet<>(); + + while (true) { + ClusterState latestClusterState = ccc.getZkStateReader().getClusterState(); + Slice latestSlice = latestClusterState.getCollection(collectionName).getSlice(shardName); + List allReplicas = new ArrayList<>(latestSlice.getReplicas()); + + // Probe only the live replicas that have not yet been confirmed. Non-live replicas + // remain in pendingReplicas until they recover and report indexUpgraded=true. + List toCheck = new ArrayList<>(); + for (Replica replica : allReplicas) { + String key = CollectionHandlingUtils.requestKey(replica); + if (!confirmedConverged.contains(key) + && latestClusterState.liveNodesContain(replica.getNodeName())) { + toCheck.add(replica); + } + } + + if (!toCheck.isEmpty()) { + NamedList checkResults = new NamedList<>(); + ShardHandler checkHandler = ccc.newShardHandler(); + ShardRequestTracker checkTracker = + CollectionHandlingUtils.syncRequestTracker(adminCmdContext, ccc); + for (Replica replica : toCheck) { + checkTracker.sendShardRequest( + replica, buildCheckOnlyParams(replica.getCoreName()), checkHandler); + } + // abortOnError=false: transient request failures are treated as "not yet converged" + // rather than fatal errors that would permanently leave the collection read-only. + checkTracker.processResponses( + checkResults, + checkHandler, + false, + "Replica validation failed for shard " + shardName + " during UPGRADECOLLECTIONINDEX"); + + updateConfirmedConverged(checkResults, confirmedConverged); + } + + // Pending = ALL replicas of the shard (live or not) not yet confirmed. A down replica + // is still unverified and must eventually confirm before readOnly is cleared. 
+ List pendingReplicas = + allReplicas.stream() + .map(CollectionHandlingUtils::requestKey) + .filter(key -> !confirmedConverged.contains(key)) + .toList(); + + if (pendingReplicas.isEmpty()) { + shardResult.add( + "validatedReplicas", allReplicas.stream().map(Replica::getCoreName).toList()); + return; + } + + if (timeout.hasTimedOut()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Timed out waiting for shard " + + shardName + + " replicas to converge to the upgraded index format after " + + convergenceTimeoutSecs + + "s: " + + pendingReplicas); + } + + log.info( + "Shard [{}] waiting for replica convergence ({} pending, {}s remaining): {}", + shardName, + pendingReplicas.size(), + timeout.timeLeft(TimeUnit.SECONDS), + pendingReplicas); + timeout.sleep(REPLICA_CONVERGENCE_POLL_MS); + } + } + + /** + * Adds to {@code confirmedConverged} the keys of replicas that responded with {@code + * indexUpgraded=true} in this poll round. + */ + private void updateConfirmedConverged( + NamedList checkResults, Set confirmedConverged) { + @SuppressWarnings("unchecked") + SimpleOrderedMap successes = (SimpleOrderedMap) checkResults.get("success"); + if (successes == null) { + return; + } + for (int i = 0; i < successes.size(); i++) { + Object responseObj = successes.getVal(i); + if ((responseObj instanceof NamedList response) + && Boolean.TRUE.equals(response.get("indexUpgraded"))) { + confirmedConverged.add(successes.getName(i)); + } + } + } + + /** + * Sets or clears the readOnly property on a collection via MODIFYCOLLECTION. Setting readOnly to + * true blocks all external writes at the {@link + * org.apache.solr.update.processor.DistributedZkUpdateProcessor} layer. + */ + private void setCollectionReadOnly(String collectionName, boolean readOnly) throws Exception { + String readOnlyValue = readOnly ? 
"true" : null; // null clears the property + + log.info("Setting readOnly={} on collection [{}]", readOnly, collectionName); + + ZkNodeProps props = + new ZkNodeProps( + Overseer.QUEUE_OPERATION, + CollectionParams.CollectionAction.MODIFYCOLLECTION.toLower(), + ZkStateReader.COLLECTION_PROP, + collectionName, + ZkStateReader.READ_ONLY, + readOnlyValue); + + if (ccc.getDistributedClusterStateUpdater().isDistributedStateUpdate()) { + ccc.getDistributedClusterStateUpdater() + .doSingleStateUpdate( + DistributedClusterStateUpdater.MutatingCommand.CollectionModifyCollection, + props, + ccc.getSolrCloudManager(), + ccc.getZkStateReader()); + } else { + ccc.offerStateUpdate(props); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index 0fa337f6362..8a30df9f511 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -86,6 +86,7 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.RESTORE; import static org.apache.solr.common.params.CollectionParams.CollectionAction.SPLITSHARD; import static org.apache.solr.common.params.CollectionParams.CollectionAction.SYNCSHARD; +import static org.apache.solr.common.params.CollectionParams.CollectionAction.UPGRADECOLLECTIONINDEX; import static org.apache.solr.common.params.CollectionParams.SOURCE_NODE; import static org.apache.solr.common.params.CollectionParams.TARGET_NODE; import static org.apache.solr.common.params.CommonAdminParams.ASYNC; @@ -142,6 +143,7 @@ import org.apache.solr.cloud.api.collections.CollectionHandlingUtils; import org.apache.solr.cloud.api.collections.DistributedCollectionConfigSetCommandRunner; import org.apache.solr.cloud.api.collections.ReindexCollectionCmd; +import org.apache.solr.cloud.api.collections.UpgradeCollectionIndexCmd; import 
org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.cloud.DocCollection; @@ -590,6 +592,16 @@ public enum CollectionOperation implements CollectionOp { return m; }), + UPGRADECOLLECTIONINDEX_OP( + UPGRADECOLLECTIONINDEX, + (req, rsp, h) -> { + Map m = copy(req.getParams().required(), null, NAME); + copy(req.getParams(), m, FOLLOW_ALIASES); + copy( + req.getParams(), m, UpgradeCollectionIndexCmd.REPLICA_CONVERGENCE_TIMEOUT_SECS_PARAM); + return m; + }), + SYNCSHARD_OP( SYNCSHARD, (req, rsp, h) -> { diff --git a/solr/core/src/java/org/apache/solr/handler/admin/UpgradeCoreIndexOp.java b/solr/core/src/java/org/apache/solr/handler/admin/UpgradeCoreIndexOp.java index 8fff5c93d31..c34d3441e09 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/UpgradeCoreIndexOp.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/UpgradeCoreIndexOp.java @@ -51,17 +51,27 @@ public boolean isExpensive() { public void execute(CoreAdminHandler.CallInfo it) throws Exception { assert it.handler.coreContainer != null; - if (it.handler.coreContainer.isZooKeeperAware()) { + SolrParams params = it.req.getParams(); + final boolean cloudMode = params.getBool("cloudMode", false); + final boolean checkOnly = params.getBool("checkOnly", false); + + // checkOnly is a read-only operation and is allowed in SolrCloud mode. + // Full upgrade requires either cloudMode (coordinator-initiated) or standalone mode. + if (it.handler.coreContainer.isZooKeeperAware() && !cloudMode && !checkOnly) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, - "action=UPGRADECOREINDEX is not supported in SolrCloud mode. As an alternative, in order to upgrade index, configure LatestVersionMergePolicyFactory in solrconfig.xml and reindex the data in your collection."); + "action=UPGRADECOREINDEX is not supported in SolrCloud mode. 
Use the" + + " UPGRADECOLLECTIONINDEX Collections API action instead, or configure" + + " LatestVersionMergePolicyFactory in solrconfig.xml and reindex the data in your" + + " collection."); } - SolrParams params = it.req.getParams(); String cname = params.required().get(CoreAdminParams.CORE); final boolean isAsync = params.get(CommonAdminParams.ASYNC) != null; final var requestBody = new UpgradeCoreIndexRequestBody(); requestBody.updateChain = params.get(UpdateParams.UPDATE_CHAIN); + requestBody.cloudMode = cloudMode; + requestBody.checkOnly = checkOnly; UpgradeCoreIndex upgradeCoreIndexApi = UPGRADE_CORE_INDEX_FACTORY.create( diff --git a/solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java b/solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java index c91f107e952..c3ea1844a79 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java @@ -41,6 +41,8 @@ import org.apache.solr.client.api.model.UpgradeCoreIndexResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.UpdateParams; import org.apache.solr.common.util.NamedList; @@ -61,6 +63,8 @@ import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.CommitUpdateCommand; import org.apache.solr.update.DocumentBuilder; +import org.apache.solr.update.processor.LogUpdateProcessorFactory; +import org.apache.solr.update.processor.RunUpdateProcessorFactory; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessorChain; import org.apache.solr.util.RefCounted; @@ -95,7 +99,9 @@ public class UpgradeCoreIndex extends CoreAdminAPIBase { public enum 
CoreIndexUpgradeStatus { UPGRADE_SUCCESSFUL, ERROR, - NO_UPGRADE_NEEDED + NO_UPGRADE_NEEDED, + /** Returned by {@code checkOnly=true} when old-format segments are present. */ + UPGRADE_NEEDED } private static final int RETRY_COUNT_FOR_SEGMENT_DELETION = 5; @@ -139,11 +145,38 @@ private UpgradeCoreIndexResponse performUpgrade( private UpgradeCoreIndexResponse performUpgradeImpl( SolrCore core, UpgradeCoreIndexRequestBody requestBody, UpgradeCoreIndexResponse response) { + final boolean cloudMode = Boolean.TRUE.equals(requestBody.cloudMode); + final boolean checkOnly = Boolean.TRUE.equals(requestBody.checkOnly); + boolean readOnlyWasCleared = false; + + // checkOnly: count old-format segments and return without performing any writes + if (checkOnly) { + return performCheckOnly(core, response); + } + RefCounted iwRef = null; MergePolicy originalMergePolicy = null; int numSegmentsEligibleForUpgrade = 0, numSegmentsUpgraded = 0; String coreName = core.getName(); try { + // In cloud mode, the collection-level readOnly property is set in ZooKeeper by the + // UPGRADECOLLECTIONINDEX coordinator. DistributedZkUpdateProcessor checks this ZK property + // on every request and blocks external writes regardless of core.readOnly. Our stripped-down + // chain bypasses DistributedZkUpdateProcessor entirely, so external writes remain blocked. + // + // The core.readOnly volatile flag is normally NOT set by MODIFYCOLLECTION on running cores + // (it is only synced on core reload). However, it may have been set by other operations + // (e.g., RestoreCmd). If it is set, we need to temporarily clear it so that + // DefaultSolrCoreState.getIndexWriter() does not reject our IndexWriter request. + if (cloudMode && core.readOnly) { + log.info( + "Cloud mode upgrade: temporarily clearing core.readOnly on core [{}] for upgrade." 
+ + " External writes remain blocked by the collection-level readOnly in ZooKeeper.", + coreName); + core.readOnly = false; + readOnlyWasCleared = true; + } + iwRef = core.getSolrCoreState().getIndexWriter(core); IndexWriter iw = iwRef.get(); @@ -173,7 +206,9 @@ private UpgradeCoreIndexResponse performUpgradeImpl( DocValuesIteratorCache dvICache = new DocValuesIteratorCache(searcherRef.get()); UpdateRequestProcessorChain updateProcessorChain = - getUpdateProcessorChain(core, requestBody.updateChain); + cloudMode + ? buildLocalOnlyChain(core) + : getUpdateProcessorChain(core, requestBody.updateChain); for (LeafReaderContext lrc : leafContexts) { if (!shouldUpgradeSegment(lrc)) { @@ -199,12 +234,33 @@ private UpgradeCoreIndexResponse performUpgradeImpl( searcherRef.decref(); } + // Restore original merge policy BEFORE commit so that the commit can trigger + // normal merging which will clean up tombstone segments (old segments whose live docs + // have all been migrated to new segments). + if (originalMergePolicy != null) { + iw.getConfig().setMergePolicy(originalMergePolicy); + originalMergePolicy = null; // prevent double-restore in finally block + } + try { doCommit(core); } catch (IOException e) { throw new CoreAdminAPIBaseException(e); } + // After commit, run expungeDeletes to remove tombstone segments (old-format segments + // whose live documents have all been re-indexed into new-format segments). 
+ try { + doExpungeDeletes(core); + } catch (IOException e) { + log.warn( + "expungeDeletes failed on core [{}] during index upgrade; tombstone segments may" + + " remain until the next merge cycle", + coreName, + e); + // Non-fatal: the upgrade itself succeeded; tombstone cleanup can happen later + } + boolean indexUpgraded = isIndexUpgraded(core); if (!indexUpgraded) { @@ -234,7 +290,7 @@ private UpgradeCoreIndexResponse performUpgradeImpl( throw new CoreAdminAPIBaseException(ioEx); } finally { - // Restore original merge policy + // Restore original merge policy if not already restored if (iwRef != null) { IndexWriter iw = iwRef.get(); if (originalMergePolicy != null) { @@ -242,11 +298,44 @@ private UpgradeCoreIndexResponse performUpgradeImpl( } iwRef.decref(); } + // Restore core readOnly flag if we cleared it + if (readOnlyWasCleared) { + core.readOnly = true; + log.info("Cloud mode upgrade: restored readOnly on core [{}]", coreName); + } } return response; } + /** + * Counts old-format segments without performing any writes. Returns immediately with the count + * and whether the index is already at the current Lucene format. + */ + private UpgradeCoreIndexResponse performCheckOnly( + SolrCore core, UpgradeCoreIndexResponse response) { + String coreName = core.getName(); + RefCounted searcherRef = core.getSearcher(); + try { + int segmentsNeedingUpgrade = 0; + for (LeafReaderContext lrc : searcherRef.get().getIndexReader().leaves()) { + if (shouldUpgradeSegment(lrc)) { + segmentsNeedingUpgrade++; + } + } + response.core = coreName; + response.numSegmentsEligibleForUpgrade = segmentsNeedingUpgrade; + response.indexUpgraded = (segmentsNeedingUpgrade == 0); + response.upgradeStatus = + response.indexUpgraded + ? 
CoreIndexUpgradeStatus.NO_UPGRADE_NEEDED.toString() + : CoreIndexUpgradeStatus.UPGRADE_NEEDED.toString(); + return response; + } finally { + searcherRef.decref(); + } + } + private boolean shouldUpgradeSegment(LeafReaderContext lrc) { Version segmentMinVersion = null; @@ -376,6 +465,24 @@ private boolean isIndexUpgraded(SolrCore core) throws IOException { } } + /** + * Builds a local-only update processor chain for SolrCloud mode upgrades. This chain consists of + * only {@link LogUpdateProcessorFactory} and {@link RunUpdateProcessorFactory}, deliberately + * excluding {@link org.apache.solr.update.processor.DistributedUpdateProcessorFactory}. This + * ensures: + * + *
    + *
+ * <ul>
+ *   <li>No distributed forwarding of upgrade adds to other replicas
+ *   <li>No version reassignment (documents retain their existing {@code _version_} values)
+ *   <li>No readOnly check from {@link
+ *       org.apache.solr.update.processor.DistributedZkUpdateProcessor}
+ * </ul>
+ */ + private UpdateRequestProcessorChain buildLocalOnlyChain(SolrCore core) { + return new UpdateRequestProcessorChain( + List.of(new LogUpdateProcessorFactory(), new RunUpdateProcessorFactory()), core); + } + private void doCommit(SolrCore core) throws IOException { try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, new ModifiableSolrParams())) { CommitUpdateCommand cmd = new CommitUpdateCommand(req, false); // optimize=false @@ -386,6 +493,23 @@ private void doCommit(SolrCore core) throws IOException { } } + /** + * Runs expungeDeletes to clean up tombstone segments — old-format segments whose live documents + * have all been re-indexed into new-format segments. Without this, {@link #isIndexUpgraded} would + * fail because the tombstone segments still report an old {@code minVersion}. + */ + private void doExpungeDeletes(SolrCore core) throws IOException { + try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, new ModifiableSolrParams())) { + CommitUpdateCommand cmd = new CommitUpdateCommand(req, false); + cmd.expungeDeletes = true; + core.getUpdateHandler().commit(cmd); + } catch (IOException ioEx) { + log.warn( + "Error during expungeDeletes on core [{}] during index upgrade", core.getName(), ioEx); + throw ioEx; + } + } + private void processSegment( LeafReaderContext leafReaderContext, UpdateRequestProcessorChain processorChain, @@ -422,6 +546,12 @@ private void processSegment( AddUpdateCommand currDocCmd = new AddUpdateCommand(solrRequest); currDocCmd.solrDoc = solrDoc; + // Preserve the original _version_ on the command so that the tlog entry + // is consistent with the indexed document's _version_ field. 
+ SolrInputField versionField = solrDoc.getField(CommonParams.VERSION_FIELD); + if (versionField != null) { + currDocCmd.setVersion(((Number) versionField.getValue()).longValue()); + } processor.processAdd(currDocCmd); } } finally { diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java index 6ae82508df4..69a7a3de888 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java @@ -143,7 +143,8 @@ enum CollectionAction { COLSTATUS(true, LockLevel.NONE), // this command implements its own locking REINDEXCOLLECTION(true, LockLevel.NONE), - RENAME(true, LockLevel.COLLECTION); + RENAME(true, LockLevel.COLLECTION), + UPGRADECOLLECTIONINDEX(true, LockLevel.COLLECTION); public final boolean isWrite; public final String lowerName;