From 2bfb8c08b72a69ac2a257157bd128b24b21db651 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Tue, 17 Feb 2026 17:28:38 -0500 Subject: [PATCH 01/46] add distributed support for /luke (WiP) --- .../handler/admin/LukeRequestHandler.java | 321 +++++++++++++++++- .../pages/luke-request-handler.adoc | 48 +++ .../client/solrj/response/LukeResponse.java | 39 ++- 3 files changed, 387 insertions(+), 21 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index e3d07b1cbca7..8daef459237b 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -19,6 +19,8 @@ import static org.apache.lucene.index.IndexOptions.DOCS; import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS; import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; +import static org.apache.solr.common.params.CommonParams.DISTRIB; +import static org.apache.solr.common.params.CommonParams.PATH; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -28,6 +30,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; +import java.util.Collections; +import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -67,15 +71,23 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.client.api.model.CoreStatusResponse; +import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import 
org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.SolrCore; import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.api.V2ApiUtils; +import org.apache.solr.handler.component.ResponseBuilder; +import org.apache.solr.handler.component.ShardHandler; +import org.apache.solr.handler.component.ShardHandlerFactory; +import org.apache.solr.handler.component.ShardRequest; +import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.CopyField; @@ -85,6 +97,7 @@ import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.security.AuthorizationContext; import org.apache.solr.update.SolrIndexWriter; +import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -97,7 +110,7 @@ * @see SegmentsInfoRequestHandler * @since solr 1.2 */ -public class LukeRequestHandler extends RequestHandlerBase { +public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAware { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String NUMTERMS = "numTerms"; @@ -108,6 +121,20 @@ public class LukeRequestHandler extends RequestHandlerBase { static final int HIST_ARRAY_SIZE = 33; + // Response keys not available via LukeResponse typed accessors + private static final String KEY_DELETED_DOCS = "deletedDocs"; + private static final String KEY_SEGMENT_COUNT = "segmentCount"; + private static final String KEY_DYNAMIC_BASE = "dynamicBase"; + private static final String KEY_INDEX_FLAGS = "index"; + private static final String KEY_HISTOGRAM = "histogram"; + + private ShardHandlerFactory shardHandlerFactory; + + @Override + public void inform(SolrCore core) { + this.shardHandlerFactory = core.getCoreContainer().getShardHandlerFactory(); + } + 
@Override public Name getPermissionName(AuthorizationContext request) { return Name.READ_PERM; @@ -131,10 +158,17 @@ public static ShowStyle get(String v) { @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { + SolrParams params = req.getParams(); + + if (params.getBool(DISTRIB, false) + && req.getCoreContainer().isZooKeeperAware() + && handleDistributed(req, rsp)) { + return; + } + IndexSchema schema = req.getSchema(); SolrIndexSearcher searcher = req.getSearcher(); DirectoryReader reader = searcher.getIndexReader(); - SolrParams params = req.getParams(); ShowStyle style = ShowStyle.get(params.get("show")); // If no doc is given, show all fields and top terms @@ -196,6 +230,280 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw rsp.setHttpCaching(false); } + /** + * @return true if the request was handled in distributed mode, false if prepDistributed + * short-circuited (e.g. single-shard collection) and the caller should fall through to local + * logic. + */ + private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException { + ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); + ResponseBuilder rb = new ResponseBuilder(req, rsp, Collections.emptyList()); + shardHandler.prepDistributed(rb); + + String[] shards = rb.shards; + if (shards == null || shards.length == 0) { + return false; + } + + ShardRequest sreq = new ShardRequest(); + sreq.shards = shards; + sreq.actualShards = shards; + sreq.responses = new ArrayList<>(shards.length); + + String reqPath = (String) req.getContext().get(PATH); + + for (String shard : shards) { + ModifiableSolrParams params = new ModifiableSolrParams(req.getParams()); + params.set(CommonParams.QT, reqPath != null ? 
reqPath : "/admin/luke"); + ShardHandler.setShardAttributesToParams(params, ShardRequest.PURPOSE_PRIVATE); + shardHandler.submit(sreq, shard, params); + } + + ShardResponse lastSrsp = shardHandler.takeCompletedOrError(); + if (lastSrsp == null) { + throw new SolrException(ErrorCode.SERVER_ERROR, "No responses received from shards"); + } + + List responses = sreq.responses; + for (ShardResponse srsp : responses) { + if (srsp.getException() != null) { + shardHandler.cancelAll(); + if (srsp.getException() instanceof SolrException) { + throw (SolrException) srsp.getException(); + } + throw new SolrException(ErrorCode.SERVER_ERROR, srsp.getException()); + } + } + + responses.sort(Comparator.comparing(LukeRequestHandler::shardSortKey)); + + mergeDistributedResponses(rsp, responses); + rsp.setHttpCaching(false); + return true; + } + + private static String shardSortKey(ShardResponse srsp) { + return srsp.getShardAddress() != null ? srsp.getShardAddress() : srsp.getShard(); + } + + @SuppressWarnings("unchecked") + private void mergeDistributedResponses(SolrQueryResponse rsp, List responses) { + long totalNumDocs = 0; + long totalMaxDoc = 0; + long totalDeletedDocs = 0; + long totalSegmentCount = 0; + + Map> mergedFields = new TreeMap<>(); + // fieldName -> (attrName -> (value -> shardAddress)) for mismatch diagnostics + Map>> fieldAttrSources = new HashMap<>(); + SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); + NamedList firstSchema = null; + NamedList firstInfo = null; + + for (ShardResponse srsp : responses) { + String shardAddr = shardSortKey(srsp); + NamedList shardRsp = srsp.getSolrResponse().getResponse(); + + LukeResponse lukeRsp = new LukeResponse(); + lukeRsp.setResponse(shardRsp); + + SimpleOrderedMap perShardEntry = new SimpleOrderedMap<>(); + + NamedList shardIndex = lukeRsp.getIndexInfo(); + if (shardIndex != null) { + if (lukeRsp.getNumDocs() != null) totalNumDocs += lukeRsp.getNumDocs(); + if (lukeRsp.getMaxDoc() != null) totalMaxDoc += 
lukeRsp.getMaxDoc(); + if (lukeRsp.getDeletedDocs() != null) totalDeletedDocs += lukeRsp.getDeletedDocs(); + if (lukeRsp.getSegmentCount() != null) totalSegmentCount += lukeRsp.getSegmentCount(); + + perShardEntry.add("index", shardIndex); + } + + Map shardFieldInfo = lukeRsp.getFieldInfo(); + NamedList shardFields = (NamedList) shardRsp.get("fields"); + if (shardFieldInfo != null && shardFields != null) { + SimpleOrderedMap perShardFields = new SimpleOrderedMap<>(); + boolean hasDetailedStats = false; + + for (Map.Entry entry : shardFieldInfo.entrySet()) { + String fieldName = entry.getKey(); + LukeResponse.FieldInfo fi = entry.getValue(); + NamedList fieldData = (NamedList) shardFields.get(fieldName); + if (fieldData == null) continue; + + SimpleOrderedMap merged = + mergedFields.computeIfAbsent(fieldName, k -> new SimpleOrderedMap<>()); + + // Attributes parsed by LukeResponse.FieldInfo — validate consistency via string form + mergeValidatedAttr(merged, "type", fi.getType(), shardAddr, fieldName, fieldAttrSources); + mergeValidatedAttr( + merged, "schema", fi.getSchema(), shardAddr, fieldName, fieldAttrSources); + + // Attributes not parsed by LukeResponse.FieldInfo — fall back to raw NamedList + mergeValidatedStringAttr( + merged, fieldData, fieldName, KEY_INDEX_FLAGS, shardAddr, fieldAttrSources); + mergeValidatedStringAttr( + merged, fieldData, fieldName, KEY_DYNAMIC_BASE, shardAddr, fieldAttrSources); + + long docs = fi.getDocs(); + if (docs > 0 || fieldData.get("docs") != null) { + Long currentDocs = (Long) merged.get("docs"); + if (currentDocs == null) { + merged.add("docs", docs); + } else { + merged.setVal(merged.indexOf("docs", 0), currentDocs + docs); + } + } + + // Detailed stats not parsed by FieldInfo — kept per-shard, not merged + NamedList topTerms = fi.getTopTerms(); + Object histogram = fieldData.get(KEY_HISTOGRAM); + + if (topTerms != null || fi.getDistinct() > 0 || histogram != null) { + hasDetailedStats = true; + SimpleOrderedMap 
detailedFieldInfo = new SimpleOrderedMap<>(); + if (topTerms != null) detailedFieldInfo.add("topTerms", topTerms); + if (fi.getDistinct() > 0) detailedFieldInfo.add("distinct", fi.getDistinct()); + if (histogram != null) detailedFieldInfo.add(KEY_HISTOGRAM, histogram); + perShardFields.add(fieldName, detailedFieldInfo); + } + } + + if (hasDetailedStats) { + perShardEntry.add("fields", perShardFields); + } + } + + if (firstSchema == null) { + firstSchema = (NamedList) shardRsp.get("schema"); + } + if (firstInfo == null) { + firstInfo = (NamedList) shardRsp.get("info"); + } + + shardsInfo.add(shardAddr, perShardEntry); + } + + SimpleOrderedMap mergedIndex = new SimpleOrderedMap<>(); + mergedIndex.add("numDocs", totalNumDocs); + mergedIndex.add("maxDoc", totalMaxDoc); + mergedIndex.add(KEY_DELETED_DOCS, totalDeletedDocs); + mergedIndex.add(KEY_SEGMENT_COUNT, totalSegmentCount); + rsp.add("index", mergedIndex); + + if (!mergedFields.isEmpty()) { + SimpleOrderedMap fieldsResult = new SimpleOrderedMap<>(); + for (Map.Entry> entry : mergedFields.entrySet()) { + fieldsResult.add(entry.getKey(), entry.getValue()); + } + rsp.add("fields", fieldsResult); + } + + if (firstSchema != null) { + rsp.add("schema", firstSchema); + } + if (firstInfo != null) { + rsp.add("info", firstInfo); + } + + rsp.add("shards", shardsInfo); + } + + /** Validates that a typed attribute value is identical across shards. 
*/ + private void mergeValidatedAttr( + SimpleOrderedMap merged, + String attrName, + Object val, + String shardAddr, + String fieldName, + Map>> fieldAttrSources) { + if (val == null) return; + String valStr = val.toString(); + Object existing = merged.get(attrName); + + if (existing == null) { + merged.add(attrName, val); + fieldAttrSources + .computeIfAbsent(fieldName, k -> new HashMap<>()) + .computeIfAbsent(attrName, k -> new HashMap<>()) + .put(valStr, shardAddr); + } else { + String existingStr = existing.toString(); + if (!existingStr.equals(valStr)) { + String firstShard = + fieldAttrSources + .getOrDefault(fieldName, Collections.emptyMap()) + .getOrDefault(attrName, Collections.emptyMap()) + .getOrDefault(existingStr, "unknown"); + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Field '" + + fieldName + + "' has inconsistent '" + + attrName + + "' across shards: '" + + existingStr + + "' (from " + + firstShard + + ") vs '" + + valStr + + "' (from " + + shardAddr + + ")"); + } + } + } + + /** + * Validates that a string attribute is identical across shards, merging it into the target. Throws + * a SolrException on mismatch. 
+ */ + private void mergeValidatedStringAttr( + SimpleOrderedMap merged, + NamedList fieldData, + String fieldName, + String attrName, + String shardAddr, + Map>> fieldAttrSources) { + Object val = fieldData.get(attrName); + if (val == null) return; + + String valStr = val.toString(); + Object existing = merged.get(attrName); + + if (existing == null) { + merged.add(attrName, val); + fieldAttrSources + .computeIfAbsent(fieldName, k -> new HashMap<>()) + .computeIfAbsent(attrName, k -> new HashMap<>()) + .put(valStr, shardAddr); + } else { + String existingStr = existing.toString(); + if (!existingStr.equals(valStr)) { + String firstShard = + fieldAttrSources + .getOrDefault(fieldName, Collections.emptyMap()) + .getOrDefault(attrName, Collections.emptyMap()) + .getOrDefault(existingStr, "unknown"); + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Field '" + + fieldName + + "' has inconsistent '" + + attrName + + "' across shards: '" + + existingStr + + "' (from " + + firstShard + + ") vs '" + + valStr + + "' (from " + + shardAddr + + ")"); + } + } + } + /** * @return a string representing a IndexableField's flags. */ @@ -422,7 +730,7 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) { - fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName())); + fieldMap.add(KEY_DYNAMIC_BASE, schema.getDynamicPattern(sfield.getName())); } Terms terms = reader.terms(fieldName); // Not indexed, so we need to report what we can (it made it through the fl param if @@ -441,10 +749,9 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re try { IndexableField fld = doc.getField(fieldName); if (fld != null) { - fieldMap.add("index", getFieldFlags(fld)); + fieldMap.add(KEY_INDEX_FLAGS, getFieldFlags(fld)); } else { - // it is a non-stored field... 
- fieldMap.add("index", "(unstored field)"); + fieldMap.add(KEY_INDEX_FLAGS, "(unstored field)"); } } catch (Exception ex) { log.warn("error reading field: {}", fieldName); @@ -734,7 +1041,7 @@ private static void getDetailedFieldInfo( fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema())); // Add a histogram - fieldMap.add("histogram", tiq.histogram.toNamedList()); + fieldMap.add(KEY_HISTOGRAM, tiq.histogram.toNamedList()); } private static List toListOfStrings(SchemaField[] raw) { diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index fb795f62cc17..b9fea92c62f2 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -83,6 +83,18 @@ The number of top terms for each field. Choose whether `/luke` should return the index-flags for each field. Fetching and returning the index-flags for each field in the index has non-zero cost, and can slow down requests to `/luke`. +`distrib`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `false` +|=== ++ +When set to `true` in SolrCloud mode, the handler aggregates results from all shards in the collection. +Additive index metrics (`numDocs`, `maxDoc`, `deletedDocs`, `segmentCount`) are summed across shards. +Field types and schema flags are validated for consistency across shards. +Per-shard index details and per-field detailed statistics are returned under a `shards` key. + == LukeRequestHandler Examples All of the examples in this section assume you are running the "techproducts" Solr example: @@ -118,3 +130,39 @@ Alternatively, to work through the Lucene native id: http://localhost:8983/solr/techproducts/admin/luke?fl=manu&docId=0 From SolrJ, you can access /luke using the {solr-javadocs}/solrj/org/apache/solr/client/solrj/request/LukeRequest.html[`LukeRequest`] object. 
+ +== Distributed Mode (SolrCloud) + +When running in SolrCloud, the Luke handler can aggregate results from all shards in a collection by setting `distrib=true`. +By default, `distrib` is `false` and the handler inspects only the local shard's index. + +To get a collection-wide view: + +[source,text] +http://localhost:8983/solr/techproducts/admin/luke?distrib=true&numTerms=0 + +To get detailed field statistics across all shards for a specific field: + +[source,text] +http://localhost:8983/solr/techproducts/admin/luke?distrib=true&fl=manu + +=== Response Structure + +In distributed mode, the response contains: + +* `index` -- Merged additive metrics summed across all shards: `numDocs`, `maxDoc`, `deletedDocs`, `segmentCount`. +* `fields` -- Merged field metadata. For each field: `type`, `schema` flags, `index` flags, and `dynamicBase` are validated to be consistent across shards. The `docs` count is summed. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are _not_ included at this level. +* `schema` -- Schema information from the first responding shard (identical across shards sharing the same configset). +* `info` -- Static info from the first responding shard. +* `shards` -- Per-shard details, ordered lexicographically by shard address. Each entry contains: +** `index` -- Full index info for that shard (including `directory`, `segmentsFile`, `version`, `current`, `hasDeletions`, `lastModified`, `userData`). +** `fields` -- Only present when `fl` triggers detailed statistics. Contains per-field `topTerms`, `distinct`, and `histogram` from that shard. + +=== Merge Semantics + +Field `type`, `schema` flags, `index` flags, and `dynamicBase` are validated for consistency across shards. +If a mismatch is detected, the handler returns an error identifying the field, the conflicting values, and the shard addresses involved. + +Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not merged across shards. 
+These statistics are shard-local and appear in each shard's entry under the `shards` key. +For cross-shard term-level aggregation, use Solr's xref:query-guide:faceting.adoc[faceting API] which provides refinement and distributed aggregation. diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index c38a2caf8300..2150cbb7ee49 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -114,8 +114,8 @@ public static class FieldInfo implements Serializable { String name; String type; String schema; - int docs; - int distinct; + long docs; + long distinct; EnumSet flags; boolean cacheableFaceting; NamedList topTerms; @@ -135,9 +135,9 @@ public void read(NamedList nl) { } else if ("schema".equals(entry.getKey())) { schema = (String) entry.getValue(); } else if ("docs".equals(entry.getKey())) { - docs = (Integer) entry.getValue(); + docs = ((Number) entry.getValue()).longValue(); } else if ("distinct".equals(entry.getKey())) { - distinct = (Integer) entry.getValue(); + distinct = ((Number) entry.getValue()).longValue(); } else if ("cacheableFaceting".equals(entry.getKey())) { cacheableFaceting = (Boolean) entry.getValue(); } else if ("topTerms".equals(entry.getKey())) { @@ -170,11 +170,11 @@ public String getType() { return type; } - public int getDistinct() { + public long getDistinct() { return distinct; } - public int getDocs() { + public long getDocs() { return docs; } @@ -257,19 +257,30 @@ public String getIndexDirectory() { return (String) indexInfo.get("directory"); } - public Integer getNumDocs() { + private Long getIndexLong(String key) { if (indexInfo == null) return null; - return (Integer) indexInfo.get("numDocs"); + Number n = (Number) indexInfo.get(key); + return n != null ? 
n.longValue() : null; } - public Integer getMaxDoc() { - if (indexInfo == null) return null; - return (Integer) indexInfo.get("maxDoc"); + public Long getNumDocs() { + return getIndexLong("numDocs"); } - public Integer getNumTerms() { - if (indexInfo == null) return null; - return (Integer) indexInfo.get("numTerms"); + public Long getMaxDoc() { + return getIndexLong("maxDoc"); + } + + public Long getDeletedDocs() { + return getIndexLong("deletedDocs"); + } + + public Long getSegmentCount() { + return getIndexLong("segmentCount"); + } + + public Long getNumTerms() { + return getIndexLong("numTerms"); } public Map getFieldTypeInfo() { From e0da01996e3146bca1735763363ee5398c53c89a Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Tue, 17 Feb 2026 21:09:10 -0500 Subject: [PATCH 02/46] stylistic edits --- .../handler/admin/LukeRequestHandler.java | 103 +++++------------- 1 file changed, 26 insertions(+), 77 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 8daef459237b..606d582e2a44 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -311,10 +311,18 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List shardIndex = lukeRsp.getIndexInfo(); if (shardIndex != null) { - if (lukeRsp.getNumDocs() != null) totalNumDocs += lukeRsp.getNumDocs(); - if (lukeRsp.getMaxDoc() != null) totalMaxDoc += lukeRsp.getMaxDoc(); - if (lukeRsp.getDeletedDocs() != null) totalDeletedDocs += lukeRsp.getDeletedDocs(); - if (lukeRsp.getSegmentCount() != null) totalSegmentCount += lukeRsp.getSegmentCount(); + if (lukeRsp.getNumDocs() != null) { + totalNumDocs += lukeRsp.getNumDocs(); + } + if (lukeRsp.getMaxDoc() != null){ + totalMaxDoc += lukeRsp.getMaxDoc(); + } + if (lukeRsp.getDeletedDocs() != null){ + totalDeletedDocs += 
lukeRsp.getDeletedDocs(); + } + if (lukeRsp.getSegmentCount() != null){ + totalSegmentCount += lukeRsp.getSegmentCount(); + } perShardEntry.add("index", shardIndex); } @@ -323,36 +331,29 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List shardFields = (NamedList) shardRsp.get("fields"); if (shardFieldInfo != null && shardFields != null) { SimpleOrderedMap perShardFields = new SimpleOrderedMap<>(); - boolean hasDetailedStats = false; for (Map.Entry entry : shardFieldInfo.entrySet()) { String fieldName = entry.getKey(); LukeResponse.FieldInfo fi = entry.getValue(); NamedList fieldData = (NamedList) shardFields.get(fieldName); - if (fieldData == null) continue; + if (fieldData == null) { + continue; + } SimpleOrderedMap merged = mergedFields.computeIfAbsent(fieldName, k -> new SimpleOrderedMap<>()); // Attributes parsed by LukeResponse.FieldInfo — validate consistency via string form - mergeValidatedAttr(merged, "type", fi.getType(), shardAddr, fieldName, fieldAttrSources); - mergeValidatedAttr( - merged, "schema", fi.getSchema(), shardAddr, fieldName, fieldAttrSources); + validateAndMerge(merged, fieldName, fieldData, shardAddr, "type", fieldAttrSources); + validateAndMerge(merged, fieldName, fieldData, shardAddr, "schema", fieldAttrSources); // Attributes not parsed by LukeResponse.FieldInfo — fall back to raw NamedList - mergeValidatedStringAttr( - merged, fieldData, fieldName, KEY_INDEX_FLAGS, shardAddr, fieldAttrSources); - mergeValidatedStringAttr( - merged, fieldData, fieldName, KEY_DYNAMIC_BASE, shardAddr, fieldAttrSources); + validateAndMerge(merged, fieldName, fieldData, KEY_INDEX_FLAGS, shardAddr, fieldAttrSources); + validateAndMerge(merged, fieldName, fieldData, KEY_DYNAMIC_BASE, shardAddr, fieldAttrSources); long docs = fi.getDocs(); if (docs > 0 || fieldData.get("docs") != null) { - Long currentDocs = (Long) merged.get("docs"); - if (currentDocs == null) { - merged.add("docs", docs); - } else { - merged.setVal(merged.indexOf("docs", 
0), currentDocs + docs); - } + merged.compute("docs", (key, val) -> val == null ? docs : (Long) val + docs); } // Detailed stats not parsed by FieldInfo — kept per-shard, not merged @@ -360,7 +361,7 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List 0 || histogram != null) { - hasDetailedStats = true; + perShardEntry.putIfAbsent("fields", perShardFields); SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); if (topTerms != null) detailedFieldInfo.add("topTerms", topTerms); if (fi.getDistinct() > 0) detailedFieldInfo.add("distinct", fi.getDistinct()); @@ -368,10 +369,6 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List merged, - String attrName, - Object val, - String shardAddr, String fieldName, - Map>> fieldAttrSources) { - if (val == null) return; - String valStr = val.toString(); - Object existing = merged.get(attrName); - - if (existing == null) { - merged.add(attrName, val); - fieldAttrSources - .computeIfAbsent(fieldName, k -> new HashMap<>()) - .computeIfAbsent(attrName, k -> new HashMap<>()) - .put(valStr, shardAddr); - } else { - String existingStr = existing.toString(); - if (!existingStr.equals(valStr)) { - String firstShard = - fieldAttrSources - .getOrDefault(fieldName, Collections.emptyMap()) - .getOrDefault(attrName, Collections.emptyMap()) - .getOrDefault(existingStr, "unknown"); - throw new SolrException( - ErrorCode.SERVER_ERROR, - "Field '" - + fieldName - + "' has inconsistent '" - + attrName - + "' across shards: '" - + existingStr - + "' (from " - + firstShard - + ") vs '" - + valStr - + "' (from " - + shardAddr - + ")"); - } - } - } - - /** - * Validates that a string attribute is identical across shards, merging it into the target. Throws - * a SolrException on mismatch. 
- */ - private void mergeValidatedStringAttr( - SimpleOrderedMap merged, NamedList fieldData, - String fieldName, String attrName, String shardAddr, Map>> fieldAttrSources) { Object val = fieldData.get(attrName); - if (val == null) return; - + if (val == null) { + return; + } String valStr = val.toString(); Object existing = merged.get(attrName); - if (existing == null) { merged.add(attrName, val); fieldAttrSources From bfd870277d29b9d00b676ff701decf3bbfa5fa3f Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Tue, 17 Feb 2026 21:16:15 -0500 Subject: [PATCH 03/46] fix test --- .../org/apache/solr/handler/admin/LukeRequestHandler.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 606d582e2a44..5fba58d80c36 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -344,8 +344,8 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List new SimpleOrderedMap<>()); // Attributes parsed by LukeResponse.FieldInfo — validate consistency via string form - validateAndMerge(merged, fieldName, fieldData, shardAddr, "type", fieldAttrSources); - validateAndMerge(merged, fieldName, fieldData, shardAddr, "schema", fieldAttrSources); + validateAndMerge(merged, fieldName, fieldData, "type", shardAddr, fieldAttrSources); + validateAndMerge(merged, fieldName, fieldData, "schema", shardAddr, fieldAttrSources); // Attributes not parsed by LukeResponse.FieldInfo — fall back to raw NamedList validateAndMerge(merged, fieldName, fieldData, KEY_INDEX_FLAGS, shardAddr, fieldAttrSources); From 974879a942d0d9e9829c335f2f9086ec0a0ea594 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 18 Feb 2026 18:12:17 -0500 Subject: [PATCH 04/46] simplify field info parsing --- 
.../handler/admin/LukeRequestHandler.java | 215 ++++++------ .../admin/LukeRequestHandlerDistribTest.java | 314 ++++++++++++++++++ .../pages/luke-request-handler.adoc | 11 +- .../client/solrj/response/LukeResponse.java | 30 +- 4 files changed, 448 insertions(+), 122 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 5fba58d80c36..0d2f37fc69e6 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -31,12 +31,12 @@ import java.util.Arrays; import java.util.Base64; import java.util.Collections; -import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -121,9 +121,23 @@ public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAw static final int HIST_ARRAY_SIZE = 33; - // Response keys not available via LukeResponse typed accessors + // Response section keys + private static final String RSP_INDEX = "index"; + private static final String RSP_FIELDS = "fields"; + private static final String RSP_SCHEMA = "schema"; + private static final String RSP_INFO = "info"; + private static final String RSP_SHARDS = "shards"; + + // Field-level keys + private static final String KEY_NUM_DOCS = "numDocs"; + private static final String KEY_MAX_DOC = "maxDoc"; private static final String KEY_DELETED_DOCS = "deletedDocs"; private static final String KEY_SEGMENT_COUNT = "segmentCount"; + private static final String KEY_TYPE = "type"; + private static final String KEY_SCHEMA_FLAGS = "schema"; + private static final String KEY_DOCS = 
"docs"; + private static final String KEY_DISTINCT = "distinct"; + private static final String KEY_TOP_TERMS = "topTerms"; private static final String KEY_DYNAMIC_BASE = "dynamicBase"; private static final String KEY_INDEX_FLAGS = "index"; private static final String KEY_HISTOGRAM = "histogram"; @@ -174,7 +188,7 @@ && handleDistributed(req, rsp)) { // If no doc is given, show all fields and top terms final var indexVals = new SimpleOrderedMap<>(); V2ApiUtils.squashIntoNamedList(indexVals, getIndexInfo(reader)); - rsp.add("index", indexVals); + rsp.add(RSP_INDEX, indexVals); if (ShowStyle.INDEX == style) { return; // that's all we need @@ -215,9 +229,9 @@ && handleDistributed(req, rsp)) { docinfo.add("solr", doc); rsp.add("doc", docinfo); } else if (ShowStyle.SCHEMA == style) { - rsp.add("schema", getSchemaInfo(req.getSchema())); + rsp.add(RSP_SCHEMA, getSchemaInfo(req.getSchema())); } else { - rsp.add("fields", getIndexedFieldsInfo(req)); + rsp.add(RSP_FIELDS, getIndexedFieldsInfo(req)); } // Add some generally helpful information @@ -226,10 +240,12 @@ && handleDistributed(req, rsp)) { info.add( "NOTE", "Document Frequency (df) is not updated when a document is marked for deletion. df values include deleted documents."); - rsp.add("info", info); + rsp.add(RSP_INFO, info); rsp.setHttpCaching(false); } + private record FieldOrigin(String shardAddr, LukeResponse.FieldInfo fieldInfo) {} + /** * @return true if the request was handled in distributed mode, false if prepDistributed * short-circuited (e.g. single-shard collection) and the caller should fall through to local @@ -255,7 +271,7 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) t for (String shard : shards) { ModifiableSolrParams params = new ModifiableSolrParams(req.getParams()); params.set(CommonParams.QT, reqPath != null ? 
reqPath : "/admin/luke"); - ShardHandler.setShardAttributesToParams(params, ShardRequest.PURPOSE_PRIVATE); + ShardHandler.setShardAttributesToParams(params, sreq.purpose); shardHandler.submit(sreq, shard, params); } @@ -275,14 +291,12 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) t } } - responses.sort(Comparator.comparing(LukeRequestHandler::shardSortKey)); - mergeDistributedResponses(rsp, responses); rsp.setHttpCaching(false); return true; } - private static String shardSortKey(ShardResponse srsp) { + private static String shardAddress(ShardResponse srsp) { return srsp.getShardAddress() != null ? srsp.getShardAddress() : srsp.getShard(); } @@ -294,14 +308,13 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List> mergedFields = new TreeMap<>(); - // fieldName -> (attrName -> (value -> shardAddress)) for mismatch diagnostics - Map>> fieldAttrSources = new HashMap<>(); + Map fieldOrigins = new HashMap<>(); SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); NamedList firstSchema = null; NamedList firstInfo = null; for (ShardResponse srsp : responses) { - String shardAddr = shardSortKey(srsp); + String shardAddr = shardAddress(srsp); NamedList shardRsp = srsp.getSolrResponse().getResponse(); LukeResponse lukeRsp = new LukeResponse(); @@ -311,145 +324,143 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List shardIndex = lukeRsp.getIndexInfo(); if (shardIndex != null) { - if (lukeRsp.getNumDocs() != null) { - totalNumDocs += lukeRsp.getNumDocs(); - } - if (lukeRsp.getMaxDoc() != null){ - totalMaxDoc += lukeRsp.getMaxDoc(); - } - if (lukeRsp.getDeletedDocs() != null){ - totalDeletedDocs += lukeRsp.getDeletedDocs(); - } - if (lukeRsp.getSegmentCount() != null){ - totalSegmentCount += lukeRsp.getSegmentCount(); - } + totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocs()).orElse(0L); + totalMaxDoc = Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDoc()).orElse(0L)); + totalDeletedDocs += 
Optional.ofNullable(lukeRsp.getDeletedDocs()).orElse(0L); + totalSegmentCount += Optional.ofNullable(lukeRsp.getSegmentCount()).orElse(0L); - perShardEntry.add("index", shardIndex); + perShardEntry.add(RSP_INDEX, shardIndex); } Map shardFieldInfo = lukeRsp.getFieldInfo(); - NamedList shardFields = (NamedList) shardRsp.get("fields"); - if (shardFieldInfo != null && shardFields != null) { + if (shardFieldInfo != null) { SimpleOrderedMap perShardFields = new SimpleOrderedMap<>(); for (Map.Entry entry : shardFieldInfo.entrySet()) { String fieldName = entry.getKey(); LukeResponse.FieldInfo fi = entry.getValue(); - NamedList fieldData = (NamedList) shardFields.get(fieldName); - if (fieldData == null) { - continue; - } SimpleOrderedMap merged = mergedFields.computeIfAbsent(fieldName, k -> new SimpleOrderedMap<>()); - // Attributes parsed by LukeResponse.FieldInfo — validate consistency via string form - validateAndMerge(merged, fieldName, fieldData, "type", shardAddr, fieldAttrSources); - validateAndMerge(merged, fieldName, fieldData, "schema", shardAddr, fieldAttrSources); + FieldOrigin origin = fieldOrigins.get(fieldName); + if (origin == null) { + fieldOrigins.put(fieldName, new FieldOrigin(shardAddr, fi)); + // First shard: populate merged with schema-derived attrs + merged.add(KEY_TYPE, fi.getType()); + merged.add(KEY_SCHEMA_FLAGS, fi.getSchema()); + Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); + if (dynBase != null) { + merged.add(KEY_DYNAMIC_BASE, dynBase); + } + } else { + // Subsequent shards: validate consistency of schema-derived attrs + validateFieldAttr( + fieldName, KEY_TYPE, fi.getType(), origin.fieldInfo().getType(), + shardAddr, origin.shardAddr()); + validateFieldAttr( + fieldName, KEY_SCHEMA_FLAGS, fi.getSchema(), origin.fieldInfo().getSchema(), + shardAddr, origin.shardAddr()); + validateFieldAttr( + fieldName, KEY_DYNAMIC_BASE, + fi.getExtras().get(KEY_DYNAMIC_BASE), + origin.fieldInfo().getExtras().get(KEY_DYNAMIC_BASE), + shardAddr, 
origin.shardAddr()); + } - // Attributes not parsed by LukeResponse.FieldInfo — fall back to raw NamedList - validateAndMerge(merged, fieldName, fieldData, KEY_INDEX_FLAGS, shardAddr, fieldAttrSources); - validateAndMerge(merged, fieldName, fieldData, KEY_DYNAMIC_BASE, shardAddr, fieldAttrSources); + // Index flags: take first non-null (index-derived, may differ across shards) + merged.computeIfAbsent(KEY_INDEX_FLAGS, k -> fi.getExtras().get(KEY_INDEX_FLAGS)); long docs = fi.getDocs(); - if (docs > 0 || fieldData.get("docs") != null) { - merged.compute("docs", (key, val) -> val == null ? docs : (Long) val + docs); + if (docs > 0) { + merged.compute(KEY_DOCS, (key, val) -> val == null ? docs : (Long) val + docs); } - // Detailed stats not parsed by FieldInfo — kept per-shard, not merged + // Detailed stats — kept per-shard, not merged NamedList topTerms = fi.getTopTerms(); - Object histogram = fieldData.get(KEY_HISTOGRAM); + Object histogram = fi.getExtras().get(KEY_HISTOGRAM); if (topTerms != null || fi.getDistinct() > 0 || histogram != null) { - perShardEntry.putIfAbsent("fields", perShardFields); + perShardEntry.putIfAbsent(RSP_FIELDS, perShardFields); SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); - if (topTerms != null) detailedFieldInfo.add("topTerms", topTerms); - if (fi.getDistinct() > 0) detailedFieldInfo.add("distinct", fi.getDistinct()); - if (histogram != null) detailedFieldInfo.add(KEY_HISTOGRAM, histogram); + if (topTerms != null) { + detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); + } + if (fi.getDistinct() > 0) { + detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); + } + if (histogram != null) { + detailedFieldInfo.add(KEY_HISTOGRAM, histogram); + } perShardFields.add(fieldName, detailedFieldInfo); } } } if (firstSchema == null) { - firstSchema = (NamedList) shardRsp.get("schema"); + firstSchema = (NamedList) shardRsp.get(RSP_SCHEMA); } if (firstInfo == null) { - firstInfo = (NamedList) shardRsp.get("info"); + firstInfo = 
(NamedList) shardRsp.get(RSP_INFO); } shardsInfo.add(shardAddr, perShardEntry); } SimpleOrderedMap mergedIndex = new SimpleOrderedMap<>(); - mergedIndex.add("numDocs", totalNumDocs); - mergedIndex.add("maxDoc", totalMaxDoc); + mergedIndex.add(KEY_NUM_DOCS, totalNumDocs); + mergedIndex.add(KEY_MAX_DOC, totalMaxDoc); mergedIndex.add(KEY_DELETED_DOCS, totalDeletedDocs); mergedIndex.add(KEY_SEGMENT_COUNT, totalSegmentCount); - rsp.add("index", mergedIndex); + rsp.add(RSP_INDEX, mergedIndex); if (!mergedFields.isEmpty()) { SimpleOrderedMap fieldsResult = new SimpleOrderedMap<>(); for (Map.Entry> entry : mergedFields.entrySet()) { fieldsResult.add(entry.getKey(), entry.getValue()); } - rsp.add("fields", fieldsResult); + rsp.add(RSP_FIELDS, fieldsResult); } if (firstSchema != null) { - rsp.add("schema", firstSchema); + rsp.add(RSP_SCHEMA, firstSchema); } if (firstInfo != null) { - rsp.add("info", firstInfo); + rsp.add(RSP_INFO, firstInfo); } - rsp.add("shards", shardsInfo); + rsp.add(RSP_SHARDS, shardsInfo); } - /** Validates that an attribute value is identical across shards. */ - private void validateAndMerge( - SimpleOrderedMap merged, + /** Validates that a schema-derived attribute value is identical across shards. 
*/ + private void validateFieldAttr( String fieldName, - NamedList fieldData, String attrName, - String shardAddr, - Map>> fieldAttrSources) { - Object val = fieldData.get(attrName); - if (val == null) { + Object currentVal, + Object originVal, + String currentShardAddr, + String originShardAddr) { + if (currentVal == null && originVal == null) { return; } - String valStr = val.toString(); - Object existing = merged.get(attrName); - if (existing == null) { - merged.add(attrName, val); - fieldAttrSources - .computeIfAbsent(fieldName, k -> new HashMap<>()) - .computeIfAbsent(attrName, k -> new HashMap<>()) - .put(valStr, shardAddr); - } else { - String existingStr = existing.toString(); - if (!existingStr.equals(valStr)) { - String firstShard = - fieldAttrSources - .getOrDefault(fieldName, Collections.emptyMap()) - .getOrDefault(attrName, Collections.emptyMap()) - .getOrDefault(existingStr, "unknown"); - throw new SolrException( - ErrorCode.SERVER_ERROR, - "Field '" - + fieldName - + "' has inconsistent '" - + attrName - + "' across shards: '" - + existingStr - + "' (from " - + firstShard - + ") vs '" - + valStr - + "' (from " - + shardAddr - + ")"); - } + String currentStr = currentVal != null ? currentVal.toString() : null; + String originStr = originVal != null ? originVal.toString() : null; + if (!Objects.equals(currentStr, originStr)) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Field '" + + fieldName + + "' has inconsistent '" + + attrName + + "' across shards: '" + + originStr + + "' (from " + + originShardAddr + + ") vs '" + + currentStr + + "' (from " + + currentShardAddr + + ")"); } } @@ -586,8 +597,8 @@ private static SimpleOrderedMap getDocumentFieldsInfo( SchemaField sfield = schema.getFieldOrNull(field.name()); FieldType ftype = (sfield == null) ? null : sfield.getType(); - f.add("type", (ftype == null) ? null : ftype.getTypeName()); - f.add("schema", getFieldFlags(sfield)); + f.add(KEY_TYPE, (ftype == null) ? 
null : ftype.getTypeName()); + f.add(KEY_SCHEMA_FLAGS, getFieldFlags(sfield)); f.add("flags", getFieldFlags(field)); f.add("value", (ftype == null) ? null : ftype.toExternal(field)); @@ -674,8 +685,8 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re SchemaField sfield = schema.getFieldOrNull(fieldName); FieldType ftype = (sfield == null) ? null : sfield.getType(); - fieldMap.add("type", (ftype == null) ? null : ftype.getTypeName()); - fieldMap.add("schema", getFieldFlags(sfield)); + fieldMap.add(KEY_TYPE, (ftype == null) ? null : ftype.getTypeName()); + fieldMap.add(KEY_SCHEMA_FLAGS, getFieldFlags(sfield)); if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) { @@ -707,7 +718,7 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re } } } - fieldMap.add("docs", terms.getDocCount()); + fieldMap.add(KEY_DOCS, terms.getDocCount()); } if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) { getDetailedFieldInfo(req, fieldName, fieldMap); @@ -984,10 +995,10 @@ private static void getDetailedFieldInfo( } } tiq.histogram.add(buckets); - fieldMap.add("distinct", tiq.distinctTerms); + fieldMap.add(KEY_DISTINCT, tiq.distinctTerms); // Include top terms - fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema())); + fieldMap.add(KEY_TOP_TERMS, tiq.toNamedList(req.getSearcher().getSchema())); // Add a histogram fieldMap.add(KEY_HISTOGRAM, tiq.histogram.toNamedList()); diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java new file mode 100644 index 000000000000..d1ef59159cc9 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -0,0 +1,314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.admin; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.LukeResponse; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class LukeRequestHandlerDistribTest extends SolrCloudTestCase { + + private static final String COLLECTION = "lukeDistribTest"; + private static final int NUM_DOCS = 20; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2).addConfig("conf", configset("cloud-dynamic")).configure(); + + CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster.waitForActiveCollection(COLLECTION, 2, 2); + + List docs = new ArrayList<>(); + for (int i = 0; i < NUM_DOCS; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", String.valueOf(i)); + 
doc.addField("name", "name_" + i); + doc.addField("subject", "subject value " + (i % 5)); + docs.add(doc); + } + cluster.getSolrClient().add(COLLECTION, docs); + cluster.getSolrClient().commit(COLLECTION); + } + + @AfterClass + public static void afterClass() throws Exception { + shutdownCluster(); + } + + /** Sends a luke request and wraps the raw response in a typed {@link LukeResponse}. */ + private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + if (extra != null) { + for (Map.Entry entry : + extra.getMap().entrySet()) { + params.set(entry.getKey(), entry.getValue()); + } + } + QueryRequest req = new QueryRequest(params); + NamedList raw = cluster.getSolrClient().request(req, collection); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + return rsp; + } + + @Test + @SuppressWarnings("unchecked") + public void testDistributedMerge() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + assertEquals("merged numDocs should equal total docs", NUM_DOCS, rsp.getNumDocs().longValue()); + assertTrue("merged maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertNotNull("deletedDocs should be present", rsp.getDeletedDocs()); + assertNotNull("segmentCount should be present", rsp.getSegmentCount()); + + NamedList shards = (NamedList) rsp.getResponse().get("shards"); + assertNotNull("shards section should be present", shards); + assertEquals("should have 2 shard entries", 2, shards.size()); + + // Each shard should have its own index info; per-shard numDocs should sum to total + long sumShardDocs = 0; + for (int i = 0; i < shards.size(); i++) { + NamedList shardEntry = (NamedList) shards.getVal(i); + LukeResponse shardLuke = new LukeResponse(); + 
shardLuke.setResponse(shardEntry); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); + assertNotNull("each shard should have maxDoc", shardLuke.getMaxDoc()); + sumShardDocs += shardLuke.getNumDocs(); + } + assertEquals( + "sum of per-shard numDocs should equal merged numDocs", + rsp.getNumDocs().longValue(), + sumShardDocs); + } + + @Test + public void testDistributedFieldsMerge() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + LukeResponse.FieldInfo nameField = fields.get("name"); + assertNotNull("'name' field should be present", nameField); + assertNotNull("field type should be present", nameField.getType()); + assertNotNull("schema flags should be present", nameField.getSchema()); + assertEquals("merged docs count for 'name' should equal total docs", NUM_DOCS, nameField.getDocs()); + + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id field type should be string", "string", idField.getType()); + } + + @Test + @SuppressWarnings("unchecked") + public void testDetailedFieldStatsPerShard() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("fl", "name"); + params.set("numTerms", "5"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + // Top-level fields should NOT have topTerms, distinct, histogram + LukeResponse.FieldInfo nameField = rsp.getFieldInfo().get("name"); + assertNotNull("'name' field should be present", nameField); + assertNull("topTerms should NOT be in top-level fields", nameField.getTopTerms()); + assertEquals("distinct should NOT be in top-level fields", 0, nameField.getDistinct()); + + // Per-shard entries should have detailed stats + NamedList shards = 
(NamedList) rsp.getResponse().get("shards"); + assertNotNull("shards section should be present", shards); + + boolean foundDetailedStats = false; + for (int i = 0; i < shards.size(); i++) { + NamedList shardEntry = (NamedList) shards.getVal(i); + NamedList shardFields = (NamedList) shardEntry.get("fields"); + if (shardFields != null) { + NamedList shardNameField = (NamedList) shardFields.get("name"); + if (shardNameField != null) { + foundDetailedStats = true; + assertTrue( + "per-shard field should have topTerms, distinct, or histogram", + shardNameField.get("topTerms") != null + || shardNameField.get("distinct") != null + || shardNameField.get("histogram") != null); + } + } + } + assertTrue("at least one shard should have detailed field stats", foundDetailedStats); + } + + @Test + public void testLocalModeDefault() throws Exception { + LukeResponse rsp = requestLuke(COLLECTION, null); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertNull("shards key should NOT be present in local mode", rsp.getResponse().get("shards")); + } + + @Test + public void testExplicitDistribFalse() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "false"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertNull( + "shards key should NOT be present with distrib=false", rsp.getResponse().get("shards")); + } + + /** + * 12 shards, 1 document: only one shard has data, the other 11 are empty. Verifies that + * schema-derived attributes (type, schema flags, dynamicBase) merge correctly when most shards + * have no documents, and that index-derived attributes (index flags, docs count) degrade + * gracefully. 
+ */ + @Test + @SuppressWarnings("unchecked") + public void testSparseShards() throws Exception { + String collection = "lukeSparse12"; + CollectionAdminRequest.createCollection(collection, "conf", 12, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster.waitForActiveCollection(collection, 12, 12); + + try { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "only-one"); + doc.addField("name", "sparse test"); + doc.addField("subject", "subject value"); + doc.addField("cat_s", "category"); + cluster.getSolrClient().add(collection, doc); + cluster.getSolrClient().commit(collection); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + + LukeResponse rsp = requestLuke(collection, params); + + // Index-level stats + assertEquals("numDocs should be 1", 1, rsp.getNumDocs().longValue()); + assertTrue("maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertEquals("deletedDocs should be 0", 0, rsp.getDeletedDocs().longValue()); + + NamedList shards = (NamedList) rsp.getResponse().get("shards"); + assertNotNull("shards section should be present", shards); + assertEquals("should have 12 shard entries", 12, shards.size()); + + // Exactly one shard should have numDocs=1 + long sumShardDocs = 0; + for (int i = 0; i < shards.size(); i++) { + NamedList shardEntry = (NamedList) shards.getVal(i); + LukeResponse shardLuke = new LukeResponse(); + shardLuke.setResponse(shardEntry); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); + sumShardDocs += shardLuke.getNumDocs(); + } + assertEquals("sum of per-shard numDocs should be 1", 1, sumShardDocs); + + // Field-level checks + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + // Schema-derived attrs should be present for all fields, even with 11 empty shards + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id 
type", "string", idField.getType()); + assertNotNull("id schema flags", idField.getSchema()); + + LukeResponse.FieldInfo nameField = fields.get("name"); + assertNotNull("'name' field should be present", nameField); + assertNotNull("name type", nameField.getType()); + assertNotNull("name schema flags", nameField.getSchema()); + assertEquals("name docs should be 1", 1, nameField.getDocs()); + + // Dynamic field — should have dynamicBase in extras + LukeResponse.FieldInfo catField = fields.get("cat_s"); + assertNotNull("'cat_s' field should be present", catField); + assertNotNull("cat_s type", catField.getType()); + assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); + + // Verify index flags are present (from the one shard that has the document). + // Fields that are indexed and have a live doc should get index flags via the merge's + // computeIfAbsent (take-first-non-null) logic. + NamedList mergedFields = (NamedList) rsp.getResponse().get("fields"); + assertNotNull("merged fields NamedList should be present", mergedFields); + NamedList rawNameField = (NamedList) mergedFields.get("name"); + assertNotNull("raw 'name' field should be in merged fields", rawNameField); + // The index flags key may or may not be present depending on whether the field is indexed + // and stored — but if present, it should be a non-empty string + Object indexFlags = rawNameField.get("index"); + if (indexFlags != null) { + assertTrue("index flags should be a non-empty string", indexFlags.toString().length() > 0); + } + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } + + @Test + public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception { + String singleShardCollection = "lukeSingleShard"; + CollectionAdminRequest.createCollection(singleShardCollection, "conf", 1, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + 
cluster.waitForActiveCollection(singleShardCollection, 1, 1); + + try { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "single-1"); + doc.addField("name", "test_name"); + cluster.getSolrClient().add(singleShardCollection, doc); + cluster.getSolrClient().commit(singleShardCollection); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + + LukeResponse rsp = requestLuke(singleShardCollection, params); + + assertNotNull( + "index info should be present even with distrib=true on single shard", + rsp.getIndexInfo()); + assertEquals("should see the 1 doc we indexed", 1, rsp.getNumDocs().longValue()); + assertNull( + "shards key should NOT be present when falling back to local", + rsp.getResponse().get("shards")); + } finally { + CollectionAdminRequest.deleteCollection(singleShardCollection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } +} diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index b9fea92c62f2..c0531b4a2ede 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -91,7 +91,7 @@ Fetching and returning the index-flags for each field in the index has non-zero |=== + When set to `true` in SolrCloud mode, the handler aggregates results from all shards in the collection. -Additive index metrics (`numDocs`, `maxDoc`, `deletedDocs`, `segmentCount`) are summed across shards. +Additive index metrics (`numDocs`, `deletedDocs`, `segmentCount`) are summed across shards; `maxDoc` is the maximum across shards. Field types and schema flags are validated for consistency across shards. Per-shard index details and per-field detailed statistics are returned under a `shards` key. 
@@ -150,18 +150,19 @@ http://localhost:8983/solr/techproducts/admin/luke?distrib=true&fl=manu In distributed mode, the response contains: -* `index` -- Merged additive metrics summed across all shards: `numDocs`, `maxDoc`, `deletedDocs`, `segmentCount`. -* `fields` -- Merged field metadata. For each field: `type`, `schema` flags, `index` flags, and `dynamicBase` are validated to be consistent across shards. The `docs` count is summed. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are _not_ included at this level. +* `index` -- Merged metrics across all shards: `numDocs`, `deletedDocs`, `segmentCount` are summed; `maxDoc` is the maximum across shards. +* `fields` -- Merged field metadata. For each field: `type`, `schema` flags, and `dynamicBase` are validated to be consistent across shards; `index` flags use the first non-null value. The `docs` count is summed. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are _not_ included at this level. * `schema` -- Schema information from the first responding shard (identical across shards sharing the same configset). * `info` -- Static info from the first responding shard. -* `shards` -- Per-shard details, ordered lexicographically by shard address. Each entry contains: +* `shards` -- Per-shard details in response-completion order. Each entry contains: ** `index` -- Full index info for that shard (including `directory`, `segmentsFile`, `version`, `current`, `hasDeletions`, `lastModified`, `userData`). ** `fields` -- Only present when `fl` triggers detailed statistics. Contains per-field `topTerms`, `distinct`, and `histogram` from that shard. === Merge Semantics -Field `type`, `schema` flags, `index` flags, and `dynamicBase` are validated for consistency across shards. +Field `type`, `schema` flags, and `dynamicBase` are validated for consistency across shards. 
If a mismatch is detected, the handler returns an error identifying the field, the conflicting values, and the shard addresses involved. +The `index` flags are index-derived (not schema-derived) and may legitimately differ across shards; the first non-null value is used. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not merged across shards. These statistics are shard-local and appear in each shard's entry under the `shards` key. diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index 2150cbb7ee49..bbd9f10632da 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -119,6 +119,7 @@ public static class FieldInfo implements Serializable { EnumSet flags; boolean cacheableFaceting; NamedList topTerms; + Map extras = new HashMap<>(); public FieldInfo(String n) { name = n; @@ -127,21 +128,16 @@ public FieldInfo(String n) { @SuppressWarnings("unchecked") public void read(NamedList nl) { for (Map.Entry entry : nl) { - if ("type".equals(entry.getKey())) { - type = (String) entry.getValue(); - } - if ("flags".equals(entry.getKey())) { - flags = parseFlags((String) entry.getValue()); - } else if ("schema".equals(entry.getKey())) { - schema = (String) entry.getValue(); - } else if ("docs".equals(entry.getKey())) { - docs = ((Number) entry.getValue()).longValue(); - } else if ("distinct".equals(entry.getKey())) { - distinct = ((Number) entry.getValue()).longValue(); - } else if ("cacheableFaceting".equals(entry.getKey())) { - cacheableFaceting = (Boolean) entry.getValue(); - } else if ("topTerms".equals(entry.getKey())) { - topTerms = (NamedList) entry.getValue(); + String key = entry.getKey(); + switch (key) { + case "type" -> type = (String) entry.getValue(); + case "flags" -> flags = parseFlags((String) 
entry.getValue()); + case "schema" -> schema = (String) entry.getValue(); + case "docs" -> docs = ((Number) entry.getValue()).longValue(); + case "distinct" -> distinct = ((Number) entry.getValue()).longValue(); + case "cacheableFaceting" -> cacheableFaceting = (Boolean) entry.getValue(); + case "topTerms" -> topTerms = (NamedList) entry.getValue(); + default -> extras.put(key, entry.getValue()); } } } @@ -193,6 +189,10 @@ public EnumSet getSchemaFlags() { public NamedList getTopTerms() { return topTerms; } + + public Map getExtras() { + return extras; + } } private NamedList indexInfo; From 38792734d7a8d3942e3af72af46f928514a6ab48 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 19 Feb 2026 11:41:40 -0500 Subject: [PATCH 05/46] revert backwards incompat changes --- .../handler/admin/LukeRequestHandler.java | 180 ++++++++++-------- .../admin/LukeRequestHandlerDistribTest.java | 71 ++++--- .../client/solrj/response/LukeResponse.java | 76 ++++++-- 3 files changed, 200 insertions(+), 127 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 0d2f37fc69e6..75621ede133e 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -307,27 +307,38 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List> mergedFields = new TreeMap<>(); + Map> fieldLookup = new HashMap<>(); + SimpleOrderedMap fieldsResult = new SimpleOrderedMap<>(); Map fieldOrigins = new HashMap<>(); SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); - NamedList firstSchema = null; - NamedList firstInfo = null; + + if (!responses.isEmpty()) { + ShardResponse firstRsp = responses.getFirst(); + NamedList firstShardRsp = firstRsp.getSolrResponse().getResponse(); + if (firstShardRsp == null) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + 
"Unexpected empty response from shard: " + shardAddress(firstRsp)); + } + Object schema = firstShardRsp.get(RSP_SCHEMA); + if (schema != null) rsp.add(RSP_SCHEMA, schema); + Object info = firstShardRsp.get(RSP_INFO); + if (info != null) rsp.add(RSP_INFO, info); + } for (ShardResponse srsp : responses) { String shardAddr = shardAddress(srsp); NamedList shardRsp = srsp.getSolrResponse().getResponse(); - LukeResponse lukeRsp = new LukeResponse(); lukeRsp.setResponse(shardRsp); - SimpleOrderedMap perShardEntry = new SimpleOrderedMap<>(); - NamedList shardIndex = lukeRsp.getIndexInfo(); if (shardIndex != null) { - totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocs()).orElse(0L); - totalMaxDoc = Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDoc()).orElse(0L)); - totalDeletedDocs += Optional.ofNullable(lukeRsp.getDeletedDocs()).orElse(0L); - totalSegmentCount += Optional.ofNullable(lukeRsp.getSegmentCount()).orElse(0L); + totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocsAsLong()).orElse(0L); + totalMaxDoc = + Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDocAsLong()).orElse(0L)); + totalDeletedDocs += Optional.ofNullable(lukeRsp.getDeletedDocsAsLong()).orElse(0L); + totalSegmentCount += Optional.ofNullable(lukeRsp.getSegmentCountAsLong()).orElse(0L); perShardEntry.add(RSP_INDEX, shardIndex); } @@ -340,70 +351,17 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List merged = - mergedFields.computeIfAbsent(fieldName, k -> new SimpleOrderedMap<>()); - - FieldOrigin origin = fieldOrigins.get(fieldName); - if (origin == null) { - fieldOrigins.put(fieldName, new FieldOrigin(shardAddr, fi)); - // First shard: populate merged with schema-derived attrs - merged.add(KEY_TYPE, fi.getType()); - merged.add(KEY_SCHEMA_FLAGS, fi.getSchema()); - Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); - if (dynBase != null) { - merged.add(KEY_DYNAMIC_BASE, dynBase); - } - } else { - // Subsequent shards: validate consistency of schema-derived 
attrs - validateFieldAttr( - fieldName, KEY_TYPE, fi.getType(), origin.fieldInfo().getType(), - shardAddr, origin.shardAddr()); - validateFieldAttr( - fieldName, KEY_SCHEMA_FLAGS, fi.getSchema(), origin.fieldInfo().getSchema(), - shardAddr, origin.shardAddr()); - validateFieldAttr( - fieldName, KEY_DYNAMIC_BASE, - fi.getExtras().get(KEY_DYNAMIC_BASE), - origin.fieldInfo().getExtras().get(KEY_DYNAMIC_BASE), - shardAddr, origin.shardAddr()); - } - - // Index flags: take first non-null (index-derived, may differ across shards) - merged.computeIfAbsent(KEY_INDEX_FLAGS, k -> fi.getExtras().get(KEY_INDEX_FLAGS)); - - long docs = fi.getDocs(); - if (docs > 0) { - merged.compute(KEY_DOCS, (key, val) -> val == null ? docs : (Long) val + docs); + SimpleOrderedMap merged = fieldLookup.get(fieldName); + if (merged == null) { + merged = new SimpleOrderedMap<>(); + fieldLookup.put(fieldName, merged); + fieldsResult.add(fieldName, merged); } - // Detailed stats — kept per-shard, not merged - NamedList topTerms = fi.getTopTerms(); - Object histogram = fi.getExtras().get(KEY_HISTOGRAM); - - if (topTerms != null || fi.getDistinct() > 0 || histogram != null) { - perShardEntry.putIfAbsent(RSP_FIELDS, perShardFields); - SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); - if (topTerms != null) { - detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); - } - if (fi.getDistinct() > 0) { - detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); - } - if (histogram != null) { - detailedFieldInfo.add(KEY_HISTOGRAM, histogram); - } - perShardFields.add(fieldName, detailedFieldInfo); - } + mergeShardField( + shardAddr, fi, fieldName, merged, fieldOrigins, perShardFields, perShardEntry); } } - - if (firstSchema == null) { - firstSchema = (NamedList) shardRsp.get(RSP_SCHEMA); - } - if (firstInfo == null) { - firstInfo = (NamedList) shardRsp.get(RSP_INFO); - } - shardsInfo.add(shardAddr, perShardEntry); } @@ -414,22 +372,84 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, 
List fieldsResult = new SimpleOrderedMap<>(); - for (Map.Entry> entry : mergedFields.entrySet()) { - fieldsResult.add(entry.getKey(), entry.getValue()); - } + if (fieldsResult.size() > 0) { rsp.add(RSP_FIELDS, fieldsResult); } - if (firstSchema != null) { - rsp.add(RSP_SCHEMA, firstSchema); + rsp.add(RSP_SHARDS, shardsInfo); + } + + private void mergeShardField( + String shardAddr, + LukeResponse.FieldInfo fi, + String fieldName, + SimpleOrderedMap merged, + Map fieldOrigins, + SimpleOrderedMap perShardFields, + SimpleOrderedMap perShardEntry) { + + FieldOrigin origin = fieldOrigins.get(fieldName); + if (origin == null) { + fieldOrigins.put(fieldName, new FieldOrigin(shardAddr, fi)); + // First shard: populate merged with schema-derived attrs + merged.add(KEY_TYPE, fi.getType()); + merged.add(KEY_SCHEMA_FLAGS, fi.getSchema()); + Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); + if (dynBase != null) { + merged.add(KEY_DYNAMIC_BASE, dynBase); + } + } else { + // Subsequent shards: validate consistency of schema-derived attrs + validateFieldAttr( + fieldName, + KEY_TYPE, + fi.getType(), + origin.fieldInfo().getType(), + shardAddr, + origin.shardAddr()); + validateFieldAttr( + fieldName, + KEY_SCHEMA_FLAGS, + fi.getSchema(), + origin.fieldInfo().getSchema(), + shardAddr, + origin.shardAddr()); + validateFieldAttr( + fieldName, + KEY_DYNAMIC_BASE, + fi.getExtras().get(KEY_DYNAMIC_BASE), + origin.fieldInfo().getExtras().get(KEY_DYNAMIC_BASE), + shardAddr, + origin.shardAddr()); } - if (firstInfo != null) { - rsp.add(RSP_INFO, firstInfo); + + // Index flags: take first non-null (index-derived, may differ across shards) + merged.computeIfAbsent(KEY_INDEX_FLAGS, k -> fi.getExtras().get(KEY_INDEX_FLAGS)); + + Long docsAsLong = fi.getDocsAsLong(); + if (docsAsLong != null && docsAsLong > 0) { + merged.compute( + "docsAsLong", (key, val) -> val == null ? 
docsAsLong : (Long) val + docsAsLong); } - rsp.add(RSP_SHARDS, shardsInfo); + // Detailed stats — kept per-shard, not merged + NamedList topTerms = fi.getTopTerms(); + Object histogram = fi.getExtras().get(KEY_HISTOGRAM); + + if (topTerms != null || fi.getDistinct() > 0 || histogram != null) { + perShardEntry.putIfAbsent(RSP_FIELDS, perShardFields); + SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); + if (topTerms != null) { + detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); + } + if (fi.getDistinct() > 0) { + detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); + } + if (histogram != null) { + detailedFieldInfo.add(KEY_HISTOGRAM, histogram); + } + perShardFields.add(fieldName, detailedFieldInfo); + } } /** Validates that a schema-derived attribute value is identical across shards. */ diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index d1ef59159cc9..d6f9ff6aef49 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -79,35 +79,33 @@ private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) } @Test - @SuppressWarnings("unchecked") public void testDistributedMerge() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("distrib", "true"); LukeResponse rsp = requestLuke(COLLECTION, params); - assertEquals("merged numDocs should equal total docs", NUM_DOCS, rsp.getNumDocs().longValue()); - assertTrue("merged maxDoc should be > 0", rsp.getMaxDoc() > 0); - assertNotNull("deletedDocs should be present", rsp.getDeletedDocs()); - assertNotNull("segmentCount should be present", rsp.getSegmentCount()); + assertEquals( + "merged numDocs should equal total docs", NUM_DOCS, rsp.getNumDocsAsLong().longValue()); + assertTrue("merged maxDoc 
should be > 0", rsp.getMaxDocAsLong() > 0); + assertNotNull("deletedDocs should be present", rsp.getDeletedDocsAsLong()); + assertNotNull("segmentCount should be present", rsp.getSegmentCountAsLong()); - NamedList shards = (NamedList) rsp.getResponse().get("shards"); + Map shards = rsp.getShardResponses(); assertNotNull("shards section should be present", shards); assertEquals("should have 2 shard entries", 2, shards.size()); // Each shard should have its own index info; per-shard numDocs should sum to total long sumShardDocs = 0; - for (int i = 0; i < shards.size(); i++) { - NamedList shardEntry = (NamedList) shards.getVal(i); - LukeResponse shardLuke = new LukeResponse(); - shardLuke.setResponse(shardEntry); - assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); - assertNotNull("each shard should have maxDoc", shardLuke.getMaxDoc()); - sumShardDocs += shardLuke.getNumDocs(); + for (Map.Entry entry : shards.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); + assertNotNull("each shard should have maxDoc", shardLuke.getMaxDocAsLong()); + sumShardDocs += shardLuke.getNumDocsAsLong(); } assertEquals( "sum of per-shard numDocs should equal merged numDocs", - rsp.getNumDocs().longValue(), + rsp.getNumDocsAsLong().longValue(), sumShardDocs); } @@ -125,7 +123,10 @@ public void testDistributedFieldsMerge() throws Exception { assertNotNull("'name' field should be present", nameField); assertNotNull("field type should be present", nameField.getType()); assertNotNull("schema flags should be present", nameField.getSchema()); - assertEquals("merged docs count for 'name' should equal total docs", NUM_DOCS, nameField.getDocs()); + assertEquals( + "merged docs count for 'name' should equal total docs", + NUM_DOCS, + nameField.getDocsAsLong().longValue()); LukeResponse.FieldInfo idField = fields.get("id"); assertNotNull("'id' field should be present", idField); @@ -149,13 
+150,15 @@ public void testDetailedFieldStatsPerShard() throws Exception { assertEquals("distinct should NOT be in top-level fields", 0, nameField.getDistinct()); // Per-shard entries should have detailed stats - NamedList shards = (NamedList) rsp.getResponse().get("shards"); + Map shards = rsp.getShardResponses(); assertNotNull("shards section should be present", shards); boolean foundDetailedStats = false; - for (int i = 0; i < shards.size(); i++) { - NamedList shardEntry = (NamedList) shards.getVal(i); - NamedList shardFields = (NamedList) shardEntry.get("fields"); + for (Map.Entry entry : shards.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + // Access the raw shard entry for per-shard fields + NamedList shardRaw = shardLuke.getResponse(); + NamedList shardFields = (NamedList) shardRaw.get("fields"); if (shardFields != null) { NamedList shardNameField = (NamedList) shardFields.get("name"); if (shardNameField != null) { @@ -176,7 +179,7 @@ public void testLocalModeDefault() throws Exception { LukeResponse rsp = requestLuke(COLLECTION, null); assertNotNull("index info should be present", rsp.getIndexInfo()); - assertNull("shards key should NOT be present in local mode", rsp.getResponse().get("shards")); + assertNull("shards should NOT be present in local mode", rsp.getShardResponses()); } @Test @@ -187,8 +190,7 @@ public void testExplicitDistribFalse() throws Exception { LukeResponse rsp = requestLuke(COLLECTION, params); assertNotNull("index info should be present", rsp.getIndexInfo()); - assertNull( - "shards key should NOT be present with distrib=false", rsp.getResponse().get("shards")); + assertNull("shards should NOT be present with distrib=false", rsp.getShardResponses()); } /** @@ -220,22 +222,20 @@ public void testSparseShards() throws Exception { LukeResponse rsp = requestLuke(collection, params); // Index-level stats - assertEquals("numDocs should be 1", 1, rsp.getNumDocs().longValue()); - assertTrue("maxDoc should be > 0", rsp.getMaxDoc() 
> 0); - assertEquals("deletedDocs should be 0", 0, rsp.getDeletedDocs().longValue()); + assertEquals("numDocs should be 1", 1, rsp.getNumDocsAsLong().longValue()); + assertTrue("maxDoc should be > 0", rsp.getMaxDocAsLong() > 0); + assertEquals("deletedDocs should be 0", 0, rsp.getDeletedDocsAsLong().longValue()); - NamedList shards = (NamedList) rsp.getResponse().get("shards"); + Map shards = rsp.getShardResponses(); assertNotNull("shards section should be present", shards); assertEquals("should have 12 shard entries", 12, shards.size()); // Exactly one shard should have numDocs=1 long sumShardDocs = 0; - for (int i = 0; i < shards.size(); i++) { - NamedList shardEntry = (NamedList) shards.getVal(i); - LukeResponse shardLuke = new LukeResponse(); - shardLuke.setResponse(shardEntry); - assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); - sumShardDocs += shardLuke.getNumDocs(); + for (Map.Entry entry : shards.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); + sumShardDocs += shardLuke.getNumDocsAsLong(); } assertEquals("sum of per-shard numDocs should be 1", 1, sumShardDocs); @@ -253,7 +253,7 @@ public void testSparseShards() throws Exception { assertNotNull("'name' field should be present", nameField); assertNotNull("name type", nameField.getType()); assertNotNull("name schema flags", nameField.getSchema()); - assertEquals("name docs should be 1", 1, nameField.getDocs()); + assertEquals("name docs should be 1", 1, nameField.getDocsAsLong().longValue()); // Dynamic field — should have dynamicBase in extras LukeResponse.FieldInfo catField = fields.get("cat_s"); @@ -302,10 +302,9 @@ public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception { assertNotNull( "index info should be present even with distrib=true on single shard", rsp.getIndexInfo()); - assertEquals("should see the 1 doc we indexed", 1, rsp.getNumDocs().longValue()); + 
assertEquals("should see the 1 doc we indexed", 1, rsp.getNumDocsAsLong().longValue()); assertNull( - "shards key should NOT be present when falling back to local", - rsp.getResponse().get("shards")); + "shards should NOT be present when falling back to local", rsp.getShardResponses()); } finally { CollectionAdminRequest.deleteCollection(singleShardCollection) .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index bbd9f10632da..9d4de3080505 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.solr.common.luke.FieldFlag; @@ -114,8 +115,10 @@ public static class FieldInfo implements Serializable { String name; String type; String schema; - long docs; - long distinct; + int docs; + int distinct; + Long docsAsLong; + Long distinctAsLong; EnumSet flags; boolean cacheableFaceting; NamedList topTerms; @@ -133,8 +136,17 @@ public void read(NamedList nl) { case "type" -> type = (String) entry.getValue(); case "flags" -> flags = parseFlags((String) entry.getValue()); case "schema" -> schema = (String) entry.getValue(); - case "docs" -> docs = ((Number) entry.getValue()).longValue(); - case "distinct" -> distinct = ((Number) entry.getValue()).longValue(); + case "docs" -> { + docs = (Integer) entry.getValue(); + docsAsLong = (long) docs; // widen, lossless + } + case "docsAsLong" -> docsAsLong = (Long) entry.getValue(); + // Don't set docs — narrowing Long→int is lossy + case "distinct" -> { + distinct = (Integer) entry.getValue(); + distinctAsLong = (long) distinct; // widen, lossless + } + 
case "distinctAsLong" -> distinctAsLong = (Long) entry.getValue(); case "cacheableFaceting" -> cacheableFaceting = (Boolean) entry.getValue(); case "topTerms" -> topTerms = (NamedList) entry.getValue(); default -> extras.put(key, entry.getValue()); @@ -166,14 +178,22 @@ public String getType() { return type; } - public long getDistinct() { + public int getDistinct() { return distinct; } - public long getDocs() { + public Long getDistinctAsLong() { + return distinctAsLong; + } + + public int getDocs() { return docs; } + public Long getDocsAsLong() { + return docsAsLong; + } + public String getName() { return name; } @@ -199,6 +219,7 @@ public Map getExtras() { private Map fieldInfo; private Map dynamicFieldInfo; private Map fieldTypeInfo; + private Map shardResponses; @Override @SuppressWarnings("unchecked") @@ -247,6 +268,17 @@ public void setResponse(NamedList res) { } } } + + // Parse shards section (present in distributed responses) + NamedList shardsNL = (NamedList) res.get("shards"); + if (shardsNL != null) { + shardResponses = new LinkedHashMap<>(); + for (Map.Entry entry : shardsNL) { + LukeResponse shardRsp = new LukeResponse(); + shardRsp.setResponse((NamedList) entry.getValue()); + shardResponses.put(entry.getKey(), shardRsp); + } + } } // ---------------------------------------------------------------- @@ -263,23 +295,41 @@ private Long getIndexLong(String key) { return n != null ? n.longValue() : null; } - public Long getNumDocs() { + public Integer getNumDocs() { + if (indexInfo == null) return null; + Object val = indexInfo.get("numDocs"); + return val instanceof Integer i ? i : null; + } + + public Long getNumDocsAsLong() { return getIndexLong("numDocs"); } - public Long getMaxDoc() { + public Integer getMaxDoc() { + if (indexInfo == null) return null; + Object val = indexInfo.get("maxDoc"); + return val instanceof Integer i ? 
i : null; + } + + public Long getMaxDocAsLong() { return getIndexLong("maxDoc"); } - public Long getDeletedDocs() { + public Long getDeletedDocsAsLong() { return getIndexLong("deletedDocs"); } - public Long getSegmentCount() { + public Long getSegmentCountAsLong() { return getIndexLong("segmentCount"); } - public Long getNumTerms() { + public Integer getNumTerms() { + if (indexInfo == null) return null; + Object val = indexInfo.get("numTerms"); + return val instanceof Integer i ? i : null; + } + + public Long getNumTermsAsLong() { return getIndexLong("numTerms"); } @@ -311,5 +361,9 @@ public FieldInfo getDynamicFieldInfo(String f) { return dynamicFieldInfo.get(f); } + public Map getShardResponses() { + return shardResponses; + } + // ---------------------------------------------------------------- } From df081d5375ffb2937e81199d92d69707a2c02bf4 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 19 Feb 2026 14:49:23 -0500 Subject: [PATCH 06/46] backtrack API expansion --- .../handler/admin/LukeRequestHandler.java | 9 ++++---- .../admin/LukeRequestHandlerDistribTest.java | 7 +++--- .../client/solrj/response/LukeResponse.java | 23 +------------------ 3 files changed, 9 insertions(+), 30 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 75621ede133e..902936da1ba1 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -303,9 +303,9 @@ private static String shardAddress(ShardResponse srsp) { @SuppressWarnings("unchecked") private void mergeDistributedResponses(SolrQueryResponse rsp, List responses) { long totalNumDocs = 0; - long totalMaxDoc = 0; + int totalMaxDoc = 0; long totalDeletedDocs = 0; - long totalSegmentCount = 0; + int totalSegmentCount = 0; Map> fieldLookup = new HashMap<>(); SimpleOrderedMap fieldsResult = 
new SimpleOrderedMap<>(); @@ -336,9 +336,10 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List 0", rsp.getMaxDocAsLong() > 0); + assertTrue("merged maxDoc should be > 0", rsp.getMaxDoc() > 0); assertNotNull("deletedDocs should be present", rsp.getDeletedDocsAsLong()); - assertNotNull("segmentCount should be present", rsp.getSegmentCountAsLong()); Map shards = rsp.getShardResponses(); assertNotNull("shards section should be present", shards); @@ -100,7 +99,7 @@ public void testDistributedMerge() throws Exception { for (Map.Entry entry : shards.entrySet()) { LukeResponse shardLuke = entry.getValue(); assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); - assertNotNull("each shard should have maxDoc", shardLuke.getMaxDocAsLong()); + assertNotNull("each shard should have maxDoc", shardLuke.getMaxDoc()); sumShardDocs += shardLuke.getNumDocsAsLong(); } assertEquals( @@ -223,7 +222,7 @@ public void testSparseShards() throws Exception { // Index-level stats assertEquals("numDocs should be 1", 1, rsp.getNumDocsAsLong().longValue()); - assertTrue("maxDoc should be > 0", rsp.getMaxDocAsLong() > 0); + assertTrue("maxDoc should be > 0", rsp.getMaxDoc() > 0); assertEquals("deletedDocs should be 0", 0, rsp.getDeletedDocsAsLong().longValue()); Map shards = rsp.getShardResponses(); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index 9d4de3080505..0ea9fadf9026 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -118,7 +118,6 @@ public static class FieldInfo implements Serializable { int docs; int distinct; Long docsAsLong; - Long distinctAsLong; EnumSet flags; boolean cacheableFaceting; NamedList topTerms; @@ -142,11 +141,7 @@ public void read(NamedList nl) { } case "docsAsLong" -> docsAsLong = 
(Long) entry.getValue(); // Don't set docs — narrowing Long→int is lossy - case "distinct" -> { - distinct = (Integer) entry.getValue(); - distinctAsLong = (long) distinct; // widen, lossless - } - case "distinctAsLong" -> distinctAsLong = (Long) entry.getValue(); + case "distinct" -> distinct = (Integer) entry.getValue(); case "cacheableFaceting" -> cacheableFaceting = (Boolean) entry.getValue(); case "topTerms" -> topTerms = (NamedList) entry.getValue(); default -> extras.put(key, entry.getValue()); @@ -182,10 +177,6 @@ public int getDistinct() { return distinct; } - public Long getDistinctAsLong() { - return distinctAsLong; - } - public int getDocs() { return docs; } @@ -311,28 +302,16 @@ public Integer getMaxDoc() { return val instanceof Integer i ? i : null; } - public Long getMaxDocAsLong() { - return getIndexLong("maxDoc"); - } - public Long getDeletedDocsAsLong() { return getIndexLong("deletedDocs"); } - public Long getSegmentCountAsLong() { - return getIndexLong("segmentCount"); - } - public Integer getNumTerms() { if (indexInfo == null) return null; Object val = indexInfo.get("numTerms"); return val instanceof Integer i ? 
i : null; } - public Long getNumTermsAsLong() { - return getIndexLong("numTerms"); - } - public Map getFieldTypeInfo() { return fieldTypeInfo; } From 9fba72edf36320230c323fec3a8511237e9ee47b Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 19 Feb 2026 15:17:17 -0500 Subject: [PATCH 07/46] tidy up --- .../handler/admin/LukeRequestHandler.java | 14 +++--- .../admin/LukeRequestHandlerDistribTest.java | 3 +- .../client/solrj/response/LukeResponse.java | 45 ++++++++++++------- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 902936da1ba1..18a86bc8f3fd 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -251,7 +251,8 @@ private record FieldOrigin(String shardAddr, LukeResponse.FieldInfo fieldInfo) { * short-circuited (e.g. single-shard collection) and the caller should fall through to local * logic. 
*/ - private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException { + private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) + throws IOException { ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); ResponseBuilder rb = new ResponseBuilder(req, rsp, Collections.emptyList()); shardHandler.prepDistributed(rb); @@ -321,9 +322,13 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List shardIndex = lukeRsp.getIndexInfo(); if (shardIndex != null) { totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocsAsLong()).orElse(0L); - totalMaxDoc = - Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDoc()).orElse(0)); + totalMaxDoc = Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDoc()).orElse(0)); totalDeletedDocs += Optional.ofNullable(lukeRsp.getDeletedDocsAsLong()).orElse(0L); Number segCount = (Number) shardIndex.get(KEY_SEGMENT_COUNT); totalSegmentCount += segCount != null ? segCount.intValue() : 0; diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index eb8b376554cc..6047c5590194 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -66,8 +66,7 @@ private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) params.set("qt", "/admin/luke"); params.set("numTerms", "0"); if (extra != null) { - for (Map.Entry entry : - extra.getMap().entrySet()) { + for (Map.Entry entry : extra.getMap().entrySet()) { params.set(entry.getKey(), entry.getValue()); } } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index 0ea9fadf9026..66faa1ac7d3b 100644 --- 
a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -130,21 +130,26 @@ public FieldInfo(String n) { @SuppressWarnings("unchecked") public void read(NamedList nl) { for (Map.Entry entry : nl) { - String key = entry.getKey(); - switch (key) { - case "type" -> type = (String) entry.getValue(); - case "flags" -> flags = parseFlags((String) entry.getValue()); - case "schema" -> schema = (String) entry.getValue(); - case "docs" -> { - docs = (Integer) entry.getValue(); - docsAsLong = (long) docs; // widen, lossless - } - case "docsAsLong" -> docsAsLong = (Long) entry.getValue(); + if ("type".equals(entry.getKey())) { + type = (String) entry.getValue(); + } else if ("flags".equals(entry.getKey())) { + flags = parseFlags((String) entry.getValue()); + } else if ("schema".equals(entry.getKey())) { + schema = (String) entry.getValue(); + } else if ("docs".equals(entry.getKey())) { + docs = (Integer) entry.getValue(); + docsAsLong = (long) docs; // widen, lossless + } else if ("docsAsLong".equals(entry.getKey())) { // Don't set docs — narrowing Long→int is lossy - case "distinct" -> distinct = (Integer) entry.getValue(); - case "cacheableFaceting" -> cacheableFaceting = (Boolean) entry.getValue(); - case "topTerms" -> topTerms = (NamedList) entry.getValue(); - default -> extras.put(key, entry.getValue()); + docsAsLong = (Long) entry.getValue(); + } else if ("distinct".equals(entry.getKey())) { + distinct = (Integer) entry.getValue(); + } else if ("cacheableFaceting".equals(entry.getKey())) { + cacheableFaceting = (Boolean) entry.getValue(); + } else if ("topTerms".equals(entry.getKey())) { + topTerms = (NamedList) entry.getValue(); + } else { + extras.put(entry.getKey(), entry.getValue()); } } } @@ -287,7 +292,9 @@ private Long getIndexLong(String key) { } public Integer getNumDocs() { - if (indexInfo == null) return null; + if (indexInfo == null) { + return null; + } 
Object val = indexInfo.get("numDocs"); return val instanceof Integer i ? i : null; } @@ -297,7 +304,9 @@ public Long getNumDocsAsLong() { } public Integer getMaxDoc() { - if (indexInfo == null) return null; + if (indexInfo == null) { + return null; + } Object val = indexInfo.get("maxDoc"); return val instanceof Integer i ? i : null; } @@ -307,7 +316,9 @@ public Long getDeletedDocsAsLong() { } public Integer getNumTerms() { - if (indexInfo == null) return null; + if (indexInfo == null) { + return null; + } Object val = indexInfo.get("numTerms"); return val instanceof Integer i ? i : null; } From 30ce01e6fdd7d5ac89e354343b8340a509124567 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 19 Feb 2026 17:20:44 -0500 Subject: [PATCH 08/46] refactor --- .../handler/admin/LukeRequestHandler.java | 134 +++++++++++------- .../admin/LukeRequestHandlerDistribTest.java | 23 ++- 2 files changed, 99 insertions(+), 58 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 18a86bc8f3fd..a4f43a280e1a 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -136,6 +136,7 @@ public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAw private static final String KEY_TYPE = "type"; private static final String KEY_SCHEMA_FLAGS = "schema"; private static final String KEY_DOCS = "docs"; + private static final String KEY_DOCS_AS_LONG = "docsAsLong"; private static final String KEY_DISTINCT = "distinct"; private static final String KEY_TOP_TERMS = "topTerms"; private static final String KEY_DYNAMIC_BASE = "dynamicBase"; @@ -244,7 +245,23 @@ && handleDistributed(req, rsp)) { rsp.setHttpCaching(false); } - private record FieldOrigin(String shardAddr, LukeResponse.FieldInfo fieldInfo) {} + /** Tracks the first-seen valid properties 
of a field across shards. */ + private static class ExpectedFieldConfig { + final String shardAddr; + final LukeResponse.FieldInfo fieldInfo; + Object indexFlags; + String indexFlagsShardAddr; + + ExpectedFieldConfig(String shardAddr, LukeResponse.FieldInfo fieldInfo) { + this.shardAddr = shardAddr; + this.fieldInfo = fieldInfo; + Object flags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS); + if (flags != null) { + this.indexFlags = flags; + this.indexFlagsShardAddr = shardAddr; + } + } + } /** * @return true if the request was handled in distributed mode, false if prepDistributed @@ -301,16 +318,14 @@ private static String shardAddress(ShardResponse srsp) { return srsp.getShardAddress() != null ? srsp.getShardAddress() : srsp.getShard(); } - @SuppressWarnings("unchecked") private void mergeDistributedResponses(SolrQueryResponse rsp, List responses) { long totalNumDocs = 0; int totalMaxDoc = 0; long totalDeletedDocs = 0; int totalSegmentCount = 0; - Map> fieldLookup = new HashMap<>(); - SimpleOrderedMap fieldsResult = new SimpleOrderedMap<>(); - Map fieldOrigins = new HashMap<>(); + Map> mergedFields = new HashMap<>(); + Map expectedFieldConfigs = new HashMap<>(); SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); if (!responses.isEmpty()) { @@ -356,15 +371,29 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List merged = fieldLookup.get(fieldName); - if (merged == null) { - merged = new SimpleOrderedMap<>(); - fieldLookup.put(fieldName, merged); - fieldsResult.add(fieldName, merged); - } + SimpleOrderedMap merged = + mergedFields.computeIfAbsent(fieldName, k -> new SimpleOrderedMap<>()); + + mergeShardField(shardAddr, fi, merged, expectedFieldConfigs); + + // Detailed stats — kept per-shard, not merged + NamedList topTerms = fi.getTopTerms(); + Object histogram = fi.getExtras().get(KEY_HISTOGRAM); - mergeShardField( - shardAddr, fi, fieldName, merged, fieldOrigins, perShardFields, perShardEntry); + if (topTerms != null || fi.getDistinct() > 0 || 
histogram != null) { + perShardEntry.putIfAbsent(RSP_FIELDS, perShardFields); + SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); + if (topTerms != null) { + detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); + } + if (fi.getDistinct() > 0) { + detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); + } + if (histogram != null) { + detailedFieldInfo.add(KEY_HISTOGRAM, histogram); + } + perShardFields.add(fieldName, detailedFieldInfo); + } } } shardsInfo.add(shardAddr, perShardEntry); @@ -377,8 +406,12 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List 0) { - rsp.add(RSP_FIELDS, fieldsResult); + if (!mergedFields.isEmpty()) { + SimpleOrderedMap mergedFieldsNL = new SimpleOrderedMap<>(); + for (Map.Entry> entry : mergedFields.entrySet()) { + mergedFieldsNL.add(entry.getKey(), entry.getValue()); + } + rsp.add(RSP_FIELDS, mergedFieldsNL); } rsp.add(RSP_SHARDS, shardsInfo); @@ -387,73 +420,70 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List merged, - Map fieldOrigins, - SimpleOrderedMap perShardFields, - SimpleOrderedMap perShardEntry) { + Map expectedFieldConfigs) { - FieldOrigin origin = fieldOrigins.get(fieldName); + String fieldName = fi.getName(); + ExpectedFieldConfig origin = expectedFieldConfigs.get(fieldName); if (origin == null) { - fieldOrigins.put(fieldName, new FieldOrigin(shardAddr, fi)); - // First shard: populate merged with schema-derived attrs + origin = new ExpectedFieldConfig(shardAddr, fi); + expectedFieldConfigs.put(fieldName, origin); + // First shard to report this field: populate merged with schema-derived attrs merged.add(KEY_TYPE, fi.getType()); merged.add(KEY_SCHEMA_FLAGS, fi.getSchema()); Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); if (dynBase != null) { merged.add(KEY_DYNAMIC_BASE, dynBase); } + if (origin.indexFlags != null) { + merged.add(KEY_INDEX_FLAGS, origin.indexFlags); + } } else { - // Subsequent shards: validate consistency of schema-derived attrs + // Subsequent shards: 
validate consistency validateFieldAttr( fieldName, KEY_TYPE, fi.getType(), - origin.fieldInfo().getType(), + origin.fieldInfo.getType(), shardAddr, - origin.shardAddr()); + origin.shardAddr); validateFieldAttr( fieldName, KEY_SCHEMA_FLAGS, fi.getSchema(), - origin.fieldInfo().getSchema(), + origin.fieldInfo.getSchema(), shardAddr, - origin.shardAddr()); + origin.shardAddr); validateFieldAttr( fieldName, KEY_DYNAMIC_BASE, fi.getExtras().get(KEY_DYNAMIC_BASE), - origin.fieldInfo().getExtras().get(KEY_DYNAMIC_BASE), + origin.fieldInfo.getExtras().get(KEY_DYNAMIC_BASE), shardAddr, - origin.shardAddr()); + origin.shardAddr); + + Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); + if (indexFlags != null) { + if (origin.indexFlags == null) { + origin.indexFlags = indexFlags; + origin.indexFlagsShardAddr = shardAddr; + merged.add(KEY_INDEX_FLAGS, indexFlags); + } else { + validateFieldAttr( + fieldName, + KEY_INDEX_FLAGS, + indexFlags, + origin.indexFlags, + shardAddr, + origin.indexFlagsShardAddr); + } + } } - // Index flags: take first non-null (index-derived, may differ across shards) - merged.computeIfAbsent(KEY_INDEX_FLAGS, k -> fi.getExtras().get(KEY_INDEX_FLAGS)); - Long docsAsLong = fi.getDocsAsLong(); if (docsAsLong != null && docsAsLong > 0) { merged.compute( - "docsAsLong", (key, val) -> val == null ? 
docsAsLong : (Long) val + docsAsLong); - } - - // Detailed stats — kept per-shard, not merged - NamedList topTerms = fi.getTopTerms(); - Object histogram = fi.getExtras().get(KEY_HISTOGRAM); - - if (topTerms != null || fi.getDistinct() > 0 || histogram != null) { - perShardEntry.putIfAbsent(RSP_FIELDS, perShardFields); - SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); - if (topTerms != null) { - detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); - } - if (fi.getDistinct() > 0) { - detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); - } - if (histogram != null) { - detailedFieldInfo.add(KEY_HISTOGRAM, histogram); - } - perShardFields.add(fieldName, detailedFieldInfo); + KEY_DOCS_AS_LONG, (key, val) -> val == null ? docsAsLong : (Long) val + docsAsLong); } } diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 6047c5590194..f7a4bdfe4c50 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -108,6 +108,7 @@ public void testDistributedMerge() throws Exception { } @Test + @SuppressWarnings("unchecked") public void testDistributedFieldsMerge() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("distrib", "true"); @@ -129,6 +130,14 @@ public void testDistributedFieldsMerge() throws Exception { LukeResponse.FieldInfo idField = fields.get("id"); assertNotNull("'id' field should be present", idField); assertEquals("id field type should be string", "string", idField.getType()); + + // Index flags should be consistent across shards (both shards have data for "name"). + // The merge validates non-null index flags for consistency; if they were inconsistent, + // the request would have thrown an error. Verify the merged result has index flags. 
+ NamedList mergedFieldsNL = (NamedList) rsp.getResponse().get("fields"); + NamedList rawNameField = (NamedList) mergedFieldsNL.get("name"); + assertNotNull( + "index flags should be present when both shards have data", rawNameField.get("index")); } @Test @@ -259,12 +268,14 @@ public void testSparseShards() throws Exception { assertNotNull("cat_s type", catField.getType()); assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); - // Verify index flags are present (from the one shard that has the document). - // Fields that are indexed and have a live doc should get index flags via the merge's - // computeIfAbsent (take-first-non-null) logic. - NamedList mergedFields = (NamedList) rsp.getResponse().get("fields"); - assertNotNull("merged fields NamedList should be present", mergedFields); - NamedList rawNameField = (NamedList) mergedFields.get("name"); + // Verify index flags in the merged response for the static "name" field. + // Luke only reports fields present in the Lucene index (via reader.getFieldInfos()), + // so only the shard with the document contributes "name" to the merge. The merge + // validates consistency of index flags across shards (null is always consistent), + // but with 11 empty shards, only one shard contributes index flags here. 
+ NamedList mergedFieldsNL = (NamedList) rsp.getResponse().get("fields"); + assertNotNull("merged fields NamedList should be present", mergedFieldsNL); + NamedList rawNameField = (NamedList) mergedFieldsNL.get("name"); assertNotNull("raw 'name' field should be in merged fields", rawNameField); // The index flags key may or may not be present depending on whether the field is indexed // and stored — but if present, it should be a non-empty string From bb3397f2f2145377239ab742765c521edacd4e6f Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 19 Feb 2026 18:23:21 -0500 Subject: [PATCH 09/46] refactor with MergedFieldData --- .../handler/admin/LukeRequestHandler.java | 237 +++++++++--------- 1 file changed, 123 insertions(+), 114 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index a4f43a280e1a..4024b61175c3 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -245,21 +245,53 @@ && handleDistributed(req, rsp)) { rsp.setHttpCaching(false); } - /** Tracks the first-seen valid properties of a field across shards. */ - private static class ExpectedFieldConfig { + /** Per-field accumulation state across shards: merged response data and index flags tracking. 
*/ + private static class MergedFieldData { + final SimpleOrderedMap merged = new SimpleOrderedMap<>(); + final String originalShardAddr; + private Object indexFlags; + private String indexFlagsShardAddr; + + MergedFieldData(String shardAddr, Object indexFlags) { + this.originalShardAddr = shardAddr; + if (indexFlags != null) { + this.indexFlags = indexFlags; + this.indexFlagsShardAddr = shardAddr; + } + } + } + + private static class ShardData { final String shardAddr; - final LukeResponse.FieldInfo fieldInfo; - Object indexFlags; - String indexFlagsShardAddr; + final Map shardFieldInfo; + private NamedList indexInfo; + private SimpleOrderedMap detailedFields; - ExpectedFieldConfig(String shardAddr, LukeResponse.FieldInfo fieldInfo) { + ShardData(String shardAddr, Map shardFieldInfo) { this.shardAddr = shardAddr; - this.fieldInfo = fieldInfo; - Object flags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS); - if (flags != null) { - this.indexFlags = flags; - this.indexFlagsShardAddr = shardAddr; + this.shardFieldInfo = shardFieldInfo; + } + + void setIndexInfo(NamedList indexInfo) { + this.indexInfo = indexInfo; + } + + void addDetailedFieldInfo(String fieldName, SimpleOrderedMap fieldStats) { + if (detailedFields == null) { + detailedFields = new SimpleOrderedMap<>(); + } + detailedFields.add(fieldName, fieldStats); + } + + SimpleOrderedMap toResponseEntry() { + SimpleOrderedMap entry = new SimpleOrderedMap<>(); + if (indexInfo != null) { + entry.add(RSP_INDEX, indexInfo); + } + if (detailedFields != null) { + entry.add(RSP_FIELDS, detailedFields); } + return entry; } } @@ -268,8 +300,7 @@ private static class ExpectedFieldConfig { * short-circuited (e.g. single-shard collection) and the caller should fall through to local * logic. 
*/ - private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) - throws IOException { + private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); ResponseBuilder rb = new ResponseBuilder(req, rsp, Collections.emptyList()); shardHandler.prepDistributed(rb); @@ -324,9 +355,7 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List> mergedFields = new HashMap<>(); - Map expectedFieldConfigs = new HashMap<>(); - SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); + Map mergedFields = new HashMap<>(); if (!responses.isEmpty()) { ShardResponse firstRsp = responses.getFirst(); @@ -346,12 +375,14 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List shardDataList = new ArrayList<>(); + for (ShardResponse srsp : responses) { - String shardAddr = shardAddress(srsp); NamedList shardRsp = srsp.getSolrResponse().getResponse(); LukeResponse lukeRsp = new LukeResponse(); lukeRsp.setResponse(shardRsp); - SimpleOrderedMap perShardEntry = new SimpleOrderedMap<>(); + ShardData shardData = new ShardData(shardAddress(srsp), lukeRsp.getFieldInfo()); + NamedList shardIndex = lukeRsp.getIndexInfo(); if (shardIndex != null) { totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocsAsLong()).orElse(0L); @@ -360,43 +391,19 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List shardFieldInfo = lukeRsp.getFieldInfo(); - if (shardFieldInfo != null) { - SimpleOrderedMap perShardFields = new SimpleOrderedMap<>(); - - for (Map.Entry entry : shardFieldInfo.entrySet()) { - String fieldName = entry.getKey(); - LukeResponse.FieldInfo fi = entry.getValue(); - - SimpleOrderedMap merged = - mergedFields.computeIfAbsent(fieldName, k -> new SimpleOrderedMap<>()); - - mergeShardField(shardAddr, fi, merged, expectedFieldConfigs); - - // Detailed stats — kept per-shard, not merged - NamedList topTerms = fi.getTopTerms(); - Object histogram = 
fi.getExtras().get(KEY_HISTOGRAM); + processShardFields(shardData, mergedFields); + shardDataList.add(shardData); + } - if (topTerms != null || fi.getDistinct() > 0 || histogram != null) { - perShardEntry.putIfAbsent(RSP_FIELDS, perShardFields); - SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); - if (topTerms != null) { - detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); - } - if (fi.getDistinct() > 0) { - detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); - } - if (histogram != null) { - detailedFieldInfo.add(KEY_HISTOGRAM, histogram); - } - perShardFields.add(fieldName, detailedFieldInfo); - } - } + SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); + for (ShardData sd : shardDataList) { + SimpleOrderedMap entry = sd.toResponseEntry(); + if (!entry.isEmpty()) { + shardsInfo.add(sd.shardAddr, entry); } - shardsInfo.add(shardAddr, perShardEntry); } SimpleOrderedMap mergedIndex = new SimpleOrderedMap<>(); @@ -408,8 +415,8 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List mergedFieldsNL = new SimpleOrderedMap<>(); - for (Map.Entry> entry : mergedFields.entrySet()) { - mergedFieldsNL.add(entry.getKey(), entry.getValue()); + for (Map.Entry entry : mergedFields.entrySet()) { + mergedFieldsNL.add(entry.getKey(), entry.getValue().merged); } rsp.add(RSP_FIELDS, mergedFieldsNL); } @@ -417,90 +424,92 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List mergedFields) { + if (shardData.shardFieldInfo == null) { + return; + } + for (Map.Entry entry : shardData.shardFieldInfo.entrySet()) { + String fieldName = entry.getKey(); + LukeResponse.FieldInfo fi = entry.getValue(); + + mergeShardField(shardData.shardAddr, fi, mergedFields); + + // Detailed stats — kept per-shard, not merged + NamedList topTerms = fi.getTopTerms(); + Object histogram = fi.getExtras().get(KEY_HISTOGRAM); + + if (topTerms != null || fi.getDistinct() > 0 || histogram != null) { + SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); + if 
(topTerms != null) { + detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); + } + if (fi.getDistinct() > 0) { + detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); + } + if (histogram != null) { + detailedFieldInfo.add(KEY_HISTOGRAM, histogram); + } + shardData.addDetailedFieldInfo(fieldName, detailedFieldInfo); + } + } + } + private void mergeShardField( - String shardAddr, - LukeResponse.FieldInfo fi, - SimpleOrderedMap merged, - Map expectedFieldConfigs) { + String shardAddr, LukeResponse.FieldInfo fi, Map mergedFields) { String fieldName = fi.getName(); - ExpectedFieldConfig origin = expectedFieldConfigs.get(fieldName); - if (origin == null) { - origin = new ExpectedFieldConfig(shardAddr, fi); - expectedFieldConfigs.put(fieldName, origin); + Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); + + MergedFieldData fieldData = mergedFields.get(fieldName); + if (fieldData == null) { + fieldData = new MergedFieldData(shardAddr, indexFlags); + mergedFields.put(fieldName, fieldData); + // First shard to report this field: populate merged with schema-derived attrs - merged.add(KEY_TYPE, fi.getType()); - merged.add(KEY_SCHEMA_FLAGS, fi.getSchema()); + fieldData.merged.add(KEY_TYPE, fi.getType()); + fieldData.merged.add(KEY_SCHEMA_FLAGS, fi.getSchema()); Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); if (dynBase != null) { - merged.add(KEY_DYNAMIC_BASE, dynBase); + fieldData.merged.add(KEY_DYNAMIC_BASE, dynBase); } - if (origin.indexFlags != null) { - merged.add(KEY_INDEX_FLAGS, origin.indexFlags); + if (fieldData.indexFlags != null) { + fieldData.merged.add(KEY_INDEX_FLAGS, fieldData.indexFlags); } - } else { - // Subsequent shards: validate consistency - validateFieldAttr( - fieldName, - KEY_TYPE, - fi.getType(), - origin.fieldInfo.getType(), - shardAddr, - origin.shardAddr); - validateFieldAttr( - fieldName, - KEY_SCHEMA_FLAGS, - fi.getSchema(), - origin.fieldInfo.getSchema(), - shardAddr, - origin.shardAddr); - validateFieldAttr( - fieldName, - 
KEY_DYNAMIC_BASE, - fi.getExtras().get(KEY_DYNAMIC_BASE), - origin.fieldInfo.getExtras().get(KEY_DYNAMIC_BASE), - shardAddr, - origin.shardAddr); - - Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); - if (indexFlags != null) { - if (origin.indexFlags == null) { - origin.indexFlags = indexFlags; - origin.indexFlagsShardAddr = shardAddr; - merged.add(KEY_INDEX_FLAGS, indexFlags); - } else { - validateFieldAttr( - fieldName, - KEY_INDEX_FLAGS, - indexFlags, - origin.indexFlags, - shardAddr, - origin.indexFlagsShardAddr); - } + } else if (indexFlags != null) { + // Subsequent shards: validate index flags consistency + if (fieldData.indexFlags == null) { + fieldData.indexFlags = indexFlags; + fieldData.indexFlagsShardAddr = shardAddr; + fieldData.merged.add(KEY_INDEX_FLAGS, indexFlags); + } else { + validateFieldAttr( + fieldName, KEY_INDEX_FLAGS, indexFlags, fieldData.indexFlags, + shardAddr, fieldData.indexFlagsShardAddr); } } Long docsAsLong = fi.getDocsAsLong(); if (docsAsLong != null && docsAsLong > 0) { - merged.compute( + fieldData.merged.compute( KEY_DOCS_AS_LONG, (key, val) -> val == null ? docsAsLong : (Long) val + docsAsLong); } } - /** Validates that a schema-derived attribute value is identical across shards. */ + /** Validates that a field attribute value is identical across shards. */ private void validateFieldAttr( String fieldName, String attrName, Object currentVal, - Object originVal, + Object expectedVal, String currentShardAddr, - String originShardAddr) { - if (currentVal == null && originVal == null) { + String expectedShardAddr) { + if (currentVal == null && expectedVal == null) { return; } String currentStr = currentVal != null ? currentVal.toString() : null; - String originStr = originVal != null ? originVal.toString() : null; - if (!Objects.equals(currentStr, originStr)) { + String expectedStr = expectedVal != null ? 
expectedVal.toString() : null; + if (!Objects.equals(currentStr, expectedStr)) { throw new SolrException( ErrorCode.SERVER_ERROR, "Field '" @@ -508,9 +517,9 @@ private void validateFieldAttr( + "' has inconsistent '" + attrName + "' across shards: '" - + originStr + + expectedStr + "' (from " - + originShardAddr + + expectedShardAddr + ") vs '" + currentStr + "' (from " From cac6d5700608c019711ef7c543e84cbd932c75de Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 20 Feb 2026 14:31:03 -0500 Subject: [PATCH 10/46] actually let's validate these --- .../handler/admin/LukeRequestHandler.java | 50 +++++++++++++------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 4024b61175c3..af9ebbf60dd6 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -245,17 +245,20 @@ && handleDistributed(req, rsp)) { rsp.setHttpCaching(false); } - /** Per-field accumulation state across shards: merged response data and index flags tracking. */ + /** Per-field accumulation state across shards: merged response data and field validation. 
*/ private static class MergedFieldData { final SimpleOrderedMap merged = new SimpleOrderedMap<>(); final String originalShardAddr; + final LukeResponse.FieldInfo originalFieldInfo; private Object indexFlags; private String indexFlagsShardAddr; - MergedFieldData(String shardAddr, Object indexFlags) { + MergedFieldData(String shardAddr, LukeResponse.FieldInfo fieldInfo) { this.originalShardAddr = shardAddr; - if (indexFlags != null) { - this.indexFlags = indexFlags; + this.originalFieldInfo = fieldInfo; + Object flags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS); + if (flags != null) { + this.indexFlags = flags; this.indexFlagsShardAddr = shardAddr; } } @@ -459,11 +462,10 @@ private void mergeShardField( String shardAddr, LukeResponse.FieldInfo fi, Map mergedFields) { String fieldName = fi.getName(); - Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); MergedFieldData fieldData = mergedFields.get(fieldName); if (fieldData == null) { - fieldData = new MergedFieldData(shardAddr, indexFlags); + fieldData = new MergedFieldData(shardAddr, fi); mergedFields.put(fieldName, fieldData); // First shard to report this field: populate merged with schema-derived attrs @@ -476,16 +478,32 @@ private void mergeShardField( if (fieldData.indexFlags != null) { fieldData.merged.add(KEY_INDEX_FLAGS, fieldData.indexFlags); } - } else if (indexFlags != null) { - // Subsequent shards: validate index flags consistency - if (fieldData.indexFlags == null) { - fieldData.indexFlags = indexFlags; - fieldData.indexFlagsShardAddr = shardAddr; - fieldData.merged.add(KEY_INDEX_FLAGS, indexFlags); - } else { - validateFieldAttr( - fieldName, KEY_INDEX_FLAGS, indexFlags, fieldData.indexFlags, - shardAddr, fieldData.indexFlagsShardAddr); + } else { + // Subsequent shards: validate consistency + validateFieldAttr( + fieldName, KEY_TYPE, fi.getType(), + fieldData.originalFieldInfo.getType(), + shardAddr, fieldData.originalShardAddr); + validateFieldAttr( + fieldName, KEY_SCHEMA_FLAGS, 
fi.getSchema(), + fieldData.originalFieldInfo.getSchema(), + shardAddr, fieldData.originalShardAddr); + validateFieldAttr( + fieldName, KEY_DYNAMIC_BASE, fi.getExtras().get(KEY_DYNAMIC_BASE), + fieldData.originalFieldInfo.getExtras().get(KEY_DYNAMIC_BASE), + shardAddr, fieldData.originalShardAddr); + + Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); + if (indexFlags != null) { + if (fieldData.indexFlags == null) { + fieldData.indexFlags = indexFlags; + fieldData.indexFlagsShardAddr = shardAddr; + fieldData.merged.add(KEY_INDEX_FLAGS, indexFlags); + } else { + validateFieldAttr( + fieldName, KEY_INDEX_FLAGS, indexFlags, fieldData.indexFlags, + shardAddr, fieldData.indexFlagsShardAddr); + } } } From a4410f3a05661b420f03884d4d4fc62a79f92eec Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 20 Feb 2026 14:33:04 -0500 Subject: [PATCH 11/46] actually let's validate these --- .../modules/indexing-guide/pages/luke-request-handler.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index c0531b4a2ede..2186aeec5b78 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -166,4 +166,5 @@ The `index` flags are index-derived (not schema-derived) and may legitimately di Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not merged across shards. These statistics are shard-local and appear in each shard's entry under the `shards` key. -For cross-shard term-level aggregation, use Solr's xref:query-guide:faceting.adoc[faceting API] which provides refinement and distributed aggregation. 
+For cross-shard term-level aggregation, Solr's xref:query-guide:faceting.adoc[faceting API] provides refinement and distributed aggregation, though it differs from Luke's term statistics: faceting ignores deleted documents, is more flexible, and is generally more expensive. +A case could be made for adding merge strategies for Luke-powered term statistics (which offer better performance at the cost of flexibility), however this would require community interest to motivate. From 6fec56e9b67d6193b46c81a67b78ad87edaf4bd5 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 20 Feb 2026 15:15:41 -0500 Subject: [PATCH 12/46] tidy --- .../handler/admin/LukeRequestHandler.java | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index af9ebbf60dd6..f48330bd1db5 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -427,8 +427,7 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List mergedFields) { + private void processShardFields(ShardData shardData, Map mergedFields) { if (shardData.shardFieldInfo == null) { return; } @@ -481,17 +480,26 @@ private void mergeShardField( } else { // Subsequent shards: validate consistency validateFieldAttr( - fieldName, KEY_TYPE, fi.getType(), + fieldName, + KEY_TYPE, + fi.getType(), fieldData.originalFieldInfo.getType(), - shardAddr, fieldData.originalShardAddr); + shardAddr, + fieldData.originalShardAddr); validateFieldAttr( - fieldName, KEY_SCHEMA_FLAGS, fi.getSchema(), + fieldName, + KEY_SCHEMA_FLAGS, + fi.getSchema(), fieldData.originalFieldInfo.getSchema(), - shardAddr, fieldData.originalShardAddr); + shardAddr, + fieldData.originalShardAddr); validateFieldAttr( - fieldName, KEY_DYNAMIC_BASE, 
fi.getExtras().get(KEY_DYNAMIC_BASE), + fieldName, + KEY_DYNAMIC_BASE, + fi.getExtras().get(KEY_DYNAMIC_BASE), fieldData.originalFieldInfo.getExtras().get(KEY_DYNAMIC_BASE), - shardAddr, fieldData.originalShardAddr); + shardAddr, + fieldData.originalShardAddr); Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); if (indexFlags != null) { @@ -501,8 +509,12 @@ private void mergeShardField( fieldData.merged.add(KEY_INDEX_FLAGS, indexFlags); } else { validateFieldAttr( - fieldName, KEY_INDEX_FLAGS, indexFlags, fieldData.indexFlags, - shardAddr, fieldData.indexFlagsShardAddr); + fieldName, + KEY_INDEX_FLAGS, + indexFlags, + fieldData.indexFlags, + shardAddr, + fieldData.indexFlagsShardAddr); } } } From 1cd6ac6a87d6f14653f1ff6627d251bdbe080ebc Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 20 Feb 2026 17:34:40 -0500 Subject: [PATCH 13/46] remove unnecessary ternary --- .../java/org/apache/solr/handler/admin/LukeRequestHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index f48330bd1db5..4e4fc2799ce7 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -322,7 +322,7 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { for (String shard : shards) { ModifiableSolrParams params = new ModifiableSolrParams(req.getParams()); - params.set(CommonParams.QT, reqPath != null ? 
reqPath : "/admin/luke"); + params.set(CommonParams.QT, reqPath); ShardHandler.setShardAttributesToParams(params, sreq.purpose); shardHandler.submit(sreq, shard, params); } From 06b913405c307936cd797c9b6f1a8087c80c27ef Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 20 Feb 2026 18:02:02 -0500 Subject: [PATCH 14/46] don't show nulled out fields with show=schema --- .../handler/admin/LukeRequestHandler.java | 6 +++- .../admin/LukeRequestHandlerDistribTest.java | 32 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 4e4fc2799ce7..4899a092c157 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -384,7 +384,11 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List shardRsp = srsp.getSolrResponse().getResponse(); LukeResponse lukeRsp = new LukeResponse(); lukeRsp.setResponse(shardRsp); - ShardData shardData = new ShardData(shardAddress(srsp), lukeRsp.getFieldInfo()); + // Only process field info if the shard explicitly included it in its response. + // LukeResponse.getFieldInfo() falls back to schema.fields which has incomplete data. + Map fieldInfo = + shardRsp.get(RSP_FIELDS) != null ? 
lukeRsp.getFieldInfo() : null; + ShardData shardData = new ShardData(shardAddress(srsp), fieldInfo); NamedList shardIndex = lukeRsp.getIndexInfo(); if (shardIndex != null) { diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index f7a4bdfe4c50..2eba54ca2b1d 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -289,6 +289,38 @@ public void testSparseShards() throws Exception { } } + @Test + @SuppressWarnings("unchecked") + public void testDistribShowSchema() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("show", "schema"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + NamedList raw = rsp.getResponse(); + NamedList schema = (NamedList) raw.get("schema"); + assertNotNull("schema section should be present", schema); + + NamedList fields = (NamedList) schema.get("fields"); + assertNotNull("schema fields should be present", fields); + assertNotNull("'id' should be in schema fields", fields.get("id")); + assertNotNull("'name' should be in schema fields", fields.get("name")); + + assertNotNull("dynamicFields should be present", schema.get("dynamicFields")); + assertNotNull("uniqueKeyField should be present", schema.get("uniqueKeyField")); + assertEquals("uniqueKeyField should be 'id'", "id", schema.get("uniqueKeyField")); + assertNotNull("types should be present", schema.get("types")); + assertNotNull("similarity should be present", schema.get("similarity")); + + // show=schema should not produce merged top-level fields (matches local mode behavior) + assertNull("top-level fields should not be present with show=schema", raw.get("fields")); + + // Shards are present for consistency: each shard entry mirrors the 
per-shard index info, + // just as the top-level index section is present in local mode with show=schema + assertNotNull("shards should still be present with show=schema", raw.get("shards")); + } + @Test public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception { String singleShardCollection = "lukeSingleShard"; From f0a9750bb9b2dfa7f2406fc26115e0f4c6c47c84 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Sat, 21 Feb 2026 15:13:32 -0500 Subject: [PATCH 15/46] test degenerate mismatch schema validation --- .../admin/LukeRequestHandlerDistribTest.java | 141 +++++++++++++++++- 1 file changed, 140 insertions(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 2eba54ca2b1d..28b04e60c437 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -16,19 +16,30 @@ */ package org.apache.solr.handler.admin; +import java.lang.invoke.MethodHandles; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.SolrQuery; +import org.apache.solr.client.solrj.request.schema.SchemaRequest; import org.apache.solr.client.solrj.response.LukeResponse; +import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.params.ModifiableSolrParams; import 
org.apache.solr.common.util.NamedList; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class LukeRequestHandlerDistribTest extends SolrCloudTestCase { @@ -37,7 +48,10 @@ public class LukeRequestHandlerDistribTest extends SolrCloudTestCase { @BeforeClass public static void setupCluster() throws Exception { - configureCluster(2).addConfig("conf", configset("cloud-dynamic")).configure(); + configureCluster(2) + .addConfig("conf", configset("cloud-dynamic")) + .addConfig("managed", configset("cloud-managed")) + .configure(); CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1) .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); @@ -60,6 +74,19 @@ public static void afterClass() throws Exception { shutdownCluster(); } + /** Walks the exception cause chain and concatenates all messages. */ + private static String getExceptionChainMessage(Throwable t) { + StringBuilder sb = new StringBuilder(); + while (t != null) { + if (t.getMessage() != null) { + if (sb.length() > 0) sb.append(" -> "); + sb.append(t.getMessage()); + } + t = t.getCause(); + } + return sb.toString(); + } + /** Sends a luke request and wraps the raw response in a typed {@link LukeResponse}. */ private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); @@ -321,6 +348,118 @@ public void testDistribShowSchema() throws Exception { assertNotNull("shards should still be present with show=schema", raw.get("shards")); } + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /** + * Verifies that distributed Luke detects inconsistent index flags across shards. 
Uses Schema API + * to change a field's {@code stored} property between indexing on different shards, producing + * different Lucene FieldInfo (and thus different index flags strings) on each shard. + */ + @Test + public void testInconsistentIndexFlagsAcrossShards() throws Exception { + String collection = "lukeInconsistentFlags"; + try { + System.setProperty("managed.schema.mutable", "true"); + CollectionAdminRequest.createCollection(collection, "managed", 2, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } catch (Exception e) { + log.error("yooo", e); + } + + cluster.waitForActiveCollection(collection, 2, 2); + + try { + // Add a field with stored=true, indexed=true + Map fieldAttrs = new LinkedHashMap<>(); + fieldAttrs.put("name", "test_flag_s"); + fieldAttrs.put("type", "string"); + fieldAttrs.put("stored", true); + fieldAttrs.put("indexed", true); + new SchemaRequest.AddField(fieldAttrs).process(cluster.getSolrClient(), collection); + + // Index a target doc WITH the field, plus seed docs without it + SolrInputDocument targetDoc = new SolrInputDocument(); + targetDoc.addField("id", "target"); + targetDoc.addField("test_flag_s", "has_indexed"); + cluster.getSolrClient().add(collection, targetDoc); + + List seedDocs = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "seed_" + i); + seedDocs.add(doc); + } + cluster.getSolrClient().add(collection, seedDocs); + cluster.getSolrClient().commit(collection); + + // Find which shard has the target doc by querying each replica directly. + // Must use distrib=false — SolrCloud defaults distrib to true even on direct replica queries. 
+ DocCollection docColl = cluster.getSolrClient().getClusterState().getCollection(collection); + String targetSliceName = null; + for (Slice slice : docColl.getSlices()) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("id:target"); + q.set("distrib", "false"); + QueryResponse qr = client.query(q); + if (qr.getResults().getNumFound() > 0) { + targetSliceName = slice.getName(); + } + } + } + assertNotNull("target doc should exist on a shard", targetSliceName); + + // Find a seed doc on the other shard + String otherDocId = null; + for (Slice slice : docColl.getSlices()) { + if (!slice.getName().equals(targetSliceName)) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("*:*"); + q.setRows(1); + q.set("distrib", "false"); + QueryResponse qr = client.query(q); + assertTrue("other shard should have seed docs", qr.getResults().getNumFound() > 0); + otherDocId = (String) qr.getResults().get(0).getFieldValue("id"); + } + break; + } + } + assertNotNull("should find a seed doc on the other shard", otherDocId); + + // Change the field to stored=false via Schema API + fieldAttrs.put("stored", false); + new SchemaRequest.ReplaceField(fieldAttrs).process(cluster.getSolrClient(), collection); + + // Reload collection to pick up schema change + CollectionAdminRequest.reloadCollection(collection).process(cluster.getSolrClient()); + + // Update the other-shard doc to include the field (now unstored in the new segment) + SolrInputDocument updateDoc = new SolrInputDocument(); + updateDoc.addField("id", otherDocId); + updateDoc.addField("test_flag_s", "not_indexed"); + cluster.getSolrClient().add(collection, updateDoc); + cluster.getSolrClient().commit(collection); + + // Distributed Luke should detect inconsistent index flags between the two shards. 
+ // One shard has stored=true segments, the other has stored=false segments for test_flag_s. + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("fl", "test_flag_s"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); + // The server throws SolrException, but CloudSolrClient may wrap it in + // SolrServerException after retry exhaustion. Check the full exception chain. + String fullMessage = getExceptionChainMessage(ex); + assertTrue( + "exception chain should mention inconsistent index flags: " + fullMessage, + fullMessage.contains("inconsistent")); + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } + @Test public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception { String singleShardCollection = "lukeSingleShard"; From 89c64a1b0756f45cf21c86876535b5af558fc7da Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 23 Feb 2026 10:50:42 -0500 Subject: [PATCH 16/46] test deferred index flags --- .../handler/admin/LukeRequestHandler.java | 7 +- .../admin/LukeRequestHandlerDistribTest.java | 95 +++++++++++++++++++ 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 4899a092c157..673a4c35b5c1 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -538,9 +538,6 @@ private void validateFieldAttr( Object expectedVal, String currentShardAddr, String expectedShardAddr) { - if (currentVal == null && expectedVal == null) { - return; - } String currentStr = currentVal != null ? currentVal.toString() : null; String expectedStr = expectedVal != null ? 
expectedVal.toString() : null; if (!Objects.equals(currentStr, expectedStr)) { @@ -837,7 +834,7 @@ private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws I StoredFields storedFields = reader.storedFields(); // Deal with the chance that the first bunch of terms are in deleted documents. Is there a // better way? - for (int idx = 0; idx < 1000 && postingsEnum == null; ++idx) { + for (int idx = 0; idx < 1000; ++idx) { text = termsEnum.next(); // Ran off the end of the terms enum without finding any live docs with that field in them. if (text == null) { @@ -846,7 +843,7 @@ private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws I postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); final Bits liveDocs = reader.getLiveDocs(); if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - if (liveDocs != null && liveDocs.get(postingsEnum.docID())) { + if (liveDocs != null && !liveDocs.get(postingsEnum.docID())) { continue; } return storedFields.document(postingsEnum.docID()); diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 28b04e60c437..ac2c4ce11611 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -460,6 +460,101 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { } } + /** + * Exercises the deferred index flags path (lines 510-513 of LukeRequestHandler): when the first + * shard to report a field has null index flags (all its live docs for that field were deleted, but + * the field persists in FieldInfos from unmerged segments), the merge should still populate index + * flags from a later shard that has live docs. + * + *

Setup: 8-shard collection. Each shard gets one doc with field "flag_target_s" (which is then + * deleted) plus an anchor doc without it (to keep the shard non-empty). Only one shard retains a + * live doc with "flag_target_s". With 8 shards, the probability that the one live shard is + * processed first is 1/8 = 12.5%, so we exercise the deferred path ~87.5% of the time. Either + * way, the merged response should have index flags for the field. + */ + @Test + @SuppressWarnings("unchecked") + public void testDeferredIndexFlags() throws Exception { + String collection = "lukeDeferredFlags"; + int numShards = 8; + CollectionAdminRequest.createCollection(collection, "conf", numShards, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster.waitForActiveCollection(collection, numShards, numShards); + + try { + // Index one doc with the target field per shard, plus an anchor doc without it. + // The anchor doc keeps the shard non-empty after we delete the target doc. + // We use enough docs to spread across shards via hash routing. + List docs = new ArrayList<>(); + for (int i = 0; i < numShards * 4; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "target_" + i); + doc.addField("flag_target_s", "value_" + i); + docs.add(doc); + + SolrInputDocument anchor = new SolrInputDocument(); + anchor.addField("id", "anchor_" + i); + anchor.addField("name", "anchor"); + docs.add(anchor); + } + cluster.getSolrClient().add(collection, docs); + cluster.getSolrClient().commit(collection); + + // Delete all target docs, leaving only anchors (which don't have flag_target_s) + // on most shards. Keep exactly one target doc alive. + for (int i = 1; i < numShards * 4; i++) { + cluster.getSolrClient().deleteById(collection, "target_" + i); + } + // Do NOT force merge — we need the deleted docs' field to persist in FieldInfos + // so that getFirstLiveDoc returns null, producing null index flags. 
+ cluster.getSolrClient().commit(collection); + + // Verify: distributed Luke should have index flags for flag_target_s in the merged response, + // whether they came from the first shard (constructor path) or a later shard (deferred path). + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("fl", "flag_target_s"); + + LukeResponse rsp = requestLuke(collection, params); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + LukeResponse.FieldInfo targetField = fields.get("flag_target_s"); + assertNotNull("'flag_target_s' field should be present", targetField); + + // The merged response should have index flags from whichever shard had live docs + NamedList mergedFieldsNL = (NamedList) rsp.getResponse().get("fields"); + NamedList rawTargetField = (NamedList) mergedFieldsNL.get("flag_target_s"); + assertNotNull("raw 'flag_target_s' should be in merged fields", rawTargetField); + assertNotNull( + "index flags should be present (populated from shard with live docs)", + rawTargetField.get("index")); + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } + + /** + * Exercises the shard error handling path in handleDistributed (lines 336-343). Passing + * docId=0 with show=schema triggers a BAD_REQUEST on each shard (the local handler rejects + * a docId combined with a non-DOC show style). The distributed handler should propagate + * this as a SolrException. 
+ */ + @Test + public void testDistributedShardError() { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("docId", "0"); + params.set("show", "schema"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); + String fullMessage = getExceptionChainMessage(ex); + assertTrue( + "exception should mention doc style mismatch: " + fullMessage, + fullMessage.contains("missing doc param for doc style")); + } + @Test public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception { String singleShardCollection = "lukeSingleShard"; From 3a348cd4e7b01559684539b40832787d08af5295 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 23 Feb 2026 15:57:18 -0500 Subject: [PATCH 17/46] implement distributed show=doc --- .../handler/admin/LukeRequestHandler.java | 51 ++++-- .../admin/LukeRequestHandlerDistribTest.java | 147 ++++++++++++++++-- 2 files changed, 176 insertions(+), 22 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 673a4c35b5c1..d87049e7423c 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -202,14 +202,10 @@ && handleDistributed(req, rsp)) { String v = uniqueKey.getType().toInternal(params.get(ID)); Term t = new Term(uniqueKey.getName(), v); docId = searcher.getFirstMatch(t); - if (docId < 0) { - throw new SolrException( - SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID)); - } } // Read the document from the index - if (docId != null) { + if (docId != null && docId > -1) { if (style != null && style != ShowStyle.DOC) { throw new SolrException(ErrorCode.BAD_REQUEST, "missing doc param for doc style"); } @@ -304,6 +300,16 @@ SimpleOrderedMap toResponseEntry() { * logic. 
*/ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { + SolrParams reqParams = req.getParams(); + + // docId is a Lucene-internal integer, not meaningful across shards + if (reqParams.getInt(DOC_ID) != null) { + throw new SolrException( + ErrorCode.BAD_REQUEST, + "docId parameter is not supported in distributed mode." + + " Use the id parameter to look up documents by their Solr unique key."); + } + ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); ResponseBuilder rb = new ResponseBuilder(req, rsp, Collections.emptyList()); shardHandler.prepDistributed(rb); @@ -321,7 +327,7 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { String reqPath = (String) req.getContext().get(PATH); for (String shard : shards) { - ModifiableSolrParams params = new ModifiableSolrParams(req.getParams()); + ModifiableSolrParams params = new ModifiableSolrParams(reqParams); params.set(CommonParams.QT, reqPath); ShardHandler.setShardAttributesToParams(params, sreq.purpose); shardHandler.submit(sreq, shard, params); @@ -331,7 +337,6 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { if (lastSrsp == null) { throw new SolrException(ErrorCode.SERVER_ERROR, "No responses received from shards"); } - List responses = sreq.responses; for (ShardResponse srsp : responses) { if (srsp.getException() != null) { @@ -353,12 +358,6 @@ private static String shardAddress(ShardResponse srsp) { } private void mergeDistributedResponses(SolrQueryResponse rsp, List responses) { - long totalNumDocs = 0; - int totalMaxDoc = 0; - long totalDeletedDocs = 0; - int totalSegmentCount = 0; - - Map mergedFields = new HashMap<>(); if (!responses.isEmpty()) { ShardResponse firstRsp = responses.getFirst(); @@ -378,6 +377,13 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List mergedFields = new HashMap<>(); + String firstDocShard = null; + Object firstDoc = null; List shardDataList = new 
ArrayList<>(); for (ShardResponse srsp : responses) { @@ -402,6 +408,22 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List mergedFieldsNL = new SimpleOrderedMap<>(); for (Map.Entry entry : mergedFields.entrySet()) { diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index ac2c4ce11611..8dec00b69704 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -16,6 +16,8 @@ */ package org.apache.solr.handler.admin; +import static org.apache.solr.common.params.CommonParams.DISTRIB; + import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -35,6 +37,11 @@ import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.embedded.JettySolrRunner; +import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.CommitUpdateCommand; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -462,9 +469,9 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { /** * Exercises the deferred index flags path (lines 510-513 of LukeRequestHandler): when the first - * shard to report a field has null index flags (all its live docs for that field were deleted, but - * the field persists in FieldInfos from unmerged segments), the merge should still populate index - * flags from a later shard that has live docs. 
+ * shard to report a field has null index flags (all its live docs for that field were deleted, + * but the field persists in FieldInfos from unmerged segments), the merge should still populate + * index flags from a later shard that has live docs. * *

Setup: 8-shard collection. Each shard gets one doc with field "flag_target_s" (which is then * deleted) plus an anchor doc without it (to keep the shard non-empty). Only one shard retains a @@ -476,7 +483,7 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { @SuppressWarnings("unchecked") public void testDeferredIndexFlags() throws Exception { String collection = "lukeDeferredFlags"; - int numShards = 8; + int numShards = 16; CollectionAdminRequest.createCollection(collection, "conf", numShards, 1) .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); cluster.waitForActiveCollection(collection, numShards, numShards); @@ -536,16 +543,17 @@ public void testDeferredIndexFlags() throws Exception { } /** - * Exercises the shard error handling path in handleDistributed (lines 336-343). Passing - * docId=0 with show=schema triggers a BAD_REQUEST on each shard (the local handler rejects - * a docId combined with a non-DOC show style). The distributed handler should propagate - * this as a SolrException. + * Exercises shard error propagation through the distributed doc lookup path. Passing id=0 with + * show=schema triggers a BAD_REQUEST on the shard that has doc 0 (the local handler rejects an id + * combined with a non-DOC show style). The distributed handler should propagate this as a + * SolrException, even though other shards respond with NOT_FOUND (which is handled gracefully in + * the doc lookup path). */ @Test public void testDistributedShardError() { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("distrib", "true"); - params.set("docId", "0"); + params.set("id", "0"); params.set("show", "schema"); Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); @@ -555,6 +563,127 @@ public void testDistributedShardError() { fullMessage.contains("missing doc param for doc style")); } + /** Verifies that the docId parameter is rejected in distributed mode. 
*/ + @Test + public void testDistributedDocIdRejected() { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("docId", "0"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); + String fullMessage = getExceptionChainMessage(ex); + assertTrue( + "exception should mention docId not supported: " + fullMessage, + fullMessage.contains("docId parameter is not supported in distributed mode")); + } + + /** Verifies distributed doc lookup returns the document when it exists. */ + @Test + public void testDistributedDocLookupFound() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("id", "0"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + NamedList raw = rsp.getResponse(); + assertNotNull("doc section should be present", raw.get("doc")); + assertNotNull("index section should be present", raw.get("index")); + assertNotNull("info section should be present", raw.get("info")); + } + + /** Verifies distributed doc lookup returns an empty response for a non-existent ID. */ + @Test + public void testDistributedDocLookupNotFound() throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("id", "this_id_does_not_exist_anywhere"); + + LukeResponse rsp = requestLuke(COLLECTION, params); + + NamedList raw = rsp.getResponse(); + assertNull("doc section should NOT be present for missing ID", raw.get("doc")); + } + + /** + * Verifies that distributed doc lookup detects a corrupt index where the same unique key exists + * on multiple shards. 
+ */ + @Test + public void testDistributedDocLookupDuplicateId() throws Exception { + String collection = "lukeDupId"; + int numShards = 2; + CollectionAdminRequest.createCollection(collection, "conf", numShards, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster.waitForActiveCollection(collection, numShards, numShards); + + try { + String dupId = "duplicate_doc"; + + // Write the same document directly to two shard cores via UpdateHandler, + // completely bypassing the distributed update processor chain. + DocCollection docColl = cluster.getSolrClient().getClusterState().getCollection(collection); + List slices = new ArrayList<>(docColl.getActiveSlices()); + assertTrue("need at least 2 shards", slices.size() >= 2); + + for (int i = 0; i < 2; i++) { + Replica leader = slices.get(i).getLeader(); + JettySolrRunner jetty = + cluster.getJettySolrRunners().stream() + .filter(j -> j.getNodeName().equals(leader.getNodeName())) + .findFirst() + .orElse(null); + assertNotNull("should find jetty for replica", jetty); + + try (SolrCore core = jetty.getCoreContainer().getCore(leader.getCoreName())) { + SolrInputDocument solrDoc = new SolrInputDocument(); + solrDoc.addField("id", dupId); + solrDoc.addField("name", "dup_copy_" + i); + + AddUpdateCommand addCmd = + new AddUpdateCommand(new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}); + addCmd.solrDoc = solrDoc; + core.getUpdateHandler().addDoc(addCmd); + + CommitUpdateCommand commitCmd = + new CommitUpdateCommand( + new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}, false); + commitCmd.waitSearcher = true; + core.getUpdateHandler().commit(commitCmd); + } + } + + // Verify the duplicate actually exists on both shards + int shardsWithDoc = 0; + for (Slice slice : docColl.getActiveSlices()) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("id:" + dupId); + q.set(DISTRIB, "false"); + QueryResponse qr = 
client.query(q); + if (qr.getResults().getNumFound() > 0) { + shardsWithDoc++; + } + } + } + assertEquals("duplicate doc should exist on exactly 2 shards", 2, shardsWithDoc); + + // Distributed Luke doc lookup should detect the corruption + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("distrib", "true"); + params.set("id", dupId); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); + String fullMessage = getExceptionChainMessage(ex); + assertTrue( + "exception should mention duplicate/corrupt index: " + fullMessage, + fullMessage.contains("found on multiple shards")); + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } + @Test public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception { String singleShardCollection = "lukeSingleShard"; From 0e78daeef4a20a6a54f8c96e148421c35d6a5982 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 23 Feb 2026 16:38:14 -0500 Subject: [PATCH 18/46] clean up --- .../handler/admin/LukeRequestHandler.java | 27 +++---- .../admin/LukeRequestHandlerDistribTest.java | 70 +++++++++---------- 2 files changed, 44 insertions(+), 53 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index d87049e7423c..d5145fc664dc 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -126,6 +126,7 @@ public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAw private static final String RSP_FIELDS = "fields"; private static final String RSP_SCHEMA = "schema"; private static final String RSP_INFO = "info"; + private static final String RSP_DOC = "doc"; private static final String RSP_SHARDS = "shards"; // Field-level keys @@ -165,7 
+166,7 @@ public static ShowStyle get(String v) { if (v == null) return null; if ("schema".equalsIgnoreCase(v)) return SCHEMA; if ("index".equalsIgnoreCase(v)) return INDEX; - if ("doc".equalsIgnoreCase(v)) return DOC; + if (RSP_DOC.equalsIgnoreCase(v)) return DOC; if ("all".equalsIgnoreCase(v)) return ALL; throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: " + v); } @@ -224,7 +225,7 @@ && handleDistributed(req, rsp)) { docinfo.add("docId", docId); docinfo.add("lucene", info); docinfo.add("solr", doc); - rsp.add("doc", docinfo); + rsp.add(RSP_DOC, docinfo); } else if (ShowStyle.SCHEMA == style) { rsp.add(RSP_SCHEMA, getSchemaInfo(req.getSchema())); } else { @@ -408,7 +409,7 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List mergedFieldsNL = new SimpleOrderedMap<>(); @@ -468,19 +469,11 @@ private void processShardFields(ShardData shardData, Map topTerms = fi.getTopTerms(); - Object histogram = fi.getExtras().get(KEY_HISTOGRAM); - - if (topTerms != null || fi.getDistinct() > 0 || histogram != null) { + if (topTerms != null) { SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); - if (topTerms != null) { - detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); - } - if (fi.getDistinct() > 0) { - detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); - } - if (histogram != null) { - detailedFieldInfo.add(KEY_HISTOGRAM, histogram); - } + detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); + detailedFieldInfo.add(KEY_HISTOGRAM, fi.getExtras().get(KEY_HISTOGRAM)); + detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); shardData.addDetailedFieldInfo(fieldName, detailedFieldInfo); } } @@ -549,7 +542,7 @@ private void mergeShardField( } Long docsAsLong = fi.getDocsAsLong(); - if (docsAsLong != null && docsAsLong > 0) { + if (docsAsLong != null) { fieldData.merged.compute( KEY_DOCS_AS_LONG, (key, val) -> val == null ? 
docsAsLong : (Long) val + docsAsLong); } diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 8dec00b69704..870e92703270 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -31,6 +31,7 @@ import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; @@ -81,19 +82,6 @@ public static void afterClass() throws Exception { shutdownCluster(); } - /** Walks the exception cause chain and concatenates all messages. */ - private static String getExceptionChainMessage(Throwable t) { - StringBuilder sb = new StringBuilder(); - while (t != null) { - if (t.getMessage() != null) { - if (sb.length() > 0) sb.append(" -> "); - sb.append(t.getMessage()); - } - t = t.getCause(); - } - return sb.toString(); - } - /** Sends a luke request and wraps the raw response in a typed {@link LukeResponse}. 
*/ private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); @@ -114,7 +102,7 @@ private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) @Test public void testDistributedMerge() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); LukeResponse rsp = requestLuke(COLLECTION, params); @@ -145,7 +133,7 @@ public void testDistributedMerge() throws Exception { @SuppressWarnings("unchecked") public void testDistributedFieldsMerge() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); LukeResponse rsp = requestLuke(COLLECTION, params); @@ -178,7 +166,7 @@ public void testDistributedFieldsMerge() throws Exception { @SuppressWarnings("unchecked") public void testDetailedFieldStatsPerShard() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("fl", "name"); params.set("numTerms", "5"); @@ -226,7 +214,7 @@ public void testLocalModeDefault() throws Exception { @Test public void testExplicitDistribFalse() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "false"); + params.set(DISTRIB, "false"); LukeResponse rsp = requestLuke(COLLECTION, params); @@ -258,7 +246,7 @@ public void testSparseShards() throws Exception { cluster.getSolrClient().commit(collection); ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); LukeResponse rsp = requestLuke(collection, params); @@ -327,7 +315,7 @@ public void testSparseShards() throws Exception { @SuppressWarnings("unchecked") public void testDistribShowSchema() throws Exception { ModifiableSolrParams params = new 
ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("show", "schema"); LukeResponse rsp = requestLuke(COLLECTION, params); @@ -407,7 +395,7 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { Replica leader = slice.getLeader(); try (SolrClient client = getHttpSolrClient(leader)) { SolrQuery q = new SolrQuery("id:target"); - q.set("distrib", "false"); + q.set(DISTRIB, "false"); QueryResponse qr = client.query(q); if (qr.getResults().getNumFound() > 0) { targetSliceName = slice.getName(); @@ -424,7 +412,7 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { try (SolrClient client = getHttpSolrClient(leader)) { SolrQuery q = new SolrQuery("*:*"); q.setRows(1); - q.set("distrib", "false"); + q.set(DISTRIB, "false"); QueryResponse qr = client.query(q); assertTrue("other shard should have seed docs", qr.getResults().getNumFound() > 0); otherDocId = (String) qr.getResults().get(0).getFieldValue("id"); @@ -451,13 +439,13 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { // Distributed Luke should detect inconsistent index flags between the two shards. // One shard has stored=true segments, the other has stored=false segments for test_flag_s. ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("fl", "test_flag_s"); Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); - // The server throws SolrException, but CloudSolrClient may wrap it in - // SolrServerException after retry exhaustion. Check the full exception chain. - String fullMessage = getExceptionChainMessage(ex); + // The server throws SolrException, but CloudSolrClient may wrap it. + // Check the root cause message. 
+ String fullMessage = SolrException.getRootCause(ex).getMessage(); assertTrue( "exception chain should mention inconsistent index flags: " + fullMessage, fullMessage.contains("inconsistent")); @@ -519,7 +507,7 @@ public void testDeferredIndexFlags() throws Exception { // Verify: distributed Luke should have index flags for flag_target_s in the merged response, // whether they came from the first shard (constructor path) or a later shard (deferred path). ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("fl", "flag_target_s"); LukeResponse rsp = requestLuke(collection, params); @@ -552,12 +540,12 @@ public void testDeferredIndexFlags() throws Exception { @Test public void testDistributedShardError() { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("id", "0"); params.set("show", "schema"); Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); - String fullMessage = getExceptionChainMessage(ex); + String fullMessage = SolrException.getRootCause(ex).getMessage(); assertTrue( "exception should mention doc style mismatch: " + fullMessage, fullMessage.contains("missing doc param for doc style")); @@ -567,11 +555,11 @@ public void testDistributedShardError() { @Test public void testDistributedDocIdRejected() { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("docId", "0"); Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); - String fullMessage = getExceptionChainMessage(ex); + String fullMessage = SolrException.getRootCause(ex).getMessage(); assertTrue( "exception should mention docId not supported: " + fullMessage, fullMessage.contains("docId parameter is not supported in distributed mode")); @@ -579,24 +567,34 @@ public void testDistributedDocIdRejected() { 
/** Verifies distributed doc lookup returns the document when it exists. */ @Test + @SuppressWarnings("unchecked") public void testDistributedDocLookupFound() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("id", "0"); LukeResponse rsp = requestLuke(COLLECTION, params); NamedList raw = rsp.getResponse(); - assertNotNull("doc section should be present", raw.get("doc")); assertNotNull("index section should be present", raw.get("index")); assertNotNull("info section should be present", raw.get("info")); + + // Verify doc structure: docId (Lucene int), lucene (per-field analysis), solr (stored fields) + NamedList doc = (NamedList) raw.get("doc"); + assertNotNull("doc section should be present", doc); + assertNotNull("docId should be present", doc.get("docId")); + assertNotNull("lucene section should be present", doc.get("lucene")); + + // The solr section is the stored Lucene Document; verify the fields we indexed + Object solrDoc = doc.get("solr"); + assertNotNull("solr section should be present", solrDoc); } /** Verifies distributed doc lookup returns an empty response for a non-existent ID. 
*/ @Test public void testDistributedDocLookupNotFound() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("id", "this_id_does_not_exist_anywhere"); LukeResponse rsp = requestLuke(COLLECTION, params); @@ -670,11 +668,11 @@ public void testDistributedDocLookupDuplicateId() throws Exception { // Distributed Luke doc lookup should detect the corruption ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); params.set("id", dupId); Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); - String fullMessage = getExceptionChainMessage(ex); + String fullMessage = SolrException.getRootCause(ex).getMessage(); assertTrue( "exception should mention duplicate/corrupt index: " + fullMessage, fullMessage.contains("found on multiple shards")); @@ -699,7 +697,7 @@ public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception { cluster.getSolrClient().commit(singleShardCollection); ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("distrib", "true"); + params.set(DISTRIB, "true"); LukeResponse rsp = requestLuke(singleShardCollection, params); From 6d49773e7f21e7268f39d6fae445fbece5b6e6b1 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 23 Feb 2026 16:56:45 -0500 Subject: [PATCH 19/46] clean up --- .../admin/LukeRequestHandlerDistribTest.java | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 870e92703270..731457ce1127 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -82,7 +82,6 @@ public static 
void afterClass() throws Exception { shutdownCluster(); } - /** Sends a luke request and wraps the raw response in a typed {@link LukeResponse}. */ private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("qt", "/admin/luke"); @@ -154,8 +153,6 @@ public void testDistributedFieldsMerge() throws Exception { assertEquals("id field type should be string", "string", idField.getType()); // Index flags should be consistent across shards (both shards have data for "name"). - // The merge validates non-null index flags for consistency; if they were inconsistent, - // the request would have thrown an error. Verify the merged result has index flags. NamedList mergedFieldsNL = (NamedList) rsp.getResponse().get("fields"); NamedList rawNameField = (NamedList) mergedFieldsNL.get("name"); assertNotNull( @@ -303,7 +300,7 @@ public void testSparseShards() throws Exception { // and stored — but if present, it should be a non-empty string Object indexFlags = rawNameField.get("index"); if (indexFlags != null) { - assertTrue("index flags should be a non-empty string", indexFlags.toString().length() > 0); + assertFalse("index flags should be a non-empty string", indexFlags.toString().isEmpty()); } } finally { CollectionAdminRequest.deleteCollection(collection) @@ -443,8 +440,6 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { params.set("fl", "test_flag_s"); Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); - // The server throws SolrException, but CloudSolrClient may wrap it. - // Check the root cause message. 
String fullMessage = SolrException.getRootCause(ex).getMessage(); assertTrue( "exception chain should mention inconsistent index flags: " + fullMessage, @@ -456,16 +451,16 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { } /** - * Exercises the deferred index flags path (lines 510-513 of LukeRequestHandler): when the first - * shard to report a field has null index flags (all its live docs for that field were deleted, - * but the field persists in FieldInfos from unmerged segments), the merge should still populate - * index flags from a later shard that has live docs. + * Exercises the deferred index flags path: when the first shard to report a field has null index + * flags (all its live docs for that field were deleted, but the field persists in FieldInfos from + * unmerged segments), the merge should still populate index flags from a later shard that has + * live docs. * - *

Setup: 8-shard collection. Each shard gets one doc with field "flag_target_s" (which is then - * deleted) plus an anchor doc without it (to keep the shard non-empty). Only one shard retains a - * live doc with "flag_target_s". With 8 shards, the probability that the one live shard is - * processed first is 1/8 = 12.5%, so we exercise the deferred path ~87.5% of the time. Either - * way, the merged response should have index flags for the field. + *

Setup: 16-shard collection. Each shard gets one doc with field "flag_target_s" (which is + * then deleted) plus an anchor doc without it (to keep the shard non-empty). Only one shard + * retains a live doc with "flag_target_s". With 16 shards, the probability that the one live + * shard is processed first is low enough. Either way, the merged response should have index flags + * for the field. */ @Test @SuppressWarnings("unchecked") @@ -479,7 +474,7 @@ public void testDeferredIndexFlags() throws Exception { try { // Index one doc with the target field per shard, plus an anchor doc without it. // The anchor doc keeps the shard non-empty after we delete the target doc. - // We use enough docs to spread across shards via hash routing. + // We use enough docs to spread across (hopefully) all shards. List docs = new ArrayList<>(); for (int i = 0; i < numShards * 4; i++) { SolrInputDocument doc = new SolrInputDocument(); @@ -500,8 +495,6 @@ public void testDeferredIndexFlags() throws Exception { for (int i = 1; i < numShards * 4; i++) { cluster.getSolrClient().deleteById(collection, "target_" + i); } - // Do NOT force merge — we need the deleted docs' field to persist in FieldInfos - // so that getFirstLiveDoc returns null, producing null index flags. cluster.getSolrClient().commit(collection); // Verify: distributed Luke should have index flags for flag_target_s in the merged response, @@ -534,8 +527,7 @@ public void testDeferredIndexFlags() throws Exception { * Exercises shard error propagation through the distributed doc lookup path. Passing id=0 with * show=schema triggers a BAD_REQUEST on the shard that has doc 0 (the local handler rejects an id * combined with a non-DOC show style). The distributed handler should propagate this as a - * SolrException, even though other shards respond with NOT_FOUND (which is handled gracefully in - * the doc lookup path). + * SolrException. 
*/ @Test public void testDistributedShardError() { From d8c38a1a880557a233f204874926c4dbd7f1c304 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 23 Feb 2026 17:44:48 -0500 Subject: [PATCH 20/46] named list to xpath validation --- .../admin/LukeRequestHandlerDistribTest.java | 200 ++++++++++-------- 1 file changed, 106 insertions(+), 94 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 731457ce1127..b42f56d87acd 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -28,6 +28,7 @@ import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.request.SolrQuery; import org.apache.solr.client.solrj.request.schema.SchemaRequest; +import org.apache.solr.client.solrj.response.InputStreamResponseParser; import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.SolrCloudTestCase; @@ -43,6 +44,7 @@ import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.CommitUpdateCommand; +import org.apache.solr.util.BaseTestHarness; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -98,6 +100,25 @@ private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) return rsp; } + private void assertLukeXPath(String collection, ModifiableSolrParams extra, String... 
xpaths) + throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("wt", "xml"); + if (extra != null) { + for (Map.Entry entry : extra.getMap().entrySet()) { + params.set(entry.getKey(), entry.getValue()); + } + } + QueryRequest req = new QueryRequest(params); + req.setResponseParser(new InputStreamResponseParser("xml")); + NamedList raw = cluster.getSolrClient().request(req, collection); + String xml = InputStreamResponseParser.consumeResponseToString(raw); + String failedXpath = BaseTestHarness.validateXPath(xml, xpaths); + assertNull("XPath validation failed: " + failedXpath + "\nResponse:\n" + xml, failedXpath); + } + @Test public void testDistributedMerge() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); @@ -129,7 +150,6 @@ public void testDistributedMerge() throws Exception { } @Test - @SuppressWarnings("unchecked") public void testDistributedFieldsMerge() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set(DISTRIB, "true"); @@ -152,15 +172,21 @@ public void testDistributedFieldsMerge() throws Exception { assertNotNull("'id' field should be present", idField); assertEquals("id field type should be string", "string", idField.getType()); - // Index flags should be consistent across shards (both shards have data for "name"). 
- NamedList mergedFieldsNL = (NamedList) rsp.getResponse().get("fields"); - NamedList rawNameField = (NamedList) mergedFieldsNL.get("name"); - assertNotNull( - "index flags should be present when both shards have data", rawNameField.get("index")); + // Validate merged field metadata matches schema and test data + assertLukeXPath( + COLLECTION, + params, + "//lst[@name='index']/long[@name='numDocs'][.='20']", + "count(//lst[@name='shards']/lst)=2", + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema'][.='ITS---------------']", + "//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='20']", + "//lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='id']/long[@name='docsAsLong'][.='20']"); } @Test - @SuppressWarnings("unchecked") public void testDetailedFieldStatsPerShard() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set(DISTRIB, "true"); @@ -179,25 +205,23 @@ public void testDetailedFieldStatsPerShard() throws Exception { Map shards = rsp.getShardResponses(); assertNotNull("shards section should be present", shards); - boolean foundDetailedStats = false; - for (Map.Entry entry : shards.entrySet()) { - LukeResponse shardLuke = entry.getValue(); - // Access the raw shard entry for per-shard fields - NamedList shardRaw = shardLuke.getResponse(); - NamedList shardFields = (NamedList) shardRaw.get("fields"); - if (shardFields != null) { - NamedList shardNameField = (NamedList) shardFields.get("name"); - if (shardNameField != null) { - foundDetailedStats = true; - assertTrue( - "per-shard field should have topTerms, distinct, or histogram", - shardNameField.get("topTerms") != null - || shardNameField.get("distinct") != null - || shardNameField.get("histogram") != null); - } - } - } - assertTrue("at least one shard should 
have detailed field stats", foundDetailedStats); + ModifiableSolrParams detailedParams = new ModifiableSolrParams(); + detailedParams.set(DISTRIB, "true"); + detailedParams.set("fl", "name"); + detailedParams.set("numTerms", "5"); + assertLukeXPath( + COLLECTION, + detailedParams, + // Top-level merged field should have type and merged doc count but no detailed stats + "/response/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "/response/lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='20']", + "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms'])", + "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='histogram'])", + "not(/response/lst[@name='fields']/lst[@name='name']/int[@name='distinct'])", + // Per-shard entries should have detailed stats; each name is unique so docFreq=1 + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms']", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='histogram']/int[@name='1']", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/int[@name='distinct']"); } @Test @@ -222,11 +246,9 @@ public void testExplicitDistribFalse() throws Exception { /** * 12 shards, 1 document: only one shard has data, the other 11 are empty. Verifies that * schema-derived attributes (type, schema flags, dynamicBase) merge correctly when most shards - * have no documents, and that index-derived attributes (index flags, docs count) degrade - * gracefully. + * have no documents. 
*/ @Test - @SuppressWarnings("unchecked") public void testSparseShards() throws Exception { String collection = "lukeSparse12"; CollectionAdminRequest.createCollection(collection, "conf", 12, 1) @@ -287,21 +309,22 @@ public void testSparseShards() throws Exception { assertNotNull("cat_s type", catField.getType()); assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); - // Verify index flags in the merged response for the static "name" field. - // Luke only reports fields present in the Lucene index (via reader.getFieldInfos()), - // so only the shard with the document contributes "name" to the merge. The merge - // validates consistency of index flags across shards (null is always consistent), - // but with 11 empty shards, only one shard contributes index flags here. - NamedList mergedFieldsNL = (NamedList) rsp.getResponse().get("fields"); - assertNotNull("merged fields NamedList should be present", mergedFieldsNL); - NamedList rawNameField = (NamedList) mergedFieldsNL.get("name"); - assertNotNull("raw 'name' field should be in merged fields", rawNameField); - // The index flags key may or may not be present depending on whether the field is indexed - // and stored — but if present, it should be a non-empty string - Object indexFlags = rawNameField.get("index"); - if (indexFlags != null) { - assertFalse("index flags should be a non-empty string", indexFlags.toString().isEmpty()); - } + // Verify structural correctness of the merged response via XPath + ModifiableSolrParams xpathParams = new ModifiableSolrParams(); + xpathParams.set(DISTRIB, "true"); + assertLukeXPath( + collection, + xpathParams, + "//lst[@name='index']/long[@name='numDocs'][.='1']", + "//lst[@name='index']/long[@name='deletedDocs'][.='0']", + "count(//lst[@name='shards']/lst)=12", + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema'][.='ITS---------------']", + 
"//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='1']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='cat_s']/long[@name='docsAsLong'][.='1']"); } finally { CollectionAdminRequest.deleteCollection(collection) .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); @@ -309,35 +332,23 @@ public void testSparseShards() throws Exception { } @Test - @SuppressWarnings("unchecked") public void testDistribShowSchema() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set(DISTRIB, "true"); params.set("show", "schema"); - LukeResponse rsp = requestLuke(COLLECTION, params); - - NamedList raw = rsp.getResponse(); - NamedList schema = (NamedList) raw.get("schema"); - assertNotNull("schema section should be present", schema); - - NamedList fields = (NamedList) schema.get("fields"); - assertNotNull("schema fields should be present", fields); - assertNotNull("'id' should be in schema fields", fields.get("id")); - assertNotNull("'name' should be in schema fields", fields.get("name")); - - assertNotNull("dynamicFields should be present", schema.get("dynamicFields")); - assertNotNull("uniqueKeyField should be present", schema.get("uniqueKeyField")); - assertEquals("uniqueKeyField should be 'id'", "id", schema.get("uniqueKeyField")); - assertNotNull("types should be present", schema.get("types")); - assertNotNull("similarity should be present", schema.get("similarity")); - - // show=schema should not produce merged top-level fields (matches local mode behavior) - assertNull("top-level fields should not be present with show=schema", raw.get("fields")); - - // Shards are present for consistency: each shard entry mirrors the per-shard index info, - // just as the top-level index section is present in local mode with 
show=schema - assertNotNull("shards should still be present with show=schema", raw.get("shards")); + assertLukeXPath( + COLLECTION, + params, + "//lst[@name='schema']/lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='schema']/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='schema']/lst[@name='dynamicFields']/lst[@name='*_s']", + "//lst[@name='schema']/str[@name='uniqueKeyField'][.='id']", + "//lst[@name='schema']/lst[@name='types']/lst[@name='string']", + "//lst[@name='schema']/lst[@name='types']/lst[@name='nametext']", + "//lst[@name='schema']/lst[@name='similarity']", + "not(/response/lst[@name='fields'])", + "count(//lst[@name='shards']/lst)=2"); } private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -386,7 +397,7 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { // Find which shard has the target doc by querying each replica directly. // Must use distrib=false — SolrCloud defaults distrib to true even on direct replica queries. - DocCollection docColl = cluster.getSolrClient().getClusterState().getCollection(collection); + DocCollection docColl = getCollectionState(collection); String targetSliceName = null; for (Slice slice : docColl.getSlices()) { Replica leader = slice.getLeader(); @@ -412,7 +423,7 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { q.set(DISTRIB, "false"); QueryResponse qr = client.query(q); assertTrue("other shard should have seed docs", qr.getResults().getNumFound() > 0); - otherDocId = (String) qr.getResults().get(0).getFieldValue("id"); + otherDocId = (String) qr.getResults().getFirst().getFieldValue("id"); } break; } @@ -463,7 +474,6 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { * for the field. 
*/ @Test - @SuppressWarnings("unchecked") public void testDeferredIndexFlags() throws Exception { String collection = "lukeDeferredFlags"; int numShards = 16; @@ -511,12 +521,16 @@ public void testDeferredIndexFlags() throws Exception { assertNotNull("'flag_target_s' field should be present", targetField); // The merged response should have index flags from whichever shard had live docs - NamedList mergedFieldsNL = (NamedList) rsp.getResponse().get("fields"); - NamedList rawTargetField = (NamedList) mergedFieldsNL.get("flag_target_s"); - assertNotNull("raw 'flag_target_s' should be in merged fields", rawTargetField); - assertNotNull( - "index flags should be present (populated from shard with live docs)", - rawTargetField.get("index")); + ModifiableSolrParams xpathParams = new ModifiableSolrParams(); + xpathParams.set(DISTRIB, "true"); + xpathParams.set("fl", "flag_target_s"); + assertLukeXPath( + collection, + xpathParams, + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='index']", + "//lst[@name='fields']/lst[@name='flag_target_s']/long[@name='docsAsLong'][.='1']"); } finally { CollectionAdminRequest.deleteCollection(collection) .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); @@ -559,27 +573,23 @@ public void testDistributedDocIdRejected() { /** Verifies distributed doc lookup returns the document when it exists. 
*/ @Test - @SuppressWarnings("unchecked") public void testDistributedDocLookupFound() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set(DISTRIB, "true"); params.set("id", "0"); - LukeResponse rsp = requestLuke(COLLECTION, params); - - NamedList raw = rsp.getResponse(); - assertNotNull("index section should be present", raw.get("index")); - assertNotNull("info section should be present", raw.get("info")); - - // Verify doc structure: docId (Lucene int), lucene (per-field analysis), solr (stored fields) - NamedList doc = (NamedList) raw.get("doc"); - assertNotNull("doc section should be present", doc); - assertNotNull("docId should be present", doc.get("docId")); - assertNotNull("lucene section should be present", doc.get("lucene")); - - // The solr section is the stored Lucene Document; verify the fields we indexed - Object solrDoc = doc.get("solr"); - assertNotNull("solr section should be present", solrDoc); + assertLukeXPath( + COLLECTION, + params, + "//lst[@name='doc']/int[@name='docId']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='value'][.='0']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='name']/str[@name='value'][.='name_0']", + "//lst[@name='doc']/arr[@name='solr']/str[.='0']", + "//lst[@name='doc']/arr[@name='solr']/str[.='name_0']", + "//lst[@name='index']", + "//lst[@name='info']"); } /** Verifies distributed doc lookup returns an empty response for a non-existent ID. 
*/ @@ -593,6 +603,8 @@ public void testDistributedDocLookupNotFound() throws Exception { NamedList raw = rsp.getResponse(); assertNull("doc section should NOT be present for missing ID", raw.get("doc")); + + assertLukeXPath(COLLECTION, params, "not(//lst[@name='doc'])"); } /** @@ -612,7 +624,7 @@ public void testDistributedDocLookupDuplicateId() throws Exception { // Write the same document directly to two shard cores via UpdateHandler, // completely bypassing the distributed update processor chain. - DocCollection docColl = cluster.getSolrClient().getClusterState().getCollection(collection); + DocCollection docColl = getCollectionState(collection); List slices = new ArrayList<>(docColl.getActiveSlices()); assertTrue("need at least 2 shards", slices.size() >= 2); From 195a73c4ede84c9b62212ba1bdf2c7c55a99a34a Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 25 Feb 2026 14:23:26 -0500 Subject: [PATCH 21/46] merge -> aggregate as verb --- .../handler/admin/LukeRequestHandler.java | 71 ++++++++++--------- .../admin/LukeRequestHandlerDistribTest.java | 36 +++++----- .../pages/luke-request-handler.adoc | 14 ++-- 3 files changed, 64 insertions(+), 57 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index d5145fc664dc..e15a5a3b47b4 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -242,15 +242,15 @@ && handleDistributed(req, rsp)) { rsp.setHttpCaching(false); } - /** Per-field accumulation state across shards: merged response data and field validation. */ - private static class MergedFieldData { - final SimpleOrderedMap merged = new SimpleOrderedMap<>(); + /** Per-field accumulation state across shards: aggregated response data and field validation. 
*/ + private static class AggregatedFieldData { + final SimpleOrderedMap aggregated = new SimpleOrderedMap<>(); final String originalShardAddr; final LukeResponse.FieldInfo originalFieldInfo; private Object indexFlags; private String indexFlagsShardAddr; - MergedFieldData(String shardAddr, LukeResponse.FieldInfo fieldInfo) { + AggregatedFieldData(String shardAddr, LukeResponse.FieldInfo fieldInfo) { this.originalShardAddr = shardAddr; this.originalFieldInfo = fieldInfo; Object flags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS); @@ -349,7 +349,7 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { } } - mergeDistributedResponses(rsp, responses); + aggregateDistributedResponses(rsp, responses); rsp.setHttpCaching(false); return true; } @@ -358,7 +358,7 @@ private static String shardAddress(ShardResponse srsp) { return srsp.getShardAddress() != null ? srsp.getShardAddress() : srsp.getShard(); } - private void mergeDistributedResponses(SolrQueryResponse rsp, List responses) { + private void aggregateDistributedResponses(SolrQueryResponse rsp, List responses) { if (!responses.isEmpty()) { ShardResponse firstRsp = responses.getFirst(); @@ -382,7 +382,7 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List mergedFields = new HashMap<>(); + Map aggregatedFields = new HashMap<>(); String firstDocShard = null; Object firstDoc = null; List shardDataList = new ArrayList<>(); @@ -408,7 +408,7 @@ private void mergeDistributedResponses(SolrQueryResponse rsp, List mergedIndex = new SimpleOrderedMap<>(); - mergedIndex.add(KEY_NUM_DOCS, totalNumDocs); - mergedIndex.add(KEY_MAX_DOC, totalMaxDoc); - mergedIndex.add(KEY_DELETED_DOCS, totalDeletedDocs); - mergedIndex.add(KEY_SEGMENT_COUNT, totalSegmentCount); - rsp.add(RSP_INDEX, mergedIndex); + SimpleOrderedMap aggregatedIndex = new SimpleOrderedMap<>(); + aggregatedIndex.add(KEY_NUM_DOCS, totalNumDocs); + aggregatedIndex.add(KEY_MAX_DOC, totalMaxDoc); + 
aggregatedIndex.add(KEY_DELETED_DOCS, totalDeletedDocs); + aggregatedIndex.add(KEY_SEGMENT_COUNT, totalSegmentCount); + rsp.add(RSP_INDEX, aggregatedIndex); if (firstDoc != null) { rsp.add(RSP_DOC, firstDoc); } - if (!mergedFields.isEmpty()) { - SimpleOrderedMap mergedFieldsNL = new SimpleOrderedMap<>(); - for (Map.Entry entry : mergedFields.entrySet()) { - mergedFieldsNL.add(entry.getKey(), entry.getValue().merged); + if (!aggregatedFields.isEmpty()) { + SimpleOrderedMap aggregatedFieldsNL = new SimpleOrderedMap<>(); + for (Map.Entry entry : aggregatedFields.entrySet()) { + aggregatedFieldsNL.add(entry.getKey(), entry.getValue().aggregated); } - rsp.add(RSP_FIELDS, mergedFieldsNL); + rsp.add(RSP_FIELDS, aggregatedFieldsNL); } rsp.add(RSP_SHARDS, shardsInfo); } - private void processShardFields(ShardData shardData, Map mergedFields) { + private void processShardFields( + ShardData shardData, Map aggregatedFields) { if (shardData.shardFieldInfo == null) { return; } @@ -465,9 +466,9 @@ private void processShardFields(ShardData shardData, Map topTerms = fi.getTopTerms(); if (topTerms != null) { SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); @@ -479,25 +480,27 @@ private void processShardFields(ShardData shardData, Map mergedFields) { + private void aggregateShardField( + String shardAddr, + LukeResponse.FieldInfo fi, + Map aggregatedFields) { String fieldName = fi.getName(); - MergedFieldData fieldData = mergedFields.get(fieldName); + AggregatedFieldData fieldData = aggregatedFields.get(fieldName); if (fieldData == null) { - fieldData = new MergedFieldData(shardAddr, fi); - mergedFields.put(fieldName, fieldData); + fieldData = new AggregatedFieldData(shardAddr, fi); + aggregatedFields.put(fieldName, fieldData); - // First shard to report this field: populate merged with schema-derived attrs - fieldData.merged.add(KEY_TYPE, fi.getType()); - fieldData.merged.add(KEY_SCHEMA_FLAGS, fi.getSchema()); + // First shard to report this field: populate aggregated 
with schema-derived attrs + fieldData.aggregated.add(KEY_TYPE, fi.getType()); + fieldData.aggregated.add(KEY_SCHEMA_FLAGS, fi.getSchema()); Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); if (dynBase != null) { - fieldData.merged.add(KEY_DYNAMIC_BASE, dynBase); + fieldData.aggregated.add(KEY_DYNAMIC_BASE, dynBase); } if (fieldData.indexFlags != null) { - fieldData.merged.add(KEY_INDEX_FLAGS, fieldData.indexFlags); + fieldData.aggregated.add(KEY_INDEX_FLAGS, fieldData.indexFlags); } } else { // Subsequent shards: validate consistency @@ -528,7 +531,7 @@ private void mergeShardField( if (fieldData.indexFlags == null) { fieldData.indexFlags = indexFlags; fieldData.indexFlagsShardAddr = shardAddr; - fieldData.merged.add(KEY_INDEX_FLAGS, indexFlags); + fieldData.aggregated.add(KEY_INDEX_FLAGS, indexFlags); } else { validateFieldAttr( fieldName, @@ -543,7 +546,7 @@ private void mergeShardField( Long docsAsLong = fi.getDocsAsLong(); if (docsAsLong != null) { - fieldData.merged.compute( + fieldData.aggregated.compute( KEY_DOCS_AS_LONG, (key, val) -> val == null ? 
docsAsLong : (Long) val + docsAsLong); } } diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index b42f56d87acd..f3d2a0e73d5e 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -120,15 +120,15 @@ private void assertLukeXPath(String collection, ModifiableSolrParams extra, Stri } @Test - public void testDistributedMerge() throws Exception { + public void testDistributedAggregate() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set(DISTRIB, "true"); LukeResponse rsp = requestLuke(COLLECTION, params); assertEquals( - "merged numDocs should equal total docs", NUM_DOCS, rsp.getNumDocsAsLong().longValue()); - assertTrue("merged maxDoc should be > 0", rsp.getMaxDoc() > 0); + "aggregated numDocs should equal total docs", NUM_DOCS, rsp.getNumDocsAsLong().longValue()); + assertTrue("aggregated maxDoc should be > 0", rsp.getMaxDoc() > 0); assertNotNull("deletedDocs should be present", rsp.getDeletedDocsAsLong()); Map shards = rsp.getShardResponses(); @@ -144,13 +144,13 @@ public void testDistributedMerge() throws Exception { sumShardDocs += shardLuke.getNumDocsAsLong(); } assertEquals( - "sum of per-shard numDocs should equal merged numDocs", + "sum of per-shard numDocs should equal aggregated numDocs", rsp.getNumDocsAsLong().longValue(), sumShardDocs); } @Test - public void testDistributedFieldsMerge() throws Exception { + public void testDistributedFieldsAggregate() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set(DISTRIB, "true"); @@ -164,7 +164,7 @@ public void testDistributedFieldsMerge() throws Exception { assertNotNull("field type should be present", nameField.getType()); assertNotNull("schema flags should be present", 
nameField.getSchema()); assertEquals( - "merged docs count for 'name' should equal total docs", + "aggregated docs count for 'name' should equal total docs", NUM_DOCS, nameField.getDocsAsLong().longValue()); @@ -172,7 +172,7 @@ public void testDistributedFieldsMerge() throws Exception { assertNotNull("'id' field should be present", idField); assertEquals("id field type should be string", "string", idField.getType()); - // Validate merged field metadata matches schema and test data + // Validate aggregated field metadata matches schema and test data assertLukeXPath( COLLECTION, params, @@ -212,7 +212,8 @@ public void testDetailedFieldStatsPerShard() throws Exception { assertLukeXPath( COLLECTION, detailedParams, - // Top-level merged field should have type and merged doc count but no detailed stats + // Top-level aggregated field should have type and aggregated doc count but no detailed + // stats "/response/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", "/response/lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='20']", "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms'])", @@ -245,8 +246,8 @@ public void testExplicitDistribFalse() throws Exception { /** * 12 shards, 1 document: only one shard has data, the other 11 are empty. Verifies that - * schema-derived attributes (type, schema flags, dynamicBase) merge correctly when most shards - * have no documents. + * schema-derived attributes (type, schema flags, dynamicBase) aggregate correctly when most + * shards have no documents. 
*/ @Test public void testSparseShards() throws Exception { @@ -309,7 +310,7 @@ public void testSparseShards() throws Exception { assertNotNull("cat_s type", catField.getType()); assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); - // Verify structural correctness of the merged response via XPath + // Verify structural correctness of the aggregated response via XPath ModifiableSolrParams xpathParams = new ModifiableSolrParams(); xpathParams.set(DISTRIB, "true"); assertLukeXPath( @@ -464,14 +465,14 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { /** * Exercises the deferred index flags path: when the first shard to report a field has null index * flags (all its live docs for that field were deleted, but the field persists in FieldInfos from - * unmerged segments), the merge should still populate index flags from a later shard that has - * live docs. + * unmerged segments), the aggregation should still populate index flags from a later shard that + * has live docs. * *

Setup: 16-shard collection. Each shard gets one doc with field "flag_target_s" (which is * then deleted) plus an anchor doc without it (to keep the shard non-empty). Only one shard * retains a live doc with "flag_target_s". With 16 shards, the probability that the one live - * shard is processed first is low enough. Either way, the merged response should have index flags - * for the field. + * shard is processed first is low enough. Either way, the aggregated response should have index + * flags for the field. */ @Test public void testDeferredIndexFlags() throws Exception { @@ -507,7 +508,8 @@ public void testDeferredIndexFlags() throws Exception { } cluster.getSolrClient().commit(collection); - // Verify: distributed Luke should have index flags for flag_target_s in the merged response, + // Verify: distributed Luke should have index flags for flag_target_s in the aggregated + // response, // whether they came from the first shard (constructor path) or a later shard (deferred path). ModifiableSolrParams params = new ModifiableSolrParams(); params.set(DISTRIB, "true"); @@ -520,7 +522,7 @@ public void testDeferredIndexFlags() throws Exception { LukeResponse.FieldInfo targetField = fields.get("flag_target_s"); assertNotNull("'flag_target_s' field should be present", targetField); - // The merged response should have index flags from whichever shard had live docs + // The aggregated response should have index flags from whichever shard had live docs ModifiableSolrParams xpathParams = new ModifiableSolrParams(); xpathParams.set(DISTRIB, "true"); xpathParams.set("fl", "flag_target_s"); diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index 2186aeec5b78..c1f9068b7b7c 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -139,7 +139,7 @@ By 
default, `distrib` is `false` and the handler inspects only the local shard's To get a collection-wide view: [source,text] -http://localhost:8983/solr/techproducts/admin/luke?distrib=true&numTerms=0 +http://localhost:8983/solr/techproducts/admin/luke?distrib=true To get detailed field statistics across all shards for a specific field: @@ -150,21 +150,23 @@ http://localhost:8983/solr/techproducts/admin/luke?distrib=true&fl=manu In distributed mode, the response contains: -* `index` -- Merged metrics across all shards: `numDocs`, `deletedDocs`, `segmentCount` are summed; `maxDoc` is the maximum across shards. -* `fields` -- Merged field metadata. For each field: `type`, `schema` flags, and `dynamicBase` are validated to be consistent across shards; `index` flags use the first non-null value. The `docs` count is summed. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are _not_ included at this level. +* `index` -- Aggregated metrics across all shards: `numDocs`, `deletedDocs`, `segmentCount` are summed; `maxDoc` is the maximum across shards. +* `fields` -- Aggregated field metadata. For each field: `type`, `schema` flags, and `dynamicBase` are validated to be consistent across shards; `index` flags use the first non-null value. The `docs` count is summed. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are _not_ included at this level. +* `doc` -- Present when `id` is specified. Contains the document from whichever shard owns it, including a `lucene` section (per-field analysis with shard-local `docFreq` values) and a `solr` section (stored fields). Only `id` is supported for distributed doc lookup; `docId` is rejected because Lucene document IDs are shard-local. * `schema` -- Schema information from the first responding shard (identical across shards sharing the same configset). * `info` -- Static info from the first responding shard. * `shards` -- Per-shard details in response-completion order. 
Each entry contains: ** `index` -- Full index info for that shard (including `directory`, `segmentsFile`, `version`, `current`, `hasDeletions`, `lastModified`, `userData`). ** `fields` -- Only present when `fl` triggers detailed statistics. Contains per-field `topTerms`, `distinct`, and `histogram` from that shard. -=== Merge Semantics +=== Aggregation Semantics Field `type`, `schema` flags, and `dynamicBase` are validated for consistency across shards. If a mismatch is detected, the handler returns an error identifying the field, the conflicting values, and the shard addresses involved. The `index` flags are index-derived (not schema-derived) and may legitimately differ across shards; the first non-null value is used. +If the same document `id` is found on multiple shards (indicating index corruption), the handler returns an error. -Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not merged across shards. +Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not aggregated across shards. These statistics are shard-local and appear in each shard's entry under the `shards` key. For cross-shard term-level aggregation, Solr's xref:query-guide:faceting.adoc[faceting API] provides refinement and distributed aggregation, though it differs from Luke's term statistics: faceting ignores deleted documents, is more flexible, and is generally more expensive. -A case could be made for adding merge strategies for Luke-powered term statistics (which offer better performance at the cost of flexibility), however this would require community interest to motivate. +A case could be made for adding aggregation strategies for Luke-powered term statistics (which offer better performance at the cost of flexibility), however this would require community interest to motivate. 
From 5de8cd79f676765ff913ebd69aee4230cb14287e Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 25 Feb 2026 16:16:52 -0500 Subject: [PATCH 22/46] changelog --- CHANGELOG.md | 329 +++++++++--------- .../unreleased/PR#4149-distributed-luke.yml | 8 + 2 files changed, 173 insertions(+), 164 deletions(-) create mode 100644 changelog/unreleased/PR#4149-distributed-luke.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index aea57a6a6d2c..cdd213e6542d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M [unreleased] ------------ -### Added (12 changes) +### Added (13 changes) - Create new v2 APIs for listing and reading collection properties ("collprops") [SOLR-12224](https://issues.apache.org/jira/browse/SOLR-12224) (Jason Gerlowski) - Introduce new SolrJ SolrRequest classes for metrics and "system info" requests. [SOLR-17136](https://issues.apache.org/jira/browse/SOLR-17136) (Isabelle Giguère) (Pierre Salagnac) @@ -27,6 +27,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - CoreAdmin API (/admin/cores?action=UPGRADECOREINDEX) to upgrade an index in-place [SOLR-18096](https://issues.apache.org/jira/browse/SOLR-18096) (Rahul Goswami) - CrossDC Consumer - add Prometheus metrics [SOLR-18060](https://issues.apache.org/jira/browse/SOLR-18060) (Andrzej Bialecki @ab) - CrossDC - support arbitrary Kafka properties [SOLR-18062](https://issues.apache.org/jira/browse/SOLR-18062) (Andrzej Bialecki @ab) +- Distributed/Cloud support for admin/luke [SOLR-8127](https://issues.apache.org/jira/browse/SOLR-8127) (Luke Kot-Zaniewski) ### Changed (2 changes) @@ -128,7 +129,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Migrate Schema Designer to use FileStore API instead of BlobHandler for persisting working data. 
[SOLR-17852](https://issues.apache.org/jira/browse/SOLR-17852) (Eric Pugh) - Export tool should properly output exported documents in json, json w/ lines, and javabin formats. [SOLR-17867](https://issues.apache.org/jira/browse/SOLR-17867) (Eric Pugh) - Improve tracking of time already spent to discount the limit for sub-requests when `timeAllowed` is used. [SOLR-17926](https://issues.apache.org/jira/browse/SOLR-17926) (Andrzej Bialecki) (hossman) -- MultiAuthPlugin now looks up for auth plugins configured with "xBasic" as scheme if "Basic" authentication used and no plugin with "Basic" scheme found. This allows the new UI to authenticate in browser without a credentials prompt being displayed. The MultiAuthPlugin can now also be configured with a single plugin. [SOLR-17930](https://issues.apache.org/jira/browse/SOLR-17930) +- MultiAuthPlugin now looks for auth plugins configured with "xBasic" as scheme if "Basic" authentication used and no plugin with "Basic" scheme found. This allows the new UI to authenticate in browser without a credentials prompt being displayed. The MultiAuthPlugin can now also be configured with a single plugin. [SOLR-17930](https://issues.apache.org/jira/browse/SOLR-17930) - `Http2ClusterStateProvider` now also can work with `HttpJdkSolrClient`. [SOLR-17943](https://issues.apache.org/jira/browse/SOLR-17943) (James Dyer) - Return structured error information in responses, parse correctly in SolrJ to give useful SolrExceptions [SOLR-17998](https://issues.apache.org/jira/browse/SOLR-17998) [PR#1382](https://github.com/apache/solr/pull/1382) ([Houston Putman](https://home.apache.org/phonebook.html?uid=houston) @HoustonPutman) - PropertiesInputStream overrides bulk read method, and rename it to IndexInputInputStream to match symmetrical class IndexOutputOutputStream. [SOLR-18029](https://issues.apache.org/jira/browse/SOLR-18029) (Pierre Salagnac) @@ -179,7 +180,7 @@ This file lists Solr's raw release notes with details of every change to Solr.
M - Removed the Hadoop Auth module, and thus Kerberos authentication and other exotic options. [SOLR-17540](https://issues.apache.org/jira/browse/SOLR-17540) (Eric Pugh) - Remove code in Assign used for backwards compatibility with Collections created prior to 7.0 [SOLR-17564](https://issues.apache.org/jira/browse/SOLR-17564) (Paul McArthur) - Remove deprecated master/slave option language from ReplicationHandler. [SOLR-17576](https://issues.apache.org/jira/browse/SOLR-17576) (Eric Pugh) -- Removed the HDFS module and therefore support for storing indexes in HDFS. Existing "blockcache" code preserved by migrating it to core o.a.s.blackcache package. [SOLR-17609](https://issues.apache.org/jira/browse/SOLR-17609) +- Removed the HDFS module and therefore support for storing indexes in HDFS. Existing "blockcache" code preserved by migrating it to core o.a.s.blackcache package. [SOLR-17609](https://issues.apache.org/jira/browse/SOLR-17609) - Removed the "version" parameter that the SolrJ client would add to almost all Solr requests in relation to the version of the response format parsing ("wt" param). [SOLR-17634](https://issues.apache.org/jira/browse/SOLR-17634) (David Smiley) - Removed deprecated ExternalFileField field type. [SOLR-17655](https://issues.apache.org/jira/browse/SOLR-17655) (Eric Pugh) - Removed deprecated CurrencyField field type. [SOLR-17683](https://issues.apache.org/jira/browse/SOLR-17683) (Eric Pugh) @@ -329,7 +330,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (39 changes) -- Logs: removed webapp=/solr and also removed from internal context map. [PR#3758](https://github.com/apache/solr/pull/3758) +- Logs: removed webapp=/solr and also removed from internal context map. [PR#3758](https://github.com/apache/solr/pull/3758) - Deprecate CloudSolrClient's ZooKeeper Hosts constructor. 
[SOLR-14070](https://issues.apache.org/jira/browse/SOLR-14070) (Abhishek Umarjikar) - Switch from SysV init script to systemd service definition [SOLR-14410](https://issues.apache.org/jira/browse/SOLR-14410) (Marius Ghita) - SolrJ modules like SolrJ-Zookeeper are now opt-in from a Maven POM perspective. Previously, the modules would come transitively. [SOLR-15730](https://issues.apache.org/jira/browse/SOLR-15730) (David Smiley) @@ -429,7 +430,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Avoid creating grouping shard requests when timeAllowed has already run out. [SOLR-17869](https://issues.apache.org/jira/browse/SOLR-17869) (Andrzej Bialecki) (hossman) - Http2SolrClient wasn't honoring idle timeout configuration above 30 seconds -- a regression. [SOLR-17871](https://issues.apache.org/jira/browse/SOLR-17871) (Thomas Wöckinger) (David Smiley) - Http2SolrClient: followRedirects=true: if Http2SolrClient was created with followRedirects=true, and then was used to create future clients (via builder.withHttpClient), then redirect processing was wrongly disabled on the shared instance. [SOLR-17876](https://issues.apache.org/jira/browse/SOLR-17876) (David Smiley) -- SolrCLI tools such as "bin/solr zk" can now access jars located in `<install_dir>/lib`. [SOLR-17883](https://issues.apache.org/jira/browse/SOLR-17883) +- SolrCLI tools such as "bin/solr zk" can now access jars located in `<install_dir>/lib`. [SOLR-17883](https://issues.apache.org/jira/browse/SOLR-17883) - Fix for LB/Cloud SolrClients that could leak on close() if concurrent request failed and triggered zombie server logic. [SOLR-3696](https://issues.apache.org/jira/browse/SOLR-3696) (hossman) ### Dependency Upgrades (37 changes) @@ -666,7 +667,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Introduce unit testing for AssertTool. 
[SOLR-11318](https://issues.apache.org/jira/browse/SOLR-11318) (Eric Pugh) (Jason Gerlowski) - NamedList: deprecating methods: forEachEntry, forEachKey, abortableForEachKey, abortableForEach, asMap (no-arg only), get(key, default). Added getOrDefault. Deprecated the SimpleMap interface as well as the entirety of the SolrJ package org.apache.solr.cluster.api, which wasn't used except for SimpleMap. [SOLR-14680](https://issues.apache.org/jira/browse/SOLR-14680) (David Smiley) -- "<lib/>" tags in solrconfig.xml are now quietly ignored by default unless explicitly enabled with the `SOLR_CONFIG_LIB_ENABLED=true` enviroment variable (or corresponding sysprop). These tags are now considered deprecated and will be removed in Solr 10. [SOLR-16781](https://issues.apache.org/jira/browse/SOLR-16781) +- "<lib/>" tags in solrconfig.xml are now quietly ignored by default unless explicitly enabled with the `SOLR_CONFIG_LIB_ENABLED=true` environment variable (or corresponding sysprop). These tags are now considered deprecated and will be removed in Solr 10. [SOLR-16781](https://issues.apache.org/jira/browse/SOLR-16781) - Fix Gradle build sometimes gives spurious "unreferenced license file" warnings. [SOLR-17142](https://issues.apache.org/jira/browse/SOLR-17142) (Uwe Schindler) - Remove Deprecated URL and replace it with URI in Preparation for Java 21 [SOLR-17321](https://issues.apache.org/jira/browse/SOLR-17321) (Sanjay Dutt) (David Smiley) (Uwe Schindler) - Move Zk Arg parsing into Java Code from bin/solr scripts. [SOLR-17359](https://issues.apache.org/jira/browse/SOLR-17359) (Eric Pugh) (Rahul Goswami) @@ -934,7 +935,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Deprecate the V2RequestSupport interface, and the associated `setUseV2` and `setUseBinaryV2` SolrRequest methods. SolrJ users looking to make use of v2 APIs in their applications can use the SolrRequest implementations dedicated to that purpose.
[SOLR-17042](https://issues.apache.org/jira/browse/SOLR-17042) (Jason Gerlowski) - package CLI tool prints error JSONPath [SOLR-17072](https://issues.apache.org/jira/browse/SOLR-17072) (Mikhail Khludnev) - The `train_and_upload_demo_model.py` script referenced in LTR documentation now uses Python3 [SOLR-17078](https://issues.apache.org/jira/browse/SOLR-17078) (Jason Gerlowski) -- dev tools script cloud.sh became broken after changes in 9.3 added a new -slim.tgz file it was not expecting cloud.sh has been updated to ignore the -slim.tgz version of the tarball. [SOLR-17091](https://issues.apache.org/jira/browse/SOLR-17091) +- dev tools script cloud.sh became broken after changes in 9.3 added a new -slim.tgz file it was not expecting cloud.sh has been updated to ignore the -slim.tgz version of the tarball. [SOLR-17091](https://issues.apache.org/jira/browse/SOLR-17091) [9.4.1] - 2024-01-18 @@ -1013,7 +1014,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Auto configure tracer without a <tracerConfig> tag in solr.xml [SOLR-16938](https://issues.apache.org/jira/browse/SOLR-16938) (Alex Deparvu) - Users can pass Java system properties to the SolrCLI via the SOLR_TOOL_OPTS environment variable. [SOLR-16940](https://issues.apache.org/jira/browse/SOLR-16940) (Houston Putman) - The SolrCLI now uses a smarter default for the Solr URL if none is provided, using the same envVars used when running Solr. 
[SOLR-16941](https://issues.apache.org/jira/browse/SOLR-16941) (Houston Putman) -- SimpleTracer propagation for manual transaction ids [SOLR-16950](https://issues.apache.org/jira/browse/SOLR-16950) +- SimpleTracer propagation for manual transaction ids [SOLR-16950](https://issues.apache.org/jira/browse/SOLR-16950) - Make the internal CoresLocator implementation configurable in solr.xml [SOLR-16959](https://issues.apache.org/jira/browse/SOLR-16959) (Vincent Primault) (David Smiley) - The solr.jetty.ssl.sniHostCheck option now defaults to the value of SOLR_SSL_CHECK_PEER_NAME, if it is provided. This will enable client and server hostName check settings to be governed by the same environment variable. If users want separate client/server settings, they can manually override the solr.jetty.ssl.sniHostCheck option in SOLR_OPTS. [SOLR-16964](https://issues.apache.org/jira/browse/SOLR-16964) (Houston Putman) - Some ConfigSet operations formerly required that solrconfig.xml exist but should not have because the name of the file is configurable when creating cores / collections. [SOLR-16967](https://issues.apache.org/jira/browse/SOLR-16967) (David Smiley) @@ -1071,7 +1072,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix typos in org.apache.solr.core package [PR#1629](https://github.com/apache/solr/pull/1629) (Andrey Bozhko) (Marcus Eagan) - new SolrJettyTestRule for tests needing HTTP or Jetty. [SOLR-16623](https://issues.apache.org/jira/browse/SOLR-16623) (David Smiley) (Joshua Ouma) -- Remove dependencies on methods defined in the SimplePostTool from Solr core and solrj modules. [SOLR-16803](https://issues.apache.org/jira/browse/SOLR-16803) +- Remove dependencies on methods defined in the SimplePostTool from Solr core and solrj modules. [SOLR-16803](https://issues.apache.org/jira/browse/SOLR-16803) - Eliminate special case code in Solr CLI by introducing explicit VersionTool.java. 
[SOLR-16842](https://issues.apache.org/jira/browse/SOLR-16842) (Eric Pugh) (Will White) - The default auto-soft-commit time has been set to 3 seconds. Previously, auto-soft-commit was disabled by default. [SOLR-16856](https://issues.apache.org/jira/browse/SOLR-16856) (Houston Putman) - Lower the AffinityPlacementPlugin's default minimalFreeDiskGB to 5 GB [SOLR-16915](https://issues.apache.org/jira/browse/SOLR-16915) (Houston Putman) @@ -1283,7 +1284,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - FileSystemConfigSetService: implement the abstraction completely. It could be useful for putting ConfigSets on a shared file system. [SOLR-15787](https://issues.apache.org/jira/browse/SOLR-15787) (Nazerke Seidan) (David Smiley) - Certain Jetty "server" jars (jetty-util, jetty-io, etc.) are no longer duplicated between server/lib and WEB-INF/lib. All shared jars now live in server/lib/ext. By default, Jetty does not allow these jars to be shared by the Jetty server and the web-app (Solr). In order to accomplish this, explicit exclusions have been added to server/contexts/solr-jetty-context.xml that allow these "server" jars to be shared. Solr Core also no longer contains the JettySolrRunner (now living in the testing framework), removing the need for Jetty Server dependencies in solr-core. [SOLR-16158](https://issues.apache.org/jira/browse/SOLR-16158) (Houston Putman) - WARN when restoring backup if ConfigSet with same name already exists [SOLR-16302](https://issues.apache.org/jira/browse/SOLR-16302) (Albert Moser) (Kevin Risden) -- A number of v2 "replica" endpoints have been updated to be more REST-ful, and hopefully, intuitive. 
ADDREPLICAPROP is now `PUT /api/collections/$coll/shards/$shard/replicas/$rep/properties/$prop {"value": $val}` DELETEREPLICAPROP is now `DELETE /api/collections/$coll/shards/$shard/replicas/$rep/properties/$prop` [SOLR-16392](https://issues.apache.org/jira/browse/SOLR-16392) +- A number of v2 "replica" endpoints have been updated to be more REST-ful, and hopefully, intuitive. ADDREPLICAPROP is now `PUT /api/collections/$coll/shards/$shard/replicas/$rep/properties/$prop {"value": $val}` DELETEREPLICAPROP is now `DELETE /api/collections/$coll/shards/$shard/replicas/$rep/properties/$prop` [SOLR-16392](https://issues.apache.org/jira/browse/SOLR-16392) - The path of the v2 "list alias" API has been tweaked slightly to be more intuitive, and is now available at `GET /api/aliases`. It is also now possible to request information about a specific alias at `GET /api/aliases/<aliasName>`. [SOLR-16393](https://issues.apache.org/jira/browse/SOLR-16393) (Alex Deparvu) (Jason Gerlowski) - /mlt now has a v2 API available at `GET /api/collections/collName/mlt` [SOLR-16397](https://issues.apache.org/jira/browse/SOLR-16397) (Ameer Albahem) (Jason Gerlowski) - Introducing `{!mlt_content}foo bar` to cover existing `/mlt` handler functionality for SolrCloud. [SOLR-16420](https://issues.apache.org/jira/browse/SOLR-16420) (Mikhail Khludnev) @@ -2158,7 +2159,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- Internal logic for identifying 'Solr Home' has been refactored to make testing less error prone. Plugin developers using SolrPaths.locateSolrHome() or 'new SolrResourceLoader' should check deprecation warnings as existing some existing functionality will be removed in 9.0. See SOLR-14934 for more details. [SOLR-14934](https://issues.apache.org/jira/browse/SOLR-14934) +- Internal logic for identifying 'Solr Home' has been refactored to make testing less error prone. 
Plugin developers using SolrPaths.locateSolrHome() or 'new SolrResourceLoader' should check deprecation warnings as some existing functionality will be removed in 9.0. See SOLR-14934 for more details. [SOLR-14934](https://issues.apache.org/jira/browse/SOLR-14934) ### Added (3 changes) @@ -2392,7 +2393,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Introducing {!bool excludeTags=...} for Query DSL. [SOLR-14539](https://issues.apache.org/jira/browse/SOLR-14539) (Mikhail Khludnev) - Add BlockMax-WAND support to ReRank queries [SOLR-14552](https://issues.apache.org/jira/browse/SOLR-14552) (Tomás Fernández Löbbe) - Add BlockMax-WAND support for queries where the score is requested [SOLR-14554](https://issues.apache.org/jira/browse/SOLR-14554) (Tomás Fernández Löbbe) -- CoreAdminAPI's parameters instanceDir and dataDir are now validated, and must be relative to either SOLR_HOME, SOLR_DATA_HOME or coreRootDir. Added new solr.xml config 'allowPaths', controlled by system property 'solr.allowPaths' that allows you to add other allowed paths when needed. [SOLR-14561](https://issues.apache.org/jira/browse/SOLR-14561) +- CoreAdminAPI's parameters instanceDir and dataDir are now validated, and must be relative to either SOLR_HOME, SOLR_DATA_HOME or coreRootDir. Added new solr.xml config 'allowPaths', controlled by system property 'solr.allowPaths' that allows you to add other allowed paths when needed. [SOLR-14561](https://issues.apache.org/jira/browse/SOLR-14561) - Limit the HTTP security headers to "/solr" end point [SOLR-14634](https://issues.apache.org/jira/browse/SOLR-14634) (noble) - Do not collect expand documents when expand.rows=0 [SOLR-8306](https://issues.apache.org/jira/browse/SOLR-8306) (Marshall Sanders) (Amelia Henderson) @@ -2606,7 +2607,7 @@ This file lists Solr's raw release notes with details of every change to Solr.
M ### Upgrade Notes (1 change) -- Package manager now stores keys in package store instead of ZK. If you're using package manager and have already added any repositories, then you would need to re-add all the keys from /keys/exe location, one-by-one, using: "bin/solr add-key <keyfile.der>" [SOLR-14158](https://issues.apache.org/jira/browse/SOLR-14158) +- Package manager now stores keys in package store instead of ZK. If you're using package manager and have already added any repositories, then you would need to re-add all the keys from /keys/exe location, one-by-one, using: "bin/solr add-key <keyfile.der>" [SOLR-14158](https://issues.apache.org/jira/browse/SOLR-14158) ### Changed (1 change) @@ -2625,17 +2626,17 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (11 changes) -- QueryResponse.getExplainMap() type has changed from Map<String, String> to Map<String, Object> in order to accommodate structured explanations. This is a partially back-compatible change - compiled third-party components will work the same as before due to type erasure but source code changes may be required. [SOLR-13806](https://issues.apache.org/jira/browse/SOLR-13806) +- QueryResponse.getExplainMap() type has changed from Map<String, String> to Map<String, Object> in order to accommodate structured explanations. This is a partially back-compatible change - compiled third-party components will work the same as before due to type erasure but source code changes may be required. [SOLR-13806](https://issues.apache.org/jira/browse/SOLR-13806) - Deprecate legacy SolrCache implementations. Users are encouraged to transition their configurations to use org.apache.solr.search.CaffeineCache instead. [SOLR-13817](https://issues.apache.org/jira/browse/SOLR-13817) (ab) -- org.apache.solr.search.grouping.distributed.command.QueryCommand.Builder has new method 'setMainQuery' which is used to set top-level query. 
build() would fail if called without setting mainQuery. [SOLR-13823](https://issues.apache.org/jira/browse/SOLR-13823) +- org.apache.solr.search.grouping.distributed.command.QueryCommand.Builder has new method 'setMainQuery' which is used to set top-level query. build() would fail if called without setting mainQuery. [SOLR-13823](https://issues.apache.org/jira/browse/SOLR-13823) - timeAllowed parameter is allowed to have 0 value [SOLR-13904](https://issues.apache.org/jira/browse/SOLR-13904) (Houston Putman) (Mikhail Khludnev) -- Using Collapse filter or expand component with grouping is explicitly disallowed as the combination would cause inconsistent behavior and NPEs. [SOLR-13970](https://issues.apache.org/jira/browse/SOLR-13970) +- Using Collapse filter or expand component with grouping is explicitly disallowed as the combination would cause inconsistent behavior and NPEs. [SOLR-13970](https://issues.apache.org/jira/browse/SOLR-13970) - The default configset no longer has the following: - Library inclusions (<lib ../>) for extraction, solr-cell libs, clustering, velocity, LTR and language identifier - /browse, /tvrh and /update/extract handlers - TermVector component [SOLR-13978](https://issues.apache.org/jira/browse/SOLR-13978) (if someone wants it) (can be added using config APIs) - Some security-related http headers such as Content-Security-Policy are now set. If you have custom html served up by Solr's http server that contains inline javascript, it will no longer execute in modern browsers. You can fix your JS code to not run inline anymore, or edit etc/jetty.xml and weaken the CSP, or remove/alter the headers with a reverse proxy. [SOLR-13982](https://issues.apache.org/jira/browse/SOLR-13982) (rmuir) -- VelocityResponseWriter has been hardened - only trusted configsets can render configset provided templates and rendering templates from request parameters has been removed. 
[SOLR-14025](https://issues.apache.org/jira/browse/SOLR-14025) -- VelocityResponseWriter has been deprecated and may be removed in a future version. [SOLR-14065](https://issues.apache.org/jira/browse/SOLR-14065) -- Untrusted configsets (ones that are uploaded via unsecured configset API) cannot use <lib> directive. Consider enabling authentication/authorization so that the uploaded configsets are trusted. Note: If you already have a collection using untrusted configset that uses <lib> directive, it will not load after upgrading to 8.4. You can re-upload your configset using "bin/solr zk -upconfig .." or place your libraries in the classpath and restart Solr. [SOLR-14071](https://issues.apache.org/jira/browse/SOLR-14071) -- The "Blob Store" API and "runtimeLib" plugin mechanism that uses it is now considered deprecated. The replacement to it is the "Package Management" system, which includes a "File Store". These are experimental currently but will grow/stabalize/mature. [SOLR-14072](https://issues.apache.org/jira/browse/SOLR-14072) +- VelocityResponseWriter has been hardened - only trusted configsets can render configset provided templates and rendering templates from request parameters has been removed. [SOLR-14025](https://issues.apache.org/jira/browse/SOLR-14025) +- VelocityResponseWriter has been deprecated and may be removed in a future version. [SOLR-14065](https://issues.apache.org/jira/browse/SOLR-14065) +- Untrusted configsets (ones that are uploaded via unsecured configset API) cannot use <lib> directive. Consider enabling authentication/authorization so that the uploaded configsets are trusted. Note: If you already have a collection using untrusted configset that uses <lib> directive, it will not load after upgrading to 8.4. You can re-upload your configset using "bin/solr zk -upconfig .." or place your libraries in the classpath and restart Solr. 
[SOLR-14071](https://issues.apache.org/jira/browse/SOLR-14071) +- The "Blob Store" API and "runtimeLib" plugin mechanism that uses it is now considered deprecated. The replacement to it is the "Package Management" system, which includes a "File Store". These are experimental currently but will grow/stabilize/mature. [SOLR-14072](https://issues.apache.org/jira/browse/SOLR-14072) ### Added (9 changes) @@ -2718,7 +2719,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- Velocity: The initialization parameters "params.resource.loader.enabled" and "solr.resource.loader.enabled" have been removed and replaced by Java system properties "velocity.resourceloader.params.enabled" and "velocity.resourceloader.solr.enabled" respectively (both turned off by default). Please see SOLR-13971 for more details. [SOLR-13971](https://issues.apache.org/jira/browse/SOLR-13971) +- Velocity: The initialization parameters "params.resource.loader.enabled" and "solr.resource.loader.enabled" have been removed and replaced by Java system properties "velocity.resourceloader.params.enabled" and "velocity.resourceloader.solr.enabled" respectively (both turned off by default). Please see SOLR-13971 for more details. [SOLR-13971](https://issues.apache.org/jira/browse/SOLR-13971) ### Fixed (5 changes) @@ -2734,8 +2735,8 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (2 changes) -- Users who have written test cases that extend SolrTestCaseJ4 may see NullPointerExceptions if their tests directly reference both SolrTestCaseJ4.initCoreDataDir and SolrTestCaseJ4.deleteCore(). This change in behavior is due to a bug fix in deleteCore() to ensure the dataDir is properly reset in tests that call initCore()/deleteCore() multiple times in a given test (class). initCoreDataDir is now deprecated, and users are encouraged to use SolrTestCaseJ4.initAndGetDataDir() in it's place.
See SOLR-13664 for more details. [SOLR-13664](https://issues.apache.org/jira/browse/SOLR-13664) -- For JWTAuthPlugin, the 'jwkUrl' configuration key is deprecated and may be removed later, please use 'jwksUrl' instead. See SOLR-13734. [SOLR-13734](https://issues.apache.org/jira/browse/SOLR-13734) +- Users who have written test cases that extend SolrTestCaseJ4 may see NullPointerExceptions if their tests directly reference both SolrTestCaseJ4.initCoreDataDir and SolrTestCaseJ4.deleteCore(). This change in behavior is due to a bug fix in deleteCore() to ensure the dataDir is properly reset in tests that call initCore()/deleteCore() multiple times in a given test (class). initCoreDataDir is now deprecated, and users are encouraged to use SolrTestCaseJ4.initAndGetDataDir() in it's place. See SOLR-13664 for more details. [SOLR-13664](https://issues.apache.org/jira/browse/SOLR-13664) +- For JWTAuthPlugin, the 'jwkUrl' configuration key is deprecated and may be removed later, please use 'jwksUrl' instead. See SOLR-13734. [SOLR-13734](https://issues.apache.org/jira/browse/SOLR-13734) ### Added (15 changes) @@ -2753,7 +2754,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - command line option to export documents to a file [SOLR-13682](https://issues.apache.org/jira/browse/SOLR-13682) (noble) - JWTAuthPlugin to support multiple JWKS endpoints [SOLR-13713](https://issues.apache.org/jira/browse/SOLR-13713) (janhoy) - JWTAuthPlugin now supports multiple IdP issuers through configuring a new 'issuers' configuration key. 
Access tokens issued and signed by any of the configured issuers will be validated [SOLR-13734](https://issues.apache.org/jira/browse/SOLR-13734) (janhoy) -- Add CaffeineCache, an efficient implementation of SolrCache.(Ben Manes, Shawn Heisey, David Smiley, Andrzej Bialecki) [SOLR-8241](https://issues.apache.org/jira/browse/SOLR-8241) +- Add CaffeineCache, an efficient implementation of SolrCache.(Ben Manes, Shawn Heisey, David Smiley, Andrzej Bialecki) [SOLR-8241](https://issues.apache.org/jira/browse/SOLR-8241) ### Changed (19 changes) @@ -2808,7 +2809,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SPLITSHARD (async) with failures in underlying sub-operations can result in data loss [SOLR-13718](https://issues.apache.org/jira/browse/SOLR-13718) (Ishan Chattopadhyaya) - Allow negative values for limit in TermsFacetMap [SOLR-13725](https://issues.apache.org/jira/browse/SOLR-13725) (Richard Walker) (Munendra S N) - Fixed V2Requests - HttpSolrClient replaced first instance of "/solr" with "/api" which caused a change in host names starting with "solr". [SOLR-13727](https://issues.apache.org/jira/browse/SOLR-13727) (Megan Carey) (yonik) -- Fix regression in support for Date math in TRA start date that was introduced by SOLR-13375 [SOLR-13760](https://issues.apache.org/jira/browse/SOLR-13760) [SOLR-13375](https://issues.apache.org/jira/browse/SOLR-13375) +- Fix regression in support for Date math in TRA start date that was introduced by SOLR-13375 [SOLR-13760](https://issues.apache.org/jira/browse/SOLR-13760) [SOLR-13375](https://issues.apache.org/jira/browse/SOLR-13375) - Fix ClassCastException in NestableJsonFacet [SOLR-13780](https://issues.apache.org/jira/browse/SOLR-13780) (Tiago Martinho de Barros) (Munendra S N) - LRUStatsCache size explosion and ineffective caching. 
[SOLR-13790](https://issues.apache.org/jira/browse/SOLR-13790) (ab) - HttpSolrCall now maintains internal request count (_forwardedCount) for remote queries and limits them to the number of replicas. This avoids making too many cascading calls to remote servers, which, if not restricted, can bring down nodes containing the said collection [SOLR-13793](https://issues.apache.org/jira/browse/SOLR-13793) (Kesharee Nandan Vishwakarma) (Ishan Chattopadhyaya) @@ -2847,9 +2848,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (6 changes) -- Time Routed Aliases now have a '__TRA__' infix preceding the timestamp portion of the collection names. collections with the old format will not be altered and will continue to work, but any new TRA's or new collections for existing TRA's will use the new format. Solr will handle this invisibly, but any external code that attempted to predict collection names in TRA's will probably need adjustment. [SOLR-13419](https://issues.apache.org/jira/browse/SOLR-13419) -- Routed Aliases now use collection properties rather than core properties to identify collections that belong to the alias by default. This should be invisible and fully backwards compatible from within solr, and existing routed alias collections with core based properties will continue to work, but new collections created will not add a property to core.properties anymoore so any external code that inspected core.properties will not find the 'routedAliasName' key in new cores belonging to routed aliases. [SOLR-13420](https://issues.apache.org/jira/browse/SOLR-13420) -- Support for "addr" parameter from the "/solr/admin/zookeeper" endpoint has now been removed and will no longer be supported. [SOLR-13507](https://issues.apache.org/jira/browse/SOLR-13507) +- Time Routed Aliases now have a '__TRA__' infix preceding the timestamp portion of the collection names. 
collections with the old format will not be altered and will continue to work, but any new TRA's or new collections for existing TRA's will use the new format. Solr will handle this invisibly, but any external code that attempted to predict collection names in TRA's will probably need adjustment. [SOLR-13419](https://issues.apache.org/jira/browse/SOLR-13419) +- Routed Aliases now use collection properties rather than core properties to identify collections that belong to the alias by default. This should be invisible and fully backwards compatible from within solr, and existing routed alias collections with core based properties will continue to work, but new collections created will not add a property to core.properties anymore so any external code that inspected core.properties will not find the 'routedAliasName' key in new cores belonging to routed aliases. [SOLR-13420](https://issues.apache.org/jira/browse/SOLR-13420) +- Support for "addr" parameter from the "/solr/admin/zookeeper" endpoint has now been removed and will no longer be supported. [SOLR-13507](https://issues.apache.org/jira/browse/SOLR-13507) - org.apache.solr.util.SolrPluginUtils.IdentityRegenerator has been removed, please use the identical org.apache.solr.search.NoOpRegenerator instead. [SOLR-13515](https://issues.apache.org/jira/browse/SOLR-13515) (David Smiley) (Christine Poerschke) - Upgrade Jetty to 9.4.19.v20190610. [SOLR-13541](https://issues.apache.org/jira/browse/SOLR-13541) (Erick Erickson) (Cao Manh Dat) - Upgrade ZooKeeper to 3.5.5. ZooKeeper 3.5.5 introduces more security features. Include in your zoo.cfg file at minimum the following: 4lw.commands.whitelist=mntr,conf,ruok You can use 4lw.commands.whitelist=* to enable all ZooKeeper "4 letter commands". [SOLR-8346](https://issues.apache.org/jira/browse/SOLR-8346) (Erick Erickson) @@ -2875,7 +2876,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M - Expose IndexWriterConfig's ramPerThreadHardLimitMB through solrconfig.xml [SOLR-12554](https://issues.apache.org/jira/browse/SOLR-12554) (Ishan Chattopadhyaya) (Munendra S N) - Query Result Cache does not honor maxRamBytes parameter. [SOLR-13003](https://issues.apache.org/jira/browse/SOLR-13003) (ab) (Brian Ecker) - Make contrib/ltr Feature.defaultValue configurable. [SOLR-13049](https://issues.apache.org/jira/browse/SOLR-13049) (Stanislav Livotov) (Christine Poerschke) -- Add a request parameter to execute a streaming expression locally [SOLR-13306](https://issues.apache.org/jira/browse/SOLR-13306) +- Add a request parameter to execute a streaming expression locally [SOLR-13306](https://issues.apache.org/jira/browse/SOLR-13306) - In autoscaling policies, use an explicit 'put : on-each-node' to specify the rules is applied on each node [SOLR-13329](https://issues.apache.org/jira/browse/SOLR-13329) (noble) - Transaction log to natively support UUID types [SOLR-13347](https://issues.apache.org/jira/browse/SOLR-13347) (Thomas Wöckinger) (noble) - Highlighting: Range queries will now highlight in hl.method=unified mode. [SOLR-13367](https://issues.apache.org/jira/browse/SOLR-13367) (David Smiley) @@ -2902,7 +2903,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix "Search is temporarily disabled" logic to be consistent for entire request [SOLR-13474](https://issues.apache.org/jira/browse/SOLR-13474) (hossman) - Stop the leader from trying to rejoin the election on session expiration and harden our zk reconnect code path. [SOLR-13489](https://issues.apache.org/jira/browse/SOLR-13489) (Mark Miller) (Anshum Gupta) - Fix CollectionStateWatcher/CollectionStatePredicate based APIs in ZkStateReader and CloudSolrClient to be triggered on liveNode changes. Also add Predicate<DocCollection> equivalents for callers that don't care about liveNodes. 
[SOLR-13490](https://issues.apache.org/jira/browse/SOLR-13490) (hossman) -- SolrZkClient's watch wrapper no longer allows zookeeper to hold the same watch object multiple times. [SOLR-13491](https://issues.apache.org/jira/browse/SOLR-13491) +- SolrZkClient's watch wrapper no longer allows zookeeper to hold the same watch object multiple times. [SOLR-13491](https://issues.apache.org/jira/browse/SOLR-13491) - Fix distributed grouping related NullPointerException in JSONWriter.writeSolrDocument [SOLR-13496](https://issues.apache.org/jira/browse/SOLR-13496) (Christine Poerschke) - NPE on omitHeader=true is fixed by sending omitHeader=false to shard searches [SOLR-13509](https://issues.apache.org/jira/browse/SOLR-13509) (Munendra S N) (Mikhail Khludnev) - Fix input parameter handling in SchemaRequest.DynamicField and SchemaRequest.FieldTypes (Schema API) [SOLR-13521](https://issues.apache.org/jira/browse/SOLR-13521) (Tomás Fernández Löbbe) @@ -2945,14 +2946,14 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (8 changes) -- When requesting the status of an async request via REQUESTSTATUS collections API, the response will include the list of internal async requests (if any) in the "success" or "failed" keys (in addition to them being included outside those keys for backwards compatibility). See SOLR-12708 for more details [SOLR-12708](https://issues.apache.org/jira/browse/SOLR-12708) -- Velocity and Velocity Tools were both upgraded as part of this release. Velocity upgraded from 1.7 to 2.0. Please see https://velocity.apache.org/engine/2.0/upgrading.html about upgrading. Velocity Tools upgraded from 2.0 to 3.0. For more details, please see https://velocity.apache.org/tools/3.0/upgrading.html for details about the upgrade. 
[SOLR-13335](https://issues.apache.org/jira/browse/SOLR-13335) -- Solr's default behavior when dealing with 'maxBooleanClauses' has changed to reduce the risk of exponential query expansion when dealing with pathological query strings. A default upper limit of 1024 clauses (The same default prior to Solr 7.0) is now enforced at the node level, and can be overridden in solr.xml. The identically named solrconfig.xml setting is still available for limiting the size of 'explicit' boolean query strings, but this per-collection limit is still ristricted by the upper-bound of the global limit in solr.xml. See SOLR-13336 for more details. [SOLR-13336](https://issues.apache.org/jira/browse/SOLR-13336) -- The default GC has been changed from CMS to G1. To override this (in order to switch to CMS or any other GC), use GC_TUNE section of bin/solr.in.sh or bin/solr.in.cmd. [SOLR-13394](https://issues.apache.org/jira/browse/SOLR-13394) -- Custom TransientSolrCoreCache implementations no longer use the Observer/Observable pattern. To notify Solr that a core has been aged out of the cache, call CoreContainer.queueCoreToClose(SolrCore). See SOLR-13400 for details. [SOLR-13400](https://issues.apache.org/jira/browse/SOLR-13400) -- Update requests sent to non-routed aliases that point to multiple collections are no longer accepted. Until now Solr followed an obscure convention of updating only the first collection from the list, which usually was not what the user intended. This change explicitly rejects such update requests. [SOLR-13407](https://issues.apache.org/jira/browse/SOLR-13407) +- When requesting the status of an async request via REQUESTSTATUS collections API, the response will include the list of internal async requests (if any) in the "success" or "failed" keys (in addition to them being included outside those keys for backwards compatibility). 
See SOLR-12708 for more details [SOLR-12708](https://issues.apache.org/jira/browse/SOLR-12708) +- Velocity and Velocity Tools were both upgraded as part of this release. Velocity upgraded from 1.7 to 2.0. Please see https://velocity.apache.org/engine/2.0/upgrading.html about upgrading. Velocity Tools upgraded from 2.0 to 3.0. For more details, please see https://velocity.apache.org/tools/3.0/upgrading.html for details about the upgrade. [SOLR-13335](https://issues.apache.org/jira/browse/SOLR-13335) +- Solr's default behavior when dealing with 'maxBooleanClauses' has changed to reduce the risk of exponential query expansion when dealing with pathological query strings. A default upper limit of 1024 clauses (The same default prior to Solr 7.0) is now enforced at the node level, and can be overridden in solr.xml. The identically named solrconfig.xml setting is still available for limiting the size of 'explicit' boolean query strings, but this per-collection limit is still restricted by the upper-bound of the global limit in solr.xml. See SOLR-13336 for more details. [SOLR-13336](https://issues.apache.org/jira/browse/SOLR-13336) +- The default GC has been changed from CMS to G1. To override this (in order to switch to CMS or any other GC), use GC_TUNE section of bin/solr.in.sh or bin/solr.in.cmd. [SOLR-13394](https://issues.apache.org/jira/browse/SOLR-13394) +- Custom TransientSolrCoreCache implementations no longer use the Observer/Observable pattern. To notify Solr that a core has been aged out of the cache, call CoreContainer.queueCoreToClose(SolrCore). See SOLR-13400 for details. [SOLR-13400](https://issues.apache.org/jira/browse/SOLR-13400) +- Update requests sent to non-routed aliases that point to multiple collections are no longer accepted. Until now Solr followed an obscure convention of updating only the first collection from the list, which usually was not what the user intended. This change explicitly rejects such update requests. 
[SOLR-13407](https://issues.apache.org/jira/browse/SOLR-13407) - Until now, the CREATE command of Collections API returned status 0 even in case of failure. Now, the status code will be non-zero in case of failures [SOLR-5970](https://issues.apache.org/jira/browse/SOLR-5970) (e.g. 4xx) (5xx) -- SolrGangliaReporter has been removed from Solr because support for Ganglia has been removed from Dropwizard Metrics 4 due to a transitive dependency on LGPL. +- SolrGangliaReporter has been removed from Solr because support for Ganglia has been removed from Dropwizard Metrics 4 due to a transitive dependency on LGPL. ### Added (16 changes) @@ -2975,7 +2976,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Changed (14 changes) -- (at least) 2 distinct failures possible when clients attempt searches during SolrCore reload, added test band-aid for DocValuesNotIndexedTest. [SOLR-11035](https://issues.apache.org/jira/browse/SOLR-11035) +- (at least) 2 distinct failures possible when clients attempt searches during SolrCore reload, added test band-aid for DocValuesNotIndexedTest. [SOLR-11035](https://issues.apache.org/jira/browse/SOLR-11035) - Make HDFSDirectoryFactory support other prefixes (besides hdfs:/) [SOLR-11473](https://issues.apache.org/jira/browse/SOLR-11473) (Kevin Risden) - Throw an exception, instead of just a warning, when unknown atomic update operation is encountered [SOLR-12167](https://issues.apache.org/jira/browse/SOLR-12167) (Munendra S N) (Ishan Chattopadhyaya) - Avoid unnecessary memory cost when DistributedUpdateProcessor timed-out lock is not used. [SOLR-12833](https://issues.apache.org/jira/browse/SOLR-12833) (jefferyyuan) (ab) @@ -3069,22 +3070,22 @@ This file lists Solr's raw release notes with details of every change to Solr. M - The 'func' query parser now returns scores that are equal to 0 when a negative value is produced. This change is due to the fact that Lucene now requires scores to be positive. 
[LUCENE-7996](https://issues.apache.org/jira/browse/LUCENE-7996) (Adrien Grand) - Memory codecs have been removed from the codebase (MemoryPostings, MemoryDocValues). If you used postingsFormat="Memory" or docValuesFormat="Memory" then either remove it to use the default or experiment with one of the others. [LUCENE-8267](https://issues.apache.org/jira/browse/LUCENE-8267) (Dawid Weiss) -- Due to the introduction of LIR redesign since Solr 7.3 (SOLR-11702) and the removing of old LIR implementation in Solr 8. Rolling updates are not possible unless all nodes must be on Solr 7.3 or higher. If not updates can be lost. [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) -- In 'langid' contrib, the LanguageIdentifierUpdateProcessor base class changed some method signatures. If you have a custom language identifier implementation you will need to adapt your code. [SOLR-11774](https://issues.apache.org/jira/browse/SOLR-11774) +- Due to the introduction of LIR redesign since Solr 7.3 (SOLR-11702) and the removing of old LIR implementation in Solr 8. Rolling updates are not possible unless all nodes must be on Solr 7.3 or higher. If not updates can be lost. [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) +- In 'langid' contrib, the LanguageIdentifierUpdateProcessor base class changed some method signatures. If you have a custom language identifier implementation you will need to adapt your code. [SOLR-11774](https://issues.apache.org/jira/browse/SOLR-11774) - SolrMetric registries retained references to SolrCores when closed. A change of SolrMetricMAnager.registerGauge and SolrMetricProducer.initializeMetrics method signatures was required to fix it. Third party components that use this API need to be updated. 
[SOLR-11882](https://issues.apache.org/jira/browse/SOLR-11882) (Eros Taborelli) (Erick Erickson) (ab) - The date format patterns used by ParseDateFieldUpdateProcessorFactory (present in "schemaless mode") are now interpreted by Java 8's java.time.DateTimeFormatter instead of Joda Time. The pattern language is very similar but not the same. Typically, simply update the pattern by changing an uppercase 'Z' to lowercase 'z' and that's it. For the current recommended set of patterns in schemaless mode, see "Schemaless Mode" in the ref guide, or simply examine the default configSet. Also note that the set of patterns (formats) here have expanded from before to subsume those patterns previously handled by the "extract" contrib (Solr Cell / Tika). [SOLR-12586](https://issues.apache.org/jira/browse/SOLR-12586) (David Smiley) (Bar Rotstein) - The "extraction" contrib (Solr Cell) no longer does any date parsing, and thus no longer has the "date.formats" configuration. To ensure date strings are properly parsed, use ParseDateFieldUpdateProcessorFactory (an URP) commonly registered with the name "parse-date" in "schemaless mode". [SOLR-12593](https://issues.apache.org/jira/browse/SOLR-12593) (David Smiley) (Bar Rotstein) -- UpdateShardHandler's updateOnlyClient is now a Http2SolrClient (previous HttpSolrClient). This new client does not support 'maxConnections','maxConnectionsPerHost' parameters. [SOLR-12605](https://issues.apache.org/jira/browse/SOLR-12605) +- UpdateShardHandler's updateOnlyClient is now a Http2SolrClient (previous HttpSolrClient). This new client does not support 'maxConnections','maxConnectionsPerHost' parameters. [SOLR-12605](https://issues.apache.org/jira/browse/SOLR-12605) - When JSON data is sent to Solr with nested child documents split using the "split" parameter, the child docs will now be associated to their parents by the field/label string used in the JSON instead of anonymously. 
Most users probably won't notice the distinction since the label is lost any way unless special fields are in the schema. This choice used to be toggleable with an internal/expert "anonChildDocs" parameter flag which is now gone. [SOLR-12633](https://issues.apache.org/jira/browse/SOLR-12633) (David Smiley) -- HttpShardHandlerFactory's defaultClient is now a Http2SolrClient (previous HttpSolrClient). This new client does not support 'maxConnections','maxConnectionsPerHost' parameters. LBHttpSolrClient.Req and LBHttpSolrClient.Rsp are marked as deprecated, uses LBSolrClient.Req and LBSolrClient.Rsp instead. [SOLR-12640](https://issues.apache.org/jira/browse/SOLR-12640) -- Since Http2SolrClient does not support exposing connections related metrics. These metrics are no longer available 'QUERY.httpShardHandler.{availableConnections, leasedConnections, maxConnections, pendingConnections}', 'UPDATE.updateShardHandler.{availableConnections, leasedConnections, maxConnections, pendingConnections}' [SOLR-12643](https://issues.apache.org/jira/browse/SOLR-12643) -- The UnifiedHighlighter hl.weightMatches now defaults to true. If there are unforseen highlight problems, this may be the culprit. [SOLR-12754](https://issues.apache.org/jira/browse/SOLR-12754) -- If you explicitly use BM25SimilarityFactory in your schema, the absolute scoring will be lower due to SOLR-13025. But ordering of documents will not change in the normal case. Use LegacyBM25SimilarityFactory if you need to force the old 6.x/7.x scoring. Note that if you have not specified any similarity in schema or use the default SchemaSimilarityFactory, then LegacyBM25Similarity is automatically selected for 'luceneMatchVersion' < 8.0.0. See also explanation in Reference Guide chapter "Other Schema Elements". [SOLR-13025](https://issues.apache.org/jira/browse/SOLR-13025) -- The default replica placement strategy used in Solr has been reverted to the 'legacy' policy used by Solr 7.4 and previous versions. 
This is due to multiple bugs in the autoscaling based replica placement strategy that was made default in Solr 7.5 which causes multiple replicas of the same shard to be placed on the same node in addition to the maxShardsPerNode and createNodeSet parameters being ignored. Although the default has changed, autoscaling will continue to be used if a cluster policy or preference is specified or a collection level policy is in use. The default replica placement strategy can be changed to use autoscaling again by setting a cluster property: curl -X POST -H 'Content-type:application/json' --data-binary ' { "set-obj-property": { "defaults" : { "cluster": { "useLegacyReplicaAssignment":false } } } }' http://$SOLR_HOST:$SOLR_PORT/api/cluster [SOLR-13248](https://issues.apache.org/jira/browse/SOLR-13248) -- Deleting (or updating) documents by their uniqueKey is now scoped to only consider root documents, not child/nested documents. Thus a delete-by-id won't work on a child doc (no-op), and an attempt to update a child doc by providing a new doc with the same ID would add a new doc (probably erroneous). Both these actions were and still are problematic. In-place-updates are safe though. If you want to delete certain child documents and if you know they don't themselves have nested children then you must do so with a delete-by-query technique. [SOLR-5211](https://issues.apache.org/jira/browse/SOLR-5211) +- HttpShardHandlerFactory's defaultClient is now a Http2SolrClient (previous HttpSolrClient). This new client does not support 'maxConnections','maxConnectionsPerHost' parameters. LBHttpSolrClient.Req and LBHttpSolrClient.Rsp are marked as deprecated, uses LBSolrClient.Req and LBSolrClient.Rsp instead. [SOLR-12640](https://issues.apache.org/jira/browse/SOLR-12640) +- Since Http2SolrClient does not support exposing connections related metrics. 
These metrics are no longer available 'QUERY.httpShardHandler.{availableConnections, leasedConnections, maxConnections, pendingConnections}', 'UPDATE.updateShardHandler.{availableConnections, leasedConnections, maxConnections, pendingConnections}' [SOLR-12643](https://issues.apache.org/jira/browse/SOLR-12643) +- The UnifiedHighlighter hl.weightMatches now defaults to true. If there are unforeseen highlight problems, this may be the culprit. [SOLR-12754](https://issues.apache.org/jira/browse/SOLR-12754) +- If you explicitly use BM25SimilarityFactory in your schema, the absolute scoring will be lower due to SOLR-13025. But ordering of documents will not change in the normal case. Use LegacyBM25SimilarityFactory if you need to force the old 6.x/7.x scoring. Note that if you have not specified any similarity in schema or use the default SchemaSimilarityFactory, then LegacyBM25Similarity is automatically selected for 'luceneMatchVersion' < 8.0.0. See also explanation in Reference Guide chapter "Other Schema Elements". [SOLR-13025](https://issues.apache.org/jira/browse/SOLR-13025) +- The default replica placement strategy used in Solr has been reverted to the 'legacy' policy used by Solr 7.4 and previous versions. This is due to multiple bugs in the autoscaling based replica placement strategy that was made default in Solr 7.5 which causes multiple replicas of the same shard to be placed on the same node in addition to the maxShardsPerNode and createNodeSet parameters being ignored. Although the default has changed, autoscaling will continue to be used if a cluster policy or preference is specified or a collection level policy is in use. 
The default replica placement strategy can be changed to use autoscaling again by setting a cluster property: curl -X POST -H 'Content-type:application/json' --data-binary ' { "set-obj-property": { "defaults" : { "cluster": { "useLegacyReplicaAssignment":false } } } }' http://$SOLR_HOST:$SOLR_PORT/api/cluster [SOLR-13248](https://issues.apache.org/jira/browse/SOLR-13248) +- Deleting (or updating) documents by their uniqueKey is now scoped to only consider root documents, not child/nested documents. Thus a delete-by-id won't work on a child doc (no-op), and an attempt to update a child doc by providing a new doc with the same ID would add a new doc (probably erroneous). Both these actions were and still are problematic. In-place-updates are safe though. If you want to delete certain child documents and if you know they don't themselves have nested children then you must do so with a delete-by-query technique. [SOLR-5211](https://issues.apache.org/jira/browse/SOLR-5211) - Solr nodes can now listen and serve HTTP/2 requests. Most of internal requests (sent by UpdateShardHandler) (HttpShardHandler) -- ALPN is not supported in Java 8 or lower version therefore when a node started in Java 8 or a lower version with SSL enabled, it will send and can only be able to handle HTTP/1.1 requests. In case of using SSL Java 9 or later versions are recommended. -- Custom AuthenticationPlugin must provide its own setup for Http2SolrClient through implementing HttpClientBuilderPlugin.setup, if not internal requests can't be authenticated. +- ALPN is not supported in Java 8 or lower version therefore when a node started in Java 8 or a lower version with SSL enabled, it will send and can only be able to handle HTTP/1.1 requests. In case of using SSL Java 9 or later versions are recommended. +- Custom AuthenticationPlugin must provide its own setup for Http2SolrClient through implementing HttpClientBuilderPlugin.setup, if not internal requests can't be authenticated. 
### Added (14 changes) @@ -3149,9 +3150,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SPLITSHARD (async) with failures in underlying sub-operations can result in data loss [SOLR-13718](https://issues.apache.org/jira/browse/SOLR-13718) (Ishan Chattopadhyaya) - Use the safe fork of simple-xml for clustering contrib. [SOLR-13779](https://issues.apache.org/jira/browse/SOLR-13779) (Dawid Weiss) (Sachin Pattan) - HttpSolrCall now maintains internal request count (_forwardedCount) for remote queries and limits them to the number of replicas. This avoids making too many cascading calls to remote servers, which, if not restricted, can bring down nodes containing the said collection [SOLR-13793](https://issues.apache.org/jira/browse/SOLR-13793) (Kesharee Nandan Vishwakarma) (Ishan Chattopadhyaya) -- Improve ExecutePlanAction error handling. [SOLR-13828](https://issues.apache.org/jira/browse/SOLR-13828) +- Improve ExecutePlanAction error handling. [SOLR-13828](https://issues.apache.org/jira/browse/SOLR-13828) - Velocity response writer's resource loading now possible only through startup parameters. Also, removed velocity response writer from _default configset. [SOLR-13971](https://issues.apache.org/jira/browse/SOLR-13971) (Ishan Chattopadhyaya) (Sachin Pattan) -- VelocityResponseWriter has been hardened - only trusted configsets can render configset provided templates and rendering templates from request parameters has been removed. [SOLR-14025](https://issues.apache.org/jira/browse/SOLR-14025) +- VelocityResponseWriter has been hardened - only trusted configsets can render configset provided templates and rendering templates from request parameters has been removed. 
[SOLR-14025](https://issues.apache.org/jira/browse/SOLR-14025) - Back port javabin performance regression fixes from SOLR-14013 [SOLR-14259](https://issues.apache.org/jira/browse/SOLR-14259) [SOLR-14013](https://issues.apache.org/jira/browse/SOLR-14013) (noble) @@ -3160,8 +3161,8 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (2 changes) -- When requesting the status of an async request via REQUESTSTATUS collections API, the response will include the list of internal async requests (if any) in the "success" or "failed" keys (in addition to them being included outside those keys for backwards compatibility). See SOLR-12708 for more details [SOLR-12708](https://issues.apache.org/jira/browse/SOLR-12708) -- The Prometheus Metric Exporter now collects metrics from Solr at a fixed schedule that is controlled by the exporter itself. The default interval is 60s but it can be changed by specifying the --scrape-interval command line parameter to the tool. The collected metrics are cached and returned for any requests coming in before the next scheduled collection. This can impact the freshness of the metrics if prometheus is configured to query the tool more frequently. [SOLR-13234](https://issues.apache.org/jira/browse/SOLR-13234) +- When requesting the status of an async request via REQUESTSTATUS collections API, the response will include the list of internal async requests (if any) in the "success" or "failed" keys (in addition to them being included outside those keys for backwards compatibility). See SOLR-12708 for more details [SOLR-12708](https://issues.apache.org/jira/browse/SOLR-12708) +- The Prometheus Metric Exporter now collects metrics from Solr at a fixed schedule that is controlled by the exporter itself. The default interval is 60s but it can be changed by specifying the --scrape-interval command line parameter to the tool. 
The collected metrics are cached and returned for any requests coming in before the next scheduled collection. This can impact the freshness of the metrics if prometheus is configured to query the tool more frequently. [SOLR-13234](https://issues.apache.org/jira/browse/SOLR-13234) ### Fixed (25 changes) @@ -3201,7 +3202,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- * SOLR-13248: The default replica placement strategy used in Solr has been reverted to the 'legacy' policy used by Solr 7.4 and previous versions. This is due to multiple bugs in the autoscaling based replica placement strategy that was made default in Solr 7.5 which causes multiple replicas of the same shard to be placed on the same node in addition to the maxShardsPerNode and createNodeSet parameters being ignored. Although the default has changed, autoscaling will continue to be used if a cluster policy or preference is specified or a collection level policy is in use. The default replica placement strategy can be changed to use autoscaling again by setting a cluster property: curl -X POST -H 'Content-type:application/json' --data-binary ' { "set-obj-property": { "defaults" : { "cluster": { "useLegacyReplicaAssignment":false } } } }' http://$SOLR_HOST:$SOLR_PORT/api/cluster [SOLR-13248](https://issues.apache.org/jira/browse/SOLR-13248) +- * SOLR-13248: The default replica placement strategy used in Solr has been reverted to the 'legacy' policy used by Solr 7.4 and previous versions. This is due to multiple bugs in the autoscaling based replica placement strategy that was made default in Solr 7.5 which causes multiple replicas of the same shard to be placed on the same node in addition to the maxShardsPerNode and createNodeSet parameters being ignored. Although the default has changed, autoscaling will continue to be used if a cluster policy or preference is specified or a collection level policy is in use. 
The default replica placement strategy can be changed to use autoscaling again by setting a cluster property: curl -X POST -H 'Content-type:application/json' --data-binary ' { "set-obj-property": { "defaults" : { "cluster": { "useLegacyReplicaAssignment":false } } } }' http://$SOLR_HOST:$SOLR_PORT/api/cluster [SOLR-13248](https://issues.apache.org/jira/browse/SOLR-13248) ### Fixed (2 changes) @@ -3214,7 +3215,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- The 'shards' parameter handling logic changes to use a new config element to determine what hosts can be requested. Please see Apache Solr Reference Guide chapter "Distributed Requests" for details, as well as SOLR-12770. [SOLR-12770](https://issues.apache.org/jira/browse/SOLR-12770) +- The 'shards' parameter handling logic changes to use a new config element to determine what hosts can be requested. Please see Apache Solr Reference Guide chapter "Distributed Requests" for details, as well as SOLR-12770. [SOLR-12770](https://issues.apache.org/jira/browse/SOLR-12770) ### Added (7 changes) @@ -3281,10 +3282,10 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (4 changes) -- Autoscaling policy framework is now used as the default strategy to select the nodes on which new replicas or replicas of new collections are created. Previously, the maxShardsPerNode parameter was not allowed on collections when autoscaling policy was configured. Also if an autoscaling policy was configured then the default was to set an unlimited maxShardsPerNode automatically. Now the maxShardsPerNode parameter is always allowed during collection creation and maxShardsPerNode should be set correctly (if required) regardless of whether autoscaling policies are in effect or not. The default value of maxShardsPerNode continues to be 1 as before. 
It can be set to -1 during collection creation to fall back to the old behavior of unlimited maxShardsPerNode when using autoscaling policy. [SOLR-12739](https://issues.apache.org/jira/browse/SOLR-12739) +- Autoscaling policy framework is now used as the default strategy to select the nodes on which new replicas or replicas of new collections are created. Previously, the maxShardsPerNode parameter was not allowed on collections when autoscaling policy was configured. Also if an autoscaling policy was configured then the default was to set an unlimited maxShardsPerNode automatically. Now the maxShardsPerNode parameter is always allowed during collection creation and maxShardsPerNode should be set correctly (if required) regardless of whether autoscaling policies are in effect or not. The default value of maxShardsPerNode continues to be 1 as before. It can be set to -1 during collection creation to fall back to the old behavior of unlimited maxShardsPerNode when using autoscaling policy. [SOLR-12739](https://issues.apache.org/jira/browse/SOLR-12739) - The min_rf parameter is no longer needed, Solr will always return the achieved replication factor [SOLR-12767](https://issues.apache.org/jira/browse/SOLR-12767) (rf) -- The cluster wide defaults structure has changed from {collectionDefaults: {nrtReplicas : 2}} to {defaults : {collection : {nrtReplicas : 2}}}. The old format continues to be supported and can be read from ZK as well as written using the V2 set-obj-property syntax but it is deprecated and will be removed in Solr 9. We recommend that users change their API calls to use the new format going forward. [SOLR-12827](https://issues.apache.org/jira/browse/SOLR-12827) -- Added a Solr factory for ByteBuffersDirectory, which will replace deprecated RAMDirectory in Solr 9.0. 
[SOLR-12861](https://issues.apache.org/jira/browse/SOLR-12861) +- The cluster wide defaults structure has changed from {collectionDefaults: {nrtReplicas : 2}} to {defaults : {collection : {nrtReplicas : 2}}}. The old format continues to be supported and can be read from ZK as well as written using the V2 set-obj-property syntax but it is deprecated and will be removed in Solr 9. We recommend that users change their API calls to use the new format going forward. [SOLR-12827](https://issues.apache.org/jira/browse/SOLR-12827) +- Added a Solr factory for ByteBuffersDirectory, which will replace deprecated RAMDirectory in Solr 9.0. [SOLR-12861](https://issues.apache.org/jira/browse/SOLR-12861) ### Added (25 changes) @@ -3361,7 +3362,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Move TestCloudJSONFacetJoinDomain and TestCloudJSONFacetSKG to the facet test package [SOLR-12793](https://issues.apache.org/jira/browse/SOLR-12793) (Varun Thacker) - Migrate cluster wide defaults syntax in cluster properties to a nested structure. The structure has changed from {collectionDefaults: {nrtReplicas : 2}} to {defaults : {collection : {nrtReplicas : 2}}}. [SOLR-12827](https://issues.apache.org/jira/browse/SOLR-12827) (ab) (shalin) - Document statistics exposed by the Query Result Cache when maxRamMB is configured. [SOLR-12835](https://issues.apache.org/jira/browse/SOLR-12835) (shalin) -- Add Solr factory for ByteBuffersDirectory. [SOLR-12861](https://issues.apache.org/jira/browse/SOLR-12861) +- Add Solr factory for ByteBuffersDirectory. 
[SOLR-12861](https://issues.apache.org/jira/browse/SOLR-12861) - Add Javadoc @since tag to Analyzer component classes [SOLR-12956](https://issues.apache.org/jira/browse/SOLR-12956) (Alexandre Rafalovitch) - Add Javadoc @since tag to URP classes [SOLR-12966](https://issues.apache.org/jira/browse/SOLR-12966) (Alexandre Rafalovitch) - ZkNodeProps to be able to load from both javabin and JSON [SOLR-13006](https://issues.apache.org/jira/browse/SOLR-13006) (noble) @@ -3375,9 +3376,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M - TieredMergePolicy now respects maxSegmentSizeMB by default when executing findForcedMerges (optimize) and findForcedDeletesMerges (expungeDeletes) [LUCENE-7976](https://issues.apache.org/jira/browse/LUCENE-7976) (Erick Erickson) - Extremely outdated UIMA contrib module has been removed [SOLR-11694](https://issues.apache.org/jira/browse/SOLR-11694) (Alexandre Rafalovitch) - The configuration file for log4j2.xml is now in ../server/resources/log4j2.xml. All other copies have been removed. WINDOWS USERS: This JIRA corrects a bug in the start scripts that put example logs under ...\server, solr.log is now under ...\example. [SOLR-12008](https://issues.apache.org/jira/browse/SOLR-12008) (Erick Erickson) -- The data-driven schema no longer returns the *_str field-copy of text fields by default. The *_str field is still created and can be used for e.g. sorting, but to retrieve it you now need to explicitly list it in "fl" SOLR-12350 [SOLR-12350](https://issues.apache.org/jira/browse/SOLR-12350) -- SignificantTermsQParserPlugin's name is now 'significantTerms' and its old name 'sigificantTerms' is deprecated. [SOLR-12395](https://issues.apache.org/jira/browse/SOLR-12395) -- The heatmap facet response structure, when returned in JSON, changed from an even/odd name/value array to an object style. Although the object style makes more sense, this was an overlooked back-compat break; sorry. 
[SOLR-12398](https://issues.apache.org/jira/browse/SOLR-12398) +- The data-driven schema no longer returns the *_str field-copy of text fields by default. The *_str field is still created and can be used for e.g. sorting, but to retrieve it you now need to explicitly list it in "fl" SOLR-12350 [SOLR-12350](https://issues.apache.org/jira/browse/SOLR-12350) +- SignificantTermsQParserPlugin's name is now 'significantTerms' and its old name 'sigificantTerms' is deprecated. [SOLR-12395](https://issues.apache.org/jira/browse/SOLR-12395) +- The heatmap facet response structure, when returned in JSON, changed from an even/odd name/value array to an object style. Although the object style makes more sense, this was an overlooked back-compat break; sorry. [SOLR-12398](https://issues.apache.org/jira/browse/SOLR-12398) - The data-driven schema no longer returns the *_str field-copy of text fields by default. The *_str field is still created and can be used for e.g. sorting, but to retrieve it you now need to explicitly list it in "fl" (SOLR-12350) ### Added (42 changes) @@ -3447,7 +3448,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Solr SQL does not work with point numeric fields [SOLR-11585](https://issues.apache.org/jira/browse/SOLR-11585) (Joel Bernstein) (Kiran Chitturi) - Improve error handling of shard splitting. Fix splitting of mixed replica types. 
[SOLR-11665](https://issues.apache.org/jira/browse/SOLR-11665) (ab) - Keep nrtReplicas and replicationFactor in sync while creating a collection and modifying a collection [SOLR-11676](https://issues.apache.org/jira/browse/SOLR-11676) (Varun Thacker) -- NPE in tvrh if no field is specified and document doesn't contain any fields with term vectors [SOLR-11770](https://issues.apache.org/jira/browse/SOLR-11770) +- NPE in tvrh if no field is specified and document doesn't contain any fields with term vectors [SOLR-11770](https://issues.apache.org/jira/browse/SOLR-11770) - Restoring collection now treats maxShardsPerNode=-1 as unlimited [SOLR-11807](https://issues.apache.org/jira/browse/SOLR-11807) (Varun Thacker) - JSON Facet API: terms facet shard requests now indicate if they have more buckets to prevent unnecessary refinement requests. [SOLR-12326](https://issues.apache.org/jira/browse/SOLR-12326) (yonk) - Fixed a bug in JSON Faceting that could cause incorrect counts/stats when using non default sort options. This also adds a new configurable "overrefine" option. [SOLR-12343](https://issues.apache.org/jira/browse/SOLR-12343) (Yonik Seeley) (hossman) @@ -3522,9 +3523,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (3 changes) -- Configuring slowQueryThresholdMillis now logs slow requests to a separate file - solr_slow_requests.log. Previously they would get logged in the solr.log file. [SOLR-11453](https://issues.apache.org/jira/browse/SOLR-11453) -- Slave doesn't commit empty index when completely new index is detected on master during replication. To return the previous behavior pass false to skipCommitOnLeaderVersionZero in slave section of replication handler configuration, or pass it to the fetchindex command. [SOLR-11673](https://issues.apache.org/jira/browse/SOLR-11673) -- Solr now uses Log4J 2.11 . The log4j configuration is now in log4j2.xml rather than log4j.properties files. 
This is a server side change only and clients using SolrJ won't need any changes. Clients can still use any logging implementation which is compatible with SLF4J. We now let log4j2 handle rotation of solr logs at startup, and bin/solr start scripts will no longer attempt this nor move existing console or GC logs into logs/archived either SOLR-12144 [SOLR-7887](https://issues.apache.org/jira/browse/SOLR-7887) [SOLR-12144](https://issues.apache.org/jira/browse/SOLR-12144) +- Configuring slowQueryThresholdMillis now logs slow requests to a separate file - solr_slow_requests.log. Previously they would get logged in the solr.log file. [SOLR-11453](https://issues.apache.org/jira/browse/SOLR-11453) +- Slave doesn't commit empty index when completely new index is detected on master during replication. To return the previous behavior pass false to skipCommitOnLeaderVersionZero in slave section of replication handler configuration, or pass it to the fetchindex command. [SOLR-11673](https://issues.apache.org/jira/browse/SOLR-11673) +- Solr now uses Log4J 2.11 . The log4j configuration is now in log4j2.xml rather than log4j.properties files. This is a server side change only and clients using SolrJ won't need any changes. Clients can still use any logging implementation which is compatible with SLF4J. We now let log4j2 handle rotation of solr logs at startup, and bin/solr start scripts will no longer attempt this nor move existing console or GC logs into logs/archived either SOLR-12144 [SOLR-7887](https://issues.apache.org/jira/browse/SOLR-7887) [SOLR-12144](https://issues.apache.org/jira/browse/SOLR-12144) ### Added (35 changes) @@ -3693,16 +3694,16 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (10 changes) -- If you are using the spatial JTS library with Solr, you must upgrade to 1.15.0. This new version of JTS is now dual-licensed to include a BSD style license. 
[LUCENE-8161](https://issues.apache.org/jira/browse/LUCENE-8161) -- Collections created without specifying a configset name use a copy of the _default configset since 7.0. Before 7.3, the copied over configset was named the same as the collection name, but 7.3 onwards it will be named with an additional ".AUTOCREATED" suffix. [SOLR-11624](https://issues.apache.org/jira/browse/SOLR-11624) -- The old Leader-In-Recovery implementation (SOLR-5495) is now deprecated and replaced. Solr will support rolling upgrades from old 7.x versions of Solr to future 7.x releases until the last release of the 7.x major version. This means in order to upgrade to Solr 8 in the future, you must be on Solr 7.3 or higher. [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) [SOLR-5495](https://issues.apache.org/jira/browse/SOLR-5495) -- The behaviour of the autoscaling system has been modified to pause all triggers from execution between the start of actions and end of cool down period. The triggers will be resumed after the cool down period expires. Previously, the cool down period was a fixed period started after actions for a trigger event complete and during this time, all triggers continued to run but any events were rejected to be tried later. [SOLR-11747](https://issues.apache.org/jira/browse/SOLR-11747) -- The throttling mechanism used to limit the rate of autoscaling events processed has been removed. This deprecates the 'actionThrottlePeriodSeconds' setting in the set-properties Autoscaling API which is now a no-op. Use the 'triggerCooldownPeriodSeconds' instead to pause event processing. [SOLR-11748](https://issues.apache.org/jira/browse/SOLR-11748) -- The top-level <highlighting> syntax in solrconfig.xml is now formally deprecated in favour of <searchComponent> equivalent syntax. See also SOLR-1696. 
[SOLR-11798](https://issues.apache.org/jira/browse/SOLR-11798) [SOLR-1696](https://issues.apache.org/jira/browse/SOLR-1696) -- QueryComponent's rq parameter parsing no longer considers the defType parameter. [SOLR-11809](https://issues.apache.org/jira/browse/SOLR-11809) -- The default Solr log file size and number of backups is raised to 32MB and 10 respectively [SOLR-11957](https://issues.apache.org/jira/browse/SOLR-11957) -- A new mechanism is introduced in SOLR-11702 to maintain consistency in SolrCloud between leader and replicas. This mechanism lets Solr know whether a replica is in-sync with the leader or not, even when the leader is not live. If all the replicas who participate in the leader election are out-of-sync with previous leader, the election will pause until a timeout (named "leaderVoteWait") before allowing an out-of-sync replica to become leader. Note that the new leader still needs to contains more updates than any other active replicas in the same shard. Therefore by increasing leaderVoteWait will increase the consistency (over availability) of the system. The default value of leaderVoteWait is 180,000 ms (3 minutes) and it can be adjusted in the "solrcloud" section of the solr.xml [SOLR-12051](https://issues.apache.org/jira/browse/SOLR-12051) [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) -- The default value of `autoReplicaFailoverWaitAfterExpiration` has been increased to 120 seconds from the earlier default of 30 seconds. This affects how soon Solr adds new replicas to replace the replicas on nodes which have either crashed or shutdown. [SOLR-12067](https://issues.apache.org/jira/browse/SOLR-12067) +- If you are using the spatial JTS library with Solr, you must upgrade to 1.15.0. This new version of JTS is now dual-licensed to include a BSD style license. [LUCENE-8161](https://issues.apache.org/jira/browse/LUCENE-8161) +- Collections created without specifying a configset name use a copy of the _default configset since 7.0. 
Before 7.3, the copied over configset was named the same as the collection name, but 7.3 onwards it will be named with an additional ".AUTOCREATED" suffix. [SOLR-11624](https://issues.apache.org/jira/browse/SOLR-11624) +- The old Leader-In-Recovery implementation (SOLR-5495) is now deprecated and replaced. Solr will support rolling upgrades from old 7.x versions of Solr to future 7.x releases until the last release of the 7.x major version. This means in order to upgrade to Solr 8 in the future, you must be on Solr 7.3 or higher. [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) [SOLR-5495](https://issues.apache.org/jira/browse/SOLR-5495) +- The behaviour of the autoscaling system has been modified to pause all triggers from execution between the start of actions and end of cool down period. The triggers will be resumed after the cool down period expires. Previously, the cool down period was a fixed period started after actions for a trigger event complete and during this time, all triggers continued to run but any events were rejected to be tried later. [SOLR-11747](https://issues.apache.org/jira/browse/SOLR-11747) +- The throttling mechanism used to limit the rate of autoscaling events processed has been removed. This deprecates the 'actionThrottlePeriodSeconds' setting in the set-properties Autoscaling API which is now a no-op. Use the 'triggerCooldownPeriodSeconds' instead to pause event processing. [SOLR-11748](https://issues.apache.org/jira/browse/SOLR-11748) +- The top-level <highlighting> syntax in solrconfig.xml is now formally deprecated in favour of <searchComponent> equivalent syntax. See also SOLR-1696. [SOLR-11798](https://issues.apache.org/jira/browse/SOLR-11798) [SOLR-1696](https://issues.apache.org/jira/browse/SOLR-1696) +- QueryComponent's rq parameter parsing no longer considers the defType parameter. 
[SOLR-11809](https://issues.apache.org/jira/browse/SOLR-11809) +- The default Solr log file size and number of backups is raised to 32MB and 10 respectively [SOLR-11957](https://issues.apache.org/jira/browse/SOLR-11957) +- A new mechanism is introduced in SOLR-11702 to maintain consistency in SolrCloud between leader and replicas. This mechanism lets Solr know whether a replica is in-sync with the leader or not, even when the leader is not live. If all the replicas who participate in the leader election are out-of-sync with previous leader, the election will pause until a timeout (named "leaderVoteWait") before allowing an out-of-sync replica to become leader. Note that the new leader still needs to contain more updates than any other active replicas in the same shard. Therefore by increasing leaderVoteWait will increase the consistency (over availability) of the system. The default value of leaderVoteWait is 180,000 ms (3 minutes) and it can be adjusted in the "solrcloud" section of the solr.xml [SOLR-12051](https://issues.apache.org/jira/browse/SOLR-12051) [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) +- The default value of `autoReplicaFailoverWaitAfterExpiration` has been increased to 120 seconds from the earlier default of 30 seconds. This affects how soon Solr adds new replicas to replace the replicas on nodes which have either crashed or shutdown. [SOLR-12067](https://issues.apache.org/jira/browse/SOLR-12067) ### Added (44 changes) @@ -3969,11 +3970,11 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (6 changes) - in the ReplicationHandler the master.commitReserveDuration sub-element is deprecated. Instead please configure a direct commitReserveDuration element for use in all modes [SOLR-10962](https://issues.apache.org/jira/browse/SOLR-10962) (master) (slave) (cloud) -- shard and cluster metric reporter configuration now requires a class attribute.
If a reporter configures the group="shard" attribute then please also configure the class="org.apache.solr.metrics.reporters.solr.SolrShardReporter" attribute. If a reporter configures the group="cluster" attribute then please also configure the class="org.apache.solr.metrics.reporters.solr.SolrClusterReporter" attribute. [SOLR-11195](https://issues.apache.org/jira/browse/SOLR-11195) -- the abstract DocTransformer class now has an abstract score-less transform method variant. [SOLR-11254](https://issues.apache.org/jira/browse/SOLR-11254) +- shard and cluster metric reporter configuration now requires a class attribute. If a reporter configures the group="shard" attribute then please also configure the class="org.apache.solr.metrics.reporters.solr.SolrShardReporter" attribute. If a reporter configures the group="cluster" attribute then please also configure the class="org.apache.solr.metrics.reporters.solr.SolrClusterReporter" attribute. [SOLR-11195](https://issues.apache.org/jira/browse/SOLR-11195) +- the abstract DocTransformer class now has an abstract score-less transform method variant. [SOLR-11254](https://issues.apache.org/jira/browse/SOLR-11254) - all Stream Evaluators in solrj.io.eval have been refactored to have a simplier and more robust structure. This simplifies and condenses the code required to implement a new Evaluator and makes it much easier for evaluators to handle differing data types (primitives, objects, arrays, lists, and so forth). [SOLR-11283](https://issues.apache.org/jira/browse/SOLR-11283) (Dennis Gove) - in the XML query parser [SOLR-11477](https://issues.apache.org/jira/browse/SOLR-11477) (defType=xmlparser or {!xmlparser ... }) -- 'autoAddReplicas' feature is ported to autoscaling framework. Existing users of this feature should not have to change anything. Note these changes: 1. 
Behaviour: Changing the autoAddReplicas property from disabled to enabled using MODIFYCOLLECTION API no longer replaces down replicas for the collection immediately. Instead, replicas are only added if a node containing them went down while autoAddReplicas was enabled. The params autoReplicaFailoverBadNodeExpiration and autoReplicaFailoverWorkLoopDelay are no longer used. 2. Deprecations: Enabling/disabling autoAddReplicas cluster wide with the API will be deprecated; use suspend/resume trigger APIs with name='.auto_add_replicas' instead. +- 'autoAddReplicas' feature is ported to autoscaling framework. Existing users of this feature should not have to change anything. Note these changes: 1. Behaviour: Changing the autoAddReplicas property from disabled to enabled using MODIFYCOLLECTION API no longer replaces down replicas for the collection immediately. Instead, replicas are only added if a node containing them went down while autoAddReplicas was enabled. The params autoReplicaFailoverBadNodeExpiration and autoReplicaFailoverWorkLoopDelay are no longer used. 2. Deprecations: Enabling/disabling autoAddReplicas cluster wide with the API will be deprecated; use suspend/resume trigger APIs with name='.auto_add_replicas' instead. ### Added (39 changes) @@ -4165,7 +4166,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Possible to set solr.data.home property as root dir for all data [SOLR-6671](https://issues.apache.org/jira/browse/SOLR-6671) (janhoy) (Shawn Heisey) (Mark Miller) - Refinement for JSON Facet API: Adding refine:true to any terms facet will cause an additional distributed search phase (overlapped with field retrieval) that requests additional info for top facet buckets from shards that did not previously contribute to that bucket. This will correct counts (and other statistics) for those top buckets collected in the first phase. 
[SOLR-7452](https://issues.apache.org/jira/browse/SOLR-7452) (yonik) - Data driven schema now indexes text field "foo" as both "foo" (text_general) and as "foo_str" (string) to facilitate both search and faceting. AddSchemaFieldsUpdateProcessor now has the ability to add a "copyField" to the type mappings, with an optional maxChars limitation. You can also define one typeMappings as default. This also solves issues SOLR-8495, SOLR-6966, and SOLR-7058 [SOLR-9526](https://issues.apache.org/jira/browse/SOLR-9526) [SOLR-8495](https://issues.apache.org/jira/browse/SOLR-8495) [SOLR-6966](https://issues.apache.org/jira/browse/SOLR-6966) [SOLR-7058](https://issues.apache.org/jira/browse/SOLR-7058) (janhoy) (Steve Rowe) (hossman) (Alexandre Rafalovitch) (Shawn Heisey) (Cao Manh Dat) -- Create another replication mode for SolrCloud [SOLR-9835](https://issues.apache.org/jira/browse/SOLR-9835) +- Create another replication mode for SolrCloud [SOLR-9835](https://issues.apache.org/jira/browse/SOLR-9835) - Collect aggregated metrics from nodes and shard leaders in overseer. [SOLR-9857](https://issues.apache.org/jira/browse/SOLR-9857) [SOLR-9858](https://issues.apache.org/jira/browse/SOLR-9858) (ab) - Add solr/solr.cmd parameter to append jetty parameters to the start script. [SOLR-9910](https://issues.apache.org/jira/browse/SOLR-9910) (Mano Kovacs) (Mark Miller) - Add support for PointFields in FacetModule (JSON Facets) [SOLR-9989](https://issues.apache.org/jira/browse/SOLR-9989) (Cao Manh Dat) @@ -4336,7 +4337,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (1 change) -- Lucene only release +- Lucene only release [6.6.5] - 2018-06-03 @@ -4410,9 +4411,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (3 changes) -- Solr contribs map-reduce, morphlines-core and morphlines-cell have been removed. 
-- JSON Facet API now uses hyper-log-log for numBuckets cardinality calculation and calculates cardinality before filtering buckets by any mincount greater than 1. -- ZooKeeper dependency has been upgraded from 3.4.6 to 3.4.10. +- Solr contribs map-reduce, morphlines-core and morphlines-cell have been removed. +- JSON Facet API now uses hyper-log-log for numBuckets cardinality calculation and calculates cardinality before filtering buckets by any mincount greater than 1. +- ZooKeeper dependency has been upgraded from 3.4.6 to 3.4.10. ### Added (36 changes) @@ -4552,7 +4553,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (2 changes) - JMX metric "avgTimePerRequest" (and the corresponding metric in the metrics API for each handler) used to be a simple non-decaying average based on total cumulative time and the number of requests. New Codahale Metrics implementation applies exponential decay to this value, which heavily biases the average towards the last 5 minutes. [SOLR-10226](https://issues.apache.org/jira/browse/SOLR-10226) (ab) -- Parallel SQL now uses Apache Calcite as its SQL framework. As part of this change the default aggregation mode has been changed to facet rather than map_reduce. There has also been changes to the SQL aggregate response and some SQL syntax changes. Consult the documentation for full details. [SOLR-8593](https://issues.apache.org/jira/browse/SOLR-8593) +- Parallel SQL now uses Apache Calcite as its SQL framework. As part of this change the default aggregation mode has been changed to facet rather than map_reduce. There has also been changes to the SQL aggregate response and some SQL syntax changes. Consult the documentation for full details. [SOLR-8593](https://issues.apache.org/jira/browse/SOLR-8593) ### Added (31 changes) @@ -4693,11 +4694,11 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M ### Upgrade Notes (5 changes) -- complexphrase query parser now supports leading wildcards, beware of its' possible heaviness. Users are encouraged to use ReversedWildcardFilter in index time analysis. [SOLR-7466](https://issues.apache.org/jira/browse/SOLR-7466) -- Metrics related classes in org.apache.solr.util.stats have been removed in favor of the dropwizard metrics library. Any custom plugins using these classes should be changed to use the equivalent classes from the metrics library. As part of this, the following changes were made to the output of Overseer Status API: * The "totalTime" metric has been removed because it is no longer supported * The metrics "75thPctlRequestTime", "95thPctlRequestTime", "99thPctlRequestTime" and "999thPctlRequestTime" in Overseer Status API have been renamed to "75thPcRequestTime", "95thPcRequestTime" and so on for consistency with stats output in other parts of Solr. * The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond" and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr. [SOLR-8785](https://issues.apache.org/jira/browse/SOLR-8785) -- Export handler returns zero for numeric fields that are not in the original doc. One consequence of this change is that you must be aware that some tuples will not have values if there were none in the original document. [SOLR-9166](https://issues.apache.org/jira/browse/SOLR-9166) -- You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the original Highlighter. That said, some options aren't supported yet. It will get more features in time, especially with your input. 
See HighlightParams.java for a listing of highlight parameters annotated with which highlighters use them. hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector. [SOLR-9708](https://issues.apache.org/jira/browse/SOLR-9708) -- maxWarmingSearchers now defaults to 1, and more importantly commits will now block if this limit is exceeded instead of throwing an exception (a good thing). Consequently there is no longer a risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files. [SOLR-9712](https://issues.apache.org/jira/browse/SOLR-9712) +- complexphrase query parser now supports leading wildcards, beware of its possible heaviness. Users are encouraged to use ReversedWildcardFilter in index time analysis. [SOLR-7466](https://issues.apache.org/jira/browse/SOLR-7466) +- Metrics related classes in org.apache.solr.util.stats have been removed in favor of the dropwizard metrics library. Any custom plugins using these classes should be changed to use the equivalent classes from the metrics library. As part of this, the following changes were made to the output of Overseer Status API: * The "totalTime" metric has been removed because it is no longer supported * The metrics "75thPctlRequestTime", "95thPctlRequestTime", "99thPctlRequestTime" and "999thPctlRequestTime" in Overseer Status API have been renamed to "75thPcRequestTime", "95thPcRequestTime" and so on for consistency with stats output in other parts of Solr. * The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond" and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr.
[SOLR-8785](https://issues.apache.org/jira/browse/SOLR-8785) +- Export handler returns zero for numeric fields that are not in the original doc. One consequence of this change is that you must be aware that some tuples will not have values if there were none in the original document. [SOLR-9166](https://issues.apache.org/jira/browse/SOLR-9166) +- You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the original Highlighter. That said, some options aren't supported yet. It will get more features in time, especially with your input. See HighlightParams.java for a listing of highlight parameters annotated with which highlighters use them. hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector. [SOLR-9708](https://issues.apache.org/jira/browse/SOLR-9708) +- maxWarmingSearchers now defaults to 1, and more importantly commits will now block if this limit is exceeded instead of throwing an exception (a good thing). Consequently there is no longer a risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files. [SOLR-9712](https://issues.apache.org/jira/browse/SOLR-9712) ### Added (40 changes) @@ -4828,12 +4829,12 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (6 changes) -- We are no longer backing up solr.log and solr_gc.log files in date-stamped copies forever. If you relied on the solr_log_<date> or solr_gc_log_<date> being in the logs folder that will no longer be the case. See SOLR-9570 for details. 
[SOLR-9570](https://issues.apache.org/jira/browse/SOLR-9570) -- If you use the JSON Facet API (json.facet) with method=stream, you must now set sort='index asc' to get the streaming behavior; otherwise it won't stream. Reminder: "method" is a hint that doesn't change defaults of other parameters. -- If you use the JSON Facet API (json.facet) to facet on a numeric field and if you use mincount=0 or if you set the prefix, then you will now get an error as these options are incompatible with numeric faceting. -- Solr's logging verbosity at the INFO level has been greatly reduced, and you may need to update the log configs to use the DEBUG level to see all the logging messages you used to see at INFO level before. -- The create/deleteCollection methods on MiniSolrCloudCluster have been deprecated. Clients should instead use the CollectionAdminRequest API. In addition, MiniSolrCloudCluster#uploadConfigDir(File, String) has been deprecated in favour of #uploadConfigSet(Path, String) -- The bin/solr.in.sh (bin/solr.in.cmd on Windows) is now completely commented by default. Previously, this wasn't so, which had the effect of masking existing environment variables. +- We are no longer backing up solr.log and solr_gc.log files in date-stamped copies forever. If you relied on the solr_log_<date> or solr_gc_log_<date> being in the logs folder that will no longer be the case. See SOLR-9570 for details. [SOLR-9570](https://issues.apache.org/jira/browse/SOLR-9570) +- If you use the JSON Facet API (json.facet) with method=stream, you must now set sort='index asc' to get the streaming behavior; otherwise it won't stream. Reminder: "method" is a hint that doesn't change defaults of other parameters. +- If you use the JSON Facet API (json.facet) to facet on a numeric field and if you use mincount=0 or if you set the prefix, then you will now get an error as these options are incompatible with numeric faceting. 
+- Solr's logging verbosity at the INFO level has been greatly reduced, and you may need to update the log configs to use the DEBUG level to see all the logging messages you used to see at INFO level before. +- The create/deleteCollection methods on MiniSolrCloudCluster have been deprecated. Clients should instead use the CollectionAdminRequest API. In addition, MiniSolrCloudCluster#uploadConfigDir(File, String) has been deprecated in favour of #uploadConfigSet(Path, String) +- The bin/solr.in.sh (bin/solr.in.cmd on Windows) is now completely commented by default. Previously, this wasn't so, which had the effect of masking existing environment variables. ### Added (27 changes) @@ -4916,7 +4917,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Move lots of SolrCloud logging from 'info' to 'debug' [SOLR-5563](https://issues.apache.org/jira/browse/SOLR-5563) (janhoy) (Alan Woodward) - Remove unreachable printLayout usage in cloud tests. [SOLR-6090](https://issues.apache.org/jira/browse/SOLR-6090) (Cao Manh Dat) (shalin) - Reduced logging during startup and shutdown, moved more logs to DEBUG level [SOLR-6677](https://issues.apache.org/jira/browse/SOLR-6677) (janhoy) (Shawn Heisey) (Alan Woodward) -- Updated the quickstart tutorial to cover the 6.2.0 release, and added ant target "generate-website-quickstart" to convert the bundled version of the tutorial into one suitable for the website. [SOLR-6871](https://issues.apache.org/jira/browse/SOLR-6871) +- Updated the quickstart tutorial to cover the 6.2.0 release, and added ant target "generate-website-quickstart" to convert the bundled version of the tutorial into one suitable for the website. 
[SOLR-6871](https://issues.apache.org/jira/browse/SOLR-6871) - Solr stops printing stacktraces in log and output [SOLR-7436](https://issues.apache.org/jira/browse/SOLR-7436) (janhoy) (hossman) (Markus Jelsma) - Refuse "bin/solr create" if run as root, unless -force is specified [SOLR-7826](https://issues.apache.org/jira/browse/SOLR-7826) (janhoy) (Binoy Dalal) - Moved defaults within bin/solr.in.sh (and bin/solr.in.cmd on Windows) to bin/solr (and bin/solr.cmd) such that the default state of these files is to set nothing. This makes Solr work better with Docker. [SOLR-7850](https://issues.apache.org/jira/browse/SOLR-7850) (David Smiley) @@ -5195,7 +5196,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- If you use historical dates, specifically on or before the year 1582, you should re-index. +- If you use historical dates, specifically on or before the year 1582, you should re-index. ### Fixed (24 changes) @@ -5346,7 +5347,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SQL JDBC - DriverImpl loadParams doesn't support keys with no values in the connection string [SOLR-8179](https://issues.apache.org/jira/browse/SOLR-8179) (Kevin Risden) (Joel Bernstein) - Implement Closeable on TupleStream [SOLR-8190](https://issues.apache.org/jira/browse/SOLR-8190) (Kevin Risden) (Joel Bernstein) - Change default hdfs tlog replication factor from 1 to 3. [SOLR-8258](https://issues.apache.org/jira/browse/SOLR-8258) (Mark Miller) -- Remove deprecated JettySolrRunner.getDispatchFilter() [SOLR-8259](https://issues.apache.org/jira/browse/SOLR-8259) +- Remove deprecated JettySolrRunner.getDispatchFilter() [SOLR-8259](https://issues.apache.org/jira/browse/SOLR-8259) - Change SchemaSimilarityFactory default to BM25Similarity [SOLR-8261](https://issues.apache.org/jira/browse/SOLR-8261) (hossman) - Remove Java Serialization from the Streaming API. 
The /stream handler now only accepts Streaming Expressions. [SOLR-8266](https://issues.apache.org/jira/browse/SOLR-8266) (Jason Gerlowski) (Joel Bernstein) - Change implicit default Similarity to use BM25 when luceneMatchVersion >= 6 [SOLR-8270](https://issues.apache.org/jira/browse/SOLR-8270) (hossman) @@ -5408,7 +5409,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (1 change) -- Lucene only release +- Lucene only release [5.5.2] - 2016-06-25 @@ -5576,7 +5577,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Highlighting a geo RPT field would throw an NPE instead of doing nothing. [SOLR-8541](https://issues.apache.org/jira/browse/SOLR-8541) (Pawel Rog) (David Smiley) - Core discovery was not following symlinks [SOLR-8548](https://issues.apache.org/jira/browse/SOLR-8548) (Aaron LaBella) (Alan Woodward) - Make collection deletion more robust. [SOLR-8551](https://issues.apache.org/jira/browse/SOLR-8551) (Mark Miller) -- Fix Embedded ZooKeeper to use <solrHome>/zoo_data for it's data directory [SOLR-8564](https://issues.apache.org/jira/browse/SOLR-8564) +- Fix Embedded ZooKeeper to use <solrHome>/zoo_data for it's data directory [SOLR-8564](https://issues.apache.org/jira/browse/SOLR-8564) - Fix HDFSLogReader replay status numbers, a performance bug where we can reopen FSDataInputStream much too often, and an hdfs tlog data integrity bug. [SOLR-8575](https://issues.apache.org/jira/browse/SOLR-8575) (Mark Miller) (Patrick Dvorack) (yonik) - memory leak in JsonRecordReader affecting /update/json/docs. Large payloads cause OOM [SOLR-8582](https://issues.apache.org/jira/browse/SOLR-8582) (noble) (shalin) - Regular expression queries starting with escaped forward slash caused an exception. [SOLR-8605](https://issues.apache.org/jira/browse/SOLR-8605) (Scott Blum) (yonik) @@ -5681,7 +5682,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M - Add a LIST command to ConfigSets API [SOLR-7995](https://issues.apache.org/jira/browse/SOLR-7995) (Gregory Chanan) - Basic auth support in SolrJ [SOLR-8053](https://issues.apache.org/jira/browse/SOLR-8053) (noble) - CloneFieldUpdateProcessorFactory now supports choosing a "dest" field name based on a regex pattern and replacement init options. [SOLR-8113](https://issues.apache.org/jira/browse/SOLR-8113) (Gus Heck) (hossman) -- Create/delete fields/dynamic fields/copy fields via schema tab on Angular UI [SOLR-8139](https://issues.apache.org/jira/browse/SOLR-8139) +- Create/delete fields/dynamic fields/copy fields via schema tab on Angular UI [SOLR-8139](https://issues.apache.org/jira/browse/SOLR-8139) - Introduce possibility to configure ParseContext in ExtractingRequestHandler/ExtractingDocumentLoader [SOLR-8166](https://issues.apache.org/jira/browse/SOLR-8166) (Andriy Binetsky) (Uwe Schindler) - JSON Facet API: add "method" param to terms/field facets to give an execution hint for what method should be used to facet. [SOLR-8217](https://issues.apache.org/jira/browse/SOLR-8217) (yonik) - SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using a fieldType name to identify which Similarity to use as a default. [SOLR-8329](https://issues.apache.org/jira/browse/SOLR-8329) (hossman) @@ -5810,7 +5811,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SolrResourceLoader now takes a Path as its instance directory [SOLR-8302](https://issues.apache.org/jira/browse/SOLR-8302) (Alan Woodward) (Shawn Heisey) - CustomBufferedIndexInput now includes resource description when throwing EOFException. 
[SOLR-8303](https://issues.apache.org/jira/browse/SOLR-8303) (Mike Drob) (Uwe Schindler) - Removed default core checks in the dispatch filter since we don't have a default core anymore [SOLR-8315](https://issues.apache.org/jira/browse/SOLR-8315) (Varun Thacker) -- Standardize and fix logger creation and usage so that they aren't shared across source files.(Jason Gerlowski, Uwe Schindler, Anshum Gupta) [SOLR-8330](https://issues.apache.org/jira/browse/SOLR-8330) +- Standardize and fix logger creation and usage so that they aren't shared across source files.(Jason Gerlowski, Uwe Schindler, Anshum Gupta) [SOLR-8330](https://issues.apache.org/jira/browse/SOLR-8330) - Fix check-example-lucene-match-version Ant task and addVersion.py script to check and update luceneMatchVersion under solr/example/ configs as well logic. [SOLR-8363](https://issues.apache.org/jira/browse/SOLR-8363) (Varun Thacker) @@ -5862,7 +5863,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Expose znodeVersion property for each of the collections returned for the clusterstatus operation in the collections API [SOLR-7389](https://issues.apache.org/jira/browse/SOLR-7389) (Marius Grama) (shalin) - Expose HDFS Block Locality Metrics via JMX [SOLR-7458](https://issues.apache.org/jira/browse/SOLR-7458) (Mike Drob) (Mark Miller) - A DocTransformer can now request fields from the SolrIndexSearcher that are not necessarily returned in the file SolrDocument by returning a list of fields from DocTransformer#getExtraRequestFields [SOLR-7622](https://issues.apache.org/jira/browse/SOLR-7622) (ryan) -- MoreLikeThis QParser now supports all options provided by the MLT Handler i.e. mintf, mindf, minwl, maxwl, maxqt, and maxntp. [SOLR-7639](https://issues.apache.org/jira/browse/SOLR-7639) +- MoreLikeThis QParser now supports all options provided by the MLT Handler i.e. mintf, mindf, minwl, maxwl, maxqt, and maxntp. 
[SOLR-7639](https://issues.apache.org/jira/browse/SOLR-7639) - New response format added wt=smile [SOLR-7651](https://issues.apache.org/jira/browse/SOLR-7651) (noble) - Add 'port' tag support in replica placement rules [SOLR-7668](https://issues.apache.org/jira/browse/SOLR-7668) (Adam McElwee) (Noble Paul) - Faceting on nested objects / Block-join faceting with the new JSON Facet API. Example: Assuming books with nested pages and an input domain of pages, the following will switch the domain to books before faceting on the author field: authors:{ type:terms, field:author, domain:{toParent:"type:book"} } [SOLR-7676](https://issues.apache.org/jira/browse/SOLR-7676) (yonik) @@ -5956,7 +5957,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix typo bug with TestConfigOverlay [SOLR-7740](https://issues.apache.org/jira/browse/SOLR-7740) (Christine Poerschke) (Ramkumar Aiyengar) - Change TestConfig.testDefaults to cover all SolrIndexConfig fields [SOLR-7750](https://issues.apache.org/jira/browse/SOLR-7750) (Christine Poerschke) (Ramkumar Aiyengar) - Removed fastutil and java-hll dependency, integrated HyperLogLog from java-hll into Solr core. [SOLR-7787](https://issues.apache.org/jira/browse/SOLR-7787) (Dawid Weiss) -- JSON Facet API: the avg() facet function now skips missing values rather than treating them as a 0 value. The def() function can be used to treat missing values as 0 if that is desired. Example: facet:{ mean:"avg(def(myfield,0))" } [SOLR-7800](https://issues.apache.org/jira/browse/SOLR-7800) +- JSON Facet API: the avg() facet function now skips missing values rather than treating them as a 0 value. The def() function can be used to treat missing values as 0 if that is desired. 
Example: facet:{ mean:"avg(def(myfield,0))" } [SOLR-7800](https://issues.apache.org/jira/browse/SOLR-7800) - Prevent class loading deadlock in TrieDateField; refactor date formatting and parsing out of TrieDateField and move to static utility class DateFormatUtil. [SOLR-7803](https://issues.apache.org/jira/browse/SOLR-7803) (Markus Heiden) (Uwe Schindler) - Update Kite Morphlines to 1.1.0 [SOLR-7805](https://issues.apache.org/jira/browse/SOLR-7805) (Mark Miller) - TestMiniSolrCloudCluster.testCollectionCreateSearchDelete async collection-creation (sometimes) [SOLR-7823](https://issues.apache.org/jira/browse/SOLR-7823) (Christine Poerschke) @@ -6051,7 +6052,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SolrConfig#getConfigOverlay does not clean up it's resources. [SOLR-7426](https://issues.apache.org/jira/browse/SOLR-7426) (Mark Miller) - DebugComponent does not return the right requestPurpose for pivot facet refinements. [SOLR-7440](https://issues.apache.org/jira/browse/SOLR-7440) (shalin) - Implemented range faceting over date fields in the new facet module (JSON Facet API). 
[SOLR-7443](https://issues.apache.org/jira/browse/SOLR-7443) (yonik) -- solr/server/etc/jetty-https-ssl.xml hard codes the key store file and password rather than pulling them from the sysprops defined in solr/bin/solr.in.{sh,cmd} [SOLR-7449](https://issues.apache.org/jira/browse/SOLR-7449) +- solr/server/etc/jetty-https-ssl.xml hard codes the key store file and password rather than pulling them from the sysprops defined in solr/bin/solr.in.{sh,cmd} [SOLR-7449](https://issues.apache.org/jira/browse/SOLR-7449) - Fix edge case which could cause `bin/solr stop` to hang forever [SOLR-7450](https://issues.apache.org/jira/browse/SOLR-7450) (Ramkumar Aiyengar) - Fix check-licenses to correctly detect if start.jar.sha1 is incorrect [SOLR-7469](https://issues.apache.org/jira/browse/SOLR-7469) (hossman) - Fix sample data to eliminate file order dependency for successful indexing, also fixed SolrCloudExampleTest to help catch this in the future. [SOLR-7470](https://issues.apache.org/jira/browse/SOLR-7470) (hossman) @@ -6268,7 +6269,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Add an onlyIfDown flag for DELETEREPLICA collections API command [SOLR-6482](https://issues.apache.org/jira/browse/SOLR-6482) (Erick Erickson) - ReplicationHandler should have an option to throttle the speed of replication [SOLR-6485](https://issues.apache.org/jira/browse/SOLR-6485) (Varun Thacker) (Noble Paul) - Add a collections API call to add/delete arbitrary properties to a specific replica. Optionally adding sliceUnique=true will remove this property from all other replicas within a particular slice. [SOLR-6512](https://issues.apache.org/jira/browse/SOLR-6512) (Erick Erickson) -- Add a collectionsAPI call BALANCESLICEUNIQUE. Allows the even distribution of custom replica properties across nodes making up a collection, at most one node per slice will have the property. 
[SOLR-6513](https://issues.apache.org/jira/browse/SOLR-6513) +- Add a collectionsAPI call BALANCESLICEUNIQUE. Allows the even distribution of custom replica properties across nodes making up a collection, at most one node per slice will have the property. [SOLR-6513](https://issues.apache.org/jira/browse/SOLR-6513) - Support editing common solrconfig.xml values [SOLR-6533](https://issues.apache.org/jira/browse/SOLR-6533) (Noble Paul) - Give HttpSolrClient the ability to send PUT requests [SOLR-6543](https://issues.apache.org/jira/browse/SOLR-6543) (Gregory Chanan) - SolrRequest support for query params [SOLR-6565](https://issues.apache.org/jira/browse/SOLR-6565) (Gregory Chanan) @@ -6666,7 +6667,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Opened up "public" access to DataSource, DocBuilder, and EntityProcessorWrapper in DIH. [SOLR-6194](https://issues.apache.org/jira/browse/SOLR-6194) (Aaron LaBella) (ehatcher) - Fixed bug in TestReplicationHandler.doTestIndexAndConfigReplication. [SOLR-6228](https://issues.apache.org/jira/browse/SOLR-6228) (shalin) - Increased timeouts and hardened the RollingRestartTest. [SOLR-6231](https://issues.apache.org/jira/browse/SOLR-6231) (Noble Paul) (shalin) -- CoreContainer.remove() replaced with CoreContainer.unload(). A call to unload will also close the core. [SOLR-6232](https://issues.apache.org/jira/browse/SOLR-6232) +- CoreContainer.remove() replaced with CoreContainer.unload(). A call to unload will also close the core. [SOLR-6232](https://issues.apache.org/jira/browse/SOLR-6232) - Removed unused coreName parameter in ZkStateReader.getReplicaProps. [SOLR-6240](https://issues.apache.org/jira/browse/SOLR-6240) (shalin) - Harden the HttpPartitionTest. [SOLR-6241](https://issues.apache.org/jira/browse/SOLR-6241) (shalin) - A couple of small improvements to UnInvertedField class. 
[SOLR-6252](https://issues.apache.org/jira/browse/SOLR-6252) (Vamsee Yarlagadda) (Gregory Chanan) (Mark Miller) @@ -6748,7 +6749,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix method visibility of Evaluator, refactor DateFormatEvaluator for extensibility. [SOLR-6013](https://issues.apache.org/jira/browse/SOLR-6013) (Aaron LaBella) (shalin) - Deprecate getAnalyzer() in IndexField and FieldType, and add getIndexAnalyzer(). [SOLR-6022](https://issues.apache.org/jira/browse/SOLR-6022) (Ryan Ernst) - Replace mentions of CommonsHttpSolrServer with HttpSolrServer and StreamingUpdateSolrServer with ConcurrentUpdateSolrServer. [SOLR-6025](https://issues.apache.org/jira/browse/SOLR-6025) (Ahmet Arslan) (shalin) -- REQUESTSTATUS Collection API now also checks for submitted tasks which are yet to begin execution. [SOLR-6026](https://issues.apache.org/jira/browse/SOLR-6026) +- REQUESTSTATUS Collection API now also checks for submitted tasks which are yet to begin execution. [SOLR-6026](https://issues.apache.org/jira/browse/SOLR-6026) - Refactor duplicate Collector code in SolrIndexSearcher [SOLR-6067](https://issues.apache.org/jira/browse/SOLR-6067) (Christine Poerschke) (hossman) - Refactor DocRouter.getDocRouter to accept routerName as a String. [SOLR-6116](https://issues.apache.org/jira/browse/SOLR-6116) (shalin) - Removed deprecated analysis factories and fieldTypes from the example schema.xml [SOLR-6128](https://issues.apache.org/jira/browse/SOLR-6128) (hossman) @@ -6979,7 +6980,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Using the hdfs write cache can result in appearance of corrupted index. [SOLR-5666](https://issues.apache.org/jira/browse/SOLR-5666) (Mark Miller) - Performance problem when not using hdfs block cache. [SOLR-5667](https://issues.apache.org/jira/browse/SOLR-5667) (Mark Miller) - HTTPSolrServer doesn't set own property correctly in setFollowRedirects. 
[SOLR-5673](https://issues.apache.org/jira/browse/SOLR-5673) (Frank Wesemann) (shalin) -- cloud-scripts/zkcli.bat: quote option log4j (Günther Ruck via steffkes [SOLR-5675](https://issues.apache.org/jira/browse/SOLR-5675) +- cloud-scripts/zkcli.bat: quote option log4j (Günther Ruck via steffkes [SOLR-5675](https://issues.apache.org/jira/browse/SOLR-5675) - SolrCloud updates rejected if talking to secure ZooKeeper. [SOLR-5676](https://issues.apache.org/jira/browse/SOLR-5676) (Greg Chanan) (Mark Miller) - SolrZkClient should throw a SolrException when connect times out rather than a RuntimeException. [SOLR-5678](https://issues.apache.org/jira/browse/SOLR-5678) (Karl Wright) (Anshum Gupta) (Mark Miller) - Shard splitting fails with ClassCastException on collections upgraded from 4.5 and earlier versions. [SOLR-5679](https://issues.apache.org/jira/browse/SOLR-5679) (Brett Hoerner) (shalin) @@ -7271,7 +7272,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - HitRatio on caches is now exposed over JMX MBeans as a float. [SOLR-4616](https://issues.apache.org/jira/browse/SOLR-4616) (Greg Bowyer) - Solrj doesn't return "between" count in range facets [SOLR-4776](https://issues.apache.org/jira/browse/SOLR-4776) (Philip K. Warren) (shalin) - Multiple Entities DIH delta import: dataimporter.[entityName].last_index_time is empty. 
[SOLR-4788](https://issues.apache.org/jira/browse/SOLR-4788) (chakming wong) (James Dyer) (shalin) -- Throw an error if a core has the same name as another core, both old and new style solr.xml [SOLR-4790](https://issues.apache.org/jira/browse/SOLR-4790) +- Throw an error if a core has the same name as another core, both old and new style solr.xml [SOLR-4790](https://issues.apache.org/jira/browse/SOLR-4790) - solr.xml sharedLib does not work in 4.3.0 [SOLR-4791](https://issues.apache.org/jira/browse/SOLR-4791) (Ryan Ernst) (Jan Høydahl via Erick Erickson) - Fixed core discovery mode (ie: new style solr.xml) to treat 'collection1' as the default core name. [SOLR-4803](https://issues.apache.org/jira/browse/SOLR-4803) (hossman) - SolrCore#reload should not call preRegister and publish a DOWN state to ZooKeeper. [SOLR-4805](https://issues.apache.org/jira/browse/SOLR-4805) (Mark Miller) (Jared Rodriguez) @@ -7292,7 +7293,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix race conditions in shutdown of CoreContainer and getCore that could cause a request to attempt to use a core that has shut down. [SOLR-4960](https://issues.apache.org/jira/browse/SOLR-4960) (yonik) - Outgrowth of SOLR-4960 that includes transient cores and pending cores [SOLR-4974](https://issues.apache.org/jira/browse/SOLR-4974) [SOLR-4960](https://issues.apache.org/jira/browse/SOLR-4960) (Erick Erickson) - Time is stripped from datetime column when imported into Solr date field if convertType=true. [SOLR-4978](https://issues.apache.org/jira/browse/SOLR-4978) (Bill Au) (shalin) -- Creating a core while referencing system properties looks like it loses files Actually, instanceDir, config, dataDir and schema are not dereferenced properly when creating cores that reference sys vars (e.g. &dataDir=${dir}). In the dataDir case in particular this leads to the index being put in a directory literally named ${dir} but on restart the sysvar will be properly dereferenced. 
[SOLR-4982](https://issues.apache.org/jira/browse/SOLR-4982) +- Creating a core while referencing system properties looks like it loses files Actually, instanceDir, config, dataDir and schema are not dereferenced properly when creating cores that reference sys vars (e.g. &dataDir=${dir}). In the dataDir case in particular this leads to the index being put in a directory literally named ${dir} but on restart the sysvar will be properly dereferenced. [SOLR-4982](https://issues.apache.org/jira/browse/SOLR-4982) - The splitshard api doesn't call commit on new sub shards before switching shard states. Multiple bugs related to sub shard recovery and replication are also fixed. [SOLR-4997](https://issues.apache.org/jira/browse/SOLR-4997) (shalin) - ManagedIndexSchema doesn't persist uniqueKey tag after calling addFields method. [SOLR-5000](https://issues.apache.org/jira/browse/SOLR-5000) (Jun Ohtani) (Steve Rowe) - The Overseer should avoid publishing the state for collections that do not exist under the /collections zk node. [SOLR-5018](https://issues.apache.org/jira/browse/SOLR-5018) (Mark Miller) @@ -7300,7 +7301,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - ShardHandlerFactory was not being created properly when using new-style solr.xml, and was not being persisted properly when using old-style. [SOLR-5028](https://issues.apache.org/jira/browse/SOLR-5028) [SOLR-5029](https://issues.apache.org/jira/browse/SOLR-5029) (Tomás Fernández Löbbe) (Ryan Ernst) (Alan Woodward) - A facet.query that parses or analyzes down to a null Query would throw a NPE. Fixed. [SOLR-5034](https://issues.apache.org/jira/browse/SOLR-5034) (David Smiley) - The CSV loader now accepts field names that are not in the schema. [SOLR-5037](https://issues.apache.org/jira/browse/SOLR-5037) (gsingers) (ehatcher) (Steve Rowe) -- Admin/Schema Browser displays -1 for term counts for multiValued fields. 
[SOLR-5039](https://issues.apache.org/jira/browse/SOLR-5039) +- Admin/Schema Browser displays -1 for term counts for multiValued fields. [SOLR-5039](https://issues.apache.org/jira/browse/SOLR-5039) ### Other (17 changes) @@ -7542,7 +7543,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (15 changes) -- Added more tests for VelocityResponseWriter [SOLR-2470](https://issues.apache.org/jira/browse/SOLR-2470) +- Added more tests for VelocityResponseWriter [SOLR-2470](https://issues.apache.org/jira/browse/SOLR-2470) - A bare * without a field specification is treated as *:* by the lucene and edismax query parsers. [SOLR-2996](https://issues.apache.org/jira/browse/SOLR-2996) (hossman) (Jan Høydahl) (Alan Woodward) (yonik) - SurroundQParserPlugin highlighting tests [SOLR-3060](https://issues.apache.org/jira/browse/SOLR-3060) (Ahmet Arslan) (hossman) - Include lucene codecs jar and enable per-field postings and docvalues support in the schema.xml [SOLR-3843](https://issues.apache.org/jira/browse/SOLR-3843) (Robert Muir) (Steve Rowe) @@ -7564,7 +7565,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Added (35 changes) -- The ability to specify "transient" and "loadOnStartup" as a new properties of <core> tags in solr.xml. Can specify "transientCacheSize" in the <cores> tag. Together these allow cores to be loaded only when needed and only transientCacheSize transient cores will be loaded at a time, the rest aged out on an LRU basis. [SOLR-1028](https://issues.apache.org/jira/browse/SOLR-1028) +- The ability to specify "transient" and "loadOnStartup" as a new properties of <core> tags in solr.xml. Can specify "transientCacheSize" in the <cores> tag. Together these allow cores to be loaded only when needed and only transientCacheSize transient cores will be loaded at a time, the rest aged out on an LRU basis. 
[SOLR-1028](https://issues.apache.org/jira/browse/SOLR-1028) - Add extra statistics to RequestHandlers - 5 & 15-minute reqs/sec rolling averages; median, 75th, 95th, 99th, 99.9th percentile request times [SOLR-1972](https://issues.apache.org/jira/browse/SOLR-1972) (Alan Woodward) (Shawn Heisey) (Adrien Grand) (Uwe Schindler) - DIH's "formatDate" function now supports a timezone as an optional fourth parameter [SOLR-2201](https://issues.apache.org/jira/browse/SOLR-2201) (James Dyer) (Mark Waddle) - Enhanced pivot faceting to use local-params in the same way that regular field value faceting can. This means support for excluding a filter query, using a different output key, and specifying 'threads' to do facet.method=fcs concurrently. PivotFacetHelper now extends SimpleFacet and the getFacetImplementation() extension hook was removed. [SOLR-2255](https://issues.apache.org/jira/browse/SOLR-2255) (dsmiley) @@ -7764,7 +7765,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Boolean type added to function queries, along with new functions exists(), if(), and(), or(), xor(), not(), def(), and true and false constants. [SOLR-2136](https://issues.apache.org/jira/browse/SOLR-2136) (yonik) - You may now specify a 'soft' commit when committing. This will use Lucene's NRT feature to avoid guaranteeing documents are on stable storage in exchange for faster reopen times. There is also a new 'soft' autocommit tracker that can be configured. [SOLR-2193](https://issues.apache.org/jira/browse/SOLR-2193) [SOLR-2565](https://issues.apache.org/jira/browse/SOLR-2565) (Mark Miller) (Robert Muir) - Pseudo-join queries / filters. 
Examples: - To restrict to the set of parents with at least one blue-eyed child: fq={!join from=parent to=name}eyes:blue - To restrict to the set of children with at least one blue-eyed parent: fq={!join from=name to=parent}eyes:blue [SOLR-2272](https://issues.apache.org/jira/browse/SOLR-2272) (yonik) -- New 'field("...")' function syntax for referring to complex field names (containing whitespace or special characters) in functions. [SOLR-2335](https://issues.apache.org/jira/browse/SOLR-2335) +- New 'field("...")' function syntax for referring to complex field names (containing whitespace or special characters) in functions. [SOLR-2335](https://issues.apache.org/jira/browse/SOLR-2335) - Add support for using <similarity/> in a schema's fieldType, for customizing scoring on a per-field basis. [SOLR-2338](https://issues.apache.org/jira/browse/SOLR-2338) (hossman) (yonik) (rmuir) - /browse improvements: generalize range and date facet display [SOLR-2383](https://issues.apache.org/jira/browse/SOLR-2383) (Jan Høydahl) - Add CollationField, which is much more efficient than the Solr 3.x CollationKeyFilterFactory, and also supports Locale-sensitive range queries. [SOLR-2396](https://issues.apache.org/jira/browse/SOLR-2396) (rmuir) @@ -7966,7 +7967,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (81 changes) -- FunctionQuery, DocValues (and its impls), ValueSource (and its impls) and BoostedQuery have been consolidated into the queries module. They can now be found at o.a.l.queries.function. [LUCENE-2883](https://issues.apache.org/jira/browse/LUCENE-2883) +- FunctionQuery, DocValues (and its impls), ValueSource (and its impls) and BoostedQuery have been consolidated into the queries module. They can now be found at o.a.l.queries.function. 
[LUCENE-2883](https://issues.apache.org/jira/browse/LUCENE-2883) - Moved some spellchecker and suggest APIs to modules/suggest: HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the suggester APIs and implementations. [LUCENE-2995](https://issues.apache.org/jira/browse/LUCENE-2995) (rmuir) - Moved MutableValue classes to new 'common' module. [LUCENE-3232](https://issues.apache.org/jira/browse/LUCENE-3232) (Chris Male) - 'luke' ant target has been disabled due to incompatibilities with XML queryparser location [LUCENE-3286](https://issues.apache.org/jira/browse/LUCENE-3286) (Chris Male) @@ -7999,7 +8000,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Enhance CoreAdmin STATUS command to return index size. [SOLR-2698](https://issues.apache.org/jira/browse/SOLR-2698) (Yury Kats) (hossman) (Mark Miller) - Updated changes2html.pl to handle Solr's CHANGES.txt; added target 'changes-to-html' to solr/build.xml. [SOLR-2747](https://issues.apache.org/jira/browse/SOLR-2747) (Steve Rowe) (Robert Muir) - Maven configuration: Excluded transitive stax:stax-api dependency from org.codehaus.woodstox:wstx-asl dependency. [SOLR-2756](https://issues.apache.org/jira/browse/SOLR-2756) (David Smiley) -- Due to low level changes to support SolrCloud, the uniqueKey field can no longer be populated via <copyField/> or <field default=...> in the schema.xml. [SOLR-2796](https://issues.apache.org/jira/browse/SOLR-2796) +- Due to low level changes to support SolrCloud, the uniqueKey field can no longer be populated via <copyField/> or <field default=...> in the schema.xml. [SOLR-2796](https://issues.apache.org/jira/browse/SOLR-2796) - The /update/json and /update/csv URLs were restored to aid in the migration of existing clients. [SOLR-2857](https://issues.apache.org/jira/browse/SOLR-2857) (yonik) - More explicit lexical resources location logged if Carrot2 clustering extension is used. Fixed solr. impl. of IResource and IResourceLookup. 
[SOLR-2862](https://issues.apache.org/jira/browse/SOLR-2862) (Dawid Weiss) - Default QueryResponseWriters are now initialized via init() with an empty NamedList. [SOLR-3005](https://issues.apache.org/jira/browse/SOLR-3005) (Gasol Wu) (Chris Male) @@ -8139,7 +8140,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Boolean type added to function queries, along with new functions exists(), if(), and(), or(), xor(), not(), def(), and true and false constants. [SOLR-2136](https://issues.apache.org/jira/browse/SOLR-2136) (yonik) - You may now specify a 'soft' commit when committing. This will use Lucene's NRT feature to avoid guaranteeing documents are on stable storage in exchange for faster reopen times. There is also a new 'soft' autocommit tracker that can be configured. [SOLR-2193](https://issues.apache.org/jira/browse/SOLR-2193) [SOLR-2565](https://issues.apache.org/jira/browse/SOLR-2565) (Mark Miller) (Robert Muir) - Pseudo-join queries / filters. Examples: - To restrict to the set of parents with at least one blue-eyed child: fq={!join from=parent to=name}eyes:blue - To restrict to the set of children with at least one blue-eyed parent: fq={!join from=name to=parent}eyes:blue [SOLR-2272](https://issues.apache.org/jira/browse/SOLR-2272) (yonik) -- New 'field("...")' function syntax for referring to complex field names (containing whitespace or special characters) in functions. [SOLR-2335](https://issues.apache.org/jira/browse/SOLR-2335) +- New 'field("...")' function syntax for referring to complex field names (containing whitespace or special characters) in functions. [SOLR-2335](https://issues.apache.org/jira/browse/SOLR-2335) - Add support for using <similarity/> in a schema's fieldType, for customizing scoring on a per-field basis. 
[SOLR-2338](https://issues.apache.org/jira/browse/SOLR-2338) (hossman) (yonik) (rmuir) - /browse improvements: generalize range and date facet display [SOLR-2383](https://issues.apache.org/jira/browse/SOLR-2383) (Jan Høydahl) (yonik) - Add CollationField, which is much more efficient than the Solr 3.x CollationKeyFilterFactory, and also supports Locale-sensitive range queries. [SOLR-2396](https://issues.apache.org/jira/browse/SOLR-2396) (rmuir) @@ -8239,7 +8240,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (49 changes) -- FunctionQuery, DocValues (and its impls), ValueSource (and its impls) and BoostedQuery have been consolidated into the queries module. They can now be found at o.a.l.queries.function. [LUCENE-2883](https://issues.apache.org/jira/browse/LUCENE-2883) +- FunctionQuery, DocValues (and its impls), ValueSource (and its impls) and BoostedQuery have been consolidated into the queries module. They can now be found at o.a.l.queries.function. [LUCENE-2883](https://issues.apache.org/jira/browse/LUCENE-2883) - Moved some spellchecker and suggest APIs to modules/suggest: HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the suggester APIs and implementations. [LUCENE-2995](https://issues.apache.org/jira/browse/LUCENE-2995) (rmuir) - Moved MutableValue classes to new 'common' module. [LUCENE-3232](https://issues.apache.org/jira/browse/LUCENE-3232) (Chris Male) - 'luke' ant target has been disabled due to incompatibilities with XML queryparser location [LUCENE-3286](https://issues.apache.org/jira/browse/LUCENE-3286) (Chris Male) @@ -8269,7 +8270,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - FieldTypePluginLoader has been refactored out of IndexSchema and made public. [SOLR-2663](https://issues.apache.org/jira/browse/SOLR-2663) (hossman) - Enhance CoreAdmin STATUS command to return index size. 
[SOLR-2698](https://issues.apache.org/jira/browse/SOLR-2698) (Yury Kats) (hossman) (Mark Miller) - Maven configuration: Excluded transitive stax:stax-api dependency from org.codehaus.woodstox:wstx-asl dependency. [SOLR-2756](https://issues.apache.org/jira/browse/SOLR-2756) (David Smiley) (Steve Rowe) -- Due to low level changes to support SolrCloud, the uniqueKey field can no longer be populated via <copyField/> or <field default=...> in the schema.xml. [SOLR-2796](https://issues.apache.org/jira/browse/SOLR-2796) +- Due to low level changes to support SolrCloud, the uniqueKey field can no longer be populated via <copyField/> or <field default=...> in the schema.xml. [SOLR-2796](https://issues.apache.org/jira/browse/SOLR-2796) - More explicit lexical resources location logged if Carrot2 clustering extension is used. Fixed solr. impl. of IResource and IResourceLookup. [SOLR-2862](https://issues.apache.org/jira/browse/SOLR-2862) (Dawid Weiss) - Default QueryResponseWriters are now initialized via init() with an empty NamedList. [SOLR-3005](https://issues.apache.org/jira/browse/SOLR-3005) (Gasol Wu) (Chris Male) - logOnce from SolrException logOnce and all the supporting structure is gone. abortOnConfigurationError is also gone as it is no longer referenced. Errors should be caught and logged at the top-most level or logged and NOT propagated up the chain. [SOLR-3032](https://issues.apache.org/jira/browse/SOLR-3032) (Erick Erickson) @@ -8472,7 +8473,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Upgrade commons-compress to 1.2 [LUCENE-3457](https://issues.apache.org/jira/browse/LUCENE-3457) (Doron Cohen) - Upgrade Solr to Tika 0.10 [SOLR-2372](https://issues.apache.org/jira/browse/SOLR-2372) (janhoy) - Remove commitLockTimeout option from solrconfig.xml [SOLR-2591](https://issues.apache.org/jira/browse/SOLR-2591) (Luca Cavanna) (Martijn van Groningen) -- Upgraded UIMA dependencies from *-2.3.1-SNAPSHOT.jar to *-2.3.1.jar. 
[SOLR-2746](https://issues.apache.org/jira/browse/SOLR-2746) +- Upgraded UIMA dependencies from *-2.3.1-SNAPSHOT.jar to *-2.3.1.jar. [SOLR-2746](https://issues.apache.org/jira/browse/SOLR-2746) - Make both "update.chain" and the deprecated "update.param" work consistently everywhere; see also SOLR-2105. [SOLR-2750](https://issues.apache.org/jira/browse/SOLR-2750) [SOLR-2105](https://issues.apache.org/jira/browse/SOLR-2105) (Mark Miller) (janhoy) - min() and max() functions now support an arbitrary number of ValueSources [SOLR-2757](https://issues.apache.org/jira/browse/SOLR-2757) (Bill Bell) (hossman) - Moved ConcurrentLRUCache from o.a.s.common.util package in the solrj module to the o.a.s.util package in the Solr core module. [SOLR-2758](https://issues.apache.org/jira/browse/SOLR-2758) (David Smiley) (Steve Rowe) @@ -8637,7 +8638,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Support sorting on trie fields with Distributed Search. [SOLR-1110](https://issues.apache.org/jira/browse/SOLR-1110) (Mark Miller) (Uwe Schindler) (shalin) - <bool>on</bool> and <bool>yes</bool> work as expected in solrconfig.xml. [SOLR-1115](https://issues.apache.org/jira/browse/SOLR-1115) (koji) - Add a Binary FieldType [SOLR-1116](https://issues.apache.org/jira/browse/SOLR-1116) (noble) -- CoreAdminhandler should not need a core . This makes it possible to start a Solr server w/o a core .(noble) [SOLR-1121](https://issues.apache.org/jira/browse/SOLR-1121) +- CoreAdminhandler should not need a core . This makes it possible to start a Solr server w/o a core .(noble) [SOLR-1121](https://issues.apache.org/jira/browse/SOLR-1121) - Add a top() function query that causes its argument to have its values derived from the top level IndexReader, even when invoked from a sub-reader. top() is implicitly used for the ord() and rord() functions. 
[SOLR-1124](https://issues.apache.org/jira/browse/SOLR-1124) (yonik) - Added metadata output to extraction request handler "extract only" option. [SOLR-1128](https://issues.apache.org/jira/browse/SOLR-1128) (gsingers) - FieldTypes can now output multiple Fields per Type and still be searched. This can be handy for hiding the details of a particular implementation such as in the spatial case. [SOLR-1131](https://issues.apache.org/jira/browse/SOLR-1131) (Chris Mattmann) (shalin) (noble) (gsingers) (yonik) @@ -8736,7 +8737,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Allow tagging and exclusion of main query for faceting. [SOLR-2325](https://issues.apache.org/jira/browse/SOLR-2325) (yonik) - Add configurable IndexReaderFactory so that alternate IndexReader implementations can be specified via solrconfig.xml. Note that using a custom IndexReader may be incompatible with ReplicationHandler (see comments in SOLR-1366). This should be treated as an experimental feature. [SOLR-243](https://issues.apache.org/jira/browse/SOLR-243) [SOLR-1366](https://issues.apache.org/jira/browse/SOLR-1366) (Andrzej Bialecki) (hossman) (Mark Miller) (John Wang) - Added support for extracting content from binary documents like MS Word and PDF using Apache Tika. See also contrib/extraction/CHANGES.txt [SOLR-284](https://issues.apache.org/jira/browse/SOLR-284) (Eric Pugh) (Chris Harris) (yonik) (gsingers) -- Date Faceting now supports a "facet.date.include" param for specifying when the upper & lower end points of computed date ranges should be included in the range. Legal values are: "all", "lower", "upper", "edge", and "outer". For backwards compatibility the default value is the set: [lower,upper,edge], so that all ranges between start and end are inclusive of their endpoints, but the "before" and "after" ranges are not. 
[SOLR-397](https://issues.apache.org/jira/browse/SOLR-397) +- Date Faceting now supports a "facet.date.include" param for specifying when the upper & lower end points of computed date ranges should be included in the range. Legal values are: "all", "lower", "upper", "edge", and "outer". For backwards compatibility the default value is the set: [lower,upper,edge], so that all ranges between start and end are inclusive of their endpoints, but the "before" and "after" ranges are not. [SOLR-397](https://issues.apache.org/jira/browse/SOLR-397) - Add configurable DirectoryProvider so that alternate Directory implementations can be specified via solrconfig.xml. The default DirectoryProvider will use NIOFSDirectory for better concurrency on non Windows platforms. [SOLR-465](https://issues.apache.org/jira/browse/SOLR-465) (Mark Miller) (TJ Laurenzo) (yonik) - Add maxChars attribute for copyField function so that the length limit for destination can be specified. [SOLR-538](https://issues.apache.org/jira/browse/SOLR-538) (Georgios Stamatis) (Lars Kotthoff) (Chris Harris) (koji) - Add support for globbing in field names to highlight. For example, hl.fl=*_text will highlight all fieldnames ending with _text. [SOLR-540](https://issues.apache.org/jira/browse/SOLR-540) (Lars Kotthoff) (yonik) @@ -8784,7 +8785,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Adding a StreamingUpdateSolrServer that writes update commands to an open HTTP connection. If you are using solrj for bulk update requests you should consider switching to this implementaion. However, note that the error handling is not immediate as it is with the standard SolrServer. [SOLR-906](https://issues.apache.org/jira/browse/SOLR-906) (ryan) - CommonGramsFilterFactory/CommonGramsQueryFilterFactory for speeding up phrase queries containing common words by indexing n-grams and using them at query time. 
[SOLR-908](https://issues.apache.org/jira/browse/SOLR-908) (Tom Burton-West) (Jason Rutherglen) (yonik) - Add a few utility commands to the DIH admin page such as full import, delta import, status, reload config. [SOLR-910](https://issues.apache.org/jira/browse/SOLR-910) (Ahmed Hammad) (shalin) -- Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller. [SOLR-911](https://issues.apache.org/jira/browse/SOLR-911) +- Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller. [SOLR-911](https://issues.apache.org/jira/browse/SOLR-911) - SolrDocument and SolrInputDocument now implement the Map<String,?> interface. This should make plugging into other standard tools easier. [SOLR-928](https://issues.apache.org/jira/browse/SOLR-928) (ryan) - Add event listener API for DIH import start and end. [SOLR-938](https://issues.apache.org/jira/browse/SOLR-938) (Kay Kay) (Noble Paul) (shalin) - ValueSourceRangeFilter/Query - filter based on values in a FieldCache entry or on any arbitrary function of field values. [SOLR-939](https://issues.apache.org/jira/browse/SOLR-939) (yonik) @@ -9065,7 +9066,7 @@ sample syntax.) - Add HSQLDB Jar to example-DIH, unzip database and update instructions. ================== Release 1.3.0 ================== Upgrading from Solr 1.2 ----------------------- IMPORTANT UPGRADE NOTE: In a master/slave configuration, all searchers/slaves should be upgraded before the master! 
If the master were to be updated first, the older searchers would not be able to read the new index format. The Porter snowball based stemmers in Lucene were updated (LUCENE-1142), and are not guaranteed to be backward compatible at the index level (the stem of certain words may have changed). Re-indexing is recommended. Older Apache Solr installations can be upgraded by replacing the relevant war file with the new version. No changes to configuration files should be needed. This version of Solr contains a new version of Lucene implementing an updated index format. This version of Solr/Lucene can still read and update indexes in the older formats, and will convert them to the new format on the first index change. Be sure to backup your index before upgrading in case you need to downgrade. Solr now recognizes HTTP Request headers related to HTTP Caching (see RFC 2616 sec13) and will by default respond with "304 Not Modified" when appropriate. This should only affect users who access Solr via an HTTP Cache, or via a Web-browser that has an internal cache, but if you wish to suppress this behavior an '<httpCaching never304="true"/>' option can be added to your solrconfig.xml. See the wiki (or the example solrconfig.xml) for more details... http://wiki.apache.org/solr/SolrConfigXml#HTTPCaching In Solr 1.2, DateField did not enforce the canonical representation of the ISO 8601 format when parsing incoming data, and did not generation the canonical format when generating dates from "Date Math" strings (particularly as it pertains to milliseconds ending in trailing zeros). As a result equivalent dates could not always be compared properly. This problem is corrected in Solr 1.3, but DateField users that might have been affected by indexing inconsistent formats of equivalent dates (ie: 1995-12-31T23:59:59Z vs 1995-12-31T23:59:59.000Z) may want to consider reindexing to correct these inconsistencies. 
Users who depend on some of the "broken" behavior of DateField in Solr 1.2 (specificly: accepting any input that ends in a 'Z') should consider using the LegacyDateField class as a possible alternative. Users that desire 100% backwards compatibility should consider using the Solr 1.2 version of DateField. Due to some changes in the lifecycle of TokenFilterFactories, users of Solr 1.2 who have written Java code which constructs new instances of StopFilterFactory, SynonymFilterFactory, or EnglishProterFilterFactory will need to modify their code by adding a line like the following prior to using the factory object... factory.inform(SolrCore.getSolrCore().getSolrConfig().getResourceLoader()); These lifecycle changes do not affect people who use Solr "out of the box" or who have developed their own TokenFilterFactory plugins. More info can be found in SOLR-594. The python client that used to ship with Solr is no longer included in the distribution [SOLR-1369](https://issues.apache.org/jira/browse/SOLR-1369) [LUCENE-1142](https://issues.apache.org/jira/browse/LUCENE-1142) [SOLR-594](https://issues.apache.org/jira/browse/SOLR-594) (see client) (python) (README.txt) - The TokenizerFactory API has changed to explicitly return a Tokenizer rather then a TokenStream (that may be or may not be a Tokenizer). This change is required to take advantage of the Token reuse improvements in lucene 2.9. [SOLR-1377](https://issues.apache.org/jira/browse/SOLR-1377) (ryan) - In build.xml, use longfile="gnu" in tar task to avoid warnings about long file names [SOLR-1386](https://issues.apache.org/jira/browse/SOLR-1386) (Mark Miller) (shalin) -- Added Solr Powered By Logos [SOLR-1409](https://issues.apache.org/jira/browse/SOLR-1409) +- Added Solr Powered By Logos [SOLR-1409](https://issues.apache.org/jira/browse/SOLR-1409) - Errors and Exceptions are formated by ResponseWriter. 
[SOLR-141](https://issues.apache.org/jira/browse/SOLR-141) (Mike Sokolov) (Rich Cariens) (Daniel Naber) (ryan) - Log a warning if the deprecated charset option is used on GreekLowerCaseFilterFactory, RussianStemFilterFactory, RussianLowerCaseFilterFactory or RussianLetterTokenizerFactory. [SOLR-1410](https://issues.apache.org/jira/browse/SOLR-1410) (Robert Muir) (hossman) - Due to LUCENE-1906, Solr's tokenizer should use Tokenizer.correctOffset() instead of CharStream.correctOffset(). [SOLR-1423](https://issues.apache.org/jira/browse/SOLR-1423) [LUCENE-1906](https://issues.apache.org/jira/browse/LUCENE-1906) (Uwe Schindler) (koji) @@ -9081,7 +9082,7 @@ sample syntax.) - Extract base class from TestDistributedSearch to make it easy to write test cases for other distributed components. [SOLR-1608](https://issues.apache.org/jira/browse/SOLR-1608) (shalin) - Generify SolrCache [SOLR-1610](https://issues.apache.org/jira/browse/SOLR-1610) (Jason Rutherglen) (shalin) - Added Lucene 2.9.1 collation contrib jar to lib [SOLR-1611](https://issues.apache.org/jira/browse/SOLR-1611) (shalin) -- Remove ALIAS command [SOLR-1637](https://issues.apache.org/jira/browse/SOLR-1637) +- Remove ALIAS command [SOLR-1637](https://issues.apache.org/jira/browse/SOLR-1637) - Remove adminCore from CoreContainer . removed deprecated methods setAdminCore(), getAdminCore() [SOLR-1661](https://issues.apache.org/jira/browse/SOLR-1661) (noble) - Improve analysis tests and cut over to new TokenStream API. [SOLR-1674](https://issues.apache.org/jira/browse/SOLR-1674) (Robert Muir) (Mark Miller) - Improved error messages when adding a document that does not contain exactly one value for the uniqueKey field [SOLR-1695](https://issues.apache.org/jira/browse/SOLR-1695) (hossman) @@ -9098,7 +9099,7 @@ sample syntax.) 
- luceneAutoCommit no longer has any effect - it has been remove [SOLR-1851](https://issues.apache.org/jira/browse/SOLR-1851) (Mark Miller) - SolrResourceLoader.getLines ignores Byte Order Markers (BOMs) at the beginning of input files, these are often created by editors such as Windows Notepad. [SOLR-1865](https://issues.apache.org/jira/browse/SOLR-1865) (rmuir) (hossman) - Make lucene-jars-to-solr fail if copying any of the jars fails, and update clean to remove the jars in that directory [SOLR-1891](https://issues.apache.org/jira/browse/SOLR-1891) (Mark Miller) -- Upgraded to Tika 0.8 and changed deprecated parse call [SOLR-1902](https://issues.apache.org/jira/browse/SOLR-1902) +- Upgraded to Tika 0.8 and changed deprecated parse call [SOLR-1902](https://issues.apache.org/jira/browse/SOLR-1902) - ElisionFilterFactory will use a default set of French contractions if you do not supply a custom articles file. [SOLR-1938](https://issues.apache.org/jira/browse/SOLR-1938) (rmuir) - SolrResourceLoader will report any encoding errors, rather than silently using replacement characters for invalid inputs [SOLR-2003](https://issues.apache.org/jira/browse/SOLR-2003) (blargy) (rmuir) - Add mapping-FoldToASCII.txt to example conf directory. [SOLR-2013](https://issues.apache.org/jira/browse/SOLR-2013) (Steven Rowe) (koji) @@ -9203,7 +9204,7 @@ sample syntax.) - Support sorting on trie fields with Distributed Search. [SOLR-1110](https://issues.apache.org/jira/browse/SOLR-1110) (Mark Miller) (Uwe Schindler) (shalin) - <bool>on</bool> and <bool>yes</bool> work as expected in solrconfig.xml. [SOLR-1115](https://issues.apache.org/jira/browse/SOLR-1115) (koji) - Add a Binary FieldType [SOLR-1116](https://issues.apache.org/jira/browse/SOLR-1116) (noble) -- CoreAdminhandler should not need a core . This makes it possible to start a Solr server w/o a core .(noble) [SOLR-1121](https://issues.apache.org/jira/browse/SOLR-1121) +- CoreAdminhandler should not need a core . 
This makes it possible to start a Solr server w/o a core .(noble) [SOLR-1121](https://issues.apache.org/jira/browse/SOLR-1121) - Add a top() function query that causes its argument to have its values derived from the top level IndexReader, even when invoked from a sub-reader. top() is implicitly used for the ord() and rord() functions. [SOLR-1124](https://issues.apache.org/jira/browse/SOLR-1124) (yonik) - Added metadata output to extraction request handler "extract only" option. [SOLR-1128](https://issues.apache.org/jira/browse/SOLR-1128) (gsingers) - Add capability to specify an infoStream log file for the underlying Lucene IndexWriter in solrconfig.xml. This is an advanced debug log file that can be used to aid developers in fixing IndexWriter bugs. See the commented out example in the example solrconfig.xml under the indexDefaults section. [SOLR-1145](https://issues.apache.org/jira/browse/SOLR-1145) (Chris Harris) (Mark Miller) @@ -9281,7 +9282,7 @@ sample syntax.) - Adding a StreamingUpdateSolrServer that writes update commands to an open HTTP connection. If you are using solrj for bulk update requests you should consider switching to this implementaion. However, note that the error handling is not immediate as it is with the standard SolrServer. [SOLR-906](https://issues.apache.org/jira/browse/SOLR-906) (ryan) - CommonGramsFilterFactory/CommonGramsQueryFilterFactory for speeding up phrase queries containing common words by indexing n-grams and using them at query time. [SOLR-908](https://issues.apache.org/jira/browse/SOLR-908) (Tom Burton-West) (Jason Rutherglen) (yonik) - Add a few utility commands to the DIH admin page such as full import, delta import, status, reload config. [SOLR-910](https://issues.apache.org/jira/browse/SOLR-910) (Ahmed Hammad) (shalin) -- Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. 
This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller. [SOLR-911](https://issues.apache.org/jira/browse/SOLR-911) +- Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller. [SOLR-911](https://issues.apache.org/jira/browse/SOLR-911) - SolrDocument and SolrInputDocument now implement the Map<String,?> interface. This should make plugging into other standard tools easier. [SOLR-928](https://issues.apache.org/jira/browse/SOLR-928) (ryan) - Add event listener API for DIH import start and end. [SOLR-938](https://issues.apache.org/jira/browse/SOLR-938) (Kay Kay) (Noble Paul) (shalin) - ValueSourceRangeFilter/Query - filter based on values in a FieldCache entry or on any arbitrary function of field values. [SOLR-939](https://issues.apache.org/jira/browse/SOLR-939) (yonik) @@ -9447,10 +9448,10 @@ sample syntax.) - Upgrade to Tika 0.4. Note there are some differences in detecting Languages now in extracting request handler. See http://www.lucidimagination.com/search/document/d6f1899a85b2a45c/vote_apache_tika_0_4_release_candidate_2#d6f1899a85b2a45c for discussion on language detection. See http://www.apache.org/dist/lucene/tika/CHANGES-0.4.txt. [SOLR-1310](https://issues.apache.org/jira/browse/SOLR-1310) (gsingers) - Upgrade Solr Highlighter classes to new Lucene Highlighter API. This upgrade has resulted in a back compat break in the DefaultSolrHighlighter class - getQueryScorer is no longer protected. If you happened to be overriding that method in custom code, overide getHighlighter instead. 
Also, HighlightingUtils#getQueryScorer has been removed as it was deprecated and backcompat has been broken with it anyway. [SOLR-1319](https://issues.apache.org/jira/browse/SOLR-1319) [SOLR-1345](https://issues.apache.org/jira/browse/SOLR-1345) (Mark Miller) - SolrInputDocument cannot process dynamic fields [SOLR-1357](https://issues.apache.org/jira/browse/SOLR-1357) (Lars Grote) (noble) -- Add HSQLDB Jar to example-DIH, unzip database and update instructions. [SOLR-1369](https://issues.apache.org/jira/browse/SOLR-1369) +- Add HSQLDB Jar to example-DIH, unzip database and update instructions. [SOLR-1369](https://issues.apache.org/jira/browse/SOLR-1369) - The TokenizerFactory API has changed to explicitly return a Tokenizer rather then a TokenStream (that may be or may not be a Tokenizer). This change is required to take advantage of the Token reuse improvements in lucene 2.9. [SOLR-1377](https://issues.apache.org/jira/browse/SOLR-1377) (ryan) - In build.xml, use longfile="gnu" in tar task to avoid warnings about long file names [SOLR-1386](https://issues.apache.org/jira/browse/SOLR-1386) (Mark Miller) (shalin) -- Added Solr Powered By Logos [SOLR-1409](https://issues.apache.org/jira/browse/SOLR-1409) +- Added Solr Powered By Logos [SOLR-1409](https://issues.apache.org/jira/browse/SOLR-1409) - Log a warning if the deprecated charset option is used on GreekLowerCaseFilterFactory, RussianStemFilterFactory, RussianLowerCaseFilterFactory or RussianLetterTokenizerFactory. [SOLR-1410](https://issues.apache.org/jira/browse/SOLR-1410) (Robert Muir) (hossman) - Due to LUCENE-1906, Solr's tokenizer should use Tokenizer.correctOffset() instead of CharStream.correctOffset(). [SOLR-1423](https://issues.apache.org/jira/browse/SOLR-1423) [LUCENE-1906](https://issues.apache.org/jira/browse/LUCENE-1906) (Uwe Schindler) (koji) - Make it possible to run all tests in a package [SOLR-1441](https://issues.apache.org/jira/browse/SOLR-1441) (shalin) @@ -9599,7 +9600,7 @@ sample syntax.) 
- Fix bug in RequiredSolrParams where requiring a field specific param would fail if a general default value had been supplied. [SOLR-297](https://issues.apache.org/jira/browse/SOLR-297) (hossman) - Add proper support for Long and Doubles in sorting, etc. [SOLR-324](https://issues.apache.org/jira/browse/SOLR-324) (gsingers) - Fix WordDelimiterFilter handling of offsets for synonyms or other injected tokens that can break highlighting. [SOLR-331](https://issues.apache.org/jira/browse/SOLR-331) (yonik) -- Changed distributiondump.jsp to use Solr HOME instead of CWD to set path. [SOLR-333](https://issues.apache.org/jira/browse/SOLR-333) +- Changed distributiondump.jsp to use Solr HOME instead of CWD to set path. [SOLR-333](https://issues.apache.org/jira/browse/SOLR-333) - Removed duplicate contentType from raw-schema.jsp. [SOLR-393](https://issues.apache.org/jira/browse/SOLR-393) (bill) - SolrExceptionTest should now handle using OpenDNS as a DNS provider [SOLR-400](https://issues.apache.org/jira/browse/SOLR-400) (gsingers) - Requesting a large numbers of documents to be returned (limit) can result in an out-of-memory exception, even for a small index. [SOLR-413](https://issues.apache.org/jira/browse/SOLR-413) (yonik) @@ -9626,7 +9627,7 @@ sample syntax.) - Now sets the QParser on the ResponseBuilder [SOLR-585](https://issues.apache.org/jira/browse/SOLR-585) (gsingers) - Improved handling of badly formated query strings [SOLR-589](https://issues.apache.org/jira/browse/SOLR-589) (Sean Timm) (Otis Gospodnetic) - Limitation in pgrep on Linux platform breaks script-utils fixUser. [SOLR-590](https://issues.apache.org/jira/browse/SOLR-590) (Hannes Schmidt) (billa) -- SolrServlet no longer "caches" SolrCore. This was causing problems in Resin, and could potentially cause problems for customized usages of SolrServlet. [SOLR-597](https://issues.apache.org/jira/browse/SOLR-597) +- SolrServlet no longer "caches" SolrCore. 
This was causing problems in Resin, and could potentially cause problems for customized usages of SolrServlet. [SOLR-597](https://issues.apache.org/jira/browse/SOLR-597) - DebugComponent now always occurs last in the SearchHandler list unless the components are explicitly declared. [SOLR-598](https://issues.apache.org/jira/browse/SOLR-598) (gsingers) - If the spellchecking path is relative, make it relative to the Solr Data Directory. [SOLR-604](https://issues.apache.org/jira/browse/SOLR-604) (Shalin Shekhar Mangar) (gsingers) - Fixed spell check collation offset issue. [SOLR-606](https://issues.apache.org/jira/browse/SOLR-606) (Stefan Oestreicher) (Geoffrey Young) (gsingers) @@ -9650,9 +9651,9 @@ sample syntax.) - Tweaks to post.jar so it prints the error message from Solr. [SOLR-268](https://issues.apache.org/jira/browse/SOLR-268) (Brian Whitman) (hossman) - The create method in all TokenFilter and Tokenizer Factories provided by Solr now declare their specific return types instead of just using "TokenStream" [SOLR-367](https://issues.apache.org/jira/browse/SOLR-367) (hossman) - Hooks add to build system for automatic generation of (stub) Tokenizer and TokenFilter Factories. Also: new Factories for all Tokenizers and TokenFilters provided by the lucene-analyzers-2.2.0.jar -- includes support for German, Chinese, Russan, Dutch, Greek, Brazilian, Thai, and French. [SOLR-396](https://issues.apache.org/jira/browse/SOLR-396) (hossman) -- . Changed the names of the Solr JARs to use the defacto standard JAR names based on project-name-version.jar. This yields, for example: apache-solr-common-1.3-dev.jar apache-solr-solrj-1.3-dev.jar apache-solr-1.3-dev.jar [SOLR-411](https://issues.apache.org/jira/browse/SOLR-411) +- . Changed the names of the Solr JARs to use the defacto standard JAR names based on project-name-version.jar. 
This yields, for example: apache-solr-common-1.3-dev.jar apache-solr-solrj-1.3-dev.jar apache-solr-1.3-dev.jar [SOLR-411](https://issues.apache.org/jira/browse/SOLR-411) - Changed analysis.jsp to use POST instead of GET, also made the input area a bit bigger [SOLR-451](https://issues.apache.org/jira/browse/SOLR-451) (gsingers) -- Added clover code coverage targets for committers and the nightly build. Requires the Clover library, as licensed to Apache and only available privately. To run: ant -Drun.clover=true clean clover test generate-clover-reports [SOLR-479](https://issues.apache.org/jira/browse/SOLR-479) +- Added clover code coverage targets for committers and the nightly build. Requires the Clover library, as licensed to Apache and only available privately. To run: ant -Drun.clover=true clean clover test generate-clover-reports [SOLR-479](https://issues.apache.org/jira/browse/SOLR-479) - Added in deprecation comments. [SOLR-489](https://issues.apache.org/jira/browse/SOLR-489) (Sean Timm) (Lars Kothoff) (gsingers) - Nightly release includes client sources. [SOLR-510](https://issues.apache.org/jira/browse/SOLR-510) (koji) - Changed luke.xsl to use divs w/css for generating histograms instead of SVG [SOLR-518](https://issues.apache.org/jira/browse/SOLR-518) (Thomas Peuss) (hossman) @@ -9674,11 +9675,11 @@ sample syntax.) - / SOLR-695: If you have used "MultiCore" functionality in an unreleased version of 1.3-dev, many classes and configs have been renamed for the official 1.3 release. Speciffically, solr.xml has replaced multicore.xml, and uses a slightly different syntax. 
The solrj classes: MultiCore{Request/Response/Params} have been renamed: CoreAdmin{Request/Response/Params} [SOLR-689](https://issues.apache.org/jira/browse/SOLR-689) [SOLR-695](https://issues.apache.org/jira/browse/SOLR-695) (hossman) (ryan) (Henri Biestro) - Migrated to stable released builds of StAX API 1.0.1 and StAX 1.2.0 [SOLR-692](https://issues.apache.org/jira/browse/SOLR-692) (shalin) - SolrQueryParser now uses a ConstantScoreQuery for wildcard queries that prevent an exception from being thrown when the number of matching terms exceeds the BooleanQuery clause limit. [SOLR-737](https://issues.apache.org/jira/browse/SOLR-737) (yonik) -- Upgraded to Lucene 2.2.0; June 18, 2007. +- Upgraded to Lucene 2.2.0; June 18, 2007. - Upgraded to commons-CSV r609327, which fixes escaping bugs and introduces new escaping and whitespace handling options to increase compatibility with different formats. (yonik) -- Upgraded to Lucene 2.3.0; Jan 23, 2008. -- Upgrade to Lucene 2.3.1 -- Upgrade to Lucene 2.3.2 +- Upgraded to Lucene 2.3.0; Jan 23, 2008. +- Upgrade to Lucene 2.3.1 +- Upgrade to Lucene 2.3.2 - Upgraded to Lucene 2.4-dev (r686801) (yonik) - Upgraded to Lucene 2.4-dev (r688745) 27-Aug-2008 (yonik) - Upgraded to Lucene 2.4-dev (r691741) 03-Sep-2008 (yonik) @@ -9766,8 +9767,8 @@ sample syntax.) - DisMaxRequestHandler's bq, bf, qf, and pf parameters can now accept multiple values (klaas) - Query are re-written before highlighting is performed. This enables proper highlighting of prefix and wildcard queries (klaas) - A meaningful exception is raised when attempting to add a doc missing a unique id if it is declared in the schema and allowDups=false. (ryan) (klaas) -- Updated to Lucene 2.1 -- Updated to Lucene 2007-05-20_00-04-53 +- Updated to Lucene 2.1 +- Updated to Lucene 2007-05-20_00-04-53 [1.1.0] - 2006-12-22 @@ -9796,27 +9797,27 @@ sample syntax.) 
- ${solr.home}/lib directory can now be used for specifying "plugin" jars SOLR-68 [SOLR-68](https://issues.apache.org/jira/browse/SOLR-68) (hossman) - Support for "Date Math" relative "NOW" when specifying values of a DateField in a query -- or when adding a document. SOLR-71 [SOLR-71](https://issues.apache.org/jira/browse/SOLR-71) (hossman) - useColdSearcher control in solrconfig.xml prevents the first searcher from being used before it's done warming. This can help prevent thrashing on startup when multiple requests hit a cold searcher. The default is "false", preventing use before warm. SOLR-77 [SOLR-77](https://issues.apache.org/jira/browse/SOLR-77) (yonik) -- added support for setting Lucene's positionIncrementGap -- Admin: new statistics for SolrIndexSearcher -- Admin: caches now show config params on stats page -- max() function added to FunctionQuery suite -- postOptimize hook, mirroring the functionality of the postCommit hook, but only called on an index optimize. -- Ability to HTTP POST query requests to /select in addition to HTTP-GET +- added support for setting Lucene's positionIncrementGap +- Admin: new statistics for SolrIndexSearcher +- Admin: caches now show config params on stats page +- max() function added to FunctionQuery suite +- postOptimize hook, mirroring the functionality of the postCommit hook, but only called on an index optimize. +- Ability to HTTP POST query requests to /select in addition to HTTP-GET - The default search field may now be overridden by requests to the standard request handler using the df query parameter. (Erik Hatcher) - Added DisMaxRequestHandler and SolrPluginUtils. (Chris Hostetter) - Added KeywordTokenizerFactory (hossman) - new DocSet.andNot(), DocSet.andNotSize() (yonik) -- Made query parser default operator configurable via schema.xml: <solrQueryParser defaultOperator="AND|OR"/> The default operator remains "OR". 
+- Made query parser default operator configurable via schema.xml: <solrQueryParser defaultOperator="AND|OR"/> The default operator remains "OR". - Default operator for query parsing can now be specified with q.op=AND|OR from the client request, overriding the schema value. (ehatcher) ### Changed (6 changes) - OpenBitSet completed, replaces BitSet as the implementation for BitDocSet. Iteration is faster, and BitDocSet.intersectionSize(BitDocSet) and unionSize is between 3 and 4 times faster. SOLR-15 [SOLR-15](https://issues.apache.org/jira/browse/SOLR-15) (yonik) -- getDocListAndSet can now generate both a DocList and a DocSet from a single lucene query. -- BitDocSet.intersectionSize(HashDocSet) no longer generates an intermediate set -- much faster unionSize when one of the sets is a HashDocSet: O(smaller_set_size) +- getDocListAndSet can now generate both a DocList and a DocSet from a single lucene query. +- BitDocSet.intersectionSize(HashDocSet) no longer generates an intermediate set +- much faster unionSize when one of the sets is a HashDocSet: O(smaller_set_size) - Optimized getDocSet() for term queries resulting in a 36% speedup of facet.field queries where DocSets aren't cached (for example, if the number of terms in the field is larger than the filter cache.) (yonik) -- Optimized facet.field faceting by as much as 500 times when the field has a single token per document (not multiValued & not tokenized) by using the Lucene FieldCache entry for that field to tally term counts. The first request utilizing the FieldCache will take longer than subsequent ones. +- Optimized facet.field faceting by as much as 500 times when the field has a single token per document (not multiValued & not tokenized) by using the Lucene FieldCache entry for that field to tally term counts. The first request utilizing the FieldCache will take longer than subsequent ones. ### Fixed (13 changes) @@ -9828,7 +9829,7 @@ sample syntax.) 
- Fixed delete-by-id for field types whose indexed form is different from the printable form (mainly sortable numeric types) - Added escaping of attribute values in the XML response (Erik Hatcher) - Added empty extractTerms() to FunctionQuery to enable use in a MultiSearcher (Yonik) -- WordDelimiterFilter sometimes lost token positionIncrement information +- WordDelimiterFilter sometimes lost token positionIncrement information - Fix reverse sorting for fields where sortMissingFirst=true (Rob Staveley) (yonik) - Escape '>' in XML output ([because ]]> is illegal in CharData]()) - field boosts weren't being applied and doc boosts were being applied to fields (klaas) @@ -9842,22 +9843,22@@ sample syntax.) - Highlighter params changed to be prefixed with "hl."; allow fragmentsize customization and per-field overrides on many options SOLR-37 [SOLR-37](https://issues.apache.org/jira/browse/SOLR-37) (Andrew May) (klaas) - Default param values for DisMaxRequestHandler should now be specified using a '<lst name="defaults">...</lst>' init param, for backwards compatibility all init params will be used as defaults if an init param with that name does not exist. SOLR-43 [SOLR-43](https://issues.apache.org/jira/browse/SOLR-43) (hossman) - The DisMaxRequestHandler now supports multiple occurrences of the "fq" param. SOLR-44 [SOLR-44](https://issues.apache.org/jira/browse/SOLR-44) (hossman) -- Added javascript to catch empty query in admin query forms (Tomislav Nakic-Alfirevic via billa, SOLR-48 [SOLR-48](https://issues.apache.org/jira/browse/SOLR-48) +- Added javascript to catch empty query in admin query forms (Tomislav Nakic-Alfirevic via billa, SOLR-48 [SOLR-48](https://issues.apache.org/jira/browse/SOLR-48) - Lazy field loading can be enabled via a solrconfig directive. 
This will be faster when not all stored fields are needed from a document SOLR-52 [SOLR-52](https://issues.apache.org/jira/browse/SOLR-52) (klaas) - Made admin JSPs return XML and transform them with new XSL stylesheets SOLR-58 [SOLR-58](https://issues.apache.org/jira/browse/SOLR-58) (Otis Gospodnetic) - If the "echoParams=explicit" request parameter is set, request parameters are copied to the output. In an XML output, they appear in new <lst name="params"> list inside the new <lst name="responseHeader"> element, which replaces the old <responseHeader>. Adding a version=2.1 parameter to the request produces the old format, for backwards compatibility SOLR-59 [SOLR-59](https://issues.apache.org/jira/browse/SOLR-59) (bdelacretaz and yonik) - Modified admin styles to improve display in Internet Explorer SOLR-6 [SOLR-6](https://issues.apache.org/jira/browse/SOLR-6) (Greg Ludington) (billa) -- check solr return code in admin scripts, SOLR-62 [SOLR-62](https://issues.apache.org/jira/browse/SOLR-62) -- backslash escape * in ssh command used in snappuller for zsh compatibility, SOLR-63 [SOLR-63](https://issues.apache.org/jira/browse/SOLR-63) +- check solr return code in admin scripts, SOLR-62 [SOLR-62](https://issues.apache.org/jira/browse/SOLR-62) +- backslash escape * in ssh command used in snappuller for zsh compatibility, SOLR-63 [SOLR-63](https://issues.apache.org/jira/browse/SOLR-63) - Document update handling locking is much sparser, allowing performance gains through multiple threads. 
Large commits also might be faster SOLR-65 [SOLR-65](https://issues.apache.org/jira/browse/SOLR-65) (klaas) - Config parsing should now work using DOM Level 2 parsers -- Solr previously relied on getTextContent which is a DOM Level 3 addition SOLR-78 [SOLR-78](https://issues.apache.org/jira/browse/SOLR-78) (Alexander Saar) (hossman) -- classes reorganized into different packages, package names changed to Apache -- force read of document stored fields in QuerySenderListener -- Solr now looks in ./solr/conf for config, ./solr/data for data configurable via solr.solr.home system property -- Upgrade to Lucene 2.0 nightly build 2006-06-22, lucene SVN revision 416224, http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=markup&pathrev=416224 -- Upgrade to Lucene 2.0 nightly build 2006-07-15, lucene SVN revision 422302, -- Updated to Lucene 2.0 nightly build 2006-09-07, SVN revision 462111 -- Updated to Lucene 2.0 nightly build 2006-11-15, SVN revision 475069 +- classes reorganized into different packages, package names changed to Apache +- force read of document stored fields in QuerySenderListener +- Solr now looks in ./solr/conf for config, ./solr/data for data configurable via solr.solr.home system property +- Upgrade to Lucene 2.0 nightly build 2006-06-22, lucene SVN revision 416224, http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=markup&pathrev=416224 +- Upgrade to Lucene 2.0 nightly build 2006-07-15, lucene SVN revision 422302, +- Updated to Lucene 2.0 nightly build 2006-09-07, SVN revision 462111 +- Updated to Lucene 2.0 nightly build 2006-11-15, SVN revision 475069 - Simplified index.jsp and form.jsp, primarily by removing/hiding XML specific params, and adding an option to pick the output type. 
(hossman) - Added new numeric build property "specversion" to allow clean MANIFEST.MF files (hossman) - Added Solr/Lucene versions to "Info" page (hossman) diff --git a/changelog/unreleased/PR#4149-distributed-luke.yml b/changelog/unreleased/PR#4149-distributed-luke.yml new file mode 100644 index 000000000000..aae5ac517b55 --- /dev/null +++ b/changelog/unreleased/PR#4149-distributed-luke.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Distributed Luke +type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Luke Kot-Zaniewski +links: + name: PR#4149 + url: https://github.com/apache/solr/pull/4149 From 09d0de4b41d64f80a676be1da38009959a63992e Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 25 Feb 2026 16:25:04 -0500 Subject: [PATCH 23/46] Revert "changelog" This reverts commit 5de8cd79f676765ff913ebd69aee4230cb14287e. --- CHANGELOG.md | 329 +++++++++--------- .../unreleased/PR#4149-distributed-luke.yml | 8 - 2 files changed, 164 insertions(+), 173 deletions(-) delete mode 100644 changelog/unreleased/PR#4149-distributed-luke.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index cdd213e6542d..aea57a6a6d2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M [unreleased] ------------ -### Added (13 changes) +### Added (12 changes) - Create new v2 APIs for listing and reading collection properties ("collprops") [SOLR-12224](https://issues.apache.org/jira/browse/SOLR-12224) (Jason Gerlowski) - Introduce new SolrJ SolrRequest classes for metrics and "system info" requests. [SOLR-17136](https://issues.apache.org/jira/browse/SOLR-17136) (Isabelle Giguère) (Pierre Salagnac) @@ -27,7 +27,6 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M - CoreAdmin API (/admin/cores?action=UPGRADECOREINDEX) to upgrade an index in-place [SOLR-18096](https://issues.apache.org/jira/browse/SOLR-18096) (Rahul Goswami) - CrossDC Consumer - add Prometheus metrics [SOLR-18060](https://issues.apache.org/jira/browse/SOLR-18060) (Andrzej Bialecki @ab) - CrossDC - support arbitrary Kafka properties [SOLR-18062](https://issues.apache.org/jira/browse/SOLR-18062) (Andrzej Bialecki @ab) -- Distributed/Cloud support for admin/luke [SOLR-8127](https://issues.apache.org/jira/browse/SOLR-8127) (Luke Kot-Zaniewski) ### Changed (2 changes) @@ -129,7 +128,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Migrate Schema Designer to use FileStore API instead of BlobHandler for persisting working data. [SOLR-17852](https://issues.apache.org/jira/browse/SOLR-17852) (Eric Pugh) - Export tool should properly output exported documents in json, json w/ lines, and javabin formats. [SOLR-17867](https://issues.apache.org/jira/browse/SOLR-17867) (Eric Pugh) - Improve tracking of time already spent to discount the limit for sub-requests when `timeAllowed` is used. [SOLR-17926](https://issues.apache.org/jira/browse/SOLR-17926) (Andrzej Bialecki) (hossman) -- MultiAuthPlugin now looks up for auth plugins configured with "xBasic" as scheme if "Basic" authentication used and no plugin with "Basic" scheme found. This allows the new UI to authenticate in browser without a credentials prompt being displayed. The MultiAuthPlugin can now also be configured with a single plugin. [SOLR-17930](https://issues.apache.org/jira/browse/SOLR-17930) +- MultiAuthPlugin now looks up for auth plugins configured with "xBasic" as scheme if "Basic" authentication used and no plugin with "Basic" scheme found. This allows the new UI to authenticate in browser without a credentials prompt being displayed. The MultiAuthPlugin can now also be configured with a single plugin. 
[SOLR-17930](https://issues.apache.org/jira/browse/SOLR-17930) - `Http2ClusterStateProvider` now also can work with `HttpJdkSolrClient`. [SOLR-17943](https://issues.apache.org/jira/browse/SOLR-17943) (James Dyer) - Return structured error information in responses, parse correctly in SolrJ to give useful SolrExceptions [SOLR-17998](https://issues.apache.org/jira/browse/SOLR-17998) [PR#1382](https://github.com/apache/solr/pull/1382) ([Houston Putman](https://home.apache.org/phonebook.html?uid=houston) @HoustonPutman) - PropertiesInputStream overrides bulk read method, and rename it to IndexInputInputStream to match symmetrical class IndexOutputOutputStream. [SOLR-18029](https://issues.apache.org/jira/browse/SOLR-18029) (Pierre Salagnac) @@ -180,7 +179,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Removed the Hadoop Auth module, and thus Kerberos authentication and other exotic options. [SOLR-17540](https://issues.apache.org/jira/browse/SOLR-17540) (Eric Pugh) - Remove code in Assign used for backwards compatibility with Collections created prior to 7.0 [SOLR-17564](https://issues.apache.org/jira/browse/SOLR-17564) (Paul McArthur) - Remove deprecated master/slave option language from ReplicationHandler. [SOLR-17576](https://issues.apache.org/jira/browse/SOLR-17576) (Eric Pugh) -- Removed the HDFS module and therefore support for storing indexes in HDFS. Existing "blockcache" code preserved by migrating it to core o.a.s.blackcache package. [SOLR-17609](https://issues.apache.org/jira/browse/SOLR-17609) +- Removed the HDFS module and therefore support for storing indexes in HDFS. Existing "blockcache" code preserved by migrating it to core o.a.s.blackcache package. [SOLR-17609](https://issues.apache.org/jira/browse/SOLR-17609) - Removed the "version" parameter that the SolrJ client would add to almost all Solr requests in relation to the version of the response format parsing ("wt" param). 
[SOLR-17634](https://issues.apache.org/jira/browse/SOLR-17634) (David Smiley) - Removed deprecated ExternalFileField field type. [SOLR-17655](https://issues.apache.org/jira/browse/SOLR-17655) (Eric Pugh) - Removed deprecated CurrencyField field type. [SOLR-17683](https://issues.apache.org/jira/browse/SOLR-17683) (Eric Pugh) @@ -330,7 +329,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (39 changes) -- Logs: removed webapp=/solr and also removed from internal context map. [PR#3758](https://github.com/apache/solr/pull/3758) +- Logs: removed webapp=/solr and also removed from internal context map. [PR#3758](https://github.com/apache/solr/pull/3758) - Deprecate CloudSolrClient's ZooKeeper Hosts constructor. [SOLR-14070](https://issues.apache.org/jira/browse/SOLR-14070) (Abhishek Umarjikar) - Switch from SysV init script to systemd service definition [SOLR-14410](https://issues.apache.org/jira/browse/SOLR-14410) (Marius Ghita) - SolrJ modules like SolrJ-Zookeeper are now opt-in from a Maven POM perspective. Previously, the modules would come transitively. [SOLR-15730](https://issues.apache.org/jira/browse/SOLR-15730) (David Smiley) @@ -430,7 +429,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Avoid creating grouping shard requests when timeAllowed has already run out. [SOLR-17869](https://issues.apache.org/jira/browse/SOLR-17869) (Andrzej Bialecki) (hossman) - Http2SolrClient wasn't honoring idle timeout configuration above 30 seconds -- a regression. [SOLR-17871](https://issues.apache.org/jira/browse/SOLR-17871) (Thomas Wöckinger) (David Smiley) - Http2SolrClient: followRedirects=true: if Http2SolrClient was created with followRedirects=true, and then was used to create future clients (via builder.withHttpClient), then redirect processing was wrongly disabled on the shared instance. 
[SOLR-17876](https://issues.apache.org/jira/browse/SOLR-17876) (David Smiley) -- SolrCLI tools such as "bin/solr zk" can now access jars located in `<install_dir>/lib`. [SOLR-17883](https://issues.apache.org/jira/browse/SOLR-17883) +- SolrCLI tools such as "bin/solr zk" can now access jars located in `<install_dir>/lib`. [SOLR-17883](https://issues.apache.org/jira/browse/SOLR-17883) - Fix for LB/Cloud SolrClients that could leak on close() if concurrent request failed and triggered zombie server logic. [SOLR-3696](https://issues.apache.org/jira/browse/SOLR-3696) (hossman) ### Dependency Upgrades (37 changes) @@ -667,7 +666,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Introduce unit testing for AssertTool. [SOLR-11318](https://issues.apache.org/jira/browse/SOLR-11318) (Eric Pugh) (Jason Gerlowski) - NamedList: deprecating methods: forEachEntry, forEachKey, abortableForEachKey, abortableForEach, asMap (no-arg only), get(key, default). Added getOrDefault. Deprecated the SimpleMap interface as well as the entirety of the SolrJ package org.apache.solr.cluster.api, which wasn't used except for SimpleMap. [SOLR-14680](https://issues.apache.org/jira/browse/SOLR-14680) (David Smiley) -- "<lib/>" tags in solrconfig.xml are now quietly ignored by default unless explicitly enabled with the `SOLR_CONFIG_LIB_ENABLED=true` enviroment variable (or corresponding sysprop). These tags are now considered deprecated and will be removed in Solr 10. [SOLR-16781](https://issues.apache.org/jira/browse/SOLR-16781) +- "<lib/>" tags in solrconfig.xml are now quietly ignored by default unless explicitly enabled with the `SOLR_CONFIG_LIB_ENABLED=true` enviroment variable (or corresponding sysprop). These tags are now considered deprecated and will be removed in Solr 10. [SOLR-16781](https://issues.apache.org/jira/browse/SOLR-16781) - Fix Gradle build sometimes gives spurious "unreferenced license file" warnings. 
[SOLR-17142](https://issues.apache.org/jira/browse/SOLR-17142) (Uwe Schindler) - Remove Deprecated URL and replace it with URI in Preparation for Java 21 [SOLR-17321](https://issues.apache.org/jira/browse/SOLR-17321) (Sanjay Dutt) (David Smiley) (Uwe Schindler) - Move Zk Arg parsing into Java Code from bin/solr scripts. [SOLR-17359](https://issues.apache.org/jira/browse/SOLR-17359) (Eric Pugh) (Rahul Goswami) @@ -935,7 +934,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Deprecate the V2RequestSupport interface, and the associated `setUseV2` and `setUseBinaryV2` SolrRequest methods. SolrJ users looking to make use of v2 APIs in their applications can use the SolrRequest implementations dedicated to that purpose. [SOLR-17042](https://issues.apache.org/jira/browse/SOLR-17042) (Jason Gerlowski) - package CLI tool prints error JSONPath [SOLR-17072](https://issues.apache.org/jira/browse/SOLR-17072) (Mikhail Khludnev) - The `train_and_upload_demo_model.py` script referenced in LTR documentation now uses Python3 [SOLR-17078](https://issues.apache.org/jira/browse/SOLR-17078) (Jason Gerlowski) -- dev tools script cloud.sh became broken after changes in 9.3 added a new -slim.tgz file it was not expecting cloud.sh has been updated to ignore the -slim.tgz version of the tarball. [SOLR-17091](https://issues.apache.org/jira/browse/SOLR-17091) +- dev tools script cloud.sh became broken after changes in 9.3 added a new -slim.tgz file it was not expecting cloud.sh has been updated to ignore the -slim.tgz version of the tarball. [SOLR-17091](https://issues.apache.org/jira/browse/SOLR-17091) [9.4.1] - 2024-01-18 @@ -1014,7 +1013,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M - Auto configure tracer without a <tracerConfig> tag in solr.xml [SOLR-16938](https://issues.apache.org/jira/browse/SOLR-16938) (Alex Deparvu) - Users can pass Java system properties to the SolrCLI via the SOLR_TOOL_OPTS environment variable. [SOLR-16940](https://issues.apache.org/jira/browse/SOLR-16940) (Houston Putman) - The SolrCLI now uses a smarter default for the Solr URL if none is provided, using the same envVars used when running Solr. [SOLR-16941](https://issues.apache.org/jira/browse/SOLR-16941) (Houston Putman) -- SimpleTracer propagation for manual transaction ids [SOLR-16950](https://issues.apache.org/jira/browse/SOLR-16950) +- SimpleTracer propagation for manual transaction ids [SOLR-16950](https://issues.apache.org/jira/browse/SOLR-16950) - Make the internal CoresLocator implementation configurable in solr.xml [SOLR-16959](https://issues.apache.org/jira/browse/SOLR-16959) (Vincent Primault) (David Smiley) - The solr.jetty.ssl.sniHostCheck option now defaults to the value of SOLR_SSL_CHECK_PEER_NAME, if it is provided. This will enable client and server hostName check settings to be governed by the same environment variable. If users want separate client/server settings, they can manually override the solr.jetty.ssl.sniHostCheck option in SOLR_OPTS. [SOLR-16964](https://issues.apache.org/jira/browse/SOLR-16964) (Houston Putman) - Some ConfigSet operations formerly required that solrconfig.xml exist but should not have because the name of the file is configurable when creating cores / collections. [SOLR-16967](https://issues.apache.org/jira/browse/SOLR-16967) (David Smiley) @@ -1072,7 +1071,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix typos in org.apache.solr.core package [PR#1629](https://github.com/apache/solr/pull/1629) (Andrey Bozhko) (Marcus Eagan) - new SolrJettyTestRule for tests needing HTTP or Jetty. 
[SOLR-16623](https://issues.apache.org/jira/browse/SOLR-16623) (David Smiley) (Joshua Ouma) -- Remove dependencies on methods defined in the SimplePostTool from Solr core and solrj modules. [SOLR-16803](https://issues.apache.org/jira/browse/SOLR-16803) +- Remove dependencies on methods defined in the SimplePostTool from Solr core and solrj modules. [SOLR-16803](https://issues.apache.org/jira/browse/SOLR-16803) - Eliminate special case code in Solr CLI by introducing explicit VersionTool.java. [SOLR-16842](https://issues.apache.org/jira/browse/SOLR-16842) (Eric Pugh) (Will White) - The default auto-soft-commit time has been set to 3 seconds. Previously, auto-soft-commit was disabled by default. [SOLR-16856](https://issues.apache.org/jira/browse/SOLR-16856) (Houston Putman) - Lower the AffinityPlacementPlugin's default minimalFreeDiskGB to 5 GB [SOLR-16915](https://issues.apache.org/jira/browse/SOLR-16915) (Houston Putman) @@ -1284,7 +1283,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - FileSystemConfigSetService: implement the abstraction completely. It could be useful for putting ConfigSets on a shared file system. [SOLR-15787](https://issues.apache.org/jira/browse/SOLR-15787) (Nazerke Seidan) (David Smiley) - Certain Jetty "server" jars (jetty-util, jetty-io, etc.) are no longer duplicated between server/lib and WEB-INF/lib. All shared jars now live in server/lib/ext. By default, Jetty does not allow these jars to be shared by the Jetty server and the web-app (Solr). In order to accomplish this, explicit exclusions have been added to server/contexts/solr-jetty-context.xml that allow these "server" jars to be shared. Solr Core also no longer contains the JettySolrRunner (now living in the testing framework), removing the need for Jetty Server dependencies in solr-core. 
[SOLR-16158](https://issues.apache.org/jira/browse/SOLR-16158) (Houston Putman) - WARN when restoring backup if ConfigSet with same name already exists [SOLR-16302](https://issues.apache.org/jira/browse/SOLR-16302) (Albert Moser) (Kevin Risden) -- A number of v2 "replica" endpoints have been updated to be more REST-ful, and hopefully, intuitive. ADDREPLICAPROP is now `PUT /api/collections/$coll/shards/$shard/replicas/$rep/properties/$prop {"value": $val}` DELETEREPLICAPROP is now `DELETE /api/collections/$coll/shards/$shard/replicas/$rep/properties/$prop` [SOLR-16392](https://issues.apache.org/jira/browse/SOLR-16392) +- A number of v2 "replica" endpoints have been updated to be more REST-ful, and hopefully, intuitive. ADDREPLICAPROP is now `PUT /api/collections/$coll/shards/$shard/replicas/$rep/properties/$prop {"value": $val}` DELETEREPLICAPROP is now `DELETE /api/collections/$coll/shards/$shard/replicas/$rep/properties/$prop` [SOLR-16392](https://issues.apache.org/jira/browse/SOLR-16392) - The path of the v2 "list alias" API has been tweaked slightly to be more intuitive, and is now available at `GET /api/aliases`. It is also now possible to request information about a specific alias at `GET /api/aliases/<aliasName>`. [SOLR-16393](https://issues.apache.org/jira/browse/SOLR-16393) (Alex Deparvu) (Jason Gerlowski) - /mlt now has a v2 API available at `GET /api/collections/collName/mlt` [SOLR-16397](https://issues.apache.org/jira/browse/SOLR-16397) (Ameer Albahem) (Jason Gerlowski) - Introducing `{!mlt_content}foo bar` to cover existing `/mlt` handler functionality for SolrCloud. [SOLR-16420](https://issues.apache.org/jira/browse/SOLR-16420) (Mikhail Khludnev) @@ -2159,7 +2158,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- Internal logic for identifying 'Solr Home' has been refactored to make testing less error prone. 
Plugin developers using SolrPaths.locateSolrHome() or 'new SolrResourceLoader' should check deprecation warnings as existing some existing functionality will be removed in 9.0. See SOLR-14934 for more details. [SOLR-14934](https://issues.apache.org/jira/browse/SOLR-14934) +- Internal logic for identifying 'Solr Home' has been refactored to make testing less error prone. Plugin developers using SolrPaths.locateSolrHome() or 'new SolrResourceLoader' should check deprecation warnings as existing some existing functionality will be removed in 9.0. See SOLR-14934 for more details. [SOLR-14934](https://issues.apache.org/jira/browse/SOLR-14934) ### Added (3 changes) @@ -2393,7 +2392,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Introducing {!bool excludeTags=...} for Query DSL. [SOLR-14539](https://issues.apache.org/jira/browse/SOLR-14539) (Mikhail Khludnev) - Add BlockMax-WAND support to ReRank queries [SOLR-14552](https://issues.apache.org/jira/browse/SOLR-14552) (Tomás Fernández Löbbe) - Add BlockMax-WAND support for queries where the score is requested [SOLR-14554](https://issues.apache.org/jira/browse/SOLR-14554) (Tomás Fernández Löbbe) -- CoreAdminAPI's parameters instanceDir and dataDir are now validated, and must be relative to either SOLR_HOME, SOLR_DATA_HOME or coreRootDir. Added new solr.xml config 'allowPaths', controlled by system property 'solr.allowPaths' that allows you to add other allowed paths when needed. [SOLR-14561](https://issues.apache.org/jira/browse/SOLR-14561) +- CoreAdminAPI's parameters instanceDir and dataDir are now validated, and must be relative to either SOLR_HOME, SOLR_DATA_HOME or coreRootDir. Added new solr.xml config 'allowPaths', controlled by system property 'solr.allowPaths' that allows you to add other allowed paths when needed. 
[SOLR-14561](https://issues.apache.org/jira/browse/SOLR-14561) - Limit the HTTP security headers to "/solr" end point [SOLR-14634](https://issues.apache.org/jira/browse/SOLR-14634) (noble) - Do not collect expand documents when expand.rows=0 [SOLR-8306](https://issues.apache.org/jira/browse/SOLR-8306) (Marshall Sanders) (Amelia Henderson) @@ -2607,7 +2606,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- Package manager now stores keys in package store instead of ZK. If you're using package manager and have already added any repositories, then you would need to re-add all the keys from /keys/exe location, one-by-one, using: "bin/solr add-key <keyfile.der>" [SOLR-14158](https://issues.apache.org/jira/browse/SOLR-14158) +- Package manager now stores keys in package store instead of ZK. If you're using package manager and have already added any repositories, then you would need to re-add all the keys from /keys/exe location, one-by-one, using: "bin/solr add-key <keyfile.der>" [SOLR-14158](https://issues.apache.org/jira/browse/SOLR-14158) ### Changed (1 change) @@ -2626,17 +2625,17 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (11 changes) -- QueryResponse.getExplainMap() type has changed from Map<String, String> to Map<String, Object> in order to accommodate structured explanations. This is a partially back-compatible change - compiled third-party components will work the same as before due to type erasure but source code changes may be required. [SOLR-13806](https://issues.apache.org/jira/browse/SOLR-13806) +- QueryResponse.getExplainMap() type has changed from Map<String, String> to Map<String, Object> in order to accommodate structured explanations. This is a partially back-compatible change - compiled third-party components will work the same as before due to type erasure but source code changes may be required. 
[SOLR-13806](https://issues.apache.org/jira/browse/SOLR-13806) - Deprecate legacy SolrCache implementations. Users are encouraged to transition their configurations to use org.apache.solr.search.CaffeineCache instead. [SOLR-13817](https://issues.apache.org/jira/browse/SOLR-13817) (ab) -- org.apache.solr.search.grouping.distributed.command.QueryCommand.Builder has new method 'setMainQuery' which is used to set top-level query. build() would fail if called without setting mainQuery. [SOLR-13823](https://issues.apache.org/jira/browse/SOLR-13823) +- org.apache.solr.search.grouping.distributed.command.QueryCommand.Builder has new method 'setMainQuery' which is used to set top-level query. build() would fail if called without setting mainQuery. [SOLR-13823](https://issues.apache.org/jira/browse/SOLR-13823) - timeAllowed parameter is allowed to have 0 value [SOLR-13904](https://issues.apache.org/jira/browse/SOLR-13904) (Houston Putman) (Mikhail Khludnev) -- Using Collapse filter or expand component with grouping is explicitly disallowed as the combination would cause inconsistent behavior and NPEs. [SOLR-13970](https://issues.apache.org/jira/browse/SOLR-13970) +- Using Collapse filter or expand component with grouping is explicitly disallowed as the combination would cause inconsistent behavior and NPEs. [SOLR-13970](https://issues.apache.org/jira/browse/SOLR-13970) - The default configset no longer has the following: - Library inclusions (<lib ../>) for extraction, solr-cell libs, clustering, velocity, LTR and language identifier - /browse, /tvrh and /update/extract handlers - TermVector component [SOLR-13978](https://issues.apache.org/jira/browse/SOLR-13978) (if someone wants it) (can be added using config APIs) - Some security-related http headers such as Content-Security-Policy are now set. If you have custom html served up by Solr's http server that contains inline javascript, it will no longer execute in modern browsers. 
You can fix your JS code to not run inline anymore, or edit etc/jetty.xml and weaken the CSP, or remove/alter the headers with a reverse proxy. [SOLR-13982](https://issues.apache.org/jira/browse/SOLR-13982) (rmuir) -- VelocityResponseWriter has been hardened - only trusted configsets can render configset provided templates and rendering templates from request parameters has been removed. [SOLR-14025](https://issues.apache.org/jira/browse/SOLR-14025) -- VelocityResponseWriter has been deprecated and may be removed in a future version. [SOLR-14065](https://issues.apache.org/jira/browse/SOLR-14065) -- Untrusted configsets (ones that are uploaded via unsecured configset API) cannot use <lib> directive. Consider enabling authentication/authorization so that the uploaded configsets are trusted. Note: If you already have a collection using untrusted configset that uses <lib> directive, it will not load after upgrading to 8.4. You can re-upload your configset using "bin/solr zk -upconfig .." or place your libraries in the classpath and restart Solr. [SOLR-14071](https://issues.apache.org/jira/browse/SOLR-14071) -- The "Blob Store" API and "runtimeLib" plugin mechanism that uses it is now considered deprecated. The replacement to it is the "Package Management" system, which includes a "File Store". These are experimental currently but will grow/stabalize/mature. [SOLR-14072](https://issues.apache.org/jira/browse/SOLR-14072) +- VelocityResponseWriter has been hardened - only trusted configsets can render configset provided templates and rendering templates from request parameters has been removed. [SOLR-14025](https://issues.apache.org/jira/browse/SOLR-14025) +- VelocityResponseWriter has been deprecated and may be removed in a future version. [SOLR-14065](https://issues.apache.org/jira/browse/SOLR-14065) +- Untrusted configsets (ones that are uploaded via unsecured configset API) cannot use <lib> directive. 
Consider enabling authentication/authorization so that the uploaded configsets are trusted. Note: If you already have a collection using untrusted configset that uses <lib> directive, it will not load after upgrading to 8.4. You can re-upload your configset using "bin/solr zk -upconfig .." or place your libraries in the classpath and restart Solr. [SOLR-14071](https://issues.apache.org/jira/browse/SOLR-14071) +- The "Blob Store" API and "runtimeLib" plugin mechanism that uses it is now considered deprecated. The replacement to it is the "Package Management" system, which includes a "File Store". These are experimental currently but will grow/stabilize/mature. [SOLR-14072](https://issues.apache.org/jira/browse/SOLR-14072) ### Added (9 changes) @@ -2719,7 +2718,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- Velocity: The initialization parameters "params.resource.loader.enabled" and "solr.resource.loader.enabled" have been removed and replaced by Java system properties "velocity.resourceloader.params.enabled" and "velocity.resourceloader.solr.enabled" respectively (both turned off by default). Please see SOLR-13971 for more details. [SOLR-13971](https://issues.apache.org/jira/browse/SOLR-13971) +- Velocity: The initialization parameters "params.resource.loader.enabled" and "solr.resource.loader.enabled" have been removed and replaced by Java system properties "velocity.resourceloader.params.enabled" and "velocity.resourceloader.solr.enabled" respectively (both turned off by default). Please see SOLR-13971 for more details. [SOLR-13971](https://issues.apache.org/jira/browse/SOLR-13971) ### Fixed (5 changes) @@ -2735,8 +2734,8 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M ### Upgrade Notes (2 changes) -- Users who have written test cases that extend SolrTestCaseJ4 may see NullPointerExceptions if their tests directly reference both SolrTestCaseJ4.initCoreDataDir and SolrTestCaseJ4.deleteCore(). This change in behavior is due to a bug fix in deleteCore() to ensure the dataDir is properly reset in tests that call initCore()/deleteCore() multiple times in a given test (class). initCoreDataDir is now deprecated, and users are encouraged to use SolrTestCaseJ4.initAndGetDataDir() in its place. See SOLR-13664 for more details. [SOLR-13664](https://issues.apache.org/jira/browse/SOLR-13664) -- For JWTAuthPlugin, the 'jwkUrl' configuration key is deprecated and may be removed later, please use 'jwksUrl' instead. See SOLR-13734. [SOLR-13734](https://issues.apache.org/jira/browse/SOLR-13734) +- Users who have written test cases that extend SolrTestCaseJ4 may see NullPointerExceptions if their tests directly reference both SolrTestCaseJ4.initCoreDataDir and SolrTestCaseJ4.deleteCore(). This change in behavior is due to a bug fix in deleteCore() to ensure the dataDir is properly reset in tests that call initCore()/deleteCore() multiple times in a given test (class). initCoreDataDir is now deprecated, and users are encouraged to use SolrTestCaseJ4.initAndGetDataDir() in its place. See SOLR-13664 for more details. [SOLR-13664](https://issues.apache.org/jira/browse/SOLR-13664) +- For JWTAuthPlugin, the 'jwkUrl' configuration key is deprecated and may be removed later, please use 'jwksUrl' instead. See SOLR-13734. [SOLR-13734](https://issues.apache.org/jira/browse/SOLR-13734) ### Added (15 changes) @@ -2754,7 +2753,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M - command line option to export documents to a file [SOLR-13682](https://issues.apache.org/jira/browse/SOLR-13682) (noble) - JWTAuthPlugin to support multiple JWKS endpoints [SOLR-13713](https://issues.apache.org/jira/browse/SOLR-13713) (janhoy) - JWTAuthPlugin now supports multiple IdP issuers through configuring a new 'issuers' configuration key. Access tokens issued and signed by any of the configured issuers will be validated [SOLR-13734](https://issues.apache.org/jira/browse/SOLR-13734) (janhoy) -- Add CaffeineCache, an efficient implementation of SolrCache.(Ben Manes, Shawn Heisey, David Smiley, Andrzej Bialecki) [SOLR-8241](https://issues.apache.org/jira/browse/SOLR-8241) +- Add CaffeineCache, an efficient implementation of SolrCache.(Ben Manes, Shawn Heisey, David Smiley, Andrzej Bialecki) [SOLR-8241](https://issues.apache.org/jira/browse/SOLR-8241) ### Changed (19 changes) @@ -2809,7 +2808,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SPLITSHARD (async) with failures in underlying sub-operations can result in data loss [SOLR-13718](https://issues.apache.org/jira/browse/SOLR-13718) (Ishan Chattopadhyaya) - Allow negative values for limit in TermsFacetMap [SOLR-13725](https://issues.apache.org/jira/browse/SOLR-13725) (Richard Walker) (Munendra S N) - Fixed V2Requests - HttpSolrClient replaced first instance of "/solr" with "/api" which caused a change in host names starting with "solr". 
[SOLR-13727](https://issues.apache.org/jira/browse/SOLR-13727) (Megan Carey) (yonik) -- Fix regression in support for Date math in TRA start date that was introduced by SOLR-13375 [SOLR-13760](https://issues.apache.org/jira/browse/SOLR-13760) [SOLR-13375](https://issues.apache.org/jira/browse/SOLR-13375) +- Fix regression in support for Date math in TRA start date that was introduced by SOLR-13375 [SOLR-13760](https://issues.apache.org/jira/browse/SOLR-13760) [SOLR-13375](https://issues.apache.org/jira/browse/SOLR-13375) - Fix ClassCastException in NestableJsonFacet [SOLR-13780](https://issues.apache.org/jira/browse/SOLR-13780) (Tiago Martinho de Barros) (Munendra S N) - LRUStatsCache size explosion and ineffective caching. [SOLR-13790](https://issues.apache.org/jira/browse/SOLR-13790) (ab) - HttpSolrCall now maintains internal request count (_forwardedCount) for remote queries and limits them to the number of replicas. This avoids making too many cascading calls to remote servers, which, if not restricted, can bring down nodes containing the said collection [SOLR-13793](https://issues.apache.org/jira/browse/SOLR-13793) (Kesharee Nandan Vishwakarma) (Ishan Chattopadhyaya) @@ -2848,9 +2847,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (6 changes) -- Time Routed Aliases now have a '__TRA__' infix preceding the timestamp portion of the collection names. collections with the old format will not be altered and will continue to work, but any new TRA's or new collections for existing TRA's will use the new format. Solr will handle this invisibly, but any external code that attempted to predict collection names in TRA's will probably need adjustment. [SOLR-13419](https://issues.apache.org/jira/browse/SOLR-13419) -- Routed Aliases now use collection properties rather than core properties to identify collections that belong to the alias by default. 
This should be invisible and fully backwards compatible from within solr, and existing routed alias collections with core based properties will continue to work, but new collections created will not add a property to core.properties anymore so any external code that inspected core.properties will not find the 'routedAliasName' key in new cores belonging to routed aliases. [SOLR-13420](https://issues.apache.org/jira/browse/SOLR-13420) -- Support for "addr" parameter from the "/solr/admin/zookeeper" endpoint has now been removed and will no longer be supported. [SOLR-13507](https://issues.apache.org/jira/browse/SOLR-13507) +- Time Routed Aliases now have a '__TRA__' infix preceding the timestamp portion of the collection names. collections with the old format will not be altered and will continue to work, but any new TRA's or new collections for existing TRA's will use the new format. Solr will handle this invisibly, but any external code that attempted to predict collection names in TRA's will probably need adjustment. [SOLR-13419](https://issues.apache.org/jira/browse/SOLR-13419) +- Routed Aliases now use collection properties rather than core properties to identify collections that belong to the alias by default. This should be invisible and fully backwards compatible from within solr, and existing routed alias collections with core based properties will continue to work, but new collections created will not add a property to core.properties anymore so any external code that inspected core.properties will not find the 'routedAliasName' key in new cores belonging to routed aliases. [SOLR-13420](https://issues.apache.org/jira/browse/SOLR-13420) +- Support for "addr" parameter from the "/solr/admin/zookeeper" endpoint has now been removed and will no longer be supported. 
[SOLR-13507](https://issues.apache.org/jira/browse/SOLR-13507) - org.apache.solr.util.SolrPluginUtils.IdentityRegenerator has been removed, please use the identical org.apache.solr.search.NoOpRegenerator instead. [SOLR-13515](https://issues.apache.org/jira/browse/SOLR-13515) (David Smiley) (Christine Poerschke) - Upgrade Jetty to 9.4.19.v20190610. [SOLR-13541](https://issues.apache.org/jira/browse/SOLR-13541) (Erick Erickson) (Cao Manh Dat) - Upgrade ZooKeeper to 3.5.5. ZooKeeper 3.5.5 introduces more security features. Include in your zoo.cfg file at minimum the following: 4lw.commands.whitelist=mntr,conf,ruok You can use 4lw.commands.whitelist=* to enable all ZooKeeper "4 letter commands". [SOLR-8346](https://issues.apache.org/jira/browse/SOLR-8346) (Erick Erickson) @@ -2876,7 +2875,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Expose IndexWriterConfig's ramPerThreadHardLimitMB through solrconfig.xml [SOLR-12554](https://issues.apache.org/jira/browse/SOLR-12554) (Ishan Chattopadhyaya) (Munendra S N) - Query Result Cache does not honor maxRamBytes parameter. [SOLR-13003](https://issues.apache.org/jira/browse/SOLR-13003) (ab) (Brian Ecker) - Make contrib/ltr Feature.defaultValue configurable. 
[SOLR-13049](https://issues.apache.org/jira/browse/SOLR-13049) (Stanislav Livotov) (Christine Poerschke) -- Add a request parameter to execute a streaming expression locally [SOLR-13306](https://issues.apache.org/jira/browse/SOLR-13306) +- Add a request parameter to execute a streaming expression locally [SOLR-13306](https://issues.apache.org/jira/browse/SOLR-13306) - In autoscaling policies, use an explicit 'put : on-each-node' to specify the rules is applied on each node [SOLR-13329](https://issues.apache.org/jira/browse/SOLR-13329) (noble) - Transaction log to natively support UUID types [SOLR-13347](https://issues.apache.org/jira/browse/SOLR-13347) (Thomas Wöckinger) (noble) - Highlighting: Range queries will now highlight in hl.method=unified mode. [SOLR-13367](https://issues.apache.org/jira/browse/SOLR-13367) (David Smiley) @@ -2903,7 +2902,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix "Search is temporarily disabled" logic to be consistent for entire request [SOLR-13474](https://issues.apache.org/jira/browse/SOLR-13474) (hossman) - Stop the leader from trying to rejoin the election on session expiration and harden our zk reconnect code path. [SOLR-13489](https://issues.apache.org/jira/browse/SOLR-13489) (Mark Miller) (Anshum Gupta) - Fix CollectionStateWatcher/CollectionStatePredicate based APIs in ZkStateReader and CloudSolrClient to be triggered on liveNode changes. Also add Predicate<DocCollection> equivalents for callers that don't care about liveNodes. [SOLR-13490](https://issues.apache.org/jira/browse/SOLR-13490) (hossman) -- SolrZkClient's watch wrapper no longer allows zookeeper to hold the same watch object multiple times. [SOLR-13491](https://issues.apache.org/jira/browse/SOLR-13491) +- SolrZkClient's watch wrapper no longer allows zookeeper to hold the same watch object multiple times. 
[SOLR-13491](https://issues.apache.org/jira/browse/SOLR-13491) - Fix distributed grouping related NullPointerException in JSONWriter.writeSolrDocument [SOLR-13496](https://issues.apache.org/jira/browse/SOLR-13496) (Christine Poerschke) - NPE on omitHeader=true is fixed by sending omitHeader=false to shard searches [SOLR-13509](https://issues.apache.org/jira/browse/SOLR-13509) (Munendra S N) (Mikhail Khludnev) - Fix input parameter handling in SchemaRequest.DynamicField and SchemaRequest.FieldTypes (Schema API) [SOLR-13521](https://issues.apache.org/jira/browse/SOLR-13521) (Tomás Fernández Löbbe) @@ -2946,14 +2945,14 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (8 changes) -- When requesting the status of an async request via REQUESTSTATUS collections API, the response will include the list of internal async requests (if any) in the "success" or "failed" keys (in addition to them being included outside those keys for backwards compatibility). See SOLR-12708 for more details [SOLR-12708](https://issues.apache.org/jira/browse/SOLR-12708) -- Velocity and Velocity Tools were both upgraded as part of this release. Velocity upgraded from 1.7 to 2.0. Please see https://velocity.apache.org/engine/2.0/upgrading.html about upgrading. Velocity Tools upgraded from 2.0 to 3.0. For more details, please see https://velocity.apache.org/tools/3.0/upgrading.html for details about the upgrade. [SOLR-13335](https://issues.apache.org/jira/browse/SOLR-13335) -- Solr's default behavior when dealing with 'maxBooleanClauses' has changed to reduce the risk of exponential query expansion when dealing with pathological query strings. A default upper limit of 1024 clauses (The same default prior to Solr 7.0) is now enforced at the node level, and can be overridden in solr.xml. 
The identically named solrconfig.xml setting is still available for limiting the size of 'explicit' boolean query strings, but this per-collection limit is still ristricted by the upper-bound of the global limit in solr.xml. See SOLR-13336 for more details. [SOLR-13336](https://issues.apache.org/jira/browse/SOLR-13336) -- The default GC has been changed from CMS to G1. To override this (in order to switch to CMS or any other GC), use GC_TUNE section of bin/solr.in.sh or bin/solr.in.cmd. [SOLR-13394](https://issues.apache.org/jira/browse/SOLR-13394) -- Custom TransientSolrCoreCache implementations no longer use the Observer/Observable pattern. To notify Solr that a core has been aged out of the cache, call CoreContainer.queueCoreToClose(SolrCore). See SOLR-13400 for details. [SOLR-13400](https://issues.apache.org/jira/browse/SOLR-13400) -- Update requests sent to non-routed aliases that point to multiple collections are no longer accepted. Until now Solr followed an obscure convention of updating only the first collection from the list, which usually was not what the user intended. This change explicitly rejects such update requests. [SOLR-13407](https://issues.apache.org/jira/browse/SOLR-13407) +- When requesting the status of an async request via REQUESTSTATUS collections API, the response will include the list of internal async requests (if any) in the "success" or "failed" keys (in addition to them being included outside those keys for backwards compatibility). See SOLR-12708 for more details [SOLR-12708](https://issues.apache.org/jira/browse/SOLR-12708) +- Velocity and Velocity Tools were both upgraded as part of this release. Velocity upgraded from 1.7 to 2.0. Please see https://velocity.apache.org/engine/2.0/upgrading.html about upgrading. Velocity Tools upgraded from 2.0 to 3.0. For more details, please see https://velocity.apache.org/tools/3.0/upgrading.html for details about the upgrade. 
[SOLR-13335](https://issues.apache.org/jira/browse/SOLR-13335) +- Solr's default behavior when dealing with 'maxBooleanClauses' has changed to reduce the risk of exponential query expansion when dealing with pathological query strings. A default upper limit of 1024 clauses (The same default prior to Solr 7.0) is now enforced at the node level, and can be overridden in solr.xml. The identically named solrconfig.xml setting is still available for limiting the size of 'explicit' boolean query strings, but this per-collection limit is still ristricted by the upper-bound of the global limit in solr.xml. See SOLR-13336 for more details. [SOLR-13336](https://issues.apache.org/jira/browse/SOLR-13336) +- The default GC has been changed from CMS to G1. To override this (in order to switch to CMS or any other GC), use GC_TUNE section of bin/solr.in.sh or bin/solr.in.cmd. [SOLR-13394](https://issues.apache.org/jira/browse/SOLR-13394) +- Custom TransientSolrCoreCache implementations no longer use the Observer/Observable pattern. To notify Solr that a core has been aged out of the cache, call CoreContainer.queueCoreToClose(SolrCore). See SOLR-13400 for details. [SOLR-13400](https://issues.apache.org/jira/browse/SOLR-13400) +- Update requests sent to non-routed aliases that point to multiple collections are no longer accepted. Until now Solr followed an obscure convention of updating only the first collection from the list, which usually was not what the user intended. This change explicitly rejects such update requests. [SOLR-13407](https://issues.apache.org/jira/browse/SOLR-13407) - Until now, the CREATE command of Collections API returned status 0 even in case of failure. Now, the status code will be non-zero in case of failures [SOLR-5970](https://issues.apache.org/jira/browse/SOLR-5970) (e.g. 4xx) (5xx) -- SolrGangliaReporter has been removed from Solr because support for Ganglia has been removed from Dropwizard Metrics 4 due to a transitive dependency on LGPL. 
+- SolrGangliaReporter has been removed from Solr because support for Ganglia has been removed from Dropwizard Metrics 4 due to a transitive dependency on LGPL. ### Added (16 changes) @@ -2976,7 +2975,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Changed (14 changes) -- (at least) 2 distinct failures possible when clients attempt searches during SolrCore reload, added test band-aid for DocValuesNotIndexedTest. [SOLR-11035](https://issues.apache.org/jira/browse/SOLR-11035) +- (at least) 2 distinct failures possible when clients attempt searches during SolrCore reload, added test band-aid for DocValuesNotIndexedTest. [SOLR-11035](https://issues.apache.org/jira/browse/SOLR-11035) - Make HDFSDirectoryFactory support other prefixes (besides hdfs:/) [SOLR-11473](https://issues.apache.org/jira/browse/SOLR-11473) (Kevin Risden) - Throw an exception, instead of just a warning, when unknown atomic update operation is encountered [SOLR-12167](https://issues.apache.org/jira/browse/SOLR-12167) (Munendra S N) (Ishan Chattopadhyaya) - Avoid unnecessary memory cost when DistributedUpdateProcessor timed-out lock is not used. [SOLR-12833](https://issues.apache.org/jira/browse/SOLR-12833) (jefferyyuan) (ab) @@ -3070,22 +3069,22 @@ This file lists Solr's raw release notes with details of every change to Solr. M - The 'func' query parser now returns scores that are equal to 0 when a negative value is produced. This change is due to the fact that Lucene now requires scores to be positive. [LUCENE-7996](https://issues.apache.org/jira/browse/LUCENE-7996) (Adrien Grand) - Memory codecs have been removed from the codebase (MemoryPostings, MemoryDocValues). If you used postingsFormat="Memory" or docValuesFormat="Memory" then either remove it to use the default or experiment with one of the others. 
[LUCENE-8267](https://issues.apache.org/jira/browse/LUCENE-8267) (Dawid Weiss) -- Due to the introduction of LIR redesign since Solr 7.3 (SOLR-11702) and the removing of old LIR implementation in Solr 8. Rolling updates are not possible unless all nodes must be on Solr 7.3 or higher. If not updates can be lost. [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) -- In 'langid' contrib, the LanguageIdentifierUpdateProcessor base class changed some method signatures. If you have a custom language identifier implementation you will need to adapt your code. [SOLR-11774](https://issues.apache.org/jira/browse/SOLR-11774) +- Due to the introduction of LIR redesign since Solr 7.3 (SOLR-11702) and the removing of old LIR implementation in Solr 8. Rolling updates are not possible unless all nodes must be on Solr 7.3 or higher. If not updates can be lost. [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) +- In 'langid' contrib, the LanguageIdentifierUpdateProcessor base class changed some method signatures. If you have a custom language identifier implementation you will need to adapt your code. [SOLR-11774](https://issues.apache.org/jira/browse/SOLR-11774) - SolrMetric registries retained references to SolrCores when closed. A change of SolrMetricMAnager.registerGauge and SolrMetricProducer.initializeMetrics method signatures was required to fix it. Third party components that use this API need to be updated. [SOLR-11882](https://issues.apache.org/jira/browse/SOLR-11882) (Eros Taborelli) (Erick Erickson) (ab) - The date format patterns used by ParseDateFieldUpdateProcessorFactory (present in "schemaless mode") are now interpreted by Java 8's java.time.DateTimeFormatter instead of Joda Time. The pattern language is very similar but not the same. Typically, simply update the pattern by changing an uppercase 'Z' to lowercase 'z' and that's it. 
For the current recommended set of patterns in schemaless mode, see "Schemaless Mode" in the ref guide, or simply examine the default configSet. Also note that the set of patterns (formats) here have expanded from before to subsume those patterns previously handled by the "extract" contrib (Solr Cell / Tika). [SOLR-12586](https://issues.apache.org/jira/browse/SOLR-12586) (David Smiley) (Bar Rotstein) - The "extraction" contrib (Solr Cell) no longer does any date parsing, and thus no longer has the "date.formats" configuration. To ensure date strings are properly parsed, use ParseDateFieldUpdateProcessorFactory (an URP) commonly registered with the name "parse-date" in "schemaless mode". [SOLR-12593](https://issues.apache.org/jira/browse/SOLR-12593) (David Smiley) (Bar Rotstein) -- UpdateShardHandler's updateOnlyClient is now a Http2SolrClient (previous HttpSolrClient). This new client does not support 'maxConnections','maxConnectionsPerHost' parameters. [SOLR-12605](https://issues.apache.org/jira/browse/SOLR-12605) +- UpdateShardHandler's updateOnlyClient is now a Http2SolrClient (previous HttpSolrClient). This new client does not support 'maxConnections','maxConnectionsPerHost' parameters. [SOLR-12605](https://issues.apache.org/jira/browse/SOLR-12605) - When JSON data is sent to Solr with nested child documents split using the "split" parameter, the child docs will now be associated to their parents by the field/label string used in the JSON instead of anonymously. Most users probably won't notice the distinction since the label is lost any way unless special fields are in the schema. This choice used to be toggleable with an internal/expert "anonChildDocs" parameter flag which is now gone. [SOLR-12633](https://issues.apache.org/jira/browse/SOLR-12633) (David Smiley) -- HttpShardHandlerFactory's defaultClient is now a Http2SolrClient (previous HttpSolrClient). This new client does not support 'maxConnections','maxConnectionsPerHost' parameters. 
LBHttpSolrClient.Req and LBHttpSolrClient.Rsp are marked as deprecated, uses LBSolrClient.Req and LBSolrClient.Rsp instead. [SOLR-12640](https://issues.apache.org/jira/browse/SOLR-12640) -- Since Http2SolrClient does not support exposing connections related metrics. These metrics are no longer available 'QUERY.httpShardHandler.{availableConnections, leasedConnections, maxConnections, pendingConnections}', 'UPDATE.updateShardHandler.{availableConnections, leasedConnections, maxConnections, pendingConnections}' [SOLR-12643](https://issues.apache.org/jira/browse/SOLR-12643) -- The UnifiedHighlighter hl.weightMatches now defaults to true. If there are unforseen highlight problems, this may be the culprit. [SOLR-12754](https://issues.apache.org/jira/browse/SOLR-12754) -- If you explicitly use BM25SimilarityFactory in your schema, the absolute scoring will be lower due to SOLR-13025. But ordering of documents will not change in the normal case. Use LegacyBM25SimilarityFactory if you need to force the old 6.x/7.x scoring. Note that if you have not specified any similarity in schema or use the default SchemaSimilarityFactory, then LegacyBM25Similarity is automatically selected for 'luceneMatchVersion' < 8.0.0. See also explanation in Reference Guide chapter "Other Schema Elements". [SOLR-13025](https://issues.apache.org/jira/browse/SOLR-13025) -- The default replica placement strategy used in Solr has been reverted to the 'legacy' policy used by Solr 7.4 and previous versions. This is due to multiple bugs in the autoscaling based replica placement strategy that was made default in Solr 7.5 which causes multiple replicas of the same shard to be placed on the same node in addition to the maxShardsPerNode and createNodeSet parameters being ignored. Although the default has changed, autoscaling will continue to be used if a cluster policy or preference is specified or a collection level policy is in use. 
The default replica placement strategy can be changed to use autoscaling again by setting a cluster property: curl -X POST -H 'Content-type:application/json' --data-binary ' { "set-obj-property": { "defaults" : { "cluster": { "useLegacyReplicaAssignment":false } } } }' http://$SOLR_HOST:$SOLR_PORT/api/cluster [SOLR-13248](https://issues.apache.org/jira/browse/SOLR-13248) -- Deleting (or updating) documents by their uniqueKey is now scoped to only consider root documents, not child/nested documents. Thus a delete-by-id won't work on a child doc (no-op), and an attempt to update a child doc by providing a new doc with the same ID would add a new doc (probably erroneous). Both these actions were and still are problematic. In-place-updates are safe though. If you want to delete certain child documents and if you know they don't themselves have nested children then you must do so with a delete-by-query technique. [SOLR-5211](https://issues.apache.org/jira/browse/SOLR-5211) +- HttpShardHandlerFactory's defaultClient is now a Http2SolrClient (previous HttpSolrClient). This new client does not support 'maxConnections','maxConnectionsPerHost' parameters. LBHttpSolrClient.Req and LBHttpSolrClient.Rsp are marked as deprecated, use LBSolrClient.Req and LBSolrClient.Rsp instead. [SOLR-12640](https://issues.apache.org/jira/browse/SOLR-12640) +- Since Http2SolrClient does not support exposing connections related metrics. These metrics are no longer available 'QUERY.httpShardHandler.{availableConnections, leasedConnections, maxConnections, pendingConnections}', 'UPDATE.updateShardHandler.{availableConnections, leasedConnections, maxConnections, pendingConnections}' [SOLR-12643](https://issues.apache.org/jira/browse/SOLR-12643) +- The UnifiedHighlighter hl.weightMatches now defaults to true. If there are unforeseen highlight problems, this may be the culprit. 
[SOLR-12754](https://issues.apache.org/jira/browse/SOLR-12754) +- If you explicitly use BM25SimilarityFactory in your schema, the absolute scoring will be lower due to SOLR-13025. But ordering of documents will not change in the normal case. Use LegacyBM25SimilarityFactory if you need to force the old 6.x/7.x scoring. Note that if you have not specified any similarity in schema or use the default SchemaSimilarityFactory, then LegacyBM25Similarity is automatically selected for 'luceneMatchVersion' < 8.0.0. See also explanation in Reference Guide chapter "Other Schema Elements". [SOLR-13025](https://issues.apache.org/jira/browse/SOLR-13025) +- The default replica placement strategy used in Solr has been reverted to the 'legacy' policy used by Solr 7.4 and previous versions. This is due to multiple bugs in the autoscaling based replica placement strategy that was made default in Solr 7.5 which causes multiple replicas of the same shard to be placed on the same node in addition to the maxShardsPerNode and createNodeSet parameters being ignored. Although the default has changed, autoscaling will continue to be used if a cluster policy or preference is specified or a collection level policy is in use. The default replica placement strategy can be changed to use autoscaling again by setting a cluster property: curl -X POST -H 'Content-type:application/json' --data-binary ' { "set-obj-property": { "defaults" : { "cluster": { "useLegacyReplicaAssignment":false } } } }' http://$SOLR_HOST:$SOLR_PORT/api/cluster [SOLR-13248](https://issues.apache.org/jira/browse/SOLR-13248) +- Deleting (or updating) documents by their uniqueKey is now scoped to only consider root documents, not child/nested documents. Thus a delete-by-id won't work on a child doc (no-op), and an attempt to update a child doc by providing a new doc with the same ID would add a new doc (probably erroneous). Both these actions were and still are problematic. In-place-updates are safe though. 
If you want to delete certain child documents and if you know they don't themselves have nested children then you must do so with a delete-by-query technique. [SOLR-5211](https://issues.apache.org/jira/browse/SOLR-5211) - Solr nodes can now listen and serve HTTP/2 requests. Most of internal requests (sent by UpdateShardHandler) (HttpShardHandler) -- ALPN is not supported in Java 8 or lower version therefore when a node started in Java 8 or a lower version with SSL enabled, it will send and can only be able to handle HTTP/1.1 requests. In case of using SSL Java 9 or later versions are recommended. -- Custom AuthenticationPlugin must provide its own setup for Http2SolrClient through implementing HttpClientBuilderPlugin.setup, if not internal requests can't be authenticated. +- ALPN is not supported in Java 8 or lower version therefore when a node started in Java 8 or a lower version with SSL enabled, it will send and can only be able to handle HTTP/1.1 requests. In case of using SSL Java 9 or later versions are recommended. +- Custom AuthenticationPlugin must provide its own setup for Http2SolrClient through implementing HttpClientBuilderPlugin.setup, if not internal requests can't be authenticated. ### Added (14 changes) @@ -3150,9 +3149,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SPLITSHARD (async) with failures in underlying sub-operations can result in data loss [SOLR-13718](https://issues.apache.org/jira/browse/SOLR-13718) (Ishan Chattopadhyaya) - Use the safe fork of simple-xml for clustering contrib. [SOLR-13779](https://issues.apache.org/jira/browse/SOLR-13779) (Dawid Weiss) (Sachin Pattan) - HttpSolrCall now maintains internal request count (_forwardedCount) for remote queries and limits them to the number of replicas. 
This avoids making too many cascading calls to remote servers, which, if not restricted, can bring down nodes containing the said collection [SOLR-13793](https://issues.apache.org/jira/browse/SOLR-13793) (Kesharee Nandan Vishwakarma) (Ishan Chattopadhyaya) -- Improve ExecutePlanAction error handling. [SOLR-13828](https://issues.apache.org/jira/browse/SOLR-13828) +- Improve ExecutePlanAction error handling. [SOLR-13828](https://issues.apache.org/jira/browse/SOLR-13828) - Velocity response writer's resource loading now possible only through startup parameters. Also, removed velocity response writer from _default configset. [SOLR-13971](https://issues.apache.org/jira/browse/SOLR-13971) (Ishan Chattopadhyaya) (Sachin Pattan) -- VelocityResponseWriter has been hardened - only trusted configsets can render configset provided templates and rendering templates from request parameters has been removed. [SOLR-14025](https://issues.apache.org/jira/browse/SOLR-14025) +- VelocityResponseWriter has been hardened - only trusted configsets can render configset provided templates and rendering templates from request parameters has been removed. [SOLR-14025](https://issues.apache.org/jira/browse/SOLR-14025) - Back port javabin performance regression fixes from SOLR-14013 [SOLR-14259](https://issues.apache.org/jira/browse/SOLR-14259) [SOLR-14013](https://issues.apache.org/jira/browse/SOLR-14013) (noble) @@ -3161,8 +3160,8 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (2 changes) -- When requesting the status of an async request via REQUESTSTATUS collections API, the response will include the list of internal async requests (if any) in the "success" or "failed" keys (in addition to them being included outside those keys for backwards compatibility). 
See SOLR-12708 for more details [SOLR-12708](https://issues.apache.org/jira/browse/SOLR-12708) -- The Prometheus Metric Exporter now collects metrics from Solr at a fixed schedule that is controlled by the exporter itself. The default interval is 60s but it can be changed by specifying the --scrape-interval command line parameter to the tool. The collected metrics are cached and returned for any requests coming in before the next scheduled collection. This can impact the freshness of the metrics if prometheus is configured to query the tool more frequently. [SOLR-13234](https://issues.apache.org/jira/browse/SOLR-13234) +- When requesting the status of an async request via REQUESTSTATUS collections API, the response will include the list of internal async requests (if any) in the "success" or "failed" keys (in addition to them being included outside those keys for backwards compatibility). See SOLR-12708 for more details [SOLR-12708](https://issues.apache.org/jira/browse/SOLR-12708) +- The Prometheus Metric Exporter now collects metrics from Solr at a fixed schedule that is controlled by the exporter itself. The default interval is 60s but it can be changed by specifying the --scrape-interval command line parameter to the tool. The collected metrics are cached and returned for any requests coming in before the next scheduled collection. This can impact the freshness of the metrics if prometheus is configured to query the tool more frequently. [SOLR-13234](https://issues.apache.org/jira/browse/SOLR-13234) ### Fixed (25 changes) @@ -3202,7 +3201,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- * SOLR-13248: The default replica placement strategy used in Solr has been reverted to the 'legacy' policy used by Solr 7.4 and previous versions. 
This is due to multiple bugs in the autoscaling based replica placement strategy that was made default in Solr 7.5 which causes multiple replicas of the same shard to be placed on the same node in addition to the maxShardsPerNode and createNodeSet parameters being ignored. Although the default has changed, autoscaling will continue to be used if a cluster policy or preference is specified or a collection level policy is in use. The default replica placement strategy can be changed to use autoscaling again by setting a cluster property: curl -X POST -H 'Content-type:application/json' --data-binary ' { "set-obj-property": { "defaults" : { "cluster": { "useLegacyReplicaAssignment":false } } } }' http://$SOLR_HOST:$SOLR_PORT/api/cluster [SOLR-13248](https://issues.apache.org/jira/browse/SOLR-13248) +- * SOLR-13248: The default replica placement strategy used in Solr has been reverted to the 'legacy' policy used by Solr 7.4 and previous versions. This is due to multiple bugs in the autoscaling based replica placement strategy that was made default in Solr 7.5 which causes multiple replicas of the same shard to be placed on the same node in addition to the maxShardsPerNode and createNodeSet parameters being ignored. Although the default has changed, autoscaling will continue to be used if a cluster policy or preference is specified or a collection level policy is in use. The default replica placement strategy can be changed to use autoscaling again by setting a cluster property: curl -X POST -H 'Content-type:application/json' --data-binary ' { "set-obj-property": { "defaults" : { "cluster": { "useLegacyReplicaAssignment":false } } } }' http://$SOLR_HOST:$SOLR_PORT/api/cluster [SOLR-13248](https://issues.apache.org/jira/browse/SOLR-13248) ### Fixed (2 changes) @@ -3215,7 +3214,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M ### Upgrade Notes (1 change) -- The 'shards' parameter handling logic changes to use a new config element to determine what hosts can be requested. Please see Apache Solr Reference Guide chapter "Distributed Requests" for details, as well as SOLR-12770. [SOLR-12770](https://issues.apache.org/jira/browse/SOLR-12770) +- The 'shards' parameter handling logic changes to use a new config element to determine what hosts can be requested. Please see Apache Solr Reference Guide chapter "Distributed Requests" for details, as well as SOLR-12770. [SOLR-12770](https://issues.apache.org/jira/browse/SOLR-12770) ### Added (7 changes) @@ -3282,10 +3281,10 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (4 changes) -- Autoscaling policy framework is now used as the default strategy to select the nodes on which new replicas or replicas of new collections are created. Previously, the maxShardsPerNode parameter was not allowed on collections when autoscaling policy was configured. Also if an autoscaling policy was configured then the default was to set an unlimited maxShardsPerNode automatically. Now the maxShardsPerNode parameter is always allowed during collection creation and maxShardsPerNode should be set correctly (if required) regardless of whether autoscaling policies are in effect or not. The default value of maxShardsPerNode continues to be 1 as before. It can be set to -1 during collection creation to fall back to the old behavior of unlimited maxShardsPerNode when using autoscaling policy. [SOLR-12739](https://issues.apache.org/jira/browse/SOLR-12739) +- Autoscaling policy framework is now used as the default strategy to select the nodes on which new replicas or replicas of new collections are created. Previously, the maxShardsPerNode parameter was not allowed on collections when autoscaling policy was configured. 
Also if an autoscaling policy was configured then the default was to set an unlimited maxShardsPerNode automatically. Now the maxShardsPerNode parameter is always allowed during collection creation and maxShardsPerNode should be set correctly (if required) regardless of whether autoscaling policies are in effect or not. The default value of maxShardsPerNode continues to be 1 as before. It can be set to -1 during collection creation to fall back to the old behavior of unlimited maxShardsPerNode when using autoscaling policy. [SOLR-12739](https://issues.apache.org/jira/browse/SOLR-12739) - The min_rf parameter is no longer needed, Solr will always return the achieved replication factor [SOLR-12767](https://issues.apache.org/jira/browse/SOLR-12767) (rf) -- The cluster wide defaults structure has changed from {collectionDefaults: {nrtReplicas : 2}} to {defaults : {collection : {nrtReplicas : 2}}}. The old format continues to be supported and can be read from ZK as well as written using the V2 set-obj-property syntax but it is deprecated and will be removed in Solr 9. We recommend that users change their API calls to use the new format going forward. [SOLR-12827](https://issues.apache.org/jira/browse/SOLR-12827) -- Added a Solr factory for ByteBuffersDirectory, which will replace deprecated RAMDirectory in Solr 9.0. [SOLR-12861](https://issues.apache.org/jira/browse/SOLR-12861) +- The cluster wide defaults structure has changed from {collectionDefaults: {nrtReplicas : 2}} to {defaults : {collection : {nrtReplicas : 2}}}. The old format continues to be supported and can be read from ZK as well as written using the V2 set-obj-property syntax but it is deprecated and will be removed in Solr 9. We recommend that users change their API calls to use the new format going forward. [SOLR-12827](https://issues.apache.org/jira/browse/SOLR-12827) +- Added a Solr factory for ByteBuffersDirectory, which will replace deprecated RAMDirectory in Solr 9.0. 
[SOLR-12861](https://issues.apache.org/jira/browse/SOLR-12861) ### Added (25 changes) @@ -3362,7 +3361,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Move TestCloudJSONFacetJoinDomain and TestCloudJSONFacetSKG to the facet test package [SOLR-12793](https://issues.apache.org/jira/browse/SOLR-12793) (Varun Thacker) - Migrate cluster wide defaults syntax in cluster properties to a nested structure. The structure has changed from {collectionDefaults: {nrtReplicas : 2}} to {defaults : {collection : {nrtReplicas : 2}}}. [SOLR-12827](https://issues.apache.org/jira/browse/SOLR-12827) (ab) (shalin) - Document statistics exposed by the Query Result Cache when maxRamMB is configured. [SOLR-12835](https://issues.apache.org/jira/browse/SOLR-12835) (shalin) -- Add Solr factory for ByteBuffersDirectory. [SOLR-12861](https://issues.apache.org/jira/browse/SOLR-12861) +- Add Solr factory for ByteBuffersDirectory. [SOLR-12861](https://issues.apache.org/jira/browse/SOLR-12861) - Add Javadoc @since tag to Analyzer component classes [SOLR-12956](https://issues.apache.org/jira/browse/SOLR-12956) (Alexandre Rafalovitch) - Add Javadoc @since tag to URP classes [SOLR-12966](https://issues.apache.org/jira/browse/SOLR-12966) (Alexandre Rafalovitch) - ZkNodeProps to be able to load from both javabin and JSON [SOLR-13006](https://issues.apache.org/jira/browse/SOLR-13006) (noble) @@ -3376,9 +3375,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M - TieredMergePolicy now respects maxSegmentSizeMB by default when executing findForcedMerges (optimize) and findForcedDeletesMerges (expungeDeletes) [LUCENE-7976](https://issues.apache.org/jira/browse/LUCENE-7976) (Erick Erickson) - Extremely outdated UIMA contrib module has been removed [SOLR-11694](https://issues.apache.org/jira/browse/SOLR-11694) (Alexandre Rafalovitch) - The configuration file for log4j2.xml is now in ../server/resources/log4j2.xml. 
All other copies have been removed. WINDOWS USERS: This JIRA corrects a bug in the start scripts that put example logs under ...\server, solr.log is now under ...\example. [SOLR-12008](https://issues.apache.org/jira/browse/SOLR-12008) (Erick Erickson) -- The data-driven schema no longer returns the *_str field-copy of text fields by default. The *_str field is still created and can be used for e.g. sorting, but to retrieve it you now need to explicitly list it in "fl" SOLR-12350 [SOLR-12350](https://issues.apache.org/jira/browse/SOLR-12350) -- SignificantTermsQParserPlugin's name is now 'significantTerms' and its old name 'sigificantTerms' is deprecated. [SOLR-12395](https://issues.apache.org/jira/browse/SOLR-12395) -- The heatmap facet response structure, when returned in JSON, changed from an even/odd name/value array to an object style. Although the object style makes more sense, this was an overlooked back-compat break; sorry. [SOLR-12398](https://issues.apache.org/jira/browse/SOLR-12398) +- The data-driven schema no longer returns the *_str field-copy of text fields by default. The *_str field is still created and can be used for e.g. sorting, but to retrieve it you now need to explicitly list it in "fl" SOLR-12350 [SOLR-12350](https://issues.apache.org/jira/browse/SOLR-12350) +- SignificantTermsQParserPlugin's name is now 'significantTerms' and its old name 'sigificantTerms' is deprecated. [SOLR-12395](https://issues.apache.org/jira/browse/SOLR-12395) +- The heatmap facet response structure, when returned in JSON, changed from an even/odd name/value array to an object style. Although the object style makes more sense, this was an overlooked back-compat break; sorry. [SOLR-12398](https://issues.apache.org/jira/browse/SOLR-12398) - The data-driven schema no longer returns the *_str field-copy of text fields by default. The *_str field is still created and can be used for e.g. 
sorting, but to retrieve it you now need to explicitly list it in "fl" (SOLR-12350) ### Added (42 changes) @@ -3448,7 +3447,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Solr SQL does not work with point numeric fields [SOLR-11585](https://issues.apache.org/jira/browse/SOLR-11585) (Joel Bernstein) (Kiran Chitturi) - Improve error handling of shard splitting. Fix splitting of mixed replica types. [SOLR-11665](https://issues.apache.org/jira/browse/SOLR-11665) (ab) - Keep nrtReplicas and replicationFactor in sync while creating a collection and modifying a collection [SOLR-11676](https://issues.apache.org/jira/browse/SOLR-11676) (Varun Thacker) -- NPE in tvrh if no field is specified and document doesn't contain any fields with term vectors [SOLR-11770](https://issues.apache.org/jira/browse/SOLR-11770) +- NPE in tvrh if no field is specified and document doesn't contain any fields with term vectors [SOLR-11770](https://issues.apache.org/jira/browse/SOLR-11770) - Restoring collection now treats maxShardsPerNode=-1 as unlimited [SOLR-11807](https://issues.apache.org/jira/browse/SOLR-11807) (Varun Thacker) - JSON Facet API: terms facet shard requests now indicate if they have more buckets to prevent unnecessary refinement requests. [SOLR-12326](https://issues.apache.org/jira/browse/SOLR-12326) (yonk) - Fixed a bug in JSON Faceting that could cause incorrect counts/stats when using non default sort options. This also adds a new configurable "overrefine" option. [SOLR-12343](https://issues.apache.org/jira/browse/SOLR-12343) (Yonik Seeley) (hossman) @@ -3523,9 +3522,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (3 changes) -- Configuring slowQueryThresholdMillis now logs slow requests to a separate file - solr_slow_requests.log. Previously they would get logged in the solr.log file. 
[SOLR-11453](https://issues.apache.org/jira/browse/SOLR-11453) -- Slave doesn't commit empty index when completely new index is detected on master during replication. To return the previous behavior pass false to skipCommitOnLeaderVersionZero in slave section of replication handler configuration, or pass it to the fetchindex command. [SOLR-11673](https://issues.apache.org/jira/browse/SOLR-11673) -- Solr now uses Log4J 2.11 . The log4j configuration is now in log4j2.xml rather than log4j.properties files. This is a server side change only and clients using SolrJ won't need any changes. Clients can still use any logging implementation which is compatible with SLF4J. We now let log4j2 handle rotation of solr logs at startup, and bin/solr start scripts will no longer attempt this nor move existing console or GC logs into logs/archived either SOLR-12144 [SOLR-7887](https://issues.apache.org/jira/browse/SOLR-7887) [SOLR-12144](https://issues.apache.org/jira/browse/SOLR-12144) +- Configuring slowQueryThresholdMillis now logs slow requests to a separate file - solr_slow_requests.log. Previously they would get logged in the solr.log file. [SOLR-11453](https://issues.apache.org/jira/browse/SOLR-11453) +- Slave doesn't commit empty index when completely new index is detected on master during replication. To return the previous behavior pass false to skipCommitOnLeaderVersionZero in slave section of replication handler configuration, or pass it to the fetchindex command. [SOLR-11673](https://issues.apache.org/jira/browse/SOLR-11673) +- Solr now uses Log4J 2.11 . The log4j configuration is now in log4j2.xml rather than log4j.properties files. This is a server side change only and clients using SolrJ won't need any changes. Clients can still use any logging implementation which is compatible with SLF4J. 
We now let log4j2 handle rotation of solr logs at startup, and bin/solr start scripts will no longer attempt this nor move existing console or GC logs into logs/archived either SOLR-12144 [SOLR-7887](https://issues.apache.org/jira/browse/SOLR-7887) [SOLR-12144](https://issues.apache.org/jira/browse/SOLR-12144) ### Added (35 changes) @@ -3694,16 +3693,16 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (10 changes) -- If you are using the spatial JTS library with Solr, you must upgrade to 1.15.0. This new version of JTS is now dual-licensed to include a BSD style license. [LUCENE-8161](https://issues.apache.org/jira/browse/LUCENE-8161) -- Collections created without specifying a configset name use a copy of the _default configset since 7.0. Before 7.3, the copied over configset was named the same as the collection name, but 7.3 onwards it will be named with an additional ".AUTOCREATED" suffix. [SOLR-11624](https://issues.apache.org/jira/browse/SOLR-11624) -- The old Leader-In-Recovery implementation (SOLR-5495) is now deprecated and replaced. Solr will support rolling upgrades from old 7.x versions of Solr to future 7.x releases until the last release of the 7.x major version. This means in order to upgrade to Solr 8 in the future, you must be on Solr 7.3 or higher. [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) [SOLR-5495](https://issues.apache.org/jira/browse/SOLR-5495) -- The behaviour of the autoscaling system has been modified to pause all triggers from execution between the start of actions and end of cool down period. The triggers will be resumed after the cool down period expires. Previously, the cool down period was a fixed period started after actions for a trigger event complete and during this time, all triggers continued to run but any events were rejected to be tried later. 
[SOLR-11747](https://issues.apache.org/jira/browse/SOLR-11747) -- The throttling mechanism used to limit the rate of autoscaling events processed has been removed. This deprecates the 'actionThrottlePeriodSeconds' setting in the set-properties Autoscaling API which is now a no-op. Use the 'triggerCooldownPeriodSeconds' instead to pause event processing. [SOLR-11748](https://issues.apache.org/jira/browse/SOLR-11748) -- The top-level <highlighting> syntax in solrconfig.xml is now formally deprecated in favour of <searchComponent> equivalent syntax. See also SOLR-1696. [SOLR-11798](https://issues.apache.org/jira/browse/SOLR-11798) [SOLR-1696](https://issues.apache.org/jira/browse/SOLR-1696) -- QueryComponent's rq parameter parsing no longer considers the defType parameter. [SOLR-11809](https://issues.apache.org/jira/browse/SOLR-11809) -- The default Solr log file size and number of backups is raised to 32MB and 10 respectively [SOLR-11957](https://issues.apache.org/jira/browse/SOLR-11957) -- A new mechanism is introduced in SOLR-11702 to maintain consistency in SolrCloud between leader and replicas. This mechanism lets Solr know whether a replica is in-sync with the leader or not, even when the leader is not live. If all the replicas who participate in the leader election are out-of-sync with previous leader, the election will pause until a timeout (named "leaderVoteWait") before allowing an out-of-sync replica to become leader. Note that the new leader still needs to contains more updates than any other active replicas in the same shard. Therefore by increasing leaderVoteWait will increase the consistency (over availability) of the system. 
The default value of leaderVoteWait is 180,000 ms (3 minutes) and it can be adjusted in the "solrcloud" section of the solr.xml [SOLR-12051](https://issues.apache.org/jira/browse/SOLR-12051) [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) -- The default value of `autoReplicaFailoverWaitAfterExpiration` has been increased to 120 seconds from the earlier default of 30 seconds. This affects how soon Solr adds new replicas to replace the replicas on nodes which have either crashed or shutdown. [SOLR-12067](https://issues.apache.org/jira/browse/SOLR-12067) +- If you are using the spatial JTS library with Solr, you must upgrade to 1.15.0. This new version of JTS is now dual-licensed to include a BSD style license. [LUCENE-8161](https://issues.apache.org/jira/browse/LUCENE-8161) +- Collections created without specifying a configset name use a copy of the _default configset since 7.0. Before 7.3, the copied over configset was named the same as the collection name, but 7.3 onwards it will be named with an additional ".AUTOCREATED" suffix. [SOLR-11624](https://issues.apache.org/jira/browse/SOLR-11624) +- The old Leader-In-Recovery implementation (SOLR-5495) is now deprecated and replaced. Solr will support rolling upgrades from old 7.x versions of Solr to future 7.x releases until the last release of the 7.x major version. This means in order to upgrade to Solr 8 in the future, you must be on Solr 7.3 or higher. [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) [SOLR-5495](https://issues.apache.org/jira/browse/SOLR-5495) +- The behaviour of the autoscaling system has been modified to pause all triggers from execution between the start of actions and end of cool down period. The triggers will be resumed after the cool down period expires. Previously, the cool down period was a fixed period started after actions for a trigger event complete and during this time, all triggers continued to run but any events were rejected to be tried later. 
[SOLR-11747](https://issues.apache.org/jira/browse/SOLR-11747) +- The throttling mechanism used to limit the rate of autoscaling events processed has been removed. This deprecates the 'actionThrottlePeriodSeconds' setting in the set-properties Autoscaling API which is now a no-op. Use the 'triggerCooldownPeriodSeconds' instead to pause event processing. [SOLR-11748](https://issues.apache.org/jira/browse/SOLR-11748) +- The top-level <highlighting> syntax in solrconfig.xml is now formally deprecated in favour of <searchComponent> equivalent syntax. See also SOLR-1696. [SOLR-11798](https://issues.apache.org/jira/browse/SOLR-11798) [SOLR-1696](https://issues.apache.org/jira/browse/SOLR-1696) +- QueryComponent's rq parameter parsing no longer considers the defType parameter. [SOLR-11809](https://issues.apache.org/jira/browse/SOLR-11809) +- The default Solr log file size and number of backups is raised to 32MB and 10 respectively [SOLR-11957](https://issues.apache.org/jira/browse/SOLR-11957) +- A new mechanism is introduced in SOLR-11702 to maintain consistency in SolrCloud between leader and replicas. This mechanism lets Solr know whether a replica is in-sync with the leader or not, even when the leader is not live. If all the replicas who participate in the leader election are out-of-sync with previous leader, the election will pause until a timeout (named "leaderVoteWait") before allowing an out-of-sync replica to become leader. Note that the new leader still needs to contains more updates than any other active replicas in the same shard. Therefore by increasing leaderVoteWait will increase the consistency (over availability) of the system. 
The default value of leaderVoteWait is 180,000 ms (3 minutes) and it can be adjusted in the "solrcloud" section of the solr.xml [SOLR-12051](https://issues.apache.org/jira/browse/SOLR-12051) [SOLR-11702](https://issues.apache.org/jira/browse/SOLR-11702) +- The default value of `autoReplicaFailoverWaitAfterExpiration` has been increased to 120 seconds from the earlier default of 30 seconds. This affects how soon Solr adds new replicas to replace the replicas on nodes which have either crashed or shutdown. [SOLR-12067](https://issues.apache.org/jira/browse/SOLR-12067) ### Added (44 changes) @@ -3970,11 +3969,11 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (6 changes) - in the ReplicationHandler the master.commitReserveDuration sub-element is deprecated. Instead please configure a direct commitReserveDuration element for use in all modes [SOLR-10962](https://issues.apache.org/jira/browse/SOLR-10962) (master) (slave) (cloud) -- shard and cluster metric reporter configuration now requires a class attribute. If a reporter configures the group="shard" attribute then please also configure the class="org.apache.solr.metrics.reporters.solr.SolrShardReporter" attribute. If a reporter configures the group="cluster" attribute then please also configure the class="org.apache.solr.metrics.reporters.solr.SolrClusterReporter" attribute. [SOLR-11195](https://issues.apache.org/jira/browse/SOLR-11195) -- the abstract DocTransformer class now has an abstract score-less transform method variant. [SOLR-11254](https://issues.apache.org/jira/browse/SOLR-11254) +- shard and cluster metric reporter configuration now requires a class attribute. If a reporter configures the group="shard" attribute then please also configure the class="org.apache.solr.metrics.reporters.solr.SolrShardReporter" attribute. 
If a reporter configures the group="cluster" attribute then please also configure the class="org.apache.solr.metrics.reporters.solr.SolrClusterReporter" attribute. [SOLR-11195](https://issues.apache.org/jira/browse/SOLR-11195) +- the abstract DocTransformer class now has an abstract score-less transform method variant. [SOLR-11254](https://issues.apache.org/jira/browse/SOLR-11254) - all Stream Evaluators in solrj.io.eval have been refactored to have a simplier and more robust structure. This simplifies and condenses the code required to implement a new Evaluator and makes it much easier for evaluators to handle differing data types (primitives, objects, arrays, lists, and so forth). [SOLR-11283](https://issues.apache.org/jira/browse/SOLR-11283) (Dennis Gove) - in the XML query parser [SOLR-11477](https://issues.apache.org/jira/browse/SOLR-11477) (defType=xmlparser or {!xmlparser ... }) -- 'autoAddReplicas' feature is ported to autoscaling framework. Existing users of this feature should not have to change anything. Note these changes: 1. Behaviour: Changing the autoAddReplicas property from disabled to enabled using MODIFYCOLLECTION API no longer replaces down replicas for the collection immediately. Instead, replicas are only added if a node containing them went down while autoAddReplicas was enabled. The params autoReplicaFailoverBadNodeExpiration and autoReplicaFailoverWorkLoopDelay are no longer used. 2. Deprecations: Enabling/disabling autoAddReplicas cluster wide with the API will be deprecated; use suspend/resume trigger APIs with name='.auto_add_replicas' instead. +- 'autoAddReplicas' feature is ported to autoscaling framework. Existing users of this feature should not have to change anything. Note these changes: 1. Behaviour: Changing the autoAddReplicas property from disabled to enabled using MODIFYCOLLECTION API no longer replaces down replicas for the collection immediately. 
Instead, replicas are only added if a node containing them went down while autoAddReplicas was enabled. The params autoReplicaFailoverBadNodeExpiration and autoReplicaFailoverWorkLoopDelay are no longer used. 2. Deprecations: Enabling/disabling autoAddReplicas cluster wide with the API will be deprecated; use suspend/resume trigger APIs with name='.auto_add_replicas' instead. ### Added (39 changes) @@ -4166,7 +4165,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Possible to set solr.data.home property as root dir for all data [SOLR-6671](https://issues.apache.org/jira/browse/SOLR-6671) (janhoy) (Shawn Heisey) (Mark Miller) - Refinement for JSON Facet API: Adding refine:true to any terms facet will cause an additional distributed search phase (overlapped with field retrieval) that requests additional info for top facet buckets from shards that did not previously contribute to that bucket. This will correct counts (and other statistics) for those top buckets collected in the first phase. [SOLR-7452](https://issues.apache.org/jira/browse/SOLR-7452) (yonik) - Data driven schema now indexes text field "foo" as both "foo" (text_general) and as "foo_str" (string) to facilitate both search and faceting. AddSchemaFieldsUpdateProcessor now has the ability to add a "copyField" to the type mappings, with an optional maxChars limitation. You can also define one typeMappings as default. 
This also solves issues SOLR-8495, SOLR-6966, and SOLR-7058 [SOLR-9526](https://issues.apache.org/jira/browse/SOLR-9526) [SOLR-8495](https://issues.apache.org/jira/browse/SOLR-8495) [SOLR-6966](https://issues.apache.org/jira/browse/SOLR-6966) [SOLR-7058](https://issues.apache.org/jira/browse/SOLR-7058) (janhoy) (Steve Rowe) (hossman) (Alexandre Rafalovitch) (Shawn Heisey) (Cao Manh Dat) -- Create another replication mode for SolrCloud [SOLR-9835](https://issues.apache.org/jira/browse/SOLR-9835) +- Create another replication mode for SolrCloud [SOLR-9835](https://issues.apache.org/jira/browse/SOLR-9835) - Collect aggregated metrics from nodes and shard leaders in overseer. [SOLR-9857](https://issues.apache.org/jira/browse/SOLR-9857) [SOLR-9858](https://issues.apache.org/jira/browse/SOLR-9858) (ab) - Add solr/solr.cmd parameter to append jetty parameters to the start script. [SOLR-9910](https://issues.apache.org/jira/browse/SOLR-9910) (Mano Kovacs) (Mark Miller) - Add support for PointFields in FacetModule (JSON Facets) [SOLR-9989](https://issues.apache.org/jira/browse/SOLR-9989) (Cao Manh Dat) @@ -4337,7 +4336,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (1 change) -- Lucene only release +- Lucene only release [6.6.5] - 2018-06-03 @@ -4411,9 +4410,9 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (3 changes) -- Solr contribs map-reduce, morphlines-core and morphlines-cell have been removed. -- JSON Facet API now uses hyper-log-log for numBuckets cardinality calculation and calculates cardinality before filtering buckets by any mincount greater than 1. -- ZooKeeper dependency has been upgraded from 3.4.6 to 3.4.10. +- Solr contribs map-reduce, morphlines-core and morphlines-cell have been removed. +- JSON Facet API now uses hyper-log-log for numBuckets cardinality calculation and calculates cardinality before filtering buckets by any mincount greater than 1. 
+- ZooKeeper dependency has been upgraded from 3.4.6 to 3.4.10. ### Added (36 changes) @@ -4553,7 +4552,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (2 changes) - JMX metric "avgTimePerRequest" (and the corresponding metric in the metrics API for each handler) used to be a simple non-decaying average based on total cumulative time and the number of requests. New Codahale Metrics implementation applies exponential decay to this value, which heavily biases the average towards the last 5 minutes. [SOLR-10226](https://issues.apache.org/jira/browse/SOLR-10226) (ab) -- Parallel SQL now uses Apache Calcite as its SQL framework. As part of this change the default aggregation mode has been changed to facet rather than map_reduce. There has also been changes to the SQL aggregate response and some SQL syntax changes. Consult the documentation for full details. [SOLR-8593](https://issues.apache.org/jira/browse/SOLR-8593) +- Parallel SQL now uses Apache Calcite as its SQL framework. As part of this change the default aggregation mode has been changed to facet rather than map_reduce. There has also been changes to the SQL aggregate response and some SQL syntax changes. Consult the documentation for full details. [SOLR-8593](https://issues.apache.org/jira/browse/SOLR-8593) ### Added (31 changes) @@ -4694,11 +4693,11 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (5 changes) -- complexphrase query parser now supports leading wildcards, beware of its' possible heaviness. Users are encouraged to use ReversedWildcardFilter in index time analysis. [SOLR-7466](https://issues.apache.org/jira/browse/SOLR-7466) -- Metrics related classes in org.apache.solr.util.stats have been removed in favor of the dropwizard metrics library. Any custom plugins using these classes should be changed to use the equivalent classes from the metrics library. 
As part of this, the following changes were made to the output of Overseer Status API: * The "totalTime" metric has been removed because it is no longer supported * The metrics "75thPctlRequestTime", "95thPctlRequestTime", "99thPctlRequestTime" and "999thPctlRequestTime" in Overseer Status API have been renamed to "75thPcRequestTime", "95thPcRequestTime" and so on for consistency with stats output in other parts of Solr. * The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond" and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr. [SOLR-8785](https://issues.apache.org/jira/browse/SOLR-8785) -- Export handler returns zero for numeric fields that are not in the original doc. One consequence of this change is that you must be aware that some tuples will not have values if there were none in the original document. [SOLR-9166](https://issues.apache.org/jira/browse/SOLR-9166) -- You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the original Highlighter. That said, some options aren't supported yet. It will get more features in time, especially with your input. See HighlightParams.java for a listing of highlight parameters annotated with which highlighters use them. hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector. [SOLR-9708](https://issues.apache.org/jira/browse/SOLR-9708) -- maxWarmingSearchers now defaults to 1, and more importantly commits will now block if this limit is exceeded instead of throwing an exception (a good thing). Consequently there is no longer a risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. 
Users are advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files. [SOLR-9712](https://issues.apache.org/jira/browse/SOLR-9712) +- complexphrase query parser now supports leading wildcards, beware of its' possible heaviness. Users are encouraged to use ReversedWildcardFilter in index time analysis. [SOLR-7466](https://issues.apache.org/jira/browse/SOLR-7466) +- Metrics related classes in org.apache.solr.util.stats have been removed in favor of the dropwizard metrics library. Any custom plugins using these classes should be changed to use the equivalent classes from the metrics library. As part of this, the following changes were made to the output of Overseer Status API: * The "totalTime" metric has been removed because it is no longer supported * The metrics "75thPctlRequestTime", "95thPctlRequestTime", "99thPctlRequestTime" and "999thPctlRequestTime" in Overseer Status API have been renamed to "75thPcRequestTime", "95thPcRequestTime" and so on for consistency with stats output in other parts of Solr. * The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond" and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr. [SOLR-8785](https://issues.apache.org/jira/browse/SOLR-8785) +- Export handler returns zero for numeric fields that are not in the original doc. One consequence of this change is that you must be aware that some tuples will not have values if there were none in the original document. [SOLR-9166](https://issues.apache.org/jira/browse/SOLR-9166) +- You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the original Highlighter. That said, some options aren't supported yet. 
It will get more features in time, especially with your input. See HighlightParams.java for a listing of highlight parameters annotated with which highlighters use them. hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector. [SOLR-9708](https://issues.apache.org/jira/browse/SOLR-9708) +- maxWarmingSearchers now defaults to 1, and more importantly commits will now block if this limit is exceeded instead of throwing an exception (a good thing). Consequently there is no longer a risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files. [SOLR-9712](https://issues.apache.org/jira/browse/SOLR-9712) ### Added (40 changes) @@ -4829,12 +4828,12 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (6 changes) -- We are no longer backing up solr.log and solr_gc.log files in date-stamped copies forever. If you relied on the solr_log_<date> or solr_gc_log_<date> being in the logs folder that will no longer be the case. See SOLR-9570 for details. [SOLR-9570](https://issues.apache.org/jira/browse/SOLR-9570) -- If you use the JSON Facet API (json.facet) with method=stream, you must now set sort='index asc' to get the streaming behavior; otherwise it won't stream. Reminder: "method" is a hint that doesn't change defaults of other parameters. -- If you use the JSON Facet API (json.facet) to facet on a numeric field and if you use mincount=0 or if you set the prefix, then you will now get an error as these options are incompatible with numeric faceting. -- Solr's logging verbosity at the INFO level has been greatly reduced, and you may need to update the log configs to use the DEBUG level to see all the logging messages you used to see at INFO level before. -- The create/deleteCollection methods on MiniSolrCloudCluster have been deprecated. 
Clients should instead use the CollectionAdminRequest API. In addition, MiniSolrCloudCluster#uploadConfigDir(File, String) has been deprecated in favour of #uploadConfigSet(Path, String) -- The bin/solr.in.sh (bin/solr.in.cmd on Windows) is now completely commented by default. Previously, this wasn't so, which had the effect of masking existing environment variables. +- We are no longer backing up solr.log and solr_gc.log files in date-stamped copies forever. If you relied on the solr_log_<date> or solr_gc_log_<date> being in the logs folder that will no longer be the case. See SOLR-9570 for details. [SOLR-9570](https://issues.apache.org/jira/browse/SOLR-9570) +- If you use the JSON Facet API (json.facet) with method=stream, you must now set sort='index asc' to get the streaming behavior; otherwise it won't stream. Reminder: "method" is a hint that doesn't change defaults of other parameters. +- If you use the JSON Facet API (json.facet) to facet on a numeric field and if you use mincount=0 or if you set the prefix, then you will now get an error as these options are incompatible with numeric faceting. +- Solr's logging verbosity at the INFO level has been greatly reduced, and you may need to update the log configs to use the DEBUG level to see all the logging messages you used to see at INFO level before. +- The create/deleteCollection methods on MiniSolrCloudCluster have been deprecated. Clients should instead use the CollectionAdminRequest API. In addition, MiniSolrCloudCluster#uploadConfigDir(File, String) has been deprecated in favour of #uploadConfigSet(Path, String) +- The bin/solr.in.sh (bin/solr.in.cmd on Windows) is now completely commented by default. Previously, this wasn't so, which had the effect of masking existing environment variables. ### Added (27 changes) @@ -4917,7 +4916,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M - Move lots of SolrCloud logging from 'info' to 'debug' [SOLR-5563](https://issues.apache.org/jira/browse/SOLR-5563) (janhoy) (Alan Woodward) - Remove unreachable printLayout usage in cloud tests. [SOLR-6090](https://issues.apache.org/jira/browse/SOLR-6090) (Cao Manh Dat) (shalin) - Reduced logging during startup and shutdown, moved more logs to DEBUG level [SOLR-6677](https://issues.apache.org/jira/browse/SOLR-6677) (janhoy) (Shawn Heisey) (Alan Woodward) -- Updated the quickstart tutorial to cover the 6.2.0 release, and added ant target "generate-website-quickstart" to convert the bundled version of the tutorial into one suitable for the website. [SOLR-6871](https://issues.apache.org/jira/browse/SOLR-6871) +- Updated the quickstart tutorial to cover the 6.2.0 release, and added ant target "generate-website-quickstart" to convert the bundled version of the tutorial into one suitable for the website. [SOLR-6871](https://issues.apache.org/jira/browse/SOLR-6871) - Solr stops printing stacktraces in log and output [SOLR-7436](https://issues.apache.org/jira/browse/SOLR-7436) (janhoy) (hossman) (Markus Jelsma) - Refuse "bin/solr create" if run as root, unless -force is specified [SOLR-7826](https://issues.apache.org/jira/browse/SOLR-7826) (janhoy) (Binoy Dalal) - Moved defaults within bin/solr.in.sh (and bin/solr.in.cmd on Windows) to bin/solr (and bin/solr.cmd) such that the default state of these files is to set nothing. This makes Solr work better with Docker. [SOLR-7850](https://issues.apache.org/jira/browse/SOLR-7850) (David Smiley) @@ -5196,7 +5195,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Upgrade Notes (1 change) -- If you use historical dates, specifically on or before the year 1582, you should re-index. +- If you use historical dates, specifically on or before the year 1582, you should re-index. 
### Fixed (24 changes) @@ -5347,7 +5346,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SQL JDBC - DriverImpl loadParams doesn't support keys with no values in the connection string [SOLR-8179](https://issues.apache.org/jira/browse/SOLR-8179) (Kevin Risden) (Joel Bernstein) - Implement Closeable on TupleStream [SOLR-8190](https://issues.apache.org/jira/browse/SOLR-8190) (Kevin Risden) (Joel Bernstein) - Change default hdfs tlog replication factor from 1 to 3. [SOLR-8258](https://issues.apache.org/jira/browse/SOLR-8258) (Mark Miller) -- Remove deprecated JettySolrRunner.getDispatchFilter() [SOLR-8259](https://issues.apache.org/jira/browse/SOLR-8259) +- Remove deprecated JettySolrRunner.getDispatchFilter() [SOLR-8259](https://issues.apache.org/jira/browse/SOLR-8259) - Change SchemaSimilarityFactory default to BM25Similarity [SOLR-8261](https://issues.apache.org/jira/browse/SOLR-8261) (hossman) - Remove Java Serialization from the Streaming API. The /stream handler now only accepts Streaming Expressions. [SOLR-8266](https://issues.apache.org/jira/browse/SOLR-8266) (Jason Gerlowski) (Joel Bernstein) - Change implicit default Similarity to use BM25 when luceneMatchVersion >= 6 [SOLR-8270](https://issues.apache.org/jira/browse/SOLR-8270) (hossman) @@ -5409,7 +5408,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (1 change) -- Lucene only release +- Lucene only release [5.5.2] - 2016-06-25 @@ -5577,7 +5576,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Highlighting a geo RPT field would throw an NPE instead of doing nothing. [SOLR-8541](https://issues.apache.org/jira/browse/SOLR-8541) (Pawel Rog) (David Smiley) - Core discovery was not following symlinks [SOLR-8548](https://issues.apache.org/jira/browse/SOLR-8548) (Aaron LaBella) (Alan Woodward) - Make collection deletion more robust. 
[SOLR-8551](https://issues.apache.org/jira/browse/SOLR-8551) (Mark Miller) -- Fix Embedded ZooKeeper to use <solrHome>/zoo_data for it's data directory [SOLR-8564](https://issues.apache.org/jira/browse/SOLR-8564) +- Fix Embedded ZooKeeper to use <solrHome>/zoo_data for it's data directory [SOLR-8564](https://issues.apache.org/jira/browse/SOLR-8564) - Fix HDFSLogReader replay status numbers, a performance bug where we can reopen FSDataInputStream much too often, and an hdfs tlog data integrity bug. [SOLR-8575](https://issues.apache.org/jira/browse/SOLR-8575) (Mark Miller) (Patrick Dvorack) (yonik) - memory leak in JsonRecordReader affecting /update/json/docs. Large payloads cause OOM [SOLR-8582](https://issues.apache.org/jira/browse/SOLR-8582) (noble) (shalin) - Regular expression queries starting with escaped forward slash caused an exception. [SOLR-8605](https://issues.apache.org/jira/browse/SOLR-8605) (Scott Blum) (yonik) @@ -5682,7 +5681,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Add a LIST command to ConfigSets API [SOLR-7995](https://issues.apache.org/jira/browse/SOLR-7995) (Gregory Chanan) - Basic auth support in SolrJ [SOLR-8053](https://issues.apache.org/jira/browse/SOLR-8053) (noble) - CloneFieldUpdateProcessorFactory now supports choosing a "dest" field name based on a regex pattern and replacement init options. 
[SOLR-8113](https://issues.apache.org/jira/browse/SOLR-8113) (Gus Heck) (hossman) -- Create/delete fields/dynamic fields/copy fields via schema tab on Angular UI [SOLR-8139](https://issues.apache.org/jira/browse/SOLR-8139) +- Create/delete fields/dynamic fields/copy fields via schema tab on Angular UI [SOLR-8139](https://issues.apache.org/jira/browse/SOLR-8139) - Introduce possibility to configure ParseContext in ExtractingRequestHandler/ExtractingDocumentLoader [SOLR-8166](https://issues.apache.org/jira/browse/SOLR-8166) (Andriy Binetsky) (Uwe Schindler) - JSON Facet API: add "method" param to terms/field facets to give an execution hint for what method should be used to facet. [SOLR-8217](https://issues.apache.org/jira/browse/SOLR-8217) (yonik) - SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using a fieldType name to identify which Similarity to use as a default. [SOLR-8329](https://issues.apache.org/jira/browse/SOLR-8329) (hossman) @@ -5811,7 +5810,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SolrResourceLoader now takes a Path as its instance directory [SOLR-8302](https://issues.apache.org/jira/browse/SOLR-8302) (Alan Woodward) (Shawn Heisey) - CustomBufferedIndexInput now includes resource description when throwing EOFException. 
[SOLR-8303](https://issues.apache.org/jira/browse/SOLR-8303) (Mike Drob) (Uwe Schindler) - Removed default core checks in the dispatch filter since we don't have a default core anymore [SOLR-8315](https://issues.apache.org/jira/browse/SOLR-8315) (Varun Thacker) -- Standardize and fix logger creation and usage so that they aren't shared across source files.(Jason Gerlowski, Uwe Schindler, Anshum Gupta) [SOLR-8330](https://issues.apache.org/jira/browse/SOLR-8330) +- Standardize and fix logger creation and usage so that they aren't shared across source files.(Jason Gerlowski, Uwe Schindler, Anshum Gupta) [SOLR-8330](https://issues.apache.org/jira/browse/SOLR-8330) - Fix check-example-lucene-match-version Ant task and addVersion.py script to check and update luceneMatchVersion under solr/example/ configs as well logic. [SOLR-8363](https://issues.apache.org/jira/browse/SOLR-8363) (Varun Thacker) @@ -5863,7 +5862,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Expose znodeVersion property for each of the collections returned for the clusterstatus operation in the collections API [SOLR-7389](https://issues.apache.org/jira/browse/SOLR-7389) (Marius Grama) (shalin) - Expose HDFS Block Locality Metrics via JMX [SOLR-7458](https://issues.apache.org/jira/browse/SOLR-7458) (Mike Drob) (Mark Miller) - A DocTransformer can now request fields from the SolrIndexSearcher that are not necessarily returned in the file SolrDocument by returning a list of fields from DocTransformer#getExtraRequestFields [SOLR-7622](https://issues.apache.org/jira/browse/SOLR-7622) (ryan) -- MoreLikeThis QParser now supports all options provided by the MLT Handler i.e. mintf, mindf, minwl, maxwl, maxqt, and maxntp. [SOLR-7639](https://issues.apache.org/jira/browse/SOLR-7639) +- MoreLikeThis QParser now supports all options provided by the MLT Handler i.e. mintf, mindf, minwl, maxwl, maxqt, and maxntp. 
[SOLR-7639](https://issues.apache.org/jira/browse/SOLR-7639) - New response format added wt=smile [SOLR-7651](https://issues.apache.org/jira/browse/SOLR-7651) (noble) - Add 'port' tag support in replica placement rules [SOLR-7668](https://issues.apache.org/jira/browse/SOLR-7668) (Adam McElwee) (Noble Paul) - Faceting on nested objects / Block-join faceting with the new JSON Facet API. Example: Assuming books with nested pages and an input domain of pages, the following will switch the domain to books before faceting on the author field: authors:{ type:terms, field:author, domain:{toParent:"type:book"} } [SOLR-7676](https://issues.apache.org/jira/browse/SOLR-7676) (yonik) @@ -5957,7 +5956,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix typo bug with TestConfigOverlay [SOLR-7740](https://issues.apache.org/jira/browse/SOLR-7740) (Christine Poerschke) (Ramkumar Aiyengar) - Change TestConfig.testDefaults to cover all SolrIndexConfig fields [SOLR-7750](https://issues.apache.org/jira/browse/SOLR-7750) (Christine Poerschke) (Ramkumar Aiyengar) - Removed fastutil and java-hll dependency, integrated HyperLogLog from java-hll into Solr core. [SOLR-7787](https://issues.apache.org/jira/browse/SOLR-7787) (Dawid Weiss) -- JSON Facet API: the avg() facet function now skips missing values rather than treating them as a 0 value. The def() function can be used to treat missing values as 0 if that is desired. Example: facet:{ mean:"avg(def(myfield,0))" } [SOLR-7800](https://issues.apache.org/jira/browse/SOLR-7800) +- JSON Facet API: the avg() facet function now skips missing values rather than treating them as a 0 value. The def() function can be used to treat missing values as 0 if that is desired. 
Example: facet:{ mean:"avg(def(myfield,0))" } [SOLR-7800](https://issues.apache.org/jira/browse/SOLR-7800) - Prevent class loading deadlock in TrieDateField; refactor date formatting and parsing out of TrieDateField and move to static utility class DateFormatUtil. [SOLR-7803](https://issues.apache.org/jira/browse/SOLR-7803) (Markus Heiden) (Uwe Schindler) - Update Kite Morphlines to 1.1.0 [SOLR-7805](https://issues.apache.org/jira/browse/SOLR-7805) (Mark Miller) - TestMiniSolrCloudCluster.testCollectionCreateSearchDelete async collection-creation (sometimes) [SOLR-7823](https://issues.apache.org/jira/browse/SOLR-7823) (Christine Poerschke) @@ -6052,7 +6051,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - SolrConfig#getConfigOverlay does not clean up it's resources. [SOLR-7426](https://issues.apache.org/jira/browse/SOLR-7426) (Mark Miller) - DebugComponent does not return the right requestPurpose for pivot facet refinements. [SOLR-7440](https://issues.apache.org/jira/browse/SOLR-7440) (shalin) - Implemented range faceting over date fields in the new facet module (JSON Facet API). 
[SOLR-7443](https://issues.apache.org/jira/browse/SOLR-7443) (yonik) -- solr/server/etc/jetty-https-ssl.xml hard codes the key store file and password rather than pulling them from the sysprops defined in solr/bin/solr.in.{sh,cmd} [SOLR-7449](https://issues.apache.org/jira/browse/SOLR-7449) +- solr/server/etc/jetty-https-ssl.xml hard codes the key store file and password rather than pulling them from the sysprops defined in solr/bin/solr.in.{sh,cmd} [SOLR-7449](https://issues.apache.org/jira/browse/SOLR-7449) - Fix edge case which could cause `bin/solr stop` to hang forever [SOLR-7450](https://issues.apache.org/jira/browse/SOLR-7450) (Ramkumar Aiyengar) - Fix check-licenses to correctly detect if start.jar.sha1 is incorrect [SOLR-7469](https://issues.apache.org/jira/browse/SOLR-7469) (hossman) - Fix sample data to eliminate file order dependency for successful indexing, also fixed SolrCloudExampleTest to help catch this in the future. [SOLR-7470](https://issues.apache.org/jira/browse/SOLR-7470) (hossman) @@ -6269,7 +6268,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Add an onlyIfDown flag for DELETEREPLICA collections API command [SOLR-6482](https://issues.apache.org/jira/browse/SOLR-6482) (Erick Erickson) - ReplicationHandler should have an option to throttle the speed of replication [SOLR-6485](https://issues.apache.org/jira/browse/SOLR-6485) (Varun Thacker) (Noble Paul) - Add a collections API call to add/delete arbitrary properties to a specific replica. Optionally adding sliceUnique=true will remove this property from all other replicas within a particular slice. [SOLR-6512](https://issues.apache.org/jira/browse/SOLR-6512) (Erick Erickson) -- Add a collectionsAPI call BALANCESLICEUNIQUE. Allows the even distribution of custom replica properties across nodes making up a collection, at most one node per slice will have the property. 
[SOLR-6513](https://issues.apache.org/jira/browse/SOLR-6513) +- Add a collectionsAPI call BALANCESLICEUNIQUE. Allows the even distribution of custom replica properties across nodes making up a collection, at most one node per slice will have the property. [SOLR-6513](https://issues.apache.org/jira/browse/SOLR-6513) - Support editing common solrconfig.xml values [SOLR-6533](https://issues.apache.org/jira/browse/SOLR-6533) (Noble Paul) - Give HttpSolrClient the ability to send PUT requests [SOLR-6543](https://issues.apache.org/jira/browse/SOLR-6543) (Gregory Chanan) - SolrRequest support for query params [SOLR-6565](https://issues.apache.org/jira/browse/SOLR-6565) (Gregory Chanan) @@ -6667,7 +6666,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Opened up "public" access to DataSource, DocBuilder, and EntityProcessorWrapper in DIH. [SOLR-6194](https://issues.apache.org/jira/browse/SOLR-6194) (Aaron LaBella) (ehatcher) - Fixed bug in TestReplicationHandler.doTestIndexAndConfigReplication. [SOLR-6228](https://issues.apache.org/jira/browse/SOLR-6228) (shalin) - Increased timeouts and hardened the RollingRestartTest. [SOLR-6231](https://issues.apache.org/jira/browse/SOLR-6231) (Noble Paul) (shalin) -- CoreContainer.remove() replaced with CoreContainer.unload(). A call to unload will also close the core. [SOLR-6232](https://issues.apache.org/jira/browse/SOLR-6232) +- CoreContainer.remove() replaced with CoreContainer.unload(). A call to unload will also close the core. [SOLR-6232](https://issues.apache.org/jira/browse/SOLR-6232) - Removed unused coreName parameter in ZkStateReader.getReplicaProps. [SOLR-6240](https://issues.apache.org/jira/browse/SOLR-6240) (shalin) - Harden the HttpPartitionTest. [SOLR-6241](https://issues.apache.org/jira/browse/SOLR-6241) (shalin) - A couple of small improvements to UnInvertedField class. 
[SOLR-6252](https://issues.apache.org/jira/browse/SOLR-6252) (Vamsee Yarlagadda) (Gregory Chanan) (Mark Miller) @@ -6749,7 +6748,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix method visibility of Evaluator, refactor DateFormatEvaluator for extensibility. [SOLR-6013](https://issues.apache.org/jira/browse/SOLR-6013) (Aaron LaBella) (shalin) - Deprecate getAnalyzer() in IndexField and FieldType, and add getIndexAnalyzer(). [SOLR-6022](https://issues.apache.org/jira/browse/SOLR-6022) (Ryan Ernst) - Replace mentions of CommonsHttpSolrServer with HttpSolrServer and StreamingUpdateSolrServer with ConcurrentUpdateSolrServer. [SOLR-6025](https://issues.apache.org/jira/browse/SOLR-6025) (Ahmet Arslan) (shalin) -- REQUESTSTATUS Collection API now also checks for submitted tasks which are yet to begin execution. [SOLR-6026](https://issues.apache.org/jira/browse/SOLR-6026) +- REQUESTSTATUS Collection API now also checks for submitted tasks which are yet to begin execution. [SOLR-6026](https://issues.apache.org/jira/browse/SOLR-6026) - Refactor duplicate Collector code in SolrIndexSearcher [SOLR-6067](https://issues.apache.org/jira/browse/SOLR-6067) (Christine Poerschke) (hossman) - Refactor DocRouter.getDocRouter to accept routerName as a String. [SOLR-6116](https://issues.apache.org/jira/browse/SOLR-6116) (shalin) - Removed deprecated analysis factories and fieldTypes from the example schema.xml [SOLR-6128](https://issues.apache.org/jira/browse/SOLR-6128) (hossman) @@ -6980,7 +6979,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Using the hdfs write cache can result in appearance of corrupted index. [SOLR-5666](https://issues.apache.org/jira/browse/SOLR-5666) (Mark Miller) - Performance problem when not using hdfs block cache. [SOLR-5667](https://issues.apache.org/jira/browse/SOLR-5667) (Mark Miller) - HTTPSolrServer doesn't set own property correctly in setFollowRedirects. 
[SOLR-5673](https://issues.apache.org/jira/browse/SOLR-5673) (Frank Wesemann) (shalin) -- cloud-scripts/zkcli.bat: quote option log4j (Günther Ruck via steffkes [SOLR-5675](https://issues.apache.org/jira/browse/SOLR-5675) +- cloud-scripts/zkcli.bat: quote option log4j (Günther Ruck via steffkes [SOLR-5675](https://issues.apache.org/jira/browse/SOLR-5675) - SolrCloud updates rejected if talking to secure ZooKeeper. [SOLR-5676](https://issues.apache.org/jira/browse/SOLR-5676) (Greg Chanan) (Mark Miller) - SolrZkClient should throw a SolrException when connect times out rather than a RuntimeException. [SOLR-5678](https://issues.apache.org/jira/browse/SOLR-5678) (Karl Wright) (Anshum Gupta) (Mark Miller) - Shard splitting fails with ClassCastException on collections upgraded from 4.5 and earlier versions. [SOLR-5679](https://issues.apache.org/jira/browse/SOLR-5679) (Brett Hoerner) (shalin) @@ -7272,7 +7271,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - HitRatio on caches is now exposed over JMX MBeans as a float. [SOLR-4616](https://issues.apache.org/jira/browse/SOLR-4616) (Greg Bowyer) - Solrj doesn't return "between" count in range facets [SOLR-4776](https://issues.apache.org/jira/browse/SOLR-4776) (Philip K. Warren) (shalin) - Multiple Entities DIH delta import: dataimporter.[entityName].last_index_time is empty. 
[SOLR-4788](https://issues.apache.org/jira/browse/SOLR-4788) (chakming wong) (James Dyer) (shalin) -- Throw an error if a core has the same name as another core, both old and new style solr.xml [SOLR-4790](https://issues.apache.org/jira/browse/SOLR-4790) +- Throw an error if a core has the same name as another core, both old and new style solr.xml [SOLR-4790](https://issues.apache.org/jira/browse/SOLR-4790) - solr.xml sharedLib does not work in 4.3.0 [SOLR-4791](https://issues.apache.org/jira/browse/SOLR-4791) (Ryan Ernst) (Jan Høydahl via Erick Erickson) - Fixed core discovery mode (ie: new style solr.xml) to treat 'collection1' as the default core name. [SOLR-4803](https://issues.apache.org/jira/browse/SOLR-4803) (hossman) - SolrCore#reload should not call preRegister and publish a DOWN state to ZooKeeper. [SOLR-4805](https://issues.apache.org/jira/browse/SOLR-4805) (Mark Miller) (Jared Rodriguez) @@ -7293,7 +7292,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Fix race conditions in shutdown of CoreContainer and getCore that could cause a request to attempt to use a core that has shut down. [SOLR-4960](https://issues.apache.org/jira/browse/SOLR-4960) (yonik) - Outgrowth of SOLR-4960 that includes transient cores and pending cores [SOLR-4974](https://issues.apache.org/jira/browse/SOLR-4974) [SOLR-4960](https://issues.apache.org/jira/browse/SOLR-4960) (Erick Erickson) - Time is stripped from datetime column when imported into Solr date field if convertType=true. [SOLR-4978](https://issues.apache.org/jira/browse/SOLR-4978) (Bill Au) (shalin) -- Creating a core while referencing system properties looks like it loses files Actually, instanceDir, config, dataDir and schema are not dereferenced properly when creating cores that reference sys vars (e.g. &dataDir=${dir}). In the dataDir case in particular this leads to the index being put in a directory literally named ${dir} but on restart the sysvar will be properly dereferenced. 
[SOLR-4982](https://issues.apache.org/jira/browse/SOLR-4982) +- Creating a core while referencing system properties looks like it loses files Actually, instanceDir, config, dataDir and schema are not dereferenced properly when creating cores that reference sys vars (e.g. &dataDir=${dir}). In the dataDir case in particular this leads to the index being put in a directory literally named ${dir} but on restart the sysvar will be properly dereferenced. [SOLR-4982](https://issues.apache.org/jira/browse/SOLR-4982) - The splitshard api doesn't call commit on new sub shards before switching shard states. Multiple bugs related to sub shard recovery and replication are also fixed. [SOLR-4997](https://issues.apache.org/jira/browse/SOLR-4997) (shalin) - ManagedIndexSchema doesn't persist uniqueKey tag after calling addFields method. [SOLR-5000](https://issues.apache.org/jira/browse/SOLR-5000) (Jun Ohtani) (Steve Rowe) - The Overseer should avoid publishing the state for collections that do not exist under the /collections zk node. [SOLR-5018](https://issues.apache.org/jira/browse/SOLR-5018) (Mark Miller) @@ -7301,7 +7300,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - ShardHandlerFactory was not being created properly when using new-style solr.xml, and was not being persisted properly when using old-style. [SOLR-5028](https://issues.apache.org/jira/browse/SOLR-5028) [SOLR-5029](https://issues.apache.org/jira/browse/SOLR-5029) (Tomás Fernández Löbbe) (Ryan Ernst) (Alan Woodward) - A facet.query that parses or analyzes down to a null Query would throw a NPE. Fixed. [SOLR-5034](https://issues.apache.org/jira/browse/SOLR-5034) (David Smiley) - The CSV loader now accepts field names that are not in the schema. [SOLR-5037](https://issues.apache.org/jira/browse/SOLR-5037) (gsingers) (ehatcher) (Steve Rowe) -- Admin/Schema Browser displays -1 for term counts for multiValued fields. 
[SOLR-5039](https://issues.apache.org/jira/browse/SOLR-5039) +- Admin/Schema Browser displays -1 for term counts for multiValued fields. [SOLR-5039](https://issues.apache.org/jira/browse/SOLR-5039) ### Other (17 changes) @@ -7543,7 +7542,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (15 changes) -- Added more tests for VelocityResponseWriter [SOLR-2470](https://issues.apache.org/jira/browse/SOLR-2470) +- Added more tests for VelocityResponseWriter [SOLR-2470](https://issues.apache.org/jira/browse/SOLR-2470) - A bare * without a field specification is treated as *:* by the lucene and edismax query parsers. [SOLR-2996](https://issues.apache.org/jira/browse/SOLR-2996) (hossman) (Jan Høydahl) (Alan Woodward) (yonik) - SurroundQParserPlugin highlighting tests [SOLR-3060](https://issues.apache.org/jira/browse/SOLR-3060) (Ahmet Arslan) (hossman) - Include lucene codecs jar and enable per-field postings and docvalues support in the schema.xml [SOLR-3843](https://issues.apache.org/jira/browse/SOLR-3843) (Robert Muir) (Steve Rowe) @@ -7565,7 +7564,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Added (35 changes) -- The ability to specify "transient" and "loadOnStartup" as a new properties of <core> tags in solr.xml. Can specify "transientCacheSize" in the <cores> tag. Together these allow cores to be loaded only when needed and only transientCacheSize transient cores will be loaded at a time, the rest aged out on an LRU basis. [SOLR-1028](https://issues.apache.org/jira/browse/SOLR-1028) +- The ability to specify "transient" and "loadOnStartup" as a new properties of <core> tags in solr.xml. Can specify "transientCacheSize" in the <cores> tag. Together these allow cores to be loaded only when needed and only transientCacheSize transient cores will be loaded at a time, the rest aged out on an LRU basis. 
[SOLR-1028](https://issues.apache.org/jira/browse/SOLR-1028) - Add extra statistics to RequestHandlers - 5 & 15-minute reqs/sec rolling averages; median, 75th, 95th, 99th, 99.9th percentile request times [SOLR-1972](https://issues.apache.org/jira/browse/SOLR-1972) (Alan Woodward) (Shawn Heisey) (Adrien Grand) (Uwe Schindler) - DIH's "formatDate" function now supports a timezone as an optional fourth parameter [SOLR-2201](https://issues.apache.org/jira/browse/SOLR-2201) (James Dyer) (Mark Waddle) - Enhanced pivot faceting to use local-params in the same way that regular field value faceting can. This means support for excluding a filter query, using a different output key, and specifying 'threads' to do facet.method=fcs concurrently. PivotFacetHelper now extends SimpleFacet and the getFacetImplementation() extension hook was removed. [SOLR-2255](https://issues.apache.org/jira/browse/SOLR-2255) (dsmiley) @@ -7765,7 +7764,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Boolean type added to function queries, along with new functions exists(), if(), and(), or(), xor(), not(), def(), and true and false constants. [SOLR-2136](https://issues.apache.org/jira/browse/SOLR-2136) (yonik) - You may now specify a 'soft' commit when committing. This will use Lucene's NRT feature to avoid guaranteeing documents are on stable storage in exchange for faster reopen times. There is also a new 'soft' autocommit tracker that can be configured. [SOLR-2193](https://issues.apache.org/jira/browse/SOLR-2193) [SOLR-2565](https://issues.apache.org/jira/browse/SOLR-2565) (Mark Miller) (Robert Muir) - Pseudo-join queries / filters. 
Examples: - To restrict to the set of parents with at least one blue-eyed child: fq={!join from=parent to=name}eyes:blue - To restrict to the set of children with at least one blue-eyed parent: fq={!join from=name to=parent}eyes:blue [SOLR-2272](https://issues.apache.org/jira/browse/SOLR-2272) (yonik) -- New 'field("...")' function syntax for referring to complex field names (containing whitespace or special characters) in functions. [SOLR-2335](https://issues.apache.org/jira/browse/SOLR-2335) +- New 'field("...")' function syntax for referring to complex field names (containing whitespace or special characters) in functions. [SOLR-2335](https://issues.apache.org/jira/browse/SOLR-2335) - Add support for using <similarity/> in a schema's fieldType, for customizing scoring on a per-field basis. [SOLR-2338](https://issues.apache.org/jira/browse/SOLR-2338) (hossman) (yonik) (rmuir) - /browse improvements: generalize range and date facet display [SOLR-2383](https://issues.apache.org/jira/browse/SOLR-2383) (Jan Høydahl) - Add CollationField, which is much more efficient than the Solr 3.x CollationKeyFilterFactory, and also supports Locale-sensitive range queries. [SOLR-2396](https://issues.apache.org/jira/browse/SOLR-2396) (rmuir) @@ -7967,7 +7966,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (81 changes) -- FunctionQuery, DocValues (and its impls), ValueSource (and its impls) and BoostedQuery have been consolidated into the queries module. They can now be found at o.a.l.queries.function. [LUCENE-2883](https://issues.apache.org/jira/browse/LUCENE-2883) +- FunctionQuery, DocValues (and its impls), ValueSource (and its impls) and BoostedQuery have been consolidated into the queries module. They can now be found at o.a.l.queries.function. 
[LUCENE-2883](https://issues.apache.org/jira/browse/LUCENE-2883) - Moved some spellchecker and suggest APIs to modules/suggest: HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the suggester APIs and implementations. [LUCENE-2995](https://issues.apache.org/jira/browse/LUCENE-2995) (rmuir) - Moved MutableValue classes to new 'common' module. [LUCENE-3232](https://issues.apache.org/jira/browse/LUCENE-3232) (Chris Male) - 'luke' ant target has been disabled due to incompatibilities with XML queryparser location [LUCENE-3286](https://issues.apache.org/jira/browse/LUCENE-3286) (Chris Male) @@ -8000,7 +7999,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Enhance CoreAdmin STATUS command to return index size. [SOLR-2698](https://issues.apache.org/jira/browse/SOLR-2698) (Yury Kats) (hossman) (Mark Miller) - Updated changes2html.pl to handle Solr's CHANGES.txt; added target 'changes-to-html' to solr/build.xml. [SOLR-2747](https://issues.apache.org/jira/browse/SOLR-2747) (Steve Rowe) (Robert Muir) - Maven configuration: Excluded transitive stax:stax-api dependency from org.codehaus.woodstox:wstx-asl dependency. [SOLR-2756](https://issues.apache.org/jira/browse/SOLR-2756) (David Smiley) -- Due to low level changes to support SolrCloud, the uniqueKey field can no longer be populated via <copyField/> or <field default=...> in the schema.xml. [SOLR-2796](https://issues.apache.org/jira/browse/SOLR-2796) +- Due to low level changes to support SolrCloud, the uniqueKey field can no longer be populated via <copyField/> or <field default=...> in the schema.xml. [SOLR-2796](https://issues.apache.org/jira/browse/SOLR-2796) - The /update/json and /update/csv URLs were restored to aid in the migration of existing clients. [SOLR-2857](https://issues.apache.org/jira/browse/SOLR-2857) (yonik) - More explicit lexical resources location logged if Carrot2 clustering extension is used. Fixed solr. impl. of IResource and IResourceLookup. 
[SOLR-2862](https://issues.apache.org/jira/browse/SOLR-2862) (Dawid Weiss) - Default QueryResponseWriters are now initialized via init() with an empty NamedList. [SOLR-3005](https://issues.apache.org/jira/browse/SOLR-3005) (Gasol Wu) (Chris Male) @@ -8140,7 +8139,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Boolean type added to function queries, along with new functions exists(), if(), and(), or(), xor(), not(), def(), and true and false constants. [SOLR-2136](https://issues.apache.org/jira/browse/SOLR-2136) (yonik) - You may now specify a 'soft' commit when committing. This will use Lucene's NRT feature to avoid guaranteeing documents are on stable storage in exchange for faster reopen times. There is also a new 'soft' autocommit tracker that can be configured. [SOLR-2193](https://issues.apache.org/jira/browse/SOLR-2193) [SOLR-2565](https://issues.apache.org/jira/browse/SOLR-2565) (Mark Miller) (Robert Muir) - Pseudo-join queries / filters. Examples: - To restrict to the set of parents with at least one blue-eyed child: fq={!join from=parent to=name}eyes:blue - To restrict to the set of children with at least one blue-eyed parent: fq={!join from=name to=parent}eyes:blue [SOLR-2272](https://issues.apache.org/jira/browse/SOLR-2272) (yonik) -- New 'field("...")' function syntax for referring to complex field names (containing whitespace or special characters) in functions. [SOLR-2335](https://issues.apache.org/jira/browse/SOLR-2335) +- New 'field("...")' function syntax for referring to complex field names (containing whitespace or special characters) in functions. [SOLR-2335](https://issues.apache.org/jira/browse/SOLR-2335) - Add support for using <similarity/> in a schema's fieldType, for customizing scoring on a per-field basis. 
[SOLR-2338](https://issues.apache.org/jira/browse/SOLR-2338) (hossman) (yonik) (rmuir) - /browse improvements: generalize range and date facet display [SOLR-2383](https://issues.apache.org/jira/browse/SOLR-2383) (Jan Høydahl) (yonik) - Add CollationField, which is much more efficient than the Solr 3.x CollationKeyFilterFactory, and also supports Locale-sensitive range queries. [SOLR-2396](https://issues.apache.org/jira/browse/SOLR-2396) (rmuir) @@ -8240,7 +8239,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M ### Other (49 changes) -- FunctionQuery, DocValues (and its impls), ValueSource (and its impls) and BoostedQuery have been consolidated into the queries module. They can now be found at o.a.l.queries.function. [LUCENE-2883](https://issues.apache.org/jira/browse/LUCENE-2883) +- FunctionQuery, DocValues (and its impls), ValueSource (and its impls) and BoostedQuery have been consolidated into the queries module. They can now be found at o.a.l.queries.function. [LUCENE-2883](https://issues.apache.org/jira/browse/LUCENE-2883) - Moved some spellchecker and suggest APIs to modules/suggest: HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the suggester APIs and implementations. [LUCENE-2995](https://issues.apache.org/jira/browse/LUCENE-2995) (rmuir) - Moved MutableValue classes to new 'common' module. [LUCENE-3232](https://issues.apache.org/jira/browse/LUCENE-3232) (Chris Male) - 'luke' ant target has been disabled due to incompatibilities with XML queryparser location [LUCENE-3286](https://issues.apache.org/jira/browse/LUCENE-3286) (Chris Male) @@ -8270,7 +8269,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - FieldTypePluginLoader has been refactored out of IndexSchema and made public. [SOLR-2663](https://issues.apache.org/jira/browse/SOLR-2663) (hossman) - Enhance CoreAdmin STATUS command to return index size. 
[SOLR-2698](https://issues.apache.org/jira/browse/SOLR-2698) (Yury Kats) (hossman) (Mark Miller) - Maven configuration: Excluded transitive stax:stax-api dependency from org.codehaus.woodstox:wstx-asl dependency. [SOLR-2756](https://issues.apache.org/jira/browse/SOLR-2756) (David Smiley) (Steve Rowe) -- Due to low level changes to support SolrCloud, the uniqueKey field can no longer be populated via <copyField/> or <field default=...> in the schema.xml. [SOLR-2796](https://issues.apache.org/jira/browse/SOLR-2796) +- Due to low level changes to support SolrCloud, the uniqueKey field can no longer be populated via <copyField/> or <field default=...> in the schema.xml. [SOLR-2796](https://issues.apache.org/jira/browse/SOLR-2796) - More explicit lexical resources location logged if Carrot2 clustering extension is used. Fixed solr. impl. of IResource and IResourceLookup. [SOLR-2862](https://issues.apache.org/jira/browse/SOLR-2862) (Dawid Weiss) - Default QueryResponseWriters are now initialized via init() with an empty NamedList. [SOLR-3005](https://issues.apache.org/jira/browse/SOLR-3005) (Gasol Wu) (Chris Male) - logOnce from SolrException logOnce and all the supporting structure is gone. abortOnConfigurationError is also gone as it is no longer referenced. Errors should be caught and logged at the top-most level or logged and NOT propagated up the chain. [SOLR-3032](https://issues.apache.org/jira/browse/SOLR-3032) (Erick Erickson) @@ -8473,7 +8472,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Upgrade commons-compress to 1.2 [LUCENE-3457](https://issues.apache.org/jira/browse/LUCENE-3457) (Doron Cohen) - Upgrade Solr to Tika 0.10 [SOLR-2372](https://issues.apache.org/jira/browse/SOLR-2372) (janhoy) - Remove commitLockTimeout option from solrconfig.xml [SOLR-2591](https://issues.apache.org/jira/browse/SOLR-2591) (Luca Cavanna) (Martijn van Groningen) -- Upgraded UIMA dependencies from *-2.3.1-SNAPSHOT.jar to *-2.3.1.jar. 
[SOLR-2746](https://issues.apache.org/jira/browse/SOLR-2746) +- Upgraded UIMA dependencies from *-2.3.1-SNAPSHOT.jar to *-2.3.1.jar. [SOLR-2746](https://issues.apache.org/jira/browse/SOLR-2746) - Make both "update.chain" and the deprecated "update.param" work consistently everywhere; see also SOLR-2105. [SOLR-2750](https://issues.apache.org/jira/browse/SOLR-2750) [SOLR-2105](https://issues.apache.org/jira/browse/SOLR-2105) (Mark Miller) (janhoy) - min() and max() functions now support an arbitrary number of ValueSources [SOLR-2757](https://issues.apache.org/jira/browse/SOLR-2757) (Bill Bell) (hossman) - Moved ConcurrentLRUCache from o.a.s.common.util package in the solrj module to the o.a.s.util package in the Solr core module. [SOLR-2758](https://issues.apache.org/jira/browse/SOLR-2758) (David Smiley) (Steve Rowe) @@ -8638,7 +8637,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Support sorting on trie fields with Distributed Search. [SOLR-1110](https://issues.apache.org/jira/browse/SOLR-1110) (Mark Miller) (Uwe Schindler) (shalin) - <bool>on</bool> and <bool>yes</bool> work as expected in solrconfig.xml. [SOLR-1115](https://issues.apache.org/jira/browse/SOLR-1115) (koji) - Add a Binary FieldType [SOLR-1116](https://issues.apache.org/jira/browse/SOLR-1116) (noble) -- CoreAdminhandler should not need a core . This makes it possible to start a Solr server w/o a core .(noble) [SOLR-1121](https://issues.apache.org/jira/browse/SOLR-1121) +- CoreAdminhandler should not need a core . This makes it possible to start a Solr server w/o a core .(noble) [SOLR-1121](https://issues.apache.org/jira/browse/SOLR-1121) - Add a top() function query that causes its argument to have its values derived from the top level IndexReader, even when invoked from a sub-reader. top() is implicitly used for the ord() and rord() functions. 
[SOLR-1124](https://issues.apache.org/jira/browse/SOLR-1124) (yonik) - Added metadata output to extraction request handler "extract only" option. [SOLR-1128](https://issues.apache.org/jira/browse/SOLR-1128) (gsingers) - FieldTypes can now output multiple Fields per Type and still be searched. This can be handy for hiding the details of a particular implementation such as in the spatial case. [SOLR-1131](https://issues.apache.org/jira/browse/SOLR-1131) (Chris Mattmann) (shalin) (noble) (gsingers) (yonik) @@ -8737,7 +8736,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Allow tagging and exclusion of main query for faceting. [SOLR-2325](https://issues.apache.org/jira/browse/SOLR-2325) (yonik) - Add configurable IndexReaderFactory so that alternate IndexReader implementations can be specified via solrconfig.xml. Note that using a custom IndexReader may be incompatible with ReplicationHandler (see comments in SOLR-1366). This should be treated as an experimental feature. [SOLR-243](https://issues.apache.org/jira/browse/SOLR-243) [SOLR-1366](https://issues.apache.org/jira/browse/SOLR-1366) (Andrzej Bialecki) (hossman) (Mark Miller) (John Wang) - Added support for extracting content from binary documents like MS Word and PDF using Apache Tika. See also contrib/extraction/CHANGES.txt [SOLR-284](https://issues.apache.org/jira/browse/SOLR-284) (Eric Pugh) (Chris Harris) (yonik) (gsingers) -- Date Faceting now supports a "facet.date.include" param for specifying when the upper & lower end points of computed date ranges should be included in the range. Legal values are: "all", "lower", "upper", "edge", and "outer". For backwards compatibility the default value is the set: [lower,upper,edge], so that all ranges between start and end are inclusive of their endpoints, but the "before" and "after" ranges are not. 
[SOLR-397](https://issues.apache.org/jira/browse/SOLR-397) +- Date Faceting now supports a "facet.date.include" param for specifying when the upper & lower end points of computed date ranges should be included in the range. Legal values are: "all", "lower", "upper", "edge", and "outer". For backwards compatibility the default value is the set: [lower,upper,edge], so that all ranges between start and end are inclusive of their endpoints, but the "before" and "after" ranges are not. [SOLR-397](https://issues.apache.org/jira/browse/SOLR-397) - Add configurable DirectoryProvider so that alternate Directory implementations can be specified via solrconfig.xml. The default DirectoryProvider will use NIOFSDirectory for better concurrency on non Windows platforms. [SOLR-465](https://issues.apache.org/jira/browse/SOLR-465) (Mark Miller) (TJ Laurenzo) (yonik) - Add maxChars attribute for copyField function so that the length limit for destination can be specified. [SOLR-538](https://issues.apache.org/jira/browse/SOLR-538) (Georgios Stamatis) (Lars Kotthoff) (Chris Harris) (koji) - Add support for globbing in field names to highlight. For example, hl.fl=*_text will highlight all fieldnames ending with _text. [SOLR-540](https://issues.apache.org/jira/browse/SOLR-540) (Lars Kotthoff) (yonik) @@ -8785,7 +8784,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M - Adding a StreamingUpdateSolrServer that writes update commands to an open HTTP connection. If you are using solrj for bulk update requests you should consider switching to this implementaion. However, note that the error handling is not immediate as it is with the standard SolrServer. [SOLR-906](https://issues.apache.org/jira/browse/SOLR-906) (ryan) - CommonGramsFilterFactory/CommonGramsQueryFilterFactory for speeding up phrase queries containing common words by indexing n-grams and using them at query time. 
[SOLR-908](https://issues.apache.org/jira/browse/SOLR-908) (Tom Burton-West) (Jason Rutherglen) (yonik) - Add a few utility commands to the DIH admin page such as full import, delta import, status, reload config. [SOLR-910](https://issues.apache.org/jira/browse/SOLR-910) (Ahmed Hammad) (shalin) -- Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller. [SOLR-911](https://issues.apache.org/jira/browse/SOLR-911) +- Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller. [SOLR-911](https://issues.apache.org/jira/browse/SOLR-911) - SolrDocument and SolrInputDocument now implement the Map<String,?> interface. This should make plugging into other standard tools easier. [SOLR-928](https://issues.apache.org/jira/browse/SOLR-928) (ryan) - Add event listener API for DIH import start and end. [SOLR-938](https://issues.apache.org/jira/browse/SOLR-938) (Kay Kay) (Noble Paul) (shalin) - ValueSourceRangeFilter/Query - filter based on values in a FieldCache entry or on any arbitrary function of field values. [SOLR-939](https://issues.apache.org/jira/browse/SOLR-939) (yonik) @@ -9066,7 +9065,7 @@ sample syntax.) - Add HSQLDB Jar to example-DIH, unzip database and update instructions. ================== Release 1.3.0 ================== Upgrading from Solr 1.2 ----------------------- IMPORTANT UPGRADE NOTE: In a master/slave configuration, all searchers/slaves should be upgraded before the master! 
If the master were to be updated first, the older searchers would not be able to read the new index format. The Porter snowball based stemmers in Lucene were updated (LUCENE-1142), and are not guaranteed to be backward compatible at the index level (the stem of certain words may have changed). Re-indexing is recommended. Older Apache Solr installations can be upgraded by replacing the relevant war file with the new version. No changes to configuration files should be needed. This version of Solr contains a new version of Lucene implementing an updated index format. This version of Solr/Lucene can still read and update indexes in the older formats, and will convert them to the new format on the first index change. Be sure to backup your index before upgrading in case you need to downgrade. Solr now recognizes HTTP Request headers related to HTTP Caching (see RFC 2616 sec13) and will by default respond with "304 Not Modified" when appropriate. This should only affect users who access Solr via an HTTP Cache, or via a Web-browser that has an internal cache, but if you wish to suppress this behavior an '<httpCaching never304="true"/>' option can be added to your solrconfig.xml. See the wiki (or the example solrconfig.xml) for more details... http://wiki.apache.org/solr/SolrConfigXml#HTTPCaching In Solr 1.2, DateField did not enforce the canonical representation of the ISO 8601 format when parsing incoming data, and did not generation the canonical format when generating dates from "Date Math" strings (particularly as it pertains to milliseconds ending in trailing zeros). As a result equivalent dates could not always be compared properly. This problem is corrected in Solr 1.3, but DateField users that might have been affected by indexing inconsistent formats of equivalent dates (ie: 1995-12-31T23:59:59Z vs 1995-12-31T23:59:59.000Z) may want to consider reindexing to correct these inconsistencies. 
Users who depend on some of the "broken" behavior of DateField in Solr 1.2 (specificly: accepting any input that ends in a 'Z') should consider using the LegacyDateField class as a possible alternative. Users that desire 100% backwards compatibility should consider using the Solr 1.2 version of DateField. Due to some changes in the lifecycle of TokenFilterFactories, users of Solr 1.2 who have written Java code which constructs new instances of StopFilterFactory, SynonymFilterFactory, or EnglishProterFilterFactory will need to modify their code by adding a line like the following prior to using the factory object... factory.inform(SolrCore.getSolrCore().getSolrConfig().getResourceLoader()); These lifecycle changes do not affect people who use Solr "out of the box" or who have developed their own TokenFilterFactory plugins. More info can be found in SOLR-594. The python client that used to ship with Solr is no longer included in the distribution [SOLR-1369](https://issues.apache.org/jira/browse/SOLR-1369) [LUCENE-1142](https://issues.apache.org/jira/browse/LUCENE-1142) [SOLR-594](https://issues.apache.org/jira/browse/SOLR-594) (see client) (python) (README.txt) - The TokenizerFactory API has changed to explicitly return a Tokenizer rather then a TokenStream (that may be or may not be a Tokenizer). This change is required to take advantage of the Token reuse improvements in lucene 2.9. [SOLR-1377](https://issues.apache.org/jira/browse/SOLR-1377) (ryan) - In build.xml, use longfile="gnu" in tar task to avoid warnings about long file names [SOLR-1386](https://issues.apache.org/jira/browse/SOLR-1386) (Mark Miller) (shalin) -- Added Solr Powered By Logos [SOLR-1409](https://issues.apache.org/jira/browse/SOLR-1409) +- Added Solr Powered By Logos [SOLR-1409](https://issues.apache.org/jira/browse/SOLR-1409) - Errors and Exceptions are formated by ResponseWriter. 
[SOLR-141](https://issues.apache.org/jira/browse/SOLR-141) (Mike Sokolov) (Rich Cariens) (Daniel Naber) (ryan) - Log a warning if the deprecated charset option is used on GreekLowerCaseFilterFactory, RussianStemFilterFactory, RussianLowerCaseFilterFactory or RussianLetterTokenizerFactory. [SOLR-1410](https://issues.apache.org/jira/browse/SOLR-1410) (Robert Muir) (hossman) - Due to LUCENE-1906, Solr's tokenizer should use Tokenizer.correctOffset() instead of CharStream.correctOffset(). [SOLR-1423](https://issues.apache.org/jira/browse/SOLR-1423) [LUCENE-1906](https://issues.apache.org/jira/browse/LUCENE-1906) (Uwe Schindler) (koji) @@ -9082,7 +9081,7 @@ sample syntax.) - Extract base class from TestDistributedSearch to make it easy to write test cases for other distributed components. [SOLR-1608](https://issues.apache.org/jira/browse/SOLR-1608) (shalin) - Generify SolrCache [SOLR-1610](https://issues.apache.org/jira/browse/SOLR-1610) (Jason Rutherglen) (shalin) - Added Lucene 2.9.1 collation contrib jar to lib [SOLR-1611](https://issues.apache.org/jira/browse/SOLR-1611) (shalin) -- Remove ALIAS command [SOLR-1637](https://issues.apache.org/jira/browse/SOLR-1637) +- Remove ALIAS command [SOLR-1637](https://issues.apache.org/jira/browse/SOLR-1637) - Remove adminCore from CoreContainer . removed deprecated methods setAdminCore(), getAdminCore() [SOLR-1661](https://issues.apache.org/jira/browse/SOLR-1661) (noble) - Improve analysis tests and cut over to new TokenStream API. [SOLR-1674](https://issues.apache.org/jira/browse/SOLR-1674) (Robert Muir) (Mark Miller) - Improved error messages when adding a document that does not contain exactly one value for the uniqueKey field [SOLR-1695](https://issues.apache.org/jira/browse/SOLR-1695) (hossman) @@ -9099,7 +9098,7 @@ sample syntax.) 
- luceneAutoCommit no longer has any effect - it has been remove [SOLR-1851](https://issues.apache.org/jira/browse/SOLR-1851) (Mark Miller) - SolrResourceLoader.getLines ignores Byte Order Markers (BOMs) at the beginning of input files, these are often created by editors such as Windows Notepad. [SOLR-1865](https://issues.apache.org/jira/browse/SOLR-1865) (rmuir) (hossman) - Make lucene-jars-to-solr fail if copying any of the jars fails, and update clean to remove the jars in that directory [SOLR-1891](https://issues.apache.org/jira/browse/SOLR-1891) (Mark Miller) -- Upgraded to Tika 0.8 and changed deprecated parse call [SOLR-1902](https://issues.apache.org/jira/browse/SOLR-1902) +- Upgraded to Tika 0.8 and changed deprecated parse call [SOLR-1902](https://issues.apache.org/jira/browse/SOLR-1902) - ElisionFilterFactory will use a default set of French contractions if you do not supply a custom articles file. [SOLR-1938](https://issues.apache.org/jira/browse/SOLR-1938) (rmuir) - SolrResourceLoader will report any encoding errors, rather than silently using replacement characters for invalid inputs [SOLR-2003](https://issues.apache.org/jira/browse/SOLR-2003) (blargy) (rmuir) - Add mapping-FoldToASCII.txt to example conf directory. [SOLR-2013](https://issues.apache.org/jira/browse/SOLR-2013) (Steven Rowe) (koji) @@ -9204,7 +9203,7 @@ sample syntax.) - Support sorting on trie fields with Distributed Search. [SOLR-1110](https://issues.apache.org/jira/browse/SOLR-1110) (Mark Miller) (Uwe Schindler) (shalin) - <bool>on</bool> and <bool>yes</bool> work as expected in solrconfig.xml. [SOLR-1115](https://issues.apache.org/jira/browse/SOLR-1115) (koji) - Add a Binary FieldType [SOLR-1116](https://issues.apache.org/jira/browse/SOLR-1116) (noble) -- CoreAdminhandler should not need a core . This makes it possible to start a Solr server w/o a core .(noble) [SOLR-1121](https://issues.apache.org/jira/browse/SOLR-1121) +- CoreAdminhandler should not need a core . 
This makes it possible to start a Solr server w/o a core .(noble) [SOLR-1121](https://issues.apache.org/jira/browse/SOLR-1121) - Add a top() function query that causes its argument to have its values derived from the top level IndexReader, even when invoked from a sub-reader. top() is implicitly used for the ord() and rord() functions. [SOLR-1124](https://issues.apache.org/jira/browse/SOLR-1124) (yonik) - Added metadata output to extraction request handler "extract only" option. [SOLR-1128](https://issues.apache.org/jira/browse/SOLR-1128) (gsingers) - Add capability to specify an infoStream log file for the underlying Lucene IndexWriter in solrconfig.xml. This is an advanced debug log file that can be used to aid developers in fixing IndexWriter bugs. See the commented out example in the example solrconfig.xml under the indexDefaults section. [SOLR-1145](https://issues.apache.org/jira/browse/SOLR-1145) (Chris Harris) (Mark Miller) @@ -9282,7 +9281,7 @@ sample syntax.) - Adding a StreamingUpdateSolrServer that writes update commands to an open HTTP connection. If you are using solrj for bulk update requests you should consider switching to this implementaion. However, note that the error handling is not immediate as it is with the standard SolrServer. [SOLR-906](https://issues.apache.org/jira/browse/SOLR-906) (ryan) - CommonGramsFilterFactory/CommonGramsQueryFilterFactory for speeding up phrase queries containing common words by indexing n-grams and using them at query time. [SOLR-908](https://issues.apache.org/jira/browse/SOLR-908) (Tom Burton-West) (Jason Rutherglen) (yonik) - Add a few utility commands to the DIH admin page such as full import, delta import, status, reload config. [SOLR-910](https://issues.apache.org/jira/browse/SOLR-910) (Ahmed Hammad) (shalin) -- Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. 
This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller. [SOLR-911](https://issues.apache.org/jira/browse/SOLR-911) +- Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller. [SOLR-911](https://issues.apache.org/jira/browse/SOLR-911) - SolrDocument and SolrInputDocument now implement the Map<String,?> interface. This should make plugging into other standard tools easier. [SOLR-928](https://issues.apache.org/jira/browse/SOLR-928) (ryan) - Add event listener API for DIH import start and end. [SOLR-938](https://issues.apache.org/jira/browse/SOLR-938) (Kay Kay) (Noble Paul) (shalin) - ValueSourceRangeFilter/Query - filter based on values in a FieldCache entry or on any arbitrary function of field values. [SOLR-939](https://issues.apache.org/jira/browse/SOLR-939) (yonik) @@ -9448,10 +9447,10 @@ sample syntax.) - Upgrade to Tika 0.4. Note there are some differences in detecting Languages now in extracting request handler. See http://www.lucidimagination.com/search/document/d6f1899a85b2a45c/vote_apache_tika_0_4_release_candidate_2#d6f1899a85b2a45c for discussion on language detection. See http://www.apache.org/dist/lucene/tika/CHANGES-0.4.txt. [SOLR-1310](https://issues.apache.org/jira/browse/SOLR-1310) (gsingers) - Upgrade Solr Highlighter classes to new Lucene Highlighter API. This upgrade has resulted in a back compat break in the DefaultSolrHighlighter class - getQueryScorer is no longer protected. If you happened to be overriding that method in custom code, overide getHighlighter instead. 
Also, HighlightingUtils#getQueryScorer has been removed as it was deprecated and backcompat has been broken with it anyway. [SOLR-1319](https://issues.apache.org/jira/browse/SOLR-1319) [SOLR-1345](https://issues.apache.org/jira/browse/SOLR-1345) (Mark Miller) - SolrInputDocument cannot process dynamic fields [SOLR-1357](https://issues.apache.org/jira/browse/SOLR-1357) (Lars Grote) (noble) -- Add HSQLDB Jar to example-DIH, unzip database and update instructions. [SOLR-1369](https://issues.apache.org/jira/browse/SOLR-1369) +- Add HSQLDB Jar to example-DIH, unzip database and update instructions. [SOLR-1369](https://issues.apache.org/jira/browse/SOLR-1369) - The TokenizerFactory API has changed to explicitly return a Tokenizer rather then a TokenStream (that may be or may not be a Tokenizer). This change is required to take advantage of the Token reuse improvements in lucene 2.9. [SOLR-1377](https://issues.apache.org/jira/browse/SOLR-1377) (ryan) - In build.xml, use longfile="gnu" in tar task to avoid warnings about long file names [SOLR-1386](https://issues.apache.org/jira/browse/SOLR-1386) (Mark Miller) (shalin) -- Added Solr Powered By Logos [SOLR-1409](https://issues.apache.org/jira/browse/SOLR-1409) +- Added Solr Powered By Logos [SOLR-1409](https://issues.apache.org/jira/browse/SOLR-1409) - Log a warning if the deprecated charset option is used on GreekLowerCaseFilterFactory, RussianStemFilterFactory, RussianLowerCaseFilterFactory or RussianLetterTokenizerFactory. [SOLR-1410](https://issues.apache.org/jira/browse/SOLR-1410) (Robert Muir) (hossman) - Due to LUCENE-1906, Solr's tokenizer should use Tokenizer.correctOffset() instead of CharStream.correctOffset(). [SOLR-1423](https://issues.apache.org/jira/browse/SOLR-1423) [LUCENE-1906](https://issues.apache.org/jira/browse/LUCENE-1906) (Uwe Schindler) (koji) - Make it possible to run all tests in a package [SOLR-1441](https://issues.apache.org/jira/browse/SOLR-1441) (shalin) @@ -9600,7 +9599,7 @@ sample syntax.) 
- Fix bug in RequiredSolrParams where requiring a field specific param would fail if a general default value had been supplied. [SOLR-297](https://issues.apache.org/jira/browse/SOLR-297) (hossman) - Add proper support for Long and Doubles in sorting, etc. [SOLR-324](https://issues.apache.org/jira/browse/SOLR-324) (gsingers) - Fix WordDelimiterFilter handling of offsets for synonyms or other injected tokens that can break highlighting. [SOLR-331](https://issues.apache.org/jira/browse/SOLR-331) (yonik) -- Changed distributiondump.jsp to use Solr HOME instead of CWD to set path. [SOLR-333](https://issues.apache.org/jira/browse/SOLR-333) +- Changed distributiondump.jsp to use Solr HOME instead of CWD to set path. [SOLR-333](https://issues.apache.org/jira/browse/SOLR-333) - Removed duplicate contentType from raw-schema.jsp. [SOLR-393](https://issues.apache.org/jira/browse/SOLR-393) (bill) - SolrExceptionTest should now handle using OpenDNS as a DNS provider [SOLR-400](https://issues.apache.org/jira/browse/SOLR-400) (gsingers) - Requesting a large numbers of documents to be returned (limit) can result in an out-of-memory exception, even for a small index. [SOLR-413](https://issues.apache.org/jira/browse/SOLR-413) (yonik) @@ -9627,7 +9626,7 @@ sample syntax.) - Now sets the QParser on the ResponseBuilder [SOLR-585](https://issues.apache.org/jira/browse/SOLR-585) (gsingers) - Improved handling of badly formated query strings [SOLR-589](https://issues.apache.org/jira/browse/SOLR-589) (Sean Timm) (Otis Gospodnetic) - Limitation in pgrep on Linux platform breaks script-utils fixUser. [SOLR-590](https://issues.apache.org/jira/browse/SOLR-590) (Hannes Schmidt) (billa) -- SolrServlet no longer "caches" SolrCore. This was causing problems in Resin, and could potentially cause problems for customized usages of SolrServlet. [SOLR-597](https://issues.apache.org/jira/browse/SOLR-597) +- SolrServlet no longer "caches" SolrCore. 
This was causing problems in Resin, and could potentially cause problems for customized usages of SolrServlet. [SOLR-597](https://issues.apache.org/jira/browse/SOLR-597) - DebugComponent now always occurs last in the SearchHandler list unless the components are explicitly declared. [SOLR-598](https://issues.apache.org/jira/browse/SOLR-598) (gsingers) - If the spellchecking path is relative, make it relative to the Solr Data Directory. [SOLR-604](https://issues.apache.org/jira/browse/SOLR-604) (Shalin Shekhar Mangar) (gsingers) - Fixed spell check collation offset issue. [SOLR-606](https://issues.apache.org/jira/browse/SOLR-606) (Stefan Oestreicher) (Geoffrey Young) (gsingers) @@ -9651,9 +9650,9 @@ sample syntax.) - Tweaks to post.jar so it prints the error message from Solr. [SOLR-268](https://issues.apache.org/jira/browse/SOLR-268) (Brian Whitman) (hossman) - The create method in all TokenFilter and Tokenizer Factories provided by Solr now declare their specific return types instead of just using "TokenStream" [SOLR-367](https://issues.apache.org/jira/browse/SOLR-367) (hossman) - Hooks add to build system for automatic generation of (stub) Tokenizer and TokenFilter Factories. Also: new Factories for all Tokenizers and TokenFilters provided by the lucene-analyzers-2.2.0.jar -- includes support for German, Chinese, Russan, Dutch, Greek, Brazilian, Thai, and French. [SOLR-396](https://issues.apache.org/jira/browse/SOLR-396) (hossman) -- . Changed the names of the Solr JARs to use the defacto standard JAR names based on project-name-version.jar. This yields, for example: apache-solr-common-1.3-dev.jar apache-solr-solrj-1.3-dev.jar apache-solr-1.3-dev.jar [SOLR-411](https://issues.apache.org/jira/browse/SOLR-411) +- . Changed the names of the Solr JARs to use the defacto standard JAR names based on project-name-version.jar. 
This yields, for example: apache-solr-common-1.3-dev.jar apache-solr-solrj-1.3-dev.jar apache-solr-1.3-dev.jar [SOLR-411](https://issues.apache.org/jira/browse/SOLR-411) - Changed analysis.jsp to use POST instead of GET, also made the input area a bit bigger [SOLR-451](https://issues.apache.org/jira/browse/SOLR-451) (gsingers) -- Added clover code coverage targets for committers and the nightly build. Requires the Clover library, as licensed to Apache and only available privately. To run: ant -Drun.clover=true clean clover test generate-clover-reports [SOLR-479](https://issues.apache.org/jira/browse/SOLR-479) +- Added clover code coverage targets for committers and the nightly build. Requires the Clover library, as licensed to Apache and only available privately. To run: ant -Drun.clover=true clean clover test generate-clover-reports [SOLR-479](https://issues.apache.org/jira/browse/SOLR-479) - Added in deprecation comments. [SOLR-489](https://issues.apache.org/jira/browse/SOLR-489) (Sean Timm) (Lars Kothoff) (gsingers) - Nightly release includes client sources. [SOLR-510](https://issues.apache.org/jira/browse/SOLR-510) (koji) - Changed luke.xsl to use divs w/css for generating histograms instead of SVG [SOLR-518](https://issues.apache.org/jira/browse/SOLR-518) (Thomas Peuss) (hossman) @@ -9675,11 +9674,11 @@ sample syntax.) - / SOLR-695: If you have used "MultiCore" functionality in an unreleased version of 1.3-dev, many classes and configs have been renamed for the official 1.3 release. Speciffically, solr.xml has replaced multicore.xml, and uses a slightly different syntax. 
The solrj classes: MultiCore{Request/Response/Params} have been renamed: CoreAdmin{Request/Response/Params} [SOLR-689](https://issues.apache.org/jira/browse/SOLR-689) [SOLR-695](https://issues.apache.org/jira/browse/SOLR-695) (hossman) (ryan) (Henri Biestro) - Migrated to stable released builds of StAX API 1.0.1 and StAX 1.2.0 [SOLR-692](https://issues.apache.org/jira/browse/SOLR-692) (shalin) - SolrQueryParser now uses a ConstantScoreQuery for wildcard queries that prevent an exception from being thrown when the number of matching terms exceeds the BooleanQuery clause limit. [SOLR-737](https://issues.apache.org/jira/browse/SOLR-737) (yonik) -- Upgraded to Lucene 2.2.0; June 18, 2007. +- Upgraded to Lucene 2.2.0; June 18, 2007. - Upgraded to commons-CSV r609327, which fixes escaping bugs and introduces new escaping and whitespace handling options to increase compatibility with different formats. (yonik) -- Upgraded to Lucene 2.3.0; Jan 23, 2008. -- Upgrade to Lucene 2.3.1 -- Upgrade to Lucene 2.3.2 +- Upgraded to Lucene 2.3.0; Jan 23, 2008. +- Upgrade to Lucene 2.3.1 +- Upgrade to Lucene 2.3.2 - Upgraded to Lucene 2.4-dev (r686801) (yonik) - Upgraded to Lucene 2.4-dev (r688745) 27-Aug-2008 (yonik) - Upgraded to Lucene 2.4-dev (r691741) 03-Sep-2008 (yonik) @@ -9767,8 +9766,8 @@ sample syntax.) - DisMaxRequestHandler's bq, bf, qf, and pf parameters can now accept multiple values (klaas) - Query are re-written before highlighting is performed. This enables proper highlighting of prefix and wildcard queries (klaas) - A meaningful exception is raised when attempting to add a doc missing a unique id if it is declared in the schema and allowDups=false. (ryan) (klaas) -- Updated to Lucene 2.1 -- Updated to Lucene 2007-05-20_00-04-53 +- Updated to Lucene 2.1 +- Updated to Lucene 2007-05-20_00-04-53 [1.1.0] - 2006-12-22 @@ -9797,27 +9796,27 @@ sample syntax.) 
- ${solr.home}/lib directory can now be used for specifying "plugin" jars SOLR-68 [SOLR-68](https://issues.apache.org/jira/browse/SOLR-68) (hossman) - Support for "Date Math" relative "NOW" when specifying values of a DateField in a query -- or when adding a document. SOLR-71 [SOLR-71](https://issues.apache.org/jira/browse/SOLR-71) (hossman) - useColdSearcher control in solrconfig.xml prevents the first searcher from being used before it's done warming. This can help prevent thrashing on startup when multiple requests hit a cold searcher. The default is "false", preventing use before warm. SOLR-77 [SOLR-77](https://issues.apache.org/jira/browse/SOLR-77) (yonik) -- added support for setting Lucene's positionIncrementGap -- Admin: new statistics for SolrIndexSearcher -- Admin: caches now show config params on stats page -- max() function added to FunctionQuery suite -- postOptimize hook, mirroring the functionallity of the postCommit hook, but only called on an index optimize. -- Ability to HTTP POST query requests to /select in addition to HTTP-GET +- added support for setting Lucene's positionIncrementGap +- Admin: new statistics for SolrIndexSearcher +- Admin: caches now show config params on stats page +- max() function added to FunctionQuery suite +- postOptimize hook, mirroring the functionallity of the postCommit hook, but only called on an index optimize. +- Ability to HTTP POST query requests to /select in addition to HTTP-GET - The default search field may now be overridden by requests to the standard request handler using the df query parameter. (Erik Hatcher) - Added DisMaxRequestHandler and SolrPluginUtils. (Chris Hostetter) - Added KeywordTokenizerFactory (hossman) - new DocSet.andNot(), DocSet.andNotSize() (yonik) -- Made query parser default operator configurable via schema.xml: <solrQueryParser defaultOperator="AND|OR"/> The default operator remains "OR". 
+- Made query parser default operator configurable via schema.xml: <solrQueryParser defaultOperator="AND|OR"/> The default operator remains "OR". - Default operator for query parsing can now be specified with q.op=AND|OR from the client request, overriding the schema value. (ehatcher) ### Changed (6 changes) - OpenBitSet completed, replaces BitSet as the implementation for BitDocSet. Iteration is faster, and BitDocSet.intersectionSize(BitDocSet) and unionSize is between 3 and 4 times faster. SOLR-15 [SOLR-15](https://issues.apache.org/jira/browse/SOLR-15) (yonik) -- getDocListAndSet can now generate both a DocList and a DocSet from a single lucene query. -- BitDocSet.intersectionSize(HashDocSet) no longer generates an intermediate set -- much faster unionSize when one of the sets is a HashDocSet: O(smaller_set_size) +- getDocListAndSet can now generate both a DocList and a DocSet from a single lucene query. +- BitDocSet.intersectionSize(HashDocSet) no longer generates an intermediate set +- much faster unionSize when one of the sets is a HashDocSet: O(smaller_set_size) - Optimized getDocSet() for term queries resulting in a 36% speedup of facet.field queries where DocSets aren't cached (for example, if the number of terms in the field is larger than the filter cache.) (yonik) -- Optimized facet.field faceting by as much as 500 times when the field has a single token per document (not multiValued & not tokenized) by using the Lucene FieldCache entry for that field to tally term counts. The first request utilizing the FieldCache will take longer than subsequent ones. +- Optimized facet.field faceting by as much as 500 times when the field has a single token per document (not multiValued & not tokenized) by using the Lucene FieldCache entry for that field to tally term counts. The first request utilizing the FieldCache will take longer than subsequent ones. ### Fixed (13 changes) @@ -9829,7 +9828,7 @@ sample syntax.) 
- Fixed delete-by-id for field types who's indexed form is different from the printable form (mainly sortable numeric types) - Added escaping of attribute values in the XML response (Erik Hatcher) - Added empty extractTerms() to FunctionQuery to enable use in a MultiSearcher (Yonik) -- WordDelimiterFilter sometimes lost token positionIncrement information +- WordDelimiterFilter sometimes lost token positionIncrement information - Fix reverse sorting for fields were sortMissingFirst=true (Rob Staveley) (yonik) - Escape '>' in XML output ([because ]]> is illegal in CharData]()) - field boosts weren't being applied and doc boosts were being applied to fields (klaas) @@ -9843,22 +9842,22 @@ sample syntax.) - Highlighter params changed to be prefixed with "hl."; allow fragmentsize customization and per-field overrides on many options SOLR-37 [SOLR-37](https://issues.apache.org/jira/browse/SOLR-37) (Andrew May) (klaas) - Default param values for DisMaxRequestHandler should now be specified using a '<lst name="defaults">...</lst>' init param, for backwards compatability all init prams will be used as defaults if an init param with that name does not exist. SOLR-43 [SOLR-43](https://issues.apache.org/jira/browse/SOLR-43) (hossman) - The DisMaxRequestHandler now supports multiple occurances of the "fq" param. SOLR-44 [SOLR-44](https://issues.apache.org/jira/browse/SOLR-44) (hossman) -- Added javascript to catch empty query in admin query forms (Tomislav Nakic-Alfirevic via billa, SOLR-48 [SOLR-48](https://issues.apache.org/jira/browse/SOLR-48) +- Added javascript to catch empty query in admin query forms (Tomislav Nakic-Alfirevic via billa, SOLR-48 [SOLR-48](https://issues.apache.org/jira/browse/SOLR-48) - Lazy field loading can be enabled via a solrconfig directive. 
This will be faster when not all stored fields are needed from a document SOLR-52 [SOLR-52](https://issues.apache.org/jira/browse/SOLR-52) (klaas) - Made admin JSPs return XML and transform them with new XSL stylesheets SOLR-58 [SOLR-58](https://issues.apache.org/jira/browse/SOLR-58) (Otis Gospodnetic) - If the "echoParams=explicit" request parameter is set, request parameters are copied to the output. In an XML output, they appear in new <lst name="params"> list inside the new <lst name="responseHeader"> element, which replaces the old <responseHeader>. Adding a version=2.1 parameter to the request produces the old format, for backwards compatibility SOLR-59 [SOLR-59](https://issues.apache.org/jira/browse/SOLR-59) (bdelacretaz and yonik) - Modified admin styles to improve display in Internet Explorer SOLR-6 [SOLR-6](https://issues.apache.org/jira/browse/SOLR-6) (Greg Ludington) (billa) -- check solr return code in admin scripts, SOLR-62 [SOLR-62](https://issues.apache.org/jira/browse/SOLR-62) -- blackslash escape * in ssh command used in snappuller for zsh compatibility, SOLR-63 [SOLR-63](https://issues.apache.org/jira/browse/SOLR-63) +- check solr return code in admin scripts, SOLR-62 [SOLR-62](https://issues.apache.org/jira/browse/SOLR-62) +- blackslash escape * in ssh command used in snappuller for zsh compatibility, SOLR-63 [SOLR-63](https://issues.apache.org/jira/browse/SOLR-63) - Document update handling locking is much sparser, allowing performance gains through multiple threads. 
Large commits also might be faster SOLR-65 [SOLR-65](https://issues.apache.org/jira/browse/SOLR-65) (klaas) - Config parsing should now work useing DOM Level 2 parsers -- Solr previously relied on getTextContent which is a DOM Level 3 addition SOLR-78 [SOLR-78](https://issues.apache.org/jira/browse/SOLR-78) (Alexander Saar) (hossman) -- classes reorganized into different packages, package names changed to Apache -- force read of document stored fields in QuerySenderListener -- Solr now looks in ./solr/conf for config, ./solr/data for data configurable via solr.solr.home system property -- Upgrade to Lucene 2.0 nightly build 2006-06-22, lucene SVN revision 416224, http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=markup&pathrev=416224 -- Upgrade to Lucene 2.0 nightly build 2006-07-15, lucene SVN revision 422302, -- Updated to Lucene 2.0 nightly build 2006-09-07, SVN revision 462111 -- Updated to Lucene 2.0 nightly build 2006-11-15, SVN revision 475069 +- classes reorganized into different packages, package names changed to Apache +- force read of document stored fields in QuerySenderListener +- Solr now looks in ./solr/conf for config, ./solr/data for data configurable via solr.solr.home system property +- Upgrade to Lucene 2.0 nightly build 2006-06-22, lucene SVN revision 416224, http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=markup&pathrev=416224 +- Upgrade to Lucene 2.0 nightly build 2006-07-15, lucene SVN revision 422302, +- Updated to Lucene 2.0 nightly build 2006-09-07, SVN revision 462111 +- Updated to Lucene 2.0 nightly build 2006-11-15, SVN revision 475069 - Simplified index.jsp and form.jsp, primarily by removing/hiding XML specific params, and adding an option to pick the output type. 
(hossman) - Added new numeric build property "specversion" to allow clean MANIFEST.MF files (hossman) - Added Solr/Lucene versions to "Info" page (hossman) diff --git a/changelog/unreleased/PR#4149-distributed-luke.yml b/changelog/unreleased/PR#4149-distributed-luke.yml deleted file mode 100644 index aae5ac517b55..000000000000 --- a/changelog/unreleased/PR#4149-distributed-luke.yml +++ /dev/null @@ -1,8 +0,0 @@ -# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc -title: Distributed Luke -type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other -authors: - - name: Luke Kot-Zaniewski -links: - name: PR#4149 - url: https://github.com/apache/solr/pull/4149 From 46b20ece30eb4449685e0dbcf858d1949c985a0f Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 25 Feb 2026 16:26:46 -0500 Subject: [PATCH 24/46] whitespace --- CHANGELOG.md | 3 ++- changelog/unreleased/PR#4149-distributed-luke | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 changelog/unreleased/PR#4149-distributed-luke diff --git a/CHANGELOG.md b/CHANGELOG.md index aea57a6a6d2c..00eb87b81e68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M [unreleased] ------------ -### Added (12 changes) +### Added (13 changes) - Create new v2 APIs for listing and reading collection properties ("collprops") [SOLR-12224](https://issues.apache.org/jira/browse/SOLR-12224) (Jason Gerlowski) - Introduce new SolrJ SolrRequest classes for metrics and "system info" requests. [SOLR-17136](https://issues.apache.org/jira/browse/SOLR-17136) (Isabelle Giguère) (Pierre Salagnac) @@ -27,6 +27,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M - CoreAdmin API (/admin/cores?action=UPGRADECOREINDEX) to upgrade an index in-place [SOLR-18096](https://issues.apache.org/jira/browse/SOLR-18096) (Rahul Goswami) - CrossDC Consumer - add Prometheus metrics [SOLR-18060](https://issues.apache.org/jira/browse/SOLR-18060) (Andrzej Bialecki @ab) - CrossDC - support arbitrary Kafka properties [SOLR-18062](https://issues.apache.org/jira/browse/SOLR-18062) (Andrzej Bialecki @ab) +- Distributed/Cloud support for admin/luke [SOLR-8127](https://issues.apache.org/jira/browse/SOLR-8127) (Luke Kot-Zaniewski) ### Changed (2 changes) diff --git a/changelog/unreleased/PR#4149-distributed-luke b/changelog/unreleased/PR#4149-distributed-luke new file mode 100644 index 000000000000..aae5ac517b55 --- /dev/null +++ b/changelog/unreleased/PR#4149-distributed-luke @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Distributed Luke +type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Luke Kot-Zaniewski +links: + name: PR#4149 + url: https://github.com/apache/solr/pull/4149 From 4b10c1f079c8c51e89284a8a1f382ae4742c8ad9 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 26 Feb 2026 17:00:01 -0500 Subject: [PATCH 25/46] remove log --- .../admin/LukeRequestHandlerDistribTest.java | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index f3d2a0e73d5e..216e9cbc2260 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -18,7 +18,6 @@ import static org.apache.solr.common.params.CommonParams.DISTRIB; -import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.LinkedHashMap; 
import java.util.List; @@ -48,8 +47,6 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class LukeRequestHandlerDistribTest extends SolrCloudTestCase { @@ -352,8 +349,6 @@ public void testDistribShowSchema() throws Exception { "count(//lst[@name='shards']/lst)=2"); } - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - /** * Verifies that distributed Luke detects inconsistent index flags across shards. Uses Schema API * to change a field's {@code stored} property between indexing on different shards, producing @@ -362,13 +357,9 @@ public void testDistribShowSchema() throws Exception { @Test public void testInconsistentIndexFlagsAcrossShards() throws Exception { String collection = "lukeInconsistentFlags"; - try { - System.setProperty("managed.schema.mutable", "true"); - CollectionAdminRequest.createCollection(collection, "managed", 2, 1) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - } catch (Exception e) { - log.error("yooo", e); - } + System.setProperty("managed.schema.mutable", "true"); + CollectionAdminRequest.createCollection(collection, "managed", 2, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); cluster.waitForActiveCollection(collection, 2, 2); From 552d910975d316d60d6ebc8d8246942c1c4485b9 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 20 Mar 2026 13:34:45 -0400 Subject: [PATCH 26/46] Update solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc Co-authored-by: David Smiley --- .../modules/indexing-guide/pages/luke-request-handler.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index c1f9068b7b7c..41328e18f17f 100644 --- 
a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -168,5 +168,5 @@ If the same document `id` is found on multiple shards (indicating index corrupti Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not aggregated across shards. These statistics are shard-local and appear in each shard's entry under the `shards` key. -For cross-shard term-level aggregation, Solr's xref:query-guide:faceting.adoc[faceting API] provides refinement and distributed aggregation, though it differs from Luke's term statistics: faceting ignores deleted documents, is more flexible, and is generally more expensive. +For cross-shard term-level aggregation, Solr's xref:query-guide:faceting.adoc[faceting API] provides refinement and distributed aggregation, though it differs from Luke's term statistics: faceting doesn't count deleted documents, is more flexible, and is generally more expensive. A case could be made for adding aggregation strategies for Luke-powered term statistics (which offer better performance at the cost of flexibility), however this would require community interest to motivate. From 4a6a1b61e98a8680482e6c55259fb26099e9ce1f Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 20 Mar 2026 15:10:46 -0400 Subject: [PATCH 27/46] comment and changelog reversion --- CHANGELOG.md | 3 +-- .../java/org/apache/solr/handler/admin/LukeRequestHandler.java | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00eb87b81e68..aea57a6a6d2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ This file lists Solr's raw release notes with details of every change to Solr. 
M [unreleased] ------------ -### Added (13 changes) +### Added (12 changes) - Create new v2 APIs for listing and reading collection properties ("collprops") [SOLR-12224](https://issues.apache.org/jira/browse/SOLR-12224) (Jason Gerlowski) - Introduce new SolrJ SolrRequest classes for metrics and "system info" requests. [SOLR-17136](https://issues.apache.org/jira/browse/SOLR-17136) (Isabelle Giguère) (Pierre Salagnac) @@ -27,7 +27,6 @@ This file lists Solr's raw release notes with details of every change to Solr. M - CoreAdmin API (/admin/cores?action=UPGRADECOREINDEX) to upgrade an index in-place [SOLR-18096](https://issues.apache.org/jira/browse/SOLR-18096) (Rahul Goswami) - CrossDC Consumer - add Prometheus metrics [SOLR-18060](https://issues.apache.org/jira/browse/SOLR-18060) (Andrzej Bialecki @ab) - CrossDC - support arbitrary Kafka properties [SOLR-18062](https://issues.apache.org/jira/browse/SOLR-18062) (Andrzej Bialecki @ab) -- Distributed/Cloud support for admin/luke [SOLR-8127](https://issues.apache.org/jira/browse/SOLR-8127) (Luke Kot-Zaniewski) ### Changed (2 changes) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index e15a5a3b47b4..33edcc026010 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -244,6 +244,7 @@ && handleDistributed(req, rsp)) { /** Per-field accumulation state across shards: aggregated response data and field validation. */ private static class AggregatedFieldData { + // keyed by individual field info properties, i.e. type, schema, etc. 
final SimpleOrderedMap aggregated = new SimpleOrderedMap<>(); final String originalShardAddr; final LukeResponse.FieldInfo originalFieldInfo; From 607627fdef756dfc7123b7b67dd5b0fcb4274a9d Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Sun, 22 Mar 2026 13:21:28 -0400 Subject: [PATCH 28/46] address some PR comments --- .../apache/solr/handler/admin/LukeRequestHandler.java | 9 +++++---- .../indexing-guide/pages/luke-request-handler.adoc | 2 +- .../apache/solr/client/solrj/response/LukeResponse.java | 8 +++++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 33edcc026010..63babad43d40 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -563,9 +563,8 @@ private void validateFieldAttr( String currentStr = currentVal != null ? currentVal.toString() : null; String expectedStr = expectedVal != null ? expectedVal.toString() : null; if (!Objects.equals(currentStr, expectedStr)) { - throw new SolrException( - ErrorCode.SERVER_ERROR, - "Field '" + String error = + "FIELD CONFIGURATION MISMATCH! 
Field '" + fieldName + "' has inconsistent '" + attrName @@ -577,7 +576,9 @@ private void validateFieldAttr( + currentStr + "' (from " + currentShardAddr - + ")"); + + ")"; + log.error(error); + throw new SolrException(ErrorCode.SERVER_ERROR, error); } } diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index 41328e18f17f..85e59820085e 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -131,7 +131,7 @@ http://localhost:8983/solr/techproducts/admin/luke?fl=manu&docId=0 From SolrJ, you can access /luke using the {solr-javadocs}/solrj/org/apache/solr/client/solrj/request/LukeRequest.html[`LukeRequest`] object. -== Distributed Mode (SolrCloud) +== Distributed Mode (multiple shards) When running in SolrCloud, the Luke handler can aggregate results from all shards in a collection by setting `distrib=true`. By default, `distrib` is `false` and the handler inspects only the local shard's index. 
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index 66faa1ac7d3b..afbd7d322fe0 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -26,6 +26,7 @@ import java.util.Map; import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; /** * This is an incomplete representation of the data returned from Luke @@ -266,12 +267,13 @@ public void setResponse(NamedList<Object> res) { } // Parse shards section (present in distributed responses) - NamedList<Object> shardsNL = (NamedList<Object>) res.get("shards"); + SimpleOrderedMap<NamedList<Object>> shardsNL = + (SimpleOrderedMap<NamedList<Object>>) res.get("shards"); if (shardsNL != null) { shardResponses = new LinkedHashMap<>(); - for (Map.Entry<String, Object> entry : shardsNL) { + for (Map.Entry<String, NamedList<Object>> entry : shardsNL) { LukeResponse shardRsp = new LukeResponse(); - shardRsp.setResponse((NamedList<Object>) entry.getValue()); + shardRsp.setResponse(entry.getValue()); shardResponses.put(entry.getKey(), shardRsp); } } From 84e66204b3c6ff5e90f70c36d2f3c9f4d812ef23 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 23 Mar 2026 17:39:24 -0400 Subject: [PATCH 29/46] checkpoint --- .../handler/admin/LukeRequestHandler.java | 30 +- .../handler/admin/LukeHandlerCloudTest.java | 173 ++++ .../admin/LukeRequestHandlerDistribTest.java | 748 +++++++----------- .../pages/luke-request-handler.adoc | 9 +- .../client/solrj/response/LukeResponse.java | 15 +- 5 files changed, 476 insertions(+), 499 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 63babad43d40..84aacf133883 100644 
--- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -77,6 +77,7 @@ import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; @@ -137,7 +138,6 @@ public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAw private static final String KEY_TYPE = "type"; private static final String KEY_SCHEMA_FLAGS = "schema"; private static final String KEY_DOCS = "docs"; - private static final String KEY_DOCS_AS_LONG = "docsAsLong"; private static final String KEY_DISTINCT = "distinct"; private static final String KEY_TOP_TERMS = "topTerms"; private static final String KEY_DYNAMIC_BASE = "dynamicBase"; @@ -176,9 +176,12 @@ public static ShowStyle get(String v) { public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { SolrParams params = req.getParams(); - if (params.getBool(DISTRIB, false) - && req.getCoreContainer().isZooKeeperAware() - && handleDistributed(req, rsp)) { + boolean isDistrib = params.getBool(DISTRIB, req.getCoreContainer().isZooKeeperAware()); + if (!isDistrib) { + String shards = params.get(ShardParams.SHARDS); + isDistrib = shards != null && shards.indexOf('/') > 0; + } + if (isDistrib && handleDistributed(req, rsp)) { return; } @@ -493,7 +496,11 @@ private void aggregateShardField( fieldData = new AggregatedFieldData(shardAddr, fi); aggregatedFields.put(fieldName, fieldData); - // First shard to report this field: populate aggregated with schema-derived attrs + // First shard to report this field — populate response keys: + // "type" → field type name (e.g. 
"string", "text_general") + // "schema" → schema flags string (e.g. "I-S-M-----OF-----l") + // "dynamicBase" → dynamic field glob if this is a dynamic field (e.g. "*_s") + // "index" → index-derived flags from the first shard that has them fieldData.aggregated.add(KEY_TYPE, fi.getType()); fieldData.aggregated.add(KEY_SCHEMA_FLAGS, fi.getSchema()); Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); @@ -504,7 +511,7 @@ private void aggregateShardField( fieldData.aggregated.add(KEY_INDEX_FLAGS, fieldData.indexFlags); } } else { - // Subsequent shards: validate consistency + // Subsequent shards: validate that "type", "schema", and "dynamicBase" match validateFieldAttr( fieldName, KEY_TYPE, @@ -545,10 +552,11 @@ private void aggregateShardField( } } - Long docsAsLong = fi.getDocsAsLong(); - if (docsAsLong != null) { + // "docs" → sum of per-shard doc counts (number of documents containing this field) + Long docsLong = fi.getDocs(); + if (docsLong != null) { fieldData.aggregated.compute( - KEY_DOCS_AS_LONG, (key, val) -> val == null ? docsAsLong : (Long) val + docsAsLong); + KEY_DOCS, (key, val) -> val == null ? docsLong : (Long) val + docsLong); } } @@ -576,7 +584,7 @@ private void validateFieldAttr( + currentStr + "' (from " + currentShardAddr - + ")"; + + "). 
Use distrib=false to query individual shards and compare field configurations."; log.error(error); throw new SolrException(ErrorCode.SERVER_ERROR, error); } @@ -836,7 +844,7 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re } } } - fieldMap.add(KEY_DOCS, terms.getDocCount()); + fieldMap.add(KEY_DOCS, (long) terms.getDocCount()); } if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) { getDetailedFieldInfo(req, fieldName, fieldMap); diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java new file mode 100644 index 000000000000..d8c75cfcc0c8 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.handler.admin; + +import static org.apache.solr.common.params.CommonParams.DISTRIB; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.SolrQuery; +import org.apache.solr.client.solrj.request.schema.SchemaRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Cloud-specific Luke tests that require SolrCloud features like managed schema and Schema API. + */ +public class LukeHandlerCloudTest extends SolrCloudTestCase { + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2).addConfig("managed", configset("cloud-managed")).configure(); + } + + @AfterClass + public static void afterClass() throws Exception { + shutdownCluster(); + } + + private void requestLuke(String collection, ModifiableSolrParams extra) throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + if (extra != null) { + for (Map.Entry entry : extra.getMap().entrySet()) { + params.set(entry.getKey(), entry.getValue()); + } + } + QueryRequest req = new QueryRequest(params); + cluster.getSolrClient().request(req, collection); + } + + /** + * Verifies that distributed Luke detects inconsistent index flags across shards. 
Uses Schema API + * to change a field's {@code stored} property between indexing on different shards, producing + * different Lucene FieldInfo (and thus different index flags strings) on each shard. + */ + @Test + public void testInconsistentIndexFlagsAcrossShards() throws Exception { + String collection = "lukeInconsistentFlags"; + System.setProperty("managed.schema.mutable", "true"); + CollectionAdminRequest.createCollection(collection, "managed", 2, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + + cluster.waitForActiveCollection(collection, 2, 2); + + try { + // Add a field with stored=true, indexed=true + Map fieldAttrs = new LinkedHashMap<>(); + fieldAttrs.put("name", "test_flag_s"); + fieldAttrs.put("type", "string"); + fieldAttrs.put("stored", true); + fieldAttrs.put("indexed", true); + new SchemaRequest.AddField(fieldAttrs).process(cluster.getSolrClient(), collection); + + // Index a target doc WITH the field, plus seed docs without it + SolrInputDocument targetDoc = new SolrInputDocument(); + targetDoc.addField("id", "target"); + targetDoc.addField("test_flag_s", "has_indexed"); + cluster.getSolrClient().add(collection, targetDoc); + + List seedDocs = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "seed_" + i); + seedDocs.add(doc); + } + cluster.getSolrClient().add(collection, seedDocs); + cluster.getSolrClient().commit(collection); + + // Find which shard has the target doc by querying each replica directly. + // Must use distrib=false — SolrCloud defaults distrib to true even on direct replica queries. 
+ DocCollection docColl = getCollectionState(collection); + String targetSliceName = null; + for (Slice slice : docColl.getSlices()) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("id:target"); + q.set(DISTRIB, "false"); + QueryResponse qr = client.query(q); + if (qr.getResults().getNumFound() > 0) { + targetSliceName = slice.getName(); + } + } + } + assertNotNull("target doc should exist on a shard", targetSliceName); + + // Find a seed doc on the other shard + String otherDocId = null; + for (Slice slice : docColl.getSlices()) { + if (!slice.getName().equals(targetSliceName)) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("*:*"); + q.setRows(1); + q.set(DISTRIB, "false"); + QueryResponse qr = client.query(q); + assertTrue("other shard should have seed docs", qr.getResults().getNumFound() > 0); + otherDocId = (String) qr.getResults().getFirst().getFieldValue("id"); + } + break; + } + } + assertNotNull("should find a seed doc on the other shard", otherDocId); + + // Change the field to stored=false via Schema API + fieldAttrs.put("stored", false); + new SchemaRequest.ReplaceField(fieldAttrs).process(cluster.getSolrClient(), collection); + + // Reload collection to pick up schema change + CollectionAdminRequest.reloadCollection(collection).process(cluster.getSolrClient()); + + // Update the other-shard doc to include the field (now unstored in the new segment) + SolrInputDocument updateDoc = new SolrInputDocument(); + updateDoc.addField("id", otherDocId); + updateDoc.addField("test_flag_s", "not_indexed"); + cluster.getSolrClient().add(collection, updateDoc); + cluster.getSolrClient().commit(collection); + + // Distributed Luke should detect inconsistent index flags between the two shards. + // One shard has stored=true segments, the other has stored=false segments for test_flag_s. 
+ // No need to set distrib=true — ZK-aware nodes default to distributed mode. + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("fl", "test_flag_s"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception chain should mention inconsistent index flags: " + fullMessage, + fullMessage.contains("inconsistent")); + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } +} diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 216e9cbc2260..07cc4e245671 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -16,125 +16,87 @@ */ package org.apache.solr.handler.admin; -import static org.apache.solr.common.params.CommonParams.DISTRIB; - -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; -import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.client.solrj.request.QueryRequest; -import org.apache.solr.client.solrj.request.SolrQuery; -import org.apache.solr.client.solrj.request.schema.SchemaRequest; import org.apache.solr.client.solrj.response.InputStreamResponseParser; import org.apache.solr.client.solrj.response.LukeResponse; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.cloud.DocCollection; -import 
org.apache.solr.common.cloud.Replica; -import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; -import org.apache.solr.embedded.JettySolrRunner; import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.CommitUpdateCommand; import org.apache.solr.util.BaseTestHarness; -import org.junit.AfterClass; -import org.junit.BeforeClass; import org.junit.Test; -public class LukeRequestHandlerDistribTest extends SolrCloudTestCase { +public class LukeRequestHandlerDistribTest extends BaseDistributedSearchTestCase { - private static final String COLLECTION = "lukeDistribTest"; private static final int NUM_DOCS = 20; - @BeforeClass - public static void setupCluster() throws Exception { - configureCluster(2) - .addConfig("conf", configset("cloud-dynamic")) - .addConfig("managed", configset("cloud-managed")) - .configure(); - - CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - cluster.waitForActiveCollection(COLLECTION, 2, 2); - - List docs = new ArrayList<>(); - for (int i = 0; i < NUM_DOCS; i++) { - SolrInputDocument doc = new SolrInputDocument(); - doc.addField("id", String.valueOf(i)); - doc.addField("name", "name_" + i); - doc.addField("subject", "subject value " + (i % 5)); - docs.add(doc); - } - cluster.getSolrClient().add(COLLECTION, docs); - cluster.getSolrClient().commit(COLLECTION); + public LukeRequestHandlerDistribTest() { + fixShardCount(2); } - @AfterClass - public static void afterClass() throws Exception { - shutdownCluster(); + private LukeResponse requestLuke() throws Exception { + return requestLuke(new ModifiableSolrParams()); } - private LukeResponse requestLuke(String collection, ModifiableSolrParams extra) throws Exception { + private LukeResponse requestLuke(ModifiableSolrParams 
extra) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("qt", "/admin/luke"); params.set("numTerms", "0"); - if (extra != null) { - for (Map.Entry entry : extra.getMap().entrySet()) { - params.set(entry.getKey(), entry.getValue()); - } - } + params.set("shards", shards); + params.add(extra); QueryRequest req = new QueryRequest(params); - NamedList raw = cluster.getSolrClient().request(req, collection); + NamedList raw = clients.get(0).request(req); LukeResponse rsp = new LukeResponse(); rsp.setResponse(raw); return rsp; } - private void assertLukeXPath(String collection, ModifiableSolrParams extra, String... xpaths) - throws Exception { + private void assertLukeXPath(ModifiableSolrParams extra, String... xpaths) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("qt", "/admin/luke"); params.set("numTerms", "0"); params.set("wt", "xml"); - if (extra != null) { - for (Map.Entry entry : extra.getMap().entrySet()) { - params.set(entry.getKey(), entry.getValue()); - } - } + params.set("shards", shards); + params.add(extra); QueryRequest req = new QueryRequest(params); req.setResponseParser(new InputStreamResponseParser("xml")); - NamedList raw = cluster.getSolrClient().request(req, collection); + NamedList raw = clients.get(0).request(req); String xml = InputStreamResponseParser.consumeResponseToString(raw); String failedXpath = BaseTestHarness.validateXPath(xml, xpaths); assertNull("XPath validation failed: " + failedXpath + "\nResponse:\n" + xml, failedXpath); } + private void indexTestData() throws Exception { + for (int i = 0; i < NUM_DOCS; i++) { + index("id", String.valueOf(i), "name", "name_" + i, "subject", "subject value " + (i % 5)); + } + commit(); + } + @Test + @ShardsFixed(num = 2) public void testDistributedAggregate() throws Exception { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); + indexTestData(); - LukeResponse rsp = 
requestLuke(COLLECTION, params); + LukeResponse rsp = requestLuke(); assertEquals( "aggregated numDocs should equal total docs", NUM_DOCS, rsp.getNumDocsAsLong().longValue()); assertTrue("aggregated maxDoc should be > 0", rsp.getMaxDoc() > 0); assertNotNull("deletedDocs should be present", rsp.getDeletedDocsAsLong()); - Map shards = rsp.getShardResponses(); - assertNotNull("shards section should be present", shards); - assertEquals("should have 2 shard entries", 2, shards.size()); + Map shardResponses = rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); + assertEquals("should have 2 shard entries", 2, shardResponses.size()); - // Each shard should have its own index info; per-shard numDocs should sum to total long sumShardDocs = 0; - for (Map.Entry entry : shards.entrySet()) { + for (Map.Entry entry : shardResponses.entrySet()) { LukeResponse shardLuke = entry.getValue(); assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); assertNotNull("each shard should have maxDoc", shardLuke.getMaxDoc()); @@ -147,11 +109,11 @@ public void testDistributedAggregate() throws Exception { } @Test + @ShardsFixed(num = 2) public void testDistributedFieldsAggregate() throws Exception { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); + indexTestData(); - LukeResponse rsp = requestLuke(COLLECTION, params); + LukeResponse rsp = requestLuke(); Map fields = rsp.getFieldInfo(); assertNotNull("fields should be present", fields); @@ -163,34 +125,34 @@ public void testDistributedFieldsAggregate() throws Exception { assertEquals( "aggregated docs count for 'name' should equal total docs", NUM_DOCS, - nameField.getDocsAsLong().longValue()); + nameField.getDocs().longValue()); LukeResponse.FieldInfo idField = fields.get("id"); assertNotNull("'id' field should be present", idField); assertEquals("id field type should be string", "string", idField.getType()); - // Validate aggregated 
field metadata matches schema and test data assertLukeXPath( - COLLECTION, - params, + new ModifiableSolrParams(), "//lst[@name='index']/long[@name='numDocs'][.='20']", "count(//lst[@name='shards']/lst)=2", "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", - "//lst[@name='fields']/lst[@name='name']/str[@name='schema'][.='ITS---------------']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema']", "//lst[@name='fields']/lst[@name='name']/str[@name='index']", - "//lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='20']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='20']", "//lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", - "//lst[@name='fields']/lst[@name='id']/long[@name='docsAsLong'][.='20']"); + "//lst[@name='fields']/lst[@name='id']/long[@name='docs'][.='20']"); } @Test + @ShardsFixed(num = 2) public void testDetailedFieldStatsPerShard() throws Exception { + indexTestData(); + ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); params.set("fl", "name"); params.set("numTerms", "5"); - LukeResponse rsp = requestLuke(COLLECTION, params); + LukeResponse rsp = requestLuke(params); // Top-level fields should NOT have topTerms, distinct, histogram LukeResponse.FieldInfo nameField = rsp.getFieldInfo().get("name"); @@ -199,144 +161,131 @@ public void testDetailedFieldStatsPerShard() throws Exception { assertEquals("distinct should NOT be in top-level fields", 0, nameField.getDistinct()); // Per-shard entries should have detailed stats - Map shards = rsp.getShardResponses(); - assertNotNull("shards section should be present", shards); + Map shardResponses = rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); ModifiableSolrParams detailedParams = new ModifiableSolrParams(); - detailedParams.set(DISTRIB, "true"); detailedParams.set("fl", "name"); detailedParams.set("numTerms", "5"); assertLukeXPath( - 
COLLECTION, detailedParams, - // Top-level aggregated field should have type and aggregated doc count but no detailed - // stats "/response/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", - "/response/lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='20']", + "/response/lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='20']", "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms'])", "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='histogram'])", "not(/response/lst[@name='fields']/lst[@name='name']/int[@name='distinct'])", - // Per-shard entries should have detailed stats; each name is unique so docFreq=1 "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms']", "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='histogram']/int[@name='1']", "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/int[@name='distinct']"); } @Test + @ShardsFixed(num = 2) public void testLocalModeDefault() throws Exception { - LukeResponse rsp = requestLuke(COLLECTION, null); + indexTestData(); + + // Query a single client without the shards param — local mode + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + QueryRequest req = new QueryRequest(params); + NamedList raw = clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); assertNotNull("index info should be present", rsp.getIndexInfo()); assertNull("shards should NOT be present in local mode", rsp.getShardResponses()); } @Test + @ShardsFixed(num = 2) public void testExplicitDistribFalse() throws Exception { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "false"); + indexTestData(); - LukeResponse rsp = requestLuke(COLLECTION, params); + // Query a single client with distrib=false — no shards param + ModifiableSolrParams params = new 
ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("distrib", "false"); + QueryRequest req = new QueryRequest(params); + NamedList raw = clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); assertNotNull("index info should be present", rsp.getIndexInfo()); assertNull("shards should NOT be present with distrib=false", rsp.getShardResponses()); } - /** - * 12 shards, 1 document: only one shard has data, the other 11 are empty. Verifies that - * schema-derived attributes (type, schema flags, dynamicBase) aggregate correctly when most - * shards have no documents. - */ @Test + @ShardsFixed(num = 12) public void testSparseShards() throws Exception { - String collection = "lukeSparse12"; - CollectionAdminRequest.createCollection(collection, "conf", 12, 1) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - cluster.waitForActiveCollection(collection, 12, 12); - - try { - SolrInputDocument doc = new SolrInputDocument(); - doc.addField("id", "only-one"); - doc.addField("name", "sparse test"); - doc.addField("subject", "subject value"); - doc.addField("cat_s", "category"); - cluster.getSolrClient().add(collection, doc); - cluster.getSolrClient().commit(collection); - - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); - - LukeResponse rsp = requestLuke(collection, params); - - // Index-level stats - assertEquals("numDocs should be 1", 1, rsp.getNumDocsAsLong().longValue()); - assertTrue("maxDoc should be > 0", rsp.getMaxDoc() > 0); - assertEquals("deletedDocs should be 0", 0, rsp.getDeletedDocsAsLong().longValue()); - - Map shards = rsp.getShardResponses(); - assertNotNull("shards section should be present", shards); - assertEquals("should have 12 shard entries", 12, shards.size()); - - // Exactly one shard should have numDocs=1 - long sumShardDocs = 0; - for (Map.Entry entry : shards.entrySet()) { - LukeResponse shardLuke = 
entry.getValue(); - assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); - sumShardDocs += shardLuke.getNumDocsAsLong(); - } - assertEquals("sum of per-shard numDocs should be 1", 1, sumShardDocs); - - // Field-level checks - Map fields = rsp.getFieldInfo(); - assertNotNull("fields should be present", fields); - - // Schema-derived attrs should be present for all fields, even with 11 empty shards - LukeResponse.FieldInfo idField = fields.get("id"); - assertNotNull("'id' field should be present", idField); - assertEquals("id type", "string", idField.getType()); - assertNotNull("id schema flags", idField.getSchema()); - - LukeResponse.FieldInfo nameField = fields.get("name"); - assertNotNull("'name' field should be present", nameField); - assertNotNull("name type", nameField.getType()); - assertNotNull("name schema flags", nameField.getSchema()); - assertEquals("name docs should be 1", 1, nameField.getDocsAsLong().longValue()); - - // Dynamic field — should have dynamicBase in extras - LukeResponse.FieldInfo catField = fields.get("cat_s"); - assertNotNull("'cat_s' field should be present", catField); - assertNotNull("cat_s type", catField.getType()); - assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); - - // Verify structural correctness of the aggregated response via XPath - ModifiableSolrParams xpathParams = new ModifiableSolrParams(); - xpathParams.set(DISTRIB, "true"); - assertLukeXPath( - collection, - xpathParams, - "//lst[@name='index']/long[@name='numDocs'][.='1']", - "//lst[@name='index']/long[@name='deletedDocs'][.='0']", - "count(//lst[@name='shards']/lst)=12", - "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", - "//lst[@name='fields']/lst[@name='name']/str[@name='schema'][.='ITS---------------']", - "//lst[@name='fields']/lst[@name='name']/str[@name='index']", - "//lst[@name='fields']/lst[@name='name']/long[@name='docsAsLong'][.='1']", - 
"//lst[@name='fields']/lst[@name='cat_s']/str[@name='type'][.='string']", - "//lst[@name='fields']/lst[@name='cat_s']/str[@name='dynamicBase'][.='*_s']", - "//lst[@name='fields']/lst[@name='cat_s']/long[@name='docsAsLong'][.='1']"); - } finally { - CollectionAdminRequest.deleteCollection(collection) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + // Index a single doc on shard 0 + index_specific(0, "id", "100", "name", "sparse test", "subject", "subject value", "cat_s", "category"); + commit(); + + LukeResponse rsp = requestLuke(); + + // Index-level stats + assertEquals("numDocs should be 1", 1, rsp.getNumDocsAsLong().longValue()); + assertTrue("maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertEquals("deletedDocs should be 0", 0, rsp.getDeletedDocsAsLong().longValue()); + + Map shardResponses = rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); + assertEquals("should have 12 shard entries", 12, shardResponses.size()); + + long sumShardDocs = 0; + for (Map.Entry entry : shardResponses.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); + sumShardDocs += shardLuke.getNumDocsAsLong(); } + assertEquals("sum of per-shard numDocs should be 1", 1, sumShardDocs); + + // Field-level checks + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id type", "string", idField.getType()); + assertNotNull("id schema flags", idField.getSchema()); + + LukeResponse.FieldInfo nameField = fields.get("name"); + assertNotNull("'name' field should be present", nameField); + assertNotNull("name type", nameField.getType()); + assertNotNull("name schema flags", nameField.getSchema()); + assertEquals("name docs should be 1", 1, nameField.getDocs().longValue()); + + // Dynamic field — 
should have dynamicBase in extras + LukeResponse.FieldInfo catField = fields.get("cat_s"); + assertNotNull("'cat_s' field should be present", catField); + assertNotNull("cat_s type", catField.getType()); + assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); + + assertLukeXPath( + new ModifiableSolrParams(), + "//lst[@name='index']/long[@name='numDocs'][.='1']", + "//lst[@name='index']/long[@name='deletedDocs'][.='0']", + "count(//lst[@name='shards']/lst)=12", + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema']", + "//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='1']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='cat_s']/long[@name='docs'][.='1']"); } @Test + @ShardsFixed(num = 2) public void testDistribShowSchema() throws Exception { + indexTestData(); + ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); params.set("show", "schema"); assertLukeXPath( - COLLECTION, params, "//lst[@name='schema']/lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", "//lst[@name='schema']/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", @@ -349,230 +298,88 @@ public void testDistribShowSchema() throws Exception { "count(//lst[@name='shards']/lst)=2"); } - /** - * Verifies that distributed Luke detects inconsistent index flags across shards. Uses Schema API - * to change a field's {@code stored} property between indexing on different shards, producing - * different Lucene FieldInfo (and thus different index flags strings) on each shard. 
- */ @Test - public void testInconsistentIndexFlagsAcrossShards() throws Exception { - String collection = "lukeInconsistentFlags"; - System.setProperty("managed.schema.mutable", "true"); - CollectionAdminRequest.createCollection(collection, "managed", 2, 1) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - - cluster.waitForActiveCollection(collection, 2, 2); - - try { - // Add a field with stored=true, indexed=true - Map fieldAttrs = new LinkedHashMap<>(); - fieldAttrs.put("name", "test_flag_s"); - fieldAttrs.put("type", "string"); - fieldAttrs.put("stored", true); - fieldAttrs.put("indexed", true); - new SchemaRequest.AddField(fieldAttrs).process(cluster.getSolrClient(), collection); - - // Index a target doc WITH the field, plus seed docs without it - SolrInputDocument targetDoc = new SolrInputDocument(); - targetDoc.addField("id", "target"); - targetDoc.addField("test_flag_s", "has_indexed"); - cluster.getSolrClient().add(collection, targetDoc); - - List seedDocs = new ArrayList<>(); - for (int i = 0; i < 20; i++) { - SolrInputDocument doc = new SolrInputDocument(); - doc.addField("id", "seed_" + i); - seedDocs.add(doc); - } - cluster.getSolrClient().add(collection, seedDocs); - cluster.getSolrClient().commit(collection); - - // Find which shard has the target doc by querying each replica directly. - // Must use distrib=false — SolrCloud defaults distrib to true even on direct replica queries. 
- DocCollection docColl = getCollectionState(collection); - String targetSliceName = null; - for (Slice slice : docColl.getSlices()) { - Replica leader = slice.getLeader(); - try (SolrClient client = getHttpSolrClient(leader)) { - SolrQuery q = new SolrQuery("id:target"); - q.set(DISTRIB, "false"); - QueryResponse qr = client.query(q); - if (qr.getResults().getNumFound() > 0) { - targetSliceName = slice.getName(); - } - } - } - assertNotNull("target doc should exist on a shard", targetSliceName); - - // Find a seed doc on the other shard - String otherDocId = null; - for (Slice slice : docColl.getSlices()) { - if (!slice.getName().equals(targetSliceName)) { - Replica leader = slice.getLeader(); - try (SolrClient client = getHttpSolrClient(leader)) { - SolrQuery q = new SolrQuery("*:*"); - q.setRows(1); - q.set(DISTRIB, "false"); - QueryResponse qr = client.query(q); - assertTrue("other shard should have seed docs", qr.getResults().getNumFound() > 0); - otherDocId = (String) qr.getResults().getFirst().getFieldValue("id"); - } - break; - } - } - assertNotNull("should find a seed doc on the other shard", otherDocId); - - // Change the field to stored=false via Schema API - fieldAttrs.put("stored", false); - new SchemaRequest.ReplaceField(fieldAttrs).process(cluster.getSolrClient(), collection); - - // Reload collection to pick up schema change - CollectionAdminRequest.reloadCollection(collection).process(cluster.getSolrClient()); - - // Update the other-shard doc to include the field (now unstored in the new segment) - SolrInputDocument updateDoc = new SolrInputDocument(); - updateDoc.addField("id", otherDocId); - updateDoc.addField("test_flag_s", "not_indexed"); - cluster.getSolrClient().add(collection, updateDoc); - cluster.getSolrClient().commit(collection); - - // Distributed Luke should detect inconsistent index flags between the two shards. - // One shard has stored=true segments, the other has stored=false segments for test_flag_s. 
- ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); - params.set("fl", "test_flag_s"); - - Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); - String fullMessage = SolrException.getRootCause(ex).getMessage(); - assertTrue( - "exception chain should mention inconsistent index flags: " + fullMessage, - fullMessage.contains("inconsistent")); - } finally { - CollectionAdminRequest.deleteCollection(collection) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + @ShardsFixed(num = 16) + public void testDeferredIndexFlags() throws Exception { + // Index docs with the target field across shards, plus anchor docs without it. + // Use numeric IDs (the default test schema copies id to integer fields). + // Target docs get even IDs starting at 1000, anchor docs get odd IDs. + for (int i = 0; i < 16 * 4; i++) { + index("id", String.valueOf(1000 + i * 2), "flag_target_s", "value_" + i); + index("id", String.valueOf(1001 + i * 2), "name", "anchor"); } - } + commit(); + + // Delete all target docs except the first one, using per-shard deletes. + // Then optimize to force segment merge — expunges soft-deleted docs so + // Terms.getDocCount() (which backs docs) reflects only live docs. + for (int i = 0; i < clients.size(); i++) { + clients.get(i).deleteByQuery("flag_target_s:* AND -id:1000"); + clients.get(i).optimize(); + } + controlClient.deleteByQuery("flag_target_s:* AND -id:1000"); + controlClient.optimize(); - /** - * Exercises the deferred index flags path: when the first shard to report a field has null index - * flags (all its live docs for that field were deleted, but the field persists in FieldInfos from - * unmerged segments), the aggregation should still populate index flags from a later shard that - * has live docs. - * - *

Setup: 16-shard collection. Each shard gets one doc with field "flag_target_s" (which is - * then deleted) plus an anchor doc without it (to keep the shard non-empty). Only one shard - * retains a live doc with "flag_target_s". With 16 shards, the probability that the one live - * shard is processed first is low enough. Either way, the aggregated response should have index - * flags for the field. - */ - @Test - public void testDeferredIndexFlags() throws Exception { - String collection = "lukeDeferredFlags"; - int numShards = 16; - CollectionAdminRequest.createCollection(collection, "conf", numShards, 1) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - cluster.waitForActiveCollection(collection, numShards, numShards); - - try { - // Index one doc with the target field per shard, plus an anchor doc without it. - // The anchor doc keeps the shard non-empty after we delete the target doc. - // We use enough docs to spread across (hopefully) all shards. - List docs = new ArrayList<>(); - for (int i = 0; i < numShards * 4; i++) { - SolrInputDocument doc = new SolrInputDocument(); - doc.addField("id", "target_" + i); - doc.addField("flag_target_s", "value_" + i); - docs.add(doc); - - SolrInputDocument anchor = new SolrInputDocument(); - anchor.addField("id", "anchor_" + i); - anchor.addField("name", "anchor"); - docs.add(anchor); - } - cluster.getSolrClient().add(collection, docs); - cluster.getSolrClient().commit(collection); + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("fl", "flag_target_s"); - // Delete all target docs, leaving only anchors (which don't have flag_target_s) - // on most shards. Keep exactly one target doc alive. 
- for (int i = 1; i < numShards * 4; i++) { - cluster.getSolrClient().deleteById(collection, "target_" + i); - } - cluster.getSolrClient().commit(collection); - - // Verify: distributed Luke should have index flags for flag_target_s in the aggregated - // response, - // whether they came from the first shard (constructor path) or a later shard (deferred path). - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); - params.set("fl", "flag_target_s"); - - LukeResponse rsp = requestLuke(collection, params); - - Map fields = rsp.getFieldInfo(); - assertNotNull("fields should be present", fields); - LukeResponse.FieldInfo targetField = fields.get("flag_target_s"); - assertNotNull("'flag_target_s' field should be present", targetField); - - // The aggregated response should have index flags from whichever shard had live docs - ModifiableSolrParams xpathParams = new ModifiableSolrParams(); - xpathParams.set(DISTRIB, "true"); - xpathParams.set("fl", "flag_target_s"); - assertLukeXPath( - collection, - xpathParams, - "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='type'][.='string']", - "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='dynamicBase'][.='*_s']", - "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='index']", - "//lst[@name='fields']/lst[@name='flag_target_s']/long[@name='docsAsLong'][.='1']"); - } finally { - CollectionAdminRequest.deleteCollection(collection) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - } + LukeResponse rsp = requestLuke(params); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + LukeResponse.FieldInfo targetField = fields.get("flag_target_s"); + assertNotNull("'flag_target_s' field should be present", targetField); + + ModifiableSolrParams xpathParams = new ModifiableSolrParams(); + xpathParams.set("fl", "flag_target_s"); + assertLukeXPath( + xpathParams, + 
"//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='index']", + "//lst[@name='fields']/lst[@name='flag_target_s']/long[@name='docs'][.='1']"); } - /** - * Exercises shard error propagation through the distributed doc lookup path. Passing id=0 with - * show=schema triggers a BAD_REQUEST on the shard that has doc 0 (the local handler rejects an id - * combined with a non-DOC show style). The distributed handler should propagate this as a - * SolrException. - */ @Test - public void testDistributedShardError() { + @ShardsFixed(num = 2) + public void testDistributedShardError() throws Exception { + indexTestData(); + ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); params.set("id", "0"); params.set("show", "schema"); - Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); + Exception ex = expectThrows(Exception.class, () -> requestLuke(params)); String fullMessage = SolrException.getRootCause(ex).getMessage(); assertTrue( "exception should mention doc style mismatch: " + fullMessage, fullMessage.contains("missing doc param for doc style")); } - /** Verifies that the docId parameter is rejected in distributed mode. 
*/ @Test - public void testDistributedDocIdRejected() { + @ShardsFixed(num = 2) + public void testDistributedDocIdRejected() throws Exception { + indexTestData(); + ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); params.set("docId", "0"); - Exception ex = expectThrows(Exception.class, () -> requestLuke(COLLECTION, params)); + Exception ex = expectThrows(Exception.class, () -> requestLuke(params)); String fullMessage = SolrException.getRootCause(ex).getMessage(); assertTrue( "exception should mention docId not supported: " + fullMessage, fullMessage.contains("docId parameter is not supported in distributed mode")); } - /** Verifies distributed doc lookup returns the document when it exists. */ @Test + @ShardsFixed(num = 2) public void testDistributedDocLookupFound() throws Exception { + indexTestData(); + ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); params.set("id", "0"); assertLukeXPath( - COLLECTION, params, "//lst[@name='doc']/int[@name='docId']", "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='type'][.='string']", @@ -585,128 +392,125 @@ public void testDistributedDocLookupFound() throws Exception { "//lst[@name='info']"); } - /** Verifies distributed doc lookup returns an empty response for a non-existent ID. 
*/ @Test + @ShardsFixed(num = 2) public void testDistributedDocLookupNotFound() throws Exception { + indexTestData(); + ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); - params.set("id", "this_id_does_not_exist_anywhere"); + params.set("id", "999888777"); - LukeResponse rsp = requestLuke(COLLECTION, params); + LukeResponse rsp = requestLuke(params); NamedList raw = rsp.getResponse(); assertNull("doc section should NOT be present for missing ID", raw.get("doc")); - assertLukeXPath(COLLECTION, params, "not(//lst[@name='doc'])"); + assertLukeXPath(params, "not(//lst[@name='doc'])"); } - /** - * Verifies that distributed doc lookup detects a corrupt index where the same unique key exists - * on multiple shards. - */ @Test + @ShardsFixed(num = 2) public void testDistributedDocLookupDuplicateId() throws Exception { - String collection = "lukeDupId"; - int numShards = 2; - CollectionAdminRequest.createCollection(collection, "conf", numShards, 1) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - cluster.waitForActiveCollection(collection, numShards, numShards); - - try { - String dupId = "duplicate_doc"; - - // Write the same document directly to two shard cores via UpdateHandler, - // completely bypassing the distributed update processor chain. 
- DocCollection docColl = getCollectionState(collection); - List slices = new ArrayList<>(docColl.getActiveSlices()); - assertTrue("need at least 2 shards", slices.size() >= 2); - - for (int i = 0; i < 2; i++) { - Replica leader = slices.get(i).getLeader(); - JettySolrRunner jetty = - cluster.getJettySolrRunners().stream() - .filter(j -> j.getNodeName().equals(leader.getNodeName())) - .findFirst() - .orElse(null); - assertNotNull("should find jetty for replica", jetty); - - try (SolrCore core = jetty.getCoreContainer().getCore(leader.getCoreName())) { - SolrInputDocument solrDoc = new SolrInputDocument(); - solrDoc.addField("id", dupId); - solrDoc.addField("name", "dup_copy_" + i); - - AddUpdateCommand addCmd = - new AddUpdateCommand(new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}); - addCmd.solrDoc = solrDoc; - core.getUpdateHandler().addDoc(addCmd); - - CommitUpdateCommand commitCmd = - new CommitUpdateCommand( - new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}, false); - commitCmd.waitSearcher = true; - core.getUpdateHandler().commit(commitCmd); - } - } - - // Verify the duplicate actually exists on both shards - int shardsWithDoc = 0; - for (Slice slice : docColl.getActiveSlices()) { - Replica leader = slice.getLeader(); - try (SolrClient client = getHttpSolrClient(leader)) { - SolrQuery q = new SolrQuery("id:" + dupId); - q.set(DISTRIB, "false"); - QueryResponse qr = client.query(q); - if (qr.getResults().getNumFound() > 0) { - shardsWithDoc++; - } - } + String dupId = "99999"; + + // Write the same document directly to two shard cores via UpdateHandler, + // completely bypassing the distributed update processor chain. 
+ for (int i = 0; i < 2; i++) { + try (SolrCore core = + jettys.get(i).getCoreContainer().getCore("collection1")) { + SolrInputDocument solrDoc = new SolrInputDocument(); + solrDoc.addField("id", dupId); + solrDoc.addField("name", "dup_copy_" + i); + + AddUpdateCommand addCmd = + new AddUpdateCommand(new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}); + addCmd.solrDoc = solrDoc; + core.getUpdateHandler().addDoc(addCmd); + + CommitUpdateCommand commitCmd = + new CommitUpdateCommand( + new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}, false); + commitCmd.waitSearcher = true; + core.getUpdateHandler().commit(commitCmd); } - assertEquals("duplicate doc should exist on exactly 2 shards", 2, shardsWithDoc); - - // Distributed Luke doc lookup should detect the corruption - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); - params.set("id", dupId); - - Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); - String fullMessage = SolrException.getRootCause(ex).getMessage(); - assertTrue( - "exception should mention duplicate/corrupt index: " + fullMessage, - fullMessage.contains("found on multiple shards")); - } finally { - CollectionAdminRequest.deleteCollection(collection) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); } + + // Distributed Luke doc lookup should detect the corruption + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("id", dupId); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(params)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention duplicate/corrupt index: " + fullMessage, + fullMessage.contains("found on multiple shards")); } @Test - public void testDistribTrueOnSingleShardFallsBackToLocal() throws Exception { - String singleShardCollection = "lukeSingleShard"; - CollectionAdminRequest.createCollection(singleShardCollection, "conf", 1, 1) - 
.processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - cluster.waitForActiveCollection(singleShardCollection, 1, 1); - - try { - SolrInputDocument doc = new SolrInputDocument(); - doc.addField("id", "single-1"); - doc.addField("name", "test_name"); - cluster.getSolrClient().add(singleShardCollection, doc); - cluster.getSolrClient().commit(singleShardCollection); - - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(DISTRIB, "true"); - - LukeResponse rsp = requestLuke(singleShardCollection, params); - - assertNotNull( - "index info should be present even with distrib=true on single shard", - rsp.getIndexInfo()); - assertEquals("should see the 1 doc we indexed", 1, rsp.getNumDocsAsLong().longValue()); - assertNull( - "shards should NOT be present when falling back to local", rsp.getShardResponses()); - } finally { - CollectionAdminRequest.deleteCollection(singleShardCollection) - .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); - } + @ShardsFixed(num = 2) + public void testShardsParamRoutesToSpecificShard() throws Exception { + // Index a doc with a dynamic field only to shard 0 + index_specific(0, "id", "700", "name", "shard0_only", "only_on_shard0_s", "present"); + // Index a plain doc to shard 1 (no dynamic field) + index_specific(1, "id", "701", "name", "shard1_only"); + commit(); + + // Query with shards= pointing only at shard 1 — the dynamic field should NOT appear. + // This also tests that a single remote shard is correctly fanned out to rather than + // falling through to local-mode on the coordinating node. 
+ ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("shards", shardsArr[1]); + QueryRequest req = new QueryRequest(params); + NamedList raw = clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + assertNull( + "only_on_shard0_s should NOT be present when querying only shard 1", + fields.get("only_on_shard0_s")); + assertNotNull("'name' field should still be present", fields.get("name")); + + // Now query with shards= pointing only at shard 0 — the dynamic field SHOULD appear + params.set("shards", shardsArr[0]); + req = new QueryRequest(params); + raw = clients.get(0).request(req); + rsp = new LukeResponse(); + rsp.setResponse(raw); + + fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + assertNotNull( + "only_on_shard0_s SHOULD be present when querying shard 0", + fields.get("only_on_shard0_s")); + } + + @Test + @ShardsFixed(num = 1) + public void testSingleShardViaParamStillDistributes() throws Exception { + index("id", "500", "name", "test_name"); + commit(); + + // Pass the shards param with a single shard — should still fan out to it + // rather than incorrectly falling through to local mode + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("shards", shards); + QueryRequest req = new QueryRequest(params); + NamedList raw = clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + + assertNotNull( + "index info should be present", rsp.getIndexInfo()); + assertEquals("should see the 1 doc we indexed", 1, rsp.getNumDocsAsLong().longValue()); + assertNotNull( + "shards section should be present when targeting a shard via shards param", + rsp.getShardResponses()); + assertEquals("should have 1 
shard entry", 1, rsp.getShardResponses().size()); } } diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index 85e59820085e..a0ff75796c8b 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -133,18 +133,18 @@ From SolrJ, you can access /luke using the {solr-javadocs}/solrj/org/apache/solr == Distributed Mode (multiple shards) -When running in SolrCloud, the Luke handler can aggregate results from all shards in a collection by setting `distrib=true`. -By default, `distrib` is `false` and the handler inspects only the local shard's index. +When running in SolrCloud, the Luke handler automatically distributes requests across all shards in the collection, the same as search requests. +To inspect only the local shard's index, set `distrib=false`. To get a collection-wide view: [source,text] -http://localhost:8983/solr/techproducts/admin/luke?distrib=true +http://localhost:8983/solr/techproducts/admin/luke To get detailed field statistics across all shards for a specific field: [source,text] -http://localhost:8983/solr/techproducts/admin/luke?distrib=true&fl=manu +http://localhost:8983/solr/techproducts/admin/luke?fl=manu === Response Structure @@ -163,6 +163,7 @@ In distributed mode, the response contains: Field `type`, `schema` flags, and `dynamicBase` are validated for consistency across shards. If a mismatch is detected, the handler returns an error identifying the field, the conflicting values, and the shard addresses involved. +You can use `distrib=false` to query individual shards and compare their field configurations when troubleshooting mismatches. The `index` flags are index-derived (not schema-derived) and may legitimately differ across shards; the first non-null value is used. 
If the same document `id` is found on multiple shards (indicating index corruption), the handler returns an error. diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index afbd7d322fe0..e7e15bfa3b9c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -116,9 +116,8 @@ public static class FieldInfo implements Serializable { String name; String type; String schema; - int docs; + Long docs; int distinct; - Long docsAsLong; EnumSet flags; boolean cacheableFaceting; NamedList topTerms; @@ -138,11 +137,7 @@ public void read(NamedList nl) { } else if ("schema".equals(entry.getKey())) { schema = (String) entry.getValue(); } else if ("docs".equals(entry.getKey())) { - docs = (Integer) entry.getValue(); - docsAsLong = (long) docs; // widen, lossless - } else if ("docsAsLong".equals(entry.getKey())) { - // Don't set docs — narrowing Long→int is lossy - docsAsLong = (Long) entry.getValue(); + docs = ((Number) entry.getValue()).longValue(); } else if ("distinct".equals(entry.getKey())) { distinct = (Integer) entry.getValue(); } else if ("cacheableFaceting".equals(entry.getKey())) { @@ -183,14 +178,10 @@ public int getDistinct() { return distinct; } - public int getDocs() { + public Long getDocs() { return docs; } - public Long getDocsAsLong() { - return docsAsLong; - } - public String getName() { return name; } From 1c0c0f6d703a8027488f4388a799223a5b51c6c9 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 23 Mar 2026 18:31:40 -0400 Subject: [PATCH 30/46] tidy --- .../solr/handler/admin/LukeRequestHandler.java | 4 ++++ .../solr/handler/admin/LukeHandlerCloudTest.java | 4 +--- .../handler/admin/LukeRequestHandlerDistribTest.java | 12 +++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git 
a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 84aacf133883..155003c1c836 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -319,6 +319,10 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { ResponseBuilder rb = new ResponseBuilder(req, rsp, Collections.emptyList()); shardHandler.prepDistributed(rb); + // When prepDistributed short-circuits (e.g. single-shard collection where this node + // hosts the shard), it returns early without populating rb.shards — caught by the null + // check. We check length == 0 rather than <= 1 because a single remote shard targeted + // via the shards param must still be fanned out to, not handled locally. String[] shards = rb.shards; if (shards == null || shards.length == 0) { return false; diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java index d8c75cfcc0c8..75e2c889e7a4 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java @@ -39,9 +39,7 @@ import org.junit.BeforeClass; import org.junit.Test; -/** - * Cloud-specific Luke tests that require SolrCloud features like managed schema and Schema API. - */ +/** Cloud-specific Luke tests that require SolrCloud features like managed schema and Schema API. 
*/ public class LukeHandlerCloudTest extends SolrCloudTestCase { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 07cc4e245671..58eb0932a1ca 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -220,7 +220,8 @@ public void testExplicitDistribFalse() throws Exception { @ShardsFixed(num = 12) public void testSparseShards() throws Exception { // Index a single doc on shard 0 - index_specific(0, "id", "100", "name", "sparse test", "subject", "subject value", "cat_s", "category"); + index_specific( + 0, "id", "100", "name", "sparse test", "subject", "subject value", "cat_s", "category"); commit(); LukeResponse rsp = requestLuke(); @@ -416,8 +417,7 @@ public void testDistributedDocLookupDuplicateId() throws Exception { // Write the same document directly to two shard cores via UpdateHandler, // completely bypassing the distributed update processor chain. 
for (int i = 0; i < 2; i++) { - try (SolrCore core = - jettys.get(i).getCoreContainer().getCore("collection1")) { + try (SolrCore core = jettys.get(i).getCoreContainer().getCore("collection1")) { SolrInputDocument solrDoc = new SolrInputDocument(); solrDoc.addField("id", dupId); solrDoc.addField("name", "dup_copy_" + i); @@ -484,8 +484,7 @@ public void testShardsParamRoutesToSpecificShard() throws Exception { fields = rsp.getFieldInfo(); assertNotNull("fields should be present", fields); assertNotNull( - "only_on_shard0_s SHOULD be present when querying shard 0", - fields.get("only_on_shard0_s")); + "only_on_shard0_s SHOULD be present when querying shard 0", fields.get("only_on_shard0_s")); } @Test @@ -505,8 +504,7 @@ public void testSingleShardViaParamStillDistributes() throws Exception { LukeResponse rsp = new LukeResponse(); rsp.setResponse(raw); - assertNotNull( - "index info should be present", rsp.getIndexInfo()); + assertNotNull("index info should be present", rsp.getIndexInfo()); assertEquals("should see the 1 doc we indexed", 1, rsp.getNumDocsAsLong().longValue()); assertNotNull( "shards section should be present when targeting a shard via shards param", From 80f2610cf26d0dd03fe9c26a8bed9709527269a6 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 23 Mar 2026 22:39:32 -0400 Subject: [PATCH 31/46] clean up --- .../org/apache/solr/handler/admin/LukeRequestHandler.java | 7 +------ .../solr/handler/admin/LukeRequestHandlerDistribTest.java | 4 ++-- .../apache/solr/client/solrj/response/LukeResponse.java | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 155003c1c836..3c1d2c5284a7 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -30,7 +30,6 @@ import 
java.util.ArrayList; import java.util.Arrays; import java.util.Base64; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -316,13 +315,9 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { } ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); - ResponseBuilder rb = new ResponseBuilder(req, rsp, Collections.emptyList()); + ResponseBuilder rb = new ResponseBuilder(req, rsp, List.of()); shardHandler.prepDistributed(rb); - // When prepDistributed short-circuits (e.g. single-shard collection where this node - // hosts the shard), it returns early without populating rb.shards — caught by the null - // check. We check length == 0 rather than <= 1 because a single remote shard targeted - // via the shards param must still be fanned out to, not handled locally. String[] shards = rb.shards; if (shards == null || shards.length == 0) { return false; diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 58eb0932a1ca..50a851b74d66 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -125,7 +125,7 @@ public void testDistributedFieldsAggregate() throws Exception { assertEquals( "aggregated docs count for 'name' should equal total docs", NUM_DOCS, - nameField.getDocs().longValue()); + nameField.getDocs()); LukeResponse.FieldInfo idField = fields.get("id"); assertNotNull("'id' field should be present", idField); @@ -256,7 +256,7 @@ public void testSparseShards() throws Exception { assertNotNull("'name' field should be present", nameField); assertNotNull("name type", nameField.getType()); assertNotNull("name schema flags", nameField.getSchema()); - assertEquals("name docs should be 1", 1, 
nameField.getDocs().longValue()); + assertEquals("name docs should be 1", 1, nameField.getDocs()); // Dynamic field — should have dynamicBase in extras LukeResponse.FieldInfo catField = fields.get("cat_s"); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index 3a3451b9474c..e1be7b3d5b6a 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -115,7 +115,7 @@ public static class FieldInfo implements Serializable { String name; String type; String schema; - Long docs; + long docs; int distinct; EnumSet flags; boolean cacheableFaceting; @@ -177,7 +177,7 @@ public int getDistinct() { return distinct; } - public Long getDocs() { + public long getDocs() { return docs; } From dda4307b2013853360b8a32f34b27f88fae5f64b Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 26 Mar 2026 21:29:17 -0400 Subject: [PATCH 32/46] remove AsLong variants in LukeResponse --- .../handler/admin/LukeRequestHandler.java | 4 +-- .../admin/LukeRequestHandlerDistribTest.java | 29 +++++++++---------- .../client/solrj/response/LukeResponse.java | 19 +++--------- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 3c1d2c5284a7..5084d595b72e 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -402,9 +402,9 @@ private void aggregateDistributedResponses(SolrQueryResponse rsp, List shardIndex = lukeRsp.getIndexInfo(); if (shardIndex != null) { - totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocsAsLong()).orElse(0L); + totalNumDocs += 
Optional.ofNullable(lukeRsp.getNumDocs()).orElse(0L); totalMaxDoc = Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDoc()).orElse(0)); - totalDeletedDocs += Optional.ofNullable(lukeRsp.getDeletedDocsAsLong()).orElse(0L); + totalDeletedDocs += Optional.ofNullable(lukeRsp.getDeletedDocs()).orElse(0L); Number segCount = (Number) shardIndex.get(KEY_SEGMENT_COUNT); totalSegmentCount += segCount != null ? segCount.intValue() : 0; diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index 50a851b74d66..cbbd94665164 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -34,7 +34,7 @@ public class LukeRequestHandlerDistribTest extends BaseDistributedSearchTestCase { - private static final int NUM_DOCS = 20; + private static final Long NUM_DOCS = 20L; public LukeRequestHandlerDistribTest() { fixShardCount(2); @@ -86,26 +86,23 @@ public void testDistributedAggregate() throws Exception { LukeResponse rsp = requestLuke(); - assertEquals( - "aggregated numDocs should equal total docs", NUM_DOCS, rsp.getNumDocsAsLong().longValue()); + assertEquals("aggregated numDocs should equal total docs", NUM_DOCS, rsp.getNumDocs()); assertTrue("aggregated maxDoc should be > 0", rsp.getMaxDoc() > 0); - assertNotNull("deletedDocs should be present", rsp.getDeletedDocsAsLong()); + assertNotNull("deletedDocs should be present", rsp.getDeletedDocs()); Map shardResponses = rsp.getShardResponses(); assertNotNull("shards section should be present", shardResponses); assertEquals("should have 2 shard entries", 2, shardResponses.size()); - long sumShardDocs = 0; + Long sumShardDocs = 0L; for (Map.Entry entry : shardResponses.entrySet()) { LukeResponse shardLuke = entry.getValue(); - assertNotNull("each shard should have numDocs", 
shardLuke.getNumDocsAsLong()); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); assertNotNull("each shard should have maxDoc", shardLuke.getMaxDoc()); - sumShardDocs += shardLuke.getNumDocsAsLong(); + sumShardDocs += shardLuke.getNumDocs(); } assertEquals( - "sum of per-shard numDocs should equal aggregated numDocs", - rsp.getNumDocsAsLong().longValue(), - sumShardDocs); + "sum of per-shard numDocs should equal aggregated numDocs", rsp.getNumDocs(), sumShardDocs); } @Test @@ -124,7 +121,7 @@ public void testDistributedFieldsAggregate() throws Exception { assertNotNull("schema flags should be present", nameField.getSchema()); assertEquals( "aggregated docs count for 'name' should equal total docs", - NUM_DOCS, + (long) NUM_DOCS, nameField.getDocs()); LukeResponse.FieldInfo idField = fields.get("id"); @@ -227,9 +224,9 @@ public void testSparseShards() throws Exception { LukeResponse rsp = requestLuke(); // Index-level stats - assertEquals("numDocs should be 1", 1, rsp.getNumDocsAsLong().longValue()); + assertEquals("numDocs should be 1", 1, (long) rsp.getNumDocs()); assertTrue("maxDoc should be > 0", rsp.getMaxDoc() > 0); - assertEquals("deletedDocs should be 0", 0, rsp.getDeletedDocsAsLong().longValue()); + assertEquals("deletedDocs should be 0", 0L, (long) rsp.getDeletedDocs()); Map shardResponses = rsp.getShardResponses(); assertNotNull("shards section should be present", shardResponses); @@ -238,8 +235,8 @@ public void testSparseShards() throws Exception { long sumShardDocs = 0; for (Map.Entry entry : shardResponses.entrySet()) { LukeResponse shardLuke = entry.getValue(); - assertNotNull("each shard should have numDocs", shardLuke.getNumDocsAsLong()); - sumShardDocs += shardLuke.getNumDocsAsLong(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); + sumShardDocs += shardLuke.getNumDocs(); } assertEquals("sum of per-shard numDocs should be 1", 1, sumShardDocs); @@ -505,7 +502,7 @@ public void 
testSingleShardViaParamStillDistributes() throws Exception { rsp.setResponse(raw); assertNotNull("index info should be present", rsp.getIndexInfo()); - assertEquals("should see the 1 doc we indexed", 1, rsp.getNumDocsAsLong().longValue()); + assertEquals("should see the 1 doc we indexed", 1, (long) rsp.getNumDocs()); assertNotNull( "shards section should be present when targeting a shard via shards param", rsp.getShardResponses()); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index e1be7b3d5b6a..0ad3ecaea52d 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -283,27 +283,16 @@ private Long getIndexLong(String key) { return n != null ? n.longValue() : null; } - public Integer getNumDocs() { - if (indexInfo == null) { - return null; - } - Object val = indexInfo.get("numDocs"); - return val instanceof Integer i ? i : null; - } - - public Long getNumDocsAsLong() { + public Long getNumDocs() { return getIndexLong("numDocs"); } public Integer getMaxDoc() { - if (indexInfo == null) { - return null; - } - Object val = indexInfo.get("maxDoc"); - return val instanceof Integer i ? 
i : null; + if (indexInfo == null) return null; + return (Integer) indexInfo.get("maxDoc"); } - public Long getDeletedDocsAsLong() { + public Long getDeletedDocs() { return getIndexLong("deletedDocs"); } From 5e932be7820c472edf287d1fd8b04bab35173b9f Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 26 Mar 2026 21:34:33 -0400 Subject: [PATCH 33/46] roll back to simpler getNumTerms logic --- .../apache/solr/client/solrj/response/LukeResponse.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index 0ad3ecaea52d..f56c85f743b3 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -297,11 +297,8 @@ public Long getDeletedDocs() { } public Integer getNumTerms() { - if (indexInfo == null) { - return null; - } - Object val = indexInfo.get("numTerms"); - return val instanceof Integer i ? 
i : null; + if (indexInfo == null) return null; + return (Integer) indexInfo.get("numTerms"); } public Map getFieldTypeInfo() { From 2466d98ef408ba2565d9c0a886e1561957c9661a Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 30 Mar 2026 12:33:55 -0400 Subject: [PATCH 34/46] better comments and docs --- .../org/apache/solr/handler/admin/LukeRequestHandler.java | 8 ++++---- .../indexing-guide/pages/luke-request-handler.adoc | 6 ++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 5084d595b72e..a1c1055414ab 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -265,10 +265,10 @@ private static class AggregatedFieldData { } private static class ShardData { - final String shardAddr; - final Map shardFieldInfo; - private NamedList indexInfo; - private SimpleOrderedMap detailedFields; + final String shardAddr; // key in "shards" response map + final Map shardFieldInfo; // keyed by field name + private NamedList indexInfo; // value for "index" key in per-shard entry + private SimpleOrderedMap detailedFields; // keyed by field name ShardData(String shardAddr, Map shardFieldInfo) { this.shardAddr = shardAddr; diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index a0ff75796c8b..5ae7b216e1bc 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -164,10 +164,8 @@ In distributed mode, the response contains: Field `type`, `schema` flags, and `dynamicBase` are validated for consistency across shards. 
If a mismatch is detected, the handler returns an error identifying the field, the conflicting values, and the shard addresses involved. You can use `distrib=false` to query individual shards and compare their field configurations when troubleshooting mismatches. -The `index` flags are index-derived (not schema-derived) and may legitimately differ across shards; the first non-null value is used. -If the same document `id` is found on multiple shards (indicating index corruption), the handler returns an error. +The `index` flags are index-derived (not schema-derived) and may be absent on shards where the field has no indexed data; the first non-null value is used, and any subsequent non-null values are validated for consistency. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not aggregated across shards. These statistics are shard-local and appear in each shard's entry under the `shards` key. -For cross-shard term-level aggregation, Solr's xref:query-guide:faceting.adoc[faceting API] provides refinement and distributed aggregation, though it differs from Luke's term statistics: faceting doesn't count deleted documents, is more flexible, and is generally more expensive. -A case could be made for adding aggregation strategies for Luke-powered term statistics (which offer better performance at the cost of flexibility), however this would require community interest to motivate. +For collection-wide term frequencies or cardinality estimates, Solr's xref:query-guide:faceting.adoc[faceting API] may cover some of these use cases. 
From f6dc42ae5fe269f0f1e5f296a18a57008052d2fa Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 30 Mar 2026 13:38:29 -0400 Subject: [PATCH 35/46] don't add unnecessary incompatibilities --- .../apache/solr/handler/admin/LukeRequestHandler.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index a1c1055414ab..13a88f92dd4b 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -552,11 +552,9 @@ private void aggregateShardField( } // "docs" → sum of per-shard doc counts (number of documents containing this field) - Long docsLong = fi.getDocs(); - if (docsLong != null) { - fieldData.aggregated.compute( - KEY_DOCS, (key, val) -> val == null ? docsLong : (Long) val + docsLong); - } + long docsLong = fi.getDocs(); + fieldData.aggregated.compute( + KEY_DOCS, (key, val) -> val == null ? docsLong : (Long) val + docsLong); } /** Validates that a field attribute value is identical across shards. 
*/ @@ -843,7 +841,7 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re } } } - fieldMap.add(KEY_DOCS, (long) terms.getDocCount()); + fieldMap.add(KEY_DOCS, terms.getDocCount()); } if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) { getDetailedFieldInfo(req, fieldName, fieldMap); From e3058ee653ce2aa079b43ea64c0a3cc4b352f740 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 30 Mar 2026 17:40:33 -0400 Subject: [PATCH 36/46] narrow long to int when possible --- .../handler/admin/LukeRequestHandler.java | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 13a88f92dd4b..4de8654700f5 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -70,6 +70,7 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.client.api.model.CoreStatusResponse; +import org.apache.solr.client.api.util.SolrVersion; import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; @@ -96,6 +97,7 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.security.AuthorizationContext; +import org.apache.solr.servlet.HttpSolrCall; import org.apache.solr.update.SolrIndexWriter; import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; @@ -352,7 +354,7 @@ private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { } } - aggregateDistributedResponses(rsp, responses); + aggregateDistributedResponses(req, rsp, responses); rsp.setHttpCaching(false); return true; } @@ -361,7 +363,8 @@ private static 
String shardAddress(ShardResponse srsp) { return srsp.getShardAddress() != null ? srsp.getShardAddress() : srsp.getShard(); } - private void aggregateDistributedResponses(SolrQueryResponse rsp, List responses) { + private void aggregateDistributedResponses( + SolrQueryRequest req, SolrQueryResponse rsp, List responses) { if (!responses.isEmpty()) { ShardResponse firstRsp = responses.getFirst(); @@ -449,6 +452,13 @@ private void aggregateDistributedResponses(SolrQueryResponse rsp, List aggregatedFieldsNL = new SimpleOrderedMap<>(); for (Map.Entry entry : aggregatedFields.entrySet()) { @@ -557,6 +567,32 @@ private void aggregateShardField( KEY_DOCS, (key, val) -> val == null ? docsLong : (Long) val + docsLong); } + /** + * Minimum client version that understands Long values in distributed Luke responses. Distributed + * Luke aggregates counts across shards, which can overflow Integer. Older clients cast these + * values to Integer and would fail with a ClassCastException. + */ + private static final SolrVersion DISTRIB_LONG_COUNTS_MIN_VERSION = + SolrVersion.forIntegers(10, 2, 0); + + private static boolean shouldNarrowLongsForOldClient(SolrQueryRequest req) { + HttpSolrCall call = req.getHttpSolrCall(); + if (call == null) return false; + SolrVersion clientVersion = call.getUserAgentSolrVersion(); + return clientVersion != null && clientVersion.lessThan(DISTRIB_LONG_COUNTS_MIN_VERSION); + } + + /** Narrows a Long value to Integer if it fits, for javabin backward compatibility. */ + private static void narrowLongToInt(NamedList nl, String key) { + int idx = nl.indexOf(key, 0); + if (idx >= 0) { + Object val = nl.getVal(idx); + if (val instanceof Long l && l >= Integer.MIN_VALUE && l <= Integer.MAX_VALUE) { + nl.setVal(idx, l.intValue()); + } + } + } + /** Validates that a field attribute value is identical across shards. 
*/ private void validateFieldAttr( String fieldName, From 0eb0d58b4d1751b0b120c227efc56dc791ff71d7 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Mon, 30 Mar 2026 20:05:27 -0400 Subject: [PATCH 37/46] use `merge` --- .../org/apache/solr/handler/admin/LukeRequestHandler.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 4de8654700f5..63b8c7b7e80b 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -562,9 +562,7 @@ private void aggregateShardField( } // "docs" → sum of per-shard doc counts (number of documents containing this field) - long docsLong = fi.getDocs(); - fieldData.aggregated.compute( - KEY_DOCS, (key, val) -> val == null ? docsLong : (Long) val + docsLong); + fieldData.aggregated.merge(KEY_DOCS, fi.getDocs(), (a, b) -> Long.sum((Long) a, (Long) b)); } /** From d0bf402dde52d022474251ab414849d6b8ce0696 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Mon, 30 Mar 2026 20:26:19 -0400 Subject: [PATCH 38/46] params tricks --- .../handler/admin/LukeHandlerCloudTest.java | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java index 75e2c889e7a4..770175bb324b 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java @@ -34,7 +34,7 @@ import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; -import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; import 
org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -52,16 +52,9 @@ public static void afterClass() throws Exception { shutdownCluster(); } - private void requestLuke(String collection, ModifiableSolrParams extra) throws Exception { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("qt", "/admin/luke"); - params.set("numTerms", "0"); - if (extra != null) { - for (Map.Entry entry : extra.getMap().entrySet()) { - params.set(entry.getKey(), entry.getValue()); - } - } - QueryRequest req = new QueryRequest(params); + private void requestLuke(String collection, SolrParams extra) throws Exception { + QueryRequest req = new QueryRequest(SolrParams.wrapDefaults(extra, params("numTerms", "0"))); + req.setPath("/admin/luke"); cluster.getSolrClient().request(req, collection); } @@ -155,10 +148,8 @@ public void testInconsistentIndexFlagsAcrossShards() throws Exception { // Distributed Luke should detect inconsistent index flags between the two shards. // One shard has stored=true segments, the other has stored=false segments for test_flag_s. // No need to set distrib=true — ZK-aware nodes default to distributed mode. 
- ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("fl", "test_flag_s"); - - Exception ex = expectThrows(Exception.class, () -> requestLuke(collection, params)); + Exception ex = + expectThrows(Exception.class, () -> requestLuke(collection, params("fl", "test_flag_s"))); String fullMessage = SolrException.getRootCause(ex).getMessage(); assertTrue( "exception chain should mention inconsistent index flags: " + fullMessage, From 53aaad4c75b4acd9c63c6042bc134b08f2b0eb4b Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 1 Apr 2026 11:36:48 -0400 Subject: [PATCH 39/46] address PR comments --- ...4149-distributed-luke => SOLR-8127-distributed-luke.yml} | 4 ++-- .../org/apache/solr/handler/admin/LukeRequestHandler.java | 2 +- .../org/apache/solr/handler/admin/LukeHandlerCloudTest.java | 6 ------ 3 files changed, 3 insertions(+), 9 deletions(-) rename changelog/unreleased/{PR#4149-distributed-luke => SOLR-8127-distributed-luke.yml} (76%) diff --git a/changelog/unreleased/PR#4149-distributed-luke b/changelog/unreleased/SOLR-8127-distributed-luke.yml similarity index 76% rename from changelog/unreleased/PR#4149-distributed-luke rename to changelog/unreleased/SOLR-8127-distributed-luke.yml index aae5ac517b55..5ae24a571dac 100644 --- a/changelog/unreleased/PR#4149-distributed-luke +++ b/changelog/unreleased/SOLR-8127-distributed-luke.yml @@ -4,5 +4,5 @@ type: added # added, changed, fixed, deprecated, removed, dependency_update, sec authors: - name: Luke Kot-Zaniewski links: - name: PR#4149 - url: https://github.com/apache/solr/pull/4149 + name: SOLR-8127 + url: https://issues.apache.org/jira/browse/SOLR-8127 diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 63b8c7b7e80b..715ac9b62efc 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ 
b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -571,7 +571,7 @@ private void aggregateShardField( * values to Integer and would fail with a ClassCastException. */ private static final SolrVersion DISTRIB_LONG_COUNTS_MIN_VERSION = - SolrVersion.forIntegers(10, 2, 0); + SolrVersion.forIntegers(10, 1, 0); private static boolean shouldNarrowLongsForOldClient(SolrQueryRequest req) { HttpSolrCall call = req.getHttpSolrCall(); diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java index 770175bb324b..20c5ff4523b5 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java @@ -35,7 +35,6 @@ import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.params.SolrParams; -import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -47,11 +46,6 @@ public static void setupCluster() throws Exception { configureCluster(2).addConfig("managed", configset("cloud-managed")).configure(); } - @AfterClass - public static void afterClass() throws Exception { - shutdownCluster(); - } - private void requestLuke(String collection, SolrParams extra) throws Exception { QueryRequest req = new QueryRequest(SolrParams.wrapDefaults(extra, params("numTerms", "0"))); req.setPath("/admin/luke"); From f05174880823b2f4a91b61a23a89c8049f52295f Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 1 Apr 2026 13:27:01 -0400 Subject: [PATCH 40/46] minor improvements --- .../handler/admin/LukeRequestHandler.java | 2 ++ .../admin/LukeRequestHandlerDistribTest.java | 22 +++++++++++++++---- .../pages/luke-request-handler.adoc | 3 ++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java 
b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 715ac9b62efc..d7ba224bc514 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -30,6 +30,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; +import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -434,6 +435,7 @@ private void aggregateDistributedResponses( shardDataList.add(shardData); } + shardDataList.sort(Comparator.comparing(sd -> sd.shardAddr)); SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); for (ShardData sd : shardDataList) { SimpleOrderedMap entry = sd.toResponseEntry(); diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index cbbd94665164..a1fc62308afc 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -50,10 +50,24 @@ private LukeResponse requestLuke(ModifiableSolrParams extra) throws Exception { params.set("numTerms", "0"); params.set("shards", shards); params.add(extra); + + // Query a random shard client + int which = r.nextInt(clients.size()); QueryRequest req = new QueryRequest(params); - NamedList raw = clients.get(0).request(req); + NamedList raw = clients.get(which).request(req); LukeResponse rsp = new LukeResponse(); rsp.setResponse(raw); + + // Query the control server with the same distributed params + QueryRequest controlReq = new QueryRequest(params); + NamedList controlRaw = controlClient.request(controlReq); + LukeResponse controlRsp = new LukeResponse(); + controlRsp.setResponse(controlRaw); + + // Compare — response should be consistent regardless of coordinating node + handle.put("QTime", 
SKIPVAL); + compareSolrResponses(rsp, controlRsp); + return rsp; } @@ -66,7 +80,7 @@ private void assertLukeXPath(ModifiableSolrParams extra, String... xpaths) throw params.add(extra); QueryRequest req = new QueryRequest(params); req.setResponseParser(new InputStreamResponseParser("xml")); - NamedList raw = clients.get(0).request(req); + NamedList raw = controlClient.request(req); String xml = InputStreamResponseParser.consumeResponseToString(raw); String failedXpath = BaseTestHarness.validateXPath(xml, xpaths); assertNull("XPath validation failed: " + failedXpath + "\nResponse:\n" + xml, failedXpath); @@ -186,7 +200,7 @@ public void testLocalModeDefault() throws Exception { params.set("qt", "/admin/luke"); params.set("numTerms", "0"); QueryRequest req = new QueryRequest(params); - NamedList raw = clients.get(0).request(req); + NamedList raw = controlClient.request(req); LukeResponse rsp = new LukeResponse(); rsp.setResponse(raw); @@ -205,7 +219,7 @@ public void testExplicitDistribFalse() throws Exception { params.set("numTerms", "0"); params.set("distrib", "false"); QueryRequest req = new QueryRequest(params); - NamedList raw = clients.get(0).request(req); + NamedList raw = controlClient.request(req); LukeResponse rsp = new LukeResponse(); rsp.setResponse(raw); diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index 5ae7b216e1bc..4a79cea529e5 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -134,7 +134,8 @@ From SolrJ, you can access /luke using the {solr-javadocs}/solrj/org/apache/solr == Distributed Mode (multiple shards) When running in SolrCloud, the Luke handler automatically distributes requests across all shards in the collection, the same as search requests. -To inspect only the local shard's index, set `distrib=false`. 
+To inspect only the local shard's index set `distrib=false`. +In non-cloud mode, you can still distribute across shards by passing the `shards` parameter with explicit shard URLs. To get a collection-wide view: From 2d4b262495ecc9a4b706a8fc74b01c3a226421ce Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 1 Apr 2026 14:11:44 -0400 Subject: [PATCH 41/46] fix flakiness due to ordering --- .../handler/admin/LukeRequestHandler.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index d7ba224bc514..d0dbebe7fb9b 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -255,6 +255,7 @@ private static class AggregatedFieldData { final LukeResponse.FieldInfo originalFieldInfo; private Object indexFlags; private String indexFlagsShardAddr; + private long docsSum; AggregatedFieldData(String shardAddr, LukeResponse.FieldInfo fieldInfo) { this.originalShardAddr = shardAddr; @@ -462,6 +463,15 @@ private void aggregateDistributedResponses( } } if (!aggregatedFields.isEmpty()) { + // Finalize field entries: add index flags and docs in a consistent order. + // These are deferred from aggregateShardField because index flags may arrive + // from any shard, and we want a deterministic key order in the response. 
+ for (AggregatedFieldData fd : aggregatedFields.values()) { + if (fd.indexFlags != null) { + fd.aggregated.add(KEY_INDEX_FLAGS, fd.indexFlags); + } + fd.aggregated.add(KEY_DOCS, fd.docsSum); + } SimpleOrderedMap aggregatedFieldsNL = new SimpleOrderedMap<>(); for (Map.Entry entry : aggregatedFields.entrySet()) { aggregatedFieldsNL.add(entry.getKey(), entry.getValue().aggregated); @@ -518,9 +528,6 @@ private void aggregateShardField( if (dynBase != null) { fieldData.aggregated.add(KEY_DYNAMIC_BASE, dynBase); } - if (fieldData.indexFlags != null) { - fieldData.aggregated.add(KEY_INDEX_FLAGS, fieldData.indexFlags); - } } else { // Subsequent shards: validate that "type", "schema", and "dynamicBase" match validateFieldAttr( @@ -550,7 +557,6 @@ private void aggregateShardField( if (fieldData.indexFlags == null) { fieldData.indexFlags = indexFlags; fieldData.indexFlagsShardAddr = shardAddr; - fieldData.aggregated.add(KEY_INDEX_FLAGS, indexFlags); } else { validateFieldAttr( fieldName, @@ -563,8 +569,9 @@ private void aggregateShardField( } } - // "docs" → sum of per-shard doc counts (number of documents containing this field) - fieldData.aggregated.merge(KEY_DOCS, fi.getDocs(), (a, b) -> Long.sum((Long) a, (Long) b)); + // Sum per-shard doc counts (number of documents containing this field). + // Added to the response in finalization to ensure consistent key ordering. 
+ fieldData.docsSum += fi.getDocs(); } /** From d6991e66dbc461d6827600e9073a036bf75b9b22 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 1 Apr 2026 17:22:28 -0400 Subject: [PATCH 42/46] make distrib and local mode more similar --- .../handler/admin/LukeRequestHandler.java | 16 ++++++---- .../admin/LukeRequestHandlerDistribTest.java | 29 +++++++++---------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index d0dbebe7fb9b..1dd04ea1d468 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -380,17 +380,13 @@ private void aggregateDistributedResponses( if (schema != null) { rsp.add(RSP_SCHEMA, schema); } - Object info = firstShardRsp.get(RSP_INFO); - if (info != null) { - rsp.add(RSP_INFO, info); - } } long totalNumDocs = 0; int totalMaxDoc = 0; long totalDeletedDocs = 0; int totalSegmentCount = 0; - Map aggregatedFields = new HashMap<>(); + Map aggregatedFields = new TreeMap<>(); String firstDocShard = null; Object firstDoc = null; List shardDataList = new ArrayList<>(); @@ -479,6 +475,15 @@ private void aggregateDistributedResponses( rsp.add(RSP_FIELDS, aggregatedFieldsNL); } + // Add info section last (before shards), matching the local-mode key order. + if (!responses.isEmpty()) { + NamedList firstShardRsp = responses.getFirst().getSolrResponse().getResponse(); + Object info = firstShardRsp == null ? 
null : firstShardRsp.get(RSP_INFO); + if (info != null) { + rsp.add(RSP_INFO, info); + } + } + rsp.add(RSP_SHARDS, shardsInfo); } @@ -862,6 +867,7 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re // Not indexed, so we need to report what we can (it made it through the fl param if // specified) if (terms == null) { + fieldMap.add(KEY_DOCS, 0); finfo.add(fieldName, fieldMap); continue; } diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index a1fc62308afc..a177e209faab 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -21,6 +21,7 @@ import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.response.InputStreamResponseParser; import org.apache.solr.client.solrj.response.LukeResponse; +import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.ModifiableSolrParams; @@ -48,25 +49,21 @@ private LukeResponse requestLuke(ModifiableSolrParams extra) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("qt", "/admin/luke"); params.set("numTerms", "0"); - params.set("shards", shards); params.add(extra); - // Query a random shard client - int which = r.nextInt(clients.size()); - QueryRequest req = new QueryRequest(params); - NamedList raw = clients.get(which).request(req); - LukeResponse rsp = new LukeResponse(); - rsp.setResponse(raw); - - // Query the control server with the same distributed params - QueryRequest controlReq = new QueryRequest(params); - NamedList controlRaw = controlClient.request(controlReq); - LukeResponse controlRsp = new LukeResponse(); - 
controlRsp.setResponse(controlRaw); - - // Compare — response should be consistent regardless of coordinating node + // query() sends to control and a random shard with shards param, compares responses handle.put("QTime", SKIPVAL); - compareSolrResponses(rsp, controlRsp); + handle.put("index", SKIP); + handle.put("shards", SKIP); + // Detailed per-field stats (distinct, topTerms, histogram) are kept per-shard in + // distributed mode and intentionally excluded from the aggregated top-level fields. + // Local mode includes them inline, so skip them in the comparison. + handle.put("distinct", SKIP); + handle.put("topTerms", SKIP); + handle.put("histogram", SKIP); + QueryResponse qr = query(params); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(qr.getResponse()); return rsp; } From bda836c07fee8a269b99d263af0cd7e34f7a298a Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 2 Apr 2026 15:32:29 -0400 Subject: [PATCH 43/46] add extra param support for LukeRequest --- .../handler/admin/LukeRequestHandler.java | 36 +++++----- .../handler/admin/LukeHandlerCloudTest.java | 8 +-- .../admin/LukeRequestHandlerDistribTest.java | 69 +++++++------------ .../client/solrj/request/LukeRequest.java | 9 +++ 4 files changed, 54 insertions(+), 68 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 1dd04ea1d468..0065834a5887 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -125,26 +125,26 @@ public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAw static final int HIST_ARRAY_SIZE = 33; // Response section keys - private static final String RSP_INDEX = "index"; - private static final String RSP_FIELDS = "fields"; - private static final String RSP_SCHEMA = "schema"; - private static final String 
RSP_INFO = "info"; - private static final String RSP_DOC = "doc"; - private static final String RSP_SHARDS = "shards"; + static final String RSP_INDEX = "index"; + static final String RSP_FIELDS = "fields"; + static final String RSP_SCHEMA = "schema"; + static final String RSP_INFO = "info"; + static final String RSP_DOC = "doc"; + static final String RSP_SHARDS = "shards"; // Field-level keys - private static final String KEY_NUM_DOCS = "numDocs"; - private static final String KEY_MAX_DOC = "maxDoc"; - private static final String KEY_DELETED_DOCS = "deletedDocs"; - private static final String KEY_SEGMENT_COUNT = "segmentCount"; - private static final String KEY_TYPE = "type"; - private static final String KEY_SCHEMA_FLAGS = "schema"; - private static final String KEY_DOCS = "docs"; - private static final String KEY_DISTINCT = "distinct"; - private static final String KEY_TOP_TERMS = "topTerms"; - private static final String KEY_DYNAMIC_BASE = "dynamicBase"; - private static final String KEY_INDEX_FLAGS = "index"; - private static final String KEY_HISTOGRAM = "histogram"; + static final String KEY_NUM_DOCS = "numDocs"; + static final String KEY_MAX_DOC = "maxDoc"; + static final String KEY_DELETED_DOCS = "deletedDocs"; + static final String KEY_SEGMENT_COUNT = "segmentCount"; + static final String KEY_TYPE = "type"; + static final String KEY_SCHEMA_FLAGS = "schema"; + static final String KEY_DOCS = "docs"; + static final String KEY_DISTINCT = "distinct"; + static final String KEY_TOP_TERMS = "topTerms"; + static final String KEY_DYNAMIC_BASE = "dynamicBase"; + static final String KEY_INDEX_FLAGS = "index"; + static final String KEY_HISTOGRAM = "histogram"; private ShardHandlerFactory shardHandlerFactory; diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java index 20c5ff4523b5..f49ac3ccca0a 100644 --- 
a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java @@ -24,7 +24,7 @@ import java.util.Map; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.LukeRequest; import org.apache.solr.client.solrj.request.SolrQuery; import org.apache.solr.client.solrj.request.schema.SchemaRequest; import org.apache.solr.client.solrj.response.QueryResponse; @@ -47,9 +47,9 @@ public static void setupCluster() throws Exception { } private void requestLuke(String collection, SolrParams extra) throws Exception { - QueryRequest req = new QueryRequest(SolrParams.wrapDefaults(extra, params("numTerms", "0"))); - req.setPath("/admin/luke"); - cluster.getSolrClient().request(req, collection); + LukeRequest req = new LukeRequest(extra); + req.setNumTerms(0); + req.process(cluster.getSolrClient(), collection); } /** diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index a177e209faab..c704ba0cd5c8 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -18,7 +18,7 @@ import java.util.Map; import org.apache.solr.BaseDistributedSearchTestCase; -import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.LukeRequest; import org.apache.solr.client.solrj.response.InputStreamResponseParser; import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.client.solrj.response.QueryResponse; @@ -53,14 +53,14 @@ private LukeResponse requestLuke(ModifiableSolrParams extra) throws Exception { // query() sends to 
control and a random shard with shards param, compares responses handle.put("QTime", SKIPVAL); - handle.put("index", SKIP); - handle.put("shards", SKIP); + handle.put(LukeRequestHandler.RSP_INDEX, SKIP); + handle.put(LukeRequestHandler.RSP_SHARDS, SKIP); // Detailed per-field stats (distinct, topTerms, histogram) are kept per-shard in // distributed mode and intentionally excluded from the aggregated top-level fields. // Local mode includes them inline, so skip them in the comparison. - handle.put("distinct", SKIP); - handle.put("topTerms", SKIP); - handle.put("histogram", SKIP); + handle.put(LukeRequestHandler.KEY_DISTINCT, SKIP); + handle.put(LukeRequestHandler.KEY_TOP_TERMS, SKIP); + handle.put(LukeRequestHandler.KEY_HISTOGRAM, SKIP); QueryResponse qr = query(params); LukeResponse rsp = new LukeResponse(); rsp.setResponse(qr.getResponse()); @@ -70,12 +70,10 @@ private LukeResponse requestLuke(ModifiableSolrParams extra) throws Exception { private void assertLukeXPath(ModifiableSolrParams extra, String... 
xpaths) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("qt", "/admin/luke"); - params.set("numTerms", "0"); - params.set("wt", "xml"); params.set("shards", shards); params.add(extra); - QueryRequest req = new QueryRequest(params); + LukeRequest req = new LukeRequest(params); + req.setNumTerms(0); req.setResponseParser(new InputStreamResponseParser("xml")); NamedList raw = controlClient.request(req); String xml = InputStreamResponseParser.consumeResponseToString(raw); @@ -193,13 +191,9 @@ public void testLocalModeDefault() throws Exception { indexTestData(); // Query a single client without the shards param — local mode - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("qt", "/admin/luke"); - params.set("numTerms", "0"); - QueryRequest req = new QueryRequest(params); - NamedList raw = controlClient.request(req); - LukeResponse rsp = new LukeResponse(); - rsp.setResponse(raw); + LukeRequest req = new LukeRequest(); + req.setNumTerms(0); + LukeResponse rsp = req.process(controlClient); assertNotNull("index info should be present", rsp.getIndexInfo()); assertNull("shards should NOT be present in local mode", rsp.getShardResponses()); @@ -211,14 +205,9 @@ public void testExplicitDistribFalse() throws Exception { indexTestData(); // Query a single client with distrib=false — no shards param - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("qt", "/admin/luke"); - params.set("numTerms", "0"); - params.set("distrib", "false"); - QueryRequest req = new QueryRequest(params); - NamedList raw = controlClient.request(req); - LukeResponse rsp = new LukeResponse(); - rsp.setResponse(raw); + LukeRequest req = new LukeRequest(params("distrib", "false")); + req.setNumTerms(0); + LukeResponse rsp = req.process(controlClient); assertNotNull("index info should be present", rsp.getIndexInfo()); assertNull("shards should NOT be present with distrib=false", rsp.getShardResponses()); @@ -466,14 +455,9 
@@ public void testShardsParamRoutesToSpecificShard() throws Exception { // Query with shards= pointing only at shard 1 — the dynamic field should NOT appear. // This also tests that a single remote shard is correctly fanned out to rather than // falling through to local-mode on the coordinating node. - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("qt", "/admin/luke"); - params.set("numTerms", "0"); - params.set("shards", shardsArr[1]); - QueryRequest req = new QueryRequest(params); - NamedList raw = clients.get(0).request(req); - LukeResponse rsp = new LukeResponse(); - rsp.setResponse(raw); + LukeRequest req = new LukeRequest(params("shards", shardsArr[1])); + req.setNumTerms(0); + LukeResponse rsp = req.process(controlClient); Map fields = rsp.getFieldInfo(); assertNotNull("fields should be present", fields); @@ -483,11 +467,9 @@ public void testShardsParamRoutesToSpecificShard() throws Exception { assertNotNull("'name' field should still be present", fields.get("name")); // Now query with shards= pointing only at shard 0 — the dynamic field SHOULD appear - params.set("shards", shardsArr[0]); - req = new QueryRequest(params); - raw = clients.get(0).request(req); - rsp = new LukeResponse(); - rsp.setResponse(raw); + req = new LukeRequest(params("shards", shardsArr[0])); + req.setNumTerms(0); + rsp = req.process(controlClient); fields = rsp.getFieldInfo(); assertNotNull("fields should be present", fields); @@ -503,14 +485,9 @@ public void testSingleShardViaParamStillDistributes() throws Exception { // Pass the shards param with a single shard — should still fan out to it // rather than incorrectly falling through to local mode - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("qt", "/admin/luke"); - params.set("numTerms", "0"); - params.set("shards", shards); - QueryRequest req = new QueryRequest(params); - NamedList raw = clients.get(0).request(req); - LukeResponse rsp = new LukeResponse(); - rsp.setResponse(raw); 
+ LukeRequest req = new LukeRequest(params("shards", shards)); + req.setNumTerms(0); + LukeResponse rsp = req.process(controlClient); assertNotNull("index info should be present", rsp.getIndexInfo()); assertEquals("should see the 1 doc we indexed", 1, (long) rsp.getNumDocs()); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java index 243d811e5d7e..2680992bb7e3 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java @@ -33,12 +33,18 @@ public class LukeRequest extends CollectionRequiringSolrRequest { private int numTerms = -1; private boolean showSchema = false; private Boolean includeIndexFieldFlags = null; + private SolrParams extraParams; public LukeRequest() { // this request is not processed as an ADMIN request super(METHOD.GET, "/admin/luke", SolrRequestType.ADMIN); } + public LukeRequest(SolrParams params) { + this(); + this.extraParams = params; + } + public LukeRequest(String path) { super(METHOD.GET, path, SolrRequestType.ADMIN); } @@ -122,6 +128,9 @@ public SolrParams getParams() { if (includeIndexFieldFlags != null) { params.add("includeIndexFieldFlags", includeIndexFieldFlags.toString()); } + if (extraParams != null) { + params.add(extraParams); + } return params; } From c734337c5cb57d05d5b81ed70929c2a83bef68e7 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Thu, 2 Apr 2026 15:39:05 -0400 Subject: [PATCH 44/46] minor tweaks to LukeResponse --- .../apache/solr/client/solrj/response/LukeResponse.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index f56c85f743b3..f23bb29cffab 100644 --- 
a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -23,6 +23,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; @@ -120,7 +121,7 @@ public static class FieldInfo implements Serializable { EnumSet flags; boolean cacheableFaceting; NamedList topTerms; - Map extras = new HashMap<>(); + Map extras = new TreeMap<>(); public FieldInfo(String n) { name = n; @@ -277,14 +278,14 @@ public String getIndexDirectory() { return (String) indexInfo.get("directory"); } - private Long getIndexLong(String key) { + private Long getLong(String key) { if (indexInfo == null) return null; Number n = (Number) indexInfo.get(key); return n != null ? n.longValue() : null; } public Long getNumDocs() { - return getIndexLong("numDocs"); + return getLong("numDocs"); } public Integer getMaxDoc() { @@ -293,7 +294,7 @@ public Integer getMaxDoc() { } public Long getDeletedDocs() { - return getIndexLong("deletedDocs"); + return getLong("deletedDocs"); } public Integer getNumTerms() { From e16b5b1d505921ce4294737484f8e6b1549761f2 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 3 Apr 2026 18:32:50 -0400 Subject: [PATCH 45/46] gate shards with shards.info --- .../solr/handler/admin/LukeRequestHandler.java | 4 +++- .../admin/LukeRequestHandlerDistribTest.java | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 0065834a5887..ea6c81e7ba66 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ 
-484,7 +484,9 @@ private void aggregateDistributedResponses( } } - rsp.add(RSP_SHARDS, shardsInfo); + if (req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { + rsp.add(RSP_SHARDS, shardsInfo); + } } private void processShardFields( diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java index c704ba0cd5c8..152960277569 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -49,6 +49,7 @@ private LukeResponse requestLuke(ModifiableSolrParams extra) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("qt", "/admin/luke"); params.set("numTerms", "0"); + params.set("shards.info", "true"); params.add(extra); // query() sends to control and a random shard with shards param, compares responses @@ -71,6 +72,7 @@ private LukeResponse requestLuke(ModifiableSolrParams extra) throws Exception { private void assertLukeXPath(ModifiableSolrParams extra, String... 
xpaths) throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("shards", shards); + params.set("shards.info", "true"); params.add(extra); LukeRequest req = new LukeRequest(params); req.setNumTerms(0); @@ -485,15 +487,22 @@ public void testSingleShardViaParamStillDistributes() throws Exception { // Pass the shards param with a single shard — should still fan out to it // rather than incorrectly falling through to local mode - LukeRequest req = new LukeRequest(params("shards", shards)); + LukeRequest req = new LukeRequest(params("shards", shards, "shards.info", "true")); req.setNumTerms(0); LukeResponse rsp = req.process(controlClient); assertNotNull("index info should be present", rsp.getIndexInfo()); assertEquals("should see the 1 doc we indexed", 1, (long) rsp.getNumDocs()); assertNotNull( - "shards section should be present when targeting a shard via shards param", - rsp.getShardResponses()); + "shards section should be present when shards.info=true", rsp.getShardResponses()); assertEquals("should have 1 shard entry", 1, rsp.getShardResponses().size()); + + // Without shards.info, shards section should be absent + req = new LukeRequest(params("shards", shards)); + req.setNumTerms(0); + rsp = req.process(controlClient); + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertEquals("should see the 1 doc we indexed", 1, (long) rsp.getNumDocs()); + assertNull("shards section should be absent without shards.info", rsp.getShardResponses()); } } From 76b263fd680f74ad1c9c37d8d8d65982e66a22f9 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 3 Apr 2026 23:16:56 -0400 Subject: [PATCH 46/46] some refactor and update docs --- .../handler/admin/LukeRequestHandler.java | 91 +++++++++++-------- .../pages/luke-request-handler.adoc | 4 +- 2 files changed, 53 insertions(+), 42 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java 
b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index ea6c81e7ba66..eb58059457e3 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -32,6 +32,7 @@ import java.util.Base64; import java.util.Comparator; import java.util.Date; +import java.util.EnumMap; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -247,25 +248,54 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw rsp.setHttpCaching(false); } + /** + * Field-level response keys, declared in the order they appear in the local (non-distributed) + * response. EnumMap iteration follows declaration order, giving deterministic output. + */ + enum FieldDataKey { + TYPE(KEY_TYPE), + SCHEMA(KEY_SCHEMA_FLAGS), + DYNAMIC_BASE(KEY_DYNAMIC_BASE), + INDEX(KEY_INDEX_FLAGS), + DOCS(KEY_DOCS); + + final String responseKey; + + FieldDataKey(String responseKey) { + this.responseKey = responseKey; + } + } + /** Per-field accumulation state across shards: aggregated response data and field validation. */ private static class AggregatedFieldData { - // keyed by individual field info properties, i.e. type, schema, etc. 
- final SimpleOrderedMap aggregated = new SimpleOrderedMap<>(); + final EnumMap properties = new EnumMap<>(FieldDataKey.class); final String originalShardAddr; final LukeResponse.FieldInfo originalFieldInfo; - private Object indexFlags; private String indexFlagsShardAddr; - private long docsSum; AggregatedFieldData(String shardAddr, LukeResponse.FieldInfo fieldInfo) { this.originalShardAddr = shardAddr; this.originalFieldInfo = fieldInfo; - Object flags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS); - if (flags != null) { - this.indexFlags = flags; + properties.put(FieldDataKey.TYPE, fieldInfo.getType()); + properties.put(FieldDataKey.SCHEMA, fieldInfo.getSchema()); + Object dynBase = fieldInfo.getExtras().get(KEY_DYNAMIC_BASE); + if (dynBase != null) { + properties.put(FieldDataKey.DYNAMIC_BASE, dynBase); + } + Object indexFlags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS); + if (indexFlags != null) { + properties.put(FieldDataKey.INDEX, indexFlags); this.indexFlagsShardAddr = shardAddr; } } + + SimpleOrderedMap toResponse() { + SimpleOrderedMap result = new SimpleOrderedMap<>(); + for (Map.Entry entry : properties.entrySet()) { + result.add(entry.getKey().responseKey, entry.getValue()); + } + return result; + } } private static class ShardData { @@ -451,26 +481,19 @@ private void aggregateDistributedResponses( if (firstDoc != null) { rsp.add(RSP_DOC, firstDoc); } - if (shouldNarrowLongsForOldClient(req)) { + boolean narrowLongs = shouldNarrowLongsForOldClient(req); + if (narrowLongs) { narrowLongToInt(aggregatedIndex, KEY_NUM_DOCS); narrowLongToInt(aggregatedIndex, KEY_DELETED_DOCS); - for (AggregatedFieldData fd : aggregatedFields.values()) { - narrowLongToInt(fd.aggregated, KEY_DOCS); - } } if (!aggregatedFields.isEmpty()) { - // Finalize field entries: add index flags and docs in a consistent order. - // These are deferred from aggregateShardField because index flags may arrive - // from any shard, and we want a deterministic key order in the response. 
- for (AggregatedFieldData fd : aggregatedFields.values()) { - if (fd.indexFlags != null) { - fd.aggregated.add(KEY_INDEX_FLAGS, fd.indexFlags); - } - fd.aggregated.add(KEY_DOCS, fd.docsSum); - } SimpleOrderedMap aggregatedFieldsNL = new SimpleOrderedMap<>(); for (Map.Entry entry : aggregatedFields.entrySet()) { - aggregatedFieldsNL.add(entry.getKey(), entry.getValue().aggregated); + SimpleOrderedMap fieldResponse = entry.getValue().toResponse(); + if (narrowLongs) { + narrowLongToInt(fieldResponse, KEY_DOCS); + } + aggregatedFieldsNL.add(entry.getKey(), fieldResponse); } rsp.add(RSP_FIELDS, aggregatedFieldsNL); } @@ -523,20 +546,8 @@ private void aggregateShardField( if (fieldData == null) { fieldData = new AggregatedFieldData(shardAddr, fi); aggregatedFields.put(fieldName, fieldData); - - // First shard to report this field — populate response keys: - // "type" → field type name (e.g. "string", "text_general") - // "schema" → schema flags string (e.g. "I-S-M-----OF-----l") - // "dynamicBase" → dynamic field glob if this is a dynamic field (e.g. 
"*_s") - // "index" → index-derived flags from the first shard that has them - fieldData.aggregated.add(KEY_TYPE, fi.getType()); - fieldData.aggregated.add(KEY_SCHEMA_FLAGS, fi.getSchema()); - Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); - if (dynBase != null) { - fieldData.aggregated.add(KEY_DYNAMIC_BASE, dynBase); - } } else { - // Subsequent shards: validate that "type", "schema", and "dynamicBase" match + // Subsequent shards: validate that type, schema, and dynamicBase match validateFieldAttr( fieldName, KEY_TYPE, @@ -561,24 +572,24 @@ private void aggregateShardField( Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); if (indexFlags != null) { - if (fieldData.indexFlags == null) { - fieldData.indexFlags = indexFlags; + Object existing = fieldData.properties.get(FieldDataKey.INDEX); + if (existing == null) { + fieldData.properties.put(FieldDataKey.INDEX, indexFlags); fieldData.indexFlagsShardAddr = shardAddr; } else { validateFieldAttr( fieldName, KEY_INDEX_FLAGS, indexFlags, - fieldData.indexFlags, + existing, shardAddr, fieldData.indexFlagsShardAddr); } } } - // Sum per-shard doc counts (number of documents containing this field). - // Added to the response in finalization to ensure consistent key ordering. - fieldData.docsSum += fi.getDocs(); + // Sum per-shard doc counts + fieldData.properties.merge(FieldDataKey.DOCS, fi.getDocs(), (a, b) -> (long) a + (long) b); } /** diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index 4a79cea529e5..7b51d013f22b 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -156,7 +156,7 @@ In distributed mode, the response contains: * `doc` -- Present when `id` is specified. 
Contains the document from whichever shard owns it, including a `lucene` section (per-field analysis with shard-local `docFreq` values) and a `solr` section (stored fields). Only `id` is supported for distributed doc lookup; `docId` is rejected because Lucene document IDs are shard-local. * `schema` -- Schema information from the first responding shard (identical across shards sharing the same configset). * `info` -- Static info from the first responding shard. -* `shards` -- Per-shard details in response-completion order. Each entry contains: +* `shards` -- Only present when `shards.info=true`. Contains per-shard details, with each entry keyed by shard address: ** `index` -- Full index info for that shard (including `directory`, `segmentsFile`, `version`, `current`, `hasDeletions`, `lastModified`, `userData`). ** `fields` -- Only present when `fl` triggers detailed statistics. Contains per-field `topTerms`, `distinct`, and `histogram` from that shard. @@ -168,5 +168,5 @@ You can use `distrib=false` to query individual shards and compare their field c The `index` flags are index-derived (not schema-derived) and may be absent on shards where the field has no indexed data; the first non-null value is used, and any subsequent non-null values are validated for consistency. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not aggregated across shards. -These statistics are shard-local and appear in each shard's entry under the `shards` key. +These statistics are shard-local and appear in each shard's entry under the `shards` key (requires `shards.info=true`). For collection-wide term frequencies or cardinality estimates, Solr's xref:query-guide:faceting.adoc[faceting API] may cover some of these use cases.