diff --git a/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java
new file mode 100644
index 00000000000..f728e0905d4
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Base64;
+import java.util.Collection;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.function.Predicate;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.RealTimeGetComponent;
+import org.apache.solr.handler.component.RealTimeGetComponent.Resolution;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TextField;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.UpdateCommand;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An implementation of {@link UpdateRequestProcessor} which computes a hash of selected doc values,
+ * and uses this hash value to reject/accept doc updates.
+ *
+ *
+ * <ul>
+ *   <li>When no corresponding doc with same id exists (create), computed hash is added to the
+ *       document.
+ *   <li>When a previous doc exists (update), a new hash is computed using new version values and
+ *       compared with old hash.
+ * </ul>
+ *
+ * Depending on the {@code discardSameDocuments} value, this processor may reject or accept doc
+ * implementation can be used for monitoring or rejecting no-op updates (updates that do not change
+ * Solr document).
+ *
+ * Note: hash is computed using {@link Lookup3Signature}.
+ *
+ * @see Lookup3Signature
+ */
+public class ContentHashVersionProcessor extends UpdateRequestProcessor {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ private final SchemaField hashField;
+ private final SolrQueryResponse rsp;
+ private final SolrCore core;
+ private final Predicate includedFields; // Matcher for included fields in hash
+ private final Predicate excludedFields; // Matcher for excluded fields from hash
+ private OldDocProvider oldDocProvider = new DefaultDocProvider();
+ private boolean discardSameDocuments;
+ private int sameCount = 0;
+ private int differentCount = 0;
+ private int unknownCount = 0;
+
+ public ContentHashVersionProcessor(
+ Predicate hashIncludedFields,
+ Predicate hashExcludedFields,
+ String hashFieldName,
+ SolrQueryRequest req,
+ SolrQueryResponse rsp,
+ UpdateRequestProcessor next) {
+ super(next);
+ this.core = req.getCore();
+ this.hashField = new SchemaField(hashFieldName, new TextField());
+ this.rsp = rsp;
+ this.includedFields = hashIncludedFields;
+ this.excludedFields = hashExcludedFields;
+ }
+
+ @Override
+ public void processAdd(AddUpdateCommand cmd) throws IOException {
+ SolrInputDocument newDoc = cmd.getSolrInputDocument();
+ String newHash = computeDocHash(newDoc);
+ newDoc.setField(hashField.getName(), newHash);
+ int i = 0;
+
+ if (!validateHash(cmd.getIndexedId(), newHash)) {
+ return;
+ }
+
+ while (true) {
+ logOverlyFailedRetries(i, cmd);
+ try {
+ super.processAdd(cmd);
+ return;
+ } catch (SolrException e) {
+ if (e.code() != 409) {
+ throw e;
+ }
+ ++i;
+ }
+ }
+ }
+
+ @Override
+ public void finish() throws IOException {
+ try {
+ super.finish();
+ } finally {
+ rsp.addToLog("numAddsExisting", sameCount + differentCount + unknownCount);
+ rsp.addToLog("numAddsExistingWithIdentical", sameCount);
+ rsp.addToLog("numAddsExistingUnknown", unknownCount);
+ }
+ }
+
+ private static void logOverlyFailedRetries(int i, UpdateCommand cmd) {
+ if ((i & 255) == 255) {
+ log.warn("Unusual number of optimistic concurrency retries: retries={} cmd={}", i, cmd);
+ }
+ }
+
+ void setOldDocProvider(OldDocProvider oldDocProvider) {
+ this.oldDocProvider = oldDocProvider;
+ }
+
+ void setDiscardSameDocuments(boolean discardSameDocuments) {
+ this.discardSameDocuments = discardSameDocuments;
+ }
+
+ private boolean validateHash(BytesRef indexedDocId, String newHash) throws IOException {
+ assert null != indexedDocId;
+
+ var docFoundAndOldUserVersions = getOldUserVersionsFromStored(indexedDocId);
+ if (docFoundAndOldUserVersions.found) {
+ String oldHash =
+ docFoundAndOldUserVersions.oldHash; // No hash: might want to keep track of these too
+ if (oldHash == null) {
+ unknownCount++;
+ return true;
+ } else if (Objects.equals(newHash, oldHash)) {
+ sameCount++;
+ return !discardSameDocuments;
+ } else {
+ differentCount++;
+ return true;
+ }
+ }
+ return true; // Doc not found
+ }
+
+ private DocFoundAndOldUserAndSolrVersions getOldUserVersionsFromStored(BytesRef indexedDocId)
+ throws IOException {
+ SolrInputDocument oldDoc = oldDocProvider.getDocument(core, hashField.getName(), indexedDocId);
+ return null == oldDoc
+ ? DocFoundAndOldUserAndSolrVersions.NOT_FOUND
+ : getUserVersionAndSolrVersionFromDocument(oldDoc);
+ }
+
+ private DocFoundAndOldUserAndSolrVersions getUserVersionAndSolrVersionFromDocument(
+ SolrInputDocument oldDoc) {
+ Object o = oldDoc.getFieldValue(hashField.getName());
+ if (o != null) {
+ return new DocFoundAndOldUserAndSolrVersions(o.toString());
+ }
+ return new DocFoundAndOldUserAndSolrVersions();
+ }
+
+ public String computeDocHash(SolrInputDocument doc) {
+ List docIncludedFieldNames =
+ doc.getFieldNames().stream()
+ .filter(includedFields) // Keep fields that match 'included fields' matcher...
+ .filter(
+ excludedFields
+ .negate()) // ...and exclude fields that match 'excluded fields' matcher
+ .sorted() // Sort to ensure consistent field order across different doc field orders
+ .toList();
+
+ final Signature sig = new Lookup3Signature();
+ for (String fieldName : docIncludedFieldNames) {
+ sig.add(fieldName);
+ Object o = doc.getFieldValue(fieldName);
+ if (o instanceof Collection) {
+ for (Object oo : (Collection>) o) {
+ sig.add(String.valueOf(oo));
+ }
+ } else {
+ sig.add(String.valueOf(o));
+ }
+ }
+
+ // Signature, depending on implementation, may return 8-byte or 16-byte value
+ byte[] signature = sig.getSignature();
+ return Base64.getEncoder()
+ .encodeToString(signature); // Makes a base64 hash out of signature value
+ }
+
+ interface OldDocProvider {
+ SolrInputDocument getDocument(SolrCore core, String hashField, BytesRef indexedDocId)
+ throws IOException;
+ }
+
+ private static class DefaultDocProvider implements OldDocProvider {
+ @Override
+ public SolrInputDocument getDocument(SolrCore core, String hashField, BytesRef indexedDocId)
+ throws IOException {
+ return RealTimeGetComponent.getInputDocument(
+ core, indexedDocId, indexedDocId, null, Set.of(hashField), Resolution.PARTIAL);
+ }
+ }
+
+ private static class DocFoundAndOldUserAndSolrVersions {
+ private static final DocFoundAndOldUserAndSolrVersions NOT_FOUND =
+ new DocFoundAndOldUserAndSolrVersions();
+ private final boolean found;
+
+ public String oldHash;
+
+ private DocFoundAndOldUserAndSolrVersions() {
+ this.found = false;
+ }
+
+ private DocFoundAndOldUserAndSolrVersions(String oldHash) {
+ this.found = true;
+ this.oldHash = oldHash;
+ }
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java
new file mode 100644
index 00000000000..48fa52db928
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+/** Factory for {@link ContentHashVersionProcessor} instances. */
+public class ContentHashVersionProcessorFactory extends UpdateRequestProcessorFactory
+    implements SolrCoreAware, UpdateRequestProcessorFactory.RunAlways {
+  private static final char SEPARATOR = ','; // Separator for included/excluded fields
+  static final String CONTENT_HASH_ENABLED_PARAM = "contentHashEnabled";
+
+  private List<String> includeFields =
+      Collections.singletonList("*"); // Included fields defaults to 'all'
+
+  // No excluded field by default, yet hashFieldName is excluded by default (see init())
+  private List<String> excludeFields = new ArrayList<>();
+
+  private String hashFieldName =
+      "content_hash"; // Field name to store computed hash on create/update operations
+  private boolean discardSameDocuments = true;
+
+  public ContentHashVersionProcessorFactory() {}
+
+  /**
+   * Reads configuration: 'includeFields'/'excludeFields' (comma-separated lists, '*' wildcard
+   * supported for include), 'hashFieldName', and 'hashCompareStrategy' ('discard' or 'log').
+   */
+  @Override
+  public void init(NamedList<?> args) {
+    Object tmp = args.remove("includeFields");
+    if (tmp != null) {
+      if (!(tmp instanceof String)) {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR, "'includeFields' must be configured as a <str>");
+      }
+      // Include fields support comma separated list of fields (e.g. "field1,field2,field3").
+      // Also supports "*" to include all fields
+      this.includeFields =
+          StrUtils.splitSmart((String) tmp, SEPARATOR).stream()
+              .map(String::trim)
+              .collect(Collectors.toList());
+    }
+    tmp = args.remove("hashFieldName");
+    if (tmp != null) {
+      if (!(tmp instanceof String)) {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR, "'hashFieldName' must be configured as a <str>");
+      }
+      this.hashFieldName = (String) tmp;
+    }
+
+    tmp = args.remove("excludeFields");
+    if (tmp != null) {
+      if (!(tmp instanceof String)) {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR, "'excludeFields' must be configured as a <str>");
+      }
+      if ("*".equals(((String) tmp).trim())) {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR, "'excludeFields' can't exclude all fields.");
+      }
+      // Exclude fields support comma separated list of fields (e.g.
+      // "excluded_field1,excluded_field2").
+      this.excludeFields =
+          StrUtils.splitSmart((String) tmp, SEPARATOR).stream()
+              .map(String::trim)
+              .collect(Collectors.toList());
+    }
+    excludeFields.add(hashFieldName); // Hash field name is excluded from hash computation
+
+    tmp = args.remove("hashCompareStrategy");
+    if (tmp != null) {
+      if (!(tmp instanceof String)) {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            "'hashCompareStrategy' must be configured as a <str>");
+      }
+      // Value is lower-cased once, so a plain equals() suffices below.
+      String value = ((String) tmp).toLowerCase(Locale.ROOT);
+      if ("discard".equals(value)) {
+        discardSameDocuments = true;
+      } else if ("log".equals(value)) {
+        discardSameDocuments = false;
+      } else {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            "Value '"
+                + value
+                + "' is unsupported for 'hashCompareStrategy', only 'discard' and 'log' are supported.");
+      }
+    }
+
+    super.init(args);
+  }
+
+  /**
+   * Returns a configured {@link ContentHashVersionProcessor}, or {@code next} unchanged when the
+   * request does not enable content hashing via the {@code contentHashEnabled} parameter.
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(
+      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    if (!req.getParams().getBool(CONTENT_HASH_ENABLED_PARAM, false)) {
+      return next;
+    }
+
+    ContentHashVersionProcessor processor =
+        new ContentHashVersionProcessor(
+            buildFieldMatcher(includeFields),
+            buildFieldMatcher(excludeFields),
+            hashFieldName,
+            req,
+            rsp,
+            next);
+    processor.setDiscardSameDocuments(discardSameDocuments);
+    return processor;
+  }
+
+  /** Fails fast at core load when no uniqueKey is defined (doc lookup requires one). */
+  @Override
+  public void inform(SolrCore core) {
+    if (core.getLatestSchema().getUniqueKeyField() == null) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, "schema must have uniqueKey defined.");
+    }
+  }
+
+  public String getHashFieldName() {
+    return hashFieldName;
+  }
+
+  public List<String> getIncludeFields() {
+    return includeFields;
+  }
+
+  public List<String> getExcludeFields() {
+    return excludeFields;
+  }
+
+  public boolean discardSameDocuments() {
+    return discardSameDocuments;
+  }
+
+  /**
+   * Builds a predicate matching a field name against a list of patterns: exact names, "*" (match
+   * everything) and trailing-wildcard prefixes such as "field*".
+   */
+  static Predicate<String> buildFieldMatcher(List<String> fieldNames) {
+    return fieldName -> {
+      for (String currentFieldName : fieldNames) {
+        if ("*".equals(currentFieldName)) {
+          return true;
+        }
+        if (fieldName.equals(currentFieldName)) {
+          return true;
+        }
+        if (currentFieldName.length() > 1
+            && currentFieldName.endsWith("*")
+            && fieldName.startsWith(currentFieldName.substring(0, currentFieldName.length() - 1))) {
+          return true;
+        }
+      }
+      return false;
+    };
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java
new file mode 100644
index 00000000000..2f2e920ac6d
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import static org.apache.solr.SolrTestCaseJ4.assumeWorkingMockito;
+import static org.apache.solr.update.processor.ContentHashVersionProcessorFactory.CONTENT_HASH_ENABLED_PARAM;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.anyBoolean;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.List;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/** Unit tests for {@link ContentHashVersionProcessorFactory} configuration handling. */
+public class ContentHashVersionProcessorFactoryTest {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    assumeWorkingMockito();
+  }
+
+  @Test
+  public void shouldHaveSensibleDefaultValues() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    assertEquals(List.of("*"), factory.getIncludeFields());
+    assertEquals("content_hash", factory.getHashFieldName());
+    assertTrue(factory.discardSameDocuments());
+  }
+
+  @Test
+  public void shouldInitWithHashFieldName() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    args.add("hashFieldName", "_hash_field_");
+    factory.init(args);
+
+    assertEquals("_hash_field_", factory.getHashFieldName());
+  }
+
+  @Test
+  public void shouldInitWithAllField() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    args.add("includeFields", "*");
+    factory.init(args);
+
+    assertEquals(1, factory.getIncludeFields().size());
+    assertEquals("*", factory.getIncludeFields().getFirst());
+  }
+
+  @Test
+  public void shouldInitWithIncludedFields() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    // Whitespace around field names must be stripped
+    args.add("includeFields", " field1,field2 , field3 ");
+    factory.init(args);
+
+    assertEquals(3, factory.getIncludeFields().size());
+    assertEquals(List.of("field1", "field2", "field3"), factory.getIncludeFields());
+  }
+
+  @Test
+  public void shouldInitWithExcludedFields() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    args.add("excludeFields", " field1,field2 , field3 ");
+    factory.init(args);
+
+    // The hash field itself is always appended to the exclusion list
+    assertEquals(4, factory.getExcludeFields().size());
+    assertEquals(List.of("field1", "field2", "field3", "content_hash"), factory.getExcludeFields());
+  }
+
+  // NOTE(review): duplicates shouldSelectDiscardStrategy below — consider removing one.
+  @Test
+  public void shouldSelectRejectSameHashStrategy() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    args.add("hashCompareStrategy", "discard");
+    factory.init(args);
+
+    assertTrue(factory.discardSameDocuments());
+  }
+
+  @Test
+  public void shouldSelectLogStrategy() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    args.add("hashCompareStrategy", "log");
+    factory.init(args);
+
+    assertFalse(factory.discardSameDocuments());
+  }
+
+  @Test
+  public void shouldSelectDiscardStrategy() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    args.add("hashCompareStrategy", "discard");
+    factory.init(args);
+
+    assertTrue(factory.discardSameDocuments());
+  }
+
+  @Test(expected = SolrException.class)
+  public void shouldSelectUnsupportedStrategy() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    args.add("hashCompareStrategy", "unsupported value");
+    factory.init(args);
+  }
+
+  @Test(expected = SolrException.class)
+  public void shouldRejectExcludeAllFields() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    NamedList<Object> args = new NamedList<>();
+    args.add("excludeFields", "*");
+    factory.init(args);
+  }
+
+  @Test
+  public void shouldDisableContentHashByQueryParameter() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    UpdateRequestProcessor next = mock(UpdateRequestProcessor.class);
+    SolrQueryRequest updateRequest = createUpdateRequest(false); // Request disables processor
+
+    UpdateRequestProcessor instance =
+        factory.getInstance(updateRequest, mock(SolrQueryResponse.class), next);
+
+    // Mock equality is identity-based, so this asserts the chain is passed through unchanged
+    assertEquals(instance, next);
+  }
+
+  @Test
+  public void shouldEnableContentHashByQueryParameter() {
+    ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory();
+    UpdateRequestProcessor next = mock(UpdateRequestProcessor.class);
+    SolrQueryRequest updateRequest = createUpdateRequest(true); // Request enables processor
+
+    UpdateRequestProcessor instance =
+        factory.getInstance(updateRequest, mock(SolrQueryResponse.class), next);
+
+    assertNotEquals(instance, next);
+  }
+
+  /** Builds a mocked request whose {@code contentHashEnabled} param returns the given value. */
+  private static SolrQueryRequest createUpdateRequest(boolean enableContentHashParamValue) {
+    SolrQueryRequest req = mock(SolrQueryRequest.class);
+    SolrParams solrParams = mock(SolrParams.class);
+    when(solrParams.getBool(eq(CONTENT_HASH_ENABLED_PARAM), anyBoolean()))
+        .thenReturn(enableContentHashParamValue);
+    when(req.getParams()).thenReturn(solrParams);
+
+    return req;
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java
new file mode 100644
index 00000000000..92ea8282f9e
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java
@@ -0,0 +1,443 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.UUIDField;
+import org.apache.solr.update.AddUpdateCommand;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class ContentHashVersionProcessorTest extends UpdateProcessorTestBase {
+
+ public static final String ID_FIELD = "_id";
+ public static final String HASH_FIELD_NAME = "_hash_";
+ public static final String FIRST_FIELD = "field1";
+ public static final String SECOND_FIELD = "field2";
+ public static final String THIRD_FIELD = "docField3";
+ public static final String FOURTH_FIELD = "field4";
+ private SolrQueryRequest req;
+
+  /**
+   * Builds a processor under test whose old-doc provider always "finds" a stored doc with fixed
+   * field1/field2 values and a hash computed over them — simulating an existing indexed doc.
+   *
+   * @param includedFields field-name patterns to include in the hash
+   * @param excludedFields field-name patterns to exclude from the hash
+   */
+  private ContentHashVersionProcessor getContentHashVersionProcessor(
+      SolrQueryRequest req,
+      SolrQueryResponse rsp,
+      UpdateRequestProcessor next,
+      List<String> includedFields,
+      List<String> excludedFields) {
+    ContentHashVersionProcessor processor =
+        new ContentHashVersionProcessor(
+            ContentHashVersionProcessorFactory.buildFieldMatcher(includedFields),
+            ContentHashVersionProcessorFactory.buildFieldMatcher(excludedFields),
+            HASH_FIELD_NAME,
+            req,
+            rsp,
+            next);
+
+    // Given (previous doc retrieval configuration)
+    processor.setOldDocProvider(
+        (core, hashField, indexedDocId) -> {
+          final SolrInputDocument inputDocument =
+              doc(
+                  f(ID_FIELD, indexedDocId.utf8ToString()),
+                  f(FIRST_FIELD, "Initial values used to compute initial hash"),
+                  f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+          return doc(
+              f(ID_FIELD, inputDocument.getFieldValue(ID_FIELD)),
+              f(FIRST_FIELD, inputDocument.getFieldValue(FIRST_FIELD)),
+              f(SECOND_FIELD, inputDocument.getFieldValue(SECOND_FIELD)),
+              f(hashField, processor.computeDocHash(inputDocument)));
+        });
+    return processor;
+  }
+
+  /** Skips the whole suite on JVMs where Mockito cannot operate. */
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    assumeWorkingMockito();
+  }
+
+  /** Mocks the request, its params, and a schema with a UUID uniqueKey field. */
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+
+    // Given (processor configuration)
+    req = mock(SolrQueryRequest.class);
+    when(req.getParams()).thenReturn(mock(SolrParams.class));
+
+    // Given (schema configuration)
+    IndexSchema indexSchema = mock(IndexSchema.class);
+    when(req.getSchema()).thenReturn(indexSchema);
+    when(indexSchema.getUniqueKeyField()).thenReturn(new SchemaField(ID_FIELD, new UUIDField()));
+    // Unique-key "indexing" is simulated by wrapping the raw id string in a BytesRef
+    when(indexSchema.indexableUniqueKey(anyString()))
+        .then(invocationOnMock -> new BytesRef(invocationOnMock.getArgument(0).toString()));
+  }
+
+  /** The hash must be deterministic and independent of field ordering in the doc. */
+  @Test
+  public void shouldComputeHashForDoc() {
+    // Given
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            mock(SolrQueryResponse.class),
+            mock(UpdateRequestProcessor.class),
+            List.of("*"),
+            List.of(ID_FIELD));
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument1 =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "Values will serve as input to compute a hash"),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+
+    // Then
+    assertEquals("Tak0G5a/DIE=", processor.computeDocHash(inputDocument1));
+
+    // Given (doc for update - with different order)
+    SolrInputDocument inputDocument2 =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"),
+            f(FIRST_FIELD, "Values will serve as input to compute a hash"));
+
+    // Then (hash remain same, since id is excluded from signature fields)
+    assertEquals("Tak0G5a/DIE=", processor.computeDocHash(inputDocument2));
+  }
+
+  /** "field*" in excludeFields must drop every field1..field4; only stable fields remain. */
+  @Test
+  public void shouldUseExcludedFieldsWildcard() {
+    // Given
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            mock(SolrQueryResponse.class),
+            mock(UpdateRequestProcessor.class),
+            List.of("*"),
+            List.of("field*"));
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, "0000000001"),
+            f(FIRST_FIELD, UUID.randomUUID().toString()),
+            f(SECOND_FIELD, UUID.randomUUID().toString()),
+            f(THIRD_FIELD, "constant to have a constant hash"),
+            f(FOURTH_FIELD, UUID.randomUUID().toString()));
+
+    // Then (only ID and THIRD_FIELD is used in hash, other fields contain random values)
+    assertEquals(
+        "bwE8Zjq0aOs=", processor.computeDocHash(inputDocument)); // Hash if only ID field was used
+  }
+
+  /** "field*" in includeFields selects field1/2/4; THIRD_FIELD is excluded explicitly. */
+  @Test
+  public void shouldUseIncludedFieldsWildcard() {
+    // Given
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            mock(SolrQueryResponse.class),
+            mock(UpdateRequestProcessor.class),
+            List.of("field*"),
+            List.of(THIRD_FIELD));
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, "0000000001"),
+            f(FIRST_FIELD, "constant to have a constant hash for field1"),
+            f(SECOND_FIELD, "constant to have a constant hash for field2"),
+            f(THIRD_FIELD, UUID.randomUUID().toString()),
+            f(FOURTH_FIELD, "constant to have a constant hash for field4"));
+
+    // Then
+    assertEquals("PozPs2qZQtw=", processor.computeDocHash(inputDocument));
+  }
+
+  /** Same as shouldUseIncludedFieldsWildcard but relying only on the include pattern. */
+  @Test
+  public void shouldUseIncludedFieldsWildcard2() {
+    // Given (variant of previous shouldUseIncludedFieldsWildcard, without the excludedField config)
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            mock(SolrQueryResponse.class),
+            mock(UpdateRequestProcessor.class),
+            List.of("field*"),
+            List.of());
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, "0000000001"),
+            f(FIRST_FIELD, "constant to have a constant hash for field1"),
+            f(SECOND_FIELD, "constant to have a constant hash for field2"),
+            f(THIRD_FIELD, UUID.randomUUID().toString()),
+            f(FOURTH_FIELD, "constant to have a constant hash for field4"));
+
+    // Then
+    assertEquals("PozPs2qZQtw=", processor.computeDocHash(inputDocument));
+  }
+
+  /**
+   * Patterns that match the same field more than once (duplicates or overlapping wildcards) must
+   * not change the hash: each doc field contributes at most once, regardless of pattern order.
+   */
+  @Test
+  public void shouldDedupIncludedFields() {
+    // Given (processor to include field1 and field2 only)
+    ContentHashVersionProcessor processorWithDuplicatedFieldName =
+        getContentHashVersionProcessor(
+            req,
+            mock(SolrQueryResponse.class),
+            mock(UpdateRequestProcessor.class),
+            List.of(FIRST_FIELD, FIRST_FIELD, SECOND_FIELD),
+            List.of());
+
+    ContentHashVersionProcessor processorWithWildcard =
+        getContentHashVersionProcessor(
+            req,
+            mock(SolrQueryResponse.class),
+            mock(UpdateRequestProcessor.class),
+            List.of(
+                SECOND_FIELD,
+                FIRST_FIELD,
+                "field1*"), // Also change order of config (test reorder of field names)
+            List.of());
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, "0000000001"),
+            f(FIRST_FIELD, "constant to have a constant hash for field1"),
+            f(SECOND_FIELD, "constant to have a constant hash for field2"),
+            f(THIRD_FIELD, UUID.randomUUID().toString()),
+            f(FOURTH_FIELD, "constant to have a constant hash for field4"));
+
+    // Then
+    assertEquals("XavrOYGlkXM=", processorWithDuplicatedFieldName.computeDocHash(inputDocument));
+    assertEquals("XavrOYGlkXM=", processorWithWildcard.computeDocHash(inputDocument));
+  }
+
+  /** For a brand-new doc (old-doc provider returns null) the hash field is simply added. */
+  @Test
+  public void shouldCreateSignatureForNewDoc() throws IOException {
+    // Given
+    SolrQueryResponse response = mock(SolrQueryResponse.class);
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            response,
+            mock(UpdateRequestProcessor.class),
+            Arrays.asList(FIRST_FIELD, SECOND_FIELD),
+            List.of());
+    processor.setDiscardSameDocuments(false);
+    processor.setOldDocProvider((core, hashField, indexedDocId) -> null); // Simulates "not found"
+
+    // Given (command)
+    AddUpdateCommand cmd = new AddUpdateCommand(req);
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "Values will serve as input to compute a hash"),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+    cmd.solrDoc = inputDocument;
+
+    // When
+    processor.processAdd(cmd);
+    processor.finish();
+
+    // Then
+    assertNotNull(inputDocument.getField(HASH_FIELD_NAME)); // signature field got added
+    assertEquals(
+        processor.computeDocHash(inputDocument),
+        inputDocument.getField(HASH_FIELD_NAME).getValue()); // ... and contains expected value
+
+    // Then (asserts on hash comparison results)
+    verify(response, times(1)).addToLog(eq("numAddsExisting"), eq(0));
+    verify(response, times(1))
+        .addToLog(eq("numAddsExistingWithIdentical"), eq(0)); // And no hash clash with old doc
+  }
+
+  /**
+   * With the 'log' strategy, identical and different updates both pass through, and finish()
+   * reports the counts: two updates to an existing doc, one of them with identical content.
+   */
+  @Test
+  public void shouldAddToResponseLog() throws IOException {
+    // Given
+    SolrQueryResponse response = mock(SolrQueryResponse.class);
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            response,
+            mock(UpdateRequestProcessor.class),
+            Arrays.asList(FIRST_FIELD, SECOND_FIELD),
+            List.of());
+    processor.setDiscardSameDocuments(false);
+
+    // Given (command to update existing doc)
+    AddUpdateCommand cmdDoesNotChangeValues = new AddUpdateCommand(req);
+
+    // Given (doc for update - matches the existing doc, see getContentHashVersionProcessor())
+    SolrInputDocument initialDocument =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "Initial values used to compute initial hash"),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+    cmdDoesNotChangeValues.solrDoc =
+        doc(
+            f(ID_FIELD, initialDocument.getFieldValue(ID_FIELD)),
+            f(FIRST_FIELD, initialDocument.getFieldValue(FIRST_FIELD)),
+            f(SECOND_FIELD, initialDocument.getFieldValue(SECOND_FIELD)),
+            f(HASH_FIELD_NAME, processor.computeDocHash(initialDocument)));
+
+    // Given (command to update existing doc with different content)
+    AddUpdateCommand cmdChangesDocValues = new AddUpdateCommand(req);
+
+    // Given (doc for update - does *not* match the existing doc, see
+    // getContentHashVersionProcessor())
+    cmdChangesDocValues.solrDoc =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "This is a doc with values"),
+            f(SECOND_FIELD, "that differs from stored doc, so it's considered new"));
+
+    // When
+    processor.processAdd(cmdDoesNotChangeValues);
+    processor.processAdd(cmdChangesDocValues);
+    processor.finish();
+
+    // Then (read as follows: 2 updates occurred for an existing doc. Among these updates, 1 update
+    // tried to replace
+    // doc with the same content)
+    verify(response, times(1)).addToLog(eq("numAddsExisting"), eq(2));
+    verify(response, times(1)).addToLog(eq("numAddsExistingWithIdentical"), eq(1));
+  }
+
+ /**
+ * A new document still gets its signature field populated on processAdd, but since finish() is
+ * never called here, nothing is written to the response log.
+ */
+ @Test
+ public void shouldNotUpdateSignatureForNewDoc() throws IOException {
+ // Given: hash only over SECOND_FIELD, no excludes
+ SolrQueryResponse rsp = mock(SolrQueryResponse.class);
+ ContentHashVersionProcessor hashProcessor =
+ getContentHashVersionProcessor(
+ req, rsp, mock(UpdateRequestProcessor.class), List.of(SECOND_FIELD), List.of());
+
+ // Given (doc for update, wrapped in an add command)
+ SolrInputDocument newDoc =
+ doc(
+ f(ID_FIELD, UUID.randomUUID().toString()),
+ f(FIRST_FIELD, "Values will serve as input to compute a hash"),
+ f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+ AddUpdateCommand addCmd = new AddUpdateCommand(req);
+ addCmd.solrDoc = newDoc;
+
+ // When
+ hashProcessor.processAdd(addCmd);
+
+ // Then: signature field is present and holds the recomputed hash
+ assertNotNull(newDoc.getField(HASH_FIELD_NAME));
+ assertEquals(
+ hashProcessor.computeDocHash(newDoc), newDoc.getField(HASH_FIELD_NAME).getValue());
+ // Then: no counters logged, because finish() was not invoked
+ verify(rsp, never()).addToLog(eq("numAddsExisting"), eq(0));
+ verify(rsp, never()).addToLog(eq("numAddsExistingWithIdentical"), eq(0));
+ }
+
+ /**
+ * Same as the new-doc case, but with FIRST_FIELD excluded from hashing: the signature field is
+ * still added and matches computeDocHash(), and no counters are logged without finish().
+ */
+ @Test
+ public void shouldExcludeFieldsUpdateSignatureForNewDoc() throws IOException {
+ // Given: include both fields, then exclude FIRST_FIELD again
+ SolrQueryResponse rsp = mock(SolrQueryResponse.class);
+ ContentHashVersionProcessor hashProcessor =
+ getContentHashVersionProcessor(
+ req,
+ rsp,
+ mock(UpdateRequestProcessor.class),
+ List.of(FIRST_FIELD, SECOND_FIELD),
+ List.of(FIRST_FIELD));
+ hashProcessor.setDiscardSameDocuments(false);
+
+ // Given (doc for update, wrapped in an add command)
+ SolrInputDocument newDoc =
+ doc(
+ f(ID_FIELD, UUID.randomUUID().toString()),
+ f(FIRST_FIELD, "Values will serve as input to compute a hash"),
+ f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+ AddUpdateCommand addCmd = new AddUpdateCommand(req);
+ addCmd.solrDoc = newDoc;
+
+ // When
+ hashProcessor.processAdd(addCmd);
+
+ // Then: signature field is present and holds the recomputed hash
+ assertNotNull(newDoc.getField(HASH_FIELD_NAME));
+ assertEquals(
+ hashProcessor.computeDocHash(newDoc), newDoc.getField(HASH_FIELD_NAME).getValue());
+ // Then: no counters logged, because finish() was not invoked
+ verify(rsp, never()).addToLog(eq("numAddsExisting"), eq(1));
+ verify(rsp, never()).addToLog(eq("numAddsExistingWithIdentical"), eq(0));
+ }
+
+ /**
+ * With discard mode on, an add whose content hash matches the stored document must be dropped:
+ * the downstream processor never sees the command.
+ */
+ @Test
+ public void shouldCommitWithDiscardModeEnabled() throws IOException {
+ // Given: discard enabled, FIRST_FIELD excluded from the hash
+ UpdateRequestProcessor downstream = mock(UpdateRequestProcessor.class);
+ ContentHashVersionProcessor hashProcessor =
+ getContentHashVersionProcessor(
+ req,
+ mock(SolrQueryResponse.class),
+ downstream,
+ List.of(FIRST_FIELD, SECOND_FIELD),
+ List.of(FIRST_FIELD));
+ hashProcessor.setDiscardSameDocuments(true);
+
+ // Given: a stored doc, and an add command carrying byte-identical content plus its hash
+ // (matches the existing doc, see getContentHashVersionProcessor())
+ SolrInputDocument storedDoc =
+ doc(
+ f(ID_FIELD, UUID.randomUUID().toString()),
+ f(FIRST_FIELD, "Initial values used to compute initial hash"),
+ f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+ AddUpdateCommand identicalUpdateCmd = new AddUpdateCommand(req);
+ identicalUpdateCmd.solrDoc =
+ doc(
+ f(ID_FIELD, storedDoc.getFieldValue(ID_FIELD)),
+ f(FIRST_FIELD, storedDoc.getFieldValue(FIRST_FIELD)),
+ f(SECOND_FIELD, storedDoc.getFieldValue(SECOND_FIELD)),
+ f(HASH_FIELD_NAME, hashProcessor.computeDocHash(storedDoc)));
+
+ // When
+ hashProcessor.processAdd(identicalUpdateCmd);
+
+ // Then: the identical update was discarded before reaching the next processor
+ verify(downstream, never()).processAdd(eq(identicalUpdateCmd));
+ }
+}